Diffstat (limited to 'contrib/llvm/lib/Target')
410 files changed, 31909 insertions, 22361 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h index 08dc340..16d0da3 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.h +++ b/contrib/llvm/lib/Target/ARM/ARM.h @@ -15,7 +15,7 @@ #ifndef TARGET_ARM_H #define TARGET_ARM_H -#include "ARMBaseInfo.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" @@ -29,19 +29,7 @@ class ARMBaseTargetMachine; class FunctionPass; class JITCodeEmitter; class MachineInstr; -class MCCodeEmitter; class MCInst; -class MCInstrInfo; -class MCObjectWriter; -class MCSubtargetInfo; -class TargetAsmBackend; -class formatted_raw_ostream; - -MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx); - -TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &); FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); @@ -53,7 +41,6 @@ FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); -FunctionPass *createNEONMoveFixPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); @@ -61,12 +48,6 @@ FunctionPass *createThumb2SizeReductionPass(); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); -/// createARMMachObjectWriter - Construct an ARM Mach-O object writer. -MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint32_t CPUType, - uint32_t CPUSubtype); - } // end namespace llvm; #endif diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index cf333cc..5c727ad 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -23,6 +23,9 @@ include "llvm/Target/Target.td" def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true", "Thumb mode">; +def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true", + "Native client mode">; + //===----------------------------------------------------------------------===// // ARM Subtarget features. // @@ -85,12 +88,16 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", /// Some M architectures don't have the DSP extension (v7E-M vs. v7M) def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true", - "Supports v7 DSP instructions in Thumb2.">; + "Supports v7 DSP instructions in Thumb2">; // Multiprocessing extension. def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; +// M-series ISA? +def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true", + "Is microcontroller profile ('M' series)">; + // ARM ISAs. def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; @@ -105,7 +112,7 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", [HasV5TEOps]>; def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", "Support ARM v6t2 instructions", - [HasV6Ops, FeatureThumb2, FeatureDSPThumb2]>; + [HasV6Ops, FeatureThumb2]>; def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", [HasV6T2Ops]>; @@ -182,12 +189,14 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, // V6M Processors. 
def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM, - FeatureDB]>; + FeatureDB, FeatureMClass]>; // V6T2 Processors. -def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops]>; +def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops, + FeatureDSPThumb2]>; def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, - FeatureHasSlowFPVMLx]>; + FeatureHasSlowFPVMLx, + FeatureDSPThumb2]>; // V7a Processors. def : Processor<"cortex-a8", CortexA8Itineraries, @@ -203,14 +212,14 @@ def : Processor<"cortex-a9-mp", CortexA9Itineraries, // V7M Processors. def : ProcNoItin<"cortex-m3", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv]>; + FeatureHWDiv, FeatureMClass]>; // V7EM Processors. def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, FeatureT2XtPk, FeatureVFP2, - FeatureVFPOnlySP]>; + FeatureVFPOnlySP, FeatureMClass]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index dbc3ee4..ea3319f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -15,15 +15,15 @@ #define DEBUG_TYPE "asm-printer" #include "ARM.h" #include "ARMAsmPrinter.h" -#include "ARMAddressingModes.h" #include "ARMBuildAttrs.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" -#include "ARMMCExpr.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "InstPrinter/ARMInstPrinter.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMMCExpr.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Module.h" @@ -45,13 +45,13 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <cctype> using namespace llvm; @@ -92,7 +92,7 @@ namespace { case ARMBuildAttrs::Advanced_SIMD_arch: case ARMBuildAttrs::VFP_arch: Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String)); - break; + break; default: assert(0 && "Unsupported Text attribute in ASM Mode"); break; } } @@ -100,13 +100,41 @@ namespace { }; class ObjectAttributeEmitter : public AttributeEmitter { + // This structure holds all attributes, accounting for + // their string/numeric value, so we can later emmit them + // in declaration order, keeping all in the same vector + struct AttributeItemType { + enum { + HiddenAttribute = 0, + NumericAttribute, + TextAttribute + } Type; + unsigned Tag; + unsigned IntValue; + StringRef StringValue; + } AttributeItem; + MCObjectStreamer &Streamer; StringRef CurrentVendor; - SmallString<64> Contents; + SmallVector<AttributeItemType, 64> Contents; + + // Account for the ULEB/String size of each item, + // not just the number of items + size_t ContentsSize; + // FIXME: this should be in a more generic place, but + // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf + size_t getULEBSize(int Value) { + size_t Size = 0; + do { + Value >>= 7; + Size += sizeof(int8_t); // 
Is this really necessary? + } while (Value); + return Size; + } public: ObjectAttributeEmitter(MCObjectStreamer &Streamer_) : - Streamer(Streamer_), CurrentVendor("") { } + Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { } void MaybeSwitchVendor(StringRef Vendor) { assert(!Vendor.empty() && "Vendor cannot be empty."); @@ -124,20 +152,32 @@ namespace { } void EmitAttribute(unsigned Attribute, unsigned Value) { - // FIXME: should be ULEB - Contents += Attribute; - Contents += Value; + AttributeItemType attr = { + AttributeItemType::NumericAttribute, + Attribute, + Value, + StringRef("") + }; + ContentsSize += getULEBSize(Attribute); + ContentsSize += getULEBSize(Value); + Contents.push_back(attr); } void EmitTextAttribute(unsigned Attribute, StringRef String) { - Contents += Attribute; - Contents += UppercaseString(String); - Contents += 0; + AttributeItemType attr = { + AttributeItemType::TextAttribute, + Attribute, + 0, + String + }; + ContentsSize += getULEBSize(Attribute); + // String + \0 + ContentsSize += String.size()+1; + + Contents.push_back(attr); } void Finish() { - const size_t ContentsSize = Contents.size(); - // Vendor size + Vendor name + '\0' const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1; @@ -151,7 +191,23 @@ namespace { Streamer.EmitIntValue(ARMBuildAttrs::File, 1); Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4); - Streamer.EmitBytes(Contents, 0); + // Size should have been accounted for already, now + // emit each field as its type (ULEB or String) + for (unsigned int i=0; i<Contents.size(); ++i) { + AttributeItemType item = Contents[i]; + Streamer.EmitULEB128IntValue(item.Tag, 0); + switch (item.Type) { + case AttributeItemType::NumericAttribute: + Streamer.EmitULEB128IntValue(item.IntValue, 0); + break; + case AttributeItemType::TextAttribute: + Streamer.EmitBytes(UppercaseString(item.StringValue), 0); + Streamer.EmitIntValue(0, 1); // '\0' + break; + default: + assert(0 && "Invalid attribute type"); + } + } Contents.clear(); } @@ -184,7 +240,7 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { // S registers are described as bit-pieces of a register // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) - + unsigned SReg = Reg - ARM::S0; bool odd = SReg & 0x1; unsigned Rx = 256 + (SReg >> 1); @@ -209,12 +265,13 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); // Q registers Q0-Q15 are described by composing two D registers together. 
- // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8) + // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) + // DW_OP_piece(8) unsigned QReg = Reg - ARM::Q0; unsigned D1 = 256 + 2 * QReg; unsigned D2 = D1 + 1; - + OutStreamer.AddComment("DW_OP_regx for Q register: D1"); EmitInt8(dwarf::DW_OP_regx); EmitULEB128(D1); @@ -233,6 +290,8 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { } void ARMAsmPrinter::EmitFunctionEntryLabel() { + OutStreamer.ForceCodeRegion(); + if (AFI->isThumbFunction()) { OutStreamer.EmitAssemblerFlag(MCAF_Code16); OutStreamer.EmitThumbFunc(CurrentFnSym); @@ -395,16 +454,16 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // This takes advantage of the 2 operand-ness of ldm/stm and that we've // already got the operands in registers that are operands to the // inline asm statement. - + O << "{" << ARMInstPrinter::getRegisterName(RegBegin); - + // FIXME: The register allocator not only may not have given us the // registers in sequence, but may not be in ascending registers. This // will require changes in the register allocator that'll need to be // propagated down here if the operands change. unsigned RegOps = OpNum + 1; while (MI->getOperand(RegOps).isReg()) { - O << ", " + O << ", " << ARMInstPrinter::getRegisterName(MI->getOperand(RegOps).getReg()); RegOps++; } @@ -413,14 +472,34 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return false; } + case 'R': // The most significant register of a pair. + case 'Q': { // The least significant register of a pair. + if (OpNum == 0) + return true; + const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1); + if (!FlagsOP.isImm()) + return true; + unsigned Flags = FlagsOP.getImm(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + if (NumVals != 2) + return true; + unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1; + if (RegOp >= MI->getNumOperands()) + return true; + const MachineOperand &MO = MI->getOperand(RegOp); + if (!MO.isReg()) + return true; + unsigned Reg = MO.getReg(); + O << ARMInstPrinter::getRegisterName(Reg); + return false; + } + // These modifiers are not yet supported. case 'p': // The high single-precision register of a VFP double-precision // register. case 'e': // The low doubleword register of a NEON quad register. case 'f': // The high doubleword register of a NEON quad register. case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1. - case 'Q': // The least significant register of a pair. - case 'R': // The most significant register of a pair. case 'H': // The highest-numbered register of a pair. return true; } @@ -437,7 +516,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. - + switch (ExtraCode[0]) { case 'A': // A memory operand for a VLD1/VST1 instruction. default: return true; // Unknown modifier. 
@@ -448,7 +527,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } } - + const MachineOperand &MO = MI->getOperand(OpNum); assert(MO.isReg() && "unexpected inline asm memory operand"); O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]"; @@ -772,13 +851,19 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); MCSym = OutContext.GetOrCreateSymbol(OS.str()); } else if (ACPV->isBlockAddress()) { - MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress()); + const BlockAddress *BA = + cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(); + MCSym = GetBlockAddressSymbol(BA); } else if (ACPV->isGlobalValue()) { - const GlobalValue *GV = ACPV->getGV(); + const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV(); MCSym = GetARMGVSymbol(GV); + } else if (ACPV->isMachineBasicBlock()) { + const MachineBasicBlock *MBB = cast<ARMConstantPoolMBB>(ACPV)->getMBB(); + MCSym = MBB->getSymbol(); } else { assert(ACPV->isExtSymbol() && "unrecognized constant pool value"); - MCSym = GetExternalSymbolSymbol(ACPV->getSymbol()); + const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(); + MCSym = GetExternalSymbolSymbol(Sym); } // Create an MCSymbol for the reference. @@ -822,6 +907,9 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); + // Tag the jump table appropriately for precise disassembly. + OutStreamer.EmitJumpTable32Region(); + // Emit a label for the jump table. MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); OutStreamer.EmitLabel(JTISymbol); @@ -847,6 +935,11 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol, OutContext), OutContext); + // If we're generating a table of Thumb addresses in static relocation + // model, we need to add one to keep interworking correctly. + else if (AFI->isThumbFunction()) + Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext), + OutContext); OutStreamer.EmitValue(Expr, 4); } } @@ -859,6 +952,14 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { unsigned JTI = MO1.getIndex(); // Emit a label for the jump table. 
+ if (MI->getOpcode() == ARM::t2TBB_JT) { + OutStreamer.EmitJumpTable8Region(); + } else if (MI->getOpcode() == ARM::t2TBH_JT) { + OutStreamer.EmitJumpTable16Region(); + } else { + OutStreamer.EmitJumpTable32Region(); + } + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); OutStreamer.EmitLabel(JTISymbol); @@ -881,6 +982,8 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { MCInst BrInst; BrInst.setOpcode(ARM::t2B); BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr)); + BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + BrInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.EmitInstruction(BrInst); continue; } @@ -994,7 +1097,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { i != NumOps; ++i) RegList.push_back(MI->getOperand(i).getReg()); break; - case ARM::STR_PRE: + case ARM::STR_PRE_IMM: + case ARM::STR_PRE_REG: assert(MI->getOperand(2).getReg() == ARM::SP && "Only stack pointer as a source reg is supported"); RegList.push_back(SrcReg); @@ -1074,10 +1178,20 @@ extern cl::opt<bool> EnableARMEHABI; #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { + if (MI->getOpcode() != ARM::CONSTPOOL_ENTRY) + OutStreamer.EmitCodeRegion(); + + // Emit unwinding stuff for frame-related instructions + if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) + EmitUnwindingInstruction(MI); + // Do any auto-generated pseudo lowerings. if (emitPseudoExpansionLowering(OutStreamer, MI)) return; + assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && + "Pseudo flag setting opcode should be expanded early"); + // Check for manual lowerings. unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -1372,6 +1486,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); EmitAlignment(2); + + // Mark the constant pool entry as data if we're not already in a data + // region. + OutStreamer.EmitDataRegion(); OutStreamer.EmitLabel(GetCPISymbol(LabelId)); const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; @@ -1379,7 +1497,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); else EmitGlobalConstant(MCPE.Val.ConstVal); - return; } case ARM::t2BR_JT: { @@ -1590,6 +1707,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; TmpInst.setOpcode(ARM::tB); TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr)); + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.EmitInstruction(TmpInst); } { @@ -1804,10 +1923,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; LowerARMMachineInstrToMCInst(MI, TmpInst, *this); - // Emit unwinding stuff for frame-related instructions - if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) - EmitUnwindingInstruction(MI); - OutStreamer.EmitInstruction(TmpInst); } @@ -1815,20 +1930,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Target Registry Stuff //===----------------------------------------------------------------------===// -static MCInstPrinter *createARMMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI); - return 0; -} - // Force static initialization. 
extern "C" void LLVMInitializeARMAsmPrinter() { RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget); RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget); - - TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 649bd7d..408edfc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -13,11 +13,11 @@ #include "ARMBaseInstrInfo.h" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/GlobalValue.h" @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" @@ -45,6 +46,10 @@ static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); +static cl::opt<bool> +WidenVMOVS("widen-vmovs", cl::Hidden, + cl::desc("Widen ARM vmovs to vmovd when possible")); + /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { unsigned MLxOpc; // MLA / MLS opcode @@ -171,7 +176,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); UpdateMI = BuildMI(MF, MI->getDebugLoc(), - get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg) + get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) .addImm(Pred).addReg(0).addReg(0); } else @@ -399,6 +404,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); int BccOpc = !AFI->isThumbFunction() ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); + bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); @@ -406,9 +412,12 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, "ARM branch conditions have two components!"); if (FBB == 0) { - if (Cond.empty()) // Unconditional branch? - BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); - else + if (Cond.empty()) { // Unconditional branch? + if (isThumb) + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); + else + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); + } else BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); return 1; @@ -417,7 +426,10 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // Two-way conditional branch. 
BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); - BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); + if (isThumb) + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0); + else + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); return 2; } @@ -627,7 +639,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool SPRDest = ARM::SPRRegClass.contains(DestReg); bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); - unsigned Opc; + unsigned Opc = 0; if (SPRDest && SPRSrc) Opc = ARM::VMOVS; else if (GPRDest && SPRSrc) @@ -638,19 +650,40 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVD; else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) Opc = ARM::VORRq; - else if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVQQ; - else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVQQQQ; - else - llvm_unreachable("Impossible reg-to-reg copy"); - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); - MIB.addReg(SrcReg, getKillRegState(KillSrc)); - if (Opc == ARM::VORRq) + if (Opc) { + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); MIB.addReg(SrcReg, getKillRegState(KillSrc)); - if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ) + if (Opc == ARM::VORRq) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); AddDefaultPred(MIB); + return; + } + + // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place. + if (ARM::QQPRRegClass.contains(DestReg, SrcReg) || + ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum."); + unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ? + ARM::qsub_1 : ARM::qsub_3; + for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, i); + unsigned Src = TRI->getSubReg(SrcReg, i); + MachineInstrBuilder Mov = + AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq)) + .addReg(Dst, RegState::Define) + .addReg(Src, getKillRegState(KillSrc)) + .addReg(Src, getKillRegState(KillSrc))); + if (i == EndSubReg) { + Mov->addRegisterDefined(DestReg, TRI); + if (KillSrc) + Mov->addRegisterKilled(SrcReg, TRI); + } + } + return; + } + llvm_unreachable("Impossible reg-to-reg copy"); } static const @@ -683,82 +716,84 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); - // tGPR is used sometimes in ARM instructions that need to avoid using - // certain registers. Just treat it as GPR here. Likewise, rGPR. 
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass - || RC == ARM::rGPRRegisterClass) - RC = ARM::GPRRegisterClass; - - switch (RC->getID()) { - case ARM::GPRRegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) + switch (RC->getSize()) { + case 4: + if (ARM::GPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - break; - case ARM::SPRRegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) + } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - break; - case ARM::DPRRegClassID: - case ARM::DPR_VFP2RegClassID: - case ARM::DPR_8RegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) + } else + llvm_unreachable("Unknown reg class!"); + break; + case 8: + if (ARM::DPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - break; - case ARM::QPRRegClassID: - case ARM::QPR_VFP2RegClassID: - case ARM::QPR_8RegClassID: - if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo)) + } else + llvm_unreachable("Unknown reg class!"); + break; + case 16: + if (ARM::QPRRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo)) .addFrameIndex(FI).addImm(16) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO)); - } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) + } else { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI) .addMemOperand(MMO)); - } - break; - case ARM::QQPRRegClassID: - case ARM::QQPR_VFP2RegClassID: - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - // FIXME: It's possible to only store part of the QQ register if the - // spilled def has a sub-register index. - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) + } + } else + llvm_unreachable("Unknown reg class!"); + break; + case 32: + if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + // FIXME: It's possible to only store part of the QQ register if the + // spilled def has a sub-register index. 
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) .addFrameIndex(FI).addImm(16) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO)); - } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); - AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); - } - break; - case ARM::QQQQPRRegClassID: { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); - AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); - break; - } - default: - llvm_unreachable("Unknown regclass!"); + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + } + } else + llvm_unreachable("Unknown reg class!"); + break; + case 64: + if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); + } else + llvm_unreachable("Unknown reg class!"); + break; + default: + llvm_unreachable("Unknown reg class!"); } } @@ -809,6 +844,12 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } +unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + const MachineMemOperand *Dummy; + return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); +} + void ARMBaseInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, @@ -826,72 +867,77 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); - // tGPR is used sometimes in ARM instructions that need to avoid using - // certain registers. Just treat it as GPR here. 
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass - || RC == ARM::rGPRRegisterClass) - RC = ARM::GPRRegisterClass; - - switch (RC->getID()) { - case ARM::GPRRegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) + switch (RC->getSize()) { + case 4: + if (ARM::GPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - break; - case ARM::SPRRegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) + + } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else + llvm_unreachable("Unknown reg class!"); break; - case ARM::DPRRegClassID: - case ARM::DPR_VFP2RegClassID: - case ARM::DPR_8RegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) + case 8: + if (ARM::DPRRegClass.hasSubClassEq(RC)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else + llvm_unreachable("Unknown reg class!"); break; - case ARM::QPRRegClassID: - case ARM::QPR_VFP2RegClassID: - case ARM::QPR_8RegClassID: - if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg) + case 16: + if (ARM::QPRRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg) .addFrameIndex(FI).addImm(16) .addMemOperand(MMO)); - } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) - .addFrameIndex(FI) - .addMemOperand(MMO)); - } + } else { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) + .addFrameIndex(FI) + .addMemOperand(MMO)); + } + } else + llvm_unreachable("Unknown reg class!"); break; - case ARM::QQPRRegClassID: - case ARM::QQPR_VFP2RegClassID: - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) + case 32: + if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) .addFrameIndex(FI).addImm(16) .addMemOperand(MMO)); - } else { - MachineInstrBuilder MIB = + } else { + MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); - AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - } + .addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + MIB.addReg(DestReg, RegState::Define | RegState::Implicit); + } + } else + llvm_unreachable("Unknown reg class!"); break; - case ARM::QQQQPRRegClassID: { - MachineInstrBuilder MIB = + case 64: + if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { + MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); - MIB = AddDReg(MIB, 
DestReg, ARM::dsub_1, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); - AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); + .addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); + MIB.addReg(DestReg, RegState::Define | RegState::Implicit); + } else + llvm_unreachable("Unknown reg class!"); break; - } default: llvm_unreachable("Unknown regclass!"); } @@ -944,6 +990,78 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return 0; } +unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + const MachineMemOperand *Dummy; + return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); +} + +bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ + // This hook gets to expand COPY instructions before they become + // copyPhysReg() calls. Look for VMOVS instructions that can legally be + // widened to VMOVD. We prefer the VMOVD when possible because it may be + // changed into a VORR that can go down the NEON pipeline. + if (!WidenVMOVS || !MI->isCopy()) + return false; + + // Look for a copy between even S-registers. That is where we keep floats + // when using NEON v2f32 instructions for f32 arithmetic. + unsigned DstRegS = MI->getOperand(0).getReg(); + unsigned SrcRegS = MI->getOperand(1).getReg(); + if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) + return false; + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, + &ARM::DPRRegClass); + unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, + &ARM::DPRRegClass); + if (!DstRegD || !SrcRegD) + return false; + + // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only + // legal if the COPY already defines the full DstRegD, and it isn't a + // sub-register insertion. + if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI)) + return false; + + // A dead copy shouldn't show up here, but reject it just in case. + if (MI->getOperand(0).isDead()) + return false; + + // All clear, widen the COPY. + DEBUG(dbgs() << "widening: " << *MI); + + // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg + // or some other super-register. + int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD); + if (ImpDefIdx != -1) + MI->RemoveOperand(ImpDefIdx); + + // Change the opcode and operands. + MI->setDesc(get(ARM::VMOVD)); + MI->getOperand(0).setReg(DstRegD); + MI->getOperand(1).setReg(SrcRegD); + AddDefaultPred(MachineInstrBuilder(MI)); + + // We are now reading SrcRegD instead of SrcRegS. 
This may upset the + // register scavenger and machine verifier, so we need to indicate that we + // are reading an undefined value from SrcRegD, but a proper value from + // SrcRegS. + MI->getOperand(1).setIsUndef(); + MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit); + + // SrcRegD may actually contain an unrelated value in the ssub_1 + // sub-register. Don't kill it. Only kill the ssub_0 sub-register. + if (MI->getOperand(1).isKill()) { + MI->getOperand(1).setIsKill(false); + MI->addRegisterKilled(SrcRegS, TRI, true); + } + + DEBUG(dbgs() << "replaced by: " << *MI); + return true; +} + MachineInstr* ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -974,17 +1092,24 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { // instructions, so that's probably OK, but is PIC always correct when // we get here? if (ACPV->isGlobalValue()) - NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, - ARMCP::CPValue, 4); + NewCPV = ARMConstantPoolConstant:: + Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, + ARMCP::CPValue, 4); else if (ACPV->isExtSymbol()) - NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(), - ACPV->getSymbol(), PCLabelId, 4); + NewCPV = ARMConstantPoolSymbol:: + Create(MF.getFunction()->getContext(), + cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4); else if (ACPV->isBlockAddress()) - NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, - ARMCP::CPBlockAddress, 4); + NewCPV = ARMConstantPoolConstant:: + Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId, + ARMCP::CPBlockAddress, 4); else if (ACPV->isLSDA()) - NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId, - ARMCP::CPLSDA, 4); + NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId, + ARMCP::CPLSDA, 4); + else if (ACPV->isMachineBasicBlock()) + NewCPV = ARMConstantPoolMBB:: + Create(MF.getFunction()->getContext(), + cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); else llvm_unreachable("Unexpected ARM constantpool value type!!"); CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); @@ -1289,7 +1414,7 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, // Attempt to estimate the relative costs of predication versus branching. unsigned TUnpredCost = Probability.getNumerator() * TCycles; TUnpredCost /= Probability.getDenominator(); - + uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); unsigned FUnpredCost = Comp * FCycles; FUnpredCost /= Probability.getDenominator(); @@ -1330,6 +1455,57 @@ int llvm::getMatchingCondBranchOpcode(int Opc) { } +/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the +/// instruction is encoded with an 'S' bit is determined by the optional CPSR +/// def operand. +/// +/// This will go away once we can teach tblgen how to set the optional CPSR def +/// operand itself. 
+struct AddSubFlagsOpcodePair { + unsigned PseudoOpc; + unsigned MachineOpc; +}; + +static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { + {ARM::ADDSri, ARM::ADDri}, + {ARM::ADDSrr, ARM::ADDrr}, + {ARM::ADDSrsi, ARM::ADDrsi}, + {ARM::ADDSrsr, ARM::ADDrsr}, + + {ARM::SUBSri, ARM::SUBri}, + {ARM::SUBSrr, ARM::SUBrr}, + {ARM::SUBSrsi, ARM::SUBrsi}, + {ARM::SUBSrsr, ARM::SUBrsr}, + + {ARM::RSBSri, ARM::RSBri}, + {ARM::RSBSrr, ARM::RSBrr}, + {ARM::RSBSrsi, ARM::RSBrsi}, + {ARM::RSBSrsr, ARM::RSBrsr}, + + {ARM::t2ADDSri, ARM::t2ADDri}, + {ARM::t2ADDSrr, ARM::t2ADDrr}, + {ARM::t2ADDSrs, ARM::t2ADDrs}, + + {ARM::t2SUBSri, ARM::t2SUBri}, + {ARM::t2SUBSrr, ARM::t2SUBrr}, + {ARM::t2SUBSrs, ARM::t2SUBrs}, + + {ARM::t2RSBSri, ARM::t2RSBri}, + {ARM::t2RSBSrs, ARM::t2RSBrs}, +}; + +unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { + static const int NPairs = + sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); + for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0], + *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) { + if (OldOpc == OpcPair->PseudoOpc) { + return OpcPair->MachineOpc; + } + } + return 0; +} + void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, @@ -1862,7 +2038,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::STMIB_UPD: case ARM::tLDMIA: case ARM::tLDMIA_UPD: - case ARM::tSTMIA: case ARM::tSTMIA_UPD: case ARM::tPOP_RET: case ARM::tPOP: @@ -2128,7 +2303,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::STMDA_UPD: case ARM::STMDB_UPD: case ARM::STMIB_UPD: - case ARM::tSTMIA: case ARM::tSTMIA_UPD: case ARM::tPOP_RET: case ARM::tPOP: @@ -2567,6 +2741,15 @@ hasLowDefLatency(const InstrItineraryData *ItinData, return false; } +bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, + StringRef &ErrInfo) const { + if (convertAddSubFlagsOpcode(MI->getOpcode())) { + ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; + return false; + } + return true; +} + bool ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, unsigned &AddSubOpc, @@ -2582,3 +2765,66 @@ ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, HasLane = Entry.HasLane; return true; } + +//===----------------------------------------------------------------------===// +// Execution domains. +//===----------------------------------------------------------------------===// +// +// Some instructions go down the NEON pipeline, some go down the VFP pipeline, +// and some can go down both. The vmov instructions go down the VFP pipeline, +// but they can be changed to vorr equivalents that are executed by the NEON +// pipeline. +// +// We use the following execution domain numbering: +// +enum ARMExeDomain { + ExeGeneric = 0, + ExeVFP = 1, + ExeNEON = 2 +}; +// +// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h +// +std::pair<uint16_t, uint16_t> +ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { + // VMOVD is a VFP instruction, but can be changed to NEON if it isn't + // predicated. + if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) + return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); + + // No other instructions can be swizzled, so just determine their domain. 
+ unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; + + if (Domain & ARMII::DomainNEON) + return std::make_pair(ExeNEON, 0); + + // Certain instructions can go either way on Cortex-A8. + // Treat them as NEON instructions. + if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) + return std::make_pair(ExeNEON, 0); + + if (Domain & ARMII::DomainVFP) + return std::make_pair(ExeVFP, 0); + + return std::make_pair(ExeGeneric, 0); +} + +void +ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { + // We only know how to change VMOVD into VORR. + assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD"); + if (Domain != ExeNEON) + return; + + // Zap the predicate operands. + assert(!isPredicated(MI) && "Cannot predicate a VORRd"); + MI->RemoveOperand(3); + MI->RemoveOperand(2); + + // Change to a VORRd which requires two identical use operands. + MI->setDesc(get(ARM::VORRd)); + + // Add the extra source operand and new predicates. + // This will go before any implicit ops. + AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 507e897..0f9f321 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -27,146 +27,6 @@ namespace llvm { class ARMSubtarget; class ARMBaseRegisterInfo; -/// ARMII - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace ARMII { - enum { - //===------------------------------------------------------------------===// - // Instruction Flags. - - //===------------------------------------------------------------------===// - // This four-bit field describes the addressing mode used. - AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h - - // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load - // and store ops only. Generic "updating" flag is used for ld/st multiple. - // The index mode enums are declared in ARMBaseInfo.h - IndexModeShift = 5, - IndexModeMask = 3 << IndexModeShift, - - //===------------------------------------------------------------------===// - // Instruction encoding formats. 
- // - FormShift = 7, - FormMask = 0x3f << FormShift, - - // Pseudo instructions - Pseudo = 0 << FormShift, - - // Multiply instructions - MulFrm = 1 << FormShift, - - // Branch instructions - BrFrm = 2 << FormShift, - BrMiscFrm = 3 << FormShift, - - // Data Processing instructions - DPFrm = 4 << FormShift, - DPSoRegFrm = 5 << FormShift, - - // Load and Store - LdFrm = 6 << FormShift, - StFrm = 7 << FormShift, - LdMiscFrm = 8 << FormShift, - StMiscFrm = 9 << FormShift, - LdStMulFrm = 10 << FormShift, - - LdStExFrm = 11 << FormShift, - - // Miscellaneous arithmetic instructions - ArithMiscFrm = 12 << FormShift, - SatFrm = 13 << FormShift, - - // Extend instructions - ExtFrm = 14 << FormShift, - - // VFP formats - VFPUnaryFrm = 15 << FormShift, - VFPBinaryFrm = 16 << FormShift, - VFPConv1Frm = 17 << FormShift, - VFPConv2Frm = 18 << FormShift, - VFPConv3Frm = 19 << FormShift, - VFPConv4Frm = 20 << FormShift, - VFPConv5Frm = 21 << FormShift, - VFPLdStFrm = 22 << FormShift, - VFPLdStMulFrm = 23 << FormShift, - VFPMiscFrm = 24 << FormShift, - - // Thumb format - ThumbFrm = 25 << FormShift, - - // Miscelleaneous format - MiscFrm = 26 << FormShift, - - // NEON formats - NGetLnFrm = 27 << FormShift, - NSetLnFrm = 28 << FormShift, - NDupFrm = 29 << FormShift, - NLdStFrm = 30 << FormShift, - N1RegModImmFrm= 31 << FormShift, - N2RegFrm = 32 << FormShift, - NVCVTFrm = 33 << FormShift, - NVDupLnFrm = 34 << FormShift, - N2RegVShLFrm = 35 << FormShift, - N2RegVShRFrm = 36 << FormShift, - N3RegFrm = 37 << FormShift, - N3RegVShFrm = 38 << FormShift, - NVExtFrm = 39 << FormShift, - NVMulSLFrm = 40 << FormShift, - NVTBLFrm = 41 << FormShift, - - //===------------------------------------------------------------------===// - // Misc flags. - - // UnaryDP - Indicates this is a unary data processing instruction, i.e. - // it doesn't have a Rn operand. - UnaryDP = 1 << 13, - - // Xform16Bit - Indicates this Thumb2 instruction may be transformed into - // a 16-bit Thumb instruction if certain conditions are met. - Xform16Bit = 1 << 14, - - //===------------------------------------------------------------------===// - // Code domain. - DomainShift = 15, - DomainMask = 7 << DomainShift, - DomainGeneral = 0 << DomainShift, - DomainVFP = 1 << DomainShift, - DomainNEON = 2 << DomainShift, - DomainNEONA8 = 4 << DomainShift, - - //===------------------------------------------------------------------===// - // Field shifts - such shifts are used to set field while generating - // machine instructions. - // - // FIXME: This list will need adjusting/fixing as the MC code emitter - // takes shape and the ARMCodeEmitter.cpp bits go away. 
- ShiftTypeShift = 4, - - M_BitShift = 5, - ShiftImmShift = 5, - ShiftShift = 7, - N_BitShift = 7, - ImmHiShift = 8, - SoRotImmShift = 8, - RegRsShift = 8, - ExtRotImmShift = 10, - RegRdLoShift = 12, - RegRdShift = 12, - RegRdHiShift = 16, - RegRnShift = 16, - S_BitShift = 20, - W_BitShift = 21, - AM3_I_BitShift = 22, - D_BitShift = 22, - U_BitShift = 23, - P_BitShift = 24, - I_BitShift = 25, - CondShift = 28 - }; -} - class ARMBaseInstrInfo : public ARMGenInstrInfo { const ARMSubtarget &Subtarget; @@ -241,6 +101,10 @@ public: int &FrameIndex) const; virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; + virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + virtual unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, @@ -259,6 +123,8 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -346,6 +212,12 @@ public: int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const; + + /// VFP/NEON execution domains. + std::pair<uint16_t, uint16_t> + getExecutionDomain(const MachineInstr *MI) const; + void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; + private: int getVLDMDefCycle(const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, @@ -382,6 +254,9 @@ private: bool hasLowDefLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx) const; + /// verifyInstruction - Perform target specific instruction verification. + bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. @@ -464,6 +339,12 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg); int getMatchingCondBranchOpcode(int Opc); + +/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether +/// the instruction is encoded with an 'S' bit is determined by the optional +/// CPSR def operand. +unsigned convertAddSubFlagsOpcode(unsigned OldOpc); + /// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of /// instructions to materializea destreg = basereg + immediate in ARM / Thumb2 /// code. 
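[Annotation] The convertAddSubFlagsOpcode() declaration above is backed by the AddSubFlagsOpcodeMap table added in ARMBaseInstrInfo.cpp earlier in this diff: a linear scan that maps a flag-setting pseudo opcode to its real encoding, returning 0 when the opcode is not one of the pseudos. The following is a minimal, self-contained C++ sketch of that pattern; the numeric opcode values are stand-ins, not the real ARM::* enumerators.

struct AddSubFlagsOpcodePair {
  unsigned PseudoOpc;  // pseudo opcode that implies the 'S' bit
  unsigned MachineOpc; // real opcode; the 'S' bit comes from the optional CPSR def
};

// Stand-in values only; the real table pairs ARM::ADDSri with ARM::ADDri, etc.
static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
  {1001, 2001}, // e.g. ADDSri -> ADDri
  {1002, 2002}, // e.g. SUBSrr -> SUBrr
};

// Returns the real opcode for a flag-setting pseudo, or 0 if OldOpc is not one.
unsigned convertAddSubFlagsOpcode(unsigned OldOpc) {
  const unsigned NPairs =
      sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodeMap[0]);
  for (unsigned i = 0; i != NPairs; ++i)
    if (AddSubFlagsOpcodeMap[i].PseudoOpc == OldOpc)
      return AddSubFlagsOpcodeMap[i].MachineOpc;
  return 0;
}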
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index ba42295..7c42342 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Support/Debug.h" @@ -57,7 +56,7 @@ EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) - : ARMGenRegisterInfo(), TII(tii), STI(sti), + : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti), FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11), BasePtr(ARM::R6) { } @@ -354,7 +353,7 @@ const TargetRegisterClass* ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const { const TargetRegisterClass *Super = RC; - TargetRegisterClass::sc_iterator I = RC->superclasses_begin(); + TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); do { switch (Super->getID()) { case ARM::GPRRegClassID: @@ -375,6 +374,13 @@ ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return ARM::GPRRegisterClass; } +const TargetRegisterClass * +ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &ARM::CCRRegClass) + return 0; // Can't copy CCR registers. 
+ return RC; +} + unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { @@ -487,19 +493,19 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, if (!TFI->hasFP(MF)) { if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPREven1); + return makeArrayRef(GPREven1); else - return ArrayRef<unsigned>(GPREven4); + return makeArrayRef(GPREven4); } else if (FramePtr == ARM::R7) { if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPREven2); + return makeArrayRef(GPREven2); else - return ArrayRef<unsigned>(GPREven5); + return makeArrayRef(GPREven5); } else { // FramePtr == ARM::R11 if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPREven3); + return makeArrayRef(GPREven3); else - return ArrayRef<unsigned>(GPREven6); + return makeArrayRef(GPREven6); } } else if (HintType == ARMRI::RegPairOdd) { if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0) @@ -509,19 +515,19 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, if (!TFI->hasFP(MF)) { if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPROdd1); + return makeArrayRef(GPROdd1); else - return ArrayRef<unsigned>(GPROdd4); + return makeArrayRef(GPROdd4); } else if (FramePtr == ARM::R7) { if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPROdd2); + return makeArrayRef(GPROdd2); else - return ArrayRef<unsigned>(GPROdd5); + return makeArrayRef(GPROdd5); } else { // FramePtr == ARM::R11 if (!STI.isR9Reserved()) - return ArrayRef<unsigned>(GPROdd3); + return makeArrayRef(GPROdd3); else - return ArrayRef<unsigned>(GPROdd6); + return makeArrayRef(GPROdd6); } } return RC->getRawAllocationOrder(MF); @@ -649,10 +655,6 @@ cannotEliminateFrame(const MachineFunction &MF) const { || needsStackRealignment(MF); } -unsigned ARMBaseRegisterInfo::getRARegister() const { - return ARM::LR; -} - unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -672,99 +674,54 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { return 0; } -int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -int ARMBaseRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - return ARMGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); -} - unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const { switch (Reg) { default: break; // Return 0 if either register of the pair is a special register. // So no R12, etc. - case ARM::R1: - return ARM::R0; - case ARM::R3: - return ARM::R2; - case ARM::R5: - return ARM::R4; + case ARM::R1: return ARM::R0; + case ARM::R3: return ARM::R2; + case ARM::R5: return ARM::R4; case ARM::R7: return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) ? 0 : ARM::R6; - case ARM::R9: - return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; - case ARM::R11: - return isReservedReg(MF, ARM::R11) ? 
0 : ARM::R10; - - case ARM::S1: - return ARM::S0; - case ARM::S3: - return ARM::S2; - case ARM::S5: - return ARM::S4; - case ARM::S7: - return ARM::S6; - case ARM::S9: - return ARM::S8; - case ARM::S11: - return ARM::S10; - case ARM::S13: - return ARM::S12; - case ARM::S15: - return ARM::S14; - case ARM::S17: - return ARM::S16; - case ARM::S19: - return ARM::S18; - case ARM::S21: - return ARM::S20; - case ARM::S23: - return ARM::S22; - case ARM::S25: - return ARM::S24; - case ARM::S27: - return ARM::S26; - case ARM::S29: - return ARM::S28; - case ARM::S31: - return ARM::S30; - - case ARM::D1: - return ARM::D0; - case ARM::D3: - return ARM::D2; - case ARM::D5: - return ARM::D4; - case ARM::D7: - return ARM::D6; - case ARM::D9: - return ARM::D8; - case ARM::D11: - return ARM::D10; - case ARM::D13: - return ARM::D12; - case ARM::D15: - return ARM::D14; - case ARM::D17: - return ARM::D16; - case ARM::D19: - return ARM::D18; - case ARM::D21: - return ARM::D20; - case ARM::D23: - return ARM::D22; - case ARM::D25: - return ARM::D24; - case ARM::D27: - return ARM::D26; - case ARM::D29: - return ARM::D28; - case ARM::D31: - return ARM::D30; + case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; + case ARM::R11: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; + + case ARM::S1: return ARM::S0; + case ARM::S3: return ARM::S2; + case ARM::S5: return ARM::S4; + case ARM::S7: return ARM::S6; + case ARM::S9: return ARM::S8; + case ARM::S11: return ARM::S10; + case ARM::S13: return ARM::S12; + case ARM::S15: return ARM::S14; + case ARM::S17: return ARM::S16; + case ARM::S19: return ARM::S18; + case ARM::S21: return ARM::S20; + case ARM::S23: return ARM::S22; + case ARM::S25: return ARM::S24; + case ARM::S27: return ARM::S26; + case ARM::S29: return ARM::S28; + case ARM::S31: return ARM::S30; + + case ARM::D1: return ARM::D0; + case ARM::D3: return ARM::D2; + case ARM::D5: return ARM::D4; + case ARM::D7: return ARM::D6; + case ARM::D9: return ARM::D8; + case ARM::D11: return ARM::D10; + case ARM::D13: return ARM::D12; + case ARM::D15: return ARM::D14; + case ARM::D17: return ARM::D16; + case ARM::D19: return ARM::D18; + case ARM::D21: return ARM::D20; + case ARM::D23: return ARM::D22; + case ARM::D25: return ARM::D24; + case ARM::D27: return ARM::D26; + case ARM::D29: return ARM::D28; + case ARM::D31: return ARM::D30; } return 0; @@ -776,85 +733,48 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, default: break; // Return 0 if either register of the pair is a special register. // So no R12, etc. - case ARM::R0: - return ARM::R1; - case ARM::R2: - return ARM::R3; - case ARM::R4: - return ARM::R5; + case ARM::R0: return ARM::R1; + case ARM::R2: return ARM::R3; + case ARM::R4: return ARM::R5; case ARM::R6: return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) ? 0 : ARM::R7; - case ARM::R8: - return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; - case ARM::R10: - return isReservedReg(MF, ARM::R11) ? 
0 : ARM::R11; - - case ARM::S0: - return ARM::S1; - case ARM::S2: - return ARM::S3; - case ARM::S4: - return ARM::S5; - case ARM::S6: - return ARM::S7; - case ARM::S8: - return ARM::S9; - case ARM::S10: - return ARM::S11; - case ARM::S12: - return ARM::S13; - case ARM::S14: - return ARM::S15; - case ARM::S16: - return ARM::S17; - case ARM::S18: - return ARM::S19; - case ARM::S20: - return ARM::S21; - case ARM::S22: - return ARM::S23; - case ARM::S24: - return ARM::S25; - case ARM::S26: - return ARM::S27; - case ARM::S28: - return ARM::S29; - case ARM::S30: - return ARM::S31; - - case ARM::D0: - return ARM::D1; - case ARM::D2: - return ARM::D3; - case ARM::D4: - return ARM::D5; - case ARM::D6: - return ARM::D7; - case ARM::D8: - return ARM::D9; - case ARM::D10: - return ARM::D11; - case ARM::D12: - return ARM::D13; - case ARM::D14: - return ARM::D15; - case ARM::D16: - return ARM::D17; - case ARM::D18: - return ARM::D19; - case ARM::D20: - return ARM::D21; - case ARM::D22: - return ARM::D23; - case ARM::D24: - return ARM::D25; - case ARM::D26: - return ARM::D27; - case ARM::D28: - return ARM::D29; - case ARM::D30: - return ARM::D31; + case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; + case ARM::R10: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11; + + case ARM::S0: return ARM::S1; + case ARM::S2: return ARM::S3; + case ARM::S4: return ARM::S5; + case ARM::S6: return ARM::S7; + case ARM::S8: return ARM::S9; + case ARM::S10: return ARM::S11; + case ARM::S12: return ARM::S13; + case ARM::S14: return ARM::S15; + case ARM::S16: return ARM::S17; + case ARM::S18: return ARM::S19; + case ARM::S20: return ARM::S21; + case ARM::S22: return ARM::S23; + case ARM::S24: return ARM::S25; + case ARM::S26: return ARM::S27; + case ARM::S28: return ARM::S29; + case ARM::S30: return ARM::S31; + + case ARM::D0: return ARM::D1; + case ARM::D2: return ARM::D3; + case ARM::D4: return ARM::D5; + case ARM::D6: return ARM::D7; + case ARM::D8: return ARM::D9; + case ARM::D10: return ARM::D11; + case ARM::D12: return ARM::D13; + case ARM::D14: return ARM::D15; + case ARM::D16: return ARM::D17; + case ARM::D18: return ARM::D19; + case ARM::D20: return ARM::D21; + case ARM::D22: return ARM::D23; + case ARM::D24: return ARM::D25; + case ARM::D26: return ARM::D27; + case ARM::D28: return ARM::D29; + case ARM::D30: return ARM::D31; } return 0; @@ -1111,11 +1031,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this)); - MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx).addImm(Offset); + MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset)); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(AddDefaultPred(MIB)); + AddDefaultCC(MIB); } void @@ -1143,6 +1063,7 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII); } assert (Done && "Unable to resolve frame index!"); + (void)Done; } bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index b4b4059..fee17ff 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -33,19 +33,6 @@ namespace ARMRI { }; } -/// isARMLowRegister - Returns true if the register is low register 
r0-r7. -/// -static inline bool isARMLowRegister(unsigned Reg) { - using namespace ARM; - switch (Reg) { - case R0: case R1: case R2: case R3: - case R4: case R5: case R6: case R7: - return true; - default: - return false; - } -} - /// isARMArea1Register - Returns true if the register is a low register (r0-r7) /// or a stack/pc register that we should push/pop. static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) { @@ -129,6 +116,8 @@ public: unsigned &NewSubIdx) const; const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; + const TargetRegisterClass* + getCrossCopyRegClass(const TargetRegisterClass *RC) const; const TargetRegisterClass* getLargestLegalSuperClass(const TargetRegisterClass *RC) const; @@ -164,7 +153,6 @@ public: bool cannotEliminateFrame(const MachineFunction &MF) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getBaseRegister() const { return BasePtr; } @@ -172,9 +160,6 @@ public: unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - bool isLowRegister(unsigned Reg) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp index d6fca62..4148d4a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp @@ -14,12 +14,12 @@ #define DEBUG_TYPE "jit" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" #include "ARMInstrInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -161,11 +161,11 @@ namespace { // are already handled elsewhere. They are placeholders to allow this // encoder to continue to function until the MC encoder is sufficiently // far along that this one can be eliminated entirely. 
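In miniature, the idiom those placeholders follow looks like this: the TableGen'd emitter expects a full set of operand-encoder callbacks, so the legacy JIT path keeps compiling by supplying inert stubs while only the MC-based encoder does real work. The names below are illustrative, not LLVM's.

#include <cstdio>

// Sketch of the stub-bridge pattern: same interface, one inert
// implementation kept alive during the migration, one real one.
struct LegacyEncoder {
  unsigned getImmOpValue(unsigned /*Op*/) const { return 0; } // placeholder
  unsigned encode(unsigned Op) const { return getImmOpValue(Op); }
};

struct MCEncoder {
  unsigned getImmOpValue(unsigned Op) const { return Op & 0xFFF; } // real work (illustrative)
  unsigned encode(unsigned Op) const { return getImmOpValue(Op); }
};

int main() {
  std::printf("%u %u\n", LegacyEncoder().encode(42), MCEncoder().encode(42));
}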
- unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val) + unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val) const { return 0; } - unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val) + unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val) const { return 0; } - unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) + unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) const { return 0; } unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) const { return 0; } @@ -189,13 +189,17 @@ namespace { unsigned Op) const { return 0; } unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } - unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op) + unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } @@ -203,8 +207,12 @@ namespace { const { return 0; } unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op) + const { return 0; } unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op) @@ -213,10 +221,6 @@ namespace { const { return 0; } unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } - unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op) @@ -230,8 +234,6 @@ namespace { const { return 0; } unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } - unsigned getMsbOpValue(const MachineInstr &MI, - unsigned Op) const { return 0; } unsigned getSsatBitPosValue(const MachineInstr &MI, unsigned Op) const { return 0; } uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx) @@ -268,6 +270,8 @@ namespace { const { return 0;} uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) const { return 0;} + uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx) + const { return 0;} uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) const { return 0;} uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op) @@ -632,15 +636,16 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); assert(ACPV->isGlobalValue() && "unsupported constant pool value"); - const GlobalValue *GV = 
ACPV->getGV(); + const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV(); if (GV) { Reloc::Model RelocM = TM.getRelocationModel(); emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, isa<Function>(GV), Subtarget->GVIsIndirectSymbol(GV, RelocM), (intptr_t)ACPV); - } else { - emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute); + } else { + const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(); + emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute); } emitWordLE(0); } else { @@ -983,7 +988,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, const MCInstrDesc &MCID) const { - for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i){ + for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) return 1 << ARMII::S_BitShift; diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index f45ebdc..3e3a413 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -15,10 +15,10 @@ #define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMInstrInfo.h" #include "Thumb2InstrInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -739,7 +739,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond to anything in the source. unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B; - BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB); + if (!isThumb) + BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB); + else + BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB) + .addImm(ARMCC::AL).addReg(0); ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. @@ -1151,7 +1155,11 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // targets will be exchanged, and the altered branch may be out of // range, so the machinery has to know about it. int UncondBr = isThumb ? ((isThumb2) ? 
ARM::t2B : ARM::tB) : ARM::B; - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); + if (!isThumb) + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); + else + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) + .addImm(ARMCC::AL).addReg(0); unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); @@ -1512,7 +1520,11 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { .addMBB(NextBB).addImm(CC).addReg(CCReg); Br.MI = &MBB->back(); BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); - BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); + if (isThumb) + BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) + .addImm(ARMCC::AL).addReg(0); + else + BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); @@ -1891,7 +1903,8 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond directly to anything in the source. assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?"); - BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB); + BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB) + .addImm(ARMCC::AL).addReg(0); // Update internal data structures to account for the newly inserted MBB. MF.RenumberBlocks(NewBB); diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp index 165a1d8..aadfd47 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -17,79 +17,57 @@ #include "llvm/Constants.h" #include "llvm/GlobalValue.h" #include "llvm/Type.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/Support/raw_ostream.h" #include <cstdlib> using namespace llvm; -ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id, - ARMCP::ARMCPKind K, +//===----------------------------------------------------------------------===// +// ARMConstantPoolValue +//===----------------------------------------------------------------------===// + +ARMConstantPoolValue::ARMConstantPoolValue(Type *Ty, unsigned id, + ARMCP::ARMCPKind kind, unsigned char PCAdj, - ARMCP::ARMCPModifier Modif, - bool AddCA) - : MachineConstantPoolValue((const Type*)cval->getType()), - CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj), - Modifier(Modif), AddCurrentAddress(AddCA) {} - -ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, - const char *s, unsigned id, + ARMCP::ARMCPModifier modifier, + bool addCurrentAddress) + : MachineConstantPoolValue(Ty), LabelId(id), Kind(kind), + PCAdjust(PCAdj), Modifier(modifier), + AddCurrentAddress(addCurrentAddress) {} + +ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id, + ARMCP::ARMCPKind kind, unsigned char PCAdj, - ARMCP::ARMCPModifier Modif, - bool AddCA) - : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)), - CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol), - PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} - -ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv, - ARMCP::ARMCPModifier Modif) - : MachineConstantPoolValue((const 
Type*)Type::getInt32Ty(gv->getContext())), - CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), - Modifier(Modif), AddCurrentAddress(false) {} - -const GlobalValue *ARMConstantPoolValue::getGV() const { - return dyn_cast_or_null<GlobalValue>(CVal); -} + ARMCP::ARMCPModifier modifier, + bool addCurrentAddress) + : MachineConstantPoolValue((Type*)Type::getInt32Ty(C)), + LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier), + AddCurrentAddress(addCurrentAddress) {} -const BlockAddress *ARMConstantPoolValue::getBlockAddress() const { - return dyn_cast_or_null<BlockAddress>(CVal); -} +ARMConstantPoolValue::~ARMConstantPoolValue() {} -static bool CPV_streq(const char *S1, const char *S2) { - if (S1 == S2) - return true; - if (S1 && S2 && strcmp(S1, S2) == 0) - return true; - return false; +const char *ARMConstantPoolValue::getModifierText() const { + switch (Modifier) { + default: llvm_unreachable("Unknown modifier!"); + // FIXME: Are these case sensitive? It'd be nice to lower-case all the + // strings if that's legal. + case ARMCP::no_modifier: return "none"; + case ARMCP::TLSGD: return "tlsgd"; + case ARMCP::GOT: return "GOT"; + case ARMCP::GOTOFF: return "GOTOFF"; + case ARMCP::GOTTPOFF: return "gottpoff"; + case ARMCP::TPOFF: return "tpoff"; + } } int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - unsigned AlignMask = Alignment - 1; - const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); - for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - if (Constants[i].isMachineConstantPoolEntry() && - (Constants[i].getAlignment() & AlignMask) == 0) { - ARMConstantPoolValue *CPV = - (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; - if (CPV->CVal == CVal && - CPV->LabelId == LabelId && - CPV->PCAdjust == PCAdjust && - CPV_streq(CPV->S, S) && - CPV->Modifier == Modifier) - return i; - } - } - + assert(false && "Shouldn't be calling this directly!"); return -1; } -ARMConstantPoolValue::~ARMConstantPoolValue() { - free((void*)S); -} - void -ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { - ID.AddPointer(CVal); - ID.AddPointer(S); +ARMConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) { ID.AddInteger(LabelId); ID.AddInteger(PCAdjust); } @@ -97,9 +75,7 @@ ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { bool ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) { if (ACPV->Kind == Kind && - ACPV->CVal == CVal && ACPV->PCAdjust == PCAdjust && - CPV_streq(ACPV->S, S) && ACPV->Modifier == Modifier) { if (ACPV->LabelId == LabelId) return true; @@ -115,12 +91,7 @@ void ARMConstantPoolValue::dump() const { errs() << " " << *this; } - void ARMConstantPoolValue::print(raw_ostream &O) const { - if (CVal) - O << CVal->getName(); - else - O << S; if (Modifier) O << "(" << getModifierText() << ")"; if (PCAdjust != 0) { O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust; @@ -128,3 +99,221 @@ void ARMConstantPoolValue::print(raw_ostream &O) const { O << ")"; } } + +//===----------------------------------------------------------------------===// +// ARMConstantPoolConstant +//===----------------------------------------------------------------------===// + +ARMConstantPoolConstant::ARMConstantPoolConstant(Type *Ty, + const Constant *C, + unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress) + : ARMConstantPoolValue(Ty, ID, Kind, PCAdj, Modifier, AddCurrentAddress), + CVal(C) {} + 
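The getExistingMachineCPValue lookup this refactor pushes down into the subclasses keeps the same alignment screen the removed base-class version used: an existing pool entry is reusable only if its alignment has no set bits below the requested power-of-two alignment. A minimal standalone model of that mask test:

#include <cassert>
#include <cstdio>

// An entry aligned to EntryAlign bytes satisfies a request for Required
// bytes iff (EntryAlign & (Required - 1)) == 0, both powers of two.
static bool satisfiesAlignment(unsigned EntryAlign, unsigned Required) {
  assert(Required && (Required & (Required - 1)) == 0 &&
         "alignment must be a power of two");
  unsigned AlignMask = Required - 1;
  return (EntryAlign & AlignMask) == 0;
}

int main() {
  std::printf("%d\n", satisfiesAlignment(8, 4)); // 1: an 8-byte-aligned entry is 4-aligned
  std::printf("%d\n", satisfiesAlignment(4, 8)); // 0: a 4-byte-aligned entry is not 8-aligned
}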
+ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C, + unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress) + : ARMConstantPoolValue((Type*)C->getType(), ID, Kind, PCAdj, Modifier, + AddCurrentAddress), + CVal(C) {} + +ARMConstantPoolConstant * +ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) { + return new ARMConstantPoolConstant(C, ID, ARMCP::CPValue, 0, + ARMCP::no_modifier, false); +} + +ARMConstantPoolConstant * +ARMConstantPoolConstant::Create(const GlobalValue *GV, + ARMCP::ARMCPModifier Modifier) { + return new ARMConstantPoolConstant((Type*)Type::getInt32Ty(GV->getContext()), + GV, 0, ARMCP::CPValue, 0, + Modifier, false); +} + +ARMConstantPoolConstant * +ARMConstantPoolConstant::Create(const Constant *C, unsigned ID, + ARMCP::ARMCPKind Kind, unsigned char PCAdj) { + return new ARMConstantPoolConstant(C, ID, Kind, PCAdj, + ARMCP::no_modifier, false); +} + +ARMConstantPoolConstant * +ARMConstantPoolConstant::Create(const Constant *C, unsigned ID, + ARMCP::ARMCPKind Kind, unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress) { + return new ARMConstantPoolConstant(C, ID, Kind, PCAdj, Modifier, + AddCurrentAddress); +} + +const GlobalValue *ARMConstantPoolConstant::getGV() const { + return dyn_cast_or_null<GlobalValue>(CVal); +} + +const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const { + return dyn_cast_or_null<BlockAddress>(CVal); +} + +int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV); + if (!APC) continue; + if (APC->CVal == CVal && equals(APC)) + return i; + } + } + + return -1; +} + +bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) { + const ARMConstantPoolConstant *ACPC = dyn_cast<ARMConstantPoolConstant>(ACPV); + return ACPC && ACPC->CVal == CVal && ARMConstantPoolValue::hasSameValue(ACPV); +} + +void ARMConstantPoolConstant::addSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(CVal); + ARMConstantPoolValue::addSelectionDAGCSEId(ID); +} + +void ARMConstantPoolConstant::print(raw_ostream &O) const { + O << CVal->getName(); + ARMConstantPoolValue::print(O); +} + +//===----------------------------------------------------------------------===// +// ARMConstantPoolSymbol +//===----------------------------------------------------------------------===// + +ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s, + unsigned id, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress) + : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier, + AddCurrentAddress), + S(strdup(s)) {} + +ARMConstantPoolSymbol::~ARMConstantPoolSymbol() { + free((void*)S); +} + +ARMConstantPoolSymbol * +ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, + unsigned ID, unsigned char PCAdj) { + return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false); +} + +static bool CPV_streq(const char *S1, const char *S2) { + if (S1 == S2) + return true; + if (S1 && S2 && strcmp(S1, S2) == 
0) + return true; + return false; +} + +int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV); + if (!APS) continue; + + if (CPV_streq(APS->S, S) && equals(APS)) + return i; + } + } + + return -1; +} + +bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) { + const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV); + return ACPS && CPV_streq(ACPS->S, S) && + ARMConstantPoolValue::hasSameValue(ACPV); +} + +void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(S); + ARMConstantPoolValue::addSelectionDAGCSEId(ID); +} + +void ARMConstantPoolSymbol::print(raw_ostream &O) const { + O << S; + ARMConstantPoolValue::print(O); +} + +//===----------------------------------------------------------------------===// +// ARMConstantPoolMBB +//===----------------------------------------------------------------------===// + +ARMConstantPoolMBB::ARMConstantPoolMBB(LLVMContext &C, + const MachineBasicBlock *mbb, + unsigned id, unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress) + : ARMConstantPoolValue(C, id, ARMCP::CPMachineBasicBlock, PCAdj, + Modifier, AddCurrentAddress), + MBB(mbb) {} + +ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C, + const MachineBasicBlock *mbb, + unsigned ID, + unsigned char PCAdj) { + return new ARMConstantPoolMBB(C, mbb, ID, PCAdj, ARMCP::no_modifier, false); +} + +int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV); + if (!APMBB) continue; + + if (APMBB->MBB == MBB && equals(APMBB)) + return i; + } + } + + return -1; +} + +bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) { + const ARMConstantPoolMBB *ACPMBB = dyn_cast<ARMConstantPoolMBB>(ACPV); + return ACPMBB && ACPMBB->MBB == MBB && + ARMConstantPoolValue::hasSameValue(ACPV); +} + +void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(MBB); + ARMConstantPoolValue::addSelectionDAGCSEId(ID); +} + +void ARMConstantPoolMBB::print(raw_ostream &O) const { + ARMConstantPoolValue::print(O); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index d008811..0d0def3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -20,17 +20,19 @@ namespace llvm { -class Constant; class BlockAddress; +class Constant; class GlobalValue; class LLVMContext; +class MachineBasicBlock; namespace ARMCP { enum ARMCPKind { CPValue, CPExtSymbol, CPBlockAddress, - CPLSDA + CPLSDA, + CPMachineBasicBlock }; enum ARMCPModifier { 
@@ -47,8 +49,6 @@ namespace ARMCP { /// represent PC-relative displacement between the address of the load /// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)). class ARMConstantPoolValue : public MachineConstantPoolValue { - const Constant *CVal; // Constant being loaded. - const char *S; // ExtSymbol being loaded. unsigned LabelId; // Label id of the load. ARMCP::ARMCPKind Kind; // Kind of constant. unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative. @@ -56,60 +56,54 @@ class ARMConstantPoolValue : public MachineConstantPoolValue { ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8)) bool AddCurrentAddress; +protected: + ARMConstantPoolValue(Type *Ty, unsigned id, ARMCP::ARMCPKind Kind, + unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + + ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind, + unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); public: - ARMConstantPoolValue(const Constant *cval, unsigned id, - ARMCP::ARMCPKind Kind = ARMCP::CPValue, - unsigned char PCAdj = 0, - ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier, - bool AddCurrentAddress = false); - ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id, - unsigned char PCAdj = 0, - ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier, - bool AddCurrentAddress = false); - ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier); - ARMConstantPoolValue(); - ~ARMConstantPoolValue(); + virtual ~ARMConstantPoolValue(); - const GlobalValue *getGV() const; - const char *getSymbol() const { return S; } - const BlockAddress *getBlockAddress() const; ARMCP::ARMCPModifier getModifier() const { return Modifier; } - const char *getModifierText() const { - switch (Modifier) { - default: llvm_unreachable("Unknown modifier!"); - // FIXME: Are these case sensitive? It'd be nice to lower-case all the - // strings if that's legal. - case ARMCP::no_modifier: return "none"; - case ARMCP::TLSGD: return "tlsgd"; - case ARMCP::GOT: return "GOT"; - case ARMCP::GOTOFF: return "GOTOFF"; - case ARMCP::GOTTPOFF: return "gottpoff"; - case ARMCP::TPOFF: return "tpoff"; - } - } + const char *getModifierText() const; bool hasModifier() const { return Modifier != ARMCP::no_modifier; } + bool mustAddCurrentAddress() const { return AddCurrentAddress; } + unsigned getLabelId() const { return LabelId; } unsigned char getPCAdjustment() const { return PCAdjust; } + bool isGlobalValue() const { return Kind == ARMCP::CPValue; } bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; } - bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; } - bool isLSDA() { return Kind == ARMCP::CPLSDA; } + bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; } + bool isLSDA() const { return Kind == ARMCP::CPLSDA; } + bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; } virtual unsigned getRelocationInfo() const { return 2; } virtual int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment); - virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID); + virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); - /// hasSameValue - Return true if this ARM constpool value - /// can share the same constantpool entry as another ARM constpool value. - bool hasSameValue(ARMConstantPoolValue *ACPV); + /// hasSameValue - Return true if this ARM constpool value can share the same + /// constantpool entry as another ARM constpool value. 
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + + bool equals(const ARMConstantPoolValue *A) const { + return this->LabelId == A->LabelId && + this->PCAdjust == A->PCAdjust && + this->Modifier == A->Modifier; + } + virtual void print(raw_ostream &O) const; void print(raw_ostream *O) const { if (O) print(*O); } - void print(raw_ostream &O) const; void dump() const; + + static bool classof(const ARMConstantPoolValue *) { return true; } }; inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { @@ -117,6 +111,123 @@ inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { return O; } +/// ARMConstantPoolConstant - ARM-specific constant pool values for Constants, +/// Functions, and BlockAddresses. +class ARMConstantPoolConstant : public ARMConstantPoolValue { + const Constant *CVal; // Constant being loaded. + + ARMConstantPoolConstant(const Constant *C, + unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + ARMConstantPoolConstant(Type *Ty, const Constant *C, + unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + +public: + static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID); + static ARMConstantPoolConstant *Create(const GlobalValue *GV, + ARMCP::ARMCPModifier Modifier); + static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj); + static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID, + ARMCP::ARMCPKind Kind, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + + const GlobalValue *getGV() const; + const BlockAddress *getBlockAddress() const; + + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment); + + /// hasSameValue - Return true if this ARM constpool value can share the same + /// constantpool entry as another ARM constpool value. + virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + + virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); + + virtual void print(raw_ostream &O) const; + static bool classof(const ARMConstantPoolValue *APV) { + return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA(); + } + static bool classof(const ARMConstantPoolConstant *) { return true; } +}; + +/// ARMConstantPoolSymbol - ARM-specific constantpool values for external +/// symbols. +class ARMConstantPoolSymbol : public ARMConstantPoolValue { + const char *S; // ExtSymbol being loaded. + + ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id, + unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + +public: + ~ARMConstantPoolSymbol(); + + static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s, + unsigned ID, unsigned char PCAdj); + + const char *getSymbol() const { return S; } + + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment); + + virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); + + /// hasSameValue - Return true if this ARM constpool value can share the same + /// constantpool entry as another ARM constpool value. 
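Each subclass answers that question the same way: use the classof hook (driven by the Kind predicates) to confirm the other value is the same subclass, compare the subclass payload, then defer to the base class's equals() for the shared LabelId/PCAdjust/Modifier fields. A minimal model of the pattern outside LLVM, with invented class names:

#include <cstring>
#include <cstdio>

// Kind tag drives classof-style checks; payload comparison is per
// subclass; shared fields are compared once in the base.
struct CPV {
  enum Kind { KConstant, KSymbol } TheKind;
  unsigned LabelId;
  CPV(Kind K, unsigned Id) : TheKind(K), LabelId(Id) {}
  bool equalsBase(const CPV &O) const { return LabelId == O.LabelId; }
};

struct CPSym : CPV {
  const char *S;
  CPSym(const char *s, unsigned Id) : CPV(KSymbol, Id), S(s) {}
  static bool classof(const CPV *V) { return V->TheKind == KSymbol; }
  bool hasSameValue(const CPV &O) const {
    if (!classof(&O)) return false; // a dyn_cast would return null here
    const CPSym &OS = static_cast<const CPSym &>(O);
    return std::strcmp(S, OS.S) == 0 && equalsBase(O);
  }
};

int main() {
  CPSym A("memcpy", 0), B("memcpy", 0), C("memset", 0);
  std::printf("%d %d\n", A.hasSameValue(B), A.hasSameValue(C)); // 1 0
}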
+ virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + + virtual void print(raw_ostream &O) const; + + static bool classof(const ARMConstantPoolValue *ACPV) { + return ACPV->isExtSymbol(); + } + static bool classof(const ARMConstantPoolSymbol *) { return true; } +}; + +/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic +/// block. +class ARMConstantPoolMBB : public ARMConstantPoolValue { + const MachineBasicBlock *MBB; // Machine basic block. + + ARMConstantPoolMBB(LLVMContext &C, const MachineBasicBlock *mbb, unsigned id, + unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, + bool AddCurrentAddress); + +public: + static ARMConstantPoolMBB *Create(LLVMContext &C, + const MachineBasicBlock *mbb, + unsigned ID, unsigned char PCAdj); + + const MachineBasicBlock *getMBB() const { return MBB; } + + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment); + + virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); + + /// hasSameValue - Return true if this ARM constpool value can share the same + /// constantpool entry as another ARM constpool value. + virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + + virtual void print(raw_ostream &O) const; + + static bool classof(const ARMConstantPoolValue *ACPV) { + return ACPV->isMachineBasicBlock(); + } + static bool classof(const ARMConstantPoolMBB *) { return true; } +}; + } // End llvm namespace #endif diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 94b72fd..7872cb9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -16,19 +16,24 @@ #define DEBUG_TYPE "arm-pseudo" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! using namespace llvm; +static cl::opt<bool> +VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden, + cl::desc("Verify machine code after expanding ARM pseudos")); + namespace { class ARMExpandPseudo : public MachineFunctionPass { public: @@ -741,8 +746,22 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::MOVCCs: { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + case ARM::MOVCCsi: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), + (MI.getOperand(1).getReg())) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addImm(MI.getOperand(3).getImm()) + .addImm(MI.getOperand(4).getImm()) // 'pred' + .addReg(MI.getOperand(5).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + + case ARM::MOVCCsr: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), (MI.getOperand(1).getReg())) .addReg(MI.getOperand(2).getReg(), getKillRegState(MI.getOperand(2).isKill())) @@ -837,10 +856,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVsrl_flag: case ARM::MOVsra_flag: { // These are just fancy MOVs insructions. 
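Fancy in a specific way: MOVsrl_flag and MOVsra_flag are one-bit shifts that must set CPSR, because the bit shifted out becomes the carry, and the RRX expanded just below rotates that carry back in at the top. Paired, they give the classic double-word right shift by one (lsrs hi, hi, #1 then rrx lo, lo). The same computation modeled with an explicit carry bit:

#include <cstdint>
#include <cstdio>

// Shift a 64-bit value right by one using only 32-bit operations:
// the flag-setting lsr produces the carry that rrx consumes.
int main() {
  uint32_t Hi = 0x80000001u, Lo = 0x00000001u;
  bool Carry = Hi & 1;                                 // LSRS hi, hi, #1: bit 0 -> C
  Hi >>= 1;
  uint32_t NewLo = (Lo >> 1) | ((uint32_t)Carry << 31); // RRX lo, lo: C -> bit 31
  std::printf("%08x:%08x\n", Hi, NewLo);               // 40000000:80000000
}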
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) - .addReg(0) .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))) @@ -851,10 +869,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::RRX: { // This encodes as "MOVs Rd, Rm, rrx MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi), MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(1)) .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))) .addReg(0); TransferImpOps(MI, MIB, MIB); @@ -953,34 +970,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandMOV32BitImm(MBB, MBBI); return true; - case ARM::VMOVQQ: { - unsigned DstReg = MI.getOperand(0).getReg(); - bool DstIsDead = MI.getOperand(0).isDead(); - unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0); - unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1); - unsigned SrcReg = MI.getOperand(1).getReg(); - bool SrcIsKill = MI.getOperand(1).isKill(); - unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0); - unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1); - MachineInstrBuilder Even = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::VORRq)) - .addReg(EvenDst, - RegState::Define | getDeadRegState(DstIsDead)) - .addReg(EvenSrc, getKillRegState(SrcIsKill)) - .addReg(EvenSrc, getKillRegState(SrcIsKill))); - MachineInstrBuilder Odd = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::VORRq)) - .addReg(OddDst, - RegState::Define | getDeadRegState(DstIsDead)) - .addReg(OddSrc, getKillRegState(SrcIsKill)) - .addReg(OddSrc, getKillRegState(SrcIsKill))); - TransferImpOps(MI, Even, Odd); - MI.eraseFromParent(); - return true; - } - case ARM::VLDMQIA: { unsigned NewOpc = ARM::VLDMDIA; MachineInstrBuilder MIB = @@ -1316,6 +1305,8 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) Modified |= ExpandMBB(*MFI); + if (VerifyARMPseudo) + MF.verify(this, "After expanding ARM pseudo instructions."); return Modified; } diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index f469d7e..9bc7ef2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -14,13 +14,13 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" #include "ARMRegisterInfo.h" #include "ARMTargetMachine.h" #include "ARMSubtarget.h" #include "ARMConstantPoolValue.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" @@ -171,8 +171,8 @@ class ARMFastISel : public FastISel { // Utility routines. 
private: - bool isTypeLegal(const Type *Ty, MVT &VT); - bool isLoadTypeLegal(const Type *Ty, MVT &VT); + bool isTypeLegal(Type *Ty, MVT &VT); + bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr); bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); bool ARMComputeAddress(const Value *Obj, Address &Addr); @@ -502,11 +502,19 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { // This checks to see if we can use VFP3 instructions to materialize // a constant, otherwise we have to go through the constant pool. if (TLI.isFPImmLegal(Val, VT)) { - unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS; + int Imm; + unsigned Opc; + if (is64bit) { + Imm = ARM_AM::getFP64Imm(Val); + Opc = ARM::FCONSTD; + } else { + Imm = ARM_AM::getFP32Imm(Val); + Opc = ARM::FCONSTS; + } unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) - .addFPImm(CFP)); + .addImm(Imm)); return DestReg; } @@ -590,8 +598,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { // Grab index. unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); unsigned Id = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id, - ARMCP::CPValue, PCAdj); + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, + ARMCP::CPValue, + PCAdj); unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); // Load value. @@ -615,8 +624,8 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb) - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRi12), - NewDestReg) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::t2LDRi12), NewDestReg) .addReg(DestReg) .addImm(0); else @@ -673,7 +682,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { return 0; } -bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) { +bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT evt = TLI.getValueType(Ty, true); // Only handle simple types. @@ -685,7 +694,7 @@ bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) { return TLI.isTypeLegal(VT); } -bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) { +bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { if (isTypeLegal(Ty, VT)) return true; // If this is a type than can be sign or zero-extended to a basic operation @@ -714,7 +723,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { U = C; } - if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) + if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) if (Ty->getAddressSpace() > 255) // Fast instruction selection doesn't support the special // address spaces. 
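Back in ARMMaterializeFP above, FCONSTS/FCONSTD are used only when TLI.isFPImmLegal accepts the value: VFPv3's 8-bit immediate can express exactly +/- n/16 x 2^r with 16 <= n <= 31 and -3 <= r <= 4 (per the ARM ARM's VFPExpandImm, stated here from memory), so 1.0 and 0.5 qualify while 0.1 falls back to the constant pool. A brute-force check of that set:

#include <cmath>
#include <cstdio>

// Values encodable as a VFPv3 FCONST immediate: +/- n/16 * 2^r,
// 16 <= n <= 31, -3 <= r <= 4. Zero is notably not in the set.
static bool isVFP3Imm(double V) {
  V = std::fabs(V);
  for (int r = -3; r <= 4; ++r)
    for (int n = 16; n <= 31; ++n)
      if (V == std::ldexp(n / 16.0, r))
        return true;
  return false;
}

int main() {
  std::printf("%d %d %d\n", isVFP3Imm(1.0), isVFP3Imm(0.5), isVFP3Imm(0.1)); // 1 1 0
}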
@@ -749,7 +758,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { const Value *Op = *i; - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); @@ -946,6 +955,10 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { } bool ARMFastISel::SelectLoad(const Instruction *I) { + // Atomic loads need special handling. + if (cast<LoadInst>(I)->isAtomic()) + return false; + // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(I->getType(), VT)) @@ -1008,6 +1021,10 @@ bool ARMFastISel::SelectStore(const Instruction *I) { Value *Op0 = I->getOperand(0); unsigned SrcReg = 0; + // Atomic stores need special handling. + if (cast<StoreInst>(I)->isAtomic()) + return false; + // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) @@ -1085,7 +1102,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // TODO: Factor this out. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { MVT SourceVT; - const Type *Ty = CI->getOperand(0)->getType(); + Type *Ty = CI->getOperand(0)->getType(); if (CI->hasOneUse() && (CI->getParent() == I->getParent()) && isTypeLegal(Ty, SourceVT)) { bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); @@ -1201,7 +1218,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { const CmpInst *CI = cast<CmpInst>(I); MVT VT; - const Type *Ty = CI->getOperand(0)->getType(); + Type *Ty = CI->getOperand(0)->getType(); if (!isTypeLegal(Ty, VT)) return false; @@ -1309,7 +1326,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { if (!Subtarget->hasVFP2()) return false; MVT DstVT; - const Type *Ty = I->getType(); + Type *Ty = I->getType(); if (!isTypeLegal(Ty, DstVT)) return false; @@ -1328,7 +1345,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { unsigned Opc; if (Ty->isFloatTy()) Opc = ARM::VSITOS; else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; - else return 0; + else return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), @@ -1343,7 +1360,7 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) { if (!Subtarget->hasVFP2()) return false; MVT DstVT; - const Type *RetTy = I->getType(); + Type *RetTy = I->getType(); if (!isTypeLegal(RetTy, DstVT)) return false; @@ -1351,10 +1368,10 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) { if (Op == 0) return false; unsigned Opc; - const Type *OpTy = I->getOperand(0)->getType(); + Type *OpTy = I->getOperand(0)->getType(); if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; - else return 0; + else return false; // f64->s32 or f32->s32 both need an intermediate f32 reg. 
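The Z in VTOSIZS/VTOSIZD is round-toward-zero, and the converted integer is produced in an S register, which is why the code below allocates an f32-class result and a separate move to a core register has to follow. C's float-to-int cast has the same truncating semantics, so the conversion itself models as:

#include <cstdint>
#include <cstdio>

int main() {
  // vcvt to-signed with round-toward-zero, modeled by C's cast:
  std::printf("%d %d\n", (int32_t)-2.7, (int32_t)2.7); // -2 2
}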
unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); @@ -1401,7 +1418,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { bool ARMFastISel::SelectSDiv(const Instruction *I) { MVT VT; - const Type *Ty = I->getType(); + Type *Ty = I->getType(); if (!isTypeLegal(Ty, VT)) return false; @@ -1429,7 +1446,7 @@ bool ARMFastISel::SelectSDiv(const Instruction *I) { bool ARMFastISel::SelectSRem(const Instruction *I) { MVT VT; - const Type *Ty = I->getType(); + Type *Ty = I->getType(); if (!isTypeLegal(Ty, VT)) return false; @@ -1456,7 +1473,7 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { // operations, but can't figure out how to. Just use the vfp instructions // if we have them. // FIXME: It'd be nice to use NEON instructions. - const Type *Ty = I->getType(); + Type *Ty = I->getType(); bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); if (isFloat && !Subtarget->hasVFP2()) return false; @@ -1711,7 +1728,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext()); + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext()); CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */)); const Value *RV = Ret->getOperand(0); @@ -1778,7 +1795,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { CallingConv::ID CC = TLI.getLibcallCallingConv(Call); // Handle *simple* calls for now. - const Type *RetTy = I->getType(); + Type *RetTy = I->getType(); MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; @@ -1802,7 +1819,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { unsigned Arg = getRegForValue(Op); if (Arg == 0) return false; - const Type *ArgTy = Op->getType(); + Type *ArgTy = Op->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; @@ -1870,13 +1887,13 @@ bool ARMFastISel::SelectCall(const Instruction *I) { // TODO: Avoid some calling conventions? // Let SDISel handle vararg functions. - const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); if (FTy->isVarArg()) return false; // Handle *simple* calls for now. - const Type *RetTy = I->getType(); + Type *RetTy = I->getType(); MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; @@ -1915,7 +1932,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { CS.paramHasAttr(AttrInd, Attribute::ByVal)) return false; - const Type *ArgTy = (*i)->getType(); + Type *ArgTy = (*i)->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; @@ -1969,9 +1986,9 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) { // On ARM, in general, integer casts don't involve legal types; this code // handles promotable integers. The high bits for a type smaller than // the register size are assumed to be undefined. 
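An explicit extend therefore has to pin those high bits down: UXTH/UXTB clear them, SXTH/SXTB replicate the sign bit, and the i1 zero-extend selected further down is just an AND with 1. The same arithmetic on a 32-bit register image:

#include <cstdint>
#include <cstdio>

// What the selected extension instructions compute.
int main() {
  uint32_t R = 0xFFFFBEEFu;
  std::printf("uxth:    %08x\n", R & 0xFFFF);                    // 0000beef
  std::printf("sxth:    %08x\n", (uint32_t)(int32_t)(int16_t)R); // ffffbeef (truncate, then sign-extend)
  std::printf("uxtb:    %08x\n", R & 0xFF);                      // 000000ef
  std::printf("zext i1: %08x\n", R & 1);                         // 00000001
}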
- const Type *DestTy = I->getType(); + Type *DestTy = I->getType(); Value *Op = I->getOperand(0); - const Type *SrcTy = Op->getType(); + Type *SrcTy = Op->getType(); EVT SrcVT, DestVT; SrcVT = TLI.getValueType(SrcTy, true); @@ -2002,16 +2019,18 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) { switch (SrcVT.getSimpleVT().SimpleTy) { default: return false; case MVT::i16: + if (!Subtarget->hasV6Ops()) return false; if (isZext) - Opc = isThumb ? ARM::t2UXTHr : ARM::UXTHr; + Opc = isThumb ? ARM::t2UXTH : ARM::UXTH; else - Opc = isThumb ? ARM::t2SXTHr : ARM::SXTHr; + Opc = isThumb ? ARM::t2SXTH : ARM::SXTH; break; case MVT::i8: + if (!Subtarget->hasV6Ops()) return false; if (isZext) - Opc = isThumb ? ARM::t2UXTBr : ARM::UXTBr; + Opc = isThumb ? ARM::t2UXTB : ARM::UXTB; else - Opc = isThumb ? ARM::t2SXTBr : ARM::SXTBr; + Opc = isThumb ? ARM::t2SXTB : ARM::SXTB; break; case MVT::i1: if (isZext) { @@ -2033,6 +2052,8 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) { .addReg(SrcReg); if (isBoolZext) MIB.addImm(1); + else + MIB.addImm(0); AddOptionalDefs(MIB); UpdateValueMap(I, DestReg); return true; diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 381b404..2d1de6f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "ARMFrameLowering.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -93,7 +93,8 @@ static bool isCSRestore(MachineInstr *MI, return false; return true; } - if ((MI->getOpcode() == ARM::LDR_POST || + if ((MI->getOpcode() == ARM::LDR_POST_IMM || + MI->getOpcode() == ARM::LDR_POST_REG || MI->getOpcode() == ARM::t2LDR_POST) && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && MI->getOperand(1).getReg() == ARM::SP) @@ -413,6 +414,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MIB.addExternalSymbol(JumpTarget.getSymbolName(), JumpTarget.getTargetFlags()); } + + // Add the default predicate in Thumb mode. + if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0); } else if (RetOpcode == ARM::TCRETURNri) { BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). @@ -502,7 +506,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, } } } else if (AFI->isThumb2Function()) { - // Use add <rd>, sp, #<imm8> + // Use add <rd>, sp, #<imm8> // ldr <rd>, [sp, #<imm8>] // if at all possible to save space. if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020) @@ -587,14 +591,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP) .addReg(Regs[0].first, getKillRegState(Regs[0].second)) - .addReg(ARM::SP).setMIFlags(MIFlags); - // ARM mode needs an extra reg0 here due to addrmode2. Will go away once - // that refactoring is complete (eventually). 
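The instruction being built here is the single-register push: a pre-indexed store that decrements sp by 4 and writes it back before storing, with the matching pop below using a post-indexed load that reads first and then bumps sp by 4. In pointer terms:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t stack[4], *sp = stack + 4; // full-descending stack, as on ARM
  uint32_t r4 = 42;
  *(--sp) = r4;       // str r4, [sp, #-4]!  (pre-indexed push)
  uint32_t r = *sp++; // ldr r0, [sp], #4    (post-indexed pop)
  std::printf("%u\n", r); // 42
}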
- if (StrOpc == ARM::STR_PRE) { - MIB.addReg(0); - MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift)); - } else - MIB.addImm(-4); + .addReg(ARM::SP).setMIFlags(MIFlags) + .addImm(-4); AddDefaultPred(MIB); } Regs.clear(); @@ -651,8 +649,10 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, .addReg(ARM::SP)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); - if (DeleteRet) + if (DeleteRet) { + MIB->copyImplicitOps(&*MI); MI->eraseFromParent(); + } MI = MIB; } else if (Regs.size() == 1) { // If we adjusted the reg to PC from LR above, switch it back here. We @@ -665,7 +665,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, .addReg(ARM::SP); // ARM mode needs an extra reg0 here due to addrmode2. Will go away once // that refactoring is complete (eventually). - if (LdrOpc == ARM::LDR_POST) { + if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) { MIB.addReg(0); MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); } else @@ -687,7 +687,8 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; - unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE; + unsigned PushOneOpc = AFI->isThumbFunction() ? + ARM::t2STR_PRE : ARM::STR_PRE_IMM; unsigned FltOpc = ARM::VSTMDDB_UPD; emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, MachineInstr::FrameSetup); @@ -711,7 +712,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; - unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST; + unsigned LdrOpc = AFI->isThumbFunction() ? 
ARM::t2LDR_POST :ARM::LDR_POST_IMM; unsigned FltOpc = ARM::VLDMDIA_UPD; emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register); emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, diff --git a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp index 8d77b2d..5f863ea 100644 --- a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp @@ -100,8 +100,8 @@ namespace { GlobalCmp(const TargetData *td) : TD(td) { } bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) { - const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); - const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); + Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); + Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2)); } @@ -123,7 +123,7 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // FIXME: Find better heuristics std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD)); - const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Type *Int32Ty = Type::getInt32Ty(M.getContext()); for (size_t i = 0, e = Globals.size(); i != e; ) { size_t j = 0; @@ -150,7 +150,7 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, k-i) }; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2); + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); } @@ -176,7 +176,7 @@ bool ARMGlobalMerge::doInitialization(Module &M) { // Ignore fancy-aligned globals for now. unsigned Alignment = I->getAlignment(); - const Type *Ty = I->getType()->getElementType(); + Type *Ty = I->getType()->getElementType(); if (Alignment > TD->getABITypeAlignment(Ty)) continue; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 2c9481b..5ee009c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -14,8 +14,8 @@ #define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMAddressingModes.h" #include "ARMTargetMachine.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -47,6 +47,11 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, cl::desc("Check fp vmla / vmls hazard at isel time"), cl::init(true)); +static cl::opt<bool> +DisableARMIntABS("disable-arm-int-abs", cl::Hidden, + cl::desc("Enable / disable ARM integer abs transform"), + cl::init(false)); + //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. 
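The new DisableARMIntABS knob, like CheckVMLxHazard above it, uses LLVM's CommandLine library pattern for hidden developer flags: cl::init supplies the default, cl::Hidden keeps the flag out of -help. A self-contained sketch with a made-up flag name, assuming an LLVM tree to build against:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical flag in the style of the ones above.
static cl::opt<bool>
DisableFooTransform("disable-foo-transform", cl::Hidden,
                    cl::desc("Enable / disable the foo transform"),
                    cl::init(false));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return DisableFooTransform ? 1 : 0;
}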
@@ -90,13 +95,20 @@ public: bool hasNoVMLxHazardUse(SDNode *N) const; bool isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); - bool SelectShifterOperandReg(SDValue N, SDValue &A, + bool SelectRegShifterOperand(SDValue N, SDValue &A, SDValue &B, SDValue &C, bool CheckProfitability = true); - bool SelectShiftShifterOperandReg(SDValue N, SDValue &A, + bool SelectImmShifterOperand(SDValue N, SDValue &A, + SDValue &B, bool CheckProfitability = true); + bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B, SDValue &C) { // Don't apply the profitability check - return SelectShifterOperandReg(N, A, B, C, false); + return SelectRegShifterOperand(N, A, B, C, false); + } + bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, + SDValue &B) { + // Don't apply the profitability check + return SelectImmShifterOperand(N, A, B, false); } bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); @@ -122,8 +134,13 @@ public: return true; } - bool SelectAddrMode2Offset(SDNode *Op, SDValue N, + bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); + bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrOffsetNone(SDValue N, SDValue &Base); bool SelectAddrMode3(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); bool SelectAddrMode3Offset(SDNode *Op, SDValue N, @@ -240,8 +257,13 @@ private: ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); + // Select special operations if node forms integer ABS pattern + SDNode *SelectABSOp(SDNode *N); + SDNode *SelectConcatVector(SDNode *N); + SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -291,10 +313,10 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { /// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax). /// /// \param ScaledConstant [out] - On success, the pre-scaled constant value. -static bool isScaledConstantInRange(SDValue Node, unsigned Scale, +static bool isScaledConstantInRange(SDValue Node, int Scale, int RangeMin, int RangeMax, int &ScaledConstant) { - assert(Scale && "Invalid scale!"); + assert(Scale > 0 && "Invalid scale!"); // Check that this is a constant. const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); @@ -365,7 +387,30 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, return ShOpcVal == ARM_AM::lsl && ShAmt == 2; } -bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, +bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, + SDValue &BaseReg, + SDValue &Opc, + bool CheckProfitability) { + if (DisableShifterOp) + return false; + + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. 
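What SelectImmShifterOperand folds is ARM's shifted second operand: a data-processing instruction can run one source through the barrel shifter at no extra cost, so add r0, r1, r2, lsl #2 computes r1 + r2*4 in a single instruction. The operand's effect, modeled directly (the & 31 mirrors the mask applied to ShImmVal just below):

#include <cstdint>
#include <cstdio>

enum ShiftKind { LSL, LSR, ASR, ROR };

// Model of the shifted-register operand ("so_reg"): the second source
// passes through the barrel shifter before the ALU sees it.
static uint32_t soReg(uint32_t V, ShiftKind K, unsigned Amt) {
  Amt &= 31;
  switch (K) {
  case LSL: return V << Amt;
  case LSR: return V >> Amt;
  case ASR: return (uint32_t)((int32_t)V >> Amt);
  case ROR: return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
  }
  return V;
}

int main() {
  uint32_t r1 = 100, r2 = 5;
  // add r0, r1, r2, lsl #2  =>  r0 = r1 + (r2 << 2)
  std::printf("%u\n", r1 + soReg(r2, LSL, 2)); // 120
}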
+ if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!RHS) return false; + ShImmVal = RHS->getZExtValue() & 31; + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, SDValue &BaseReg, SDValue &ShReg, SDValue &Opc, @@ -373,7 +418,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, if (DisableShifterOp) return false; - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); // Don't match base register only case. That is matched to a separate // lower complexity pattern with explicit register operand. @@ -381,19 +426,18 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, BaseReg = N.getOperand(0); unsigned ShImmVal = 0; - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - ShReg = CurDAG->getRegister(0, MVT::i32); - ShImmVal = RHS->getZExtValue() & 31; - } else { - ShReg = N.getOperand(1); - if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) - return false; - } + ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (RHS) return false; + + ShReg = N.getOperand(1); + if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) + return false; Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), MVT::i32); return true; } + bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm) { @@ -483,13 +527,10 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, return false; } - if (Subtarget->isCortexA9() && !N.hasOneUse()) - // Compute R +/- (R << N) and reuse it. - return false; - // Otherwise this is R +/- [possibly shifted] R. ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); + ARM_AM::ShiftOpc ShOpcVal = + ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); unsigned ShAmt = 0; Base = N.getOperand(0); @@ -515,16 +556,14 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { - ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); + ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't // fold it. if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { ShAmt = Sh->getZExtValue(); - if (!Subtarget->isCortexA9() || - (N.hasOneUse() && - isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) { + if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { Offset = N.getOperand(0).getOperand(0); Base = N.getOperand(1); } else { @@ -630,7 +669,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, // Otherwise this is R +/- [possibly shifted] R. ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? 
ARM_AM::add:ARM_AM::sub; - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); + ARM_AM::ShiftOpc ShOpcVal = + ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); unsigned ShAmt = 0; Base = N.getOperand(0); @@ -656,16 +696,14 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { - ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); + ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't // fold it. if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { ShAmt = Sh->getZExtValue(); - if (!Subtarget->isCortexA9() || - (N.hasOneUse() && - isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) { + if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { Offset = N.getOperand(0).getOperand(0); Base = N.getOperand(1); } else { @@ -683,7 +721,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, return AM2_SHOP; } -bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc) { unsigned Opcode = Op->getOpcode(); ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) @@ -692,16 +730,11 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) ? ARM_AM::add : ARM_AM::sub; int Val; - if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. - Offset = CurDAG->getRegister(0, MVT::i32); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, - ARM_AM::no_shift), - MVT::i32); - return true; - } + if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) + return false; Offset = N; - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); unsigned ShAmt = 0; if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't fold @@ -724,6 +757,50 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, return true; } +bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc) { + unsigned Opcode = Op->getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + int Val; + if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. + if (AddSub == ARM_AM::sub) Val *= -1; + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(Val, MVT::i32); + return true; + } + + return false; +} + + +bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc) { + unsigned Opcode = Op->getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + int Val; + if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 
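    // (Operand packing assumed from the ARMAddressingModes.h helper: the
    //  getAM2Opc call below folds the 12-bit offset, the add/sub
    //  direction, and any shift kind into one immediate, roughly
    //    Imm12 | (isSub << 12) | (ShiftOpc << 13)
    //  so the indexed load carries its whole addressing mode as a single
    //  target constant.)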
+ Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, + ARM_AM::no_shift), + MVT::i32); + return true; + } + + return false; +} + +bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { + Base = N; + return true; +} bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, SDValue &Base, SDValue &Offset, @@ -1079,7 +1156,7 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg, if (DisableShifterOp) return false; - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); // Don't match base register only case. That is matched to a separate // lower complexity pattern with explicit register operand. @@ -1208,21 +1285,15 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, return false; } - if (Subtarget->isCortexA9() && !N.hasOneUse()) { - // Compute R + (R << [1,2,3]) and reuse it. - Base = N; - return false; - } - // Look for (R + R) or (R + (R << [1,2,3])). unsigned ShAmt = 0; Base = N.getOperand(0); OffReg = N.getOperand(1); // Swap if it is ((R << c) + R). - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg); + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); if (ShOpcVal != ARM_AM::lsl) { - ShOpcVal = ARM_AM::getShiftOpcForNode(Base); + ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); if (ShOpcVal == ARM_AM::lsl) std::swap(Base, OffReg); } @@ -1266,10 +1337,19 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); unsigned Opcode = 0; bool Match = false; - if (LoadedVT == MVT::i32 && - SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) { - Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; + if (LoadedVT == MVT::i32 && isPre && + SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { + Opcode = ARM::LDR_PRE_IMM; + Match = true; + } else if (LoadedVT == MVT::i32 && !isPre && + SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { + Opcode = ARM::LDR_POST_IMM; Match = true; + } else if (LoadedVT == MVT::i32 && + SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { + Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; + Match = true; + } else if (LoadedVT == MVT::i16 && SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { Match = true; @@ -1283,20 +1363,37 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; } } else { - if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) { + if (isPre && + SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = ARM::LDRB_PRE_IMM; + } else if (!isPre && + SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = ARM::LDRB_POST_IMM; + } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { Match = true; - Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; + Opcode = isPre ? 
ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; } } } if (Match) { - SDValue Chain = LD->getChain(); - SDValue Base = LD->getBasePtr(); - SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), - CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, - MVT::Other, Ops, 6); + if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, AMOpc, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, + MVT::i32, MVT::Other, Ops, 5); + } else { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, + MVT::i32, MVT::Other, Ops, 6); + } } return NULL; @@ -1966,7 +2063,8 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, Srl_imm)) { assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); - unsigned Width = CountTrailingOnes_32(And_imm); + // Note: The width operand is encoded as width-1. + unsigned Width = CountTrailingOnes_32(And_imm) - 1; unsigned LSB = Srl_imm; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), @@ -1986,7 +2084,8 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Srl_imm = 0; if (isInt32Immediate(N->getOperand(1), Srl_imm)) { assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); - unsigned Width = 32 - Srl_imm; + // Note: The width operand is encoded as width-1. + unsigned Width = 32 - Srl_imm - 1; int LSB = Srl_imm - Shl_imm; if (LSB < 0) return NULL; @@ -2034,10 +2133,16 @@ SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, SDValue CPTmp0; SDValue CPTmp1; SDValue CPTmp2; - if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6); + } + + if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7); } return 0; } @@ -2198,6 +2303,56 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } +/// Target-specific DAG combining for ISD::XOR. +/// Target-independent combining lowers SELECT_CC nodes of the form +/// select_cc setg[ge] X, 0, X, -X +/// select_cc setgt X, -1, X, -X +/// select_cc setl[te] X, 0, -X, X +/// select_cc setlt X, 1, -X, X +/// which represent Integer ABS into: +/// Y = sra (X, size(X)-1); xor (add (X, Y), Y) +/// ARM instruction selection detects the latter and matches it to +/// ARM::ABS or ARM::t2ABS machine node. 
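/// For reference, the identity above in plain C (a sketch; "x >> 31"
/// relies on arithmetic right shift of signed values, which ARM
/// compilers provide):
///   int iabs(int x) {
///     int y = x >> 31;     // 0 when x >= 0, all ones (-1) when x < 0
///     return (x + y) ^ y;  // x >= 0: x ^ 0 == x;  x < 0: ~(x - 1) == -x
///   }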
+SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ + SDValue XORSrc0 = N->getOperand(0); + SDValue XORSrc1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + if (DisableARMIntABS) + return NULL; + + if (Subtarget->isThumb1Only()) + return NULL; + + if (XORSrc0.getOpcode() != ISD::ADD || + XORSrc1.getOpcode() != ISD::SRA) + return NULL; + + SDValue ADDSrc0 = XORSrc0.getOperand(0); + SDValue ADDSrc1 = XORSrc0.getOperand(1); + SDValue SRASrc0 = XORSrc1.getOperand(0); + SDValue SRASrc1 = XORSrc1.getOperand(1); + ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); + EVT XType = SRASrc0.getValueType(); + unsigned Size = XType.getSizeInBits() - 1; + + if (ADDSrc1 == XORSrc1 && + ADDSrc0 == SRASrc0 && + XType.isInteger() && + SRAConstant != NULL && + Size == SRAConstant->getZExtValue()) { + + unsigned Opcode = ARM::ABS; + if (Subtarget->isThumb2()) + Opcode = ARM::t2ABS; + + return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); + } + + return NULL; +} + SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { // The only time a CONCAT_VECTORS operation can have legal types is when // two 64-bit vectors are concatenated to a 128-bit vector. @@ -2207,6 +2362,25 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { return PairDRegs(VT, N->getOperand(0), N->getOperand(1)); } +SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { + SmallVector<SDValue, 6> Ops; + Ops.push_back(Node->getOperand(1)); // Ptr + Ops.push_back(Node->getOperand(2)); // Low part of Val1 + Ops.push_back(Node->getOperand(3)); // High part of Val1 + if (Opc == ARM::ATOMCMPXCHG6432) { + Ops.push_back(Node->getOperand(4)); // Low part of Val2 + Ops.push_back(Node->getOperand(5)); // High part of Val2 + } + Ops.push_back(Node->getOperand(0)); // Chain + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), + MVT::i32, MVT::i32, MVT::Other, + Ops.data() ,Ops.size()); + cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); + return ResNode; +} + SDNode *ARMDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); @@ -2215,6 +2389,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::XOR: { + // Select special operations if XOR node forms integer ABS pattern + SDNode *ResNode = SelectABSOp(N); + if (ResNode) + return ResNode; + // Other cases are autogenerated. + break; + } case ISD::Constant: { unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); bool UseCP = true; @@ -2269,8 +2451,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); if (Subtarget->isThumb1Only()) { - return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI, - CurDAG->getTargetConstant(0, MVT::i32)); + SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4); } else { unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 
ARM::t2ADDri : ARM::ADDri); @@ -2307,7 +2490,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7); } } if (isPowerOf2_32(RHSV+1)) { // 2^n-1? @@ -2323,7 +2506,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7); } } } @@ -2986,6 +3169,23 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::CONCAT_VECTORS: return SelectConcatVector(N); + + case ARMISD::ATOMOR64_DAG: + return SelectAtomic64(N, ARM::ATOMOR6432); + case ARMISD::ATOMXOR64_DAG: + return SelectAtomic64(N, ARM::ATOMXOR6432); + case ARMISD::ATOMADD64_DAG: + return SelectAtomic64(N, ARM::ATOMADD6432); + case ARMISD::ATOMSUB64_DAG: + return SelectAtomic64(N, ARM::ATOMSUB6432); + case ARMISD::ATOMNAND64_DAG: + return SelectAtomic64(N, ARM::ATOMNAND6432); + case ARMISD::ATOMAND64_DAG: + return SelectAtomic64(N, ARM::ATOMAND6432); + case ARMISD::ATOMSWAP64_DAG: + return SelectAtomic64(N, ARM::ATOMSWAP6432); + case ARMISD::ATOMCMPXCHG64_DAG: + return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); } return SelectCode(N); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index cf8c5ba..e44e356 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "arm-isel" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" #include "ARMISelLowering.h" @@ -24,6 +23,7 @@ #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -38,6 +38,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -106,7 +107,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) - setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); if (ElemTy != MVT::i32) { setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); @@ -178,6 +179,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) RegInfo = TM.getRegisterInfo(); Itins = TM.getInstrItineraryData(); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + if (Subtarget->isTargetDarwin()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2()) { @@ -419,6 +422,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); } + // Use divmod compiler-rt calls for iOS 5.0 and later. 
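  // (What the libcall buys: one call computes both results. A reference
  //  model of compiler-rt's __divmodsi4, whose signature returns the
  //  quotient and stores the remainder through the third argument:
  //    int __divmodsi4(int a, int b, int *rem) {
  //      int q = a / b;
  //      *rem = a - q * b;
  //      return q;
  //    }
  //  so an SDIVREM node needs a single call instead of separate
  //  division and remainder calls.)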
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS && + !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { + setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); + setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); + } + if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); else @@ -453,7 +463,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); - setOperationAction(ISD::VSETCC, MVT::v2f64, Expand); + setOperationAction(ISD::SETCC, MVT::v2f64, Expand); setOperationAction(ISD::FNEG, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); @@ -485,8 +495,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SDIV, MVT::v8i8, Custom); setOperationAction(ISD::UDIV, MVT::v4i16, Custom); setOperationAction(ISD::UDIV, MVT::v8i8, Custom); - setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); - setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); + setOperationAction(ISD::SETCC, MVT::v1i64, Expand); + setOperationAction(ISD::SETCC, MVT::v2i64, Expand); // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with // a destination type that is wider than the source. setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); @@ -551,6 +561,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); + if (!Subtarget->isThumb1Only()) { + // FIXME: We should do this for Thumb1 as well. + setOperationAction(ISD::ADDC, MVT::i32, Custom); + setOperationAction(ISD::ADDE, MVT::i32, Custom); + setOperationAction(ISD::SUBC, MVT::i32, Custom); + setOperationAction(ISD::SUBE, MVT::i32, Custom); + } + // ARM does not have ROTL. setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Custom); @@ -596,62 +614,46 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. + // FIXME: This should be checking for v6k, not just v6. if (Subtarget->hasDataBarrier() || (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { // membarrier needs custom lowering; the rest are legal and handled // normally. setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + // Custom lowering for 64-bit ops + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); + // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. + setInsertFencesForAtomic(true); } else { // Set them all for expansion, which will force libcalls. 
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); + // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the + // Unordered/Monotonic case. 
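  // (Monotonic is the one ordering that needs no barrier: an aligned
  //  32-bit access is already single-copy atomic on ARM, so it can stay
  //  a bare ldr/str even on cores without dmb. In C++11 terms:
  //    v = x.load(std::memory_order_relaxed);  // lowers to a plain ldr
  //    v = x.load(std::memory_order_acquire);  // needs a barrier/libcall
  //  Stronger orderings on these cores fall through to the locking
  //  libcalls set up above.)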
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); // Since the libcalls include locking, fold in the fences setShouldFoldAtomicFences(true); } - // 64-bit versions are always libcalls (for now) - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); setOperationAction(ISD::PREFETCH, MVT::Other, Custom); @@ -839,6 +841,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; + case ARMISD::ADDC: return "ARMISD::ADDC"; + case ARMISD::ADDE: return "ARMISD::ADDE"; + case ARMISD::SUBC: return "ARMISD::SUBC"; + case ARMISD::SUBE: return "ARMISD::SUBE"; + case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; @@ -935,6 +942,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { + if (!VT.isVector()) return getPointerTy(); + return VT.changeVectorElementTypeToInteger(); +} + /// getRegClassFor - Return the register class that should be used for the /// specified value type. TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { @@ -1210,8 +1222,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, MachineFunction &MF = DAG.getMachineFunction(); bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool IsSibCall = false; - // Temporarily disable tail calls so things don't break. - if (!EnableARMTailCalls) + // Disable tail calls if they're not supported. + if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) isTailCall = false; if (isTailCall) { // Check if it's really possible to do a tail call. @@ -1336,10 +1348,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, MVT::i32); + // TODO: Disable AlwaysInline when it becomes possible + // to emit a nested call sequence. 
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, - /*AlwaysInline=*/false, + /*AlwaysInline=*/true, MachinePointerInfo(0), MachinePointerInfo(0))); @@ -1404,9 +1418,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const GlobalValue *GV = G->getGlobal(); // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, - ARMPCLabelIndex, - ARMCP::CPValue, 0); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); + // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -1419,8 +1433,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), - Sym, ARMPCLabelIndex, 0); + ARMConstantPoolValue *CPV = + ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, + ARMPCLabelIndex, 0); // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -1441,9 +1456,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // tBX takes a register source operand. if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, - ARMPCLabelIndex, - ARMCP::CPValue, 4); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, @@ -1470,8 +1484,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), - Sym, ARMPCLabelIndex, 4); + ARMConstantPoolValue *CPV = + ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, + ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, @@ -1940,9 +1955,9 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, } else { unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, - ARMCP::CPBlockAddress, - PCAdj); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, + ARMCP::CPBlockAddress, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); @@ -1966,8 +1981,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, @@ -1982,11 +1997,11 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; - Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); + Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); // FIXME: is there useful debug info available here? std::pair<SDValue, SDValue> CallResult = - LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), + LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); @@ -2013,8 +2028,9 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, // Initial exec model. unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, + true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, @@ -2030,7 +2046,8 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, false, false, 0); } else { // local exec model - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, @@ -2066,7 +2083,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); + ARMConstantPoolConstant::Create(GV, + UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), @@ -2135,7 +2153,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, ARMPCLabelIndex = AFI->createPICLabelUId(); unsigned PCAdj = (RelocM != Reloc::PIC_) ? 
0 : (Subtarget->isThumb()?4:8); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); + ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, + PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -2167,9 +2186,9 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), - "_GLOBAL_OFFSET_TABLE_", - ARMPCLabelIndex, PCAdj); + ARMConstantPoolValue *CPV = + ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", + ARMPCLabelIndex, PCAdj); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, @@ -2191,7 +2210,8 @@ SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue Val = DAG.getConstant(0, MVT::i32); - return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), + return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, + DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1), Val); } @@ -2224,8 +2244,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, - ARMCP::CPLSDA, PCAdj); + ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, + ARMCP::CPLSDA, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = @@ -2277,6 +2297,25 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, DAG.getConstant(DMBOpt, MVT::i32)); } + +static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + // FIXME: handle "fence singlethread" more efficiently. + DebugLoc dl = Op.getDebugLoc(); + if (!Subtarget->hasDataBarrier()) { + // Some ARMv6 cpus can support data barriers with an mcr instruction. + // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get + // here. + assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && + "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); + return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + } + + return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(ARM_MB::ISH, MVT::i32)); +} + static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // ARM pre v5TE and Thumb1 does not have preload instructions. 
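The LowerATOMIC_FENCE hunk above chooses between two barrier encodings: the dedicated ARMv7 instruction when the subtarget has data barriers, and the ARMv6 CP15 write that the MEMBARRIER_MCR node expands to. A sketch of both in GNU inline asm (illustrative helpers, assuming ARM mode and a GCC/Clang toolchain):

    static inline void barrier_v7(void) {
      asm volatile("dmb ish" ::: "memory");   /* inner-shareable DMB */
    }
    static inline void barrier_v6(void) {
      /* CP15 c7, c10, 5: the pre-v7 data memory barrier; the zero
         register operand matches the getConstant(0) passed above. */
      asm volatile("mcr p15, 0, %0, c7, c10, 5" :: "r"(0) : "memory");
    }

Thumb1 and pre-v6 cores get neither form, which is why the assert above insists those configurations reach a libcall instead.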
@@ -2754,7 +2793,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); } ARMCC::CondCodes CondCode, CondCode2; @@ -2993,8 +3032,8 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - EVT OperandVT = Op.getOperand(0).getValueType(); - assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!"); + assert(Op.getOperand(0).getValueType() == MVT::v4i16 && + "Invalid type for custom lowering!"); if (VT != MVT::v4f32) return DAG.UnrollVectorOp(Op.getNode()); @@ -3905,8 +3944,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } // Try an immediate VMVN. - uint64_t NegatedImm = (SplatBits.getZExtValue() ^ - ((1LL << SplatBitSize) - 1)); + uint64_t NegatedImm = (~SplatBits).getZExtValue(); Val = isNEONModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, VmovVT, VT.is128BitVector(), @@ -4019,6 +4057,14 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // A shuffle can only come from building a vector from various // elements of other vectors. return SDValue(); + } else if (V.getOperand(0).getValueType().getVectorElementType() != + VT.getVectorElementType()) { + // This code doesn't know how to handle shuffles where the vector + // element types do not match (this happens because type legalization + // promotes the return type of EXTRACT_VECTOR_ELT). + // FIXME: It might be appropriate to extend this code to handle + // mismatched types. + return SDValue(); } // Record this extraction against the appropriate vector if possible... @@ -4819,6 +4865,71 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { return N0; } +static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getNode()->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + + unsigned Opc; + bool ExtraOp = false; + switch (Op.getOpcode()) { + default: assert(0 && "Invalid code"); + case ISD::ADDC: Opc = ARMISD::ADDC; break; + case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; + case ISD::SUBC: Opc = ARMISD::SUBC; break; + case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; + } + + if (!ExtraOp) + return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + Op.getOperand(1)); + return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + Op.getOperand(1), Op.getOperand(2)); +} + +static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { + // Monotonic load/store is legal for all targets + if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) + return Op; + + // Aquire/Release load/store is not legal for targets without a + // dmb or equivalent available. 
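  // (Aside on the LowerADDC_ADDE_SUBC_SUBE helper just above: the new
  //  ADDC/ADDE nodes model a double-word add as ARM's adds/adc pair.
  //  Portable sketch:
  //    unsigned lo = alo + blo;          // ADDC: adds, produces carry
  //    unsigned carry = lo < alo;        // carry out of the low word
  //    unsigned hi = ahi + bhi + carry;  // ADDE: adc, consumes carry
  //  SUBC/SUBE are the matching subs/sbc pair with borrow.)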
+ return SDValue(); +} + + +static void +ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, + SelectionDAG &DAG, unsigned NewOp) { + DebugLoc dl = Node->getDebugLoc(); + assert (Node->getValueType(0) == MVT::i64 && + "Only know how to expand i64 atomics"); + + SmallVector<SDValue, 6> Ops; + Ops.push_back(Node->getOperand(0)); // Chain + Ops.push_back(Node->getOperand(1)); // Ptr + // Low part of Val1 + Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + Node->getOperand(2), DAG.getIntPtrConstant(0))); + // High part of Val1 + Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + Node->getOperand(2), DAG.getIntPtrConstant(1))); + if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { + // High part of Val1 + Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + Node->getOperand(3), DAG.getIntPtrConstant(0))); + // High part of Val2 + Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + Node->getOperand(3), DAG.getIntPtrConstant(1))); + } + SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); + SDValue Result = + DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, + cast<MemSDNode>(Node)->getMemOperand()); + SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); + Results.push_back(Result.getValue(2)); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -4834,6 +4945,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); + case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); @@ -4856,7 +4968,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); - case ISD::VSETCC: return LowerVSETCC(Op, DAG); + case ISD::SETCC: return LowerVSETCC(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); @@ -4865,6 +4977,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: return LowerSDIV(Op, DAG); case ISD::UDIV: return LowerUDIV(Op, DAG); + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUBC: + case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); } return SDValue(); } @@ -4886,6 +5004,30 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); break; + case ISD::ATOMIC_LOAD_ADD: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); + return; + case ISD::ATOMIC_LOAD_AND: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); + return; + case ISD::ATOMIC_LOAD_NAND: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); + return; + case ISD::ATOMIC_LOAD_OR: + ReplaceATOMIC_OP_64(N, Results, DAG, 
ARMISD::ATOMOR64_DAG); + return; + case ISD::ATOMIC_LOAD_SUB: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); + return; + case ISD::ATOMIC_LOAD_XOR: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); + return; + case ISD::ATOMIC_SWAP: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); + return; + case ISD::ATOMIC_CMP_SWAP: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); + return; } if (Res.getNode()) Results.push_back(Res); @@ -4963,7 +5105,10 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // cmp dest, oldval // bne exitMBB BB = loop1MBB; - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + if (ldrOpc == ARM::t2LDREX) + MIB.addImm(0); + AddDefaultPred(MIB); AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(dest).addReg(oldval)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -4976,8 +5121,10 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // cmp scratch, #0 // bne loop1MBB BB = loop2MBB; - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) - .addReg(ptr)); + MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); + if (strOpc == ARM::t2STREX) + MIB.addImm(0); + AddDefaultPred(MIB); AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(scratch).addImm(0)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -5063,7 +5210,10 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + if (ldrOpc == ARM::t2LDREX) + MIB.addImm(0); + AddDefaultPred(MIB); if (BinOpcode) { // operand order needs to go the other way for NAND if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) @@ -5074,8 +5224,10 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, addReg(dest).addReg(incr)).addReg(0); } - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) - .addReg(ptr)); + MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); + if (strOpc == ARM::t2STREX) + MIB.addImm(0); + AddDefaultPred(MIB); AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(scratch).addImm(0)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -5125,12 +5277,12 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, case 1: ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; - extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr; + extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; break; case 2: ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; - extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr; + extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; break; case 4: ldrOpc = isThumb2 ? 
ARM::t2LDREX : ARM::LDREX; @@ -5170,12 +5322,17 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + if (ldrOpc == ARM::t2LDREX) + MIB.addImm(0); + AddDefaultPred(MIB); // Sign extend the value, if necessary. if (signExtend && extendOpc) { oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest)); + AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) + .addReg(dest) + .addImm(0)); } // Build compare and cmov instructions. @@ -5184,8 +5341,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) - .addReg(ptr)); + MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); + if (strOpc == ARM::t2STREX) + MIB.addImm(0); + AddDefaultPred(MIB); AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(scratch).addImm(0)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -5203,79 +5362,596 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, return BB; } -static -MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) - if (*I != Succ) - return *I; - llvm_unreachable("Expecting a BB with two successors!"); -} +MachineBasicBlock * +ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Op1, unsigned Op2, + bool NeedsCarry, bool IsCmpxchg) const { + // This also handles ATOMIC_SWAP, indicated by Op1==0. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); -// FIXME: This opcode table should obviously be expressed in the target -// description. We probably just need a "machine opcode" value in the pseudo -// instruction. But the ideal solution maybe to simply remove the "S" version -// of the opcode altogether. 
-struct AddSubFlagsOpcodePair { - unsigned PseudoOpc; - unsigned MachineOpc; -}; + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; -static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { - {ARM::ADCSri, ARM::ADCri}, - {ARM::ADCSrr, ARM::ADCrr}, - {ARM::ADCSrs, ARM::ADCrs}, - {ARM::SBCSri, ARM::SBCri}, - {ARM::SBCSrr, ARM::SBCrr}, - {ARM::SBCSrs, ARM::SBCrs}, - {ARM::RSBSri, ARM::RSBri}, - {ARM::RSBSrr, ARM::RSBrr}, - {ARM::RSBSrs, ARM::RSBrs}, - {ARM::RSCSri, ARM::RSCri}, - {ARM::RSCSrs, ARM::RSCrs}, - {ARM::t2ADCSri, ARM::t2ADCri}, - {ARM::t2ADCSrr, ARM::t2ADCrr}, - {ARM::t2ADCSrs, ARM::t2ADCrs}, - {ARM::t2SBCSri, ARM::t2SBCri}, - {ARM::t2SBCSrr, ARM::t2SBCrr}, - {ARM::t2SBCSrs, ARM::t2SBCrs}, - {ARM::t2RSBSri, ARM::t2RSBri}, - {ARM::t2RSBSrs, ARM::t2RSBrs}, -}; + unsigned destlo = MI->getOperand(0).getReg(); + unsigned desthi = MI->getOperand(1).getReg(); + unsigned ptr = MI->getOperand(2).getReg(); + unsigned vallo = MI->getOperand(3).getReg(); + unsigned valhi = MI->getOperand(4).getReg(); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); -// Convert and Add or Subtract with Carry and Flags to a generic opcode with -// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>). -// -// FIXME: Somewhere we should assert that CPSR<def> is in the correct -// position to be recognized by the target descrition as the 'S' bit. -bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI, - MachineBasicBlock *BB) const { - unsigned OldOpc = MI->getOpcode(); - unsigned NewOpc = 0; + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + if (isThumb2) { + MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass); + MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass); + MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); + } - // This is only called for instructions that need remapping, so iterating over - // the tiny opcode table is not costly. - static const int NPairs = - sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); - for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0], - *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) { - if (OldOpc == Pair->PseudoOpc) { - NewOpc = Pair->MachineOpc; - break; + unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; + unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD; + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *contBB = 0, *cont2BB = 0; + if (IsCmpxchg) { + contBB = MF->CreateMachineBasicBlock(LLVM_BB); + cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); + } + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + if (IsCmpxchg) { + MF->insert(It, contBB); + MF->insert(It, cont2BB); + } + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + TargetRegisterClass *TRC = + isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + unsigned storesuccess = MRI.createVirtualRegister(TRC); + + // thisMBB: + // ... 
+ // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldrexd r2, r3, ptr + // <binopa> r0, r2, incr + // <binopb> r1, r3, incr + // strexd storesuccess, r0, r1, ptr + // cmp storesuccess, #0 + // bne- loopMBB + // fallthrough --> exitMBB + // + // Note that the registers are explicitly specified because there is not any + // way to force the register allocator to allocate a register pair. + // + // FIXME: The hardcoded registers are not necessary for Thumb2, but we + // need to properly enforce the restriction that the two output registers + // for ldrexd must be different. + BB = loopMBB; + // Load + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) + .addReg(ARM::R2, RegState::Define) + .addReg(ARM::R3, RegState::Define).addReg(ptr)); + // Copy r2/r3 into dest. (This copy will normally be coalesced.) + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3); + + if (IsCmpxchg) { + // Add early exit + for (unsigned i = 0; i < 2; i++) { + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : + ARM::CMPrr)) + .addReg(i == 0 ? destlo : desthi) + .addReg(i == 0 ? vallo : valhi)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + BB->addSuccessor(exitMBB); + BB->addSuccessor(i == 0 ? contBB : cont2BB); + BB = (i == 0 ? contBB : cont2BB); } + + // Copy to physregs for strexd + unsigned setlo = MI->getOperand(5).getReg(); + unsigned sethi = MI->getOperand(6).getReg(); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi); + } else if (Op1) { + // Perform binary operation + AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0) + .addReg(destlo).addReg(vallo)) + .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); + AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1) + .addReg(desthi).addReg(valhi)).addReg(0); + } else { + // Copy to physregs for strexd + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi); } - if (!NewOpc) - return false; + // Store + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) + .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr)); + // Cmp+jump + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(storesuccess).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + +/// EmitBasePointerRecalculation - For functions using a base pointer, we +/// rematerialize it (via the frame pointer). 
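/// (Sketch of the arithmetic this function emits: with the frame pointer
///  spilled at a known offset, the base pointer is rebuilt as
///    r6 = fp - FramePtrSpillOffset;
///  and, when the stack is dynamically realigned, re-aligned with
///    r6 &= ~(MaxAlign - 1);   // the "bic r6, r6, #MaxAlign-1" below
///  so r6 once again points at the start of the locals area.)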
+void ARMTargetLowering:: +EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); + MachineFunction &MF = *MI->getParent()->getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); + + if (!RI.hasBasePointer(MF)) return; + + MachineBasicBlock::iterator MBBI = MI; + + int32_t NumBytes = AFI->getFramePtrSpillOffset(); + unsigned FramePtr = RI.getFrameRegister(MF); + assert(MF.getTarget().getFrameLowering()->hasFP(MF) && + "Base pointer without frame pointer?"); + + if (AFI->isThumb2Function()) + llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, *AII); + else if (AFI->isThumbFunction()) + llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, *AII, RI); + else + llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, *AII); + + if (!RI.needsStackRealignment(MF)) return; + + // If there's dynamic realignment, adjust for it. + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned MaxAlign = MFI->getMaxAlignment(); + assert(!AFI->isThumb1OnlyFunction()); + + // Emit bic r6, r6, MaxAlign + unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; + AddDefaultCC( + AddDefaultPred( + BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6) + .addReg(ARM::R6, RegState::Kill) + .addImm(MaxAlign - 1))); +} + +/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and +/// registers the function context. +void ARMTargetLowering:: +SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, int FI) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); - for (unsigned i = 0; i < MI->getNumOperands(); ++i) - MIB.addOperand(MI->getOperand(i)); - AddDefaultPred(MIB); - MIB.addReg(ARM::CPSR, RegState::Define); // S bit + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + MachineConstantPool *MCP = MF->getConstantPool(); + ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); + const Function *F = MF->getFunction(); + + bool isThumb = Subtarget->isThumb(); + bool isThumb2 = Subtarget->isThumb2(); + + unsigned PCLabelId = AFI->createPICLabelUId(); + unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; + ARMConstantPoolValue *CPV = + ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); + unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); + + const TargetRegisterClass *TRC = + isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + + // Grab constant pool and fixed stack memory operands. + MachineMemOperand *CPMMO = + MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(), + MachineMemOperand::MOLoad, 4, 4); + + MachineMemOperand *FIMMOSt = + MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOStore, 4, 4); + + EmitBasePointerRecalculation(MI, MBB, DispatchBB); + + // Load the address of the dispatch MBB into the jump buffer. 
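  // (Layout assumed by all three variants below: the jump-buffer slot at
  //  byte offset 36 of the function context holds the resume PC, per the
  //  "&jbuf[1] :: pc" annotations. For Thumb the stored address is OR'ed
  //  with 1, e.g.
  //    resume = (uintptr_t)&dispatch | 1;  // bit 0 selects Thumb state
  //  while the ARM-mode path leaves bit 0 clear.)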
+ if (isThumb2) { + // Incoming value: jbuf + // ldr.n r5, LCPI1_1 + // orr r5, r5, #1 + // add r5, pc + // str r5, [$jbuf, #+4] ; &jbuf[1] + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) + .addConstantPoolIndex(CPI) + .addMemOperand(CPMMO)); + // Set the low bit because of thumb mode. + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + AddDefaultCC( + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) + .addReg(NewVReg1, RegState::Kill) + .addImm(0x01))); + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) + .addReg(NewVReg2, RegState::Kill) + .addImm(PCLabelId); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) + .addReg(NewVReg3, RegState::Kill) + .addFrameIndex(FI) + .addImm(36) // &jbuf[1] :: pc + .addMemOperand(FIMMOSt)); + } else if (isThumb) { + // Incoming value: jbuf + // ldr.n r1, LCPI1_4 + // add r1, pc + // mov r2, #1 + // orrs r1, r2 + // add r2, $jbuf, #+4 ; &jbuf[1] + // str r1, [r2] + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) + .addConstantPoolIndex(CPI) + .addMemOperand(CPMMO)); + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) + .addReg(NewVReg1, RegState::Kill) + .addImm(PCLabelId); + // Set the low bit because of thumb mode. + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) + .addReg(ARM::CPSR, RegState::Define) + .addImm(1)); + unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg3, RegState::Kill)); + unsigned NewVReg5 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) + .addFrameIndex(FI) + .addImm(36)); // &jbuf[1] :: pc + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) + .addReg(NewVReg4, RegState::Kill) + .addReg(NewVReg5, RegState::Kill) + .addImm(0) + .addMemOperand(FIMMOSt)); + } else { + // Incoming value: jbuf + // ldr r1, LCPI1_1 + // add r1, pc, r1 + // str r1, [$jbuf, #+4] ; &jbuf[1] + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) + .addConstantPoolIndex(CPI) + .addImm(0) + .addMemOperand(CPMMO)); + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) + .addReg(NewVReg1, RegState::Kill) + .addImm(PCLabelId)); + AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) + .addReg(NewVReg2, RegState::Kill) + .addFrameIndex(FI) + .addImm(36) // &jbuf[1] :: pc + .addMemOperand(FIMMOSt)); + } +} + +MachineBasicBlock *ARMTargetLowering:: +EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); + MachineFrameInfo *MFI = MF->getFrameInfo(); + int FI = MFI->getFunctionContextIndex(); + + const TargetRegisterClass *TRC = + Subtarget->isThumb() ? 
ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + + // Get a mapping of the call site numbers to all of the landing pads they're + // associated with. + DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; + unsigned MaxCSNum = 0; + MachineModuleInfo &MMI = MF->getMMI(); + for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { + if (!BB->isLandingPad()) continue; + + // FIXME: We should assert that the EH_LABEL is the first MI in the landing + // pad. + for (MachineBasicBlock::iterator + II = BB->begin(), IE = BB->end(); II != IE; ++II) { + if (!II->isEHLabel()) continue; + + MCSymbol *Sym = II->getOperand(0).getMCSymbol(); + if (!MMI.hasCallSiteLandingPad(Sym)) continue; + + SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym); + for (SmallVectorImpl<unsigned>::iterator + CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); + CSI != CSE; ++CSI) { + CallSiteNumToLPad[*CSI].push_back(BB); + MaxCSNum = std::max(MaxCSNum, *CSI); + } + break; + } + } + + // Get an ordered list of the machine basic blocks for the jump table. + std::vector<MachineBasicBlock*> LPadList; + SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs; + LPadList.reserve(CallSiteNumToLPad.size()); + for (unsigned I = 1; I <= MaxCSNum; ++I) { + SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I]; + for (SmallVectorImpl<MachineBasicBlock*>::iterator + II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) { + LPadList.push_back(*II); + InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end()); + } + } + + assert(!LPadList.empty() && + "No landing pad destinations for the dispatch jump table!"); + + // Create the jump table and associated information. + MachineJumpTableInfo *JTI = + MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); + unsigned MJTI = JTI->createJumpTableIndex(LPadList); + unsigned UId = AFI->createJumpTableUId(); + + // Create the MBBs for the dispatch code. + + // Shove the dispatch's address into the return slot in the function context. + MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); + DispatchBB->setIsLandingPad(); + + MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP)); + DispatchBB->addSuccessor(TrapBB); + + MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); + DispatchBB->addSuccessor(DispContBB); + + // Insert and renumber MBBs. + MachineBasicBlock *Last = &MF->back(); + MF->insert(MF->end(), DispatchBB); + MF->insert(MF->end(), DispContBB); + MF->insert(MF->end(), TrapBB); + MF->RenumberBlocks(Last); + + // Insert code into the entry block that creates and registers the function + // context. 
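Conceptually, the dispatch code assembled here is a bounds-checked switch on the call-site number that the SjLj runtime stores into the function context. A C-level sketch (illustrative only; names invented, not from the patch):

  // DispatchBB loads the call-site index from the function context and
  // bounds-checks it; DispContBB indexes the inline jump table; TrapBB
  // catches out-of-range values.
  void dispatch(unsigned callSiteIndex, unsigned numLandingPads) {
    if (callSiteIndex > numLandingPads) // the CMP + HI branch below
      __builtin_trap();                 // TrapBB
    // otherwise: jump-table branch to the landing pad for this call site
  }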
+ SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); + + MachineMemOperand *FIMMOLd = + MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOLoad | + MachineMemOperand::MOVolatile, 4, 4); + + if (Subtarget->isThumb2()) { + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) + .addFrameIndex(FI) + .addImm(4) + .addMemOperand(FIMMOLd)); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) + .addReg(NewVReg1) + .addImm(LPadList.size())); + BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) + .addMBB(TrapBB) + .addImm(ARMCC::HI) + .addReg(ARM::CPSR); + + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg2) + .addJumpTableIndex(MJTI) + .addImm(UId)); + + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + AddDefaultCC( + AddDefaultPred( + BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg3) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg1) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); + + BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) + .addReg(NewVReg3, RegState::Kill) + .addReg(NewVReg1) + .addJumpTableIndex(MJTI) + .addImm(UId); + } else if (Subtarget->isThumb()) { + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) + .addFrameIndex(FI) + .addImm(1) + .addMemOperand(FIMMOLd)); + + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) + .addReg(NewVReg1) + .addImm(LPadList.size())); + BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) + .addMBB(TrapBB) + .addImm(ARMCC::HI) + .addReg(ARM::CPSR); + + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg1) + .addImm(2)); + + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) + .addJumpTableIndex(MJTI) + .addImm(UId)); + + unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg3)); + + MachineMemOperand *JTMMOLd = + MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), + MachineMemOperand::MOLoad, 4, 4); + + unsigned NewVReg5 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) + .addReg(NewVReg4, RegState::Kill) + .addImm(0) + .addMemOperand(JTMMOLd)); + + unsigned NewVReg6 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg5, RegState::Kill) + .addReg(NewVReg3)); + + BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) + .addReg(NewVReg6, RegState::Kill) + .addJumpTableIndex(MJTI) + .addImm(UId); + } else { + unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) + .addFrameIndex(FI) + .addImm(4) + .addMemOperand(FIMMOLd)); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) + .addReg(NewVReg1) + .addImm(LPadList.size())); + BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) + .addMBB(TrapBB) + .addImm(ARMCC::HI) + .addReg(ARM::CPSR); + + unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + AddDefaultCC( + AddDefaultPred(BuildMI(DispContBB, dl, 
TII->get(ARM::MOVsi), NewVReg2) + .addReg(NewVReg1) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg3) + .addJumpTableIndex(MJTI) + .addImm(UId)); + + MachineMemOperand *JTMMOLd = + MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), + MachineMemOperand::MOLoad, 4, 4); + unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + AddDefaultPred( + BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg4) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg3) + .addImm(0) + .addMemOperand(JTMMOLd)); + + BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) + .addReg(NewVReg4, RegState::Kill) + .addReg(NewVReg3) + .addJumpTableIndex(MJTI) + .addImm(UId); + } + + // Add the jump table entries as successors to the MBB. + MachineBasicBlock *PrevMBB = 0; + for (std::vector<MachineBasicBlock*>::iterator + I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { + MachineBasicBlock *CurMBB = *I; + if (PrevMBB != CurMBB) + DispContBB->addSuccessor(CurMBB); + PrevMBB = CurMBB; + } + + const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); + const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); + const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF); + for (SmallPtrSet<MachineBasicBlock*, 64>::iterator + I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { + MachineBasicBlock *BB = *I; + + // Remove the landing pad successor from the invoke block and replace it + // with the new dispatch block. + for (MachineBasicBlock::succ_iterator + SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SMBB = *SI; + if (SMBB->isLandingPad()) { + BB->removeSuccessor(SMBB); + SMBB->setIsLandingPad(false); + } + } + + BB->addSuccessor(DispatchBB); + + // Find the invoke call and mark all of the callee-saved registers as + // 'implicit defined' so that they're spilled. This prevents code from + // moving instructions to before the EH block, where they will never be + // executed. + for (MachineBasicBlock::reverse_iterator + II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { + if (!II->getDesc().isCall()) continue; + + DenseMap<unsigned, bool> DefRegs; + for (MachineInstr::mop_iterator + OI = II->operands_begin(), OE = II->operands_end(); + OI != OE; ++OI) { + if (!OI->isReg()) continue; + DefRegs[OI->getReg()] = true; + } + + MachineInstrBuilder MIB(&*II); + + for (unsigned i = 0; SavedRegs[i] != 0; ++i) { + if (!TRC->contains(SavedRegs[i])) continue; + if (!DefRegs[SavedRegs[i]]) + MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead); + } + + break; + } + } + + // The instruction is gone now. MI->eraseFromParent(); - return true; + + return MBB; +} + +static +MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I != Succ) + return *I; + llvm_unreachable("Expecting a BB with two successors!"); } MachineBasicBlock * @@ -5286,12 +5962,61 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { default: { - if (RemapAddSubWithFlags(MI, BB)) - return BB; - MI->dump(); llvm_unreachable("Unexpected instr type to insert"); } + // The Thumb2 pre-indexed stores have the same MI operands, they just + // define them differently in the .td files from the isel patterns, so + // they need pseudos. 
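As context for the pre-indexed store cases that follow: a pre-indexed access first updates the base register by the offset, then stores through the updated address, and also produces the new base as the writeback result (Rn_wb). In C terms (illustrative only):

  #include <cstdint>

  // Equivalent of "str r1, [r0, #4]!" (pre-indexed with writeback): the
  // base advances by the offset, then the store uses the new address.
  int32_t *storePreIndexed(int32_t *base, int32_t value) {
    base += 1;     // writeback: advance base by 4 bytes
    *base = value; // store to the updated address
    return base;   // the Rn_wb result
  }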
+ case ARM::t2STR_preidx: + MI->setDesc(TII->get(ARM::t2STR_PRE)); + return BB; + case ARM::t2STRB_preidx: + MI->setDesc(TII->get(ARM::t2STRB_PRE)); + return BB; + case ARM::t2STRH_preidx: + MI->setDesc(TII->get(ARM::t2STRH_PRE)); + return BB; + + case ARM::STRi_preidx: + case ARM::STRBi_preidx: { + unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? + ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; + // Decode the offset. + unsigned Offset = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; + Offset = ARM_AM::getAM2Offset(Offset); + if (isSub) + Offset = -Offset; + + MachineMemOperand *MMO = *MI->memoperands_begin(); + BuildMI(*BB, MI, dl, TII->get(NewOpc)) + .addOperand(MI->getOperand(0)) // Rn_wb + .addOperand(MI->getOperand(1)) // Rt + .addOperand(MI->getOperand(2)) // Rn + .addImm(Offset) // offset (skip GPR==zero_reg) + .addOperand(MI->getOperand(5)) // pred + .addOperand(MI->getOperand(6)) + .addMemOperand(MMO); + MI->eraseFromParent(); + return BB; + } + case ARM::STRr_preidx: + case ARM::STRBr_preidx: + case ARM::STRH_preidx: { + unsigned NewOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("unexpected opcode!"); + case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; + case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; + case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; + } + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); + for (unsigned i = 0; i < MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + MI->eraseFromParent(); + return BB; + } case ARM::ATOMIC_LOAD_ADD_I8: return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); case ARM::ATOMIC_LOAD_ADD_I16: @@ -5370,6 +6095,31 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); + + case ARM::ATOMADD6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, + isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, + /*NeedsCarry*/ true); + case ARM::ATOMSUB6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true); + case ARM::ATOMOR6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, + isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMXOR6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, + isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMAND6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, + isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMSWAP6432: + return EmitAtomicBinary64(MI, BB, 0, 0, false); + case ARM::ATOMCMPXCHG6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ false, /*IsCmpxchg*/true); + case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the @@ -5461,13 +6211,159 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); - BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2B : ARM::B)) - .addMBB(exitMBB); + if (isThumb2) + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); + else + BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } + + case ARM::ABS: + case ARM::t2ABS: { + // To insert an ABS instruction, we have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // source vreg to test against 0, the destination vreg to set, + // the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + // It transforms + // V1 = ABS V0 + // into + // V2 = MOVS V0 + // BCC (branch to SinkBB if V0 >= 0) + // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) + // SinkBB: V1 = PHI(V2, V3) + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator BBI = BB; + ++BBI; + MachineFunction *Fn = BB->getParent(); + MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); + Fn->insert(BBI, RSBBB); + Fn->insert(BBI, SinkBB); + + unsigned int ABSSrcReg = MI->getOperand(1).getReg(); + unsigned int ABSDstReg = MI->getOperand(0).getReg(); + bool isThumb2 = Subtarget->isThumb2(); + MachineRegisterInfo &MRI = Fn->getRegInfo(); + // In Thumb mode S must not be specified if source register is the SP or + // PC and if destination register is the SP, so restrict register class + unsigned NewMovDstReg = MRI.createVirtualRegister( + isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); + unsigned NewRsbDstReg = MRI.createVirtualRegister( + isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + SinkBB->splice(SinkBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + SinkBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(RSBBB); + BB->addSuccessor(SinkBB); + + // fall through to SinkMBB + RSBBB->addSuccessor(SinkBB); + + // insert a movs at the end of BB + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr), + NewMovDstReg) + .addReg(ABSSrcReg, RegState::Kill) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(ARM::CPSR, RegState::Define); + + // insert a bcc with opposite CC to ARMCC::MI at the end of BB + BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) + .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); + + // insert rsbri in RSBBB + // Note: BCC and rsbri will be converted into predicated rsbmi + // by if-conversion pass + BuildMI(*RSBBB, RSBBB->begin(), dl, + TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) + .addReg(NewMovDstReg, RegState::Kill) + .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + + // insert PHI in SinkBB, + // reuse ABSDstReg to not change uses of ABS instruction + BuildMI(*SinkBB, SinkBB->begin(), dl, + TII->get(ARM::PHI), ABSDstReg) + .addReg(NewRsbDstReg).addMBB(RSBBB) + .addReg(NewMovDstReg).addMBB(BB); + + // remove ABS instruction + MI->eraseFromParent(); + + // return last added BB + return SinkBB; + } + } +} + +void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const { + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.hasPostISelHook()) { + assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && + "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); + return; + } + + // Adjust potentially 's' setting instructions after isel, i.e. 
ADC, SBC, RSB, + // RSC. Coming out of isel, they have an implicit CPSR def, but the optional + // operand is still set to noreg. If needed, set the optional operand's + // register to CPSR, and remove the redundant implicit def. + // + // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>). + + // Rename pseudo opcodes. + unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); + if (NewOpc) { + const ARMBaseInstrInfo *TII = + static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); + MI->setDesc(TII->get(NewOpc)); + } + unsigned ccOutIdx = MCID.getNumOperands() - 1; + + // Any ARM instruction that sets the 's' bit should specify an optional + // "cc_out" operand in the last operand position. + if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) { + assert(!NewOpc && "Optional cc_out operand required"); + return; + } + // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it + // since we already have an optional CPSR def. + bool definesCPSR = false; + bool deadCPSR = false; + for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { + definesCPSR = true; + if (MO.isDead()) + deadCPSR = true; + MI->RemoveOperand(i); + break; + } + } + if (!definesCPSR) { + assert(!NewOpc && "Optional cc_out operand required"); + return; + } + assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); + if (deadCPSR) { + assert(!MI->getOperand(ccOutIdx).getReg() && + "expect uninitialized optional cc_out operand"); + return; } + + // If this instruction was defined with an optional CPSR def and its dag node + // had a live implicit CPSR def, then activate the optional CPSR def. + MachineOperand &MO = MI->getOperand(ccOutIdx); + MO.setReg(ARM::CPSR); + MO.setIsDef(true); } //===----------------------------------------------------------------------===// @@ -6975,7 +7871,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { SDValue FalseVal = N->getOperand(0); SDValue TrueVal = N->getOperand(1); SDValue ARMcc = N->getOperand(2); - ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); + ARMCC::CondCodes CC = + (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); // Simplify // mov r1, r0 @@ -6995,7 +7892,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { // movne r0, y /// FIXME: Turn this into a target neutral optimization? SDValue Res; - if (CC == ARMCC::NE && FalseVal == RHS) { + if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, N->getOperand(3), Cmp); } else if (CC == ARMCC::EQ && TrueVal == RHS) { @@ -7235,7 +8132,7 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. 
bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const { + Type *Ty) const { EVT VT = getValueType(Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; @@ -7351,7 +8248,8 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, if (Ptr->getOpcode() == ISD::ADD) { isInc = true; - ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); + ARM_AM::ShiftOpc ShOpcVal= + ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { Base = Ptr->getOperand(1); Offset = Ptr->getOperand(0); @@ -7536,7 +8434,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { if (AsmPieces.size() == 3 && AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && IA->getConstraintString().compare(0, 4, "=l,l") == 0) { - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (Ty && Ty->getBitWidth() == 32) return IntrinsicLowering::LowerToByteSwap(CI); } @@ -7559,6 +8457,9 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const { case 'x': return C_RegisterClass; case 't': return C_RegisterClass; case 'j': return C_Other; // Constant for movw. + // An address with a single base register. Due to the way we + // currently handle addresses it is the same as an 'r' memory constraint. + case 'Q': return C_Memory; } } else if (Constraint.size() == 2) { switch (Constraint[0]) { @@ -7582,7 +8483,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: @@ -7618,7 +8519,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return RCPair(0U, ARM::GPRRegisterClass); case 'h': // High regs or no regs. if (Subtarget->isThumb()) - return RCPair(0U, ARM::hGPRRegisterClass); + return RCPair(0U, ARM::hGPRRegisterClass); break; case 'r': return RCPair(0U, ARM::GPRRegisterClass); @@ -7632,15 +8533,15 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, break; case 'x': if (VT == MVT::f32) - return RCPair(0U, ARM::SPR_8RegisterClass); + return RCPair(0U, ARM::SPR_8RegisterClass); if (VT.getSizeInBits() == 64) - return RCPair(0U, ARM::DPR_8RegisterClass); + return RCPair(0U, ARM::DPR_8RegisterClass); if (VT.getSizeInBits() == 128) - return RCPair(0U, ARM::QPR_8RegisterClass); + return RCPair(0U, ARM::QPR_8RegisterClass); break; case 't': if (VT == MVT::f32) - return RCPair(0U, ARM::SPRRegisterClass); + return RCPair(0U, ARM::SPRRegisterClass); break; } } @@ -7680,12 +8581,12 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, switch (ConstraintLetter) { case 'j': - // Constant suitable for movw, must be between 0 and - // 65535. - if (Subtarget->hasV6T2Ops()) - if (CVal >= 0 && CVal <= 65535) - break; - return; + // Constant suitable for movw, must be between 0 and + // 65535. 
+ if (Subtarget->hasV6T2Ops()) + if (CVal >= 0 && CVal <= 65535) + break; + return; case 'I': if (Subtarget->isThumb1Only()) { // This must be a constant between 0 and 255, for ADD @@ -7823,50 +8724,6 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { return false; } -int ARM::getVFPf32Imm(const APFloat &FPImm) { - APInt Imm = FPImm.bitcastToAPInt(); - uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; - int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 - int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits - - // We can handle 4 bits of mantissa. - // mantissa = (16+UInt(e:f:g:h))/16. - if (Mantissa & 0x7ffff) - return -1; - Mantissa >>= 19; - if ((Mantissa & 0xf) != Mantissa) - return -1; - - // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 - if (Exp < -3 || Exp > 4) - return -1; - Exp = ((Exp+3) & 0x7) ^ 4; - - return ((int)Sign << 7) | (Exp << 4) | Mantissa; -} - -int ARM::getVFPf64Imm(const APFloat &FPImm) { - APInt Imm = FPImm.bitcastToAPInt(); - uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; - int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 - uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL; - - // We can handle 4 bits of mantissa. - // mantissa = (16+UInt(e:f:g:h))/16. - if (Mantissa & 0xffffffffffffLL) - return -1; - Mantissa >>= 48; - if ((Mantissa & 0xf) != Mantissa) - return -1; - - // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 - if (Exp < -3 || Exp > 4) - return -1; - Exp = ((Exp+3) & 0x7) ^ 4; - - return ((int)Sign << 7) | (Exp << 4) | Mantissa; -} - bool ARM::isBitFieldInvertedMask(unsigned v) { if (v == 0xffffffff) return 0; @@ -7889,9 +8746,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (!Subtarget->hasVFP3()) return false; if (VT == MVT::f32) - return ARM::getVFPf32Imm(Imm) != -1; + return ARM_AM::getFP32Imm(Imm) != -1; if (VT == MVT::f64) - return ARM::getVFPf64Imm(Imm) != -1; + return ARM_AM::getFP64Imm(Imm) != -1; return false; } @@ -7933,7 +8790,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, // Conservatively set memVT to the entire set of vectors stored. unsigned NumElts = 0; for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { - const Type *ArgTy = I.getArgOperand(ArgI)->getType(); + Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 980fb40..5da9b27 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -71,6 +71,11 @@ namespace llvm { SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. + ADDC, // Add with carry + ADDE, // Add using carry + SUBC, // Sub with carry + SUBE, // Sub using carry + VMOVRRD, // double to two gprs. VMOVDRR, // Two gprs to double. @@ -206,18 +211,22 @@ namespace llvm { VST4_UPD, VST2LN_UPD, VST3LN_UPD, - VST4LN_UPD + VST4LN_UPD, + + // 64-bit atomic ops (value split into two registers) + ATOMADD64_DAG, + ATOMSUB64_DAG, + ATOMOR64_DAG, + ATOMXOR64_DAG, + ATOMAND64_DAG, + ATOMNAND64_DAG, + ATOMSWAP64_DAG, + ATOMCMPXCHG64_DAG }; } /// Define some predicates that are used for node matching. 
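The ADDC/ADDE and SUBC/SUBE nodes introduced above model the carry chain as an explicit i32 result and operand rather than glue. The motivating case is ordinary 64-bit integer arithmetic on this 32-bit target, which splits into a flag-setting low-word op and a carry-consuming high-word op (illustrative example, not part of the diff):

  #include <cstdint>

  // On 32-bit ARM this compiles to an adds/adc pair: ADDC yields the low
  // word plus the carry in CPSR, and ADDE adds the high words plus carry.
  uint64_t add64(uint64_t a, uint64_t b) {
    return a + b; // adds r0, r0, r2 ; adc r1, r1, r3
  }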
namespace ARM { - /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be - /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd) - /// instruction, returns its 8-bit integer representation. Otherwise, - /// returns -1. - int getVFPf32Imm(const APFloat &FPImm); - int getVFPf64Imm(const APFloat &FPImm); bool isBitFieldInvertedMask(unsigned v); } @@ -240,10 +249,16 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; + /// getSetCCResultType - Return the value type to use for ISD::SETCC. + virtual EVT getSetCCResultType(EVT VT) const; + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + virtual void + AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const; + SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -256,7 +271,7 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; + virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -485,12 +500,28 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode) const; + MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Op1, + unsigned Op2, + bool NeedsCarry = false, + bool IsCmpxchg = false) const; MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, bool signExtend, ARMCC::CondCodes Cond) const; + void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB) const; + + void SetupEntryBlockForSjLj(MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, int FI) const; + + MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr *MI, + MachineBasicBlock *MBB) const; + bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const; }; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index 3ccf22f..7cbc911 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -25,7 +25,7 @@ def BrFrm : Format<2>; def BrMiscFrm : Format<3>; def DPFrm : Format<4>; -def DPSoRegFrm : Format<5>; +def DPSoRegRegFrm : Format<5>; def LdFrm : Format<6>; def StFrm : Format<7>; @@ -68,6 +68,7 @@ def N3RegVShFrm : Format<38>; def NVExtFrm : Format<39>; def NVMulSLFrm : Format<40>; def NVTBLFrm : Format<41>; +def DPSoRegImmFrm : Format<42>; // Misc flags. @@ -130,39 +131,15 @@ def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8 // ARM special operands. 
// -def CondCodeOperand : AsmOperandClass { - let Name = "CondCode"; - let SuperClasses = []; -} - -def CCOutOperand : AsmOperandClass { - let Name = "CCOut"; - let SuperClasses = []; -} - -def MemBarrierOptOperand : AsmOperandClass { - let Name = "MemBarrierOpt"; - let SuperClasses = []; - let ParserMethod = "tryParseMemBarrierOptOperand"; -} - -def ProcIFlagsOperand : AsmOperandClass { - let Name = "ProcIFlags"; - let SuperClasses = []; - let ParserMethod = "tryParseProcIFlagsOperand"; -} - -def MSRMaskOperand : AsmOperandClass { - let Name = "MSRMask"; - let SuperClasses = []; - let ParserMethod = "tryParseMSRMaskOperand"; -} - // ARM imod and iflag operands, used only by the CPS instruction. def imod_op : Operand<i32> { let PrintMethod = "printCPSIMod"; } +def ProcIFlagsOperand : AsmOperandClass { + let Name = "ProcIFlags"; + let ParserMethod = "parseProcIFlagsOperand"; +} def iflags_op : Operand<i32> { let PrintMethod = "printCPSIFlag"; let ParserMatchClass = ProcIFlagsOperand; @@ -170,17 +147,21 @@ def iflags_op : Operand<i32> { // ARM Predicate operand. Default to 14 = always (AL). Second part is CC // register whose default is 0 (no register). -def pred : PredicateOperand<OtherVT, (ops i32imm, CCR), +def CondCodeOperand : AsmOperandClass { let Name = "CondCode"; } +def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm), (ops (i32 14), (i32 zero_reg))> { let PrintMethod = "printPredicateOperand"; let ParserMatchClass = CondCodeOperand; + let DecoderMethod = "DecodePredicateOperand"; } // Conditional code result for instructions whose 's' bit is set, e.g. subs. +def CCOutOperand : AsmOperandClass { let Name = "CCOut"; } def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> { let EncoderMethod = "getCCOutOpValue"; let PrintMethod = "printSBitModifierOperand"; let ParserMatchClass = CCOutOperand; + let DecoderMethod = "DecodeCCOutOperand"; } // Same as cc_out except it defaults to setting CPSR. @@ -188,16 +169,27 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> { let EncoderMethod = "getCCOutOpValue"; let PrintMethod = "printSBitModifierOperand"; let ParserMatchClass = CCOutOperand; + let DecoderMethod = "DecodeCCOutOperand"; } // ARM special operands for disassembly only. // +def SetEndAsmOperand : AsmOperandClass { + let Name = "SetEndImm"; + let ParserMethod = "parseSetEndImm"; +} def setend_op : Operand<i32> { let PrintMethod = "printSetendOperand"; + let ParserMatchClass = SetEndAsmOperand; } +def MSRMaskOperand : AsmOperandClass { + let Name = "MSRMask"; + let ParserMethod = "parseMSRMaskOperand"; +} def msr_mask : Operand<i32> { let PrintMethod = "printMSRMaskOperand"; + let DecoderMethod = "DecodeMSRMask"; let ParserMatchClass = MSRMaskOperand; } @@ -211,21 +203,40 @@ def msr_mask : Operand<i32> { // 64 64 - <imm> is encoded in imm6<5:0> def shr_imm8 : Operand<i32> { let EncoderMethod = "getShiftRight8Imm"; + let DecoderMethod = "DecodeShiftRight8Imm"; } def shr_imm16 : Operand<i32> { let EncoderMethod = "getShiftRight16Imm"; + let DecoderMethod = "DecodeShiftRight16Imm"; } def shr_imm32 : Operand<i32> { let EncoderMethod = "getShiftRight32Imm"; + let DecoderMethod = "DecodeShiftRight32Imm"; } def shr_imm64 : Operand<i32> { let EncoderMethod = "getShiftRight64Imm"; + let DecoderMethod = "DecodeShiftRight64Imm"; } //===----------------------------------------------------------------------===// +// ARM Assembler alias templates. 
+// +class ARMInstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[IsARM]>; +class tInstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[IsThumb]>; +class t2InstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>; +class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>; +class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>; + +//===----------------------------------------------------------------------===// // ARM Instruction templates. // + class InstTemplate<AddrMode am, int sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> : Instruction { @@ -240,17 +251,22 @@ class InstTemplate<AddrMode am, int sz, IndexMode im, Domain D = d; bit isUnaryDataProc = 0; bit canXformTo16Bit = 0; + // The instruction is a 16-bit flag setting Thumb instruction. Used + // by the parser to determine whether to require the 'S' suffix on the + // mnemonic (when not in an IT block) or preclude it (when in an IT block). + bit thumbArithFlagSetting = 0; // If this is a pseudo instruction, mark it isCodeGenOnly. let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo"); - // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h. + // The layout of TSFlags should be kept in sync with ARMBaseInfo.h. let TSFlags{4-0} = AM.Value; let TSFlags{6-5} = IndexModeBits; let TSFlags{12-7} = Form; let TSFlags{13} = isUnaryDataProc; let TSFlags{14} = canXformTo16Bit; let TSFlags{17-15} = D.Value; + let TSFlags{18} = thumbArithFlagSetting; let Constraints = cstr; let Itinerary = itin; @@ -262,13 +278,17 @@ class Encoding { class InstARM<AddrMode am, int sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> - : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding; + : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding { + let DecoderNamespace = "ARM"; +} // This Encoding-less class is used by Thumb1 to specify the encoding bits later // on by adding flavors to specific instructions. 
class InstThumb<AddrMode am, int sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> - : InstTemplate<am, sz, im, f, d, cstr, itin>; + : InstTemplate<am, sz, im, f, d, cstr, itin> { + let DecoderNamespace = "Thumb"; +} class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern> : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, @@ -426,11 +446,11 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { bits<4> Rt; - bits<4> Rn; + bits<4> addr; let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; let Inst{20} = 1; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rt; let Inst{11-0} = 0b111110011111; } @@ -450,14 +470,14 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{3-0} = Rt; } class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> - : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> { + : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> { bits<4> Rt; bits<4> Rt2; - bits<4> Rn; + bits<4> addr; let Inst{27-23} = 0b00010; let Inst{22} = b; let Inst{21-20} = 0b00; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rt; let Inst{11-4} = 0b00001001; let Inst{3-0} = Rt2; @@ -515,22 +535,41 @@ class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops, let Inst{20} = isLd; // L bit let Inst{15-12} = Rt; } -class AI2stridx<bit isByte, bit isPre, dag oops, dag iops, +class AI2stridx_reg<bit isByte, bit isPre, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list<dag> pattern> + : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, + pattern> { + // AM2 store w/ two operands: (GPR, am2offset) + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> Rn; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{19-16} = Rn; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; +} + +class AI2stridx_imm<bit isByte, bit isPre, dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, pattern> { // AM2 store w/ two operands: (GPR, am2offset) - // {13} 1 == Rm, 0 == imm12 // {12} isAdd // {11-0} imm12/Rm bits<14> offset; bits<4> Rn; - let Inst{25} = offset{13}; + let Inst{25} = 0; let Inst{23} = offset{12}; let Inst{19-16} = Rn; let Inst{11-0} = offset{11-0}; } + + // FIXME: Merge with the above class when addrmode2 gets used for STR, STRB // but for now use this class for STRT and STRBT. 
class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops, @@ -568,9 +607,11 @@ class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f, let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{7-4} = op; let Inst{3-0} = addr{3-0}; // imm3_0/Rm + + let DecoderMethod = "DecodeAddrMode3Instruction"; } -class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, +class AI3ldstidx<bits<4> op, bit op20, bit isPre, dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : I<oops, iops, AddrMode3, 4, im, f, itin, @@ -586,48 +627,24 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, // FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB // but for now use this class for LDRSBT, LDRHT, LDSHT. -class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, +class AI3ldstidxT<bits<4> op, bit isLoad, dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, 4, im, f, itin, - opc, asm, cstr, pattern> { + : I<oops, iops, AddrMode3, 4, im, f, itin, opc, asm, cstr, pattern> { // {13} 1 == imm8, 0 == Rm // {12-9} Rn // {8} isAdd // {7-4} imm7_4/zero // {3-0} imm3_0/Rm - bits<14> addr; - bits<4> Rt; - let Inst{27-25} = 0b000; - let Inst{24} = isPre; // P bit - let Inst{23} = addr{8}; // U bit - let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm - let Inst{20} = op20; // L bit - let Inst{19-16} = addr{12-9}; // Rn - let Inst{15-12} = Rt; // Rt - let Inst{11-8} = addr{7-4}; // imm7_4/zero - let Inst{7-4} = op; - let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3"; -} - -class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops, - IndexMode im, Format f, InstrItinClass itin, string opc, - string asm, string cstr, list<dag> pattern> - : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, - pattern> { - // AM3 store w/ two operands: (GPR, am3offset) - bits<14> offset; + bits<4> addr; bits<4> Rt; - bits<4> Rn; let Inst{27-25} = 0b000; - let Inst{23} = offset{8}; - let Inst{22} = offset{9}; - let Inst{19-16} = Rn; + let Inst{24} = 0; // P bit + let Inst{21} = 1; + let Inst{20} = isLoad; // L bit + let Inst{19-16} = addr; // Rn let Inst{15-12} = Rt; // Rt - let Inst{11-8} = offset{7-4}; // imm7_4/zero let Inst{7-4} = op; - let Inst{3-0} = offset{3-0}; // imm3_0/Rm } // stores @@ -648,75 +665,7 @@ class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin, let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{7-4} = op; let Inst{3-0} = addr{3-0}; // imm3_0/Rm -} - -// Pre-indexed stores -class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 0; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 1; // W bit - let Inst{24} = 1; // P bit - let Inst{27-25} = 0b000; -} -class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 1; // W bit - let Inst{24} = 1; // P bit - let 
Inst{27-25} = 0b000; -} - -// Post-indexed stores -class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin, - opc, asm, cstr,pattern> { - // {13} 1 == imm8, 0 == Rm - // {12-9} Rn - // {8} isAdd - // {7-4} imm7_4/zero - // {3-0} imm3_0/Rm - bits<14> addr; - bits<4> Rt; - let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 0; // S bit - let Inst{7} = 1; - let Inst{11-8} = addr{7-4}; // imm7_4/zero - let Inst{15-12} = Rt; // Rt - let Inst{19-16} = addr{12-9}; // Rn - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm - let Inst{23} = addr{8}; // U bit - let Inst{24} = 0; // P bit - let Inst{27-25} = 0b000; -} -class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 0; // P bit - let Inst{27-25} = 0b000; + let DecoderMethod = "DecodeAddrMode3Instruction"; } // addrmode4 instructions @@ -843,6 +792,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, } // PKH instructions +def PKHLSLAsmOperand : AsmOperandClass { + let Name = "PKHLSLImm"; + let ParserMethod = "parsePKHLSLImm"; +} +def pkh_lsl_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]>{ + let PrintMethod = "printPKHLSLShiftImm"; + let ParserMatchClass = PKHLSLAsmOperand; +} +def PKHASRAsmOperand : AsmOperandClass { + let Name = "PKHASRImm"; + let ParserMethod = "parsePKHASRImm"; +} +def pkh_asr_amt: Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]>{ + let PrintMethod = "printPKHASRShiftImm"; + let ParserMatchClass = PKHASRAsmOperand; +} + class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin, @@ -850,11 +816,11 @@ class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin, bits<4> Rd; bits<4> Rn; bits<4> Rm; - bits<8> sh; + bits<5> sh; let Inst{27-20} = opcod; let Inst{19-16} = Rn; let Inst{15-12} = Rd; - let Inst{11-7} = sh{7-3}; + let Inst{11-7} = sh; let Inst{6} = tb; let Inst{5-4} = 0b01; let Inst{3-0} = Rm; @@ -949,7 +915,9 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, int sz, let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, "${s}${p}", asm); let Pattern = pattern; + let thumbArithFlagSetting = 1; list<Predicate> Predicates = [IsThumb, IsThumb1Only]; + let DecoderNamespace = "ThumbSBit"; } class T1sI<dag oops, dag iops, InstrItinClass itin, @@ -1071,6 +1039,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, int sz, let AsmString = !strconcat(opc, "${p}", asm); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; } // Same as Thumb2I except it can optionally modify CPSR. 
Note it's modeled as an @@ -1091,6 +1060,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, int sz, let AsmString = !strconcat(opc, "${s}${p}", asm); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; } // Special cases @@ -1103,6 +1073,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, int sz, let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; } class ThumbXI<dag oops, dag iops, AddrMode am, int sz, @@ -1114,6 +1085,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, int sz, let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb, IsThumb1Only]; + let DecoderNamespace = "Thumb"; } class T2I<dag oops, dag iops, InstrItinClass itin, @@ -1132,8 +1104,8 @@ class T2Ipc<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>; class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, "", + string opc, string asm, string cstr, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr, pattern> { bits<4> Rt; bits<4> Rt2; @@ -1149,6 +1121,26 @@ class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin, let Inst{11-8} = Rt2{3-0}; let Inst{7-0} = addr{7-0}; } +class T2Ii8s4post<bit P, bit W, bit isLoad, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, string cstr, + list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, cstr, + pattern> { + bits<4> Rt; + bits<4> Rt2; + bits<4> addr; + bits<9> imm; + let Inst{31-25} = 0b1110100; + let Inst{24} = P; + let Inst{23} = imm{8}; + let Inst{22} = 1; + let Inst{21} = W; + let Inst{20} = isLoad; + let Inst{19-16} = addr; + let Inst{15-12} = Rt{3-0}; + let Inst{11-8} = Rt2{3-0}; + let Inst{7-0} = imm{7-0}; +} class T2sI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -1172,8 +1164,8 @@ class T2XIt<dag oops, dag iops, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>; -// T2Iidxldst - Thumb2 indexed load / store instructions. -class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, +// T2Ipreldst - Thumb2 pre-indexed load / store instructions. +class T2Ipreldst<bit signed, bits<2> opcod, bit load, bit pre, dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -1183,25 +1175,60 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, let AsmString = !strconcat(opc, "${p}", asm); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; + + bits<4> Rt; + bits<13> addr; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; let Inst{23} = 0; let Inst{22-21} = opcod; let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = Rt{3-0}; let Inst{11} = 1; // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed let Inst{10} = pre; // The P bit. + let Inst{9} = addr{8}; // Sign bit let Inst{8} = 1; // The W bit. + let Inst{7-0} = addr{7-0}; - bits<9> addr; - let Inst{7-0} = addr{7-0}; - let Inst{9} = addr{8}; // Sign bit + let DecoderMethod = "DecodeT2LdStPre"; +} + +// T2Ipostldst - Thumb2 post-indexed load / store instructions. 
+class T2Ipostldst<bit signed, bits<2> opcod, bit load, bit pre, + dag oops, dag iops, + AddrMode am, IndexMode im, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", asm); + let Pattern = pattern; + list<Predicate> Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; bits<4> Rt; bits<4> Rn; + bits<9> offset; + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = opcod; + let Inst{20} = load; + let Inst{19-16} = Rn; let Inst{15-12} = Rt{3-0}; - let Inst{19-16} = Rn{3-0}; + let Inst{11} = 1; + // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed + let Inst{10} = pre; // The P bit. + let Inst{9} = offset{8}; // Sign bit + let Inst{8} = 1; // The W bit. + let Inst{7-0} = offset{7-0}; + + let DecoderMethod = "DecodeT2LdStPre"; } // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. @@ -1242,6 +1269,7 @@ class VFPI<dag oops, dag iops, AddrMode am, int sz, let AsmString = !strconcat(opc, "${p}", asm); let Pattern = pattern; let PostEncoderMethod = "VFPThumb2PostEncoder"; + let DecoderNamespace = "VFP"; list<Predicate> Predicates = [HasVFP2]; } @@ -1257,6 +1285,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, int sz, let AsmString = asm; let Pattern = pattern; let PostEncoderMethod = "VFPThumb2PostEncoder"; + let DecoderNamespace = "VFP"; list<Predicate> Predicates = [HasVFP2]; } @@ -1574,6 +1603,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm); let Pattern = pattern; list<Predicate> Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; } // Same as NeonI except it does not have a "data type" specifier. @@ -1586,6 +1616,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, let AsmString = !strconcat(opc, "${p}", "\t", asm); let Pattern = pattern; list<Predicate> Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; } class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, @@ -1600,6 +1631,7 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, let Inst{7-4} = op7_4; let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder"; + let DecoderNamespace = "NEONLoadStore"; bits<5> Vd; bits<6> Rn; @@ -1643,6 +1675,7 @@ class NDataI<dag oops, dag iops, Format f, InstrItinClass itin, pattern> { let Inst{31-25} = 0b1111001; let PostEncoderMethod = "NEONThumb2DataIPostEncoder"; + let DecoderNamespace = "NEONData"; } class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin, @@ -1651,6 +1684,7 @@ class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin, cstr, pattern> { let Inst{31-25} = 0b1111001; let PostEncoderMethod = "NEONThumb2DataIPostEncoder"; + let DecoderNamespace = "NEONData"; } // NEON "one register and a modified immediate" format. @@ -1677,6 +1711,7 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6, let Inst{24} = SIMM{7}; let Inst{18-16} = SIMM{6-4}; let Inst{3-0} = SIMM{3-0}; + let DecoderMethod = "DecodeNEONModImmInstruction"; } // NEON 2 vector register format. 
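The Inst{...} assignments in classes such as T2Ipostldst above declare how operand fields pack into the 32-bit encoding. The same packing written out imperatively, as a sketch (helper name invented for illustration):

  #include <cstdint>

  // Mirrors the T2Ipostldst layout: 11111 00 S 0 oo L Rn Rt 1 P U 1 imm8.
  uint32_t encodeT2PostIdxLdSt(bool isSigned, unsigned opcod, bool isLoad,
                               bool isPre, unsigned Rn, unsigned Rt,
                               bool isAdd, unsigned imm8) {
    uint32_t Inst = 0x1Fu << 27;        // Inst{31-27} = 0b11111
    Inst |= (isSigned ? 1u : 0u) << 24; // Inst{24} = signed
    Inst |= (opcod & 0x3u) << 21;       // Inst{22-21} = opcod
    Inst |= (isLoad ? 1u : 0u) << 20;   // Inst{20} = load
    Inst |= (Rn & 0xFu) << 16;          // Inst{19-16} = Rn
    Inst |= (Rt & 0xFu) << 12;          // Inst{15-12} = Rt
    Inst |= 1u << 11;                   // Inst{11} = 1
    Inst |= (isPre ? 1u : 0u) << 10;    // Inst{10} = P bit (pre/post)
    Inst |= (isAdd ? 1u : 0u) << 9;     // Inst{9} = offset sign (U) bit
    Inst |= 1u << 8;                    // Inst{8} = W bit (writeback)
    Inst |= imm8 & 0xFFu;               // Inst{7-0} = offset{7-0}
    return Inst;
  }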
@@ -1874,6 +1909,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, list<Predicate> Predicates = [HasNEON]; let PostEncoderMethod = "NEONThumb2DupPostEncoder"; + let DecoderNamespace = "NEONDup"; bits<5> V; bits<4> R; @@ -1915,7 +1951,6 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops, bits<5> Vd; bits<5> Vm; - bits<4> lane; let Inst{22} = Vd{4}; let Inst{15-12} = Vd{3-0}; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index adcbf18..48da03f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -13,8 +13,8 @@ #include "ARMInstrInfo.h" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -30,14 +30,18 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { switch (Opc) { default: break; - case ARM::LDR_PRE: - case ARM::LDR_POST: + case ARM::LDR_PRE_IMM: + case ARM::LDR_PRE_REG: + case ARM::LDR_POST_IMM: + case ARM::LDR_POST_REG: return ARM::LDRi12; case ARM::LDRH_PRE: case ARM::LDRH_POST: return ARM::LDRH; - case ARM::LDRB_PRE: - case ARM::LDRB_POST: + case ARM::LDRB_PRE_IMM: + case ARM::LDRB_PRE_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRB_POST_REG: return ARM::LDRBi12; case ARM::LDRSH_PRE: case ARM::LDRSH_POST: @@ -45,14 +49,18 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { case ARM::LDRSB_PRE: case ARM::LDRSB_POST: return ARM::LDRSB; - case ARM::STR_PRE: - case ARM::STR_POST: + case ARM::STR_PRE_IMM: + case ARM::STR_PRE_REG: + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: return ARM::STRi12; case ARM::STRH_PRE: case ARM::STRH_POST: return ARM::STRH; - case ARM::STRB_PRE: - case ARM::STRB_POST: + case ARM::STRB_PRE_IMM: + case ARM::STRB_PRE_REG: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: return ARM::STRBi12; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index a42dd1a..2cf0f09 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -70,6 +70,18 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; + +// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR +def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisVT<4, i32>]>; // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; @@ -120,6 +132,12 @@ def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>; +def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>; +def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>; +def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; + def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; @@ -187,10 +205,16 @@ def IsThumb : Predicate<"Subtarget->isThumb()">, def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate<"ModeThumb,FeatureThumb2">; +def IsMClass : Predicate<"Subtarget->isMClass()">, + AssemblerPredicate<"FeatureMClass">; +def IsARClass : Predicate<"!Subtarget->isMClass()">, + AssemblerPredicate<"!FeatureMClass">; def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb">; def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; +def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">, + AssemblerPredicate<"ModeNaCl">; // FIXME: Eventually this will be just "hasV6T2Ops". def UseMovt : Predicate<"Subtarget->useMovt()">; @@ -263,24 +287,11 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_65535AsmOperand; } +class BinOpWithFlagFrag<dag res> : + PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>; class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>; -/// adde and sube predicates - True based on whether the carry flag output -/// will be needed or not. -def adde_dead_carry : - PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS), - [{return !N->hasAnyUseOfValue(1);}]>; -def sube_dead_carry : - PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), - [{return !N->hasAnyUseOfValue(1);}]>; -def adde_live_carry : - PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS), - [{return N->hasAnyUseOfValue(1);}]>; -def sube_live_carry : - PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), - [{return N->hasAnyUseOfValue(1);}]>; - // An 'and' node with a single use. def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ return N->hasOneUse(); @@ -315,6 +326,7 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ def brtarget : Operand<OtherVT> { let EncoderMethod = "getBranchTargetOpValue"; let OperandType = "OPERAND_PCREL"; + let DecoderMethod = "DecodeT2BROperand"; } // FIXME: get rid of this one? @@ -345,39 +357,35 @@ def bl_target : Operand<i32> { let OperandType = "OPERAND_PCREL"; } - -// A list of registers separated by comma. Used by load/store multiple. 
-def RegListAsmOperand : AsmOperandClass { - let Name = "RegList"; - let SuperClasses = []; -} - -def DPRRegListAsmOperand : AsmOperandClass { - let Name = "DPRRegList"; - let SuperClasses = []; -} - -def SPRRegListAsmOperand : AsmOperandClass { - let Name = "SPRRegList"; - let SuperClasses = []; +def blx_target : Operand<i32> { + // Encoded the same as branch targets. + let EncoderMethod = "getARMBLXTargetOpValue"; + let OperandType = "OPERAND_PCREL"; } +// A list of registers separated by comma. Used by load/store multiple. +def RegListAsmOperand : AsmOperandClass { let Name = "RegList"; } def reglist : Operand<i32> { let EncoderMethod = "getRegisterListOpValue"; let ParserMatchClass = RegListAsmOperand; let PrintMethod = "printRegisterList"; + let DecoderMethod = "DecodeRegListOperand"; } +def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; } def dpr_reglist : Operand<i32> { let EncoderMethod = "getRegisterListOpValue"; let ParserMatchClass = DPRRegListAsmOperand; let PrintMethod = "printRegisterList"; + let DecoderMethod = "DecodeDPRRegListOperand"; } +def SPRRegListAsmOperand : AsmOperandClass { let Name = "SPRRegList"; } def spr_reglist : Operand<i32> { let EncoderMethod = "getRegisterListOpValue"; let ParserMatchClass = SPRRegListAsmOperand; let PrintMethod = "printRegisterList"; + let DecoderMethod = "DecodeSPRRegListOperand"; } // An operand for the CONSTPOOL_ENTRY pseudo-instruction. @@ -397,56 +405,99 @@ def adrlabel : Operand<i32> { def neon_vcvt_imm32 : Operand<i32> { let EncoderMethod = "getNEONVcvtImm32OpValue"; + let DecoderMethod = "DecodeVCVTImmOperand"; } // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. -def rot_imm : Operand<i32>, ImmLeaf<i32, [{ - int32_t v = (int32_t)Imm; - return v == 8 || v == 16 || v == 24; }]> { - let EncoderMethod = "getRotImmOpValue"; +def rot_imm_XFORM: SDNodeXForm<imm, [{ + switch (N->getZExtValue()){ + default: assert(0); + case 0: return CurDAG->getTargetConstant(0, MVT::i32); + case 8: return CurDAG->getTargetConstant(1, MVT::i32); + case 16: return CurDAG->getTargetConstant(2, MVT::i32); + case 24: return CurDAG->getTargetConstant(3, MVT::i32); + } +}]>; +def RotImmAsmOperand : AsmOperandClass { + let Name = "RotImm"; + let ParserMethod = "parseRotImm"; } - -def ShifterAsmOperand : AsmOperandClass { - let Name = "Shifter"; - let SuperClasses = []; +def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{ + int32_t v = N->getZExtValue(); + return v == 8 || v == 16 || v == 24; }], + rot_imm_XFORM> { + let PrintMethod = "printRotImmOperand"; + let ParserMatchClass = RotImmAsmOperand; } // shift_imm: An integer that encodes a shift amount and the type of shift -// (currently either asr or lsl) using the same encoding used for the -// immediates in so_reg operands. +// (asr or lsl). The 6-bit immediate encodes as: +// {5} 0 ==> lsl +// 1 asr +// {4-0} imm5 shift amount. +// asr #32 encoded as imm5 == 0. +def ShifterImmAsmOperand : AsmOperandClass { + let Name = "ShifterImm"; + let ParserMethod = "parseShifterImm"; +} def shift_imm : Operand<i32> { let PrintMethod = "printShiftImmOperand"; - let ParserMatchClass = ShifterAsmOperand; + let ParserMatchClass = ShifterImmAsmOperand; } -def ShiftedRegAsmOperand : AsmOperandClass { - let Name = "ShiftedReg"; +// shifter_operand operands: so_reg_reg, so_reg_imm, and so_imm. 
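The rot_imm_XFORM body above is plain C++: it maps the only legal rotate amounts (0, 8, 16, 24) onto the 2-bit instruction field, which is simply the amount divided by 8. As a standalone illustration:

  #include <cassert>
  #include <cstdint>

  // Same mapping as rot_imm_XFORM: rotate amount -> 2-bit field value.
  uint32_t rotImmField(uint32_t rotateAmount) {
    assert(rotateAmount % 8 == 0 && rotateAmount <= 24);
    return rotateAmount / 8; // 0->0, 8->1, 16->2, 24->3
  }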
+def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; } +def so_reg_reg : Operand<i32>, // reg reg imm + ComplexPattern<i32, 3, "SelectRegShifterOperand", + [shl, srl, sra, rotr]> { + let EncoderMethod = "getSORegRegOpValue"; + let PrintMethod = "printSORegRegOperand"; + let DecoderMethod = "DecodeSORegRegOperand"; + let ParserMatchClass = ShiftedRegAsmOperand; + let MIOperandInfo = (ops GPRnopc, GPRnopc, i32imm); } -// shifter_operand operands: so_reg and so_imm. -def so_reg : Operand<i32>, // reg reg imm - ComplexPattern<i32, 3, "SelectShifterOperandReg", - [shl,srl,sra,rotr]> { - let EncoderMethod = "getSORegOpValue"; - let PrintMethod = "printSORegOperand"; - let ParserMatchClass = ShiftedRegAsmOperand; - let MIOperandInfo = (ops GPR, GPR, shift_imm); +def ShiftedImmAsmOperand : AsmOperandClass { let Name = "RegShiftedImm"; } +def so_reg_imm : Operand<i32>, // reg imm + ComplexPattern<i32, 2, "SelectImmShifterOperand", + [shl, srl, sra, rotr]> { + let EncoderMethod = "getSORegImmOpValue"; + let PrintMethod = "printSORegImmOperand"; + let DecoderMethod = "DecodeSORegImmOperand"; + let ParserMatchClass = ShiftedImmAsmOperand; + let MIOperandInfo = (ops GPR, i32imm); +} + +// FIXME: Does this need to be distinct from so_reg? +def shift_so_reg_reg : Operand<i32>, // reg reg imm + ComplexPattern<i32, 3, "SelectShiftRegShifterOperand", + [shl,srl,sra,rotr]> { + let EncoderMethod = "getSORegRegOpValue"; + let PrintMethod = "printSORegRegOperand"; + let DecoderMethod = "DecodeSORegRegOperand"; + let MIOperandInfo = (ops GPR, GPR, i32imm); } + // FIXME: Does this need to be distinct from so_reg? -def shift_so_reg : Operand<i32>, // reg reg imm - ComplexPattern<i32, 3, "SelectShiftShifterOperandReg", +def shift_so_reg_imm : Operand<i32>, // reg reg imm + ComplexPattern<i32, 2, "SelectShiftImmShifterOperand", [shl,srl,sra,rotr]> { - let EncoderMethod = "getSORegOpValue"; - let PrintMethod = "printSORegOperand"; - let MIOperandInfo = (ops GPR, GPR, shift_imm); + let EncoderMethod = "getSORegImmOpValue"; + let PrintMethod = "printSORegImmOperand"; + let DecoderMethod = "DecodeSORegImmOperand"; + let MIOperandInfo = (ops GPR, i32imm); } + // so_imm - Match a 32-bit shifter_operand immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits. +def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; } def so_imm : Operand<i32>, ImmLeaf<i32, [{ return ARM_AM::getSOImmVal(Imm) != -1; }]> { let EncoderMethod = "getSOImmOpValue"; + let ParserMatchClass = SOImmAsmOperand; + let DecoderMethod = "DecodeSOImmOperand"; } // Break so_imm's up into two pieces. This handles immediates with up to 16 @@ -464,7 +515,7 @@ def arm_i32imm : PatLeaf<(imm), [{ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); }]>; -/// imm0_7 predicate - Immediate in the range [0,31]. +/// imm0_7 predicate - Immediate in the range [0,7]. def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 8; @@ -472,7 +523,7 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_7AsmOperand; } -/// imm0_15 predicate - Immediate in the range [0,31]. +/// imm0_15 predicate - Immediate in the range [0,15]. def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; } def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 16; @@ -481,68 +532,83 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. 
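For readers unfamiliar with so_imm: it matches the classic ARM modified immediate, an 8-bit value rotated right by an even amount. A self-contained check with the same acceptance condition that ARM_AM::getSOImmVal tests (this sketch is not that function):

  #include <cstdint>

  bool isARMModifiedImm(uint32_t value) {
    for (unsigned rot = 0; rot < 32; rot += 2) {
      // Rotating left by 'rot' undoes a rotate-right-by-'rot' encoding.
      uint32_t imm8 = (value << rot) | (value >> ((32 - rot) % 32));
      if (imm8 <= 0xFF)
        return true; // encodable as imm8 ror #rot
    }
    return false;
  }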
+def Imm0_31AsmOperand: AsmOperandClass { let Name = "Imm0_31"; } def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; -}]>; - -/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'. -def imm0_31_m1 : Operand<i32>, ImmLeaf<i32, [{ - return Imm >= 0 && Imm < 32; }]> { - let EncoderMethod = "getImmMinusOneOpValue"; + let ParserMatchClass = Imm0_31AsmOperand; +} + +/// imm0_255 predicate - Immediate in the range [0,255]. +def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; } +def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { + let ParserMatchClass = Imm0_255AsmOperand; } -// i32imm_hilo16 - For movt/movw - sets the MC Encoder method. -// The imm is split into imm{15-12}, imm{11-0} +// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference +// a relocatable expression. // -def i32imm_hilo16 : Operand<i32> { +// FIXME: This really needs a Thumb version separate from the ARM version. +// While the range is the same, and can thus use the same match class, +// the encoding is different so it should have a different encoder method. +def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; } +def imm0_65535_expr : Operand<i32> { let EncoderMethod = "getHiLo16ImmOpValue"; + let ParserMatchClass = Imm0_65535ExprAsmOperand; } +/// imm24b - True if the 32-bit immediate is encodable in 24 bits. +def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; } +def imm24b : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm <= 0xffffff; +}]> { + let ParserMatchClass = Imm24bitAsmOperand; +} + + /// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield /// e.g., 0xf000ffff +def BitfieldAsmOperand : AsmOperandClass { + let Name = "Bitfield"; + let ParserMethod = "parseBitfield"; +} def bf_inv_mask_imm : Operand<i32>, PatLeaf<(imm), [{ return ARM::isBitFieldInvertedMask(N->getZExtValue()); }] > { let EncoderMethod = "getBitfieldInvertedMaskOpValue"; let PrintMethod = "printBitfieldInvMaskImmOperand"; + let DecoderMethod = "DecodeBitfieldMaskOperand"; + let ParserMatchClass = BitfieldAsmOperand; } -/// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p -def lsb_pos_imm : Operand<i32>, ImmLeaf<i32, [{ - return isInt<5>(Imm); +def imm1_32_XFORM: SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32); }]>; - -/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p -def width_imm : Operand<i32>, ImmLeaf<i32, [{ - return Imm > 0 && Imm <= 32; -}] > { - let EncoderMethod = "getMsbOpValue"; -} - -def ssat_imm : Operand<i32>, ImmLeaf<i32, [{ - return Imm > 0 && Imm <= 32; -}]> { - let EncoderMethod = "getSsatBitPosValue"; +def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; } +def imm1_32 : Operand<i32>, PatLeaf<(imm), [{ + uint64_t Imm = N->getZExtValue(); + return Imm > 0 && Imm <= 32; + }], + imm1_32_XFORM> { + let PrintMethod = "printImmPlusOneOperand"; + let ParserMatchClass = Imm1_32AsmOperand; +} + +def imm1_16_XFORM: SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, MVT::i32); +}]>; +def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; } +def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], + imm1_16_XFORM> { + let PrintMethod = "printImmPlusOneOperand"; + let ParserMatchClass = Imm1_16AsmOperand; } // Define ARM specific addressing modes. 
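The imm1_32 and imm1_16 operands above print their logical value but store value-1, so the top of the range still fits the field (e.g. 32 encodes as 0b00000 in five bits). A two-line sketch of the transform that imm1_32_XFORM performs:

  #include <cassert>
  #include <cstdint>

  uint32_t encodeImm1_32(uint32_t value) {
    assert(value >= 1 && value <= 32);
    return value - 1; // printImmPlusOneOperand adds the 1 back when printing
  }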
- -def MemMode2AsmOperand : AsmOperandClass { - let Name = "MemMode2"; - let SuperClasses = []; - let ParserMethod = "tryParseMemMode2Operand"; -} - -def MemMode3AsmOperand : AsmOperandClass { - let Name = "MemMode3"; - let SuperClasses = []; - let ParserMethod = "tryParseMemMode3Operand"; -} - // addrmode_imm12 := reg +/- imm12 // +def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; } def addrmode_imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrModeImm12", []> { // 12-bit immediate operand. Note that instructions using this encode @@ -551,53 +617,129 @@ def addrmode_imm12 : Operand<i32>, let EncoderMethod = "getAddrModeImm12OpValue"; let PrintMethod = "printAddrModeImm12Operand"; + let DecoderMethod = "DecodeAddrModeImm12Operand"; + let ParserMatchClass = MemImm12OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } // ldst_so_reg := reg +/- reg shop imm // +def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; } def ldst_so_reg : Operand<i32>, ComplexPattern<i32, 3, "SelectLdStSOReg", []> { let EncoderMethod = "getLdStSORegOpValue"; // FIXME: Simplify the printer let PrintMethod = "printAddrMode2Operand"; - let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); + let DecoderMethod = "DecodeSORegMemOperand"; + let ParserMatchClass = MemRegOffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base, GPRnopc:$offsreg, i32imm:$shift); +} + +// postidx_imm8 := +/- [0,255] +// +// 9 bit value: +// {8} 1 if imm8 is non-negative. 0 otherwise. +// {7-0} [0,255] imm8 value. +def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; } +def postidx_imm8 : Operand<i32> { + let PrintMethod = "printPostIdxImm8Operand"; + let ParserMatchClass = PostIdxImm8AsmOperand; + let MIOperandInfo = (ops i32imm); } +// postidx_imm8s4 := +/- [0,1020] +// +// 9 bit value: +// {8} 1 if imm8 is non-negative. 0 otherwise. +// {7-0} [0,255] imm8 value, scaled by 4. +def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; } +def postidx_imm8s4 : Operand<i32> { + let PrintMethod = "printPostIdxImm8s4Operand"; + let ParserMatchClass = PostIdxImm8s4AsmOperand; + let MIOperandInfo = (ops i32imm); +} + + +// postidx_reg := +/- reg +// +def PostIdxRegAsmOperand : AsmOperandClass { + let Name = "PostIdxReg"; + let ParserMethod = "parsePostIdxReg"; +} +def postidx_reg : Operand<i32> { + let EncoderMethod = "getPostIdxRegOpValue"; + let DecoderMethod = "DecodePostIdxReg"; + let PrintMethod = "printPostIdxRegOperand"; + let ParserMatchClass = PostIdxRegAsmOperand; + let MIOperandInfo = (ops GPR, i32imm); +} + + // addrmode2 := reg +/- imm12 // := reg +/- reg shop imm // +// FIXME: addrmode2 should be refactored the rest of the way to always +// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg).
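The 9-bit layout documented for postidx_imm8 above is sign/magnitude rather than two's complement: bit 8 set means add, and the low eight bits hold the magnitude (scaled by 4 for postidx_imm8s4). A round-trip sketch with hypothetical helper names:

  #include <cassert>
  #include <cstdint>
  #include <cstdlib>

  uint32_t encodePostIdxImm8(int32_t imm) {
    assert(imm > -256 && imm < 256);
    return (imm >= 0 ? 0x100u : 0u) | (uint32_t(std::abs(imm)) & 0xFF);
  }

  int32_t decodePostIdxImm8(uint32_t bits) {
    int32_t magnitude = bits & 0xFF;
    return (bits & 0x100) ? magnitude : -magnitude;
  }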
+def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; } def addrmode2 : Operand<i32>, ComplexPattern<i32, 3, "SelectAddrMode2", []> { let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; - let ParserMatchClass = MemMode2AsmOperand; + let ParserMatchClass = AddrMode2AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } -def am2offset : Operand<i32>, - ComplexPattern<i32, 2, "SelectAddrMode2Offset", +def PostIdxRegShiftedAsmOperand : AsmOperandClass { + let Name = "PostIdxRegShifted"; + let ParserMethod = "parsePostIdxReg"; +} +def am2offset_reg : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg", + [], [SDNPWantRoot]> { + let EncoderMethod = "getAddrMode2OffsetOpValue"; + let PrintMethod = "printAddrMode2OffsetOperand"; + // When using this for assembly, it's always as a post-index offset. + let ParserMatchClass = PostIdxRegShiftedAsmOperand; + let MIOperandInfo = (ops GPR, i32imm); +} + +// FIXME: am2offset_imm should only need the immediate, not the GPR. Having +// the GPR is purely vestigial at this point. +def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; } +def am2offset_imm : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode2OffsetOpValue"; let PrintMethod = "printAddrMode2OffsetOperand"; + let ParserMatchClass = AM2OffsetImmAsmOperand; let MIOperandInfo = (ops GPR, i32imm); } + // addrmode3 := reg +/- reg // addrmode3 := reg +/- imm8 // +// FIXME: split into imm vs. reg versions. +def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } def addrmode3 : Operand<i32>, ComplexPattern<i32, 3, "SelectAddrMode3", []> { let EncoderMethod = "getAddrMode3OpValue"; let PrintMethod = "printAddrMode3Operand"; - let ParserMatchClass = MemMode3AsmOperand; + let ParserMatchClass = AddrMode3AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } +// FIXME: split into imm vs. reg versions. +// FIXME: parser method to handle +/- register.
+def AM3OffsetAsmOperand : AsmOperandClass { + let Name = "AM3Offset"; + let ParserMethod = "parseAM3Offset"; +} def am3offset : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrMode3Offset", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode3OffsetOpValue"; let PrintMethod = "printAddrMode3OffsetOperand"; + let ParserMatchClass = AM3OffsetAsmOperand; let MIOperandInfo = (ops GPR, i32imm); } @@ -608,28 +750,28 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> { let PrintMethod = "printLdStmModeOperand"; } -def MemMode5AsmOperand : AsmOperandClass { - let Name = "MemMode5"; - let SuperClasses = []; -} - // addrmode5 := reg +/- imm8*4 // +def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; } def addrmode5 : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrMode5", []> { let PrintMethod = "printAddrMode5Operand"; - let MIOperandInfo = (ops GPR:$base, i32imm); - let ParserMatchClass = MemMode5AsmOperand; let EncoderMethod = "getAddrMode5OpValue"; + let DecoderMethod = "DecodeAddrMode5Operand"; + let ParserMatchClass = AddrMode5AsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm); } // addrmode6 := reg with optional alignment // +def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } def addrmode6 : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, i32imm); + let MIOperandInfo = (ops GPR:$addr, i32imm:$align); let EncoderMethod = "getAddrMode6AddressOpValue"; + let DecoderMethod = "DecodeAddrMode6Operand"; + let ParserMatchClass = AddrMode6AsmOperand; } def am6offset : Operand<i32>, @@ -638,6 +780,7 @@ def am6offset : Operand<i32>, let PrintMethod = "printAddrMode6OffsetOperand"; let MIOperandInfo = (ops GPR); let EncoderMethod = "getAddrMode6OffsetOpValue"; + let DecoderMethod = "DecodeGPRRegisterClass"; } // Special version of addrmode6 to handle alignment encoding for VST1/VLD1 @@ -666,19 +809,15 @@ def addrmodepc : Operand<i32>, let MIOperandInfo = (ops GPR, i32imm); } -def MemMode7AsmOperand : AsmOperandClass { - let Name = "MemMode7"; - let SuperClasses = []; -} - -// addrmode7 := reg -// Used by load/store exclusive instructions. Useful to enable right assembly -// parsing and printing. Not used for any codegen matching. 
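addrmode5 above encodes its offset as imm8 scaled by 4, so only word-aligned offsets in [-1020, 1020] are representable. A quick validity check under that reading:

  #include <cstdint>
  #include <cstdlib>

  bool isValidAddrMode5Offset(int32_t offset) {
    return offset % 4 == 0 && std::abs(offset) <= 1020;
  }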
+// addr_offset_none := reg // -def addrmode7 : Operand<i32> { +def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; } +def addr_offset_none : Operand<i32>, + ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> { let PrintMethod = "printAddrMode7Operand"; - let MIOperandInfo = (ops GPR); - let ParserMatchClass = MemMode7AsmOperand; + let DecoderMethod = "DecodeAddrMode7Operand"; + let ParserMatchClass = MemNoOffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base); } def nohash_imm : Operand<i32> { @@ -687,25 +826,30 @@ def nohash_imm : Operand<i32> { def CoprocNumAsmOperand : AsmOperandClass { let Name = "CoprocNum"; - let SuperClasses = []; - let ParserMethod = "tryParseCoprocNumOperand"; -} - -def CoprocRegAsmOperand : AsmOperandClass { - let Name = "CoprocReg"; - let SuperClasses = []; - let ParserMethod = "tryParseCoprocRegOperand"; + let ParserMethod = "parseCoprocNumOperand"; } - def p_imm : Operand<i32> { let PrintMethod = "printPImmediate"; let ParserMatchClass = CoprocNumAsmOperand; + let DecoderMethod = "DecodeCoprocessor"; } +def CoprocRegAsmOperand : AsmOperandClass { + let Name = "CoprocReg"; + let ParserMethod = "parseCoprocRegOperand"; +} def c_imm : Operand<i32> { let PrintMethod = "printCImmediate"; let ParserMatchClass = CoprocRegAsmOperand; } +def CoprocOptionAsmOperand : AsmOperandClass { + let Name = "CoprocOption"; + let ParserMethod = "parseCoprocOptionOperand"; +} +def coproc_option_imm : Operand<i32> { + let PrintMethod = "printCoprocOptionImm"; + let ParserMatchClass = CoprocOptionAsmOperand; +} //===----------------------------------------------------------------------===// @@ -748,16 +892,37 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; } - def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, + + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> { bits<4> Rd; bits<4> Rn; bits<12> shift; let Inst{25} = 0; let Inst{19-16} = Rn; let Inst{15-12} = Rd; - let Inst{11-0} = shift; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; } // Assembly aliases for optional destination operand when it's the same @@ -773,56 +938,172 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, cc_out:$s)>, Requires<[IsARM]>; def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn, - so_reg:$shift, pred:$p, + (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, + so_reg_imm:$shift, pred:$p, cc_out:$s)>, Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + so_reg_reg:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + } -/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the -/// instruction modifies the CPSR register. 
-let isCodeGenOnly = 1, Defs = [CPSR] in { -multiclass AI1_bin_s_irs<bits<4> opcod, string opc, +/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands is +/// reversed. The 'rr' form is only defined for the disassembler; for codegen +/// it is equivalent to the AsI1_bin_irs counterpart. +multiclass AsI1_rbin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { - def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + PatFrag opnode, string baseOpc, bit Commutable = 0> { + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> { + [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> { bits<4> Rd; bits<4> Rn; bits<12> imm; let Inst{25} = 1; - let Inst{20} = 1; let Inst{19-16} = Rn; let Inst{15-12} = Rd; let Inst{11-0} = imm; } - def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, + } + def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { + [/* pattern left blank */]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; - let isCommutable = Commutable; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + } + + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; let Inst{25} = 0; - let Inst{20} = 1; let Inst{19-16} = Rn; let Inst{15-12} = Rd; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rm; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; } - def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, + + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> { + [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> { bits<4> Rd; bits<4> Rn; bits<12> shift; let Inst{25} = 0; - let Inst{20} = 1; let Inst{19-16} = Rn; let Inst{15-12} = Rd; - let Inst{11-0} = shift; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; } + + // Assembly aliases for optional destination operand when it's the same + // as the source operand. + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn, + so_imm:$imm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn, + GPR:$Rm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, + so_reg_imm:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + so_reg_reg:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + +} + +/// AsI1_rbin_s_is - Same as AsI1_rbin_irs except it sets the 's' bit by default.
+/// +/// These opcodes will be converted to the real non-S opcodes by +/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand. +let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in { +multiclass AsI1_rbin_s_is<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0> { + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + iii, opc, "\t$Rd, $Rn, $imm", + [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>; + + def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, + iir, opc, "\t$Rd, $Rn, $Rm", + [/* pattern left blank */]>; + + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn))]>; + + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } +} +} + +/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default. +/// +/// These opcodes will be converted to the real non-S opcodes by +/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand. +let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in { +multiclass AsI1_bin_s_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0> { + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + iii, opc, "\t$Rd, $Rn, $imm", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>; + def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, + iir, opc, "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>; + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>; + + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, + iis, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift))]>; } } @@ -857,128 +1138,190 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; } - def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis, + def rsi : AI1<opcod, (outs), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, + opc, "\t$Rn, $shift", + [(opnode GPR:$Rn, so_reg_imm:$shift)]> { + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + def rsr : AI1<opcod, (outs), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rn, $shift", - [(opnode GPR:$Rn, so_reg:$shift)]> { + [(opnode GPR:$Rn, so_reg_reg:$shift)]> { bits<4> Rn; bits<12> shift; let Inst{25} = 0; let Inst{20} = 1; let Inst{19-16} = Rn; let Inst{15-12} = 0b0000; - let Inst{11-0} = shift; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; } + } } /// AI_ext_rrot - A unary operation
with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. /// FIXME: Remove the 'r' variant. Its rot_imm is zero. -multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> { - def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm), - IIC_iEXTr, opc, "\t$Rd, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rm))]>, - Requires<[IsARM, HasV6]> { - bits<4> Rd; - bits<4> Rm; - let Inst{19-16} = 0b1111; - let Inst{15-12} = Rd; - let Inst{11-10} = 0b00; - let Inst{3-0} = Rm; - } - def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot), - IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot", - [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>, - Requires<[IsARM, HasV6]> { - bits<4> Rd; - bits<4> Rm; - bits<2> rot; - let Inst{19-16} = 0b1111; - let Inst{15-12} = Rd; - let Inst{11-10} = rot; - let Inst{3-0} = Rm; - } +class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> + : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot), + IIC_iEXTr, opc, "\t$Rd, $Rm$rot", + [(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rm; + bits<2> rot; + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rd; + let Inst{11-10} = rot; + let Inst{3-0} = Rm; } -multiclass AI_ext_rrot_np<bits<8> opcod, string opc> { - def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm), - IIC_iEXTr, opc, "\t$Rd, $Rm", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6]> { - let Inst{19-16} = 0b1111; - let Inst{11-10} = 0b00; - } - def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot), - IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6]> { - bits<2> rot; - let Inst{19-16} = 0b1111; - let Inst{11-10} = rot; - } +class AI_ext_rrot_np<bits<8> opcod, string opc> + : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot), + IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>, + Requires<[IsARM, HasV6]> { + bits<2> rot; + let Inst{19-16} = 0b1111; + let Inst{11-10} = rot; } /// AI_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. -multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> { - def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]> { +class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> + : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot), + IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", + [(set GPRnopc:$Rd, (opnode GPR:$Rn, + (rotr GPRnopc:$Rm, rot_imm:$rot)))]>, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rm; + bits<4> Rn; + bits<2> rot; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-10} = rot; + let Inst{9-4} = 0b000111; + let Inst{3-0} = Rm; +} + +class AI_exta_rrot_np<bits<8> opcod, string opc> + : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot), + IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>, + Requires<[IsARM, HasV6]> { + bits<4> Rn; + bits<2> rot; + let Inst{19-16} = Rn; + let Inst{11-10} = rot; +} + +/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. 
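The (rotr GPRnopc:$Rm, rot_imm:$rot) operand in AI_ext_rrot above is what lets a single sxtb/uxtb pick out any byte of the source register. In plain C++ terms:

  #include <cstdint>

  uint32_t rotr32(uint32_t x, unsigned r) {
    return r == 0 ? x : (x >> r) | (x << (32 - r));
  }

  // sxtb Rd, Rm, ror #16 sign-extends byte 2 of Rm:
  int32_t sxtbWithRor(uint32_t rm, unsigned rot) { // rot in {0,8,16,24}
    return int8_t(rotr32(rm, rot) & 0xFF);
  }
  // e.g. sxtbWithRor(0x00AB0000, 16) == -85 (0xFFFFFFAB).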
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, + string baseOpc, bit Commutable = 0> { + let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>, + Requires<[IsARM]> { bits<4> Rd; - bits<4> Rm; bits<4> Rn; - let Inst{19-16} = Rn; + bits<12> imm; + let Inst{25} = 1; let Inst{15-12} = Rd; - let Inst{11-10} = 0b00; - let Inst{9-4} = 0b000111; - let Inst{3-0} = Rm; - } - def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, - rot_imm:$rot), - IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot", - [(set GPR:$Rd, (opnode GPR:$Rn, - (rotr GPR:$Rm, rot_imm:$rot)))]>, - Requires<[IsARM, HasV6]> { + let Inst{19-16} = Rn; + let Inst{11-0} = imm; + } + def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>, + Requires<[IsARM]> { bits<4> Rd; + bits<4> Rn; bits<4> Rm; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let isCommutable = Commutable; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + } + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), + DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>, + Requires<[IsARM]> { + bits<4> Rd; bits<4> Rn; - bits<2> rot; + bits<12> shift; + let Inst{25} = 0; let Inst{19-16} = Rn; let Inst{15-12} = Rd; - let Inst{11-10} = rot; - let Inst{9-4} = 0b000111; - let Inst{3-0} = Rm; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; } -} - -// For disassembly only. -multiclass AI_exta_rrot_np<bits<8> opcod, string opc> { - def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6]> { - let Inst{11-10} = 0b00; - } - def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, - rot_imm:$rot), - IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6]> { + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), + DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>, + Requires<[IsARM]> { + bits<4> Rd; bits<4> Rn; - bits<2> rot; + bits<12> shift; + let Inst{25} = 0; let Inst{19-16} = Rn; - let Inst{11-10} = rot; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } } + + // Assembly aliases for optional destination operand when it's the same + // as the source operand. 
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn, + so_imm:$imm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn, + GPR:$Rm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, + so_reg_imm:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + so_reg_reg:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; } -/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. -multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, - string baseOpc, bit Commutable = 0> { - let Uses = [CPSR] in { +/// AI1_rsc_irs - Define instructions and patterns for rsc +multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode, + string baseOpc> { + let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>, + [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>, Requires<[IsARM]> { bits<4> Rd; bits<4> Rn; @@ -990,31 +1333,48 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, } def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM]> { + [/* pattern left blank */]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; let Inst{11-4} = 0b00000000; let Inst{25} = 0; - let isCommutable = Commutable; let Inst{3-0} = Rm; let Inst{15-12} = Rd; let Inst{19-16} = Rn; } - def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>, + def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift), + DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>, Requires<[IsARM]> { bits<4> Rd; bits<4> Rn; bits<12> shift; let Inst{25} = 0; - let Inst{11-0} = shift; + let Inst{19-16} = Rn; let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift), + DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; } } + // Assembly aliases for optional destination operand when it's the same // as the source operand. 
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), @@ -1028,28 +1388,15 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, cc_out:$s)>, Requires<[IsARM]>; def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn, - so_reg:$shift, pred:$p, + (!cast<Instruction>(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, + so_reg_imm:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + so_reg_reg:$shift, pred:$p, cc_out:$s)>, Requires<[IsARM]>; -} - -// Carry setting variants -// NOTE: CPSR def omitted because it will be handled by the custom inserter. -let usesCustomInserter = 1 in { -multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> { - def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - 4, IIC_iALUi, - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>; - def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - 4, IIC_iALUr, - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { - let isCommutable = Commutable; - } - def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - 4, IIC_iALUsr, - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>; -} } let canFoldAsLoad = 1, isReMaterializable = 1 in { @@ -1082,6 +1429,37 @@ multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii, } } +let canFoldAsLoad = 1, isReMaterializable = 1 in { +multiclass AI_ldr1nopc<bit isByte, string opc, InstrItinClass iii, + InstrItinClass iir, PatFrag opnode> { + // Note: We use the complex addrmode_imm12 rather than just an input + // GPR and a constrained immediate so that we can use this to match + // frame index references and avoid matching constant pool references. + def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt), (ins addrmode_imm12:$addr), + AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr", + [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 + } + def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt), (ins ldst_so_reg:$shift), + AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift", + [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> { + bits<4> Rt; + bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = shift{11-0}; + } +} +} + + multiclass AI_str1<bit isByte, string opc, InstrItinClass iii, InstrItinClass iir, PatFrag opnode> { // Note: We use the complex addrmode_imm12 rather than just an input @@ -1110,6 +1488,37 @@ multiclass AI_str1<bit isByte, string opc, InstrItinClass iii, let Inst{11-0} = shift{11-0}; } } + +multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii, + InstrItinClass iir, PatFrag opnode> { + // Note: We use the complex addrmode_imm12 rather than just an input + // GPR and a constrained immediate so that we can use this to match + // frame index references and avoid matching constant pool references. 
+ def i12 : AI2ldst<0b010, 0, isByte, (outs), + (ins GPRnopc:$Rt, addrmode_imm12:$addr), + AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr", + [(opnode GPRnopc:$Rt, addrmode_imm12:$addr)]> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 + } + def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPRnopc:$Rt, ldst_so_reg:$shift), + AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift", + [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> { + bits<4> Rt; + bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = shift{11-0}; + } +} + + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -1140,42 +1549,66 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, [(ARMcallseq_start timm:$amt)]>; } -def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", - [/* For disassembly only; pattern left blank */]>, +// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops. +// (These pseudos use hand-written selection code). +let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in { +def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, + GPR:$set1, GPR:$set2), + NoItinerary, []>; +} + +def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-16} = 0b001100100000; let Inst{15-8} = 0b11110000; let Inst{7-0} = 0b00000000; } -def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", - [/* For disassembly only; pattern left blank */]>, +def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-16} = 0b001100100000; let Inst{15-8} = 0b11110000; let Inst{7-0} = 0b00000001; } -def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", - [/* For disassembly only; pattern left blank */]>, +def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-16} = 0b001100100000; let Inst{15-8} = 0b11110000; let Inst{7-0} = 0b00000010; } -def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", - [/* For disassembly only; pattern left blank */]>, +def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-16} = 0b001100100000; let Inst{15-8} = 0b11110000; let Inst{7-0} = 0b00000011; } -def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
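For orientation, the ldrexd/strexd loop those 64-bit atomic pseudos expand to has the same retry structure as a compare-exchange loop; this C++ analogue is illustrative only, not the actual expansion:

  #include <atomic>
  #include <cstdint>

  uint64_t atomicOr64(std::atomic<uint64_t> &v, uint64_t operand) {
    uint64_t oldVal = v.load();
    // A failed compare_exchange_weak reloads oldVal and retries, much like
    // re-running ldrexd after strexd reports that the reservation was lost.
    while (!v.compare_exchange_weak(oldVal, oldVal | operand)) {
    }
    return oldVal;
  }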
DPFrm, NoItinerary, "sel", - "\t$dst, $a, $b", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6]> { +def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", + "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -1188,8 +1621,7 @@ def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel", } def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV6T2]> { + []>, Requires<[IsARM, HasV6T2]> { let Inst{27-16} = 0b001100100000; let Inst{15-8} = 0b11110000; let Inst{7-0} = 0b00000100; @@ -1206,14 +1638,11 @@ def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, let Inst{7-4} = 0b0111; } -// Change Processor State is a system instruction -- for disassembly and -// parsing only. -// FIXME: Since the asm parser has currently no clean way to handle optional -// operands, create 3 versions of the same instruction. Once there's a clean -// framework to represent optional operands, change this behavior. +// Change Processor State +// FIXME: We should use InstAlias to handle the optional operands. class CPS<dag iops, string asm_ops> : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops), - [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> { + []>, Requires<[IsARM]> { bits<2> imod; bits<3> iflags; bits<5> mode; @@ -1229,17 +1658,18 @@ class CPS<dag iops, string asm_ops> let Inst{4-0} = mode; } +let DecoderMethod = "DecodeCPSInstruction" in { let M = 1 in - def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode), + def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, imm0_31:$mode), "$imod\t$iflags, $mode">; let mode = 0, M = 0 in def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">; let imod = 0, iflags = 0, M = 1 in - def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">; + def CPS1p : CPS<(ins imm0_31:$mode), "\t$mode">; +} // Preload signals the memory system of possible future data/instruction access. -// These are for disassembly only. multiclass APreLoad<bits<1> read, bits<1> data, string opc> { def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload, @@ -1271,6 +1701,7 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> { let Inst{19-16} = shift{16-13}; // Rn let Inst{15-12} = 0b1111; let Inst{11-0} = shift{11-0}; + let Inst{4} = 0; } } @@ -1278,10 +1709,8 @@ defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>; defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>; defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>; -def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary, - "setend\t$end", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM]> { +def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary, + "setend\t$end", []>, Requires<[IsARM]> { bits<1> end; let Inst{31-10} = 0b1111000100000001000000; let Inst{9} = end; @@ -1351,14 +1780,17 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in // the instruction. The {24-21} opcode bits are set by the fixup, as we don't // know until then which form of the instruction will be used. 
def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), - MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> { + MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []> { bits<4> Rd; - bits<12> label; + bits<14> label; let Inst{27-25} = 0b001; + let Inst{24} = 0; + let Inst{23-22} = label{13-12}; + let Inst{21} = 0; let Inst{20} = 0; let Inst{19-16} = 0b1111; let Inst{15-12} = Rd; - let Inst{11-0} = label; + let Inst{11-0} = label{11-0}; } def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>; @@ -1424,6 +1856,7 @@ let isCall = 1, let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; + let DecoderMethod = "DecodeBranchImmInstruction"; } def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops), @@ -1432,6 +1865,7 @@ let isCall = 1, Requires<[IsARM, IsNotDarwin]> { bits<24> func; let Inst{23-0} = func; + let DecoderMethod = "DecodeBranchImmInstruction"; } // ARMv5T and above @@ -1516,6 +1950,7 @@ let isBranch = 1, isTerminator = 1 in { [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> { bits<24> target; let Inst{23-0} = target; + let DecoderMethod = "DecodeBranchImmInstruction"; } let isBarrier = 1 in { @@ -1549,9 +1984,9 @@ let isBranch = 1, isTerminator = 1 in { } -// BLX (immediate) -- for disassembly only -def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary, - "blx\t$target", [/* pattern left blank */]>, +// BLX (immediate) +def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary, + "blx\t$target", []>, Requires<[IsARM, HasV5T]> { let Inst{31-25} = 0b1111101; bits<25> target; @@ -1614,64 +2049,100 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { } } - - - - -// Secure Monitor Call is a system instruction -- for disassembly only -def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", - [/* For disassembly only; pattern left blank */]> { +// Secure Monitor Call is a system instruction. +def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", + []> { bits<4> opt; let Inst{23-4} = 0b01100000000000000111; let Inst{3-0} = opt; } -// Supervisor Call (Software Interrupt) -- for disassembly only +// Supervisor Call (Software Interrupt) let isCall = 1, Uses = [SP] in { -def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", - [/* For disassembly only; pattern left blank */]> { +def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> { bits<24> svc; let Inst{23-0} = svc; } } -// Store Return State is a system instruction -- for disassembly only -let isCodeGenOnly = 1 in { // FIXME: This should not use submode! 
-def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), - NoItinerary, "srs${amode}\tsp!, $mode", - [/* For disassembly only; pattern left blank */]> { +// Store Return State +class SRSI<bit wb, string asm> + : XI<(outs), (ins imm0_31:$mode), AddrModeNone, 4, IndexModeNone, BrFrm, + NoItinerary, asm, "", []> { + bits<5> mode; let Inst{31-28} = 0b1111; - let Inst{22-20} = 0b110; // W = 1 - let Inst{19-8} = 0xd05; - let Inst{7-5} = 0b000; + let Inst{27-25} = 0b100; + let Inst{22} = 1; + let Inst{21} = wb; + let Inst{20} = 0; + let Inst{19-16} = 0b1101; // SP + let Inst{15-5} = 0b00000101000; + let Inst{4-0} = mode; } -def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), - NoItinerary, "srs${amode}\tsp, $mode", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-28} = 0b1111; - let Inst{22-20} = 0b100; // W = 0 - let Inst{19-8} = 0xd05; - let Inst{7-5} = 0b000; +def SRSDA : SRSI<0, "srsda\tsp, $mode"> { + let Inst{24-23} = 0; +} +def SRSDA_UPD : SRSI<1, "srsda\tsp!, $mode"> { + let Inst{24-23} = 0; +} +def SRSDB : SRSI<0, "srsdb\tsp, $mode"> { + let Inst{24-23} = 0b10; +} +def SRSDB_UPD : SRSI<1, "srsdb\tsp!, $mode"> { + let Inst{24-23} = 0b10; +} +def SRSIA : SRSI<0, "srsia\tsp, $mode"> { + let Inst{24-23} = 0b01; +} +def SRSIA_UPD : SRSI<1, "srsia\tsp!, $mode"> { + let Inst{24-23} = 0b01; +} +def SRSIB : SRSI<0, "srsib\tsp, $mode"> { + let Inst{24-23} = 0b11; +} +def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> { + let Inst{24-23} = 0b11; } -// Return From Exception is a system instruction -- for disassembly only -def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), - NoItinerary, "rfe${amode}\t$base!", - [/* For disassembly only; pattern left blank */]> { +// Return From Exception +class RFEI<bit wb, string asm> + : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm, + NoItinerary, asm, "", []> { + bits<4> Rn; let Inst{31-28} = 0b1111; - let Inst{22-20} = 0b011; // W = 1 - let Inst{15-0} = 0x0a00; + let Inst{27-25} = 0b100; + let Inst{22} = 0; + let Inst{21} = wb; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-0} = 0xa00; } -def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), - NoItinerary, "rfe${amode}\t$base", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-28} = 0b1111; - let Inst{22-20} = 0b001; // W = 0 - let Inst{15-0} = 0x0a00; +def RFEDA : RFEI<0, "rfeda\t$Rn"> { + let Inst{24-23} = 0; +} +def RFEDA_UPD : RFEI<1, "rfeda\t$Rn!"> { + let Inst{24-23} = 0; +} +def RFEDB : RFEI<0, "rfedb\t$Rn"> { + let Inst{24-23} = 0b10; +} +def RFEDB_UPD : RFEI<1, "rfedb\t$Rn!"> { + let Inst{24-23} = 0b10; +} +def RFEIA : RFEI<0, "rfeia\t$Rn"> { + let Inst{24-23} = 0b01; +} +def RFEIA_UPD : RFEI<1, "rfeia\t$Rn!"> { + let Inst{24-23} = 0b01; +} +def RFEIB : RFEI<0, "rfeib\t$Rn"> { + let Inst{24-23} = 0b11; +} +def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> { + let Inst{24-23} = 0b11; } -} // isCodeGenOnly = 1 //===----------------------------------------------------------------------===// // Load / store Instructions. 
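In the SRS*/RFE* definitions above, Inst{24-23} are the P and U bits and select the addressing-mode suffix: 00=da, 01=ia, 10=db, 11=ib. The mapping as a table lookup:

  #include <cstdint>

  const char *srsRfeSuffix(uint32_t pu) { // pu = Inst{24-23}
    static const char *const names[4] = {"da", "ia", "db", "ib"};
    return names[pu & 3];
  }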
@@ -1682,16 +2153,16 @@ def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si, UnOpFrag<(load node:$Src)>>; -defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si, +defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si, UnOpFrag<(zextloadi8 node:$Src)>>; defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si, BinOpFrag<(store node:$LHS, node:$RHS)>>; -defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, +defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; // Special LDR for loads from non-pc-relative constpools. let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, - isReMaterializable = 1 in + isReMaterializable = 1, isCodeGenOnly = 1 in def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr), AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr", []> { @@ -1727,34 +2198,65 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), // Indexed loads multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> { - def _PRE : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addrmode2:$addr), IndexModePre, LdFrm, itin, + def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addrmode_imm12:$addr), IndexModePre, LdFrm, itin, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { - // {17-14} Rn - // {13} 1 == Rm, 0 == imm12 - // {12} isAdd - // {11-0} imm12/Rm - bits<18> addr; - let Inst{25} = addr{13}; + bits<17> addr; + let Inst{25} = 0; + let Inst{23} = addr{12}; + let Inst{19-16} = addr{16-13}; + let Inst{11-0} = addr{11-0}; + let DecoderMethod = "DecodeLDRPreImm"; + let AsmMatchConverter = "cvtLdWriteBackRegAddrModeImm12"; + } + + def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), + (ins ldst_so_reg:$addr), IndexModePre, LdFrm, itin, + opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + bits<17> addr; + let Inst{25} = 1; let Inst{23} = addr{12}; - let Inst{19-16} = addr{17-14}; + let Inst{19-16} = addr{16-13}; let Inst{11-0} = addr{11-0}; - let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; + let Inst{4} = 0; + let DecoderMethod = "DecodeLDRPreReg"; + let AsmMatchConverter = "cvtLdWriteBackRegAddrMode2"; } - def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins GPR:$Rn, am2offset:$offset), + + def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, LdFrm, itin, + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; + } + + def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), IndexModePost, LdFrm, itin, - opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> { - // {13} 1 == Rm, 0 == imm12 + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; - bits<4> Rn; - let Inst{25} = offset{13}; + bits<4> addr; + let Inst{25} = 0; let Inst{23} = offset{12}; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{11-0} = offset{11-0}; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } + } let mayLoad = 1, neverHasSideEffects = 1 in { @@ -1762,8 +2264,8 @@ defm LDR : 
AI2_ldridx<0, "ldr", IIC_iLoad_ru>; defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>; } -multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> { - def _PRE : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), +multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> { + def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins addrmode3:$addr), IndexModePre, LdMiscFrm, itin, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { @@ -1773,27 +2275,31 @@ multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> { let Inst{19-16} = addr{12-9}; // Rn let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3"; + let DecoderMethod = "DecodeAddrMode3Instruction"; } - def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins GPR:$Rn, am3offset:$offset), IndexModePost, - LdMiscFrm, itin, - opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> { + def _POST : AI3ldstidx<op, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am3offset:$offset), + IndexModePost, LdMiscFrm, itin, + opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", + []> { bits<10> offset; - bits<4> Rn; + bits<4> addr; let Inst{23} = offset{8}; // U bit let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{11-8} = offset{7-4}; // imm7_4/zero let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; } } let mayLoad = 1, neverHasSideEffects = 1 in { -defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>; -defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>; -defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>; +defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>; +defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>; +defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>; let hasExtraDefRegAllocReq = 1 in { -def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), +def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), (ins addrmode3:$addr), IndexModePre, LdMiscFrm, IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", @@ -1804,70 +2310,128 @@ def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), let Inst{19-16} = addr{12-9}; // Rn let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; + let AsmMatchConverter = "cvtLdrdPre"; } -def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), - (ins GPR:$Rn, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, [$Rn], $offset", - "$Rn = $Rn_wb", []> { +def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am3offset:$offset), + IndexModePost, LdMiscFrm, IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, $addr, $offset", + "$addr.base = $Rn_wb", []> { bits<10> offset; - bits<4> Rn; + bits<4> addr; let Inst{23} = offset{8}; // U bit let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{11-8} = offset{7-4}; // imm7_4/zero let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; } } // hasExtraDefRegAllocReq = 1 } // mayLoad = 1, neverHasSideEffects = 1 -// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. +// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT. 
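These are the unprivileged ("translated") loads: in ARM mode they exist only in post-indexed form, and P = 0 with W = 1 (the "overwrite" comment on Inst{21} below) is what separates them from an ordinary post-indexed LDR. All of the _POST_REG/_POST_IMM definitions in this section share the same writeback behavior, roughly as in this C++ model (an illustration, not LLVM code):

#include <cstdint>

// Post-indexed load writeback: the access uses the unmodified base, then
// the base register is updated. 'U' is Inst{23} (add vs. subtract).
// Word-sized load for simplicity.
struct LoadWB { uint32_t rt; uint32_t rn_wb; };

LoadWB postIndexedLoad(const uint32_t mem[], uint32_t rn, uint32_t offset,
                       bool U) {
  LoadWB out;
  out.rt = mem[rn / 4];                      // load from the old base
  out.rn_wb = U ? rn + offset : rn - offset; // writeback afterwards
  return out;
}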
let mayLoad = 1, neverHasSideEffects = 1 in { -def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb), - (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru, - "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { - // {17-14} Rn - // {13} 1 == Rm, 0 == imm12 +def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, LdFrm, IIC_iLoad_ru, + "ldrt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm - bits<18> addr; - let Inst{25} = addr{13}; - let Inst{23} = addr{12}; + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite - let Inst{19-16} = addr{17-14}; - let Inst{11-0} = addr{11-0}; - let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; -} -def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), - (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru, - "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { - // {17-14} Rn - // {13} 1 == Rm, 0 == imm12 + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def LDRT_POST_IMM : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, LdFrm, IIC_iLoad_ru, + "ldrt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm - bits<18> addr; - let Inst{25} = addr{13}; - let Inst{23} = addr{12}; - let Inst{21} = 1; // overwrite - let Inst{19-16} = addr{17-14}; - let Inst{11-0} = addr{11-0}; - let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; -} -def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), - (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, - "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } -def LDRHT : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), - (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, - "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + +def LDRBT_POST_REG : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite -} -def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), - (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, - "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def LDRBT_POST_IMM : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-0} = 
offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +multiclass AI3ldrT<bits<4> op, string opc> { + def i : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb), + (ins addr_offset_none:$addr, postidx_imm8:$offset), + IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc, + "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> { + bits<9> offset; + let Inst{23} = offset{8}; + let Inst{22} = 1; + let Inst{11-8} = offset{7-4}; + let Inst{3-0} = offset{3-0}; + let AsmMatchConverter = "cvtLdExtTWriteBackImm"; + } + def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb), + (ins addr_offset_none:$addr, postidx_reg:$Rm), + IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc, + "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> { + bits<5> Rm; + let Inst{23} = Rm{4}; + let Inst{22} = 0; + let Inst{11-8} = 0; + let Inst{3-0} = Rm{3-0}; + let AsmMatchConverter = "cvtLdExtTWriteBackReg"; + } } + +defm LDRSBT : AI3ldrT<0b1101, "ldrsbt">; +defm LDRHT : AI3ldrT<0b1011, "ldrht">; +defm LDRSHT : AI3ldrT<0b1111, "ldrsht">; } // Store @@ -1881,98 +2445,302 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStore_d_r, - "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>; + "strd", "\t$Rt, $src2, $addr", []>, + Requires<[IsARM, HasV5TE]> { + let Inst{21} = 0; +} // Indexed stores -def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModePre, StFrm, IIC_iStore_ru, - "str", "\t$Rt, [$Rn, $offset]!", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, - (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; - -def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModePost, StFrm, IIC_iStore_ru, - "str", "\t$Rt, [$Rn], $offset", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, - (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; - -def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModePre, StFrm, IIC_iStore_bh_ru, - "strb", "\t$Rt, [$Rn, $offset]!", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt, - GPR:$Rn, am2offset:$offset))]>; -def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModePost, StFrm, IIC_iStore_bh_ru, - "strb", "\t$Rt, [$Rn], $offset", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt, - GPR:$Rn, am2offset:$offset))]>; - -def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), - IndexModePre, StMiscFrm, IIC_iStore_ru, - "strh", "\t$Rt, [$Rn, $offset]!", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, - (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; - -def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), - IndexModePost, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, [$Rn], $offset", - "$Rn = $Rn_wb,@earlyclobber $Rn_wb", - [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, - GPR:$Rn, am3offset:$offset))]>; - -// For disassembly only +multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> { + def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre, + StFrm, itin, + opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + bits<17> addr; + let 
Inst{25} = 0; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{11-0} = addr{11-0}; // imm12 + let AsmMatchConverter = "cvtStWriteBackRegAddrModeImm12"; + let DecoderMethod = "DecodeSTRPreImm"; + } + + def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, ldst_so_reg:$addr), + IndexModePre, StFrm, itin, + opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + bits<17> addr; + let Inst{25} = 1; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{11-0} = addr{11-0}; + let Inst{4} = 0; // Inst{4} = 0 + let AsmMatchConverter = "cvtStWriteBackRegAddrMode2"; + let DecoderMethod = "DecodeSTRPreReg"; + } + def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, StFrm, itin, + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; + } + + def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, itin, + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; + } +} + +let mayStore = 1, neverHasSideEffects = 1 in { +defm STR : AI2_stridx<0, "str", IIC_iStore_ru>; +defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_ru>; +} + +def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset), + (STR_POST_REG GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset)>; +def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset), + (STR_POST_IMM GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset)>; +def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset), + (STRB_POST_REG GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset)>; +def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset), + (STRB_POST_IMM GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset)>; + +// Pseudo-instructions for pattern matching the pre-indexed stores. We can't +// put the patterns on the instruction definitions directly as ISel wants +// the address base and offset to be separate operands, not a single +// complex operand like we represent the instructions themselves. The +// pseudos map between the two. 
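Roughly, pre_store yields both a memory effect and the updated base value, which is why each pseudo below takes $Rn and $offset as separate operands and defines $Rn_wb. A throwaway C++ model of the semantics being matched (illustrative only):

#include <cstdint>

// Pre-indexed store: the base is offset first, the store goes to the new
// address, and that updated address is also the register writeback value.
uint32_t preIndexedStore(uint32_t mem[], uint32_t rt, uint32_t rn,
                         int32_t offset) {
  uint32_t ea = rn + offset; // updated base is also the effective address
  mem[ea / 4] = rt;          // store goes to the *new* address
  return ea;                 // becomes $Rn_wb
}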
+let usesCustomInserter = 1, + Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in { +def STRi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_store GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>; +def STRr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_store GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>; +def STRBi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>; +def STRBr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>; +def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am3offset:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; +} + + + +def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addrmode3:$addr), IndexModePre, + StMiscFrm, IIC_iStore_bh_ru, + "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let AsmMatchConverter = "cvtStWriteBackRegAddrMode3"; + let DecoderMethod = "DecodeAddrMode3Instruction"; +} + +def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset), + IndexModePost, StMiscFrm, IIC_iStore_bh_ru, + "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", + [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, + addr_offset_none:$addr, + am3offset:$offset))]> { + bits<10> offset; + bits<4> addr; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} + let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { -def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), - (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), - StMiscFrm, IIC_iStore_d_ru, - "strd", "\t$src1, $src2, [$base, $offset]!", - "$base = $base_wb", []>; - -// For disassembly only -def STRD_POST: AI3stdpo<(outs GPR:$base_wb), - (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), - StMiscFrm, IIC_iStore_d_ru, - "strd", "\t$src1, $src2, [$base], $offset", - "$base = $base_wb", []>; +def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr), + IndexModePre, StMiscFrm, IIC_iStore_d_ru, + "strd", "\t$Rt, $Rt2, $addr!", + "$addr.base = $Rn_wb", []> { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; + let AsmMatchConverter = "cvtStrdPre"; +} + +def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr, + am3offset:$offset), + IndexModePost, StMiscFrm, IIC_iStore_d_ru, + "strd", "\t$Rt, $Rt2, $addr, 
$offset", + "$addr.base = $Rn_wb", []> { + bits<10> offset; + bits<4> addr; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 -// STRT, STRBT, and STRHT are for disassembly only. +// STRT, STRBT, and STRHT -def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr), - IndexModePost, StFrm, IIC_iStore_ru, - "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb", - [/* For disassembly only; pattern left blank */]> { +def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite - let AsmMatchConverter = "CvtStWriteBackRegAddrMode2"; + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def STRBT_POST_IMM : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } -def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr), - IndexModePost, StFrm, IIC_iStore_bh_ru, - "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb", - [/* For disassembly only; pattern left blank */]> { +let mayStore = 1, neverHasSideEffects = 1 in { +def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def STRT_POST_IMM : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; let Inst{21} = 1; // overwrite - let AsmMatchConverter = "CvtStWriteBackRegAddrMode2"; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} } -def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr), - StMiscFrm, IIC_iStore_bh_ru, - "strht", "\t$Rt, $addr", "$addr.base = $base_wb", - [/* For disassembly only; pattern left blank */]> { - let Inst{21} = 1; // overwrite - let AsmMatchConverter = "CvtStWriteBackRegAddrMode3"; + +multiclass AI3strT<bits<4> op, string opc> { 
+ def i : AI3ldstidxT<op, 0, (outs GPR:$base_wb), + (ins GPR:$Rt, addr_offset_none:$addr, postidx_imm8:$offset), + IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc, + "\t$Rt, $addr, $offset", "$addr.base = $base_wb", []> { + bits<9> offset; + let Inst{23} = offset{8}; + let Inst{22} = 1; + let Inst{11-8} = offset{7-4}; + let Inst{3-0} = offset{3-0}; + let AsmMatchConverter = "cvtStExtTWriteBackImm"; + } + def r : AI3ldstidxT<op, 0, (outs GPR:$base_wb), + (ins GPR:$Rt, addr_offset_none:$addr, postidx_reg:$Rm), + IndexModePost, StMiscFrm, IIC_iStore_bh_ru, opc, + "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> { + bits<5> Rm; + let Inst{23} = Rm{4}; + let Inst{22} = 0; + let Inst{11-8} = 0; + let Inst{3-0} = Rm{3-0}; + let AsmMatchConverter = "cvtStExtTWriteBackReg"; + } } + +defm STRHT : AI3strT<0b1011, "strht">; + + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -1996,6 +2764,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let Inst{24-23} = 0b01; // Increment After let Inst{21} = 1; // Writeback let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; } def DA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), @@ -2012,6 +2782,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let Inst{24-23} = 0b00; // Decrement After let Inst{21} = 1; // Writeback let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; } def DB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), @@ -2028,6 +2800,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let Inst{24-23} = 0b10; // Decrement Before let Inst{21} = 1; // Writeback let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; } def IB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), @@ -2044,6 +2818,8 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let Inst{24-23} = 0b11; // Increment Before let Inst{21} = 1; // Writeback let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; } } @@ -2084,6 +2860,9 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, let Inst{15-12} = Rd; } +def : ARMInstAlias<"movs${p} $Rd, $Rm", + (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>; + // A version for the smaller set of tail call registers. 
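The hunk below also splits the old MOVs into MOVsi (shift by immediate) and MOVsr (shift by register). In the A32 data-processing encoding the two forms share bits 6-5 for the shift type and are distinguished by bit 4: 0 selects an imm5 amount in bits 11-7, 1 selects a shift-amount register in bits 11-8 with bit 7 forced to 0. A hand-rolled sketch of just those operand bits (function names are illustrative):

#include <cstdint>

enum ShiftType { LSL = 0, LSR = 1, ASR = 2, ROR = 3 }; // Inst{6-5}

// Shift-by-immediate operand: imm5 in bits 11-7, type in 6-5, bit 4 = 0.
uint32_t soRegImmBits(unsigned rm, ShiftType ty, unsigned imm5) {
  return ((imm5 & 0x1Fu) << 7) | (unsigned(ty) << 5) | (rm & 0xFu);
}

// Shift-by-register operand: Rs in bits 11-8, bit 7 = 0, type in 6-5,
// bit 4 = 1.
uint32_t soRegRegBits(unsigned rm, ShiftType ty, unsigned rs) {
  return ((rs & 0xFu) << 8) | (unsigned(ty) << 5) | (1u << 4) | (rm & 0xFu);
}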
let neverHasSideEffects = 1 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, @@ -2097,15 +2876,33 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, let Inst{15-12} = Rd; } -def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src), - DPSoRegFrm, IIC_iMOVsr, - "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>, +def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src), + DPSoRegRegFrm, IIC_iMOVsr, + "mov", "\t$Rd, $src", + [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP { + bits<4> Rd; + bits<12> src; + let Inst{15-12} = Rd; + let Inst{19-16} = 0b0000; + let Inst{11-8} = src{11-8}; + let Inst{7} = 0; + let Inst{6-5} = src{6-5}; + let Inst{4} = 1; + let Inst{3-0} = src{3-0}; + let Inst{25} = 0; +} + +def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src), + DPSoRegImmFrm, IIC_iMOVsr, + "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>, UnaryDP { bits<4> Rd; bits<12> src; let Inst{15-12} = Rd; let Inst{19-16} = 0b0000; - let Inst{11-0} = src; + let Inst{11-5} = src{11-5}; + let Inst{4} = 0; + let Inst{3-0} = src{3-0}; let Inst{25} = 0; } @@ -2121,7 +2918,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi, } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm), +def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm), DPFrm, IIC_iMOVi, "movw", "\t$Rd, $imm", [(set GPR:$Rd, imm0_65535:$imm)]>, @@ -2133,16 +2930,22 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm), let Inst{19-16} = imm{15-12}; let Inst{20} = 0; let Inst{25} = 1; + let DecoderMethod = "DecodeArmMOVTWInstruction"; } +def : InstAlias<"mov${p} $Rd, $imm", + (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p)>, + Requires<[IsARM]>; + def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; let Constraints = "$src = $Rd" in { -def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm), +def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), + (ins GPR:$src, imm0_65535_expr:$imm), DPFrm, IIC_iMOVi, "movt", "\t$Rd, $imm", - [(set GPR:$Rd, + [(set GPRnopc:$Rd, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, Requires<[IsARM, HasV6T2]> { @@ -2153,6 +2956,7 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm), let Inst{19-16} = imm{15-12}; let Inst{20} = 0; let Inst{25} = 1; + let DecoderMethod = "DecodeArmMOVTWInstruction"; } def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), @@ -2186,30 +2990,28 @@ def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, // Sign extenders -defm SXTB : AI_ext_rrot<0b01101010, +def SXTB : AI_ext_rrot<0b01101010, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; -defm SXTH : AI_ext_rrot<0b01101011, +def SXTH : AI_ext_rrot<0b01101011, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm SXTAB : AI_exta_rrot<0b01101010, +def SXTAB : AI_exta_rrot<0b01101010, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -defm SXTAH : AI_exta_rrot<0b01101011, +def SXTAH : AI_exta_rrot<0b01101011, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -// For disassembly only -defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; +def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; -// For disassembly only -defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; +def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; // Zero extenders let AddedComplexity = 16 in { 
-defm UXTB : AI_ext_rrot<0b01101110, +def UXTB : AI_ext_rrot<0b01101110, "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; -defm UXTH : AI_ext_rrot<0b01101111, +def UXTH : AI_ext_rrot<0b01101111, "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm UXTB16 : AI_ext_rrot<0b01101100, +def UXTB16 : AI_ext_rrot<0b01101100, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; // FIXME: This pattern incorrectly assumes the shl operator is a rotate. @@ -2217,23 +3019,22 @@ defm UXTB16 : AI_ext_rrot<0b01101100, // instead so we can include a check for masking back in the upper // eight bits of the source into the lower eight bits of the result. //def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), -// (UXTB16r_rot GPR:$Src, 24)>; +// (UXTB16r_rot GPR:$Src, 3)>; def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), - (UXTB16r_rot GPR:$Src, 8)>; + (UXTB16 GPR:$Src, 1)>; -defm UXTAB : AI_exta_rrot<0b01101110, "uxtab", +def UXTAB : AI_exta_rrot<0b01101110, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; -defm UXTAH : AI_exta_rrot<0b01101111, "uxtah", +def UXTAH : AI_exta_rrot<0b01101111, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; } // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. -// For disassembly only -defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; +def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; -def SBFX : I<(outs GPR:$Rd), - (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width), +def SBFX : I<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width), AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { @@ -2250,7 +3051,7 @@ def SBFX : I<(outs GPR:$Rd), } def UBFX : I<(outs GPR:$Rd), - (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width), + (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width), AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { @@ -2278,148 +3079,58 @@ defm SUB : AsI1_bin_irs<0b0010, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">; // ADD and SUB with 's' bit set. -defm ADDS : AI1_bin_s_irs<0b0100, "adds", +// +// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the +// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by +// AdjustInstrPostInstrSelection where we determine whether or not to +// set the "s" bit based on CPSR liveness. +// +// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen +// support for an optional CPSR definition that corresponds to the DAG +// node's second value. We can then eliminate the implicit def of CPSR. +defm ADDS : AsI1_bin_s_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; -defm SUBS : AI1_bin_s_irs<0b0010, "subs", + BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>; +defm SUBS : AsI1_bin_s_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(subc node:$LHS, node:$RHS)>>; + BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, + BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, "ADC", 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>, + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, "SBC">; -// ADC and SUBC with 's' bit set. 
-let usesCustomInserter = 1 in { -defm ADCS : AI1_adde_sube_s_irs< - BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; -defm SBCS : AI1_adde_sube_s_irs< - BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; -} - -def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, - IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; -} - -// The reg/reg form is only defined for the disassembler; for codegen it is -// equivalent to SUBrr. -def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, - IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{11-4} = 0b00000000; - let Inst{25} = 0; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} - -def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} +defm RSB : AsI1_rbin_irs <0b0011, "rsb", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">; -// RSB with 's' bit set. -// NOTE: CPSR def omitted because it will be handled by the custom inserter. -let usesCustomInserter = 1 in { -def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - 4, IIC_iALUi, - [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>; -def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - 4, IIC_iALUr, - [/* For disassembly only; pattern left blank */]>; -def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - 4, IIC_iALUsr, - [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>; -} - -let Uses = [CPSR] in { -def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; -} -// The reg/reg form is only defined for the disassembler; for codegen it is -// equivalent to SUBrr. -def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{11-4} = 0b00000000; - let Inst{25} = 0; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} -def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} -} +// FIXME: Eliminate them if we can write def : Pat patterns which defines +// CPSR and the implicit def of CPSR is not needed. +defm RSBS : AsI1_rbin_s_is<0b0011, "rsb", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; -// NOTE: CPSR def omitted because it will be handled by the custom inserter. 
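The so_imm_neg/so_imm_not patterns a little further down rely on the AddWithCarry identity from ARM ARM A2.2.1: subtraction is addition of the bitwise complement with a carry-in of 1, and ARM's carry flag is the inverse of a borrow. A short C++ check of both identities (illustrative):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// AddWithCarry(a, ~b, 1) == a - b, and with carry-in c,
// a + ~b + c == a - b - (1 - c): subtract-with-borrow, where ARM's
// carry flag is the negated borrow.
int main() {
  for (uint32_t a : {0u, 1u, 0x80000000u, 0xFFFFFFFFu})
    for (uint32_t b : {0u, 7u, 0x12345678u})
      for (uint32_t c : {0u, 1u})
        assert(a + ~b + c == a - b - (1u - c));
}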
-let usesCustomInserter = 1, Uses = [CPSR] in { -def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - 4, IIC_iALUi, - [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>; -def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - 4, IIC_iALUsr, - [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>; -} +defm RSC : AI1_rsc_irs<0b0111, "rsc", + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, + "RSC">; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. // The assume-no-carry-in form uses the negation of the input since add/sub // assume opposite meanings of the carry flag (i.e., carry == !borrow). // See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory // details. -def : ARMPat<(add GPR:$src, so_imm_neg:$imm), - (SUBri GPR:$src, so_imm_neg:$imm)>; -def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), - (SUBSri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(add GPR:$src, so_imm_neg:$imm), + (SUBri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm), + (SUBSri GPR:$src, so_imm_neg:$imm)>; + // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. -def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm), - (SBCri GPR:$src, so_imm_not:$imm)>; -def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm), - (SBCSri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR), + (SBCri GPR:$src, so_imm_not:$imm)>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -2427,12 +3138,13 @@ def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm), // (mul X, 2^n+1) -> (add (X << n), X) // (mul X, 2^n-1) -> (rsb X, (X << n)) -// ARM Arithmetic Instruction -- for disassembly only +// ARM Arithmetic Instruction // GPR:$dst = GPR:$a op GPR:$b class AAI<bits<8> op27_20, bits<8> op11_4, string opc, - list<dag> pattern = [/* For disassembly only; pattern left blank */], - dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm"> - : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> { + list<dag> pattern = [], + dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm), + string asm = "\t$Rd, $Rn, $Rm"> + : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> { bits<4> Rn; bits<4> Rd; bits<4> Rm; @@ -2443,17 +3155,19 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc, let Inst{3-0} = Rm; } -// Saturating add/subtract -- for disassembly only +// Saturating add/subtract def QADD : AAI<0b00010000, 0b00000101, "qadd", - [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))], - (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">; + [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))], + (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; def QSUB : AAI<0b00010010, 0b00000101, "qsub", - [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))], - (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">; -def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn), + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))], + (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; +def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], + (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; -def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn), +def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], + (ins GPRnopc:$Rm, GPRnopc:$Rn), 
"\t$Rd, $Rm, $Rn">; def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">; @@ -2469,7 +3183,7 @@ def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">; def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">; def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">; -// Signed/Unsigned add/subtract -- for disassembly only +// Signed/Unsigned add/subtract def SASX : AAI<0b01100001, 0b11110011, "sasx">; def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">; @@ -2484,7 +3198,7 @@ def USAX : AAI<0b01100101, 0b11110101, "usax">; def USUB16 : AAI<0b01100101, 0b11110111, "usub16">; def USUB8 : AAI<0b01100101, 0b11111111, "usub8">; -// Signed/Unsigned halving add/subtract -- for disassembly only +// Signed/Unsigned halving add/subtract def SHASX : AAI<0b01100011, 0b11110011, "shasx">; def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">; @@ -2499,7 +3213,7 @@ def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">; def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">; def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">; -// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only +// Unsigned Sum of Absolute Differences [and Accumulate]. def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), MulFrm /* for convenience */, NoItinerary, "usad8", @@ -2531,11 +3245,11 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), let Inst{3-0} = Rn; } -// Signed/Unsigned saturate -- for disassembly only +// Signed/Unsigned saturate -def SSAT : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$a, shift_imm:$sh), - SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh", - [/* For disassembly only; pattern left blank */]> { +def SSAT : AI<(outs GPRnopc:$Rd), + (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), + SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> { bits<4> Rd; bits<5> sat_imm; bits<4> Rn; @@ -2544,14 +3258,14 @@ def SSAT : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$a, shift_imm:$sh), let Inst{5-4} = 0b01; let Inst{20-16} = sat_imm; let Inst{15-12} = Rd; - let Inst{11-7} = sh{7-3}; - let Inst{6} = sh{0}; + let Inst{11-7} = sh{4-0}; + let Inst{6} = sh{5}; let Inst{3-0} = Rn; } -def SSAT16 : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$Rn), SatFrm, - NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", - [/* For disassembly only; pattern left blank */]> { +def SSAT16 : AI<(outs GPRnopc:$Rd), + (ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm, + NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []> { bits<4> Rd; bits<4> sat_imm; bits<4> Rn; @@ -2562,9 +3276,9 @@ def SSAT16 : AI<(outs GPR:$Rd), (ins ssat_imm:$sat_imm, GPR:$Rn), SatFrm, let Inst{3-0} = Rn; } -def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh), - SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh", - [/* For disassembly only; pattern left blank */]> { +def USAT : AI<(outs GPRnopc:$Rd), + (ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), + SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> { bits<4> Rd; bits<5> sat_imm; bits<4> Rn; @@ -2572,15 +3286,15 @@ def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh), let Inst{27-21} = 0b0110111; let Inst{5-4} = 0b01; let Inst{15-12} = Rd; - let Inst{11-7} = sh{7-3}; - let Inst{6} = sh{0}; + let Inst{11-7} = sh{4-0}; + let Inst{6} = sh{5}; let Inst{20-16} = sat_imm; let Inst{3-0} = Rn; } -def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm, - NoItinerary, "usat16", "\t$Rd, $sat_imm, $a", - [/* For disassembly only; pattern left blank */]> { +def USAT16 : AI<(outs GPRnopc:$Rd), + (ins imm0_15:$sat_imm, GPRnopc:$Rn), 
SatFrm, + NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []> { bits<4> Rd; bits<4> sat_imm; bits<4> Rn; @@ -2591,8 +3305,10 @@ def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm, let Inst{3-0} = Rn; } -def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>; -def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>; +def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos), + (SSAT imm:$pos, GPRnopc:$a, 0)>; +def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos), + (USAT imm:$pos, GPRnopc:$a, 0)>; //===----------------------------------------------------------------------===// // Bitwise Instructions. @@ -2611,6 +3327,10 @@ defm BIC : AsI1_bin_irs<0b1110, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsr, BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">; +// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just +// like in the actual instruction encoding. The complexity of mapping the mask +// to the lsb/msb pair should be handled by ISel, not encapsulated in the +// instruction description. def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm), AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, "bfc", "\t$Rd, $imm", "$src = $Rd", @@ -2622,16 +3342,16 @@ def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm), let Inst{6-0} = 0b0011111; let Inst{15-12} = Rd; let Inst{11-7} = imm{4-0}; // lsb - let Inst{20-16} = imm{9-5}; // width + let Inst{20-16} = imm{9-5}; // msb } // A8.6.18 BFI - Bitfield insert (Encoding A1) -def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm), - AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, - "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd", - [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn, - bf_inv_mask_imm:$imm))]>, - Requires<[IsARM, HasV6T2]> { +def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm), + AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd", + [(set GPRnopc:$Rd, (ARMbfi GPRnopc:$src, GPR:$Rn, + bf_inv_mask_imm:$imm))]>, + Requires<[IsARM, HasV6T2]> { bits<4> Rd; bits<4> Rn; bits<10> imm; @@ -2643,25 +3363,6 @@ def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm), let Inst{3-0} = Rn; } -// GNU as only supports this form of bfi (w/ 4 arguments) -let isAsmParserOnly = 1 in -def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, - lsb_pos_imm:$lsb, width_imm:$width), - AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, - "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd", - []>, Requires<[IsARM, HasV6T2]> { - bits<4> Rd; - bits<4> Rn; - bits<5> lsb; - bits<5> width; - let Inst{27-21} = 0b0111110; - let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 - let Inst{15-12} = Rd; - let Inst{11-7} = lsb; - let Inst{20-16} = width; // Custom encoder => lsb+width-1 - let Inst{3-0} = Rn; -} - def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, "mvn", "\t$Rd, $Rm", [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP { @@ -2673,15 +3374,31 @@ def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, let Inst{15-12} = Rd; let Inst{3-0} = Rm; } -def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm, - IIC_iMVNsr, "mvn", "\t$Rd, $shift", - [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP { +def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift), + DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", + [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP { + bits<4> Rd; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = 0b0000; + let Inst{15-12} = Rd; + let 
Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; +} +def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift), + DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", + [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP { bits<4> Rd; bits<12> shift; let Inst{25} = 0; let Inst{19-16} = 0b0000; let Inst{15-12} = Rd; - let Inst{11-0} = shift; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, @@ -2820,8 +3537,8 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), bits<4> RdHi; bits<4> Rm; bits<4> Rn; - let Inst{19-16} = RdLo; - let Inst{15-12} = RdHi; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; let Inst{11-8} = Rm; let Inst{3-0} = Rn; } @@ -2855,8 +3572,7 @@ def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), } def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, + IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { let Inst{15-12} = 0b1111; } @@ -2869,8 +3585,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd), def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", - [/* For disassembly only; pattern left blank */]>, + IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsARM, HasV6]>; def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), @@ -2881,8 +3596,7 @@ def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", - [/* For disassembly only; pattern left blank */]>, + IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsARM, HasV6]>; multiclass AI_smul<string opc, PatFrag opnode> { @@ -2925,92 +3639,95 @@ multiclass AI_smul<string opc, PatFrag opnode> { multiclass AI_smla<string opc, PatFrag opnode> { - def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + let DecoderMethod = "DecodeSMLAInstruction" in { + def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, - (opnode (sext_inreg GPR:$Rn, i16), - (sext_inreg GPR:$Rm, i16))))]>, + [(set GPRnopc:$Rd, (add GPR:$Ra, + (opnode (sext_inreg GPRnopc:$Rn, i16), + (sext_inreg GPRnopc:$Rm, i16))))]>, Requires<[IsARM, HasV5TE]>; - def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16), - (sra GPR:$Rm, (i32 16)))))]>, + [(set GPRnopc:$Rd, + (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), + (sra GPRnopc:$Rm, (i32 16)))))]>, Requires<[IsARM, HasV5TE]>; - def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)), - 
(sext_inreg GPR:$Rm, i16))))]>, + [(set GPRnopc:$Rd, + (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), + (sext_inreg GPRnopc:$Rm, i16))))]>, Requires<[IsARM, HasV5TE]>; - def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)), - (sra GPR:$Rm, (i32 16)))))]>, + [(set GPRnopc:$Rd, + (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), + (sra GPRnopc:$Rm, (i32 16)))))]>, Requires<[IsARM, HasV5TE]>; - def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn, - (sext_inreg GPR:$Rm, i16)), (i32 16))))]>, + [(set GPRnopc:$Rd, + (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, + (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>, Requires<[IsARM, HasV5TE]>; - def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn, - (sra GPR:$Rm, (i32 16))), (i32 16))))]>, + [(set GPRnopc:$Rd, + (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, + (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>, Requires<[IsARM, HasV5TE]>; + } } defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only -def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), - IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, +// Halfword multiply accumulate long: SMLAL<x><y>. 
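SMLAL<x><y> multiplies the selected 16-bit halves of Rn and Rm and accumulates the 32-bit product into the 64-bit value held in RdHi:RdLo. A plain C++ reference for that behavior (a sketch, not the backend's lowering):

#include <cstdint>

// SMLAL<x><y>: sign-extend the chosen halfwords, multiply, and add the
// product into the 64-bit accumulator RdHi:RdLo. The product of two
// 16-bit values always fits in 32 bits, so ordinary 64-bit addition
// matches the architectural result.
uint64_t smlalxy(uint64_t acc, uint32_t rn, uint32_t rm, bool nTop,
                 bool mTop) {
  int32_t opn = int16_t(nTop ? rn >> 16 : rn & 0xFFFF);
  int32_t opm = int16_t(mTop ? rm >> 16 : rm & 0xFFFF);
  return acc + uint64_t(int64_t(opn) * opm);
}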
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), + IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsARM, HasV5TE]>; -def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), - IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, +def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), + IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsARM, HasV5TE]>; -def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), - IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, +def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), + IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsARM, HasV5TE]>; -def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), - IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>, +def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), + IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsARM, HasV5TE]>; -// Helper class for AI_smld -- for disassembly only +// Helper class for AI_smld. class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops, InstrItinClass itin, string opc, string asm> : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> { bits<4> Rn; bits<4> Rm; - let Inst{4} = 1; - let Inst{5} = swap; - let Inst{6} = sub; - let Inst{7} = 0; - let Inst{21-20} = 0b00; - let Inst{22} = long; let Inst{27-23} = 0b01110; + let Inst{22} = long; + let Inst{21-20} = 0b00; let Inst{11-8} = Rm; + let Inst{7} = 0; + let Inst{6} = sub; + let Inst{5} = swap; + let Inst{4} = 1; let Inst{3-0} = Rn; } class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops, @@ -3024,6 +3741,8 @@ class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops, InstrItinClass itin, string opc, string asm> : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> { bits<4> Ra; + bits<4> Rd; + let Inst{19-16} = Rd; let Inst{15-12} = Ra; } class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops, @@ -3037,18 +3756,20 @@ class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops, multiclass AI_smld<bit sub, string opc> { - def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">; - def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">; - def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), NoItinerary, + def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">; - def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), NoItinerary, + def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, !strconcat(opc, 
"ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">; } @@ -3058,10 +3779,10 @@ defm SMLS : AI_smld<1, "smls">; multiclass AI_sdml<bit sub, string opc> { - def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">; - def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">; + def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), + NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">; + def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm), + NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">; } defm SMUA : AI_sdml<0, "smua">; @@ -3100,55 +3821,38 @@ def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)), (and (srl GPR:$Rm, (i32 8)), 0xFF)), (REVSH GPR:$Rm)>; -def lsl_shift_imm : SDNodeXForm<imm, [{ - unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue()); - return CurDAG->getTargetConstant(Sh, MVT::i32); -}]>; - -def lsl_amt : ImmLeaf<i32, [{ - return Imm > 0 && Imm < 32; -}], lsl_shift_imm>; - -def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh), +def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_lsl_amt:$sh), IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", - [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF), - (and (shl GPR:$Rm, lsl_amt:$sh), - 0xFFFF0000)))]>, + [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF), + (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh), + 0xFFFF0000)))]>, Requires<[IsARM, HasV6]>; // Alternate cases for PKHBT where identities eliminate some nodes. -def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)), - (PKHBT GPR:$Rn, GPR:$Rm, 0)>; -def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)), - (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>; - -def asr_shift_imm : SDNodeXForm<imm, [{ - unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue()); - return CurDAG->getTargetConstant(Sh, MVT::i32); -}]>; - -def asr_amt : ImmLeaf<i32, [{ - return Imm > 0 && Imm <= 32; -}], asr_shift_imm>; +def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)), + (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, 0)>; +def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (shl GPRnopc:$Rm, imm16_31:$sh)), + (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, imm16_31:$sh)>; // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and // will match the pattern below. -def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh), +def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_asr_amt:$sh), IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh", - [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000), - (and (sra GPR:$Rm, asr_amt:$sh), - 0xFFFF)))]>, + [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000), + (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh), + 0xFFFF)))]>, Requires<[IsARM, HasV6]>; // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. 
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)), - (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>; -def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), - (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)), - (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>; +def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), + (srl GPRnopc:$src2, imm16_31:$sh)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>; +def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), + (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>; //===----------------------------------------------------------------------===// // Comparison Instructions... @@ -3163,8 +3867,10 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm), (CMPri GPR:$src, so_imm:$imm)>; def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs), (CMPrr GPR:$src, GPR:$rhs)>; -def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs), - (CMPrs GPR:$src, so_reg:$rhs)>; +def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs), + (CMPrsi GPR:$src, so_reg_imm:$rhs)>; +def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs), + (CMPrsr GPR:$src, so_reg_reg:$rhs)>; // FIXME: We have to be careful when using the CMN instruction and comparison // with 0. One would expect these two pieces of code should give identical @@ -3250,15 +3956,23 @@ def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; -def MOVCCs : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_reg:$shift, pred:$p), +def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_reg_imm:$shift, pred:$p), + 4, IIC_iCMOVsr, + [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift, + imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">; +def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_reg_reg:$shift, pred:$p), 4, IIC_iCMOVsr, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>, + [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift, + imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; + let isMoveImm = 1 in def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, i32imm_hilo16:$imm, pred:$p), + (ins GPR:$false, imm0_65535_expr:$imm, pred:$p), 4, IIC_iMOVi, []>, RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; @@ -3288,9 +4002,14 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), // Atomic operations intrinsics // +def MemBarrierOptOperand : AsmOperandClass { + let Name = "MemBarrierOpt"; + let ParserMethod = "parseMemBarrierOptOperand"; +} def memb_opt : Operand<i32> { let PrintMethod = "printMemBOption"; let ParserMatchClass = MemBarrierOptOperand; + let DecoderMethod = "DecodeMemBarrierOption"; } // memory barriers protect the atomic sequences @@ -3321,8 +4040,16 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{3-0} = opt; } +// Pseudo isntruction that combines movs + predicated rsbmi +// to implement integer ABS +let usesCustomInserter = 1, Defs = [CPSR] in { +def ABS : ARMPseudoInst< + (outs GPR:$dst), (ins GPR:$src), + 8, NoItinerary, []>; +} + let usesCustomInserter = 1 in { - let Uses = [CPSR] in { + let Defs = [CPSR] in { def ATOMIC_LOAD_ADD_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>; @@ -3437,44 +4164,47 @@ let usesCustomInserter = 1 in { } let mayLoad = 1 in { -def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins 
addrmode7:$addr), NoItinerary, +def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldrexb", "\t$Rt, $addr", []>; -def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, - "ldrexh", "\t$Rt, $addr", []>; -def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, - "ldrex", "\t$Rt, $addr", []>; +def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldrexh", "\t$Rt, $addr", []>; +def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldrex", "\t$Rt, $addr", []>; let hasExtraDefRegAllocReq = 1 in - def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr), - NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>; +def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr), + NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> { + let DecoderMethod = "DecodeDoubleRegLoad"; +} } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), +def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>; -def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), +def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; -def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), +def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; } let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in def STREXD : AIstrex<0b01, (outs GPR:$Rd), - (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr), - NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>; + (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr), + NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> { + let DecoderMethod = "DecodeDoubleRegStore"; +} -// Clear-Exclusive is for disassembly only. -def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", - [/* For disassembly only; pattern left blank */]>, +def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, Requires<[IsARM, HasV7]> { let Inst{31-0} = 0b11110101011111111111000000011111; } -// SWP/SWPB are deprecated in V6/V7 and for disassembly only. -let mayLoad = 1 in { -def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp", - [/* For disassembly only; pattern left blank */]>; -def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb", - [/* For disassembly only; pattern left blank */]>; +// SWP/SWPB are deprecated in V6/V7. 
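Editor's note: for context on the deprecation, V6+ atomics are built as ldrex/strex retry loops rather than the bus-locking SWP. A minimal sketch of that retry semantics in terms of C++11 atomics (an analogy, not the actual lowering): compare_exchange_weak may fail spuriously, exactly like strex reporting failure when the exclusive monitor is lost.
#include <atomic>
#include <cstdint>
// ldrex/strex-style atomic swap, modeled with a weak CAS loop.
uint32_t atomic_swap(std::atomic<uint32_t> &mem, uint32_t desired) {
  uint32_t old = mem.load(std::memory_order_relaxed);      // ldrex
  while (!mem.compare_exchange_weak(old, desired,           // strex
                                    std::memory_order_seq_cst,
                                    std::memory_order_relaxed)) {
    // strex returned nonzero: the monitor was lost (another access,
    // or a clrex); reload and try again.
  }
  return old;
}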
+let mayLoad = 1, mayStore = 1 in { +def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr), + "swp", []>; +def SWPB: AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr), + "swpb", []>; } //===----------------------------------------------------------------------===// @@ -3526,108 +4256,171 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, class ACI<dag oops, dag iops, string opc, string asm, IndexMode im = IndexModeNone> + : I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary, + opc, asm, "", []> { + let Inst{27-25} = 0b110; +} +class ACInoP<dag oops, dag iops, string opc, string asm, + IndexMode im = IndexModeNone> : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary, - opc, asm, "", [/* For disassembly only; pattern left blank */]> { + opc, asm, "", []> { + let Inst{31-28} = 0b1111; let Inst{27-25} = 0b110; } - -multiclass LdStCop<bits<4> op31_28, bit load, dag ops, string opc, string cond>{ - - def _OFFSET : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> { - let Inst{31-28} = op31_28; +multiclass LdStCop<bit load, bit Dbit, string asm> { + def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr"> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; let Inst{21} = 0; // W = 0 - let Inst{22} = 0; // D = 0 let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def _PRE : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> { - let Inst{31-28} = op31_28; + def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr!", IndexModePre> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; let Inst{21} = 1; // W = 1 - let Inst{22} = 0; // D = 0 let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def _POST : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> { - let Inst{31-28} = op31_28; + def _POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + postidx_imm8s4:$offset), + asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> { + bits<9> offset; + bits<4> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 0; // P = 0 + let Inst{23} = offset{8}; + let Inst{22} = Dbit; let Inst{21} = 1; // W = 1 - let Inst{22} = 0; // D = 0 let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = offset{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - def _OPTION : ACI<(outs), - !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option), - ops), - !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> { - let Inst{31-28} = op31_28; + (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + coproc_option_imm:$option), + asm, "\t$cop, $CRd, $addr, $option"> { + bits<8> option; + bits<4> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 + let Inst{22} = Dbit; 
let Inst{21} = 0; // W = 0 - let Inst{22} = 0; // D = 0 let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = option; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def L_OFFSET : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> { - let Inst{31-28} = op31_28; +} +multiclass LdSt2Cop<bit load, bit Dbit, string asm> { + def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr"> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; let Inst{21} = 0; // W = 0 - let Inst{22} = 1; // D = 1 let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def L_PRE : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!", - IndexModePre> { - let Inst{31-28} = op31_28; + def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr!", IndexModePre> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; let Inst{21} = 1; // W = 1 - let Inst{22} = 1; // D = 1 let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def L_POST : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), - !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr", - IndexModePost> { - let Inst{31-28} = op31_28; + def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + postidx_imm8s4:$offset), + asm, "\t$cop, $CRd, $addr, $offset", IndexModePost> { + bits<9> offset; + bits<4> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 0; // P = 0 + let Inst{23} = offset{8}; + let Inst{22} = Dbit; let Inst{21} = 1; // W = 1 - let Inst{22} = 1; // D = 1 let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = offset{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; } - - def L_OPTION : ACI<(outs), - !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option), - ops), - !strconcat(!strconcat(opc, "l"), cond), - "\tp$cop, cr$CRd, [$base], \\{$option\\}"> { - let Inst{31-28} = op31_28; + def _OPTION : ACInoP<(outs), + (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + coproc_option_imm:$option), + asm, "\t$cop, $CRd, $addr, $option"> { + bits<8> option; + bits<4> addr; + bits<4> cop; + bits<4> CRd; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 + let Inst{22} = Dbit; let Inst{21} = 0; // W = 0 - let Inst{22} = 1; // D = 1 let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = option; + let DecoderMethod = "DecodeCopMemInstruction"; } } -defm LDC : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc", "${p}">; -defm LDC2 : LdStCop<0b1111, 1, (ins), "ldc2", "">; -defm STC : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc", "${p}">; -defm STC2 : LdStCop<0b1111, 0, (ins), "stc2", "">; +defm LDC : LdStCop <1, 0, "ldc">; +defm LDCL : LdStCop <1, 1, "ldcl">; +defm STC : LdStCop <0, 0, "stc">; +defm STCL : LdStCop <0, 1, "stcl">; 
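Editor's note: the multiclasses above spell the LDC/STC word out bit by bit; this C++ sketch packs the same fields for the offset form, taken directly from the `let Inst{...}` assignments (the function itself is illustrative, not an LLVM API).
#include <cstdint>
// Offset-form LDC/STC encoding: P=1, W=0, 8-bit word offset.
uint32_t encodeCopMem(uint32_t cond, bool load, bool dBit, bool up,
                      uint32_t rn, uint32_t crd, uint32_t cop,
                      uint32_t imm8) {
  uint32_t inst = 0;
  inst |= (cond & 0xF) << 28;     // predicate (0b1111 for the *2 variants)
  inst |= 0b110u << 25;           // coprocessor load/store space
  inst |= 1u << 24;               // P = 1 (offset addressing)
  inst |= (up ? 1u : 0u) << 23;   // U: add or subtract the offset
  inst |= (dBit ? 1u : 0u) << 22; // D: the long ("l") variant
  // Bit 21 (W) stays 0 for the non-writeback form.
  inst |= (load ? 1u : 0u) << 20; // L: LDC vs. STC
  inst |= (rn & 0xF) << 16;       // base register
  inst |= (crd & 0xF) << 12;      // coprocessor destination register
  inst |= (cop & 0xF) << 8;       // coprocessor number
  inst |= imm8 & 0xFF;            // offset, in words
  return inst;
}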
+defm LDC2 : LdSt2Cop<1, 0, "ldc2">; +defm LDC2L : LdSt2Cop<1, 1, "ldc2l">; +defm STC2 : LdSt2Cop<0, 0, "stc2">; +defm STC2L : LdSt2Cop<0, 1, "stc2l">; //===----------------------------------------------------------------------===// -// Move between coprocessor and ARM core register -- for disassembly only +// Move between coprocessor and ARM core register. // class MovRCopro<string opc, bit direction, dag oops, dag iops, @@ -3660,8 +4453,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, imm:$CRm, imm:$opc2)]>; def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, (outs GPR:$Rt), - (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, - i32imm:$opc2), []>; + (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, + imm0_7:$opc2), []>; def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; @@ -3697,15 +4490,14 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, imm:$CRm, imm:$opc2)]>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, (outs GPR:$Rt), - (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, - i32imm:$opc2), []>; + (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, + imm0_7:$opc2), []>; def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; -class MovRRCopro<string opc, bit direction, - list<dag> pattern = [/* For disassembly only */]> +class MovRRCopro<string opc, bit direction, list<dag> pattern = []> : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { @@ -3730,8 +4522,7 @@ def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, imm:$CRm)]>; def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; -class MovRRCopro2<string opc, bit direction, - list<dag> pattern = [/* For disassembly only */]> +class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { @@ -3758,20 +4549,22 @@ def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */, def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; //===----------------------------------------------------------------------===// -// Move between special register and ARM core register -- for disassembly only +// Move between special register and ARM core register // // Move to ARM core register from Special Register -def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr", - [/* For disassembly only; pattern left blank */]> { +def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, + "mrs", "\t$Rd, apsr", []> { bits<4> Rd; let Inst{23-16} = 0b00001111; let Inst{15-12} = Rd; let Inst{7-4} = 0b0000; } -def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr", - [/* For disassembly only; pattern left blank */]> { +def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPR:$Rd, pred:$p)>, Requires<[IsARM]>; + +def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, + "mrs", "\t$Rd, spsr", []> { bits<4> Rd; let Inst{23-16} = 0b01001111; let Inst{15-12} = Rd; @@ -3785,8 +4578,7 @@ def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr", // operand contains 
the special register (R Bit) in bit 4 and bits 3-0 contains // the mask with the fields to be accessed in the special register. def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, - "msr", "\t$mask, $Rn", - [/* For disassembly only; pattern left blank */]> { + "msr", "\t$mask, $Rn", []> { bits<5> mask; bits<4> Rn; @@ -3800,8 +4592,7 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, } def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, - "msr", "\t$mask, $a", - [/* For disassembly only; pattern left blank */]> { + "msr", "\t$mask, $a", []> { bits<5> mask; bits<12> a; @@ -4030,6 +4821,47 @@ def : ARMV5TEPat<(add GPR:$acc, def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, Requires<[IsARM, HasV6]>; +// SXT/UXT with no rotate +let AddedComplexity = 16 in { +def : ARMV6Pat<(and GPR:$Src, 0x000000FF), (UXTB GPR:$Src, 0)>; +def : ARMV6Pat<(and GPR:$Src, 0x0000FFFF), (UXTH GPR:$Src, 0)>; +def : ARMV6Pat<(and GPR:$Src, 0x00FF00FF), (UXTB16 GPR:$Src, 0)>; +def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0x00FF)), + (UXTAB GPR:$Rn, GPR:$Rm, 0)>; +def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)), + (UXTAH GPR:$Rn, GPR:$Rm, 0)>; +} + +def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>; +def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>; + +def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)), + (SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>; +def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i16)), + (SXTAH GPR:$Rn, GPRnopc:$Rm, 0)>; + +// Atomic load/store patterns +def : ARMPat<(atomic_load_8 ldst_so_reg:$src), + (LDRBrs ldst_so_reg:$src)>; +def : ARMPat<(atomic_load_8 addrmode_imm12:$src), + (LDRBi12 addrmode_imm12:$src)>; +def : ARMPat<(atomic_load_16 addrmode3:$src), + (LDRH addrmode3:$src)>; +def : ARMPat<(atomic_load_32 ldst_so_reg:$src), + (LDRrs ldst_so_reg:$src)>; +def : ARMPat<(atomic_load_32 addrmode_imm12:$src), + (LDRi12 addrmode_imm12:$src)>; +def : ARMPat<(atomic_store_8 ldst_so_reg:$ptr, GPR:$val), + (STRBrs GPR:$val, ldst_so_reg:$ptr)>; +def : ARMPat<(atomic_store_8 addrmode_imm12:$ptr, GPR:$val), + (STRBi12 GPR:$val, addrmode_imm12:$ptr)>; +def : ARMPat<(atomic_store_16 addrmode3:$ptr, GPR:$val), + (STRH GPR:$val, addrmode3:$ptr)>; +def : ARMPat<(atomic_store_32 ldst_so_reg:$ptr, GPR:$val), + (STRrs GPR:$val, ldst_so_reg:$ptr)>; +def : ARMPat<(atomic_store_32 addrmode_imm12:$ptr, GPR:$val), + (STRi12 GPR:$val, addrmode_imm12:$ptr)>; + //===----------------------------------------------------------------------===// // Thumb Support @@ -4070,7 +4902,103 @@ def : MnemonicAlias<"swi", "svc">; // Load / Store Multiple def : MnemonicAlias<"ldmfd", "ldm">; def : MnemonicAlias<"ldmia", "ldm">; +def : MnemonicAlias<"ldmea", "ldmdb">; def : MnemonicAlias<"stmfd", "stmdb">; def : MnemonicAlias<"stmia", "stm">; def : MnemonicAlias<"stmea", "stm">; +// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the +// shift amount is zero (i.e., unspecified). +def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm", + (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>, + Requires<[IsARM, HasV6]>; +def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm", + (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p)>, + Requires<[IsARM, HasV6]>; + +// PUSH/POP aliases for STM/LDM +def : ARMInstAlias<"push${p} $regs", (STMDB_UPD SP, pred:$p, reglist:$regs)>; +def : ARMInstAlias<"pop${p} $regs", (LDMIA_UPD SP, pred:$p, reglist:$regs)>; + +// SSAT/USAT optional shift operand. 
+def : ARMInstAlias<"ssat${p} $Rd, $sat_imm, $Rn", + (SSAT GPRnopc:$Rd, imm1_32:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>; +def : ARMInstAlias<"usat${p} $Rd, $sat_imm, $Rn", + (USAT GPRnopc:$Rd, imm0_31:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>; + + +// Extend instruction optional rotate operand. +def : ARMInstAlias<"sxtab${p} $Rd, $Rn, $Rm", + (SXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtah${p} $Rd, $Rn, $Rm", + (SXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtab16${p} $Rd, $Rn, $Rm", + (SXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtb${p} $Rd, $Rm", + (SXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtb16${p} $Rd, $Rm", + (SXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxth${p} $Rd, $Rm", + (SXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; + +def : ARMInstAlias<"uxtab${p} $Rd, $Rn, $Rm", + (UXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtah${p} $Rd, $Rn, $Rm", + (UXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtab16${p} $Rd, $Rn, $Rm", + (UXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtb${p} $Rd, $Rm", + (UXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtb16${p} $Rd, $Rm", + (UXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxth${p} $Rd, $Rm", + (UXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; + + +// RFE aliases +def : MnemonicAlias<"rfefa", "rfeda">; +def : MnemonicAlias<"rfeea", "rfedb">; +def : MnemonicAlias<"rfefd", "rfeia">; +def : MnemonicAlias<"rfeed", "rfeib">; +def : MnemonicAlias<"rfe", "rfeia">; + +// SRS aliases +def : MnemonicAlias<"srsfa", "srsda">; +def : MnemonicAlias<"srsea", "srsdb">; +def : MnemonicAlias<"srsfd", "srsia">; +def : MnemonicAlias<"srsed", "srsib">; +def : MnemonicAlias<"srs", "srsia">; + +// QSAX == QSUBADDX +def : MnemonicAlias<"qsubaddx", "qsax">; +// SASX == SADDSUBX +def : MnemonicAlias<"saddsubx", "sasx">; +// SHASX == SHADDSUBX +def : MnemonicAlias<"shaddsubx", "shasx">; +// SHSAX == SHSUBADDX +def : MnemonicAlias<"shsubaddx", "shsax">; +// SSAX == SSUBADDX +def : MnemonicAlias<"ssubaddx", "ssax">; +// UASX == UADDSUBX +def : MnemonicAlias<"uaddsubx", "uasx">; +// UHASX == UHADDSUBX +def : MnemonicAlias<"uhaddsubx", "uhasx">; +// UHSAX == UHSUBADDX +def : MnemonicAlias<"uhsubaddx", "uhsax">; +// UQASX == UQADDSUBX +def : MnemonicAlias<"uqaddsubx", "uqasx">; +// UQSAX == UQSUBADDX +def : MnemonicAlias<"uqsubaddx", "uqsax">; +// USAX == USUBADDX +def : MnemonicAlias<"usubaddx", "usax">; + +// LDRSBT/LDRHT/LDRSHT post-index offset is optional. +// Note that the write-back output register is a dummy operand for MC (it's +// only meaningful for codegen), so we just pass zero here. +// FIXME: tblgen not cooperating with argument conversions.
+//def : InstAlias<"ldrsbt${p} $Rt, $addr", +// (LDRSBTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0,pred:$p)>; +//def : InstAlias<"ldrht${p} $Rt, $addr", +// (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>; +//def : InstAlias<"ldrsht${p} $Rt, $addr", +// (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 0df62f4..7aad186 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -11,6 +11,35 @@ // //===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// NEON-specific Operands. +//===----------------------------------------------------------------------===// +def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } +def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } +def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } +def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint64_t)Imm) < 8; +}]> { + let ParserMatchClass = VectorIndex8Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} +def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint64_t)Imm) < 4; +}]> { + let ParserMatchClass = VectorIndex16Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} +def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint64_t)Imm) < 2; +}]> { + let ParserMatchClass = VectorIndex32Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. 
//===----------------------------------------------------------------------===// @@ -175,7 +204,8 @@ class VLDQQWBPseudo<InstrItinClass itin> (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; class VLDQQQQPseudo<InstrItinClass itin> - : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,"">; + : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, + "$src = $dst">; class VLDQQQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, @@ -190,6 +220,7 @@ class VLD1D<bits<4> op7_4, string Dt> "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD1Q<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2), @@ -197,6 +228,7 @@ class VLD1Q<bits<4> op7_4, string Dt> "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD1d8 : VLD1D<{0,0,0,?}, "8">; @@ -221,6 +253,7 @@ class VLD1DWB<bits<4> op7_4, string Dt> "vld1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD1QWB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), @@ -228,6 +261,7 @@ class VLD1QWB<bits<4> op7_4, string Dt> "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">; @@ -252,12 +286,14 @@ class VLD1D3<bits<4> op7_4, string Dt> "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD1D3WB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; @@ -280,6 +316,7 @@ class VLD1D4<bits<4> op7_4, string Dt> "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD1D4WB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4, @@ -288,6 +325,7 @@ class VLD1D4WB<bits<4> op7_4, string Dt> "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; @@ -310,6 +348,7 @@ class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD2Q<bits<4> op7_4, string Dt> : NLdSt<0, 0b10, 0b0011, op7_4, @@ -318,6 +357,7 @@ class VLD2Q<bits<4> op7_4, string Dt> "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">; @@ -343,6 +383,7 @@ class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } class VLD2QWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b10, 0b0011, op7_4, @@ -351,6 +392,7 @@ class VLD2QWB<bits<4> op7_4, string Dt> "vld2", Dt, "\\{$Vd, 
$dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">; @@ -384,6 +426,7 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; @@ -402,6 +445,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; @@ -441,6 +485,7 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; @@ -459,6 +504,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; } def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; @@ -530,6 +576,7 @@ class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, (i32 (LoadOp addrmode6:$Rn)), imm:$lane))]> { let Rm = 0b1111; + let DecoderMethod = "DecodeVLD1LN"; } class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> @@ -541,6 +588,7 @@ class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, (i32 (LoadOp addrmode6oneL32:$Rn)), imm:$lane))]> { let Rm = 0b1111; + let DecoderMethod = "DecodeVLD1LN"; } class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> { let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), @@ -580,7 +628,9 @@ class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", - "$src = $Vd, $Rn.addr = $wb", []>; + "$src = $Vd, $Rn.addr = $wb", []> { + let DecoderMethod = "DecodeVLD1LN"; +} def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; @@ -607,6 +657,7 @@ class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> "$src1 = $Vd, $src2 = $dst2", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2LN"; } def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { @@ -642,6 +693,7 @@ class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2LN"; } def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { @@ -676,6 +728,7 @@ class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { let Rm = 0b1111; + let DecoderMethod = "DecodeVLD3LN"; } def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { @@ -712,7 +765,9 @@ class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> IIC_VLD3lnu, "vld3", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", - []>; + []> { + let DecoderMethod = "DecodeVLD3LN"; +} def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; @@ -748,6 +803,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = 
$dst4", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4LN"; } def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { @@ -788,6 +844,7 @@ class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4LN" ; } def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { @@ -825,6 +882,7 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; } class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> { let Pattern = [(set QPR:$dst, @@ -852,6 +910,7 @@ class VLD1QDUP<bits<4> op7_4, string Dt> "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; } def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">; @@ -864,12 +923,14 @@ class VLD1DUPWB<bits<4> op7_4, string Dt> (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; } class VLD1QDUPWB<bits<4> op7_4, string Dt> : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; } def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">; @@ -891,6 +952,7 @@ class VLD2DUP<bits<4> op7_4, string Dt> "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; } def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">; @@ -912,6 +974,7 @@ class VLD2DUPWB<bits<4> op7_4, string Dt> (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu, "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; } def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">; @@ -932,7 +995,8 @@ class VLD3DUP<bits<4> op7_4, string Dt> (ins addrmode6dup:$Rn), IIC_VLD3dup, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> { let Rm = 0b1111; - let Inst{4} = Rn{4}; + let Inst{4} = 0; + let DecoderMethod = "DecodeVLD3DupInstruction"; } def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; @@ -954,7 +1018,8 @@ class VLD3DUPWB<bits<4> op7_4, string Dt> (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; + let Inst{4} = 0; + let DecoderMethod = "DecodeVLD3DupInstruction"; } def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; @@ -977,6 +1042,7 @@ class VLD4DUP<bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4DupInstruction"; } def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; @@ -1000,6 +1066,7 @@ class VLD4DUPWB<bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4DupInstruction"; } def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; @@ -1045,6 +1112,7 @@ class VST1D<bits<4> op7_4, string Dt> IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q<bits<4> 
op7_4, string Dt> : NLdSt<0,0b00,0b1010,op7_4, (outs), @@ -1052,6 +1120,7 @@ class VST1Q<bits<4> op7_4, string Dt> "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; @@ -1075,6 +1144,7 @@ class VST1DWB<bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u, "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST1QWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), @@ -1082,6 +1152,7 @@ class VST1QWB<bits<4> op7_4, string Dt> IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">; @@ -1106,6 +1177,7 @@ class VST1D3<bits<4> op7_4, string Dt> IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST1D3WB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), @@ -1114,6 +1186,7 @@ class VST1D3WB<bits<4> op7_4, string Dt> IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST1d8T : VST1D3<{0,0,0,?}, "8">; @@ -1137,6 +1210,7 @@ class VST1D4<bits<4> op7_4, string Dt> []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST1D4WB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), @@ -1145,6 +1219,7 @@ class VST1D4WB<bits<4> op7_4, string Dt> "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST1d8Q : VST1D4<{0,0,?,?}, "8">; @@ -1167,6 +1242,7 @@ class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST2Q<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0011, op7_4, (outs), @@ -1175,6 +1251,7 @@ class VST2Q<bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">; @@ -1200,6 +1277,7 @@ class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } class VST2QWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), @@ -1208,6 +1286,7 @@ class VST2QWB<bits<4> op7_4, string Dt> "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">; @@ -1241,6 +1320,7 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; @@ -1259,6 +1339,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; @@ -1298,6 
+1379,7 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; @@ -1316,6 +1398,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; } def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; @@ -1381,6 +1464,7 @@ class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> { let Rm = 0b1111; + let DecoderMethod = "DecodeVST1LN"; } class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> @@ -1389,6 +1473,7 @@ class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{ let Rm = 0b1111; + let DecoderMethod = "DecodeVST1LN"; } class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> : VSTQLNPseudo<IIC_VST1ln> { @@ -1429,7 +1514,9 @@ class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - addrmode6:$Rn, am6offset:$Rm))]>; + addrmode6:$Rn, am6offset:$Rm))]> { + let DecoderMethod = "DecodeVST1LN"; +} class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> : VSTQLNWBPseudo<IIC_VST1lnu> { let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), @@ -1465,6 +1552,7 @@ class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST2LN"; } def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { @@ -1502,6 +1590,7 @@ class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", "$addr.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST2LN"; } def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { @@ -1535,6 +1624,7 @@ class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt> nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> { let Rm = 0b1111; + let DecoderMethod = "DecodeVST3LN"; } def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { @@ -1569,7 +1659,9 @@ class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST3lnu, "vst3", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", - "$Rn.addr = $wb", []>; + "$Rn.addr = $wb", []> { + let DecoderMethod = "DecodeVST3LN"; +} def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; @@ -1604,6 +1696,7 @@ class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST4LN"; } def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { @@ -1642,6 +1735,7 @@ class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST4LN"; } def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { @@ -4039,6 +4133,7 @@ class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, ResTy, OpTy, 
OpNode> { let Inst{21-16} = op21_16; + let DecoderMethod = "DecodeVSHLMaxInstruction"; } def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", v8i16, v8i8, NEONvshlli>; @@ -4219,16 +4314,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm", def : InstAlias<"vmov${p} $Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; -let neverHasSideEffects = 1 in { -// Pseudo vector move instructions for QQ and QQQQ registers. This should -// be expanded after register allocation is completed. -def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), - NoItinerary, []>; - -def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), - NoItinerary, []>; -} // neverHasSideEffects - // VMOV : Vector Move (Immediate) let isReMaterializable = 1 in { @@ -4462,36 +4547,42 @@ def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; // VDUP : Vector Duplicate Lane (from scalar to all elements) class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, - ValueType Ty> - : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane), - IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]", + ValueType Ty, Operand IdxTy> + : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), + IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane", [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>; class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy> - : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane), - IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]", + ValueType ResTy, ValueType OpTy, Operand IdxTy> + : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane), + IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane", [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm), - imm:$lane)))]>; + VectorIndex32:$lane)))]>; // Inst{19-16} is partially specified depending on the element size. -def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> { +def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> { + bits<3> lane; let Inst{19-17} = lane{2-0}; } -def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> { +def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> { + bits<2> lane; let Inst{19-18} = lane{1-0}; } -def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> { +def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> { + bits<1> lane; let Inst{19} = lane{0}; } -def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> { +def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> { + bits<3> lane; let Inst{19-17} = lane{2-0}; } -def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> { +def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> { + bits<2> lane; let Inst{19-18} = lane{1-0}; } -def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> { +def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> { + bits<1> lane; let Inst{19} = lane{0}; } @@ -4753,6 +4844,7 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; // Vector Table Lookup and Table Extension. 
// VTBL : Vector Table Lookup +let DecoderMethod = "DecodeTBLInstruction" in { def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, @@ -4815,6 +4907,7 @@ def VTBX3Pseudo def VTBX4Pseudo : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), IIC_VTBX4, "$orig = $dst", []>; +} // DecoderMethod = "DecodeTBLInstruction" //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index bfe83ec..cedb547 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -19,6 +19,19 @@ def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def imm_sr_XFORM: SDNodeXForm<imm, [{ + unsigned Imm = N->getZExtValue(); + return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), MVT::i32); +}]>; +def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; } +def imm_sr : Operand<i32>, PatLeaf<(imm), [{ + uint64_t Imm = N->getZExtValue(); + return Imm > 0 && Imm <= 32; +}], imm_sr_XFORM> { + let PrintMethod = "printThumbSRImm"; + let ParserMatchClass = ThumbSRImmAsmOperand; +} + def imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); }]>; @@ -30,10 +43,6 @@ def imm0_7_neg : PatLeaf<(i32 imm), [{ return (uint32_t)-N->getZExtValue() < 8; }], imm_neg_XFORM>; -def imm0_255_asmoperand : AsmOperandClass { let Name = "Imm0_255"; } -def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { - let ParserMatchClass = imm0_255_asmoperand; -} def imm0_255_comp : PatLeaf<(i32 imm), [{ return ~((uint32_t)N->getZExtValue()) < 256; }]>; @@ -69,8 +78,17 @@ def t_adrlabel : Operand<i32> { } // Scaled 4 immediate. 
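Editor's note: the scaled-by-4 immediate operands defined below (t_imm0_1020s4 and t_imm0_508s4) store imm/4 in an 8-bit or 7-bit field. A quick C++ sketch of the encode/decode round trip (illustrative helpers, not the actual EncoderMethod hooks):
#include <cstdint>
#include <optional>
// Encode a byte offset into an N-bit, word-scaled field, or reject it.
std::optional<uint8_t> encodeS4(uint32_t imm, unsigned fieldBits) {
  uint32_t max = ((1u << fieldBits) - 1) * 4;  // 1020 for 8 bits, 508 for 7
  if (imm > max || imm % 4 != 0)
    return std::nullopt;                       // not representable
  return static_cast<uint8_t>(imm / 4);
}
uint32_t decodeS4(uint8_t field) { return static_cast<uint32_t>(field) * 4; }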
-def t_imm_s4 : Operand<i32> { +def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; } +def t_imm0_1020s4 : Operand<i32> { + let PrintMethod = "printThumbS4ImmOperand"; + let ParserMatchClass = t_imm0_1020s4_asmoperand; + let OperandType = "OPERAND_IMMEDIATE"; +} + +def t_imm0_508s4_asmoperand: AsmOperandClass { let Name = "Imm0_508s4"; } +def t_imm0_508s4 : Operand<i32> { let PrintMethod = "printThumbS4ImmOperand"; + let ParserMatchClass = t_imm0_508s4_asmoperand; let OperandType = "OPERAND_IMMEDIATE"; } @@ -79,113 +97,129 @@ def t_imm_s4 : Operand<i32> { let OperandType = "OPERAND_PCREL" in { def t_brtarget : Operand<OtherVT> { let EncoderMethod = "getThumbBRTargetOpValue"; + let DecoderMethod = "DecodeThumbBROperand"; } def t_bcctarget : Operand<i32> { let EncoderMethod = "getThumbBCCTargetOpValue"; + let DecoderMethod = "DecodeThumbBCCTargetOperand"; } def t_cbtarget : Operand<i32> { let EncoderMethod = "getThumbCBTargetOpValue"; + let DecoderMethod = "DecodeThumbCmpBROperand"; } def t_bltarget : Operand<i32> { let EncoderMethod = "getThumbBLTargetOpValue"; + let DecoderMethod = "DecodeThumbBLTargetOperand"; } def t_blxtarget : Operand<i32> { let EncoderMethod = "getThumbBLXTargetOpValue"; + let DecoderMethod = "DecodeThumbBLXOffset"; } } -def MemModeRegThumbAsmOperand : AsmOperandClass { - let Name = "MemModeRegThumb"; - let SuperClasses = []; -} - -def MemModeImmThumbAsmOperand : AsmOperandClass { - let Name = "MemModeImmThumb"; - let SuperClasses = []; -} - // t_addrmode_rr := reg + reg // +def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; } def t_addrmode_rr : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; + let DecoderMethod = "DecodeThumbAddrModeRR"; + let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); } // t_addrmode_rrs := reg + reg // +// We use separate scaled versions because the Select* functions need +// to explicitly check for a matching constant and return false here so that +// the reg+imm forms will match instead. This is a horrible way to do that, +// as it forces tight coupling between the methods, but it's how selectiondag +// currently works. 
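Editor's note: a simplified C++ sketch of the coupling the comment above describes (names modeled loosely on the Select* hooks; this is not the actual ARMISelDAGToDAG code). The reg+reg selector must refuse constant offsets so that the scaled reg+imm selector is the one that matches them.
#include <cstdint>
// A "node" here is an add of a base plus either a register or a constant.
struct AddrNode {
  bool offsetIsConst;
  uint32_t constOffset; // valid only when offsetIsConst is true
};
// reg+reg selector: bail out on constants, deferring to the imm form.
bool selectAddrModeRR(const AddrNode &n) { return !n.offsetIsConst; }
// reg+imm5, scaled: accept only aligned, in-range constants.
bool selectAddrModeIS(const AddrNode &n, unsigned scale) {
  return n.offsetIsConst && n.constOffset % scale == 0 &&
         n.constOffset / scale < 32; // 5-bit immediate field
}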
def t_addrmode_rrs1 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; + let DecoderMethod = "DecodeThumbAddrModeRR"; + let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); - let ParserMatchClass = MemModeRegThumbAsmOperand; } def t_addrmode_rrs2 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; + let DecoderMethod = "DecodeThumbAddrModeRR"; let PrintMethod = "printThumbAddrModeRROperand"; + let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); - let ParserMatchClass = MemModeRegThumbAsmOperand; } def t_addrmode_rrs4 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; + let DecoderMethod = "DecodeThumbAddrModeRR"; let PrintMethod = "printThumbAddrModeRROperand"; + let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); - let ParserMatchClass = MemModeRegThumbAsmOperand; } // t_addrmode_is4 := reg + imm5 * 4 // +def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; } def t_addrmode_is4 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> { let EncoderMethod = "getAddrModeISOpValue"; + let DecoderMethod = "DecodeThumbAddrModeIS"; let PrintMethod = "printThumbAddrModeImm5S4Operand"; + let ParserMatchClass = t_addrmode_is4_asm_operand; let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemModeImmThumbAsmOperand; } // t_addrmode_is2 := reg + imm5 * 2 // +def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; } def t_addrmode_is2 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> { let EncoderMethod = "getAddrModeISOpValue"; + let DecoderMethod = "DecodeThumbAddrModeIS"; let PrintMethod = "printThumbAddrModeImm5S2Operand"; + let ParserMatchClass = t_addrmode_is2_asm_operand; let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemModeImmThumbAsmOperand; } // t_addrmode_is1 := reg + imm5 // +def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; } def t_addrmode_is1 : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> { let EncoderMethod = "getAddrModeISOpValue"; + let DecoderMethod = "DecodeThumbAddrModeIS"; let PrintMethod = "printThumbAddrModeImm5S1Operand"; + let ParserMatchClass = t_addrmode_is1_asm_operand; let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemModeImmThumbAsmOperand; } // t_addrmode_sp := sp + imm8 * 4 // +// FIXME: This really shouldn't have an explicit SP operand at all. It should +// be implicit, just like in the instruction encoding itself. 
+def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; } def t_addrmode_sp : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> { let EncoderMethod = "getAddrModeThumbSPOpValue"; + let DecoderMethod = "DecodeThumbAddrModeSP"; let PrintMethod = "printThumbAddrModeSPOperand"; + let ParserMatchClass = t_addrmode_sp_asm_operand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemModeImmThumbAsmOperand; } // t_addrmode_pc := <label> => pc + imm8 * 4 // def t_addrmode_pc : Operand<i32> { let EncoderMethod = "getAddrModePCOpValue"; - let ParserMatchClass = MemModeImmThumbAsmOperand; + let DecoderMethod = "DecodeThumbAddrModePC"; } //===----------------------------------------------------------------------===// @@ -207,68 +241,52 @@ def tADJCALLSTACKDOWN : Requires<[IsThumb, IsThumb1Only]>; } -// T1Disassembly - A simple class to make encoding some disassembly patterns -// easier and less verbose. -class T1Disassembly<bits<2> op1, bits<8> op2> +class T1SystemEncoding<bits<8> opc> : T1Encoding<0b101111> { - let Inst{9-8} = op1; - let Inst{7-0} = op2; + let Inst{9-8} = 0b11; + let Inst{7-0} = opc; } -def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b11, 0x00>; // A8.6.110 +def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>, + T1SystemEncoding<0x00>, // A8.6.110 + Requires<[IsThumb2]>; -def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b11, 0x10>; // A8.6.410 +def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>, + T1SystemEncoding<0x10>; // A8.6.410 -def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b11, 0x20>; // A8.6.408 +def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>, + T1SystemEncoding<0x20>; // A8.6.408 -def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b11, 0x30>; // A8.6.409 +def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>, + T1SystemEncoding<0x30>; // A8.6.409 -def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b11, 0x40>; // A8.6.157 +def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>, + T1SystemEncoding<0x40>; // A8.6.157 -// The i32imm operand $val can be used by a debugger to store more information +// The imm operand $val can be used by a debugger to store more information // about the breakpoint. 
-def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val", - [/* For disassembly only; pattern left blank */]>, - T1Disassembly<0b10, {?,?,?,?,?,?,?,?}> { +def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val", + []>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b10; // A8.6.22 bits<8> val; let Inst{7-0} = val; } -def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe", - [/* For disassembly only; pattern left blank */]>, - T1Encoding<0b101101> { - // A8.6.156 - let Inst{9-5} = 0b10010; - let Inst{4} = 1; - let Inst{3} = 1; // Big-Endian - let Inst{2-0} = 0b000; -} - -def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle", - [/* For disassembly only; pattern left blank */]>, - T1Encoding<0b101101> { +def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end", + []>, T1Encoding<0b101101> { + bits<1> end; // A8.6.156 let Inst{9-5} = 0b10010; let Inst{4} = 1; - let Inst{3} = 0; // Little-Endian + let Inst{3} = end; let Inst{2-0} = 0b000; } // Change Processor State is a system instruction -- for disassembly only. def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags), - NoItinerary, "cps$imod $iflags", - [/* For disassembly only; pattern left blank */]>, + NoItinerary, "cps$imod $iflags", []>, T1Misc<0b0110011> { // A8.6.38 & B6.1.1 bit imod; @@ -277,6 +295,7 @@ def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags), let Inst{4} = imod; let Inst{3} = 0; let Inst{2-0} = iflags; + let DecoderMethod = "DecodeThumbCPS"; } // For both thumb1 and thumb2. @@ -290,70 +309,70 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "", let Inst{2-0} = dst; } -// PC relative add (ADR). -def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, pc, $rhs", []>, - T1Encoding<{1,0,1,0,0,?}> { - // A6.2 & A8.6.10 - bits<3> dst; - bits<8> rhs; - let Inst{10-8} = dst; - let Inst{7-0} = rhs; -} - // ADD <Rd>, sp, #<imm8> -// This is rematerializable, which is particularly useful for taking the -// address of locals. -let isReMaterializable = 1 in -def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, $sp, $rhs", []>, +// FIXME: This should not be marked as having side effects, and it should be +// rematerializable. Clearing the side effect bit causes miscompilations, +// probably because the instruction can be moved around. +def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm), + IIC_iALUi, "add", "\t$dst, $sp, $imm", []>, T1Encoding<{1,0,1,0,1,?}> { // A6.2 & A8.6.8 bits<3> dst; - bits<8> rhs; + bits<8> imm; let Inst{10-8} = dst; - let Inst{7-0} = rhs; + let Inst{7-0} = imm; + let DecoderMethod = "DecodeThumbAddSpecialReg"; } // ADD sp, sp, #<imm7> -def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, $rhs", []>, +def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), + IIC_iALUi, "add", "\t$Rdn, $imm", []>, T1Misc<{0,0,0,0,0,?,?}> { // A6.2.5 & A8.6.8 - bits<7> rhs; - let Inst{6-0} = rhs; + bits<7> imm; + let Inst{6-0} = imm; + let DecoderMethod = "DecodeThumbAddSPImm"; } // SUB sp, sp, #<imm7> // FIXME: The encoding and the ASM string don't match up. 
-def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, - "sub\t$dst, $rhs", []>, +def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), + IIC_iALUi, "sub", "\t$Rdn, $imm", []>, T1Misc<{0,0,0,0,1,?,?}> { // A6.2.5 & A8.6.214 - bits<7> rhs; - let Inst{6-0} = rhs; + bits<7> imm; + let Inst{6-0} = imm; + let DecoderMethod = "DecodeThumbAddSPImm"; } +// Can optionally specify SP as a three operand instruction. +def : tInstAlias<"add${p} sp, sp, $imm", + (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>; +def : tInstAlias<"sub${p} sp, sp, $imm", + (tSUBspi SP, t_imm0_508s4:$imm, pred:$p)>; + // ADD <Rm>, sp -def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add\t$dst, $rhs", []>, +def tADDrSP : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPRsp:$sp), IIC_iALUr, + "add", "\t$Rdn, $sp, $Rn", []>, T1Special<{0,0,?,?}> { // A8.6.9 Encoding T1 - bits<4> dst; - let Inst{7} = dst{3}; + bits<4> Rdn; + let Inst{7} = Rdn{3}; let Inst{6-3} = 0b1101; - let Inst{2-0} = dst{2-0}; + let Inst{2-0} = Rdn{2-0}; + let DecoderMethod = "DecodeThumbAddSPReg"; } // ADD sp, <Rm> -def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add\t$dst, $rhs", []>, +def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr, + "add", "\t$Rdn, $Rm", []>, T1Special<{0,0,?,?}> { // A8.6.9 Encoding T2 - bits<4> dst; + bits<4> Rm; let Inst{7} = 1; - let Inst{6-3} = dst; + let Inst{6-3} = Rm; let Inst{2-0} = 0b101; + let DecoderMethod = "DecodeThumbAddSPReg"; } //===----------------------------------------------------------------------===// @@ -390,11 +409,12 @@ let isCall = 1, Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, - (outs), (ins t_bltarget:$func, variable_ops), IIC_Br, - "bl\t$func", + (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br, + "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsNotDarwin]> { - bits<21> func; + bits<22> func; + let Inst{26} = func{21}; let Inst{25-16} = func{20-11}; let Inst{13} = 1; let Inst{11} = 1; @@ -403,8 +423,8 @@ let isCall = 1, // ARMv5T and above, also used for Thumb2 def tBLXi : TIx2<0b11110, 0b11, 0, - (outs), (ins t_blxtarget:$func, variable_ops), IIC_Br, - "blx\t$func", + (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br, + "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]> { bits<21> func; @@ -416,8 +436,8 @@ let isCall = 1, } // Also used for Thumb2 - def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, - "blx\t$func", + def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, + "blx${p}\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>, T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24; @@ -440,43 +460,22 @@ let isCall = 1, Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], Uses = [R7, SP] in { // Also used for Thumb2 - def tBLr9 : TIx2<0b11110, 0b11, 1, - (outs), (ins pred:$p, t_bltarget:$func, variable_ops), - IIC_Br, "bl${p}\t$func", - [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb, IsDarwin]> { - bits<21> func; - let Inst{25-16} = func{20-11}; - let Inst{13} = 1; - let Inst{11} = 1; - let Inst{10-0} = func{10-0}; - } + def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops), + 4, IIC_Br, [(ARMtcall tglobaladdr:$func)], + (tBL pred:$p, t_bltarget:$func)>, + Requires<[IsThumb, IsDarwin]>; // ARMv5T and above, also used for Thumb2 - def tBLXi_r9 : TIx2<0b11110, 0b11, 0, - (outs), (ins pred:$p, 
t_blxtarget:$func, variable_ops), - IIC_Br, "blx${p}\t$func", - [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T, IsDarwin]> { - bits<21> func; - let Inst{25-16} = func{20-11}; - let Inst{13} = 1; - let Inst{11} = 1; - let Inst{10-1} = func{10-1}; - let Inst{0} = 0; // func{0} is assumed zero - } + def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), + 4, IIC_Br, [(ARMcall tglobaladdr:$func)], + (tBLXi pred:$p, t_blxtarget:$func)>, + Requires<[IsThumb, HasV5T, IsDarwin]>; // Also used for Thumb2 - def tBLXr_r9 : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, - "blx${p}\t$func", - [(ARMtcall GPR:$func)]>, - Requires<[IsThumb, HasV5T, IsDarwin]>, - T1Special<{1,1,1,?}> { - // A6.2.3 & A8.6.24 - bits<4> func; - let Inst{6-3} = func; - let Inst{2-0} = 0b000; - } + def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops), + 2, IIC_Br, [(ARMtcall GPR:$func)], + (tBLXr pred:$p, GPR:$func)>, + Requires<[IsThumb, HasV5T, IsDarwin]>; // ARMv4T def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), @@ -487,8 +486,8 @@ let isCall = 1, let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in - def tB : T1I<(outs), (ins t_brtarget:$target), IIC_Br, - "b\t$target", [(br bb:$target)]>, + def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br, + "b", "\t$target", [(br bb:$target)]>, T1Encoding<{1,1,1,0,0,?}> { bits<11> target; let Inst{10-0} = target; @@ -498,8 +497,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { // Just a pseudo for a tBL instruction. Needed to let regalloc know about // the clobber of LR. let Defs = [LR] in - def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target), - 4, IIC_Br, [], (tBL t_bltarget:$target)>; + def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p), + 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>; def tBR_JTr : tPseudoInst<(outs), (ins tGPR:$target, i32imm:$jt, i32imm:$id), @@ -522,31 +521,6 @@ let isBranch = 1, isTerminator = 1 in let Inst{7-0} = target; } -// Compare and branch on zero / non-zero -let isBranch = 1, isTerminator = 1 in { - def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, - "cbz\t$Rn, $target", []>, - T1Misc<{0,0,?,1,?,?,?}> { - // A8.6.27 - bits<6> target; - bits<3> Rn; - let Inst{9} = target{5}; - let Inst{7-3} = target{4-0}; - let Inst{2-0} = Rn; - } - - def tCBNZ : T1I<(outs), (ins tGPR:$cmp, t_cbtarget:$target), IIC_Br, - "cbnz\t$cmp, $target", []>, - T1Misc<{1,0,?,1,?,?,?}> { - // A8.6.27 - bits<6> target; - bits<3> Rn; - let Inst{9} = target{5}; - let Inst{7-3} = target{4-0}; - let Inst{2-0} = Rn; - } -} - // Tail calls let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // Darwin versions. @@ -562,9 +536,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // Non-Darwin versions (the difference is R9). 
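A note on the tBL rewrite earlier in this hunk: the branch target widened from 21 to 22 bits, and the record scatters it across the two halfwords of the 32-bit encoding. A minimal C++ sketch of that packing, assuming only the bit positions the record names (the helper itself is hypothetical, not LLVM code):

#include <cstdint>

// Hypothetical helper, not LLVM code: pack a 22-bit Thumb BL target into
// the 32-bit tBL encoding the way the record's bit assignments describe
// (Inst{26} = func{21}, Inst{25-16} = func{20-11}, Inst{10-0} = func{10-0}).
static uint32_t encodeTBLTarget(uint32_t Func22) {
  uint32_t Inst = 0;
  Inst |= ((Func22 >> 21) & 0x1u) << 26;   // Inst{26}    = func{21}
  Inst |= ((Func22 >> 11) & 0x3FFu) << 16; // Inst{25-16} = func{20-11}
  Inst |= (1u << 13) | (1u << 11);         // fixed bits from the record
  Inst |= Func22 & 0x7FFu;                 // Inst{10-0}  = func{10-0}
  return Inst;
}

int main() {
  uint32_t I = encodeTBLTarget(0x200801); // bits 21, 11 and 0 set
  return (((I >> 26) & 1) && ((I >> 16) & 1) && (I & 1)) ? 0 : 1;
}
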
let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], Uses = [SP] in { - def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, variable_ops), + def tTAILJMPdND : tPseudoExpand<(outs), + (ins t_brtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], - (tB t_brtarget:$dst)>, + (tB t_brtarget:$dst, pred:$p)>, Requires<[IsThumb, IsNotDarwin]>; def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], @@ -574,11 +549,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { } -// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only +// A8.6.218 Supervisor Call (Software Interrupt) // A8.6.16 B: Encoding T1 // If Inst{11-8} == 0b1111 then SEE SVC let isCall = 1, Uses = [SP] in -def tSVC : T1pI<(outs), (ins i32imm:$imm), IIC_Br, +def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br, "svc", "\t$imm", []>, Encoding16 { bits<8> imm; let Inst{15-12} = 0b1101; @@ -653,17 +628,17 @@ defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2, let AddedComplexity = 10 in def tLDRSB : // A8.6.80 - T1pILdStEncode<0b011, (outs tGPR:$dst), (ins t_addrmode_rr:$addr), + T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr), AddrModeT1_1, IIC_iLoad_bh_r, - "ldrsb", "\t$dst, $addr", - [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; + "ldrsb", "\t$Rt, $addr", + [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>; let AddedComplexity = 10 in def tLDRSH : // A8.6.84 - T1pILdStEncode<0b111, (outs tGPR:$dst), (ins t_addrmode_rr:$addr), + T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr), AddrModeT1_2, IIC_iLoad_bh_r, - "ldrsh", "\t$dst, $addr", - [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; + "ldrsh", "\t$Rt, $addr", + [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>; let canFoldAsLoad = 1 in def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, @@ -678,7 +653,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. -let canFoldAsLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, "ldr", ".n\t$Rt, $addr", [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, @@ -736,42 +711,53 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, // Load / store multiple Instructions. // -multiclass thumb_ldst_mult<string asm, InstrItinClass itin, - InstrItinClass itin_upd, bits<6> T1Enc, - bit L_bit> { - def IA : - T1I<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin, !strconcat(asm, "ia${p}\t$Rn, $regs"), []>, - T1Encoding<T1Enc> { - bits<3> Rn; - bits<8> regs; - let Inst{10-8} = Rn; - let Inst{7-0} = regs; - } - def IA_UPD : - T1It<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []>, - T1Encoding<T1Enc> { - bits<3> Rn; - bits<8> regs; - let Inst{10-8} = Rn; - let Inst{7-0} = regs; - } -} - // These require base address to be written back or one of the loaded regs. 
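The tLDMIA rewrite just below leans on a Thumb1 quirk: the 16-bit LDM encoding has no writeback bit, and the base is written back exactly when it is absent from the register list. A small C++ sketch of that predicate, assuming the register list is the low-eight-bit mask from the encoding (the helper name is ours, not LLVM's):

#include <cstdint>

// Hypothetical helper, not LLVM API: the 16-bit Thumb1 LDM has no W bit,
// so the base register Rn (r0-r7) is updated exactly when it is not also
// being loaded, i.e. when its bit is clear in the 8-bit register list.
static bool thumb1LdmWritesBack(unsigned Rn, uint8_t RegList) {
  return ((RegList >> Rn) & 1u) == 0;
}

int main() {
  bool NoWb = !thumb1LdmWritesBack(0, 0x03); // ldm r0, {r0,r1}: r0 reloaded
  bool Wb = thumb1LdmWritesBack(0, 0x06);    // ldm r0, {r1,r2}: r0 advances
  return (NoWb && Wb) ? 0 : 1;
}
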
let neverHasSideEffects = 1 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -defm tLDM : thumb_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, - {1,1,0,0,1,?}, 1>; - +def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops), + IIC_iLoad_m, "ldm${p}\t$Rn, $regs", []>, T1Encoding<{1,1,0,0,1,?}> { + bits<3> Rn; + bits<8> regs; + let Inst{10-8} = Rn; + let Inst{7-0} = regs; +} + +// Writeback version is just a pseudo, as there's no encoding difference. +// Writeback happens iff the base register is not in the destination register +// list. +def tLDMIA_UPD : + InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain, + "$Rn = $wb", IIC_iLoad_mu>, + PseudoInstExpansion<(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)> { + let Size = 2; + let OutOperandList = (outs GPR:$wb); + let InOperandList = (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops); + let Pattern = []; + let isCodeGenOnly = 1; + let isPseudo = 1; + list<Predicate> Predicates = [IsThumb]; +} + +// There is no non-writeback version of STM for Thumb. let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm tSTM : thumb_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, - {1,1,0,0,0,?}, 0>; +def tSTMIA_UPD : Thumb1I<(outs GPR:$wb), + (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops), + AddrModeNone, 2, IIC_iStore_mu, + "stm${p}\t$Rn!, $regs", "$Rn = $wb", []>, + T1Encoding<{1,1,0,0,0,?}> { + bits<3> Rn; + bits<8> regs; + let Inst{10-8} = Rn; + let Inst{7-0} = regs; +} } // neverHasSideEffects +def : InstAlias<"ldm${p} $Rn!, $regs", + (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>, + Requires<[IsThumb, IsThumb1Only]>; + let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), IIC_iPop, @@ -876,7 +862,7 @@ def tADC : // A8.6.2 // Add immediate def tADDi3 : // A8.6.4 T1 - T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), + T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), IIC_iALUi, "add", "\t$Rd, $Rm, $imm3", [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> { @@ -885,8 +871,8 @@ def tADDi3 : // A8.6.4 T1 } def tADDi8 : // A8.6.4 T2 - T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8), - IIC_iALUi, + T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), + (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, "add", "\t$Rdn, $imm8", [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>; @@ -920,10 +906,10 @@ def tAND : // A8.6.12 // ASR immediate def tASRri : // A8.6.14 - T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5), + T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5), IIC_iMOVsi, "asr", "\t$Rd, $Rm, $imm5", - [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm:$imm5)))]> { + [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]> { bits<5> imm5; let Inst{10-6} = imm5; } @@ -962,7 +948,7 @@ def tCMNz : // A8.6.33 // CMP immediate let isCompare = 1, Defs = [CPSR] in { -def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, i32imm:$imm8), IIC_iCMPi, +def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi, "cmp", "\t$Rn, $imm8", [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>, T1General<{1,0,1,?,?}> { @@ -1003,7 +989,7 @@ def tEOR : // A8.6.45 // LSL immediate def tLSLri : // A8.6.88 - T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5), + T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5), IIC_iMOVsi, "lsl", "\t$Rd, $Rm, $imm5", [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> { @@ -1020,10 +1006,10 @@ def 
tLSLrr : // A8.6.89 // LSR immediate def tLSRri : // A8.6.90 - T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5), + T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5), IIC_iMOVsi, "lsr", "\t$Rd, $Rm, $imm5", - [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm:$imm5)))]> { + [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]> { bits<5> imm5; let Inst{10-6} = imm5; } @@ -1047,6 +1033,10 @@ def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi, let Inst{10-8} = Rd; let Inst{7-0} = imm8; } +// Because we have an explicit tMOVSr below, we need an alias to handle +// the immediate "movs" form here. Blech. +def : tInstAlias <"movs $Rdn, $imm", + (tMOVi8 tGPR:$Rdn, CPSR, imm0_255:$imm, 14, 0)>; // A7-73: MOV(2) - mov setting flag. @@ -1077,10 +1067,19 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, // Multiply register let isCommutable = 1 in def tMUL : // A8.6.105 T1 - T1sItDPEncode<0b1101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), - IIC_iMUL32, - "mul", "\t$Rdn, $Rm, $Rdn", - [(set tGPR:$Rdn, (mul tGPR:$Rn, tGPR:$Rm))]>; + Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2, + IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd", + [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>, + T1DataProcessing<0b1101> { + bits<3> Rd; + bits<3> Rn; + let Inst{5-3} = Rn; + let Inst{2-0} = Rd; + let AsmMatchConverter = "cvtThumbMultiply"; +} + +def :tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn, + pred:$p)>; // Move inverse register def tMVN : // A8.6.107 @@ -1132,6 +1131,9 @@ def tRSB : // A8.6.141 "rsb", "\t$Rd, $Rn, #0", [(set tGPR:$Rd, (ineg tGPR:$Rn))]>; +def : tInstAlias<"neg${s}${p} $Rd, $Rm", + (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; + // Subtract with carry register let Uses = [CPSR] in def tSBC : // A8.6.151 @@ -1142,7 +1144,7 @@ def tSBC : // A8.6.151 // Subtract immediate def tSUBi3 : // A8.6.210 T1 - T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), + T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), IIC_iALUi, "sub", "\t$Rd, $Rm, $imm3", [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> { @@ -1151,8 +1153,8 @@ def tSUBi3 : // A8.6.210 T1 } def tSUBi8 : // A8.6.210 T2 - T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8), - IIC_iALUi, + T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), + (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, "sub", "\t$Rdn, $imm8", [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>; @@ -1163,8 +1165,6 @@ def tSUBrr : // A8.6.212 "sub", "\t$Rd, $Rn, $Rm", [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>; -// TODO: A7-96: STMIA - store multiple. - // Sign-extend byte def tSXTB : // A8.6.222 T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), @@ -1216,12 +1216,13 @@ let usesCustomInserter = 1 in // Expanded after instruction selection. // assembler. 
 def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
- IIC_iALUi, "adr{$p}\t$Rd, #$addr", []>,
+ IIC_iALUi, "adr{$p}\t$Rd, $addr", []>,
 T1Encoding<{1,0,1,0,0,?}> {
 bits<3> Rd;
 bits<8> addr;
 let Inst{10-8} = Rd;
 let Inst{7-0} = addr;
+ let DecoderMethod = "DecodeThumbAddSpecialReg";
 }

 let neverHasSideEffects = 1, isReMaterializable = 1 in
@@ -1361,6 +1362,31 @@ def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
 def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
 (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;

+def : T1Pat<(atomic_load_8 t_addrmode_is1:$src),
+ (tLDRBi t_addrmode_is1:$src)>;
+def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src),
+ (tLDRBr t_addrmode_rrs1:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_is2:$src),
+ (tLDRHi t_addrmode_is2:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src),
+ (tLDRHr t_addrmode_rrs2:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_is4:$src),
+ (tLDRi t_addrmode_is4:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src),
+ (tLDRr t_addrmode_rrs4:$src)>;
+def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val),
+ (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>;
+def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val),
+ (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val),
+ (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val),
+ (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val),
+ (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val),
+ (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>;
+
 // Large immediate handling.

 // Two piece imms.
@@ -1395,3 +1421,15 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
 2, IIC_Br, [(brind GPR:$Rm)],
 (tMOVr PC, GPR:$Rm, pred:$p)>;
 }
+
+
+// In Thumb1, "nop" is encoded as a "mov r8, r8". Technically, the bf00
+// encoding is available on ARMv6K, but we don't differentiate that finely.
+def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
+
+
+// For round-trip assembly/disassembly, we have to handle a CPS instruction
+// without any iflags. That's not, strictly speaking, valid syntax, but it's
+// a useful extension and assembles to defined behaviour (the insn does
+// nothing).
+def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c2c6cbc..471ec29 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -12,13 +12,32 @@
 //===----------------------------------------------------------------------===//

 // IT block predicate field
+def it_pred_asmoperand : AsmOperandClass {
+ let Name = "ITCondCode";
+ let ParserMethod = "parseITCondCode";
+}
 def it_pred : Operand<i32> {
 let PrintMethod = "printMandatoryPredicateOperand";
+ let ParserMatchClass = it_pred_asmoperand;
 }

 // IT block condition mask
+def it_mask_asmoperand : AsmOperandClass { let Name = "ITMask"; }
 def it_mask : Operand<i32> {
 let PrintMethod = "printThumbITMask";
+ let ParserMatchClass = it_mask_asmoperand;
+}
+
+// t2_shift_imm: An integer that encodes a shift amount and the type of shift
+// (asr or lsl). The 6-bit immediate encodes as:
+// {5} 0 ==> lsl
+// 1 asr
+// {4-0} imm5 shift amount.
+// asr #32 not allowed +def t2_shift_imm : Operand<i32> { + let PrintMethod = "printShiftImmOperand"; + let ParserMatchClass = ShifterImmAsmOperand; + let DecoderMethod = "DecodeT2ShifterImmOperand"; } // Shifted operands. No register controlled shifts for Thumb2. @@ -28,6 +47,8 @@ def t2_so_reg : Operand<i32>, // reg imm [shl,srl,sra,rotr]> { let EncoderMethod = "getT2SORegOpValue"; let PrintMethod = "printT2SOOperand"; + let DecoderMethod = "DecodeSORegImmOperand"; + let ParserMatchClass = ShiftedImmAsmOperand; let MIOperandInfo = (ops rGPR, i32imm); } @@ -50,6 +71,7 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ }]> { let ParserMatchClass = t2_so_imm_asmoperand; let EncoderMethod = "getT2SOImmOpValue"; + let DecoderMethod = "DecodeT2SOImm"; } // t2_so_imm_not - Match an immediate that is a complement @@ -65,11 +87,6 @@ def t2_so_imm_neg : Operand<i32>, return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; }], t2_so_imm_neg_XFORM>; -/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. -def imm1_31 : ImmLeaf<i32, [{ - return (int32_t)Imm >= 1 && (int32_t)Imm < 32; -}]>; - /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{ @@ -96,17 +113,20 @@ def lo5AllOne : PatLeaf<(i32 imm), [{ // Define Thumb2 specific addressing modes. // t2addrmode_imm12 := reg + imm12 +def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";} def t2addrmode_imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { let PrintMethod = "printAddrModeImm12Operand"; let EncoderMethod = "getAddrModeImm12OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm12"; + let ParserMatchClass = t2addrmode_imm12_asmoperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemMode5AsmOperand; } // t2ldrlabel := imm12 def t2ldrlabel : Operand<i32> { let EncoderMethod = "getAddrModeImm12OpValue"; + let PrintMethod = "printT2LdrLabelOperand"; } @@ -116,13 +136,36 @@ def t2adrlabel : Operand<i32> { } +// t2addrmode_posimm8 := reg + imm8 +def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";} +def t2addrmode_posimm8 : Operand<i32> { + let PrintMethod = "printT2AddrModeImm8Operand"; + let EncoderMethod = "getT2AddrModeImm8OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm8"; + let ParserMatchClass = MemPosImm8OffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + +// t2addrmode_negimm8 := reg - imm8 +def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";} +def t2addrmode_negimm8 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { + let PrintMethod = "printT2AddrModeImm8Operand"; + let EncoderMethod = "getT2AddrModeImm8OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm8"; + let ParserMatchClass = MemNegImm8OffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + // t2addrmode_imm8 := reg +/- imm8 +def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; } def t2addrmode_imm8 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand"; let EncoderMethod = "getT2AddrModeImm8OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm8"; + let ParserMatchClass = MemImm8OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemMode5AsmOperand; } def t2am_imm8_offset : Operand<i32>, @@ -130,38 +173,61 @@ def t2am_imm8_offset : Operand<i32>, 
[], [SDNPWantRoot]> { let PrintMethod = "printT2AddrModeImm8OffsetOperand"; let EncoderMethod = "getT2AddrModeImm8OffsetOpValue"; - let ParserMatchClass = MemMode5AsmOperand; + let DecoderMethod = "DecodeT2Imm8"; } // t2addrmode_imm8s4 := reg +/- (imm8 << 2) +def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} def t2addrmode_imm8s4 : Operand<i32> { let PrintMethod = "printT2AddrModeImm8s4Operand"; let EncoderMethod = "getT2AddrModeImm8s4OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm8s4"; + let ParserMatchClass = MemImm8s4OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); - let ParserMatchClass = MemMode5AsmOperand; } +def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; } def t2am_imm8s4_offset : Operand<i32> { let PrintMethod = "printT2AddrModeImm8s4OffsetOperand"; + let EncoderMethod = "getT2Imm8s4OpValue"; + let DecoderMethod = "DecodeT2Imm8S4"; +} + +// t2addrmode_imm0_1020s4 := reg + (imm8 << 2) +def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass { + let Name = "MemImm0_1020s4Offset"; +} +def t2addrmode_imm0_1020s4 : Operand<i32> { + let PrintMethod = "printT2AddrModeImm0_1020s4Operand"; + let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue"; + let DecoderMethod = "DecodeT2AddrModeImm0_1020s4"; + let ParserMatchClass = MemImm0_1020s4OffsetAsmOperand; + let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm); } // t2addrmode_so_reg := reg + (reg << imm2) +def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";} def t2addrmode_so_reg : Operand<i32>, ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { let PrintMethod = "printT2AddrModeSoRegOperand"; let EncoderMethod = "getT2AddrModeSORegOpValue"; + let DecoderMethod = "DecodeT2AddrModeSOReg"; + let ParserMatchClass = t2addrmode_so_reg_asmoperand; let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm); - let ParserMatchClass = MemMode5AsmOperand; } -// t2addrmode_reg := reg -// Used by load/store exclusive instructions. Useful to enable right assembly -// parsing and printing. Not used for any codegen matching. -// -def t2addrmode_reg : Operand<i32> { - let PrintMethod = "printAddrMode7Operand"; - let MIOperandInfo = (ops GPR); - let ParserMatchClass = MemMode7AsmOperand; +// Addresses for the TBB/TBH instructions. +def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; } +def addrmode_tbb : Operand<i32> { + let PrintMethod = "printAddrModeTBB"; + let ParserMatchClass = addrmode_tbb_asmoperand; + let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm); +} +def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; } +def addrmode_tbh : Operand<i32> { + let PrintMethod = "printAddrModeTBH"; + let ParserMatchClass = addrmode_tbh_asmoperand; + let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm); } //===----------------------------------------------------------------------===// @@ -419,47 +485,6 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, } -/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a -/// unary operation that produces a value. These are predicable and can be -/// changed to modify CPSR. 
-multiclass T2I_un_irs<bits<4> opcod, string opc, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { - // shifted imm - def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii, - opc, "\t$Rd, $imm", - [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> { - let isAsCheapAsAMove = Cheap; - let isReMaterializable = ReMat; - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15} = 0; - } - // register - def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir, - opc, ".w\t$Rd, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rm))]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{14-12} = 0b000; // imm3 - let Inst{7-6} = 0b00; // imm2 - let Inst{5-4} = 0b00; // type - } - // shifted register - def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis, - opc, ".w\t$Rd, $ShiftedRm", - [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{19-16} = 0b1111; // Rn - } -} - /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a /// binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. @@ -500,21 +525,18 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, } // Assembly aliases for optional destination operand when it's the same // as the source operand. - def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), + def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, - cc_out:$s)>, - Requires<[IsThumb2]>; - def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"), + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"), (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, - cc_out:$s)>, - Requires<[IsThumb2]>; - def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"), + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"), (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift, pred:$p, - cc_out:$s)>, - Requires<[IsThumb2]>; + cc_out:$s)>; } /// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need @@ -522,7 +544,27 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, multiclass T2I_bin_w_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, PatFrag opnode, string baseOpc, bit Commutable = 0> : - T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w">; + T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> { + // Assembler aliases w/o the ".w" suffix. + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, + rGPR:$Rm, pred:$p, + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn, + t2_so_reg:$shift, pred:$p, + cc_out:$s)>; + + // and with the optional destination operand, too. 
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>;
+}

 /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
 /// reversed. The 'rr' form is only defined for the disassembler; for codegen
@@ -563,45 +605,28 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {

 /// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
 /// instruction modifies the CPSR register.
-let isCodeGenOnly = 1, Defs = [CPSR] in {
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
 multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
 InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
 PatFrag opnode, bit Commutable = 0> {
 // shifted imm
- def ri : T2TwoRegImm<
+ def ri : T2sTwoRegImm<
 (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{15} = 0;
- }
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_imm:$imm))]>;
 // register
- def rr : T2ThreeReg<
+ def rr : T2sThreeReg<
 (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
- let isCommutable = Commutable;
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{14-12} = 0b000; // imm3
- let Inst{7-6} = 0b00; // imm2
- let Inst{5-4} = 0b00; // type
- }
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, rGPR:$Rm))]>;
 // shifted register
- def rs : T2TwoRegShiftedReg<
+ def rs : T2sTwoRegShiftedReg<
 (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- }
+ opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]>;
 }
 }

@@ -614,9 +639,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
 // in particular for taking the address of a local.
let isReMaterializable = 1 in { def ri : T2sTwoRegImm< - (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), IIC_iALUi, - opc, ".w\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> { + (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi, + opc, ".w\t$Rd, $Rn, $imm", + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24} = 1; @@ -626,9 +651,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, } // 12-bit imm def ri12 : T2I< - (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, + (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, !strconcat(opc, "w"), "\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> { + [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -644,9 +669,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{7-0} = imm{7-0}; } // register - def rr : T2sThreeReg<(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), IIC_iALUr, - opc, ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> { + def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm), + IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm", + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -658,9 +683,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, } // shifted register def rs : T2sTwoRegShiftedReg< - (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), + (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> { + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24} = 1; @@ -671,13 +696,13 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns /// for a binary operation that produces a value and use the carry /// bit. It's not predicable. 
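The multiclass below now models the carry chain explicitly: each pattern both consumes and produces CPSR. A purely illustrative C++ sketch of the adc semantics being captured:

#include <cstdint>
#include <utility>

// Illustrative only: adc consumes the incoming carry and produces a new
// one, which is why the patterns below read CPSR and define it again.
static std::pair<uint32_t, bool> adc(uint32_t Rn, uint32_t Op2, bool CarryIn) {
  uint64_t Wide = (uint64_t)Rn + Op2 + (CarryIn ? 1 : 0);
  return {(uint32_t)Wide, (Wide >> 32) != 0}; // {result, carry-out}
}

int main() {
  std::pair<uint32_t, bool> R = adc(0xFFFFFFFFu, 0, true); // wraps, sets C
  return (R.first == 0 && R.second) ? 0 : 1;
}
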
-let Uses = [CPSR] in {
+let Defs = [CPSR], Uses = [CPSR] in {
 multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
 bit Commutable = 0> {
 // shifted imm
 def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
 IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>,
 Requires<[IsThumb2]> {
 let Inst{31-27} = 0b11110;
 let Inst{25} = 0;
@@ -687,7 +712,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
 // register
 def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
 opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>,
 Requires<[IsThumb2]> {
 let isCommutable = Commutable;
 let Inst{31-27} = 0b11101;
@@ -701,7 +726,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
 def rs : T2sTwoRegShiftedReg<
 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
 IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
+ [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>,
 Requires<[IsThumb2]> {
 let Inst{31-27} = 0b11101;
 let Inst{26-25} = 0b01;
@@ -710,64 +735,35 @@ }
 }

-// Carry setting variants
-// NOTE: CPSR def omitted because it will be handled by the custom inserter.
-let usesCustomInserter = 1 in {
-multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
- // shifted imm
- def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
- 4, IIC_iALUi,
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
- // register
- def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- 4, IIC_iALUr,
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
- let isCommutable = Commutable;
- }
- // shifted register
- def rs : t2PseudoInst<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- 4, IIC_iALUsi,
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
-}
-}
-
 /// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
 /// version is not needed since this is only for codegen.
-let isCodeGenOnly = 1, Defs = [CPSR] in {
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
 multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
 // shifted imm
- def ri : T2TwoRegImm<
+ def ri : T2sTwoRegImm<
 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
- !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{15} = 0;
- }
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, rGPR:$Rn))]>;
 // shifted register
- def rs : T2TwoRegShiftedReg<
+ def rs : T2sTwoRegShiftedReg<
 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- IIC_iALUsi, !strconcat(opc, "s"), "\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- } + IIC_iALUsi, opc, "\t$Rd, $Rn, $ShiftedRm", + [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>; } } /// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift / // rotate operation that produces a value. -multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { +multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode, + string baseOpc> { // 5-bit imm def ri : T2sTwoRegShiftImm< - (outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$imm), IIC_iMOVsi, + (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi, opc, ".w\t$Rd, $Rm, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rm, imm1_31:$imm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]> { let Inst{31-27} = 0b11101; let Inst{26-21} = 0b010010; let Inst{19-16} = 0b1111; // Rn @@ -784,20 +780,50 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { let Inst{15-12} = 0b1111; let Inst{7-4} = 0b0000; } + + // Optional destination register + def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + ty:$imm, pred:$p, + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + rGPR:$Rm, pred:$p, + cc_out:$s)>; + + // Assembler aliases w/o the ".w" suffix. + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, + ty:$imm, pred:$p, + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, + rGPR:$Rm, pred:$p, + cc_out:$s)>; + + // and with the optional destination operand, too. + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + ty:$imm, pred:$p, + cc_out:$s)>; + def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + rGPR:$Rm, pred:$p, + cc_out:$s)>; } /// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test /// patterns. Similar to T2I_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. 
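"Only implicitly set CPSR" means the subtraction result is discarded and only the NZCV flags survive. A hedged C++ sketch of that flag computation for cmp (our helper, not LLVM code):

#include <cstdint>

// Our helper, not LLVM code: cmp computes Rn - Op2 purely for its NZCV
// flags; the difference itself is never written to a register.
struct Flags { bool N, Z, C, V; };

static Flags thumb2Cmp(uint32_t Rn, uint32_t Op2) {
  uint32_t Diff = Rn - Op2;
  Flags F;
  F.N = (Diff >> 31) & 1u;
  F.Z = Diff == 0;
  F.C = Rn >= Op2;                               // carry = no borrow
  F.V = (((Rn ^ Op2) & (Rn ^ Diff)) >> 31) & 1u; // signed overflow
  return F;
}

int main() {
  Flags F = thumb2Cmp(1, 2); // 1 - 2: negative, borrow, no overflow
  return (F.N && !F.Z && !F.C && !F.V) ? 0 : 1;
}
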
-let isCompare = 1, Defs = [CPSR] in { multiclass T2I_cmp_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode> { + PatFrag opnode, string baseOpc> { +let isCompare = 1, Defs = [CPSR] in { // shifted imm def ri : T2OneRegCmpImm< - (outs), (ins GPR:$Rn, t2_so_imm:$imm), iii, + (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii, opc, ".w\t$Rn, $imm", - [(opnode GPR:$Rn, t2_so_imm:$imm)]> { + [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -807,9 +833,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, } // register def rr : T2TwoRegCmp< - (outs), (ins GPR:$lhs, rGPR:$rhs), iir, - opc, ".w\t$lhs, $rhs", - [(opnode GPR:$lhs, rGPR:$rhs)]> { + (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir, + opc, ".w\t$Rn, $Rm", + [(opnode GPRnopc:$Rn, rGPR:$Rm)]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -821,9 +847,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, } // shifted register def rs : T2OneRegCmpShiftedReg< - (outs), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis, + (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis, opc, ".w\t$Rn, $ShiftedRm", - [(opnode GPR:$Rn, t2_so_reg:$ShiftedRm)]> { + [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -831,55 +857,60 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, let Inst{11-8} = 0b1111; // Rd } } + + // Assembler aliases w/o the ".w" suffix. + // No alias here for 'rr' version as not all instantiations of this + // multiclass want one (CMP in particular, does not). + def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn, + t2_so_imm:$imm, pred:$p)>; + def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn, + t2_so_reg:$shift, + pred:$p)>; } /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. multiclass T2I_ld<bit signed, bits<2> opcod, string opc, - InstrItinClass iii, InstrItinClass iis, PatFrag opnode> { - def i12 : T2Ii12<(outs GPR:$Rt), (ins t2addrmode_imm12:$addr), iii, + InstrItinClass iii, InstrItinClass iis, RegisterClass target, + PatFrag opnode> { + def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii, opc, ".w\t$Rt, $addr", - [(set GPR:$Rt, (opnode t2addrmode_imm12:$addr))]> { - let Inst{31-27} = 0b11111; - let Inst{26-25} = 0b00; + [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> { + bits<4> Rt; + bits<17> addr; + let Inst{31-25} = 0b1111100; let Inst{24} = signed; let Inst{23} = 1; let Inst{22-21} = opcod; let Inst{20} = 1; // load - - bits<4> Rt; - let Inst{15-12} = Rt; - - bits<17> addr; - let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn - let Inst{23} = addr{12}; // U + let Inst{15-12} = Rt; let Inst{11-0} = addr{11-0}; // imm } - def i8 : T2Ii8 <(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), iii, + def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", - [(set GPR:$Rt, (opnode t2addrmode_imm8:$addr))]> { + [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> { + bits<4> Rt; + bits<13> addr; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; let Inst{23} = 0; let Inst{22-21} = opcod; let Inst{20} = 1; // load + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Rt; let Inst{11} = 1; // Offset: index==TRUE, wback==FALSE let Inst{10} = 1; // The P bit. 
- let Inst{8} = 0; // The W bit. - - bits<4> Rt; - let Inst{15-12} = Rt; - - bits<13> addr; - let Inst{19-16} = addr{12-9}; // Rn let Inst{9} = addr{8}; // U + let Inst{8} = 0; // The W bit. let Inst{7-0} = addr{7-0}; // imm } - def s : T2Iso <(outs GPR:$Rt), (ins t2addrmode_so_reg:$addr), iis, + def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", - [(set GPR:$Rt, (opnode t2addrmode_so_reg:$addr))]> { + [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; @@ -895,12 +926,14 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{19-16} = addr{9-6}; // Rn let Inst{3-0} = addr{5-2}; // Rm let Inst{5-4} = addr{1-0}; // imm + + let DecoderMethod = "DecodeT2LoadShift"; } // FIXME: Is the pci variant actually needed? - def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii, + def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", - [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { + [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { let isReMaterializable = 1; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -918,10 +951,11 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns. multiclass T2I_st<bits<2> opcod, string opc, - InstrItinClass iii, InstrItinClass iis, PatFrag opnode> { - def i12 : T2Ii12<(outs), (ins GPR:$Rt, t2addrmode_imm12:$addr), iii, + InstrItinClass iii, InstrItinClass iis, RegisterClass target, + PatFrag opnode> { + def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii, opc, ".w\t$Rt, $addr", - [(opnode GPR:$Rt, t2addrmode_imm12:$addr)]> { + [(opnode target:$Rt, t2addrmode_imm12:$addr)]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0001; let Inst{22-21} = opcod; @@ -936,9 +970,9 @@ multiclass T2I_st<bits<2> opcod, string opc, let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm } - def i8 : T2Ii8 <(outs), (ins GPR:$Rt, t2addrmode_imm8:$addr), iii, + def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", - [(opnode GPR:$Rt, t2addrmode_imm8:$addr)]> { + [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0000; let Inst{22-21} = opcod; @@ -956,9 +990,9 @@ multiclass T2I_st<bits<2> opcod, string opc, let Inst{9} = addr{8}; // U let Inst{7-0} = addr{7-0}; // imm } - def s : T2Iso <(outs), (ins GPR:$Rt, t2addrmode_so_reg:$addr), iis, + def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", - [(opnode GPR:$Rt, t2addrmode_so_reg:$addr)]> { + [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0000; let Inst{22-21} = opcod; @@ -977,146 +1011,81 @@ multiclass T2I_st<bits<2> opcod, string opc, /// T2I_ext_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. 
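Before the class itself, the rotate-then-extend semantics the comment describes, sketched in C++ for sxtb (illustrative helpers, not LLVM's implementation):

#include <cstdint>

// Illustrative helpers, not LLVM's implementation: rotate Rm right by
// 0/8/16/24, then sign-extend the low byte, i.e. what "sxtb Rd, Rm, ror #16"
// computes, matching the (opnode (rotr rGPR:$Rm, rot_imm:$rot)) pattern.
static uint32_t rotr32(uint32_t V, unsigned Amt) {
  Amt &= 31u;
  return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
}

static uint32_t sxtb(uint32_t Rm, unsigned Rot) {
  return (uint32_t)(int32_t)(int8_t)rotr32(Rm, Rot);
}

int main() {
  // Byte 2 of 0x00800000 is 0x80, which sign-extends to 0xFFFFFF80.
  return sxtb(0x00800000u, 16) == 0xFFFFFF80u ? 0 : 1;
}
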
-multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr, - opc, ".w\t$Rd, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rm))]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = 0b00; // rotate - } - def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr, - opc, ".w\t$Rd, $Rm, ror $rot", - [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate - } +class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> + : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr, + opc, ".w\t$Rd, $Rm$rot", + [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>, + Requires<[IsThumb2]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + + bits<2> rot; + let Inst{5-4} = rot{1-0}; // rotate } // UXTB16 - Requres T2ExtractPack, does not need the .w qualifier. -multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { - def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr, - opc, "\t$Rd, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rm))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = 0b00; // rotate - } - def r_rot : T2TwoReg<(outs rGPR:$dst), (ins rGPR:$Rm, rot_imm:$rot), - IIC_iEXTr, opc, "\t$dst, $Rm, ror $rot", - [(set rGPR:$dst, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate - } +class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> + : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), + IIC_iEXTr, opc, "\t$Rd, $Rm$rot", + [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack, IsThumb2]> { + bits<2> rot; + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = rot; } // SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern // supported yet. 
-multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> { - def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr, - opc, "\t$Rd, $Rm", []>, +class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> + : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr, + opc, "\t$Rd, $Rm$rot", []>, Requires<[IsThumb2, HasT2ExtractPack]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = 0b00; // rotate - } - def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr, - opc, "\t$Rd, $Rm, ror $rot", []>, - Requires<[IsThumb2, HasT2ExtractPack]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate - } + bits<2> rot; + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = rot; } /// T2I_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. -multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr, - opc, "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = 0b00; // rotate - } - def rr_rot : T2ThreeReg<(outs rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot), - IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", - [(set rGPR:$Rd, (opnode rGPR:$Rn, - (rotr rGPR:$Rm, rot_imm:$rot)))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate - } +class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> + : T2ThreeReg<(outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot), + IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", + [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack, IsThumb2]> { + bits<2> rot; + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = rot; } -// DO variant - disassembly only, no pattern - -multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> { - def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr, - opc, "\t$Rd, $Rn, $Rm", []> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = 0b00; // rotate - } - def rr_rot :T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot), - IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate - } +class T2I_exta_rrot_np<bits<3> opcod, string opc> + : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot), + IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []> { + bits<2> rot; + let Inst{31-27} = 0b11111; + 
let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = rot; } //===----------------------------------------------------------------------===// @@ -1143,7 +1112,7 @@ class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin, // assembler. def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), (ins t2adrlabel:$addr, pred:$p), - IIC_iALUi, "adr{$p}.w\t$Rd, #$addr", []> { + IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -1160,6 +1129,8 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let Inst{26} = addr{11}; let Inst{14-12} = addr{10-8}; let Inst{7-0} = addr{7-0}; + + let DecoderMethod = "DecodeT2Adr"; } let neverHasSideEffects = 1, isReMaterializable = 1 in @@ -1177,33 +1148,33 @@ def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), // Load let canFoldAsLoad = 1, isReMaterializable = 1 in -defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, +defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, UnOpFrag<(load node:$Src)>>; // Loads with zero extension defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - UnOpFrag<(zextloadi16 node:$Src)>>; + rGPR, UnOpFrag<(zextloadi16 node:$Src)>>; defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - UnOpFrag<(zextloadi8 node:$Src)>>; + rGPR, UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - UnOpFrag<(sextloadi16 node:$Src)>>; + rGPR, UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - UnOpFrag<(sextloadi8 node:$Src)>>; + rGPR, UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_imm8s4:$addr), - IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", []>; + IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>; } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // zextload i1 -> zextload i8 def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr), (t2LDRBi12 t2addrmode_imm12:$addr)>; -def : T2Pat<(zextloadi1 t2addrmode_imm8:$addr), - (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(zextloadi1 t2addrmode_negimm8:$addr), + (t2LDRBi8 t2addrmode_negimm8:$addr)>; def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr), (t2LDRBs t2addrmode_so_reg:$addr)>; def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)), @@ -1214,8 +1185,8 @@ def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)), // earlier? 
def : T2Pat<(extloadi1 t2addrmode_imm12:$addr), (t2LDRBi12 t2addrmode_imm12:$addr)>; -def : T2Pat<(extloadi1 t2addrmode_imm8:$addr), - (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi1 t2addrmode_negimm8:$addr), + (t2LDRBi8 t2addrmode_negimm8:$addr)>; def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr), (t2LDRBs t2addrmode_so_reg:$addr)>; def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)), @@ -1223,8 +1194,8 @@ def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)), def : T2Pat<(extloadi8 t2addrmode_imm12:$addr), (t2LDRBi12 t2addrmode_imm12:$addr)>; -def : T2Pat<(extloadi8 t2addrmode_imm8:$addr), - (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi8 t2addrmode_negimm8:$addr), + (t2LDRBi8 t2addrmode_negimm8:$addr)>; def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr), (t2LDRBs t2addrmode_so_reg:$addr)>; def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)), @@ -1232,8 +1203,8 @@ def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)), def : T2Pat<(extloadi16 t2addrmode_imm12:$addr), (t2LDRHi12 t2addrmode_imm12:$addr)>; -def : T2Pat<(extloadi16 t2addrmode_imm8:$addr), - (t2LDRHi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi16 t2addrmode_negimm8:$addr), + (t2LDRHi8 t2addrmode_negimm8:$addr)>; def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr), (t2LDRHs t2addrmode_so_reg:$addr)>; def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), @@ -1247,83 +1218,86 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), // Indexed loads let mayLoad = 1, neverHasSideEffects = 1 in { -def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn), +def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, - "ldr", "\t$Rt, $addr!", "$addr.base = $Rn", - []>; + "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", + []> { + let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; +} -def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn), - (ins GPR:$base, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iLoad_iu, - "ldr", "\t$Rt, [$Rn], $addr", "$base = $Rn", - []>; +def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iLoad_iu, + "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; -def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn), +def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn", - []>; -def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn), - (ins GPR:$base, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, [$Rn], $addr", "$base = $Rn", - []>; - -def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn), + "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", + []> { + let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; +} +def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + +def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn", - []>; -def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn), - (ins GPR:$base, t2am_imm8_offset:$addr), 
- AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, [$Rn], $addr", "$base = $Rn", - []>; - -def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn), + "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", + []> { + let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; +} +def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + +def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn", - []>; -def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn), - (ins GPR:$base, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrsb", "\t$Rt, [$Rn], $addr", "$base = $Rn", - []>; - -def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn), + "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", + []> { + let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; +} +def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + +def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn", - []>; -def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn), - (ins GPR:$base, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrsh", "\t$dst, [$Rn], $addr", "$base = $Rn", - []>; + "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", + []> { + let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; +} +def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; } // mayLoad = 1, neverHasSideEffects = 1 -// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are -// for disassembly only. +// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110). // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii> - : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, + : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc, "\t$Rt, $addr", []> { + bits<4> Rt; + bits<13> addr; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; let Inst{23} = 0; let Inst{22-21} = type; let Inst{20} = 1; // load + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = Rt; let Inst{11} = 1; let Inst{10-8} = 0b110; // PUW. 
- - bits<4> Rt; - bits<13> addr; - let Inst{15-12} = Rt; - let Inst{19-16} = addr{12-9}; - let Inst{7-0} = addr{7-0}; + let Inst{7-0} = addr{7-0}; } def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>; @@ -1333,67 +1307,97 @@ def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>; def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>; // Store -defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, +defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, BinOpFrag<(store node:$LHS, node:$RHS)>>; defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si, - BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; + rGPR, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, - BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; + rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr), - IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>; + IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>; // Indexed stores -def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), +def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, - "str", "\t$Rt, [$Rn, $addr]!", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; - -def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iStore_iu, - "str", "\t$Rt, [$Rn], $addr", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; - -def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), + "str", "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; +} +def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, - "strh", "\t$Rt, [$Rn, $addr]!", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; - -def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), - AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, - "strh", "\t$Rt, [$Rn], $addr", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; + "strh", "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; +} -def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), +def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, - "strb", "\t$Rt, [$Rn, $addr]!", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; + "strb", "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; +} -def 
t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), - (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), +def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iStore_iu, + "str", "\t$Rt, $Rn$offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", + [(set GPRnopc:$Rn_wb, + (post_store rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset))]>; + +def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, - "strb", "\t$Rt, [$Rn], $addr", - "$Rn = $base_wb,@earlyclobber $base_wb", - [(set GPR:$base_wb, - (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; + "strh", "\t$Rt, $Rn$offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", + [(set GPRnopc:$Rn_wb, + (post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset))]>; + +def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, + "strb", "\t$Rt, $Rn$offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", + [(set GPRnopc:$Rn_wb, + (post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn, + t2am_imm8_offset:$offset))]>; + +// Pseudo-instructions for pattern matching the pre-indexed stores. We can't +// put the patterns on the instruction definitions directly as ISel wants +// the address base and offset to be separate operands, not a single +// complex operand like we represent the instructions themselves. The +// pseudos map between the two. +let usesCustomInserter = 1, + Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in { +def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPRnopc:$Rn_wb, + (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; +def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPRnopc:$Rn_wb, + (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; +def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), + (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPRnopc:$Rn_wb, + (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; +} + // STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly // only. @@ -1424,21 +1428,31 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // ldrd / strd pre / post variants // For disassembly only. 
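The writeback behavior the ldrd/strd variants below (and the pre/post-indexed loads and stores above) encode is easy to state in scalar terms: a pre-indexed access computes base+offset, uses that as the effective address, and writes it back to the base register; a post-indexed access uses the old base as the address and advances the base afterwards. A minimal C++ sketch of the two store flavors (illustrative only, not LLVM code):

#include <cassert>
#include <cstddef>
#include <cstdint>

// strb rT, [rN, #off]!  (pre-indexed): store at base+off, then
// write base+off back to the base register.
static uint8_t *pre_store(uint8_t value, uint8_t *base, std::ptrdiff_t off) {
  uint8_t *addr = base + off;
  *addr = value;
  return addr;          // updated base register
}

// strb rT, [rN], #off  (post-indexed): store at the old base, then
// advance the base register by off.
static uint8_t *post_store(uint8_t value, uint8_t *base, std::ptrdiff_t off) {
  *base = value;
  return base + off;    // updated base register
}

int main() {
  uint8_t buf[8] = {0};
  uint8_t *p = pre_store(1, buf, 4);  // writes buf[4]; p == &buf[4]
  p = post_store(2, p, 2);            // overwrites buf[4]; p == &buf[6]
  assert(buf[4] == 2 && p == buf + 6);
  return 0;
}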
-def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2), - (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>; +def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), + (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { + let AsmMatchConverter = "cvtT2LdrdPre"; + let DecoderMethod = "DecodeT2LDRDPreInstruction"; +} -def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2), - (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>; +def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), + (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm), + IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm", + "$addr.base = $wb", []>; -def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs), - (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), - IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>; +def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), + IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", + "$addr.base = $wb", []> { + let AsmMatchConverter = "cvtT2StrdPre"; + let DecoderMethod = "DecodeT2STRDPreInstruction"; +} -def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), - (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), - IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>; +def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), + (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr, + t2am_imm8s4_offset:$imm), + IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm", + "$addr.base = $wb", []>; // T2Ipl (Preload Data/Instruction) notifies the memory system of possible future // data/instruction accesses. These are for disassembly only. @@ -1463,9 +1477,9 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let Inst{11-0} = addr{11-0}; // imm12 } - def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc, + def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc, "\t$addr", - [(ARMPreload t2addrmode_imm8:$addr, (i32 write), (i32 instr))]> { + [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = 0; // U = 0 @@ -1496,6 +1510,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let Inst{19-16} = addr{9-6}; // Rn let Inst{3-0} = addr{5-2}; // Rm let Inst{5-4} = addr{1-0}; // imm2 + + let DecoderMethod = "DecodeT2LoadShift"; } } @@ -1507,11 +1523,11 @@ defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>; // Load / store multiple Instructions.
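One encoding detail in the load/store-multiple definitions that follow: the 16-bit register list is not copied into the instruction wholesale. For the store forms, bit 13 (SP) and bit 15 (PC) of the list are hard-wired to zero, since neither register may appear in an STM list. A small C++ sketch of that masking (illustrative only, not LLVM code):

#include <cassert>
#include <cstdint>

// Build the register-list field of a Thumb2 STM, mirroring the bit
// assignments below: bits 13 (SP) and 15 (PC) are forced to zero,
// all other register bits pass through unchanged.
static uint16_t stmRegList(uint16_t regs) {
  return regs & static_cast<uint16_t>(~((1u << 15) | (1u << 13)));
}

int main() {
  // {r0, r4, lr} -> bits 0, 4 and 14 survive.
  assert(stmRegList((1u << 0) | (1u << 4) | (1u << 14)) == 0x4011);
  // SP and PC are dropped from the encoding.
  assert(stmRegList(1u << 13) == 0);
  assert(stmRegList(1u << 15) == 0);
  return 0;
}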
// -multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, +multiclass thumb2_ld_mult<string asm, InstrItinClass itin, InstrItinClass itin_upd, bit L_bit> { def IA : T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin, !strconcat(asm, "ia${p}.w\t$Rn, $regs"), []> { + itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> { bits<4> Rn; bits<16> regs; @@ -1522,11 +1538,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, let Inst{21} = 0; // No writeback let Inst{20} = L_bit; let Inst{19-16} = Rn; - let Inst{15-0} = regs; + let Inst{15} = 0; + let Inst{14-0} = regs{14-0}; } def IA_UPD : T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin_upd, !strconcat(asm, "ia${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> { + itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> { bits<4> Rn; bits<16> regs; @@ -1537,11 +1554,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, let Inst{21} = 1; // Writeback let Inst{20} = L_bit; let Inst{19-16} = Rn; - let Inst{15-0} = regs; + let Inst{15} = 0; + let Inst{14-0} = regs{14-0}; } def DB : T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin, !strconcat(asm, "db${p}.w\t$Rn, $regs"), []> { + itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> { bits<4> Rn; bits<16> regs; @@ -1552,11 +1570,12 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, let Inst{21} = 0; // No writeback let Inst{20} = L_bit; let Inst{19-16} = Rn; - let Inst{15-0} = regs; + let Inst{15} = 0; + let Inst{14-0} = regs{14-0}; } def DB_UPD : T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), - itin_upd, !strconcat(asm, "db${p}.w\t$Rn, $regs"), "$Rn = $wb", []> { + itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { bits<4> Rn; bits<16> regs; @@ -1567,17 +1586,95 @@ multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, let Inst{21} = 1; // Writeback let Inst{20} = L_bit; let Inst{19-16} = Rn; - let Inst{15-0} = regs; + let Inst{15} = 0; + let Inst{14-0} = regs{14-0}; } } let neverHasSideEffects = 1 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -defm t2LDM : thumb2_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>; +defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>; + +multiclass thumb2_st_mult<string asm, InstrItinClass itin, + InstrItinClass itin_upd, bit L_bit> { + def IA : + T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin, !strconcat(asm, "${p}.w\t$Rn, $regs"), []> { + bits<4> Rn; + bits<16> regs; + + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = 0b01; // Increment After + let Inst{22} = 0; + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15} = 0; + let Inst{14} = regs{14}; + let Inst{13} = 0; + let Inst{12-0} = regs{12-0}; + } + def IA_UPD : + T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin_upd, !strconcat(asm, "${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> { + bits<4> Rn; + bits<16> regs; + + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = 0b01; // Increment After + let Inst{22} = 0; + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15} = 0; + let Inst{14} = regs{14}; + let Inst{13} = 0; + let Inst{12-0} = regs{12-0}; + } + def DB : + T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin, !strconcat(asm, "db${p}\t$Rn, $regs"), []> { + bits<4> Rn; + bits<16> regs; + + let Inst{31-27} = 0b11101; + let 
Inst{26-25} = 0b00; + let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = 0; + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15} = 0; + let Inst{14} = regs{14}; + let Inst{13} = 0; + let Inst{12-0} = regs{12-0}; + } + def DB_UPD : + T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + bits<4> Rn; + bits<16> regs; + + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = 0; + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15} = 0; + let Inst{14} = regs{14}; + let Inst{13} = 0; + let Inst{12-0} = regs{12-0}; + } +} + let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; +defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; } // neverHasSideEffects @@ -1587,7 +1684,7 @@ defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; // let neverHasSideEffects = 1 in -def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, +def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, "mov", ".w\t$Rd, $Rm", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -1596,6 +1693,10 @@ def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, let Inst{14-12} = 0b000; let Inst{7-4} = 0b0000; } +def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, + pred:$p, CPSR)>; +def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, + pred:$p, CPSR)>; // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, @@ -1610,12 +1711,20 @@ def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi, let Inst{15} = 0; } -def : InstAlias<"mov${s}${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, - pred:$p, cc_out:$s)>, - Requires<[IsThumb2]>; +// cc_out is handled as part of the explicit mnemonic in the parser for 'mov'. +// Use aliases to get that to play nice here. 
+def : t2InstAlias<"movs${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, + pred:$p, CPSR)>; +def : t2InstAlias<"movs${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, + pred:$p, CPSR)>; + +def : t2InstAlias<"mov${p}.w $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, + pred:$p, zero_reg)>; +def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, + pred:$p, zero_reg)>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi, +def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, "movw", "\t$Rd, $imm", [(set rGPR:$Rd, imm0_65535:$imm)]> { let Inst{31-27} = 0b11110; @@ -1632,6 +1741,7 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi, let Inst{26} = imm{11}; let Inst{14-12} = imm{10-8}; let Inst{7-0} = imm{7-0}; + let DecoderMethod = "DecodeT2MOVTWInstruction"; } def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), @@ -1639,7 +1749,7 @@ def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), let Constraints = "$src = $Rd" in { def t2MOVTi16 : T2I<(outs rGPR:$Rd), - (ins rGPR:$src, i32imm_hilo16:$imm), IIC_iMOVi, + (ins rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi, "movt", "\t$Rd, $imm", [(set rGPR:$Rd, (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> { @@ -1657,6 +1767,7 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd), let Inst{26} = imm{11}; let Inst{14-12} = imm{10-8}; let Inst{7-0} = imm{7-0}; + let DecoderMethod = "DecodeT2MOVTWInstruction"; } def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), @@ -1671,28 +1782,26 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; // Sign extenders -defm t2SXTB : T2I_ext_rrot<0b100, "sxtb", +def t2SXTB : T2I_ext_rrot<0b100, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; -defm t2SXTH : T2I_ext_rrot<0b000, "sxth", +def t2SXTH : T2I_ext_rrot<0b000, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">; +def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">; -defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab", +def t2SXTAB : T2I_exta_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah", +def t2SXTAH : T2I_exta_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">; - -// TODO: SXT(A){B|H}16 - done for disassembly only +def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">; // Zero extenders let AddedComplexity = 16 in { -defm t2UXTB : T2I_ext_rrot<0b101, "uxtb", +def t2UXTB : T2I_ext_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; -defm t2UXTH : T2I_ext_rrot<0b001, "uxth", +def t2UXTH : T2I_ext_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", +def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; // FIXME: This pattern incorrectly assumes the shl operator is a rotate. @@ -1700,17 +1809,17 @@ defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", // instead so we can include a check for masking back in the upper // eight bits of the source into the lower eight bits of the result. 
//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF), -// (t2UXTB16r_rot rGPR:$Src, 24)>, +// (t2UXTB16 rGPR:$Src, 3)>, // Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot rGPR:$Src, 8)>, + (t2UXTB16 rGPR:$Src, 1)>, Requires<[HasT2ExtractPack, IsThumb2]>; -defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab", +def t2UXTAB : T2I_exta_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; -defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah", +def t2UXTAH : T2I_exta_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; -defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">; +def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">; } //===----------------------------------------------------------------------===// @@ -1723,27 +1832,37 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>; // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants. +// +// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the +// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by +// AdjustInstrPostInstrSelection where we determine whether or not to +// set the "s" bit based on CPSR liveness. +// +// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen +// support for an optional CPSR definition that corresponds to the DAG +// node's second value. We can then eliminate the implicit def of CPSR. defm t2ADDS : T2I_bin_s_irs <0b1000, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsi, - BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>; defm t2SUBS : T2I_bin_s_irs <0b1101, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsi, - BinOpFrag<(subc node:$LHS, node:$RHS)>>; + BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; +let hasPostISelHook = 1 in { defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", - BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; + BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", - BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; -defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, - node:$RHS)>, 1>; -defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, - node:$RHS)>>; + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; +} // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", BinOpFrag<(sub node:$LHS, node:$RHS)>>; + +// FIXME: Eliminate them if we can write def : Pat patterns which defines +// CPSR and the implicit def of CPSR is not needed. defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb", - BinOpFrag<(subc node:$LHS, node:$RHS)>>; + BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. // The assume-no-carry-in form uses the negation of the input since add/sub @@ -1760,23 +1879,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; let AddedComplexity = 1 in -def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm), +def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm), (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>; -def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm), +def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm), (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>; // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. 
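The identity behind the with-carry-in patterns that follow: ARM performs subtraction as addition of the bitwise complement, so x - y == x + ~y + 1, and a subtract-with-carry computes x + ~y + C. Adding the complement of an immediate together with the carry is therefore exactly an SBC of that immediate, which is what matching the carry-in add of a complemented immediate onto t2SBCri relies on. A standalone C++ check of the identities (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 0x80000000u, 0xDEADBEEFu})
    for (uint32_t y : {0u, 5u, 0xFFFFFFFFu}) {
      // Plain subtraction as complement-addition.
      assert(x - y == x + ~y + 1u);
      // With carry C: x + ~y + C == x - y - (1 - C), i.e. SBC semantics
      // (C is the inverted borrow).
      for (uint32_t c : {0u, 1u})
        assert(x + ~y + c == x - y - (1u - c));
    }
  return 0;
}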
let AddedComplexity = 1 in -def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm), +def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR), (t2SBCri rGPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm), +def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; -let AddedComplexity = 1 in -def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm), - (t2SBCSri rGPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm), - (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only @@ -1893,8 +2007,7 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, let Inst{7-4} = op7_4; } -// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only - +// Unsigned Sum of Absolute Differences [and Accumulate]. def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, @@ -1906,8 +2019,7 @@ def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsThumb2, HasThumb2DSP]>; -// Signed/Unsigned saturate -- for disassembly only - +// Signed/Unsigned saturate. class T2SatI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern> { @@ -1918,26 +2030,26 @@ class T2SatI<dag oops, dag iops, InstrItinClass itin, let Inst{11-8} = Rd; let Inst{19-16} = Rn; - let Inst{4-0} = sat_imm{4-0}; - let Inst{21} = sh{6}; + let Inst{4-0} = sat_imm; + let Inst{21} = sh{5}; let Inst{14-12} = sh{4-2}; let Inst{7-6} = sh{1-0}; } def t2SSAT: T2SatI< - (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn, shift_imm:$sh), - NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", - [/* For disassembly only; pattern left blank */]> { + (outs rGPR:$Rd), + (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), + NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; let Inst{20} = 0; let Inst{15} = 0; + let Inst{5} = 0; } def t2SSAT16: T2SatI< - (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn), NoItinerary, - "ssat16", "\t$Rd, $sat_imm, $Rn", - [/* For disassembly only; pattern left blank */]>, + (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary, + "ssat16", "\t$Rd, $sat_imm, $Rn", []>, Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; @@ -1946,30 +2058,30 @@ def t2SSAT16: T2SatI< let Inst{21} = 1; // sh = '1' let Inst{14-12} = 0b000; // imm3 = '000' let Inst{7-6} = 0b00; // imm2 = '00' + let Inst{5-4} = 0b00; } def t2USAT: T2SatI< - (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh), - NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", - [/* For disassembly only; pattern left blank */]> { + (outs rGPR:$Rd), + (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), + NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1110; let Inst{20} = 0; let Inst{15} = 0; } -def t2USAT16: T2SatI<(outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn), +def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn), NoItinerary, - "usat16", "\t$dst, $sat_imm, $Rn", - [/* For disassembly only; pattern left blank */]>, + "usat16", "\t$Rd, $sat_imm, $Rn", []>, Requires<[IsThumb2, HasThumb2DSP]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1110; + let Inst{31-22} = 0b1111001110; let Inst{20} = 0; let Inst{15} = 0; 
let Inst{21} = 1; // sh = '1' let Inst{14-12} = 0b000; // imm3 = '000' let Inst{7-6} = 0b00; // imm2 = '00' + let Inst{5-4} = 0b00; } def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>; @@ -1979,10 +2091,14 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; // Shift and rotate Instructions. // -defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>; -defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>; -defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; -defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; +defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, + BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">; +defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, + BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">; +defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, + BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">; +defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, + BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">; // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), @@ -2090,7 +2206,7 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm), } def t2SBFX: T2TwoRegBitFI< - (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb), + (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb), IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -2099,7 +2215,7 @@ def t2SBFX: T2TwoRegBitFI< } def t2UBFX: T2TwoRegBitFI< - (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb), + (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb), IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -2125,26 +2241,6 @@ let Constraints = "$src = $Rd" in { let msb{4-0} = imm{9-5}; let lsb{4-0} = imm{4-0}; } - - // GNU as only supports this form of bfi (w/ 4 arguments) - let isAsmParserOnly = 1 in - def t2BFI4p : T2TwoRegBitFI<(outs rGPR:$Rd), - (ins rGPR:$src, rGPR:$Rn, lsb_pos_imm:$lsbit, - width_imm:$width), - IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width", - []> { - let Inst{31-27} = 0b11110; - let Inst{26} = 0; // should be 0. - let Inst{25} = 1; - let Inst{24-20} = 0b10110; - let Inst{15} = 0; - let Inst{5} = 0; // should be 0. - - bits<5> lsbit; - bits<5> width; - let msb{4-0} = width; // Custom encoder => lsb+width-1 - let lsb{4-0} = lsbit; - } } defm t2ORN : T2I_bin_irs<0b0011, "orn", @@ -2152,13 +2248,53 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>, "t2ORN", 0, "">; +/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a +/// unary operation that produces a value. These are predicable and can be +/// changed to modify CPSR. 
+multiclass T2I_un_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { + // shifted imm + def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii, + opc, "\t$Rd, $imm", + [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> { + let isAsCheapAsAMove = Cheap; + let isReMaterializable = ReMat; + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15} = 0; + } + // register + def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir, + opc, ".w\t$Rd, $Rm", + [(set rGPR:$Rd, (opnode rGPR:$Rm))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } + // shifted register + def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis, + opc, ".w\t$Rd, $ShiftedRm", + [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{19-16} = 0b1111; // Rn + } +} + // Prefer this over t2EORri ra, rb, -1 because mvn has a 16-bit version let AddedComplexity = 1 in defm t2MVN : T2I_un_irs <0b0011, "mvn", IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi, UnOpFrag<(not node:$Src)>, 1, 1>; - let AddedComplexity = 1 in def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), (t2BICri rGPR:$src, t2_so_imm_not:$imm)>; @@ -2209,9 +2345,9 @@ def t2MLS: T2FourReg< let neverHasSideEffects = 1 in { let isCommutable = 1 in { def t2SMULL : T2MulLong<0b000, 0b0000, - (outs rGPR:$Rd, rGPR:$Ra), + (outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64, - "smull", "\t$Rd, $Ra, $Rn, $Rm", []>; + "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>; def t2UMULL : T2MulLong<0b010, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), @@ -2468,7 +2604,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only +// Halfword multiply accumulate long: SMLAL<x><y> def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>, @@ -2487,8 +2623,6 @@ def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd), Requires<[IsThumb2, HasThumb2DSP]>; // Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD -// These are for disassembly only.
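For reference, the dual halfword multiplies treat each operand as a pair of signed 16-bit halves: SMUAD sums the two 16x16 products, SMUSD takes their difference, and the accumulating forms fold in Ra (the X variants swap the halves of the second operand; Q-flag saturation behavior is omitted here). A hedged C++ reference model of the two basic forms, not LLVM code:

#include <cassert>
#include <cstdint>

static int32_t lo16(uint32_t v) { return (int16_t)(v & 0xFFFF); }
static int32_t hi16(uint32_t v) { return (int16_t)(v >> 16); }

// SMUAD: sum of the signed products of the corresponding halfwords.
static int32_t smuad(uint32_t rn, uint32_t rm) {
  return lo16(rn) * lo16(rm) + hi16(rn) * hi16(rm);
}

// SMUSD: difference of the two signed halfword products.
static int32_t smusd(uint32_t rn, uint32_t rm) {
  return lo16(rn) * lo16(rm) - hi16(rn) * hi16(rm);
}

int main() {
  uint32_t rn = 0x0002FFFF; // halves: hi = 2, lo = -1
  uint32_t rm = 0x00030004; // halves: hi = 3, lo = 4
  assert(smuad(rn, rm) == (-1 * 4) + (2 * 3)); // 2
  assert(smusd(rn, rm) == (-1 * 4) - (2 * 3)); // -10
  return 0;
}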
- def t2SMUAD: T2ThreeReg_mac< 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>, @@ -2513,7 +2647,7 @@ def t2SMUSDX:T2ThreeReg_mac< Requires<[IsThumb2, HasThumb2DSP]> { let Inst{15-12} = 0b1111; } -def t2SMLAD : T2ThreeReg_mac< +def t2SMLAD : T2FourReg_mac< 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad", "\t$Rd, $Rn, $Rm, $Ra", []>, @@ -2532,20 +2666,20 @@ def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd), "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald", - "\t$Ra, $Rd, $Rm, $Rn", []>, + (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald", + "\t$Ra, $Rd, $Rn, $Rm", []>, Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx", - "\t$Ra, $Rd, $Rm, $Rn", []>, + (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx", + "\t$Ra, $Rd, $Rn, $Rm", []>, Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), - (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld", - "\t$Ra, $Rd, $Rm, $Rn", []>, + (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld", + "\t$Ra, $Rd, $Rn, $Rm", []>, Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx", - "\t$Ra, $Rd, $Rm, $Rn", []>, + "\t$Ra, $Rd, $Rn, $Rm", []>, Requires<[IsThumb2, HasThumb2DSP]>; //===----------------------------------------------------------------------===// @@ -2613,10 +2747,10 @@ def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)), (t2REVSH rGPR:$Rm)>; def t2PKHBT : T2ThreeReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_lsl_amt:$sh), IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF), - (and (shl rGPR:$Rm, lsl_amt:$sh), + (and (shl rGPR:$Rm, pkh_lsl_amt:$sh), 0xFFFF0000)))]>, Requires<[HasT2ExtractPack, IsThumb2]> { let Inst{31-27} = 0b11101; @@ -2625,9 +2759,9 @@ def t2PKHBT : T2ThreeReg< let Inst{5} = 0; // BT form let Inst{4} = 0; - bits<8> sh; - let Inst{14-12} = sh{7-5}; - let Inst{7-6} = sh{4-3}; + bits<5> sh; + let Inst{14-12} = sh{4-2}; + let Inst{7-6} = sh{1-0}; } // Alternate cases for PKHBT where identities eliminate some nodes. @@ -2635,16 +2769,16 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)), (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>, Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)), - (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>, + (t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, Requires<[HasT2ExtractPack, IsThumb2]>; // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and // will match the pattern below. 
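That note can be verified directly: for shift amounts of 1-15, the sign bits an arithmetic shift copies in stay above bit 15, so once the result is masked with 0xFFFF a logical and an arithmetic right shift agree, and the PKHTB pattern below can safely match srl where the ISA describes asr. A minimal C++ check (illustrative only; assumes the usual arithmetic behavior of >> on signed values):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0x00000000u, 0x7FFF8000u, 0x80000001u, 0xFFFFFFFFu})
    for (int sh = 1; sh <= 15; ++sh) {
      uint32_t logical    = (x >> sh) & 0xFFFFu;                    // srl
      uint32_t arithmetic = (uint32_t)((int32_t)x >> sh) & 0xFFFFu; // sra
      // The shifts differ only in bits 31..(32-sh); for sh <= 15 those
      // all lie above the 0xFFFF mask, so the low halves match.
      assert(logical == arithmetic);
    }
  return 0;
}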
def t2PKHTB : T2ThreeReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, pkh_asr_amt:$sh), IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh", [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000), - (and (sra rGPR:$Rm, asr_amt:$sh), + (and (sra rGPR:$Rm, pkh_asr_amt:$sh), 0xFFFF)))]>, Requires<[HasT2ExtractPack, IsThumb2]> { let Inst{31-27} = 0b11101; @@ -2653,19 +2787,19 @@ def t2PKHTB : T2ThreeReg< let Inst{5} = 1; // TB form let Inst{4} = 0; - bits<8> sh; - let Inst{14-12} = sh{7-5}; - let Inst{7-6} = sh{4-3}; + bits<5> sh; + let Inst{14-12} = sh{4-2}; + let Inst{7-6} = sh{1-0}; } // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)), - (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>, + (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)), - (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>, + (t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>, Requires<[HasT2ExtractPack, IsThumb2]>; //===----------------------------------------------------------------------===// @@ -2673,14 +2807,14 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), // defm t2CMP : T2I_cmp_irs<0b1101, "cmp", IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">; -def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_imm:$imm), - (t2CMPri GPR:$lhs, t2_so_imm:$imm)>; -def : T2Pat<(ARMcmpZ GPR:$lhs, rGPR:$rhs), - (t2CMPrr GPR:$lhs, rGPR:$rhs)>; -def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs), - (t2CMPrs GPR:$lhs, t2_so_reg:$rhs)>; +def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm), + (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>; +def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs), + (t2CMPrr GPRnopc:$lhs, rGPR:$rhs)>; +def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs), + (t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>; //FIXME: Disable CMN, as CCodes are backwards from compare expectations // Compare-to-zero still works out, just not the relationals @@ -2688,20 +2822,23 @@ def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs), // BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; defm t2CMNz : T2I_cmp_irs<0b1000, "cmn", IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, - BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; + BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>, + "t2CMNz">; //def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), // (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; -def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm), - (t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm), + (t2CMNzri GPRnopc:$src, t2_so_imm_neg:$imm)>; defm t2TST : T2I_cmp_irs<0b0000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, + "t2TST">; defm t2TEQ : T2I_cmp_irs<0b0100, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; + BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, + "t2TEQ">; // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use @@ -2723,7 +2860,7 @@ def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd), // FIXME: Pseudo-ize these. For now, just mark codegen only. 
let isCodeGenOnly = 1 in { let isMoveImm = 1 in -def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm), +def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm), IIC_iCMOVi, "movw", "\t$Rd, $imm", []>, RegConstraint<"$false = $Rd"> { @@ -2807,20 +2944,19 @@ def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, } def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, - "dsb", "\t$opt", - [/* For disassembly only; pattern left blank */]>, + "dsb", "\t$opt", []>, Requires<[IsThumb, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f4; let Inst{3-0} = opt; } -// ISB has only full system option -- for disassembly only -def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasV7]> { +def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, + "isb", "\t$opt", + []>, Requires<[IsThumb2, HasDB]> { + bits<4> opt; let Inst{31-4} = 0xf3bf8f6; - let Inst{3-0} = 0b1111; + let Inst{3-0} = opt; } class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, @@ -2858,28 +2994,27 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), +def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexb", "\t$Rt, $addr", "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), +def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexh", "\t$Rt, $addr", "", []>; -def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), +def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "ldrex", "\t$Rt, $addr", "", []> { + bits<4> Rt; + bits<12> addr; let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000101; - let Inst{11-8} = 0b1111; - let Inst{7-0} = 0b00000000; // imm8 = 0 - - bits<4> Rt; - bits<4> addr; - let Inst{19-16} = addr; + let Inst{19-16} = addr{11-8}; let Inst{15-12} = Rt; + let Inst{11-8} = 0b1111; + let Inst{7-0} = addr{7-0}; } let hasExtraDefRegAllocReq = 1 in def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), - (ins t2addrmode_reg:$addr), + (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { @@ -2890,33 +3025,33 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), let mayStore = 1, Constraints = "@earlyclobber $Rd" in { def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), - (ins rGPR:$Rt, t2addrmode_reg:$addr), + (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexb", "\t$Rd, $Rt, $addr", "", []>; def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), - (ins rGPR:$Rt, t2addrmode_reg:$addr), + (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexh", "\t$Rd, $Rt, $addr", "", []>; -def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), +def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, + t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "strex", "\t$Rd, $Rt, $addr", "", []> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0000100; - let Inst{7-0} = 0b00000000; // imm8 = 0 - bits<4> Rd; - bits<4> addr; bits<4> Rt; - let Inst{11-8} = Rd; - let Inst{19-16} = addr; + bits<12> addr; + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000100; + let Inst{19-16} = addr{11-8}; 
let Inst{15-12} = Rt; + let Inst{11-8} = Rd; + let Inst{7-0} = addr{7-0}; } } let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), - (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr), + (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { @@ -2924,9 +3059,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), let Inst{11-8} = Rt2; } -// Clear-Exclusive is for disassembly only. -def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex", - [/* For disassembly only; pattern left blank */]>, +def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, Requires<[IsThumb2, HasV7]> { let Inst{31-16} = 0xf3bf; let Inst{15-14} = 0b10; @@ -2986,8 +3119,8 @@ def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in -def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br, - "b.w\t$target", +def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, + "b", ".w\t$target", [(br bb:$target)]> { let Inst{31-27} = 0b11110; let Inst{15-14} = 0b10; @@ -3009,15 +3142,13 @@ def t2BR_JT : t2PseudoInst<(outs), // FIXME: Add a non-pc based case that can be predicated. def t2TBB_JT : t2PseudoInst<(outs), - (ins GPR:$index, i32imm:$jt, i32imm:$id), - 0, IIC_Br, []>; + (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>; def t2TBH_JT : t2PseudoInst<(outs), - (ins GPR:$index, i32imm:$jt, i32imm:$id), - 0, IIC_Br, []>; + (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>; -def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, - "tbb", "\t[$Rn, $Rm]", []> { +def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br, + "tbb", "\t$addr", []> { bits<4> Rn; bits<4> Rm; let Inst{31-20} = 0b111010001101; let Inst{19-16} = Rn; @@ -3025,10 +3156,12 @@ def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, let Inst{15-5} = 0b11110000000; let Inst{4} = 0; // B form let Inst{3-0} = Rm; + + let DecoderMethod = "DecodeThumbTableBranch"; } -def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, - "tbh", "\t[$Rn, $Rm, lsl #1]", []> { +def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br, + "tbh", "\t$addr", []> { bits<4> Rn; bits<4> Rm; let Inst{31-20} = 0b111010001101; @@ -3036,13 +3169,15 @@ def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, let Inst{15-5} = 0b11110000000; let Inst{4} = 1; // H form let Inst{3-0} = Rm; + + let DecoderMethod = "DecodeThumbTableBranch"; } } // isNotDuplicable, isIndirectBranch } // isBranch, isTerminator, isBarrier // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. :( let isBranch = 1, isTerminator = 1 in def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, "b", ".w\t$target", @@ -3060,6 +3195,8 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, let Inst{13} = target{18}; let Inst{21-16} = target{17-12}; let Inst{10-0} = target{11-1}; + + let DecoderMethod = "DecodeThumb2BCCInstruction"; } // Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so @@ -3068,9 +3205,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // Darwin version.
let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], Uses = [SP] in - def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, variable_ops), + def tTAILJMPd: tPseudoExpand<(outs), + (ins uncondbrtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], - (t2B uncondbrtarget:$dst)>, + (t2B uncondbrtarget:$dst, pred:$p)>, Requires<[IsThumb2, IsDarwin]>; } @@ -3087,30 +3225,55 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), bits<4> mask; let Inst{7-4} = cc; let Inst{3-0} = mask; + + let DecoderMethod = "DecodeIT"; } // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} -def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", - [/* For disassembly only; pattern left blank */]> { +def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []> { + bits<4> func; let Inst{31-27} = 0b11110; let Inst{26} = 0; let Inst{25-20} = 0b111100; - let Inst{15-14} = 0b10; - let Inst{12} = 0; - - bits<4> func; let Inst{19-16} = func; + let Inst{15-0} = 0b1000111100000000; +} + +// Compare and branch on zero / non-zero +let isBranch = 1, isTerminator = 1 in { + def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, + "cbz\t$Rn, $target", []>, + T1Misc<{0,0,?,1,?,?,?}>, + Requires<[IsThumb2]> { + // A8.6.27 + bits<6> target; + bits<3> Rn; + let Inst{9} = target{5}; + let Inst{7-3} = target{4-0}; + let Inst{2-0} = Rn; + } + + def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, + "cbnz\t$Rn, $target", []>, + T1Misc<{1,0,?,1,?,?,?}>, + Requires<[IsThumb2]> { + // A8.6.27 + bits<6> target; + bits<3> Rn; + let Inst{9} = target{5}; + let Inst{7-3} = target{4-0}; + let Inst{2-0} = Rn; + } } -// Change Processor State is a system instruction -- for disassembly and -// parsing only. + +// Change Processor State is a system instruction. // FIXME: Since the asm parser has currently no clean way to handle optional // operands, create 3 versions of the same instruction. Once there's a clean // framework to represent optional operands, change this behavior. class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, - !strconcat("cps", asm_op), - [/* For disassembly only; pattern left blank */]> { + !strconcat("cps", asm_op), []> { bits<2> imod; bits<3> iflags; bits<5> mode; @@ -3126,6 +3289,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, let Inst{8} = M; let Inst{7-5} = iflags; let Inst{4-0} = mode; + let DecoderMethod = "DecodeT2CPSInstruction"; } let M = 1 in @@ -3135,14 +3299,12 @@ let mode = 0, M = 0 in def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod.w\t$iflags">; let imod = 0, iflags = 0, M = 1 in - def t2CPS1p : t2CPS<(ins i32imm:$mode), "\t$mode">; + def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">; // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -// Helper class for disassembly only. 
class T2I_hint<bits<8> op7_0, string opc, string asm> - : T2I<(outs), (ins), NoItinerary, opc, asm, - [/* For disassembly only; pattern left blank */]> { + : T2I<(outs), (ins), NoItinerary, opc, asm, []> { let Inst{31-20} = 0xf3a; let Inst{19-16} = 0b1111; let Inst{15-14} = 0b10; @@ -3158,20 +3320,17 @@ def t2WFI : T2I_hint<0b00000011, "wfi", ".w">; def t2SEV : T2I_hint<0b00000100, "sev", ".w">; def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { - let Inst{31-20} = 0xf3a; - let Inst{15-14} = 0b10; - let Inst{12} = 0; - let Inst{10-8} = 0b000; - let Inst{7-4} = 0b1111; - bits<4> opt; + let Inst{31-20} = 0b111100111010; + let Inst{19-16} = 0b1111; + let Inst{15-8} = 0b10000000; + let Inst{7-4} = 0b1111; let Inst{3-0} = opt; } -// Secure Monitor Call is a system instruction -- for disassembly only +// Secure Monitor Call is a system instruction. // Option = Inst{19-16} -def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", - [/* For disassembly only; pattern left blank */]> { +def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> { let Inst{31-27} = 0b11110; let Inst{26-20} = 0b1111111; let Inst{15-12} = 0b1000; @@ -3180,32 +3339,30 @@ def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", let Inst{19-16} = opt; } -class T2SRS<bits<12> op31_20, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern> { - let Inst{31-20} = op31_20{11-0}; - bits<5> mode; + let Inst{31-25} = 0b1110100; + let Inst{24-23} = Op; + let Inst{22} = 0; + let Inst{21} = W; + let Inst{20-16} = 0b01101; + let Inst{15-5} = 0b11000000000; let Inst{4-0} = mode{4-0}; } -// Store Return State is a system instruction -- for disassembly only -def t2SRSDBW : T2SRS<0b111010000010, - (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode", - [/* For disassembly only; pattern left blank */]>; -def t2SRSDB : T2SRS<0b111010000000, - (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode", - [/* For disassembly only; pattern left blank */]>; -def t2SRSIAW : T2SRS<0b111010011010, - (outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode", - [/* For disassembly only; pattern left blank */]>; -def t2SRSIA : T2SRS<0b111010011000, - (outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode", - [/* For disassembly only; pattern left blank */]>; - -// Return From Exception is a system instruction -- for disassembly only +// Store Return State is a system instruction. +def t2SRSDB_UPD : T2SRS<0b00, 1, (outs), (ins imm0_31:$mode), NoItinerary, + "srsdb", "\tsp!, $mode", []>; +def t2SRSDB : T2SRS<0b00, 0, (outs), (ins imm0_31:$mode), NoItinerary, + "srsdb","\tsp, $mode", []>; +def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary, + "srsia","\tsp!, $mode", []>; +def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary, + "srsia","\tsp, $mode", []>; +// Return From Exception is a system instruction. 
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern> { @@ -3277,53 +3434,186 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp), imm:$cp))]>, Requires<[IsThumb2]>; +// Pseudo instruction that combines movs + predicated rsbmi +// to implement integer ABS +let usesCustomInserter = 1, Defs = [CPSR] in { +def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src), + NoItinerary, []>, Requires<[IsThumb2]>; +} + +//===----------------------------------------------------------------------===// +// Coprocessor load/store -- for disassembly only +// +class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm> + : T2I<oops, iops, NoItinerary, opc, asm, []> { + let Inst{31-28} = op31_28; + let Inst{27-25} = 0b110; +} + +multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> { + def _OFFSET : T2CI<op31_28, + (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr"> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; + let Inst{21} = 0; // W = 0 + let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _PRE : T2CI<op31_28, + (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr!"> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; + let Inst{21} = 1; // W = 1 + let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _POST: T2CI<op31_28, + (outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + postidx_imm8s4:$offset), + asm, "\t$cop, $CRd, $addr, $offset"> { + bits<9> offset; + bits<4> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 0; // P = 0 + let Inst{23} = offset{8}; + let Inst{22} = Dbit; + let Inst{21} = 1; // W = 1 + let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = offset{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _OPTION : T2CI<op31_28, (outs), + (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + coproc_option_imm:$option), + asm, "\t$cop, $CRd, $addr, $option"> { + bits<8> option; + bits<4> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{22} = Dbit; + let Inst{21} = 0; // W = 0 + let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = option; + let DecoderMethod = "DecodeCopMemInstruction"; + } +} + +defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">; +defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">; +defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">; +defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">; +defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">; +defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">; +defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">; +defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">; + + //===----------------------------------------------------------------------===// // Move between special register and ARM core register -- for disassembly only // +// Move to ARM core register from Special Register -class T2SpecialReg<bits<12> op31_20, bits<2> op15_14, bits<1>
op12, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { - let Inst{31-20} = op31_20{11-0}; - let Inst{15-14} = op15_14{1-0}; - let Inst{12} = op12{0}; +// A/R class MRS. +// +// A/R class can only move from CPSR or SPSR. +def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []>, + Requires<[IsThumb2,IsARClass]> { + bits<4> Rd; + let Inst{31-12} = 0b11110011111011111000; + let Inst{11-8} = Rd; + let Inst{7-0} = 0b0000; } -class T2MRS<bits<12> op31_20, bits<2> op15_14, bits<1> op12, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : T2SpecialReg<op31_20, op15_14, op12, oops, iops, itin, opc, asm, pattern> { +def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>; + +def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []>, + Requires<[IsThumb2,IsARClass]> { bits<4> Rd; + let Inst{31-12} = 0b11110011111111111000; + let Inst{11-8} = Rd; + let Inst{7-0} = 0b0000; +} + +// M class MRS. +// +// This MRS has a mask field in bits 7-0 and can take more values than +// the A/R class (a full msr_mask). +def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary, + "mrs", "\t$Rd, $mask", []>, + Requires<[IsThumb2,IsMClass]> { + bits<4> Rd; + bits<8> mask; + let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; let Inst{19-16} = 0b1111; + let Inst{7-0} = mask; } -def t2MRS : T2MRS<0b111100111110, 0b10, 0, - (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr", - [/* For disassembly only; pattern left blank */]>; -def t2MRSsys : T2MRS<0b111100111111, 0b10, 0, - (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", - [/* For disassembly only; pattern left blank */]>; // Move from ARM core register to Special Register // +// A/R class MSR. +// // No need to have both system and application versions, the encodings are the // same and the assembly parser has no way to distinguish between them. The mask // operand contains the special register (R Bit) in bit 4 and bits 3-0 contains // the mask with the fields to be accessed in the special register. -def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */, - 0 /* op12 */, (outs), (ins msr_mask:$mask, rGPR:$Rn), - NoItinerary, "msr", "\t$mask, $Rn", - [/* For disassembly only; pattern left blank */]> { +def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn), + NoItinerary, "msr", "\t$mask, $Rn", []>, + Requires<[IsThumb2,IsARClass]> { bits<5> mask; bits<4> Rn; - let Inst{19-16} = Rn; + let Inst{31-21} = 0b11110011100; let Inst{20} = mask{4}; // R Bit - let Inst{13} = 0b0; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b1000; let Inst{11-8} = mask{3-0}; + let Inst{7-0} = 0; } +// M class MSR. 
+// +// Move from ARM core register to Special Register +def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn), + NoItinerary, "msr", "\t$SYSm, $Rn", []>, + Requires<[IsThumb2,IsMClass]> { + bits<8> SYSm; + bits<4> Rn; + let Inst{31-21} = 0b11110011100; + let Inst{20} = 0b0; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b1000; + let Inst{7-0} = SYSm; +} + + //===----------------------------------------------------------------------===// // Move between coprocessor and ARM core register // @@ -3389,13 +3679,12 @@ def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, /* from coprocessor to ARM core register */ def t2MRC : t2MovRCopro<0b1110, "mrc", 1, - (outs GPR:$Rt), - (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - []>; + (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), []>; def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1, - (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, - c_imm:$CRm, i32imm:$opc2), []>; + (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), []>; def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; @@ -3465,3 +3754,269 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{19-16} = CRn; let Inst{23-20} = opc1; } + + + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +// + +// SXT/UXT with no rotate +let AddedComplexity = 16 in { +def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>, + Requires<[IsThumb2]>; +def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>, + Requires<[IsThumb2]>; +def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)), + (t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)), + (t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +} + +def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>, + Requires<[IsThumb2]>; +def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>, + Requires<[IsThumb2]>; +def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)), + (t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)), + (t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>, + Requires<[HasT2ExtractPack, IsThumb2]>; + +// Atomic load/store patterns +def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr), + (t2LDRBi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_load_8 t2addrmode_negimm8:$addr), + (t2LDRBi8 t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_load_8 t2addrmode_so_reg:$addr), + (t2LDRBs t2addrmode_so_reg:$addr)>; +def : T2Pat<(atomic_load_16 t2addrmode_imm12:$addr), + (t2LDRHi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_load_16 t2addrmode_negimm8:$addr), + (t2LDRHi8 t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_load_16 t2addrmode_so_reg:$addr), + (t2LDRHs t2addrmode_so_reg:$addr)>; +def : T2Pat<(atomic_load_32 t2addrmode_imm12:$addr), + (t2LDRi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_load_32 t2addrmode_negimm8:$addr), + (t2LDRi8 t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_load_32 t2addrmode_so_reg:$addr), + (t2LDRs t2addrmode_so_reg:$addr)>; +def : T2Pat<(atomic_store_8 t2addrmode_imm12:$addr, GPR:$val), + (t2STRBi12 GPR:$val, t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_store_8 
t2addrmode_negimm8:$addr, GPR:$val), + (t2STRBi8 GPR:$val, t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_store_8 t2addrmode_so_reg:$addr, GPR:$val), + (t2STRBs GPR:$val, t2addrmode_so_reg:$addr)>; +def : T2Pat<(atomic_store_16 t2addrmode_imm12:$addr, GPR:$val), + (t2STRHi12 GPR:$val, t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_store_16 t2addrmode_negimm8:$addr, GPR:$val), + (t2STRHi8 GPR:$val, t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_store_16 t2addrmode_so_reg:$addr, GPR:$val), + (t2STRHs GPR:$val, t2addrmode_so_reg:$addr)>; +def : T2Pat<(atomic_store_32 t2addrmode_imm12:$addr, GPR:$val), + (t2STRi12 GPR:$val, t2addrmode_imm12:$addr)>; +def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val), + (t2STRi8 GPR:$val, t2addrmode_negimm8:$addr)>; +def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val), + (t2STRs GPR:$val, t2addrmode_so_reg:$addr)>; + + +//===----------------------------------------------------------------------===// +// Assembler aliases +// + +// Aliases for ADC without the ".w" optional width specifier. +def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $Rm", + (t2ADCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $ShiftedRm", + (t2ADCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; + +// Aliases for SBC without the ".w" optional width specifier. +def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $Rm", + (t2SBCrr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm", + (t2SBCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; + +// Aliases for ADD without the ".w" optional width specifier. +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${p} $Rd, $Rn, $imm", + (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm", + (t2ADDrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm", + (t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; + +// Aliases for SUB without the ".w" optional width specifier. +def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${p} $Rd, $Rn, $imm", + (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm", + (t2SUBrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm", + (t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; + +// Alias for compares without the ".w" optional width specifier. +def : t2InstAlias<"cmn${p} $Rn, $Rm", + (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; +def : t2InstAlias<"teq${p} $Rn, $Rm", + (t2TEQrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; +def : t2InstAlias<"tst${p} $Rn, $Rm", + (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; + +// Memory barriers +def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb2, HasDB]>; +def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb2, HasDB]>; +def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb2, HasDB]>; + +// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional +// width specifier. 
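Before the load/store aliases announced just above, a note on what these alias records amount to: each t2InstAlias/InstAlias maps an alternate spelling onto an instruction already defined earlier, with some operands fixed, so when assembling Thumb2 the suffix-less "add r0, r1, r2" picks the same wide encoding as "add.w", and a bare "dmb" means "dmb sy". For the barrier aliases the fixed operand is the 4-bit option field, passed as 0xf. A sketch of that field in C++; only 0xf is actually used by this file, the other values follow the ARMv7 barrier-option encoding and are shown for illustration:

    #include <cstring>

    // 4-bit option handed to t2DMB/t2DSB/t2ISB by the aliases above.
    static int memBarrierOption(const char *Opt) {
      if (!std::strcmp(Opt, "sy"))  return 0xF; // full system (alias default)
      if (!std::strcmp(Opt, "ish")) return 0xB; // inner shareable
      if (!std::strcmp(Opt, "nsh")) return 0x7; // non-shareable
      if (!std::strcmp(Opt, "osh")) return 0x3; // outer shareable
      return -1;                                // remaining options omitted
    }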
+def : t2InstAlias<"ldr${p} $Rt, $addr", + (t2LDRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrb${p} $Rt, $addr", + (t2LDRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrh${p} $Rt, $addr", + (t2LDRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrsb${p} $Rt, $addr", + (t2LDRSBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrsh${p} $Rt, $addr", + (t2LDRSHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; + +def : t2InstAlias<"ldr${p} $Rt, $addr", + (t2LDRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"ldrb${p} $Rt, $addr", + (t2LDRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"ldrh${p} $Rt, $addr", + (t2LDRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"ldrsb${p} $Rt, $addr", + (t2LDRSBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"ldrsh${p} $Rt, $addr", + (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; + +// Alias for MVN without the ".w" optional width specifier. +def : t2InstAlias<"mvn${s}${p} $Rd, $Rm", + (t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm", + (t2MVNs rGPR:$Rd, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; + +// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT when the +// shift amount is zero (i.e., unspecified). +def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm", + (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm", + (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>, + Requires<[HasT2ExtractPack, IsThumb2]>; + +// PUSH/POP aliases for STM/LDM +def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; +def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; +def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; +def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; + +// Alias for REV/REV16/REVSH without the ".w" optional width specifier. +def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>; +def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>; +def : t2InstAlias<"revsh${p} $Rd, $Rm", (t2REVSH rGPR:$Rd, rGPR:$Rm, pred:$p)>; + + +// Alias for RSB without the ".w" optional width specifier, and with optional +// implied destination register. +def : t2InstAlias<"rsb${s}${p} $Rd, $Rn, $imm", + (t2RSBri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"rsb${s}${p} $Rdn, $imm", + (t2RSBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"rsb${s}${p} $Rdn, $Rm", + (t2RSBrr rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"rsb${s}${p} $Rdn, $ShiftedRm", + (t2RSBrs rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, + cc_out:$s)>; + +// SSAT/USAT optional shift operand. +def : t2InstAlias<"ssat${p} $Rd, $sat_imm, $Rn", + (t2SSAT rGPR:$Rd, imm1_32:$sat_imm, rGPR:$Rn, 0, pred:$p)>; +def : t2InstAlias<"usat${p} $Rd, $sat_imm, $Rn", + (t2USAT rGPR:$Rd, imm0_31:$sat_imm, rGPR:$Rn, 0, pred:$p)>; + +// STM w/o the .w suffix. +def : t2InstAlias<"stm${p} $Rn, $regs", + (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>; + +// Alias for STR, STRB, and STRH without the ".w" optional +// width specifier. 
+def : t2InstAlias<"str${p} $Rt, $addr", + (t2STRi12 GPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"strb${p} $Rt, $addr", + (t2STRBi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; +def : t2InstAlias<"strh${p} $Rt, $addr", + (t2STRHi12 rGPR:$Rt, t2addrmode_imm12:$addr, pred:$p)>; + +def : t2InstAlias<"str${p} $Rt, $addr", + (t2STRs GPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"strb${p} $Rt, $addr", + (t2STRBs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"strh${p} $Rt, $addr", + (t2STRHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; + +// Extend instruction optional rotate operand. +def : t2InstAlias<"sxtab${p} $Rd, $Rn, $Rm", + (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxtah${p} $Rd, $Rn, $Rm", + (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxtab16${p} $Rd, $Rn, $Rm", + (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; + +def : t2InstAlias<"sxtb${p} $Rd, $Rm", + (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxtb16${p} $Rd, $Rm", + (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxth${p} $Rd, $Rm", + (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxtb${p}.w $Rd, $Rm", + (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"sxth${p}.w $Rd, $Rm", + (t2SXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; + +def : t2InstAlias<"uxtab${p} $Rd, $Rn, $Rm", + (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxtah${p} $Rd, $Rn, $Rm", + (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxtab16${p} $Rd, $Rn, $Rm", + (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxtb${p} $Rd, $Rm", + (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxtb16${p} $Rd, $Rm", + (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxth${p} $Rd, $Rm", + (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; + +def : t2InstAlias<"uxtb${p}.w $Rd, $Rm", + (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; +def : t2InstAlias<"uxth${p}.w $Rd, $Rm", + (t2UXTH rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; + +// Extend instruction w/o the ".w" optional width specifier. +def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot", + (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; +def : t2InstAlias<"uxtb16${p} $Rd, $Rm$rot", + (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; +def : t2InstAlias<"uxth${p} $Rd, $Rm$rot", + (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; + +def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot", + (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; +def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot", + (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; +def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", + (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index f1f3cb9..e746cf2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -31,18 +31,34 @@ def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; // Operand Definitions. // +// 8-bit floating-point immediate encodings. 
+def FPImmOperand : AsmOperandClass { + let Name = "FPImm"; + let ParserMethod = "parseFPImm"; +} + def vfp_f32imm : Operand<f32>, PatLeaf<(f32 fpimm), [{ - return ARM::getVFPf32Imm(N->getValueAPF()) != -1; - }]> { - let PrintMethod = "printVFPf32ImmOperand"; + return ARM_AM::getFP32Imm(N->getValueAPF()) != -1; + }], SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = ARM_AM::getFP32Imm(InVal); + return CurDAG->getTargetConstant(enc, MVT::i32); + }]>> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; } def vfp_f64imm : Operand<f64>, PatLeaf<(f64 fpimm), [{ - return ARM::getVFPf64Imm(N->getValueAPF()) != -1; - }]> { - let PrintMethod = "printVFPf64ImmOperand"; + return ARM_AM::getFP64Imm(N->getValueAPF()) != -1; + }], SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = ARM_AM::getFP64Imm(InVal); + return CurDAG->getTargetConstant(enc, MVT::i32); + }]>> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; } @@ -385,26 +401,26 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. // FIXME: Verify encoding after integrated assembler is working. -def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a", +def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), + /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f32_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; -def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a", +def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), + /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f16_to_f32 GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a", +def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), + /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a", +def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), + /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, @@ -511,14 +527,25 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, } def VMOVRRS : AVConv3I<0b11000101, 0b1010, - (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2), - IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", + (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2), + IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { + bits<5> src1; + bits<4> Rt; + bits<4> Rt2; + + // Encode instruction operands. + let Inst{3-0} = src1{3-0}; + let Inst{5} = src1{4}; + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{7-6} = 0b00; // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. 
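// (Aside on the VMOVRRS encoding above, and the analogous VMOVSRR below:
// only the first SPR of the pair is encoded, as a 5-bit register number
// split across Inst{3-0} and Inst{5}, with the second SPR implicitly the
// next register; the two GPRs land in Inst{15-12} and Inst{19-16}, and
// the DecodeVMOVRRS/DecodeVMOVSRR hooks named below reverse this packing.)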
let D = VFPNeonDomain; + let DecoderMethod = "DecodeVMOVRRS"; } } // neverHasSideEffects @@ -552,11 +579,24 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { + // Instruction operands. + bits<5> dst1; + bits<4> src1; + bits<4> src2; + + // Encode instruction operands. + let Inst{3-0} = dst1{3-0}; + let Inst{5} = dst1{4}; + let Inst{15-12} = src1; + let Inst{19-16} = src2; + let Inst{7-6} = 0b00; // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + let DecoderMethod = "DecodeVMOVSRR"; } // FMRDH: SPR -> GPR @@ -1084,45 +1124,42 @@ def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_fpUNA64, "vmov", ".f64\t$Dd, $imm", [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { - // Instruction operands. - bits<5> Dd; - bits<32> imm; - - // Encode instruction operands. - let Inst{15-12} = Dd{3-0}; - let Inst{22} = Dd{4}; - let Inst{19} = imm{31}; - let Inst{18-16} = imm{22-20}; - let Inst{3-0} = imm{19-16}; + bits<5> Dd; + bits<8> imm; - // Encode remaining instruction bits. let Inst{27-23} = 0b11101; + let Inst{22} = Dd{4}; let Inst{21-20} = 0b11; + let Inst{19-16} = imm{7-4}; + let Inst{15-12} = Dd{3-0}; let Inst{11-9} = 0b101; let Inst{8} = 1; // Double precision. let Inst{7-4} = 0b0000; + let Inst{3-0} = imm{3-0}; } def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), VFPMiscFrm, IIC_fpUNA32, "vmov", ".f32\t$Sd, $imm", [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { - // Instruction operands. - bits<5> Sd; - bits<32> imm; - - // Encode instruction operands. - let Inst{15-12} = Sd{4-1}; - let Inst{22} = Sd{0}; - let Inst{19} = imm{31}; // The immediate is handled as a double. - let Inst{18-16} = imm{22-20}; - let Inst{3-0} = imm{19-16}; + bits<5> Sd; + bits<8> imm; - // Encode remaining instruction bits. let Inst{27-23} = 0b11101; + let Inst{22} = Sd{0}; let Inst{21-20} = 0b11; + let Inst{19-16} = imm{7-4}; + let Inst{15-12} = Sd{4-1}; let Inst{11-9} = 0b101; let Inst{8} = 0; // Single precision. let Inst{7-4} = 0b0000; + let Inst{3-0} = imm{3-0}; } } + +//===----------------------------------------------------------------------===// +// Assembler aliases. 
+// + +def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; + diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c6efea1..faa8ba7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -14,10 +14,10 @@ #define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -763,9 +764,9 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode) { switch (Opc) { case ARM::LDRi12: - return ARM::LDR_PRE; + return ARM::LDR_PRE_IMM; case ARM::STRi12: - return ARM::STR_PRE; + return ARM::STR_PRE_IMM; case ARM::VLDRS: return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; case ARM::VLDRD: @@ -789,9 +790,9 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, ARM_AM::AddrOpc Mode) { switch (Opc) { case ARM::LDRi12: - return ARM::LDR_POST; + return ARM::LDR_POST_IMM; case ARM::STRi12: - return ARM::STR_POST; + return ARM::STR_POST_IMM; case ARM::VLDRS: return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; case ARM::VLDRD: @@ -892,12 +893,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (!DoMerge) return false; - unsigned Offset = 0; - if (isAM2) - Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); - else if (!isAM5) - Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; - if (isAM5) { // VLDM[SD]_UPD, VSTM[SD]_UPD // (There are no base-updating versions of VLDR/VSTR instructions, but the @@ -911,28 +906,44 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, .addReg(MO.getReg(), (isLd ? getDefRegState(true) : getKillRegState(MO.isKill()))); } else if (isLd) { - if (isAM2) - // LDR_PRE, LDR_POST, - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) - .addReg(Base, RegState::Define) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); - else + if (isAM2) { + // LDR_PRE, LDR_POST + if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { + int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + } else { + int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + } + } else { + int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2LDR_PRE, t2LDR_POST BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + } } else { MachineOperand &MO = MI->getOperand(0); - if (isAM2) + // FIXME: post-indexed stores use am2offset_imm, which still encodes + the vestigial zero-reg offset register.
When that's fixed, this clause + can be removed entirely. + if (isAM2 && NewOpc == ARM::STR_POST_IMM) { + int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); // STR_PRE, STR_POST BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); - else + } else { + int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2STR_PRE, t2STR_POST BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + } } MBB.erase(MBBI); diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 7411b59..daa126d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -14,7 +14,7 @@ #include "ARM.h" #include "ARMAsmPrinter.h" -#include "ARMMCExpr.h" +#include "MCTargetDesc/ARMMCExpr.h" #include "llvm/Constants.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/MC/MCExpr.h" diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td index 76eb496..036822d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -182,8 +182,10 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; // Current Program Status Register. def CPSR : ARMReg<0, "cpsr">; -def FPSCR : ARMReg<1, "fpscr">; -def ITSTATE : ARMReg<2, "itstate">; +def APSR : ARMReg<1, "apsr">; +def SPSR : ARMReg<2, "spsr">; +def FPSCR : ARMReg<3, "fpscr">; +def ITSTATE : ARMReg<4, "itstate">; // Special Registers - only available in privileged mode. def FPSID : ARMReg<0, "fpsid">; @@ -213,6 +215,23 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), }]; } +// GPRs without the PC. Some ARM instructions do not allow the PC in +// certain operand slots, particularly as the destination. Primarily +// useful for disassembly. +def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { + let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; + let AltOrderSelect = [{ + return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + }]; +} + +// GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the +// implied SP argument list. +// FIXME: It would be better to not use this at all and refactor the +// instructions to not have SP as an explicit argument. That makes +// frame index resolution a bit trickier, though. +def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>; + // restricted GPR register class. Many Thumb2 instructions allow the full // register range for operands, but have undefined behaviours when PC // or SP (R13 or R15) are used. The ARM ISA refers to these operands @@ -328,5 +347,6 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> { // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { + let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0; } diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index ef0aaf2..a3a3d58 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -138,13 +138,12 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, // Adjust parameters for memset, EABI uses format (ptr, size, value), // GNU library uses (ptr, value, size) // See RTABI section 4.3.4 -SDValue -ARMSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const -{ +SDValue ARMSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { // Use default for non AAPCS subtargets if (!Subtarget->isAAPCS_ABI()) return SDValue(); @@ -155,7 +154,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, TargetLowering::ArgListEntry Entry; // First argument: data pointer - const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index ec1bf5c..6419a73 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -14,10 +14,27 @@ #ifndef ARMSELECTIONDAGINFO_H #define ARMSELECTIONDAGINFO_H +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Target/TargetSelectionDAGInfo.h" namespace llvm { +namespace ARM_AM { + static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) { + switch (Opcode) { + default: return ARM_AM::no_shift; + case ISD::SHL: return ARM_AM::lsl; + case ISD::SRL: return ARM_AM::lsr; + case ISD::SRA: return ARM_AM::asr; + case ISD::ROTR: return ARM_AM::ror; + //case ISD::ROTL: // Only if imm -> turn into ROTR. + // Can't handle RRX here, because it would require folding a flag into + // the addressing mode. :( This causes us to miss certain things. + //case ARMISD::RRX: return ARM_AM::rrx; + } + } +} // end namespace ARM_AM + class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. 
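The ARM_AM::getShiftOpcForNode helper newly housed in this header gives selection code one place to ask whether a DAG shift node has a direct ARM shifter-operand equivalent; ROTL and RRX are deliberately left out, as its comments explain. A hypothetical caller, only to show the intended use:

    #include "ARMSelectionDAGInfo.h" // for ARM_AM::getShiftOpcForNode

    // True if the ISD shift opcode maps onto an ARM shifter operand
    // (lsl/lsr/asr/ror) and so could be folded into the using instruction.
    static bool hasFoldableShift(unsigned ISDOpcode) {
      return ARM_AM::getShiftOpcForNode(ISDOpcode) != ARM_AM::no_shift;
    }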
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index 1cab9e4..247d6be 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -53,11 +53,14 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasVMLxForwarding(false) , SlowFPBrcc(false) , InThumbMode(false) + , InNaClMode(false) , HasThumb2(false) + , IsMClass(false) , NoARM(false) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) , UseMovt(false) + , SupportsTailCall(false) , HasFP16(false) , HasD16(false) , HasHardwareDivide(false) @@ -111,6 +114,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, else { IsR9Reserved = ReserveR9 | !HasV6Ops; UseMovt = DarwinUseMOVT && hasV6T2Ops(); + const Triple &T = getTargetTriple(); + SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0); } if (!isThumb() || hasThumb2()) diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index c650872..b63e108 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -70,9 +70,16 @@ protected: /// InThumbMode - True if compiling for Thumb, false for ARM. bool InThumbMode; + /// InNaClMode - True if targeting Native Client + bool InNaClMode; + /// HasThumb2 - True if Thumb2 instructions are supported. bool HasThumb2; + /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs - + /// v6m, v7m for example. + bool IsMClass; + /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM; @@ -86,6 +93,11 @@ protected: /// imms (including global addresses). bool UseMovt; + /// SupportsTailCall - True if the OS supports tail call. The dynamic linker + /// must be able to synthesize call stubs for interworking between ARM and + /// Thumb. 
+ bool SupportsTailCall; + /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF /// only so far) bool HasFP16; @@ -209,6 +221,9 @@ protected: const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } + bool isTargetNaCl() const { + return TargetTriple.getOS() == Triple::NativeClient; + } bool isTargetELF() const { return !isTargetDarwin(); } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } @@ -218,10 +233,13 @@ protected: bool isThumb1Only() const { return InThumbMode && !HasThumb2; } bool isThumb2() const { return InThumbMode && HasThumb2; } bool hasThumb2() const { return HasThumb2; } + bool isMClass() const { return IsMClass; } + bool isARClass() const { return !IsMClass; } bool isR9Reserved() const { return IsR9Reserved; } bool useMovt() const { return UseMovt && hasV6T2Ops(); } + bool supportsTailCall() const { return SupportsTailCall; } bool allowsUnalignedMem() const { return AllowsUnalignedMem; } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index f0b176a..96b1e89 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -15,77 +15,50 @@ #include "ARM.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" using namespace llvm; -// This is duplicated code. Refactor this. -static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, - MCContext &Ctx, TargetAsmBackend &TAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll); - - if (TheTriple.isOSWindows()) { - llvm_unreachable("ARM does not support Windows COFF format"); - return NULL; - } - - return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); -} +static cl::opt<bool> +EnableGlobalMerge("global-merge", cl::Hidden, + cl::desc("Enable global merge pass"), + cl::init(true)); extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget); - - // Register the MC Code Emitter - TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter); - TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter); - - // Register the asm backend. - TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend); - TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer); - TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer); - } /// TargetMachine ctor - Create an ARM architecture model. 
/// -ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), +ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { - DefRelocModel = getRelocationModel(); - // Default to soft float ABI if (FloatABIType == FloatABI::Default) FloatABIType = FloatABI::Soft; } -ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : ARMBaseTargetMachine(T, TT, CPU, FS), InstrInfo(Subtarget), +ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" - "v128:32:128-v64:32:64-n32") : + "v128:32:128-v64:32:64-n32-S32") : + Subtarget.isAAPCS_ABI() ? + std::string("e-p:32:32-f64:64:64-i64:64:64-" + "v128:64:128-v64:64:64-n32-S64") : std::string("e-p:32:32-f64:64:64-i64:64:64-" - "v128:64:128-v64:64:64-n32")), + "v128:64:128-v64:64:64-n32-S32")), ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), @@ -95,20 +68,24 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, "support ARM mode execution!"); } -ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : ARMBaseTargetMachine(T, TT, CPU, FS), +ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget.hasThumb2() ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "i16:16:32-i8:8:32-i1:8:32-" - "v128:32:128-v64:32:64-a:0:32-n32") : + "v128:32:128-v64:32:64-a:0:32-n32-S32") : + Subtarget.isAAPCS_ABI() ? 
+ std::string("e-p:32:32-f64:64:64-i64:64:64-" + "i16:16:32-i8:8:32-i1:8:32-" + "v128:64:128-v64:64:64-a:0:32-n32-S64") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-" - "v128:64:128-v64:64:64-a:0:32-n32")), + "v128:64:128-v64:64:64-a:0:32-n32-S32")), ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), @@ -117,10 +94,9 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { } -// Pass Pipeline Configuration bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None && EnableGlobalMerge) PM.add(createARMGlobalMergePass(getTargetLowering())); return false; @@ -139,7 +115,6 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, PM.add(createARMLoadStoreOptimizationPass(true)); if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9()) PM.add(createMLxExpansionPass()); - return true; } @@ -150,7 +125,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, if (!Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass()); if (Subtarget.hasNEON()) - PM.add(createNEONMoveFixPass()); + PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } // Expand some pseudo instructions into multiple instructions to allow @@ -179,10 +154,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { - // FIXME: Move this to TargetJITInfo! - if (DefRelocModel == Reloc::Default) - setRelocationModel(Reloc::Static); - // Machine code emitter pass for ARM. PM.add(createARMJITCodeEmitterPass(*this, JCE)); return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h index bc3d46a..c8c601c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -37,11 +37,11 @@ protected: private: ARMJITInfo JITInfo; InstrItineraryData InstrItins; - Reloc::Model DefRelocModel; // Reloc model before it's overridden. public: - ARMBaseTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + ARMBaseTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } @@ -69,8 +69,9 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMSelectionDAGInfo TSInfo; ARMFrameLowering FrameLowering; public: - ARMTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + ARMTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const ARMRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); @@ -108,8 +109,9 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { // Either Thumb1FrameLowering or ARMFrameLowering. 
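// (Aside on the data layout strings chosen earlier in this diff: the APCS,
// AAPCS, and default-ELF variants differ in f64/i64 alignment and in the
// newly added stack-alignment marker, "S32" versus "S64". AAPCS requires
// 8-byte stack alignment at public interfaces, hence S64; the Thumb
// strings add the i16/i8/i1 and a:0:32 entries but make the same split.)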
OwningPtr<ARMFrameLowering> FrameLowering; public: - ThumbTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + ThumbTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo virtual const ARMBaseRegisterInfo *getRegisterInfo() const { diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp index d9a5fa2..14d35ba 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -7,16 +7,15 @@ // //===----------------------------------------------------------------------===// -#include "ARM.h" -#include "ARMTargetMachine.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCTargetAsmLexer.h" -#include "llvm/Target/TargetAsmLexer.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ -30,7 +29,7 @@ using namespace llvm; namespace { -class ARMBaseAsmLexer : public TargetAsmLexer { +class ARMBaseAsmLexer : public MCTargetAsmLexer { const MCAsmInfo &AsmInfo; const AsmToken &lexDefinite() { @@ -43,7 +42,7 @@ protected: rmap_ty RegisterMap; - void InitRegisterMap(const TargetRegisterInfo *info) { + void InitRegisterMap(const MCRegisterInfo *info) { unsigned numRegs = info->getNumRegs(); for (unsigned i = 0; i < numRegs; ++i) { @@ -77,33 +76,23 @@ protected: } public: ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) - : TargetAsmLexer(T), AsmInfo(MAI) { + : MCTargetAsmLexer(T), AsmInfo(MAI) { } }; class ARMAsmLexer : public ARMBaseAsmLexer { public: - ARMAsmLexer(const Target &T, const MCAsmInfo &MAI) + ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI) : ARMBaseAsmLexer(T, MAI) { - std::string tripleString("arm-unknown-unknown"); - std::string featureString; - std::string CPU; - OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); - InitRegisterMap(targetMachine->getRegisterInfo()); + InitRegisterMap(&MRI); } }; class ThumbAsmLexer : public ARMBaseAsmLexer { public: - ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI) + ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI) : ARMBaseAsmLexer(T, MAI) { - std::string tripleString("thumb-unknown-unknown"); - std::string featureString; - std::string CPU; - OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); - InitRegisterMap(targetMachine->getRegisterInfo()); + InitRegisterMap(&MRI); } }; @@ -149,6 +138,6 @@ AsmToken ARMBaseAsmLexer::LexTokenUAL() { } extern "C" void LLVMInitializeARMAsmLexer() { - RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget); - RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); + RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget); + RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); } diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index a474127..24f15b4 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -7,11 +7,9 @@ 
// //===----------------------------------------------------------------------===// -#include "ARM.h" -#include "ARMAddressingModes.h" -#include "ARMMCExpr.h" -#include "ARMBaseRegisterInfo.h" -#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMMCExpr.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -20,12 +18,17 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmParser.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -37,49 +40,65 @@ namespace { class ARMOperand; -class ARMAsmParser : public TargetAsmParser { +class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + struct { + ARMCC::CondCodes Cond; // Condition for IT block. + unsigned Mask:4; // Condition mask for instructions. + // Starting at first 1 (from lsb). + // '1' condition as indicated in IT. + // '0' inverse of condition (else). + // Count of instructions in IT block is + // 4 - trailingzeroes(mask) + + bool FirstCond; // Explicit flag for when we're parsing the + // First instruction in the IT block. It's + // implied in the mask, so needs special + // handling. + + unsigned CurPosition; // Current position in parsing of IT + // block. In range [0,3]. Initialized + // according to count of instructions in block. + // ~0U if no active IT block. + } ITState; + bool inITBlock() { return ITState.CurPosition != ~0U;} + void forwardITPosition() { + if (!inITBlock()) return; + // Move to the next instruction in the IT block, if there is one. If not, + // mark the block as done. + unsigned TZ = CountTrailingZeros_32(ITState.Mask); + if (++ITState.CurPosition == 5 - TZ) + ITState.CurPosition = ~0U; // Done with the IT block after this. 
+ } + + MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } - int TryParseRegister(); - virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); - int TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &); - bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); - bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &, - ARMII::AddrMode AddrMode); - bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); - bool ParsePrefix(ARMMCExpr::VariantKind &RefKind); - const MCExpr *ApplyPrefixToExpr(const MCExpr *E, - MCSymbolRefExpr::VariantKind Variant); - - - bool ParseMemoryOffsetReg(bool &Negative, - bool &OffsetRegShifted, - enum ARM_AM::ShiftOpc &ShiftType, - const MCExpr *&ShiftAmount, - const MCExpr *&Offset, - bool &OffsetIsReg, - int &OffsetRegNum, - SMLoc &E); - bool ParseShift(enum ARM_AM::ShiftOpc &St, - const MCExpr *&ShiftAmount, SMLoc &E); - bool ParseDirectiveWord(unsigned Size, SMLoc L); - bool ParseDirectiveThumb(SMLoc L); - bool ParseDirectiveThumbFunc(SMLoc L); - bool ParseDirectiveCode(SMLoc L); - bool ParseDirectiveSyntax(SMLoc L); - - bool MatchAndEmitInstruction(SMLoc IDLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); - void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, + int tryParseRegister(); + bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); + int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &); + bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); + bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &); + bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); + bool parsePrefix(ARMMCExpr::VariantKind &RefKind); + bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType, + unsigned &ShiftAmount); + bool parseDirectiveWord(unsigned Size, SMLoc L); + bool parseDirectiveThumb(SMLoc L); + bool parseDirectiveThumbFunc(SMLoc L); + bool parseDirectiveCode(SMLoc L); + bool parseDirectiveSyntax(SMLoc L); + + StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, + bool &CarrySetting, unsigned &ProcessorIMod, + StringRef &ITMask); + void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode); bool isThumb() const { @@ -89,10 +108,22 @@ class ARMAsmParser : public TargetAsmParser { bool isThumbOne() const { return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0; } + bool isThumbTwo() const { + return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2); + } + bool hasV6Ops() const { + return STI.getFeatureBits() & ARM::HasV6Ops; + } + bool hasV7Ops() const { + return STI.getFeatureBits() & ARM::HasV7Ops; + } void SwitchMode() { unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); } + bool isMClass() const { + return STI.getFeatureBits() & ARM::FeatureMClass; + } /// @name Auto-generated Match Functions /// { @@ -102,43 +133,108 @@ class ARMAsmParser : public TargetAsmParser { /// } - OperandMatchResultTy tryParseCoprocNumOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseCoprocRegOperand( + OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&); + 
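Restating the ITState machinery above in miniature: the 4-bit mask carries a trailing stop bit, so an IT block covers 4 - CountTrailingZeros(mask) conditional instructions, and forwardITPosition retires the block once CurPosition reaches 5 - TZ. A toy version of the count, with a plain loop standing in for CountTrailingZeros_32:

    // Instructions covered by an IT block, given its nonzero 4-bit mask.
    static unsigned itBlockLength(unsigned Mask4) {
      unsigned TZ = 0;
      while (((Mask4 >> TZ) & 1) == 0)
        ++TZ;
      return 4 - TZ;  // mask 0b1000 -> 1 instruction, 0bxxx1 -> 4
    }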
OperandMatchResultTy parseCoprocNumOperand( SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseMemBarrierOptOperand( + OperandMatchResultTy parseCoprocRegOperand( SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseProcIFlagsOperand( + OperandMatchResultTy parseCoprocOptionOperand( SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseMSRMaskOperand( + OperandMatchResultTy parseMemBarrierOptOperand( SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseMemMode2Operand( + OperandMatchResultTy parseProcIFlagsOperand( SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy tryParseMemMode3Operand( + OperandMatchResultTy parseMSRMaskOperand( SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O, + StringRef Op, int Low, int High); + OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) { + return parsePKHImm(O, "lsl", 0, 31); + } + OperandMatchResultTy parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) { + return parsePKHImm(O, "asr", 1, 32); + } + OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); // Asm Match Converter Methods - bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtStrdPre(MCInst &Inst, unsigned Opcode, + 
const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + + bool validateInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + void processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + bool shouldOmitCCOutOperand(StringRef Mnemonic, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); public: + enum ARMMatchResultTy { + Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY, + Match_RequiresNotITBlock, + Match_RequiresV6, + Match_RequiresThumb2 + }; + ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) - : TargetAsmParser(), STI(_STI), Parser(_Parser) { + : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + + // Not in an ITBlock to start with. + ITState.CurPosition = ~0U; } - virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); - virtual bool ParseDirective(AsmToken DirectiveID); + // Implementation of the MCTargetAsmParser interface: + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + bool ParseInstruction(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool ParseDirective(AsmToken DirectiveID); + + unsigned checkTargetMatchPredicate(MCInst &Inst); + + bool MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out); }; } // end anonymous namespace @@ -148,22 +244,30 @@ namespace { /// instruction. class ARMOperand : public MCParsedAsmOperand { enum KindTy { - CondCode, - CCOut, - CoprocNum, - CoprocReg, - Immediate, - MemBarrierOpt, - Memory, - MSRMask, - ProcIFlags, - Register, - RegisterList, - DPRRegisterList, - SPRRegisterList, - ShiftedRegister, - Shifter, - Token + k_CondCode, + k_CCOut, + k_ITCondMask, + k_CoprocNum, + k_CoprocReg, + k_CoprocOption, + k_Immediate, + k_FPImmediate, + k_MemBarrierOpt, + k_Memory, + k_PostIndexRegister, + k_MSRMask, + k_ProcIFlags, + k_VectorIndex, + k_Register, + k_RegisterList, + k_DPRRegisterList, + k_SPRRegisterList, + k_ShiftedRegister, + k_ShiftedImmediate, + k_ShifterImmediate, + k_RotateImmediate, + k_BitfieldDescriptor, + k_Token } Kind; SMLoc StartLoc, EndLoc; @@ -175,12 +279,20 @@ class ARMOperand : public MCParsedAsmOperand { } CC; struct { - ARM_MB::MemBOpt Val; - } MBOpt; + unsigned Val; + } Cop; struct { unsigned Val; - } Cop; + } CoprocOption; + + struct { + unsigned Mask:4; + } ITMask; + + struct { + ARM_MB::MemBOpt Val; + } MBOpt; struct { ARM_PROC::IFlags Val; @@ -200,37 +312,60 @@ class ARMOperand : public MCParsedAsmOperand { } Reg; struct { + unsigned Val; + } VectorIndex; + + struct { const MCExpr *Val; } Imm; + struct { + unsigned Val; // encoded 8-bit representation + } FPImm; + /// Combined record for all forms of ARM address expressions. struct { - ARMII::AddrMode AddrMode; unsigned BaseRegNum; - union { - unsigned RegNum; ///< Offset register num, when OffsetIsReg. - const MCExpr *Value; ///< Offset value, when !OffsetIsReg. 
- } Offset; - const MCExpr *ShiftAmount; // used when OffsetRegShifted is true - enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true - unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true - unsigned Preindexed : 1; - unsigned Postindexed : 1; - unsigned OffsetIsReg : 1; - unsigned Negative : 1; // only used when OffsetIsReg is true - unsigned Writeback : 1; - } Mem; + // Offset is in OffsetReg or OffsetImm. If both are zero, no offset + // was specified. + const MCConstantExpr *OffsetImm; // Offset immediate value + unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL + ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg + unsigned ShiftImm; // shift for OffsetReg. + unsigned Alignment; // 0 = no alignment specified + // n = alignment in bytes (8, 16, or 32) + unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) + } Memory; struct { + unsigned RegNum; + bool isAdd; ARM_AM::ShiftOpc ShiftTy; + unsigned ShiftImm; + } PostIdxReg; + + struct { + bool isASR; unsigned Imm; - } Shift; + } ShifterImm; struct { ARM_AM::ShiftOpc ShiftTy; unsigned SrcReg; unsigned ShiftReg; unsigned ShiftImm; - } ShiftedReg; + } RegShiftedReg; + struct { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftImm; + } RegShiftedImm; + struct { + unsigned Imm; + } RotImm; + struct { + unsigned LSB; + unsigned Width; + } Bitfield; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -240,45 +375,69 @@ public: StartLoc = o.StartLoc; EndLoc = o.EndLoc; switch (Kind) { - case CondCode: + case k_CondCode: CC = o.CC; break; - case Token: + case k_ITCondMask: + ITMask = o.ITMask; + break; + case k_Token: Tok = o.Tok; break; - case CCOut: - case Register: + case k_CCOut: + case k_Register: Reg = o.Reg; break; - case RegisterList: - case DPRRegisterList: - case SPRRegisterList: + case k_RegisterList: + case k_DPRRegisterList: + case k_SPRRegisterList: Registers = o.Registers; break; - case CoprocNum: - case CoprocReg: + case k_CoprocNum: + case k_CoprocReg: Cop = o.Cop; break; - case Immediate: + case k_CoprocOption: + CoprocOption = o.CoprocOption; + break; + case k_Immediate: Imm = o.Imm; break; - case MemBarrierOpt: + case k_FPImmediate: + FPImm = o.FPImm; + break; + case k_MemBarrierOpt: MBOpt = o.MBOpt; break; - case Memory: - Mem = o.Mem; + case k_Memory: + Memory = o.Memory; + break; + case k_PostIndexRegister: + PostIdxReg = o.PostIdxReg; break; - case MSRMask: + case k_MSRMask: MMask = o.MMask; break; - case ProcIFlags: + case k_ProcIFlags: IFlags = o.IFlags; break; - case Shifter: - Shift = o.Shift; + case k_ShifterImmediate: + ShifterImm = o.ShifterImm; + break; + case k_ShiftedRegister: + RegShiftedReg = o.RegShiftedReg; + break; + case k_ShiftedImmediate: + RegShiftedImm = o.RegShiftedImm; break; - case ShiftedRegister: - ShiftedReg = o.ShiftedReg; + case k_RotateImmediate: + RotImm = o.RotImm; + break; + case k_BitfieldDescriptor: + Bitfield = o.Bitfield; + break; + case k_VectorIndex: + VectorIndex = o.VectorIndex; break; } } @@ -289,94 +448,96 @@ public: SMLoc getEndLoc() const { return EndLoc; } ARMCC::CondCodes getCondCode() const { - assert(Kind == CondCode && "Invalid access!"); + assert(Kind == k_CondCode && "Invalid access!"); return CC.Val; } unsigned getCoproc() const { - assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!"); + assert((Kind == k_CoprocNum || Kind == k_CoprocReg) && "Invalid access!"); return Cop.Val; } StringRef getToken() const { - assert(Kind == Token && "Invalid access!"); + assert(Kind == 
k_Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); } unsigned getReg() const { - assert((Kind == Register || Kind == CCOut) && "Invalid access!"); + assert((Kind == k_Register || Kind == k_CCOut) && "Invalid access!"); return Reg.RegNum; } const SmallVectorImpl<unsigned> &getRegList() const { - assert((Kind == RegisterList || Kind == DPRRegisterList || - Kind == SPRRegisterList) && "Invalid access!"); + assert((Kind == k_RegisterList || Kind == k_DPRRegisterList || + Kind == k_SPRRegisterList) && "Invalid access!"); return Registers; } const MCExpr *getImm() const { - assert(Kind == Immediate && "Invalid access!"); + assert(Kind == k_Immediate && "Invalid access!"); return Imm.Val; } + unsigned getFPImm() const { + assert(Kind == k_FPImmediate && "Invalid access!"); + return FPImm.Val; + } + + unsigned getVectorIndex() const { + assert(Kind == k_VectorIndex && "Invalid access!"); + return VectorIndex.Val; + } + ARM_MB::MemBOpt getMemBarrierOpt() const { - assert(Kind == MemBarrierOpt && "Invalid access!"); + assert(Kind == k_MemBarrierOpt && "Invalid access!"); return MBOpt.Val; } ARM_PROC::IFlags getProcIFlags() const { - assert(Kind == ProcIFlags && "Invalid access!"); + assert(Kind == k_ProcIFlags && "Invalid access!"); return IFlags.Val; } unsigned getMSRMask() const { - assert(Kind == MSRMask && "Invalid access!"); + assert(Kind == k_MSRMask && "Invalid access!"); return MMask.Val; } - /// @name Memory Operand Accessors - /// @{ - ARMII::AddrMode getMemAddrMode() const { - return Mem.AddrMode; - } - unsigned getMemBaseRegNum() const { - return Mem.BaseRegNum; - } - unsigned getMemOffsetRegNum() const { - assert(Mem.OffsetIsReg && "Invalid access!"); - return Mem.Offset.RegNum; - } - const MCExpr *getMemOffset() const { - assert(!Mem.OffsetIsReg && "Invalid access!"); - return Mem.Offset.Value; - } - unsigned getMemOffsetRegShifted() const { - assert(Mem.OffsetIsReg && "Invalid access!"); - return Mem.OffsetRegShifted; + bool isCoprocNum() const { return Kind == k_CoprocNum; } + bool isCoprocReg() const { return Kind == k_CoprocReg; } + bool isCoprocOption() const { return Kind == k_CoprocOption; } + bool isCondCode() const { return Kind == k_CondCode; } + bool isCCOut() const { return Kind == k_CCOut; } + bool isITMask() const { return Kind == k_ITCondMask; } + bool isITCondCode() const { return Kind == k_CondCode; } + bool isImm() const { return Kind == k_Immediate; } + bool isFPImm() const { return Kind == k_FPImmediate; } + bool isImm8s4() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; } - const MCExpr *getMemShiftAmount() const { - assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); - return Mem.ShiftAmount; + bool isImm0_1020s4() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ((Value & 3) == 0) && Value >= 0 && Value <= 1020; } - enum ARM_AM::ShiftOpc getMemShiftType() const { - assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); - return Mem.ShiftType; + bool isImm0_508s4() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ((Value & 3) == 0) && Value >= 0 && Value <= 
508; } - bool getMemPreindexed() const { return Mem.Preindexed; } - bool getMemPostindexed() const { return Mem.Postindexed; } - bool getMemOffsetIsReg() const { return Mem.OffsetIsReg; } - bool getMemNegative() const { return Mem.Negative; } - bool getMemWriteback() const { return Mem.Writeback; } - - /// @} - - bool isCoprocNum() const { return Kind == CoprocNum; } - bool isCoprocReg() const { return Kind == CoprocReg; } - bool isCondCode() const { return Kind == CondCode; } - bool isCCOut() const { return Kind == CCOut; } - bool isImm() const { return Kind == Immediate; } bool isImm0_255() const { - if (Kind != Immediate) + if (Kind != k_Immediate) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; @@ -384,7 +545,7 @@ public: return Value >= 0 && Value < 256; } bool isImm0_7() const { - if (Kind != Immediate) + if (Kind != k_Immediate) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; @@ -392,130 +553,365 @@ public: return Value >= 0 && Value < 8; } bool isImm0_15() const { - if (Kind != Immediate) + if (Kind != k_Immediate) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 16; } + bool isImm0_31() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 32; + } + bool isImm1_16() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 17; + } + bool isImm1_32() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 33; + } bool isImm0_65535() const { - if (Kind != Immediate) + if (Kind != k_Immediate) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 65536; } - bool isT2SOImm() const { - if (Kind != Immediate) + bool isImm0_65535Expr() const { + if (Kind != k_Immediate) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; + // If it's not a constant expression, it'll generate a fixup and be + // handled later. 
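+    // annotation (not part of the upstream change): this is the one
+    // difference from the plain isImm0_65535 predicate just above. A
+    // non-constant expression (e.g. a symbolic ":lower16:foo" operand)
+    // is accepted here and resolved via a relocation/fixup when the
+    // instruction is encoded; only literal constants are range-checked
+    // eagerly at parse time.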
+ if (!CE) return true; int64_t Value = CE->getValue(); - return ARM_AM::getT2SOImmVal(Value) != -1; + return Value >= 0 && Value < 65536; } - bool isReg() const { return Kind == Register; } - bool isRegList() const { return Kind == RegisterList; } - bool isDPRRegList() const { return Kind == DPRRegisterList; } - bool isSPRRegList() const { return Kind == SPRRegisterList; } - bool isToken() const { return Kind == Token; } - bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; } - bool isMemory() const { return Kind == Memory; } - bool isShifter() const { return Kind == Shifter; } - bool isShiftedReg() const { return Kind == ShiftedRegister; } - bool isMemMode2() const { - if (getMemAddrMode() != ARMII::AddrMode2) + bool isImm24bit() const { + if (Kind != k_Immediate) return false; - - if (getMemOffsetIsReg()) - return true; - - if (getMemNegative() && - !(getMemPostindexed() || getMemPreindexed())) + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value <= 0xffffff; + } + bool isImmThumbSR() const { + if (Kind != k_Immediate) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - - // The offset must be in the range 0-4095 (imm12). - if (Value > 4095 || Value < -4095) + return Value > 0 && Value < 33; + } + bool isPKHLSLImm() const { + if (Kind != k_Immediate) return false; - - return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 32; } - bool isMemMode3() const { - if (getMemAddrMode() != ARMII::AddrMode3) + bool isPKHASRImm() const { + if (Kind != k_Immediate) return false; - - if (getMemOffsetIsReg()) { - if (getMemOffsetRegShifted()) - return false; // No shift with offset reg allowed - return true; - } - - if (getMemNegative() && - !(getMemPostindexed() || getMemPreindexed())) + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 32; + } + bool isARMSOImm() const { + if (Kind != k_Immediate) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - - // The offset must be in the range 0-255 (imm8). - if (Value > 255 || Value < -255) + return ARM_AM::getSOImmVal(Value) != -1; + } + bool isT2SOImm() const { + if (Kind != k_Immediate) return false; - - return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getT2SOImmVal(Value) != -1; } - bool isMemMode5() const { - if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() || - getMemNegative()) + bool isSetEndImm() const { + if (Kind != k_Immediate) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; - - // The offset must be a multiple of 4 in the range 0-1020. 
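// annotation (not part of the upstream change): the monolithic
// isMemModeN predicates being deleted in this stretch bundled several
// addressing forms behind the old Mem struct; the replacement code
// splits them into per-form checks (isAddrMode2/3/5, isMemImm8s4Offset,
// and so on) over the leaner Memory and PostIdxReg operand layouts.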
int64_t Value = CE->getValue(); - return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); - } - bool isMemMode7() const { - if (!isMemory() || - getMemPreindexed() || - getMemPostindexed() || - getMemOffsetIsReg() || - getMemNegative() || - getMemWriteback()) + return Value == 1 || Value == 0; + } + bool isReg() const { return Kind == k_Register; } + bool isRegList() const { return Kind == k_RegisterList; } + bool isDPRRegList() const { return Kind == k_DPRRegisterList; } + bool isSPRRegList() const { return Kind == k_SPRRegisterList; } + bool isToken() const { return Kind == k_Token; } + bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; } + bool isMemory() const { return Kind == k_Memory; } + bool isShifterImm() const { return Kind == k_ShifterImmediate; } + bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; } + bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; } + bool isRotImm() const { return Kind == k_RotateImmediate; } + bool isBitfield() const { return Kind == k_BitfieldDescriptor; } + bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } + bool isPostIdxReg() const { + return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift; + } + bool isMemNoOffset(bool alignOK = false) const { + if (!isMemory()) return false; - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + // No offset of any kind. + return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 && + (alignOK || Memory.Alignment == 0); + } + bool isAlignedMemory() const { + return isMemNoOffset(true); + } + bool isAddrMode2() const { + if (!isMemory() || Memory.Alignment != 0) return false; + // Check for register offset. + if (Memory.OffsetRegNum) return true; + // Immediate offset in range [-4095, 4095]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val > -4096 && Val < 4096; + } + bool isAM2OffsetImm() const { + if (Kind != k_Immediate) + return false; + // Immediate offset in range [-4095, 4095]. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; - - if (CE->getValue()) + int64_t Val = CE->getValue(); + return Val > -4096 && Val < 4096; + } + bool isAddrMode3() const { + if (!isMemory() || Memory.Alignment != 0) return false; + // No shifts are legal for AM3. + if (Memory.ShiftType != ARM_AM::no_shift) return false; + // Check for register offset. + if (Memory.OffsetRegNum) return true; + // Immediate offset in range [-255, 255]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val > -256 && Val < 256; + } + bool isAM3Offset() const { + if (Kind != k_Immediate && Kind != k_PostIndexRegister) + return false; + if (Kind == k_PostIndexRegister) + return PostIdxReg.ShiftTy == ARM_AM::no_shift; + // Immediate offset in range [-255, 255]. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Val = CE->getValue(); + // Special case, #-0 is INT32_MIN. + return (Val > -256 && Val < 256) || Val == INT32_MIN; + } + bool isAddrMode5() const { + if (!isMemory() || Memory.Alignment != 0) return false; + // Check for register offset. + if (Memory.OffsetRegNum) return false; + // Immediate offset in range [-1020, 1020] and a multiple of 4. 
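+    // annotation (not part of the upstream change): a missing OffsetImm
+    // means no offset was written at all, which is trivially valid; and
+    // INT32_MIN is the parser's sentinel for "#-0", which must remain
+    // distinct from "#0" because it encodes with the U (add) bit clear.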
+ if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) || + Val == INT32_MIN; + } + bool isMemTBB() const { + if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) + return false; + return true; + } + bool isMemTBH() const { + if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 || + Memory.Alignment != 0 ) + return false; + return true; + } + bool isMemRegOffset() const { + if (!isMemory() || !Memory.OffsetRegNum || Memory.Alignment != 0) return false; - return true; } - bool isMemModeRegThumb() const { - if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback()) + bool isT2MemRegOffset() const { + if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + Memory.Alignment != 0) + return false; + // Only lsl #{0, 1, 2, 3} allowed. + if (Memory.ShiftType == ARM_AM::no_shift) + return true; + if (Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm > 3) return false; return true; } - bool isMemModeImmThumb() const { - if (!isMemory() || getMemOffsetIsReg() || getMemWriteback()) + bool isMemThumbRR() const { + // Thumb reg+reg addressing is simple. Just two registers, a base and + // an offset. No shifts, negations or any other complicating factors. + if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) + return false; + return isARMLowRegister(Memory.BaseRegNum) && + (!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum)); + } + bool isMemThumbRIs4() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || + !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) + return false; + // Immediate offset, multiple of 4 in range [0, 124]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val <= 124 && (Val % 4) == 0; + } + bool isMemThumbRIs2() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || + !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) + return false; + // Immediate offset, multiple of 4 in range [0, 62]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val <= 62 && (Val % 2) == 0; + } + bool isMemThumbRIs1() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || + !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) + return false; + // Immediate offset in range [0, 31]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val <= 31; + } + bool isMemThumbSPI() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || + Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0) + return false; + // Immediate offset, multiple of 4 in range [0, 1020]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val <= 1020 && (Val % 4) == 0; + } + bool isMemImm8s4Offset() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset a multiple of 4 in range [-1020, 1020]. 
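// annotation (illustrative sketch, not part of the upstream change):
// the word-scaled range test that isImm8s4, isMemImm8s4Offset and the
// related predicates share, as a standalone helper. The name
// fitsImm8s4 is mine, not LLVM's.
#include <cassert>
#include <cstdint>
static bool fitsImm8s4(int64_t V) {
  // Multiple of 4 whose magnitude fits the 8-bit, word-scaled field.
  return (V & 3) == 0 && V >= -1020 && V <= 1020;
}
int main() {
  assert(fitsImm8s4(-1020) && fitsImm8s4(0) && fitsImm8s4(1020));
  assert(!fitsImm8s4(2) && !fitsImm8s4(1024));
  return 0;
}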
+ if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= -1020 && Val <= 1020 && (Val & 3) == 0; + } + bool isMemImm0_1020s4Offset() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset a multiple of 4 in range [0, 1020]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val <= 1020 && (Val & 3) == 0; + } + bool isMemImm8Offset() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset in range [-255, 255]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val == INT32_MIN) || (Val > -256 && Val < 256); + } + bool isMemPosImm8Offset() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset in range [0, 255]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val >= 0 && Val < 256; + } + bool isMemNegImm8Offset() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset in range [-255, -1]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return Val > -256 && Val < 0; + } + bool isMemUImm12Offset() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm())) + return true; + + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; + // Immediate offset in range [0, 4095]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val >= 0 && Val < 4096); + } + bool isMemImm12Offset() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm())) + return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Immediate offset in range [-4095, 4095]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); + } + bool isPostIdxImm8() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Val = CE->getValue(); + return (Val > -256 && Val < 256) || (Val == INT32_MIN); + } + bool isPostIdxImm8s4() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; + int64_t Val = CE->getValue(); + return ((Val & 3) == 0 && Val >= -1020 && Val <= 1020) || + (Val == INT32_MIN); + } - // The offset must be a multiple of 4 in the range 0-124. 
- uint64_t Value = CE->getValue(); - return ((Value & 0x3) == 0 && Value <= 124); + bool isMSRMask() const { return Kind == k_MSRMask; } + bool isProcIFlags() const { return Kind == k_ProcIFlags; } + + bool isVectorIndex8() const { + if (Kind != k_VectorIndex) return false; + return VectorIndex.Val < 8; + } + bool isVectorIndex16() const { + if (Kind != k_VectorIndex) return false; + return VectorIndex.Val < 4; } - bool isMSRMask() const { return Kind == MSRMask; } - bool isProcIFlags() const { return Kind == ProcIFlags; } + bool isVectorIndex32() const { + if (Kind != k_VectorIndex) return false; + return VectorIndex.Val < 2; + } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. @@ -544,6 +940,21 @@ public: Inst.addOperand(MCOperand::CreateImm(getCoproc())); } + void addCoprocOptionOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(CoprocOption.Val)); + } + + void addITMaskOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(ITMask.Mask)); + } + + void addITCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); + } + void addCCOutOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); @@ -554,22 +965,27 @@ public: Inst.addOperand(MCOperand::CreateReg(getReg())); } - void addShiftedRegOperands(MCInst &Inst, unsigned N) const { + void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); - assert(isShiftedReg() && "addShiftedRegOperands() on non ShiftedReg!"); - assert((ShiftedReg.ShiftReg == 0 || - ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) == 0) && - "Invalid shifted register operand!"); - Inst.addOperand(MCOperand::CreateReg(ShiftedReg.SrcReg)); - Inst.addOperand(MCOperand::CreateReg(ShiftedReg.ShiftReg)); + assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!"); + Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg)); + Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg)); Inst.addOperand(MCOperand::CreateImm( - ARM_AM::getSORegOpc(ShiftedReg.ShiftTy, ShiftedReg.ShiftImm))); + ARM_AM::getSORegOpc(RegShiftedReg.ShiftTy, RegShiftedReg.ShiftImm))); } - void addShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); + void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!"); + Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg)); Inst.addOperand(MCOperand::CreateImm( - ARM_AM::getSORegOpc(Shift.ShiftTy, 0))); + ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm))); + } + + void addShifterImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm((ShifterImm.isASR << 5) | + ShifterImm.Imm)); } void addRegListOperands(MCInst &Inst, unsigned N) const { @@ -588,11 +1004,57 @@ public: addRegListOperands(Inst, N); } + void addRotImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // Encoded as val>>3. The printer handles display as 8, 16, 24. 
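+    // annotation (not part of the upstream change): the only rotations
+    // these operands allow are 0, 8, 16 and 24, so dividing by 8 maps
+    // them onto the two-bit rotate field (0..3) the encoder expects.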
+ Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3)); + } + + void addBitfieldOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // Munge the lsb/width into a bitfield mask. + unsigned lsb = Bitfield.LSB; + unsigned width = Bitfield.Width; + // Make a 32-bit mask w/ the referenced bits clear and all other bits set. + uint32_t Mask = ~(((uint32_t)0xffffffff >> lsb) << (32 - width) >> + (32 - (lsb + width))); + Inst.addOperand(MCOperand::CreateImm(Mask)); + } + void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); } + void addFPImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getFPImm())); + } + + void addImm8s4Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // FIXME: We really want to scale the value here, but the LDRD/STRD + // instruction don't encode operands that way yet. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + } + + void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate is scaled by four in the encoding and is stored + // in the MCInst as such. Lop off the low two bits here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); + } + + void addImm0_508s4Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate is scaled by four in the encoding and is stored + // in the MCInst as such. Lop off the low two bits here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); + } + void addImm0_255Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); @@ -608,137 +1070,344 @@ public: addExpr(Inst, getImm()); } + void addImm0_31Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addImm1_16Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The constant encodes as the immediate-1, and we store in the instruction + // the bits as encoded, so subtract off one here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); + } + + void addImm1_32Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The constant encodes as the immediate-1, and we store in the instruction + // the bits as encoded, so subtract off one here. 
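+    // annotation (not part of the upstream change): the same
+    // off-by-one trick as addImm1_16 above, so a source-level "#32"
+    // round-trips through the encoding value 31.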
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); + } + void addImm0_65535Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); } + void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addImm24bitOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addImmThumbSROperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The constant encodes as the immediate, except for 32, which encodes as + // zero. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Imm = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm))); + } + + void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addPKHASRImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // An ASR value of 32 encodes as 0, so that's how we want to add it to + // the instruction as well. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + int Val = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val)); + } + + void addARMSOImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + void addT2SOImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); } + void addSetEndImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); } - void addMemMode7Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && isMemMode7() && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); - (void)CE; - assert((CE || CE->getValue() == 0) && - "No offset operand support in mode 7"); + void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); } - void addMemMode2Operands(MCInst &Inst, unsigned N) const { - assert(isMemMode2() && "Invalid mode or number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1); + void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Memory.Alignment)); + } - if (getMemOffsetIsReg()) { - Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + void addAddrMode2Operands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + if (!Memory.OffsetRegNum) { + ARM_AM::AddrOpc AddSub = Val < 0 ? 
ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift); + } else { + // For register offset, we encode the shift type and negation flag + // here. + Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, + Memory.ShiftImm, Memory.ShiftType); + } + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add; - ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift; - int64_t ShiftAmount = 0; + void addAM2OffsetImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + assert(CE && "non-constant AM2OffsetImm operand!"); + int32_t Val = CE->getValue(); + ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift); + Inst.addOperand(MCOperand::CreateReg(0)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - if (getMemOffsetRegShifted()) { - ShOpc = getMemShiftType(); - const MCConstantExpr *CE = - dyn_cast<MCConstantExpr>(getMemShiftAmount()); - ShiftAmount = CE->getValue(); - } + void addAddrMode3Operands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + if (!Memory.OffsetRegNum) { + ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM3Opc(AddSub, Val); + } else { + // For register offset, we encode the shift type and negation flag + // here. + Val = ARM_AM::getAM3Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, 0); + } + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount, - ShOpc, IdxMode))); + void addAM3OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + if (Kind == k_PostIndexRegister) { + int32_t Val = + ARM_AM::getAM3Opc(PostIdxReg.isAdd ? ARM_AM::add : ARM_AM::sub, 0); + Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); return; } - // Create a operand placeholder to always yield the same number of operands. + // Constant offset. + const MCConstantExpr *CE = static_cast<const MCConstantExpr*>(getImm()); + int32_t Val = CE->getValue(); + ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM3Opc(AddSub, Val); Inst.addOperand(MCOperand::CreateReg(0)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - // FIXME: #-0 is encoded differently than #0. Does the parser preserve - // the difference? 
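// annotation (not part of the upstream change): the FIXME being deleted
// here is answered by the new code above; the rewritten parser does
// preserve the "#-0" vs "#0" distinction, carrying "#-0" as INT32_MIN
// until addAddrMode2Operands / addAM2OffsetImmOperands materialize the
// ARM_AM::sub-versus-add flag.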
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); - assert(CE && "Non-constant mode 2 offset operand!"); - int64_t Offset = CE->getValue(); + void addAddrMode5Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // The lower two bits are always zero and as such are not encoded. + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0; + ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM5Opc(AddSub, Val); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - if (Offset >= 0) - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add, - Offset, ARM_AM::no_shift, IdxMode))); - else - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub, - -Offset, ARM_AM::no_shift, IdxMode))); + void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + + void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // The lower two bits are always zero and as such are not encoded. + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + + void addMemImm8OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + + void addMemPosImm8OffsetOperands(MCInst &Inst, unsigned N) const { + addMemImm8OffsetOperands(Inst, N); + } + + void addMemNegImm8OffsetOperands(MCInst &Inst, unsigned N) const { + addMemImm8OffsetOperands(Inst, N); } - void addMemMode3Operands(MCInst &Inst, unsigned N) const { - assert(isMemMode3() && "Invalid mode or number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1); + void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // If this is an immediate, it's a label reference. + if (Kind == k_Immediate) { + addExpr(Inst, getImm()); + Inst.addOperand(MCOperand::CreateImm(0)); + return; + } - if (getMemOffsetIsReg()) { - Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + // Otherwise, it's a normal memory reg+offset. + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add; - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0, - IdxMode))); + void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // If this is an immediate, it's a label reference. + if (Kind == k_Immediate) { + addExpr(Inst, getImm()); + Inst.addOperand(MCOperand::CreateImm(0)); return; } - // Create a operand placeholder to always yield the same number of operands. 
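// annotation (illustrative sketch, not part of the upstream change):
// how the addAddrMode*Operands and addAM*Offset* methods above fold a
// signed offset into the sign-magnitude form the encoder wants. The
// name splitOffset is mine, not LLVM's.
#include <cassert>
#include <cstdint>
#include <utility>
// Returns {isAdd, magnitude}; INT32_MIN is the parser's "#-0" sentinel.
static std::pair<bool, uint32_t> splitOffset(int32_t Val) {
  bool isAdd = Val >= 0;
  if (Val == INT32_MIN) Val = 0;  // "#-0": subtract a zero magnitude.
  if (Val < 0) Val = -Val;
  return {isAdd, (uint32_t)Val};
}
int main() {
  assert(splitOffset(8).first && splitOffset(8).second == 8);
  assert(!splitOffset(-8).first && splitOffset(-8).second == 8);
  assert(!splitOffset(INT32_MIN).first && splitOffset(INT32_MIN).second == 0);
  return 0;
}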
- Inst.addOperand(MCOperand::CreateReg(0)); + // Otherwise, it's a normal memory reg+offset. + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - // FIXME: #-0 is encoded differently than #0. Does the parser preserve - // the difference? - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); - assert(CE && "Non-constant mode 3 offset operand!"); - int64_t Offset = CE->getValue(); + void addMemTBBOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + } - if (Offset >= 0) - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add, - Offset, IdxMode))); - else - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub, - -Offset, IdxMode))); + void addMemTBHOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); } - void addMemMode5Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemMode5() && "Invalid number of operands!"); + void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, + Memory.ShiftImm, Memory.ShiftType); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - assert(!getMemOffsetIsReg() && "Invalid mode 5 operand"); + void addT2MemRegOffsetOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + Inst.addOperand(MCOperand::CreateImm(Memory.ShiftImm)); + } - // FIXME: #-0 is encoded differently than #0. Does the parser preserve - // the difference? - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); - assert(CE && "Non-constant mode 5 offset operand!"); + void addMemThumbRROperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); + } - // The MCInst offset operand doesn't include the low two bits (like - // the instruction encoding). - int64_t Offset = CE->getValue() / 4; - if (Offset >= 0) - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, - Offset))); - else - Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, - -Offset))); + void addMemThumbRIs4Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? 
(Memory.OffsetImm->getValue() / 4) : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); } - void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + void addMemThumbRIs2Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 2) : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); } - void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); - assert(CE && "Non-constant mode offset operand!"); - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + void addMemThumbRIs1Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue()) : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + + void addMemThumbSPIOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + int64_t Val = Memory.OffsetImm ? (Memory.OffsetImm->getValue() / 4) : 0; + Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + + void addPostIdxImm8Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + assert(CE && "non-constant post-idx-imm8 operand!"); + int Imm = CE->getValue(); + bool isAdd = Imm >= 0; + if (Imm == INT32_MIN) Imm = 0; + Imm = (Imm < 0 ? -Imm : Imm) | (int)isAdd << 8; + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + + void addPostIdxImm8s4Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + assert(CE && "non-constant post-idx-imm8s4 operand!"); + int Imm = CE->getValue(); + bool isAdd = Imm >= 0; + if (Imm == INT32_MIN) Imm = 0; + // Immediate is scaled by 4. + Imm = ((Imm < 0 ? -Imm : Imm) / 4) | (int)isAdd << 8; + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + + void addPostIdxRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum)); + Inst.addOperand(MCOperand::CreateImm(PostIdxReg.isAdd)); + } + + void addPostIdxRegShiftedOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(PostIdxReg.RegNum)); + // The sign, shift type, and shift amount are encoded in a single operand + // using the AM2 encoding helpers. + ARM_AM::AddrOpc opc = PostIdxReg.isAdd ? 
ARM_AM::add : ARM_AM::sub; + unsigned Imm = ARM_AM::getAM2Opc(opc, PostIdxReg.ShiftImm, + PostIdxReg.ShiftTy); + Inst.addOperand(MCOperand::CreateImm(Imm)); } void addMSRMaskOperands(MCInst &Inst, unsigned N) const { @@ -751,10 +1420,33 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); } + void addVectorIndex8Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } + + void addVectorIndex16Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } + + void addVectorIndex32Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } + virtual void print(raw_ostream &OS) const; + static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) { + ARMOperand *Op = new ARMOperand(k_ITCondMask); + Op->ITMask.Mask = Mask; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { - ARMOperand *Op = new ARMOperand(CondCode); + ARMOperand *Op = new ARMOperand(k_CondCode); Op->CC.Val = CC; Op->StartLoc = S; Op->EndLoc = S; @@ -762,7 +1454,7 @@ public: } static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) { - ARMOperand *Op = new ARMOperand(CoprocNum); + ARMOperand *Op = new ARMOperand(k_CoprocNum); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; @@ -770,15 +1462,23 @@ public: } static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) { - ARMOperand *Op = new ARMOperand(CoprocReg); + ARMOperand *Op = new ARMOperand(k_CoprocReg); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; return Op; } + static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_CoprocOption); + Op->Cop.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) { - ARMOperand *Op = new ARMOperand(CCOut); + ARMOperand *Op = new ARMOperand(k_CCOut); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = S; @@ -786,7 +1486,7 @@ public: } static ARMOperand *CreateToken(StringRef Str, SMLoc S) { - ARMOperand *Op = new ARMOperand(Token); + ARMOperand *Op = new ARMOperand(k_Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -795,7 +1495,7 @@ public: } static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(Register); + ARMOperand *Op = new ARMOperand(k_Register); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = E; @@ -807,20 +1507,52 @@ public: unsigned ShiftReg, unsigned ShiftImm, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(ShiftedRegister); - Op->ShiftedReg.ShiftTy = ShTy; - Op->ShiftedReg.SrcReg = SrcReg; - Op->ShiftedReg.ShiftReg = ShiftReg; - Op->ShiftedReg.ShiftImm = ShiftImm; + ARMOperand *Op = new ARMOperand(k_ShiftedRegister); + Op->RegShiftedReg.ShiftTy = ShTy; + Op->RegShiftedReg.SrcReg = SrcReg; + Op->RegShiftedReg.ShiftReg = ShiftReg; + Op->RegShiftedReg.ShiftImm = ShiftImm; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, + unsigned SrcReg, + unsigned ShiftImm, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_ShiftedImmediate); + Op->RegShiftedImm.ShiftTy = ShTy; + Op->RegShiftedImm.SrcReg = SrcReg; + Op->RegShiftedImm.ShiftImm = 
ShiftImm; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy, + static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(Shifter); - Op->Shift.ShiftTy = ShTy; + ARMOperand *Op = new ARMOperand(k_ShifterImmediate); + Op->ShifterImm.isASR = isASR; + Op->ShifterImm.Imm = Imm; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_RotateImmediate); + Op->RotImm.Imm = Imm; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor); + Op->Bitfield.LSB = LSB; + Op->Bitfield.Width = Width; Op->StartLoc = S; Op->EndLoc = E; return Op; @@ -829,12 +1561,13 @@ public: static ARMOperand * CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs, SMLoc StartLoc, SMLoc EndLoc) { - KindTy Kind = RegisterList; + KindTy Kind = k_RegisterList; - if (ARM::DPRRegClass.contains(Regs.front().first)) - Kind = DPRRegisterList; - else if (ARM::SPRRegClass.contains(Regs.front().first)) - Kind = SPRRegisterList; + if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().first)) + Kind = k_DPRRegisterList; + else if (ARMMCRegisterClasses[ARM::SPRRegClassID]. + contains(Regs.front().first)) + Kind = k_SPRRegisterList; ARMOperand *Op = new ARMOperand(Kind); for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator @@ -846,55 +1579,68 @@ public: return Op; } + static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, + MCContext &Ctx) { + ARMOperand *Op = new ARMOperand(k_VectorIndex); + Op->VectorIndex.Val = Idx; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(Immediate); + ARMOperand *Op = new ARMOperand(k_Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum, - bool OffsetIsReg, const MCExpr *Offset, - int OffsetRegNum, bool OffsetRegShifted, - enum ARM_AM::ShiftOpc ShiftType, - const MCExpr *ShiftAmount, bool Preindexed, - bool Postindexed, bool Negative, bool Writeback, + static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { + ARMOperand *Op = new ARMOperand(k_FPImmediate); + Op->FPImm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static ARMOperand *CreateMem(unsigned BaseRegNum, + const MCConstantExpr *OffsetImm, + unsigned OffsetRegNum, + ARM_AM::ShiftOpc ShiftType, + unsigned ShiftImm, + unsigned Alignment, + bool isNegative, SMLoc S, SMLoc E) { - assert((OffsetRegNum == -1 || OffsetIsReg) && - "OffsetRegNum must imply OffsetIsReg!"); - assert((!OffsetRegShifted || OffsetIsReg) && - "OffsetRegShifted must imply OffsetIsReg!"); - assert((Offset || OffsetIsReg) && - "Offset must exists unless register offset is used!"); - assert((!ShiftAmount || (OffsetIsReg && OffsetRegShifted)) && - "Cannot have shift amount without shifted register offset!"); - assert((!Offset || !OffsetIsReg) && - "Cannot have expression offset and register offset!"); - - ARMOperand *Op = new ARMOperand(Memory); - Op->Mem.AddrMode = AddrMode; - Op->Mem.BaseRegNum = BaseRegNum; - Op->Mem.OffsetIsReg = OffsetIsReg; - if (OffsetIsReg) - Op->Mem.Offset.RegNum = OffsetRegNum; - else - Op->Mem.Offset.Value 
= Offset; - Op->Mem.OffsetRegShifted = OffsetRegShifted; - Op->Mem.ShiftType = ShiftType; - Op->Mem.ShiftAmount = ShiftAmount; - Op->Mem.Preindexed = Preindexed; - Op->Mem.Postindexed = Postindexed; - Op->Mem.Negative = Negative; - Op->Mem.Writeback = Writeback; + ARMOperand *Op = new ARMOperand(k_Memory); + Op->Memory.BaseRegNum = BaseRegNum; + Op->Memory.OffsetImm = OffsetImm; + Op->Memory.OffsetRegNum = OffsetRegNum; + Op->Memory.ShiftType = ShiftType; + Op->Memory.ShiftImm = ShiftImm; + Op->Memory.Alignment = Alignment; + Op->Memory.isNegative = isNegative; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd, + ARM_AM::ShiftOpc ShiftTy, + unsigned ShiftImm, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_PostIndexRegister); + Op->PostIdxReg.RegNum = RegNum; + Op->PostIdxReg.isAdd = isAdd; + Op->PostIdxReg.ShiftTy = ShiftTy; + Op->PostIdxReg.ShiftImm = ShiftImm; Op->StartLoc = S; Op->EndLoc = E; return Op; } static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) { - ARMOperand *Op = new ARMOperand(MemBarrierOpt); + ARMOperand *Op = new ARMOperand(k_MemBarrierOpt); Op->MBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; @@ -902,7 +1648,7 @@ public: } static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) { - ARMOperand *Op = new ARMOperand(ProcIFlags); + ARMOperand *Op = new ARMOperand(k_ProcIFlags); Op->IFlags.Val = IFlags; Op->StartLoc = S; Op->EndLoc = S; @@ -910,7 +1656,7 @@ public: } static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) { - ARMOperand *Op = new ARMOperand(MSRMask); + ARMOperand *Op = new ARMOperand(k_MSRMask); Op->MMask.Val = MMask; Op->StartLoc = S; Op->EndLoc = S; @@ -922,53 +1668,56 @@ public: void ARMOperand::print(raw_ostream &OS) const { switch (Kind) { - case CondCode: + case k_FPImmediate: + OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm()) + << ") >"; + break; + case k_CondCode: OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">"; break; - case CCOut: + case k_CCOut: OS << "<ccout " << getReg() << ">"; break; - case CoprocNum: + case k_ITCondMask: { + static char MaskStr[][6] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)", + "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)", + "(tee)", "(eee)" }; + assert((ITMask.Mask & 0xf) == ITMask.Mask); + OS << "<it-mask " << MaskStr[ITMask.Mask] << ">"; + break; + } + case k_CoprocNum: OS << "<coprocessor number: " << getCoproc() << ">"; break; - case CoprocReg: + case k_CoprocReg: OS << "<coprocessor register: " << getCoproc() << ">"; break; - case MSRMask: + case k_CoprocOption: + OS << "<coprocessor option: " << CoprocOption.Val << ">"; + break; + case k_MSRMask: OS << "<mask: " << getMSRMask() << ">"; break; - case Immediate: + case k_Immediate: getImm()->print(OS); break; - case MemBarrierOpt: + case k_MemBarrierOpt: OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">"; break; - case Memory: + case k_Memory: OS << "<memory " - << "am:" << ARMII::AddrModeToString(getMemAddrMode()) - << " base:" << getMemBaseRegNum(); - if (getMemOffsetIsReg()) { - OS << " offset:<register " << getMemOffsetRegNum(); - if (getMemOffsetRegShifted()) { - OS << " offset-shift-type:" << getMemShiftType(); - OS << " offset-shift-amount:" << *getMemShiftAmount(); - } - } else { - OS << " offset:" << *getMemOffset(); - } - if (getMemOffsetIsReg()) - OS << " (offset-is-reg)"; - if (getMemPreindexed()) - OS << " (pre-indexed)"; - if (getMemPostindexed()) - OS << " 
(post-indexed)"; - if (getMemNegative()) - OS << " (negative)"; - if (getMemWriteback()) - OS << " (writeback)"; + << " base:" << Memory.BaseRegNum; + OS << ">"; + break; + case k_PostIndexRegister: + OS << "post-idx register " << (PostIdxReg.isAdd ? "" : "-") + << PostIdxReg.RegNum; + if (PostIdxReg.ShiftTy != ARM_AM::no_shift) + OS << ARM_AM::getShiftOpcStr(PostIdxReg.ShiftTy) << " " + << PostIdxReg.ShiftImm; OS << ">"; break; - case ProcIFlags: { + case k_ProcIFlags: { OS << "<ARM_PROC::"; unsigned IFlags = getProcIFlags(); for (int i=2; i >= 0; --i) @@ -977,23 +1726,38 @@ void ARMOperand::print(raw_ostream &OS) const { OS << ">"; break; } - case Register: + case k_Register: OS << "<register " << getReg() << ">"; break; - case Shifter: - OS << "<shifter " << ARM_AM::getShiftOpcStr(Shift.ShiftTy) << ">"; + case k_ShifterImmediate: + OS << "<shift " << (ShifterImm.isASR ? "asr" : "lsl") + << " #" << ShifterImm.Imm << ">"; + break; + case k_ShiftedRegister: + OS << "<so_reg_reg " + << RegShiftedReg.SrcReg + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm)) + << ", " << RegShiftedReg.ShiftReg << ", " + << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm) + << ">"; break; - case ShiftedRegister: - OS << "<so_reg" - << ShiftedReg.SrcReg - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(ShiftedReg.ShiftImm)) - << ", " << ShiftedReg.ShiftReg << ", " - << ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) + case k_ShiftedImmediate: + OS << "<so_reg_imm " + << RegShiftedImm.SrcReg + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm)) + << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm) << ">"; break; - case RegisterList: - case DPRRegisterList: - case SPRRegisterList: { + case k_RotateImmediate: + OS << "<ror " << " #" << (RotImm.Imm * 8) << ">"; + break; + case k_BitfieldDescriptor: + OS << "<bitfield " << "lsb: " << Bitfield.LSB + << ", width: " << Bitfield.Width << ">"; + break; + case k_RegisterList: + case k_DPRRegisterList: + case k_SPRRegisterList: { OS << "<register_list "; const SmallVectorImpl<unsigned> &RegList = getRegList(); @@ -1006,9 +1770,12 @@ void ARMOperand::print(raw_ostream &OS) const { OS << ">"; break; } - case Token: + case k_Token: OS << "'" << getToken() << "'"; break; + case k_VectorIndex: + OS << "<vectorindex " << getVectorIndex() << ">"; + break; } } @@ -1021,7 +1788,7 @@ static unsigned MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - RegNo = TryParseRegister(); + RegNo = tryParseRegister(); return (RegNo == (unsigned)-1); } @@ -1030,9 +1797,9 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo, /// and if it is a register name the token is eaten and the register number is /// returned. Otherwise return -1. /// -int ARMAsmParser::TryParseRegister() { +int ARMAsmParser::tryParseRegister() { const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (Tok.isNot(AsmToken::Identifier)) return -1; // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. @@ -1050,6 +1817,39 @@ int ARMAsmParser::TryParseRegister() { if (!RegNum) return -1; Parser.Lex(); // Eat identifier token. + +#if 0 + // Also check for an index operand. This is only legal for vector registers, + // but that'll get caught OK in operand matching, so we don't need to + // explicitly filter everything else out here. 
+ if (Parser.getTok().is(AsmToken::LBrac)) { + SMLoc SIdx = Parser.getTok().getLoc(); + Parser.Lex(); // Eat left bracket token. + + const MCExpr *ImmVal; + SMLoc ExprLoc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(ImmVal)) + return MatchOperand_ParseFail; + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); + if (!MCE) { + TokError("immediate value expected for vector index"); + return MatchOperand_ParseFail; + } + + SMLoc E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(E, "']' expected"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // Eat right bracket token. + + Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), + SIdx, E, + getContext())); + } +#endif + return RegNum; } @@ -1058,7 +1858,7 @@ int ARMAsmParser::TryParseRegister() { // occurs, return -1. An irrecoverable error is one where tokens have been // consumed in the process of trying to parse the shifter (i.e., when it is // indeed a shifter operand, but malformed). -int ARMAsmParser::TryParseShiftRegister( +int ARMAsmParser::tryParseShiftRegister( SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); @@ -1120,7 +1920,7 @@ int ARMAsmParser::TryParseShiftRegister( return -1; } } else if (Parser.getTok().is(AsmToken::Identifier)) { - ShiftReg = TryParseRegister(); + ShiftReg = tryParseRegister(); SMLoc L = Parser.getTok().getLoc(); if (ShiftReg == -1) { Error (L, "expected immediate or register in shift operand"); @@ -1133,8 +1933,12 @@ int ARMAsmParser::TryParseShiftRegister( } } - Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg, - ShiftReg, Imm, + if (ShiftReg && ShiftTy != ARM_AM::rrx) + Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg, + ShiftReg, Imm, + S, Parser.getTok().getLoc())); + else + Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm, S, Parser.getTok().getLoc())); return 0; @@ -1148,9 +1952,9 @@ int ARMAsmParser::TryParseShiftRegister( /// TODO this is likely to change to allow different register types and or to /// parse for a specific register type. bool ARMAsmParser:: -TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); - int RegNo = TryParseRegister(); + int RegNo = tryParseRegister(); if (RegNo == -1) return true; @@ -1161,6 +1965,37 @@ TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(), ExclaimTok.getLoc())); Parser.Lex(); // Eat exclaim token + return false; + } + + // Also check for an index operand. This is only legal for vector registers, + // but that'll get caught OK in operand matching, so we don't need to + // explicitly filter everything else out here. + if (Parser.getTok().is(AsmToken::LBrac)) { + SMLoc SIdx = Parser.getTok().getLoc(); + Parser.Lex(); // Eat left bracket token. 
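+    // annotation (not part of the upstream change): this accepts NEON
+    // scalar syntax such as "vmov.32 d4[1], r2"; the lane number is
+    // only range-checked later, by the isVectorIndex8/16/32 predicates
+    // (lanes < 8, < 4 and < 2 respectively).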
+ + const MCExpr *ImmVal; + SMLoc ExprLoc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(ImmVal)) + return MatchOperand_ParseFail; + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); + if (!MCE) { + TokError("immediate value expected for vector index"); + return MatchOperand_ParseFail; + } + + SMLoc E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(E, "']' expected"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // Eat right bracket token. + + Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), + SIdx, E, + getContext())); } return false; @@ -1209,14 +2044,50 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { return -1; } -/// tryParseCoprocNumOperand - Try to parse an coprocessor number operand. The +/// parseITCondCode - Try to parse a condition code for an IT instruction. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; + unsigned CC = StringSwitch<unsigned>(Tok.getString()) + .Case("eq", ARMCC::EQ) + .Case("ne", ARMCC::NE) + .Case("hs", ARMCC::HS) + .Case("cs", ARMCC::HS) + .Case("lo", ARMCC::LO) + .Case("cc", ARMCC::LO) + .Case("mi", ARMCC::MI) + .Case("pl", ARMCC::PL) + .Case("vs", ARMCC::VS) + .Case("vc", ARMCC::VC) + .Case("hi", ARMCC::HI) + .Case("ls", ARMCC::LS) + .Case("ge", ARMCC::GE) + .Case("lt", ARMCC::LT) + .Case("gt", ARMCC::GT) + .Case("le", ARMCC::LE) + .Case("al", ARMCC::AL) + .Default(~0U); + if (CC == ~0U) + return MatchOperand_NoMatch; + Parser.Lex(); // Eat the token. + + Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), S)); + + return MatchOperand_Success; +} + +/// parseCoprocNumOperand - Try to parse an coprocessor number operand. The /// token must be an Identifier when called, and if it is a coprocessor /// number, the token is eaten and the operand is added to the operand list. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (Tok.isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; int Num = MatchCoprocessorOperandName(Tok.getString(), 'p'); if (Num == -1) @@ -1227,14 +2098,15 @@ tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// tryParseCoprocRegOperand - Try to parse an coprocessor register operand. The +/// parseCoprocRegOperand - Try to parse an coprocessor register operand. The /// token must be an Identifier when called, and if it is a coprocessor /// number, the token is eaten and the operand is added to the operand list. 
ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (Tok.isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c'); if (Reg == -1) @@ -1245,93 +2117,155 @@ tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// Parse a register list, return it if successful else return null. The first -/// token must be a '{' when called. -bool ARMAsmParser:: -ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - assert(Parser.getTok().is(AsmToken::LCurly) && - "Token is not a Left Curly Brace"); +/// parseCoprocOptionOperand - Try to parse an coprocessor option operand. +/// coproc_option : '{' imm0_255 '}' +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); - // Read the rest of the registers in the list. - unsigned PrevRegNum = 0; - SmallVector<std::pair<unsigned, SMLoc>, 32> Registers; - - do { - bool IsRange = Parser.getTok().is(AsmToken::Minus); - Parser.Lex(); // Eat non-identifier token. - - const AsmToken &RegTok = Parser.getTok(); - SMLoc RegLoc = RegTok.getLoc(); - if (RegTok.isNot(AsmToken::Identifier)) { - Error(RegLoc, "register expected"); - return true; - } - - int RegNum = TryParseRegister(); - if (RegNum == -1) { - Error(RegLoc, "register expected"); - return true; - } - - if (IsRange) { - int Reg = PrevRegNum; - do { - ++Reg; - Registers.push_back(std::make_pair(Reg, RegLoc)); - } while (Reg != RegNum); - } else { - Registers.push_back(std::make_pair(RegNum, RegLoc)); - } - - PrevRegNum = RegNum; - } while (Parser.getTok().is(AsmToken::Comma) || - Parser.getTok().is(AsmToken::Minus)); + // If this isn't a '{', this isn't a coprocessor immediate operand. + if (Parser.getTok().isNot(AsmToken::LCurly)) + return MatchOperand_NoMatch; + Parser.Lex(); // Eat the '{' - // Process the right curly brace of the list. - const AsmToken &RCurlyTok = Parser.getTok(); - if (RCurlyTok.isNot(AsmToken::RCurly)) { - Error(RCurlyTok.getLoc(), "'}' expected"); - return true; + const MCExpr *Expr; + SMLoc Loc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(Expr)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; } + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr); + if (!CE || CE->getValue() < 0 || CE->getValue() > 255) { + Error(Loc, "coprocessor option must be an immediate in range [0, 255]"); + return MatchOperand_ParseFail; + } + int Val = CE->getValue(); - SMLoc E = RCurlyTok.getLoc(); - Parser.Lex(); // Eat right curly brace token. - - // Verify the register list. 
-  SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
-    RI = Registers.begin(), RE = Registers.end();
+  // Check for and consume the closing '}'
+  if (Parser.getTok().isNot(AsmToken::RCurly))
+    return MatchOperand_ParseFail;
+  SMLoc E = Parser.getTok().getLoc();
+  Parser.Lex(); // Eat the '}'
 
-  unsigned HighRegNum = getARMRegisterNumbering(RI->first);
-  bool EmittedWarning = false;
+  Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
+  return MatchOperand_Success;
+}
 
-  DenseMap<unsigned, bool> RegMap;
-  RegMap[HighRegNum] = true;
+// For register list parsing, we need to map from raw GPR register numbering
+// to the enumeration values. The enumeration values aren't sorted by
+// register number due to our using "sp", "lr" and "pc" as canonical names.
+static unsigned getNextRegister(unsigned Reg) {
+  // If this is a GPR, we need to do it manually, otherwise we can rely
+  // on the sort ordering of the enumeration since the other reg-classes
+  // are sane.
+  if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+    return Reg + 1;
+  switch(Reg) {
+  default: assert(0 && "Invalid GPR number!");
+  case ARM::R0:  return ARM::R1;  case ARM::R1:  return ARM::R2;
+  case ARM::R2:  return ARM::R3;  case ARM::R3:  return ARM::R4;
+  case ARM::R4:  return ARM::R5;  case ARM::R5:  return ARM::R6;
+  case ARM::R6:  return ARM::R7;  case ARM::R7:  return ARM::R8;
+  case ARM::R8:  return ARM::R9;  case ARM::R9:  return ARM::R10;
+  case ARM::R10: return ARM::R11; case ARM::R11: return ARM::R12;
+  case ARM::R12: return ARM::SP;  case ARM::SP:  return ARM::LR;
+  case ARM::LR:  return ARM::PC;  case ARM::PC:  return ARM::R0;
+  }
+}
 
-  for (++RI; RI != RE; ++RI) {
-    const std::pair<unsigned, SMLoc> &RegInfo = *RI;
-    unsigned Reg = getARMRegisterNumbering(RegInfo.first);
+/// Parse a register list.
+bool ARMAsmParser::
+parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  assert(Parser.getTok().is(AsmToken::LCurly) &&
+         "Token is not a Left Curly Brace");
+  SMLoc S = Parser.getTok().getLoc();
+  Parser.Lex(); // Eat '{' token.
+  SMLoc RegLoc = Parser.getTok().getLoc();
 
-    if (RegMap[Reg]) {
-      Error(RegInfo.second, "register duplicated in register list");
-      return true;
+  // Check the first register in the list to see what register class
+  // this is a list of.
+  int Reg = tryParseRegister();
+  if (Reg == -1)
+    return Error(RegLoc, "register expected");
+
+  MCRegisterClass *RC;
+  if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+    RC = &ARMMCRegisterClasses[ARM::GPRRegClassID];
+  else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
+    RC = &ARMMCRegisterClasses[ARM::DPRRegClassID];
+  else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg))
+    RC = &ARMMCRegisterClasses[ARM::SPRRegClassID];
+  else
+    return Error(RegLoc, "invalid register in register list");
+
+  // The reglist instructions have at most 16 registers, so reserve
+  // space for that many.
+  SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
+  // Store the first register.
+  Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+
+  // This starts immediately after the first register token in the list,
+  // so we can see either a comma or a minus (range separator) as a legal
+  // next token.
+  while (Parser.getTok().is(AsmToken::Comma) ||
+         Parser.getTok().is(AsmToken::Minus)) {
+    if (Parser.getTok().is(AsmToken::Minus)) {
+      Parser.Lex(); // Eat the minus.
+      SMLoc EndLoc = Parser.getTok().getLoc();
+      int EndReg = tryParseRegister();
+      if (EndReg == -1)
+        return Error(EndLoc, "register expected");
+      // If the register is the same as the start reg, there's nothing
+      // more to do.
+      if (Reg == EndReg)
+        continue;
+      // The register must be in the same register class as the first.
+      if (!RC->contains(EndReg))
+        return Error(EndLoc, "invalid register in register list");
+      // Ranges must go from low to high.
+      if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg))
+        return Error(EndLoc, "bad range in register list");
+
+      // Add all the registers in the range to the register list.
+      while (Reg != EndReg) {
+        Reg = getNextRegister(Reg);
+        Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+      }
+      continue;
     }
-
-    if (!EmittedWarning && Reg < HighRegNum)
-      Warning(RegInfo.second,
-              "register not in ascending order in register list");
-
-    RegMap[Reg] = true;
-    HighRegNum = std::max(Reg, HighRegNum);
+    Parser.Lex(); // Eat the comma.
+    RegLoc = Parser.getTok().getLoc();
+    int OldReg = Reg;
+    Reg = tryParseRegister();
+    if (Reg == -1)
+      return Error(RegLoc, "register expected");
+    // The register must be in the same register class as the first.
+    if (!RC->contains(Reg))
+      return Error(RegLoc, "invalid register in register list");
+    // List must be monotonically increasing.
+    if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
+      return Error(RegLoc, "register list not in ascending order");
+    // VFP register lists must also be contiguous.
+    // It's OK to use the enumeration values directly here, as the
+    // VFP register classes have the enum sorted properly.
+    if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
+        Reg != OldReg + 1)
+      return Error(RegLoc, "non-contiguous register range");
+    Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+  }
+  SMLoc E = Parser.getTok().getLoc();
+  if (Parser.getTok().isNot(AsmToken::RCurly))
+    return Error(E, "'}' expected");
+  Parser.Lex(); // Eat '}' token.
+
+  Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
   return false;
 }
 
-/// tryParseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
 ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
@@ -1360,28 +2294,32 @@ tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   return MatchOperand_Success;
 }
 
-/// tryParseProcIFlagsOperand - Try to parse iflags from CPS instruction.
+/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
 ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
   StringRef IFlagsStr = Tok.getString();
 
+  // An iflags string of "none" is interpreted to mean that none of the AIF
+  // bits are set. Not a terribly useful instruction, but a valid encoding.
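+  // A short sketch of what this rule admits (assuming the usual CPS
+  // spellings handled elsewhere in this parser):
+  //   cpsie if     @ set the I and F bits
+  //   cpsid none   @ set no AIF bits at all -- legal, if pointless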
unsigned IFlags = 0; - for (int i = 0, e = IFlagsStr.size(); i != e; ++i) { - unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1)) - .Case("a", ARM_PROC::A) - .Case("i", ARM_PROC::I) - .Case("f", ARM_PROC::F) - .Default(~0U); - - // If some specific iflag is already set, it means that some letter is - // present more than once, this is not acceptable. - if (Flag == ~0U || (IFlags & Flag)) - return MatchOperand_NoMatch; + if (IFlagsStr != "none") { + for (int i = 0, e = IFlagsStr.size(); i != e; ++i) { + unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1)) + .Case("a", ARM_PROC::A) + .Case("i", ARM_PROC::I) + .Case("f", ARM_PROC::F) + .Default(~0U); + + // If some specific iflag is already set, it means that some letter is + // present more than once, this is not acceptable. + if (Flag == ~0U || (IFlags & Flag)) + return MatchOperand_NoMatch; - IFlags |= Flag; + IFlags |= Flag; + } } Parser.Lex(); // Eat identifier token. @@ -1389,18 +2327,49 @@ tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// tryParseMSRMaskOperand - Try to parse mask flags from MSR instruction. +/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); StringRef Mask = Tok.getString(); + if (isMClass()) { + // See ARMv6-M 10.1.1 + unsigned FlagsVal = StringSwitch<unsigned>(Mask) + .Case("apsr", 0) + .Case("iapsr", 1) + .Case("eapsr", 2) + .Case("xpsr", 3) + .Case("ipsr", 5) + .Case("epsr", 6) + .Case("iepsr", 7) + .Case("msp", 8) + .Case("psp", 9) + .Case("primask", 16) + .Case("basepri", 17) + .Case("basepri_max", 18) + .Case("faultmask", 19) + .Case("control", 20) + .Default(~0U); + + if (FlagsVal == ~0U) + return MatchOperand_NoMatch; + + if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19) + // basepri, basepri_max and faultmask only valid for V7m. + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. + Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S)); + return MatchOperand_Success; + } + // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf" size_t Start = 0, Next = Mask.find('_'); StringRef Flags = ""; - StringRef SpecReg = Mask.slice(Start, Next); + std::string SpecReg = LowercaseString(Mask.slice(Start, Next)); if (Next != StringRef::npos) Flags = Mask.slice(Next+1, Mask.size()); @@ -1411,7 +2380,7 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (SpecReg == "apsr") { FlagsVal = StringSwitch<unsigned>(Flags) - .Case("nzcvq", 0x8) // same as CPSR_c + .Case("nzcvq", 0x8) // same as CPSR_f .Case("g", 0x4) // same as CPSR_s .Case("nzcvqg", 0xc) // same as CPSR_fs .Default(~0U); @@ -1420,7 +2389,7 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!Flags.empty()) return MatchOperand_NoMatch; else - FlagsVal = 0; // No flag + FlagsVal = 8; // No flag } } else if (SpecReg == "cpsr" || SpecReg == "spsr") { if (Flags == "all") // cpsr_all is an alias for cpsr_fc @@ -1455,96 +2424,680 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// tryParseMemMode2Operand - Try to parse memory addressing mode 2 operand. 
ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseMemMode2Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\""); +parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, + int Low, int High) { + const AsmToken &Tok = Parser.getTok(); + if (Tok.isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), Op + " operand expected."); + return MatchOperand_ParseFail; + } + StringRef ShiftName = Tok.getString(); + std::string LowerOp = LowercaseString(Op); + std::string UpperOp = UppercaseString(Op); + if (ShiftName != LowerOp && ShiftName != UpperOp) { + Error(Parser.getTok().getLoc(), Op + " operand expected."); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat shift type token. - if (ParseMemory(Operands, ARMII::AddrMode2)) - return MatchOperand_NoMatch; + // There must be a '#' and a shift amount. + if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "'#' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat hash token. + + const MCExpr *ShiftAmount; + SMLoc Loc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(ShiftAmount)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; + } + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount); + if (!CE) { + Error(Loc, "constant expression expected"); + return MatchOperand_ParseFail; + } + int Val = CE->getValue(); + if (Val < Low || Val > High) { + Error(Loc, "immediate value out of range"); + return MatchOperand_ParseFail; + } + + Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc())); return MatchOperand_Success; } -/// tryParseMemMode3Operand - Try to parse memory addressing mode 3 operand. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -tryParseMemMode3Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\""); +parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + const AsmToken &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + if (Tok.isNot(AsmToken::Identifier)) { + Error(Tok.getLoc(), "'be' or 'le' operand expected"); + return MatchOperand_ParseFail; + } + int Val = StringSwitch<int>(Tok.getString()) + .Case("be", 1) + .Case("le", 0) + .Default(-1); + Parser.Lex(); // Eat the token. + + if (Val == -1) { + Error(Tok.getLoc(), "'be' or 'le' operand expected"); + return MatchOperand_ParseFail; + } + Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val, + getContext()), + S, Parser.getTok().getLoc())); + return MatchOperand_Success; +} + +/// parseShifterImm - Parse the shifter immediate operand for SSAT/USAT +/// instructions. Legal values are: +/// lsl #n 'n' in [0,31] +/// asr #n 'n' in [1,32] +/// n == 32 encoded as n == 0. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + const AsmToken &Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + if (Tok.isNot(AsmToken::Identifier)) { + Error(S, "shift operator 'asr' or 'lsl' expected"); + return MatchOperand_ParseFail; + } + StringRef ShiftName = Tok.getString(); + bool isASR; + if (ShiftName == "lsl" || ShiftName == "LSL") + isASR = false; + else if (ShiftName == "asr" || ShiftName == "ASR") + isASR = true; + else { + Error(S, "shift operator 'asr' or 'lsl' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat the operator. + + // A '#' and a shift amount. 
+  if (Parser.getTok().isNot(AsmToken::Hash)) {
+    Error(Parser.getTok().getLoc(), "'#' expected");
+    return MatchOperand_ParseFail;
+  }
+  Parser.Lex(); // Eat hash token.
+
+  const MCExpr *ShiftAmount;
+  SMLoc E = Parser.getTok().getLoc();
+  if (getParser().ParseExpression(ShiftAmount)) {
+    Error(E, "malformed shift expression");
+    return MatchOperand_ParseFail;
+  }
+  const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+  if (!CE) {
+    Error(E, "shift amount must be an immediate");
+    return MatchOperand_ParseFail;
+  }
 
-  if (ParseMemory(Operands, ARMII::AddrMode3))
+  int64_t Val = CE->getValue();
+  if (isASR) {
+    // Shift amount must be in [1,32]
+    if (Val < 1 || Val > 32) {
+      Error(E, "'asr' shift amount must be in range [1,32]");
+      return MatchOperand_ParseFail;
+    }
+    // asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
+    if (isThumb() && Val == 32) {
+      Error(E, "'asr #32' shift amount not allowed in Thumb mode");
+      return MatchOperand_ParseFail;
+    }
+    if (Val == 32) Val = 0;
+  } else {
+    // Shift amount must be in [0,31]
+    if (Val < 0 || Val > 31) {
+      Error(E, "'lsl' shift amount must be in range [0,31]");
+      return MatchOperand_ParseFail;
+    }
+  }
+
+  E = Parser.getTok().getLoc();
+  Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E));
+
+  return MatchOperand_Success;
+}
+
+/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
+/// of instructions. Legal values are:
+///     ror #n  'n' in {0, 8, 16, 24}
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc S = Tok.getLoc();
+  if (Tok.isNot(AsmToken::Identifier))
+    return MatchOperand_NoMatch;
+  StringRef ShiftName = Tok.getString();
+  if (ShiftName != "ror" && ShiftName != "ROR")
     return MatchOperand_NoMatch;
+  Parser.Lex(); // Eat the operator.
+
+  // A '#' and a rotate amount.
+  if (Parser.getTok().isNot(AsmToken::Hash)) {
+    Error(Parser.getTok().getLoc(), "'#' expected");
+    return MatchOperand_ParseFail;
+  }
+  Parser.Lex(); // Eat hash token.
+
+  const MCExpr *ShiftAmount;
+  SMLoc E = Parser.getTok().getLoc();
+  if (getParser().ParseExpression(ShiftAmount)) {
+    Error(E, "malformed rotate expression");
+    return MatchOperand_ParseFail;
+  }
+  const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
+  if (!CE) {
+    Error(E, "rotate amount must be an immediate");
+    return MatchOperand_ParseFail;
+  }
+
+  int64_t Val = CE->getValue();
+  // Rotate amount must be in {0, 8, 16, 24} (0 is an undocumented extension;
+  // normally, zero is represented in asm by omitting the rotate operand
+  // entirely).
+  if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
+    Error(E, "'ror' rotate amount must be 8, 16, or 24");
+    return MatchOperand_ParseFail;
+  }
+
+  E = Parser.getTok().getLoc();
+  Operands.push_back(ARMOperand::CreateRotImm(Val, S, E));
+
+  return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  // The bitfield descriptor is really two operands, the LSB and the width.
+  if (Parser.getTok().isNot(AsmToken::Hash)) {
+    Error(Parser.getTok().getLoc(), "'#' expected");
+    return MatchOperand_ParseFail;
+  }
+  Parser.Lex(); // Eat hash token.
+
+  const MCExpr *LSBExpr;
+  SMLoc E = Parser.getTok().getLoc();
+  if (getParser().ParseExpression(LSBExpr)) {
+    Error(E, "malformed immediate expression");
+    return MatchOperand_ParseFail;
+  }
+  const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LSBExpr);
+  if (!CE) {
+    Error(E, "'lsb' operand must be an immediate");
+    return MatchOperand_ParseFail;
+  }
+
+  int64_t LSB = CE->getValue();
+  // The LSB must be in the range [0,31]
+  if (LSB < 0 || LSB > 31) {
+    Error(E, "'lsb' operand must be in the range [0,31]");
+    return MatchOperand_ParseFail;
+  }
+  E = Parser.getTok().getLoc();
+
+  // Expect another immediate operand.
+  if (Parser.getTok().isNot(AsmToken::Comma)) {
+    Error(Parser.getTok().getLoc(), "too few operands");
+    return MatchOperand_ParseFail;
+  }
+  Parser.Lex(); // Eat comma token.
+  if (Parser.getTok().isNot(AsmToken::Hash)) {
+    Error(Parser.getTok().getLoc(), "'#' expected");
+    return MatchOperand_ParseFail;
+  }
+  Parser.Lex(); // Eat hash token.
+
+  const MCExpr *WidthExpr;
+  if (getParser().ParseExpression(WidthExpr)) {
+    Error(E, "malformed immediate expression");
+    return MatchOperand_ParseFail;
+  }
+  CE = dyn_cast<MCConstantExpr>(WidthExpr);
+  if (!CE) {
+    Error(E, "'width' operand must be an immediate");
+    return MatchOperand_ParseFail;
+  }
+
+  int64_t Width = CE->getValue();
+  // The width must be in the range [1,32-lsb]
+  if (Width < 1 || Width > 32 - LSB) {
+    Error(E, "'width' operand must be in the range [1,32-lsb]");
+    return MatchOperand_ParseFail;
+  }
+  E = Parser.getTok().getLoc();
+
+  Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E));
   return MatchOperand_Success;
 }
 
-/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Check for a post-index addressing register operand. Specifically:
+  //  postidx_reg := '+' register {, shift}
+  //              | '-' register {, shift}
+  //              | register {, shift}
+
+  // This method must return MatchOperand_NoMatch without consuming any tokens
+  // in the case where there is no match, as other alternatives take other
+  // parse methods.
+  AsmToken Tok = Parser.getTok();
+  SMLoc S = Tok.getLoc();
+  bool haveEaten = false;
+  bool isAdd = true;
+  int Reg = -1;
+  if (Tok.is(AsmToken::Plus)) {
+    Parser.Lex(); // Eat the '+' token.
+    haveEaten = true;
+  } else if (Tok.is(AsmToken::Minus)) {
+    Parser.Lex(); // Eat the '-' token.
+    isAdd = false;
+    haveEaten = true;
+  }
+  if (Parser.getTok().is(AsmToken::Identifier))
+    Reg = tryParseRegister();
+  if (Reg == -1) {
+    if (!haveEaten)
+      return MatchOperand_NoMatch;
+    Error(Parser.getTok().getLoc(), "register expected");
+    return MatchOperand_ParseFail;
+  }
+  SMLoc E = Parser.getTok().getLoc();
+
+  ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
+  unsigned ShiftImm = 0;
+  if (Parser.getTok().is(AsmToken::Comma)) {
+    Parser.Lex(); // Eat the ','.
+    if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
+      return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
+                                                  ShiftImm, S, E));
+
+  return MatchOperand_Success;
+}
+
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Check for a post-index addressing register operand.
Specifically: + // am3offset := '+' register + // | '-' register + // | register + // | # imm + // | # + imm + // | # - imm + + // This method must return MatchOperand_NoMatch without consuming any tokens + // in the case where there is no match, as other alternatives take other + // parse methods. + AsmToken Tok = Parser.getTok(); + SMLoc S = Tok.getLoc(); + + // Do immediates first, as we always parse those if we have a '#'. + if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); // Eat the '#'. + // Explicitly look for a '-', as we need to encode negative zero + // differently. + bool isNegative = Parser.getTok().is(AsmToken::Minus); + const MCExpr *Offset; + if (getParser().ParseExpression(Offset)) + return MatchOperand_ParseFail; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset); + if (!CE) { + Error(S, "constant expression expected"); + return MatchOperand_ParseFail; + } + SMLoc E = Tok.getLoc(); + // Negative zero is encoded as the flag value INT32_MIN. + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + Val = INT32_MIN; + + Operands.push_back( + ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), S, E)); + + return MatchOperand_Success; + } + + + bool haveEaten = false; + bool isAdd = true; + int Reg = -1; + if (Tok.is(AsmToken::Plus)) { + Parser.Lex(); // Eat the '+' token. + haveEaten = true; + } else if (Tok.is(AsmToken::Minus)) { + Parser.Lex(); // Eat the '-' token. + isAdd = false; + haveEaten = true; + } + if (Parser.getTok().is(AsmToken::Identifier)) + Reg = tryParseRegister(); + if (Reg == -1) { + if (!haveEaten) + return MatchOperand_NoMatch; + Error(Parser.getTok().getLoc(), "register expected"); + return MatchOperand_ParseFail; + } + SMLoc E = Parser.getTok().getLoc(); + + Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift, + 0, S, E)); + + return MatchOperand_Success; +} + +/// cvtT2LdrdPre - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Rt, Rt2 + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateReg(0)); + // addr + ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtT2StrdPre - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtT2StrdPre(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateReg(0)); + // Rt, Rt2 + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); + // addr + ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. 
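+/// As a rough illustration: for "ldr r0, [r1, #8]!" the updated base (r1)
+/// has no operand of its own in the asm string, so a dummy writeback
+/// placeholder is pushed and the tied-operand machinery is assumed to
+/// resolve it later.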
bool ARMAsmParser:: -CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3); + ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; } -/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. +/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. bool ARMAsmParser:: -CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3); + ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; } -/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. +/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. bool ARMAsmParser:: -CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3); + ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + + ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + + +/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. 
+ Inst.addOperand(MCOperand::CreateImm(0)); + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; } -/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. +/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. bool ARMAsmParser:: -CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3); + ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Rt + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // addr + ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); + // offset + ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Rt + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // addr + ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); + // offset + ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. 
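+/// For these store-translate conversions the parsed list is, roughly,
+/// { mnemonic, pred, Rt, addr, offset } -- e.g. "strt r0, [r1], #4" -- so
+/// Operands[2] is Rt, Operands[3] the bare base, and Operands[4] the
+/// post-index offset (an illustrative reading of the code below).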
+bool ARMAsmParser::
+cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode,
+                      const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  // Rt
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+  // addr
+  ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+  // offset
+  ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1);
+  // pred
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
+/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode,
+                      const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  // Rt
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+  // addr
+  ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1);
+  // offset
+  ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2);
+  // pred
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
+/// cvtLdrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdrdPre(MCInst &Inst, unsigned Opcode,
+           const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Rt, Rt2
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+  ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  // addr
+  ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
+  // pred
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
+/// cvtStrdPre - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtStrdPre(MCInst &Inst, unsigned Opcode,
+           const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  // Rt, Rt2
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+  ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
+  // addr
+  ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3);
+  // pred
   ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
   return true;
 }
 
+/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
+bool ARMAsmParser::
+cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+                         const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3);
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
+/// cvtThumbMultiply - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't handle properly tied operands
+/// when they refer multiple MIOperands inside a single one.
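+/// e.g. "muls r0, r1, r0" is accepted (the destination matches a source),
+/// while "muls r0, r1, r2" trips the check below (illustrative).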
+bool ARMAsmParser:: +cvtThumbMultiply(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // The second source operand must be the same register as the destination + // operand. + if (Operands.size() == 6 && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[5])->getReg()) && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[4])->getReg())) { + Error(Operands[3]->getStartLoc(), + "destination register must match source register"); + return false; + } + ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1); + ((ARMOperand*)Operands[4])->addRegOperands(Inst, 1); + // If we have a three-operand form, use that, else the second source operand + // is just the destination operand again. + if (Operands.size() == 6) + ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); + else + Inst.addOperand(Inst.getOperand(0)); + ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); + + return true; +} + /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. -/// -/// TODO Only preindexing and postindexing addressing are started, unindexed -/// with option, etc are still to do. bool ARMAsmParser:: -ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - ARMII::AddrMode AddrMode = ARMII::AddrModeNone) { +parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S, E; assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); @@ -1552,185 +3105,178 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Parser.Lex(); // Eat left bracket token. const AsmToken &BaseRegTok = Parser.getTok(); - if (BaseRegTok.isNot(AsmToken::Identifier)) { - Error(BaseRegTok.getLoc(), "register expected"); - return true; - } - int BaseRegNum = TryParseRegister(); - if (BaseRegNum == -1) { - Error(BaseRegTok.getLoc(), "register expected"); - return true; - } + int BaseRegNum = tryParseRegister(); + if (BaseRegNum == -1) + return Error(BaseRegTok.getLoc(), "register expected"); // The next token must either be a comma or a closing bracket. const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac)) - return true; + return Error(Tok.getLoc(), "malformed memory operand"); - bool Preindexed = false; - bool Postindexed = false; - bool OffsetIsReg = false; - bool Negative = false; - bool Writeback = false; - ARMOperand *WBOp = 0; - int OffsetRegNum = -1; - bool OffsetRegShifted = false; - enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl; - const MCExpr *ShiftAmount = 0; - const MCExpr *Offset = 0; - - // First look for preindexed address forms, that is after the "[Rn" we now - // have to see if the next token is a comma. - if (Tok.is(AsmToken::Comma)) { - Preindexed = true; - Parser.Lex(); // Eat comma token. - - if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, - Offset, OffsetIsReg, OffsetRegNum, E)) - return true; - const AsmToken &RBracTok = Parser.getTok(); - if (RBracTok.isNot(AsmToken::RBrac)) { - Error(RBracTok.getLoc(), "']' expected"); - return true; - } - E = RBracTok.getLoc(); + if (Tok.is(AsmToken::RBrac)) { + E = Tok.getLoc(); Parser.Lex(); // Eat right bracket token. - const AsmToken &ExclaimTok = Parser.getTok(); - if (ExclaimTok.is(AsmToken::Exclaim)) { - // None of addrmode3 instruction uses "!" 
- if (AddrMode == ARMII::AddrMode3) - return true; + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift, + 0, 0, false, S, E)); - WBOp = ARMOperand::CreateToken(ExclaimTok.getString(), - ExclaimTok.getLoc()); - Writeback = true; - Parser.Lex(); // Eat exclaim token - } else { // In addressing mode 2, pre-indexed mode always end with "!" - if (AddrMode == ARMII::AddrMode2) - Preindexed = false; + // If there's a pre-indexing writeback marker, '!', just add it as a token + // operand. It's rather odd, but syntactically valid. + if (Parser.getTok().is(AsmToken::Exclaim)) { + Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc())); + Parser.Lex(); // Eat the '!'. } - } else { - // The "[Rn" we have so far was not followed by a comma. - // If there's anything other than the right brace, this is a post indexing - // addressing form. - E = Tok.getLoc(); - Parser.Lex(); // Eat right bracket token. + return false; + } - const AsmToken &NextTok = Parser.getTok(); + assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!"); + Parser.Lex(); // Eat the comma. - if (NextTok.isNot(AsmToken::EndOfStatement)) { - Postindexed = true; - Writeback = true; + // If we have a ':', it's an alignment specifier. + if (Parser.getTok().is(AsmToken::Colon)) { + Parser.Lex(); // Eat the ':'. + E = Parser.getTok().getLoc(); - if (NextTok.isNot(AsmToken::Comma)) { - Error(NextTok.getLoc(), "',' expected"); - return true; - } - - Parser.Lex(); // Eat comma token. + const MCExpr *Expr; + if (getParser().ParseExpression(Expr)) + return true; - if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, - ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, - E)) - return true; + // The expression has to be a constant. Memory references with relocations + // don't come through here, as they use the <label> forms of the relevant + // instructions. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr); + if (!CE) + return Error (E, "constant expression expected"); + + unsigned Align = 0; + switch (CE->getValue()) { + default: + return Error(E, "alignment specifier must be 64, 128, or 256 bits"); + case 64: Align = 8; break; + case 128: Align = 16; break; + case 256: Align = 32; break; } - } - // Force Offset to exist if used. - if (!OffsetIsReg) { - if (!Offset) - Offset = MCConstantExpr::Create(0, getContext()); - } else { - if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) { - Error(E, "shift amount not supported"); - return true; + // Now we should have the closing ']' + E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) + return Error(E, "']' expected"); + Parser.Lex(); // Eat right bracket token. + + // Don't worry about range checking the value here. That's handled by + // the is*() predicates. + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, + ARM_AM::no_shift, 0, Align, + false, S, E)); + + // If there's a pre-indexing writeback marker, '!', just add it as a token + // operand. + if (Parser.getTok().is(AsmToken::Exclaim)) { + Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc())); + Parser.Lex(); // Eat the '!'. } + + return false; } - Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg, - Offset, OffsetRegNum, OffsetRegShifted, - ShiftType, ShiftAmount, Preindexed, - Postindexed, Negative, Writeback, S, E)); - if (WBOp) - Operands.push_back(WBOp); + // If we have a '#', it's an immediate offset, else assume it's a register + // offset. 
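+  // e.g. "[r0, #-8]" takes the immediate path below, while "[r0, r1, lsl #2]"
+  // and "[r0, -r2]" fall through to the register-offset handling (sketch).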
+ if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); // Eat the '#'. + E = Parser.getTok().getLoc(); - return false; -} + bool isNegative = getParser().getTok().is(AsmToken::Minus); + const MCExpr *Offset; + if (getParser().ParseExpression(Offset)) + return true; + + // The expression has to be a constant. Memory references with relocations + // don't come through here, as they use the <label> forms of the relevant + // instructions. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset); + if (!CE) + return Error (E, "constant expression expected"); + + // If the constant was #-0, represent it as INT32_MIN. + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + CE = MCConstantExpr::Create(INT32_MIN, getContext()); + + // Now we should have the closing ']' + E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) + return Error(E, "']' expected"); + Parser.Lex(); // Eat right bracket token. -/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn]," -/// we will parse the following (were +/- means that a plus or minus is -/// optional): -/// +/-Rm -/// +/-Rm, shift -/// #offset -/// we return false on success or an error otherwise. -bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, - bool &OffsetRegShifted, - enum ARM_AM::ShiftOpc &ShiftType, - const MCExpr *&ShiftAmount, - const MCExpr *&Offset, - bool &OffsetIsReg, - int &OffsetRegNum, - SMLoc &E) { - Negative = false; - OffsetRegShifted = false; - OffsetIsReg = false; - OffsetRegNum = -1; - const AsmToken &NextTok = Parser.getTok(); - E = NextTok.getLoc(); - if (NextTok.is(AsmToken::Plus)) - Parser.Lex(); // Eat plus token. - else if (NextTok.is(AsmToken::Minus)) { - Negative = true; - Parser.Lex(); // Eat minus token - } - // See if there is a register following the "[Rn," or "[Rn]," we have so far. - const AsmToken &OffsetRegTok = Parser.getTok(); - if (OffsetRegTok.is(AsmToken::Identifier)) { - SMLoc CurLoc = OffsetRegTok.getLoc(); - OffsetRegNum = TryParseRegister(); - if (OffsetRegNum != -1) { - OffsetIsReg = true; - E = CurLoc; + // Don't worry about range checking the value here. That's handled by + // the is*() predicates. + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, CE, 0, + ARM_AM::no_shift, 0, 0, + false, S, E)); + + // If there's a pre-indexing writeback marker, '!', just add it as a token + // operand. + if (Parser.getTok().is(AsmToken::Exclaim)) { + Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc())); + Parser.Lex(); // Eat the '!'. } - } - // If we parsed a register as the offset then there can be a shift after that. - if (OffsetRegNum != -1) { - // Look for a comma then a shift - const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Comma)) { - Parser.Lex(); // Eat comma token. + return false; + } - const AsmToken &Tok = Parser.getTok(); - if (ParseShift(ShiftType, ShiftAmount, E)) - return Error(Tok.getLoc(), "shift expected"); - OffsetRegShifted = true; - } + // The register offset is optionally preceded by a '+' or '-' + bool isNegative = false; + if (Parser.getTok().is(AsmToken::Minus)) { + isNegative = true; + Parser.Lex(); // Eat the '-'. + } else if (Parser.getTok().is(AsmToken::Plus)) { + // Nothing to do. + Parser.Lex(); // Eat the '+'. 
   }
-  else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm"
-    // Look for #offset following the "[Rn," or "[Rn],"
-    const AsmToken &HashTok = Parser.getTok();
-    if (HashTok.isNot(AsmToken::Hash))
-      return Error(HashTok.getLoc(), "'#' expected");
-    Parser.Lex(); // Eat hash token.
+  E = Parser.getTok().getLoc();
+  int OffsetRegNum = tryParseRegister();
+  if (OffsetRegNum == -1)
+    return Error(E, "register expected");
+
+  // If there's a shift operator, handle it.
+  ARM_AM::ShiftOpc ShiftType = ARM_AM::no_shift;
+  unsigned ShiftImm = 0;
+  if (Parser.getTok().is(AsmToken::Comma)) {
+    Parser.Lex(); // Eat the ','.
+    if (parseMemRegOffsetShift(ShiftType, ShiftImm))
+      return true;
+  }
 
-    if (getParser().ParseExpression(Offset))
-      return true;
-    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+  // Now we should have the closing ']'
+  E = Parser.getTok().getLoc();
+  if (Parser.getTok().isNot(AsmToken::RBrac))
+    return Error(E, "']' expected");
+  Parser.Lex(); // Eat right bracket token.
+
+  Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
+                                           ShiftType, ShiftImm, 0, isNegative,
+                                           S, E));
+
+  // If there's a pre-indexing writeback marker, '!', just add it as a token
+  // operand.
+  if (Parser.getTok().is(AsmToken::Exclaim)) {
+    Operands.push_back(ARMOperand::CreateToken("!",Parser.getTok().getLoc()));
+    Parser.Lex(); // Eat the '!'.
   }
+
   return false;
 }
 
-/// ParseShift as one of these two:
+/// parseMemRegOffsetShift - one of these two:
 ///   ( lsl | lsr | asr | ror ) , # shift_amount
 ///   rrx
-/// and returns true if it parses a shift otherwise it returns false.
-bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
-                              const MCExpr *&ShiftAmount, SMLoc &E) {
+/// returns false on a successful parse; returns true (an error) otherwise.
+bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
+                                          unsigned &Amount) {
+  SMLoc Loc = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
     return true;
@@ -1746,28 +3292,86 @@ bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
   else if (ShiftName == "rrx" || ShiftName == "RRX")
     St = ARM_AM::rrx;
   else
-    return true;
+    return Error(Loc, "illegal shift operator");
   Parser.Lex(); // Eat shift type token.
 
-  // Rrx stands alone.
-  if (St == ARM_AM::rrx)
-    return false;
-
-  // Otherwise, there must be a '#' and a shift amount.
-  const AsmToken &HashTok = Parser.getTok();
-  if (HashTok.isNot(AsmToken::Hash))
-    return Error(HashTok.getLoc(), "'#' expected");
-  Parser.Lex(); // Eat hash token.
+  // rrx stands alone.
+  Amount = 0;
+  if (St != ARM_AM::rrx) {
+    Loc = Parser.getTok().getLoc();
+    // A '#' and a shift amount.
+    const AsmToken &HashTok = Parser.getTok();
+    if (HashTok.isNot(AsmToken::Hash))
+      return Error(HashTok.getLoc(), "'#' expected");
+    Parser.Lex(); // Eat hash token.
 
-  if (getParser().ParseExpression(ShiftAmount))
-    return true;
+    const MCExpr *Expr;
+    if (getParser().ParseExpression(Expr))
+      return true;
+    // Range check the immediate.
+    // lsl, ror: 0 <= imm <= 31
+    // lsr, asr: 0 <= imm <= 32
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+    if (!CE)
+      return Error(Loc, "shift amount must be an immediate");
+    int64_t Imm = CE->getValue();
+    if (Imm < 0 ||
+        ((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) ||
+        ((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32))
+      return Error(Loc, "immediate shift value out of range");
+    Amount = Imm;
+  }
 
   return false;
 }
 
+/// parseFPImm - A floating point immediate expression operand.
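+/// Accepts either a literal that fits the 8-bit VFP immediate encoding
+/// (e.g. "#0.5") or an already-encoded integer in [0,255] (e.g. "#112"),
+/// per the checks below -- a sketch of the accepted forms.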
+ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + + if (Parser.getTok().isNot(AsmToken::Hash)) + return MatchOperand_NoMatch; + Parser.Lex(); // Eat the '#'. + + // Handle negation, as that still comes through as a separate token. + bool isNegative = false; + if (Parser.getTok().is(AsmToken::Minus)) { + isNegative = true; + Parser.Lex(); + } + const AsmToken &Tok = Parser.getTok(); + if (Tok.is(AsmToken::Real)) { + APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); + // If we had a '-' in front, toggle the sign bit. + IntVal ^= (uint64_t)isNegative << 63; + int Val = ARM_AM::getFP64Imm(APInt(64, IntVal)); + Parser.Lex(); // Eat the token. + if (Val == -1) { + TokError("floating point value out of range"); + return MatchOperand_ParseFail; + } + Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext())); + return MatchOperand_Success; + } + if (Tok.is(AsmToken::Integer)) { + int64_t Val = Tok.getIntVal(); + Parser.Lex(); // Eat the token. + if (Val > 255 || Val < 0) { + TokError("encoded floating point value out of range"); + return MatchOperand_ParseFail; + } + Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext())); + return MatchOperand_Success; + } + + TokError("invalid floating point immediate"); + return MatchOperand_ParseFail; +} /// Parse a arm instruction operand. For now this parses the operand regardless /// of the mnemonic. -bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, +bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Mnemonic) { SMLoc S, E; @@ -1787,13 +3391,20 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Identifier: { - if (!TryParseRegisterWithWriteBack(Operands)) + // If this is VMRS, check for the apsr_nzcv operand. + if (!tryParseRegisterWithWriteBack(Operands)) return false; - int Res = TryParseShiftRegister(Operands); + int Res = tryParseShiftRegister(Operands); if (Res == 0) // success return false; else if (Res == -1) // irrecoverable error return true; + if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") { + S = Parser.getTok().getLoc(); + Parser.Lex(); + Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S)); + return false; + } // Fall though for the Identifier case that is not a register or a // special name. @@ -1811,26 +3422,36 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return false; } case AsmToken::LBrac: - return ParseMemory(Operands); + return parseMemory(Operands); case AsmToken::LCurly: - return ParseRegisterList(Operands); - case AsmToken::Hash: + return parseRegisterList(Operands); + case AsmToken::Hash: { // #42 -> immediate. 
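+    // e.g. "#42" or "#-3". "#-0" is deliberately kept distinct from "#0"
+    // by storing INT32_MIN, so later checks can tell the two apart (sketch).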
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate S = Parser.getTok().getLoc(); Parser.Lex(); + bool isNegative = Parser.getTok().is(AsmToken::Minus); const MCExpr *ImmVal; if (getParser().ParseExpression(ImmVal)) return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); + if (!CE) { + Error(S, "constant expression expected"); + return MatchOperand_ParseFail; + } + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); return false; + } case AsmToken::Colon: { // ":lower16:" and ":upper16:" expression prefixes // FIXME: Check it's an expression prefix, // e.g. (FOO - :lower16:BAR) isn't legal. ARMMCExpr::VariantKind RefKind; - if (ParsePrefix(RefKind)) + if (parsePrefix(RefKind)) return true; const MCExpr *SubExprVal; @@ -1846,9 +3467,9 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, } } -// ParsePrefix - Parse ARM 16-bit relocations expression prefix, i.e. +// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e. // :lower16: and :upper16:. -bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) { +bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { RefKind = ARMMCExpr::VK_ARM_None; // :lower16: and :upper16: modifiers @@ -1879,55 +3500,16 @@ bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) { return false; } -const MCExpr * -ARMAsmParser::ApplyPrefixToExpr(const MCExpr *E, - MCSymbolRefExpr::VariantKind Variant) { - // Recurse over the given expression, rebuilding it to apply the given variant - // to the leftmost symbol. - if (Variant == MCSymbolRefExpr::VK_None) - return E; - - switch (E->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle target expr yet"); - case MCExpr::Constant: - llvm_unreachable("Can't handle lower16/upper16 of constant yet"); - - case MCExpr::SymbolRef: { - const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E); - - if (SRE->getKind() != MCSymbolRefExpr::VK_None) - return 0; - - return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext()); - } - - case MCExpr::Unary: - llvm_unreachable("Can't handle unary expressions yet"); - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(E); - const MCExpr *LHS = ApplyPrefixToExpr(BE->getLHS(), Variant); - const MCExpr *RHS = BE->getRHS(); - if (!LHS) - return 0; - - return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext()); - } - } - - assert(0 && "Invalid expression kind!"); - return 0; -} - /// \brief Given a mnemonic, split out possible predication code and carry /// setting letters to form a canonical mnemonic and flags. // // FIXME: Would be nice to autogen this. -static StringRef SplitMnemonic(StringRef Mnemonic, - unsigned &PredicationCode, - bool &CarrySetting, - unsigned &ProcessorIMod) { +// FIXME: This is a bit of a maze of special cases. +StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, + unsigned &PredicationCode, + bool &CarrySetting, + unsigned &ProcessorIMod, + StringRef &ITMask) { PredicationCode = ARMCC::AL; CarrySetting = false; ProcessorIMod = 0; @@ -1935,23 +3517,22 @@ static StringRef SplitMnemonic(StringRef Mnemonic, // Ignore some mnemonics we know aren't predicated forms. // // FIXME: Would be nice to autogen this. 
- if (Mnemonic == "teq" || Mnemonic == "vceq" || - Mnemonic == "movs" || - Mnemonic == "svc" || - (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" || - Mnemonic == "vmls" || Mnemonic == "vnmls") || - Mnemonic == "vacge" || Mnemonic == "vcge" || - Mnemonic == "vclt" || - Mnemonic == "vacgt" || Mnemonic == "vcgt" || - Mnemonic == "vcle" || - (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || - Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || - Mnemonic == "vqdmlal" || Mnemonic == "bics")) + if ((Mnemonic == "movs" && isThumb()) || + Mnemonic == "teq" || Mnemonic == "vceq" || Mnemonic == "svc" || + Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" || + Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" || + Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" || + Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || + Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || + Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal") return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't // predicated but do have a carry-set and so weren't caught above. - if (Mnemonic != "adcs") { + if (Mnemonic != "adcs" && Mnemonic != "bics" && Mnemonic != "movs" && + Mnemonic != "muls" && Mnemonic != "smlals" && Mnemonic != "smulls" && + Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" && + Mnemonic != "sbcs" && Mnemonic != "rscs") { unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2)) .Case("eq", ARMCC::EQ) .Case("ne", ARMCC::NE) @@ -1980,11 +3561,12 @@ static StringRef SplitMnemonic(StringRef Mnemonic, // Next, determine if we have a carry setting bit. We explicitly ignore all // the instructions we know end in 's'. if (Mnemonic.endswith("s") && - !(Mnemonic == "asrs" || Mnemonic == "cps" || Mnemonic == "mls" || - Mnemonic == "movs" || Mnemonic == "mrs" || Mnemonic == "smmls" || - Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" || - Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" || - Mnemonic == "vrecps" || Mnemonic == "vrsqrts")) { + !(Mnemonic == "cps" || Mnemonic == "mls" || + Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" || + Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" || + Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" || + Mnemonic == "vrsqrts" || Mnemonic == "srs" || + (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; } @@ -2004,6 +3586,12 @@ static StringRef SplitMnemonic(StringRef Mnemonic, } } + // The "it" instruction has the condition mask on the end of the mnemonic. + if (Mnemonic.startswith("it")) { + ITMask = Mnemonic.slice(2, Mnemonic.size()); + Mnemonic = Mnemonic.slice(0, 2); + } + return Mnemonic; } @@ -2012,37 +3600,154 @@ static StringRef SplitMnemonic(StringRef Mnemonic, // // FIXME: It would be nice to autogen this. 
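 // (For example, "cps" can never take a condition code, while ARM-mode "add"
 // can both set flags and be predicated.)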
void ARMAsmParser:: -GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, +getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) { if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || - Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" || + Mnemonic == "add" || Mnemonic == "adc" || Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" || - Mnemonic == "umlal" || Mnemonic == "orr" || Mnemonic == "mvn" || + Mnemonic == "orr" || Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || - Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" || - Mnemonic == "eor" || Mnemonic == "smlal" || - (Mnemonic == "mov" && !isThumbOne())) { + Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" || + (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" || + Mnemonic == "mla" || Mnemonic == "smlal" || + Mnemonic == "umlal" || Mnemonic == "umull"))) { CanAcceptCarrySet = true; - } else { + } else CanAcceptCarrySet = false; - } if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" || Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" || Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" || Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" || - Mnemonic == "dsb" || Mnemonic == "movs" || Mnemonic == "isb" || - Mnemonic == "clrex" || Mnemonic.startswith("cps")) { + Mnemonic == "dsb" || Mnemonic == "isb" || Mnemonic == "setend" || + (Mnemonic == "clrex" && !isThumb()) || + (Mnemonic == "nop" && isThumbOne()) || + ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" || + Mnemonic == "ldc2" || Mnemonic == "ldc2l" || + Mnemonic == "stc2" || Mnemonic == "stc2l") && !isThumb()) || + ((Mnemonic.startswith("rfe") || Mnemonic.startswith("srs")) && + !isThumb()) || + Mnemonic.startswith("cps") || (Mnemonic == "movs" && isThumbOne())) { CanAcceptPredicationCode = false; - } else { + } else CanAcceptPredicationCode = true; - } - if (isThumb()) + if (isThumb()) { if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" || Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp") CanAcceptPredicationCode = false; + } +} + +bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // FIXME: This is all horribly hacky. We really need a better way to deal + // with optional operands like this in the matcher table. + + // The 'mov' mnemonic is special. One variant has a cc_out operand, while + // another does not. Specifically, the MOVW instruction does not. So we + // special case it here and remove the defaulted (non-setting) cc_out + // operand if that's the instruction we're trying to match. + // + // We do this as post-processing of the explicit operands rather than just + // conditionally adding the cc_out in the first place because we need + // to check the type of the parsed immediate operand. + if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() && + !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() && + static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + return true; + + // Register-register 'add' for thumb does not have a cc_out operand + // when there are only two register operands. 
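+  // (For example, plain "add r0, r1" uses the high-register ADD encoding,
+  // which never sets flags, so there is no cc_out operand to preserve.)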
+ if (isThumb() && Mnemonic == "add" && Operands.size() == 5 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + return true; + // Register-register 'add' for thumb does not have a cc_out operand + // when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do + // have to check the immediate range here since Thumb2 has a variant + // that can handle a different range and has a cc_out operand. + if (((isThumb() && Mnemonic == "add") || + (isThumbTwo() && Mnemonic == "sub")) && + Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && + (static_cast<ARMOperand*>(Operands[5])->isReg() || + static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4())) + return true; + // For Thumb2, add/sub immediate does not have a cc_out operand for the + // imm0_4095 variant. That's the least-preferred variant when + // selecting via the generic "add" mnemonic, so to know that we + // should remove the cc_out operand, we have to explicitly check that + // it's not one of the other variants. Ugh. + if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") && + Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand*>(Operands[5])->isImm()) { + // Nest conditions rather than one big 'if' statement for readability. + // + // If either register is a high reg, it's either one of the SP + // variants (handled above) or a 32-bit encoding, so we just + // check against T3. + if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || + !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) && + static_cast<ARMOperand*>(Operands[5])->isT2SOImm()) + return false; + // If both registers are low, we're in an IT block, and the immediate is + // in range, we should use encoding T1 instead, which has a cc_out. + if (inITBlock() && + isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) && + isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) && + static_cast<ARMOperand*>(Operands[5])->isImm0_7()) + return false; + + // Otherwise, we use encoding T4, which does not have a cc_out + // operand. + return true; + } + + // The thumb2 multiply instruction doesn't have a CCOut register, so + // if we have a "mul" mnemonic in Thumb mode, check if we'll be able to + // use the 16-bit encoding or not. + if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand*>(Operands[5])->isReg() && + // If the registers aren't low regs, the destination reg isn't the + // same as one of the source regs, or the cc_out operand is zero + // outside of an IT block, we have to use the 32-bit encoding, so + // remove the cc_out operand. 
+      (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+       !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+       !inITBlock() ||
+       (static_cast<ARMOperand*>(Operands[3])->getReg() !=
+        static_cast<ARMOperand*>(Operands[5])->getReg() &&
+        static_cast<ARMOperand*>(Operands[3])->getReg() !=
+        static_cast<ARMOperand*>(Operands[4])->getReg())))
+    return true;
+
+  // Register-register 'add/sub' for thumb does not have a cc_out operand
+  // when it's an ADD/SUB SP, #imm. Be lenient on count since there's also
+  // the "add/sub SP, SP, #imm" version. If the follow-up operands aren't
+  // right, this will result in better diagnostics (which operand is off)
+  // anyway.
+  if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") &&
+      (Operands.size() == 5 || Operands.size() == 6) &&
+      static_cast<ARMOperand*>(Operands[3])->isReg() &&
+      static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
+      static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+    return true;
+
+  return false;
 }

 /// Parse an arm instruction mnemonic followed by its operands.
 bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // Create the leading tokens for the mnemonic, split by '.' characters.
   size_t Start = 0, Next = Name.find('.');
-  StringRef Head = Name.slice(Start, Next);
+  StringRef Mnemonic = Name.slice(Start, Next);

   // Split out the predication code and carry setting flag from the mnemonic.
   unsigned PredicationCode;
   unsigned ProcessorIMod;
   bool CarrySetting;
-  Head = SplitMnemonic(Head, PredicationCode, CarrySetting,
-                       ProcessorIMod);
+  StringRef ITMask;
+  Mnemonic = splitMnemonic(Mnemonic, PredicationCode, CarrySetting,
+                           ProcessorIMod, ITMask);
+
+  // In Thumb1, only the branch (B) instruction can be predicated.
+  if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
+    Parser.EatToEndOfStatement();
+    return Error(NameLoc, "conditional execution not supported in Thumb1");
+  }
+
+  Operands.push_back(ARMOperand::CreateToken(Mnemonic, NameLoc));

-  Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
+  // Handle the IT instruction ITMask. Convert it to a bitmask. This
+  // is the mask as it will be for the IT encoding if the conditional
+  // encoding has a '1' as its bit0 (i.e. 't' ==> '1'). In the case
+  // where the conditional bit0 is zero, the instruction post-processing
+  // will adjust the mask accordingly.
+  if (Mnemonic == "it") {
+    SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
+    if (ITMask.size() > 3) {
+      Parser.EatToEndOfStatement();
+      return Error(Loc, "too many conditions on IT instruction");
+    }
+    unsigned Mask = 8;
+    for (unsigned i = ITMask.size(); i != 0; --i) {
+      char pos = ITMask[i - 1];
+      if (pos != 't' && pos != 'e') {
+        Parser.EatToEndOfStatement();
+        return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
+      }
+      Mask >>= 1;
+      if (ITMask[i - 1] == 't')
+        Mask |= 8;
+    }
+    Operands.push_back(ARMOperand::CreateITMask(Mask, Loc));
+  }
+
+  // FIXME: This is all a pretty gross hack. We should automatically handle
+  // optional operands like this via tblgen.

   // Next, add the CCOut and ConditionCode operands, if needed.
   //
@@ -2069,34 +3809,36 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
   // the matcher deal with finding the right instruction or generating an
   // appropriate error.
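   // For example, "addseq r0, r1, r2" was split into "add" + 's' + "eq"
   // above; the CCOut and condition-code operands added here re-materialize
   // those suffix letters as explicit operands for the matcher.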
bool CanAcceptCarrySet, CanAcceptPredicationCode; - GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode); + getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode); // If we had a carry-set on an instruction that can't do that, issue an // error. if (!CanAcceptCarrySet && CarrySetting) { Parser.EatToEndOfStatement(); - return Error(NameLoc, "instruction '" + Head + + return Error(NameLoc, "instruction '" + Mnemonic + "' can not set flags, but 's' suffix specified"); } + // If we had a predication code on an instruction that can't do that, issue an + // error. + if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) { + Parser.EatToEndOfStatement(); + return Error(NameLoc, "instruction '" + Mnemonic + + "' is not predicable, but condition code specified"); + } // Add the carry setting operand, if necessary. - // - // FIXME: It would be awesome if we could somehow invent a location such that - // match errors on this operand would print a nice diagnostic about how the - // 's' character in the mnemonic resulted in a CCOut operand. - if (CanAcceptCarrySet) + if (CanAcceptCarrySet) { + SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size()); Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0, - NameLoc)); + Loc)); + } // Add the predication code operand, if necessary. if (CanAcceptPredicationCode) { + SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() + + CarrySetting); Operands.push_back(ARMOperand::CreateCondCode( - ARMCC::CondCodes(PredicationCode), NameLoc)); - } else { - // This mnemonic can't ever accept a predication code, but the user wrote - // one (or misspelled another mnemonic). - - // FIXME: Issue a nice error. + ARMCC::CondCodes(PredicationCode), Loc)); } // Add the processor imod operand, if necessary. @@ -2104,11 +3846,6 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, Operands.push_back(ARMOperand::CreateImm( MCConstantExpr::Create(ProcessorIMod, getContext()), NameLoc, NameLoc)); - } else { - // This mnemonic can't ever accept a imod, but the user wrote - // one (or misspelled another mnemonic). - - // FIXME: Issue a nice error. } // Add the remaining tokens in the mnemonic. @@ -2117,13 +3854,19 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, Next = Name.find('.', Start + 1); StringRef ExtraToken = Name.slice(Start, Next); - Operands.push_back(ARMOperand::CreateToken(ExtraToken, NameLoc)); + // For now, we're only parsing Thumb1 (for the most part), so + // just ignore ".n" qualifiers. We'll use them to restrict + // matching when we do Thumb2. + if (ExtraToken != ".n") { + SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); + Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc)); + } } // Read the remaining operands. if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. - if (ParseOperand(Operands, Head)) { + if (parseOperand(Operands, Mnemonic)) { Parser.EatToEndOfStatement(); return true; } @@ -2132,7 +3875,7 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, Parser.Lex(); // Eat the comma. // Parse and remember the operand. 
-      if (ParseOperand(Operands, Head)) {
+      if (parseOperand(Operands, Mnemonic)) {
         Parser.EatToEndOfStatement();
         return true;
       }
@@ -2140,75 +3883,548 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
   }

   if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    SMLoc Loc = getLexer().getLoc();
     Parser.EatToEndOfStatement();
-    return TokError("unexpected token in argument list");
+    return Error(Loc, "unexpected token in argument list");
   }

   Parser.Lex(); // Consume the EndOfStatement
+
+  // Some instructions, mostly Thumb, have forms for the same mnemonic that
+  // do and don't have a cc_out optional-def operand. With some spot-checks
+  // of the operand list, we can figure out which variant we're trying to
+  // parse and adjust accordingly before actually matching. We shouldn't ever
+  // try to remove a cc_out operand that was explicitly set on the
+  // mnemonic, of course (CarrySetting == true). Reason number #317 why the
+  // table-driven matcher doesn't fit well with the ARM instruction set.
+  if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) {
+    ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+    Operands.erase(Operands.begin() + 1);
+    delete Op;
+  }
+
+  // ARM mode 'blx' needs special handling, as the register operand version
+  // is predicable, but the label operand version is not. So, we can't rely
+  // on the Mnemonic based checking to correctly figure out when to put
+  // a k_CondCode operand in the list. If we're trying to match the label
+  // version, remove the k_CondCode operand here.
+  if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
+      static_cast<ARMOperand*>(Operands[2])->isImm()) {
+    ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+    Operands.erase(Operands.begin() + 1);
+    delete Op;
+  }
+
+  // The vector-compare-to-zero instructions have a literal token "#0" at
+  // the end that arrives here as an immediate operand. Convert it to a
+  // token to play nicely with the matcher.
+  if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" ||
+       Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 &&
+      static_cast<ARMOperand*>(Operands[5])->isImm()) {
+    ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+    if (CE && CE->getValue() == 0) {
+      Operands.erase(Operands.begin() + 5);
+      Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+      delete Op;
+    }
+  }
+  // VCMP{E} does the same thing, but with a different operand count.
+  if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 &&
+      static_cast<ARMOperand*>(Operands[4])->isImm()) {
+    ARMOperand *Op = static_cast<ARMOperand*>(Operands[4]);
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+    if (CE && CE->getValue() == 0) {
+      Operands.erase(Operands.begin() + 4);
+      Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+      delete Op;
+    }
+  }
+  // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
+  // end. Convert it to a token here.
+  if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+      static_cast<ARMOperand*>(Operands[5])->isImm()) {
+    ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+    if (CE && CE->getValue() == 0) {
+      Operands.erase(Operands.begin() + 5);
+      Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
+      delete Op;
+    }
+  }
+
+  return false;
+}
+
+// Validate context-sensitive operand constraints.
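+// (Examples: LDRD/STRD require sequential transfer registers (Rt2 == Rt + 1),
+// and 16-bit Thumb LDM/STM register lists are restricted to low registers;
+// see validateInstruction() below.)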
+
+// Return 'true' if the register list contains non-low GPR registers,
+// 'false' otherwise. If Reg is in the register list or is HiReg, set
+// 'containsReg' to true.
+static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg,
+                                 unsigned HiReg, bool &containsReg) {
+  containsReg = false;
+  for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+    unsigned OpReg = Inst.getOperand(i).getReg();
+    if (OpReg == Reg)
+      containsReg = true;
+    // Anything other than a low register isn't legal here.
+    if (!isARMLowRegister(OpReg) && (!HiReg || OpReg != HiReg))
+      return true;
+  }
+  return false;
+}
+
+// Check if the specified register is in the register list of the inst,
+// starting at the indicated operand number.
+static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
+  for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) {
+    unsigned OpReg = Inst.getOperand(i).getReg();
+    if (OpReg == Reg)
+      return true;
+  }
+  return false;
+}
+
+// FIXME: We would really prefer to have MCInstrInfo (the wrapper around
+// the ARMInsts array) instead. Getting that here requires awkward
+// API changes, though. Better way?
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
+static MCInstrDesc &getInstDesc(unsigned Opcode) {
+  return ARMInsts[Opcode];
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool ARMAsmParser::
+validateInstruction(MCInst &Inst,
+                    const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+  SMLoc Loc = Operands[0]->getStartLoc();
+  // Check the IT block state first.
+  // NOTE: In Thumb mode, the BKPT instruction has the interesting property of
+  // being allowed in IT blocks, but not being predicable. It just always
+  // executes.
+  if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) {
+    unsigned bit = 1;
+    if (ITState.FirstCond)
+      ITState.FirstCond = false;
+    else
+      bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+    // The instruction must be predicable.
+    if (!MCID.isPredicable())
+      return Error(Loc, "instructions in IT block must be predicable");
+    unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
+    unsigned ITCond = bit ? ITState.Cond :
+      ARMCC::getOppositeCondition(ITState.Cond);
+    if (Cond != ITCond) {
+      // Find the condition code Operand to get its SMLoc information.
+      SMLoc CondLoc;
+      for (unsigned i = 1; i < Operands.size(); ++i)
+        if (static_cast<ARMOperand*>(Operands[i])->isCondCode())
+          CondLoc = Operands[i]->getStartLoc();
+      return Error(CondLoc, "incorrect condition in IT block; got '" +
+                   StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
+                   "', but expected '" +
+                   ARMCondCodeToString(ARMCC::CondCodes(ITCond)) + "'");
+    }
+    // Check for non-'al' condition codes outside of the IT block.
+  } else if (isThumbTwo() && MCID.isPredicable() &&
+             Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
+             ARMCC::AL && Inst.getOpcode() != ARM::tB &&
+             Inst.getOpcode() != ARM::t2B)
+    return Error(Loc, "predicated instructions must be in IT block");
+
+  switch (Inst.getOpcode()) {
+  case ARM::LDRD:
+  case ARM::LDRD_PRE:
+  case ARM::LDRD_POST:
+  case ARM::LDREXD: {
+    // Rt2 must be Rt + 1.
+    unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
+    unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+    if (Rt2 != Rt + 1)
+      return Error(Operands[3]->getStartLoc(),
+                   "destination operands must be sequential");
+    return false;
+  }
+  case ARM::STRD: {
+    // Rt2 must be Rt + 1.
+ unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg()); + unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg()); + if (Rt2 != Rt + 1) + return Error(Operands[3]->getStartLoc(), + "source operands must be sequential"); + return false; + } + case ARM::STRD_PRE: + case ARM::STRD_POST: + case ARM::STREXD: { + // Rt2 must be Rt + 1. + unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg()); + unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg()); + if (Rt2 != Rt + 1) + return Error(Operands[3]->getStartLoc(), + "source operands must be sequential"); + return false; + } + case ARM::SBFX: + case ARM::UBFX: { + // width must be in range [1, 32-lsb] + unsigned lsb = Inst.getOperand(2).getImm(); + unsigned widthm1 = Inst.getOperand(3).getImm(); + if (widthm1 >= 32 - lsb) + return Error(Operands[5]->getStartLoc(), + "bitfield width must be in range [1,32-lsb]"); + return false; + } + case ARM::tLDMIA: { + // If we're parsing Thumb2, the .w variant is available and handles + // most cases that are normally illegal for a Thumb1 LDM + // instruction. We'll make the transformation in processInstruction() + // if necessary. + // + // Thumb LDM instructions are writeback iff the base register is not + // in the register list. + unsigned Rn = Inst.getOperand(0).getReg(); + bool hasWritebackToken = + (static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == "!"); + bool listContainsBase; + if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) && !isThumbTwo()) + return Error(Operands[3 + hasWritebackToken]->getStartLoc(), + "registers must be in range r0-r7"); + // If we should have writeback, then there should be a '!' token. + if (!listContainsBase && !hasWritebackToken && !isThumbTwo()) + return Error(Operands[2]->getStartLoc(), + "writeback operator '!' expected"); + // If we should not have writeback, there must not be a '!'. This is + // true even for the 32-bit wide encodings. + if (listContainsBase && hasWritebackToken) + return Error(Operands[3]->getStartLoc(), + "writeback operator '!' not allowed when base register " + "in register list"); + + break; + } + case ARM::t2LDMIA_UPD: { + if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg())) + return Error(Operands[4]->getStartLoc(), + "writeback operator '!' not allowed when base register " + "in register list"); + break; + } + case ARM::tPOP: { + bool listContainsBase; + if (checkLowRegisterList(Inst, 3, 0, ARM::PC, listContainsBase)) + return Error(Operands[2]->getStartLoc(), + "registers must be in range r0-r7 or pc"); + break; + } + case ARM::tPUSH: { + bool listContainsBase; + if (checkLowRegisterList(Inst, 3, 0, ARM::LR, listContainsBase)) + return Error(Operands[2]->getStartLoc(), + "registers must be in range r0-r7 or lr"); + break; + } + case ARM::tSTMIA_UPD: { + bool listContainsBase; + if (checkLowRegisterList(Inst, 4, 0, 0, listContainsBase) && !isThumbTwo()) + return Error(Operands[4]->getStartLoc(), + "registers must be in range r0-r7"); + break; + } + } + return false; } +void ARMAsmParser:: +processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + switch (Inst.getOpcode()) { + case ARM::LDMIA_UPD: + // If this is a load of a single register via a 'pop', then we should use + // a post-indexed LDR instruction instead, per the ARM ARM. 
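+    // For example, "pop {r3}" becomes "ldr r3, [sp], #4".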
+    if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" &&
+        Inst.getNumOperands() == 5) {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::LDR_POST_IMM);
+      TmpInst.addOperand(Inst.getOperand(4)); // Rt
+      TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+      TmpInst.addOperand(Inst.getOperand(1)); // Rn
+      TmpInst.addOperand(MCOperand::CreateReg(0)); // am2offset
+      TmpInst.addOperand(MCOperand::CreateImm(4));
+      TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+      TmpInst.addOperand(Inst.getOperand(3));
+      Inst = TmpInst;
+    }
+    break;
+  case ARM::STMDB_UPD:
+    // If this is a store of a single register via a 'push', then we should use
+    // a pre-indexed STR instruction instead, per the ARM ARM.
+    if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" &&
+        Inst.getNumOperands() == 5) {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::STR_PRE_IMM);
+      TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+      TmpInst.addOperand(Inst.getOperand(4)); // Rt
+      TmpInst.addOperand(Inst.getOperand(1)); // addrmode_imm12
+      TmpInst.addOperand(MCOperand::CreateImm(-4));
+      TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+      TmpInst.addOperand(Inst.getOperand(3));
+      Inst = TmpInst;
+    }
+    break;
+  case ARM::tADDi8:
+    // If the immediate is in the range 0-7, we want tADDi3 iff Rd was
+    // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+    // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+    // to encoding T1 if <Rd> is omitted."
+    if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+      Inst.setOpcode(ARM::tADDi3);
+    break;
+  case ARM::tSUBi8:
+    // If the immediate is in the range 0-7, we want tSUBi3 iff Rd was
+    // explicitly specified. From the ARM ARM: "Encoding T1 is preferred
+    // to encoding T2 if <Rd> is specified and encoding T2 is preferred
+    // to encoding T1 if <Rd> is omitted."
+    if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+      Inst.setOpcode(ARM::tSUBi3);
+    break;
+  case ARM::tB:
+    // A Thumb conditional branch outside of an IT block is a tBcc.
+    if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+      Inst.setOpcode(ARM::tBcc);
+    break;
+  case ARM::t2B:
+    // A Thumb2 conditional branch outside of an IT block is a t2Bcc.
+    if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+      Inst.setOpcode(ARM::t2Bcc);
+    break;
+  case ARM::t2Bcc:
+    // If the conditional is AL or we're in an IT block, we really want t2B.
+    if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock())
+      Inst.setOpcode(ARM::t2B);
+    break;
+  case ARM::tBcc:
+    // If the conditional is AL, we really want tB.
+    if (Inst.getOperand(1).getImm() == ARMCC::AL)
+      Inst.setOpcode(ARM::tB);
+    break;
+  case ARM::tLDMIA: {
+    // If the register list contains any high registers, or if the writeback
+    // doesn't match what tLDMIA can do, we need to use the 32-bit encoding
+    // instead if we're in Thumb2. Otherwise, this should have generated
+    // an error in validateInstruction().
+    unsigned Rn = Inst.getOperand(0).getReg();
+    bool hasWritebackToken =
+      (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+       static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+    bool listContainsBase;
+    if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) ||
+        (!listContainsBase && !hasWritebackToken) ||
+        (listContainsBase && hasWritebackToken)) {
+      // 16-bit encoding isn't sufficient. Switch to the 32-bit version.
+      assert (isThumbTwo());
+      Inst.setOpcode(hasWritebackToken ?
ARM::t2LDMIA_UPD : ARM::t2LDMIA); + // If we're switching to the updating version, we need to insert + // the writeback tied operand. + if (hasWritebackToken) + Inst.insert(Inst.begin(), + MCOperand::CreateReg(Inst.getOperand(0).getReg())); + } + break; + } + case ARM::tSTMIA_UPD: { + // If the register list contains any high registers, we need to use + // the 32-bit encoding instead if we're in Thumb2. Otherwise, this + // should have generated an error in validateInstruction(). + unsigned Rn = Inst.getOperand(0).getReg(); + bool listContainsBase; + if (checkLowRegisterList(Inst, 4, Rn, 0, listContainsBase)) { + // 16-bit encoding isn't sufficient. Switch to the 32-bit version. + assert (isThumbTwo()); + Inst.setOpcode(ARM::t2STMIA_UPD); + } + break; + } + case ARM::t2MOVi: { + // If we can use the 16-bit encoding and the user didn't explicitly + // request the 32-bit variant, transform it here. + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + Inst.getOperand(1).getImm() <= 255 && + ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && + Inst.getOperand(4).getReg() == ARM::CPSR) || + (inITBlock() && Inst.getOperand(4).getReg() == 0)) && + (!static_cast<ARMOperand*>(Operands[2])->isToken() || + static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + // The operands aren't in the same order for tMOVi8... + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVi8); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + } + break; + } + case ARM::t2MOVr: { + // If we can use the 16-bit encoding and the user didn't explicitly + // request the 32-bit variant, transform it here. + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg()) && + Inst.getOperand(2).getImm() == ARMCC::AL && + Inst.getOperand(4).getReg() == ARM::CPSR && + (!static_cast<ARMOperand*>(Operands[2])->isToken() || + static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + // The operands aren't the same for tMOV[S]r... (no cc_out) + MCInst TmpInst; + TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + } + break; + } + case ARM::t2SXTH: + case ARM::t2SXTB: + case ARM::t2UXTH: + case ARM::t2UXTB: { + // If we can use the 16-bit encoding and the user didn't explicitly + // request the 32-bit variant, transform it here. + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg()) && + Inst.getOperand(2).getImm() == 0 && + (!static_cast<ARMOperand*>(Operands[2])->isToken() || + static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("Illegal opcode!"); + case ARM::t2SXTH: NewOpc = ARM::tSXTH; break; + case ARM::t2SXTB: NewOpc = ARM::tSXTB; break; + case ARM::t2UXTH: NewOpc = ARM::tUXTH; break; + case ARM::t2UXTB: NewOpc = ARM::tUXTB; break; + } + // The operands aren't the same for thumb1 (no rotate operand). 
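+      // (For example, "sxth r0, r1" narrows from t2SXTH to the 16-bit tSXTH.)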
+      MCInst TmpInst;
+      TmpInst.setOpcode(NewOpc);
+      TmpInst.addOperand(Inst.getOperand(0));
+      TmpInst.addOperand(Inst.getOperand(1));
+      TmpInst.addOperand(Inst.getOperand(3));
+      TmpInst.addOperand(Inst.getOperand(4));
+      Inst = TmpInst;
+    }
+    break;
+  }
+  case ARM::t2IT: {
+    // The mask bits for all but the first condition are encoded as if the
+    // low bit of the condition code value implied 't'. We currently always
+    // parse with 1 implying 't', so XOR-toggle the bits if the low bit of
+    // the condition code is actually zero. The encoding also expects the
+    // low bit of the condition to be encoded as bit 4 of the mask operand,
+    // so mask that in if needed.
+    MCOperand &MO = Inst.getOperand(1);
+    unsigned Mask = MO.getImm();
+    unsigned OrigMask = Mask;
+    unsigned TZ = CountTrailingZeros_32(Mask);
+    if ((Inst.getOperand(0).getImm() & 1) == 0) {
+      assert(Mask && TZ <= 3 && "illegal IT mask value!");
+      for (unsigned i = 3; i != TZ; --i)
+        Mask ^= 1 << i;
+    } else
+      Mask |= 0x10;
+    MO.setImm(Mask);
+
+    // Set up the IT block state according to the IT instruction we just
+    // matched.
+    assert(!inITBlock() && "nested IT blocks?!");
+    ITState.Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
+    ITState.Mask = OrigMask; // Use the original mask, not the updated one.
+    ITState.CurPosition = 0;
+    ITState.FirstCond = true;
+    break;
+  }
+  }
+}
+
+unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+  // 16-bit Thumb arithmetic instructions either require or preclude the 'S'
+  // suffix depending on whether they're in an IT block or not.
+  unsigned Opc = Inst.getOpcode();
+  MCInstrDesc &MCID = getInstDesc(Opc);
+  if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
+    assert(MCID.hasOptionalDef() &&
+           "optionally flag setting instruction missing optional def operand");
+    assert(MCID.NumOperands == Inst.getNumOperands() &&
+           "operand count mismatch!");
+    // Find the optional-def operand (cc_out).
+    unsigned OpNo;
+    for (OpNo = 0;
+         !MCID.OpInfo[OpNo].isOptionalDef() && OpNo < MCID.NumOperands;
+         ++OpNo)
+      ;
+    // If we're parsing Thumb1, reject it completely.
+    if (isThumbOne() && Inst.getOperand(OpNo).getReg() != ARM::CPSR)
+      return Match_MnemonicFail;
+    // If we're parsing Thumb2, which form is legal depends on whether we're
+    // in an IT block.
+    if (isThumbTwo() && Inst.getOperand(OpNo).getReg() != ARM::CPSR &&
+        !inITBlock())
+      return Match_RequiresITBlock;
+    if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
+        inITBlock())
+      return Match_RequiresNotITBlock;
+  }
+  // Some high-register supporting Thumb1 encodings only allow both registers
+  // to be from r0-r7 when in Thumb2.
+  else if (Opc == ARM::tADDhirr && isThumbOne() &&
+           isARMLowRegister(Inst.getOperand(1).getReg()) &&
+           isARMLowRegister(Inst.getOperand(2).getReg()))
+    return Match_RequiresThumb2;
+  // Others only require ARMv6 or later.
+  else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() &&
+           isARMLowRegister(Inst.getOperand(0).getReg()) &&
+           isARMLowRegister(Inst.getOperand(1).getReg()))
+    return Match_RequiresV6;
+  return Match_Success;
+}
+
 bool ARMAsmParser::
 MatchAndEmitInstruction(SMLoc IDLoc,
                         SmallVectorImpl<MCParsedAsmOperand*> &Operands,
                         MCStreamer &Out) {
   MCInst Inst;
   unsigned ErrorInfo;
-  MatchResultTy MatchResult, MatchResult2;
+  unsigned MatchResult;
   MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
-  if (MatchResult != Match_Success) {
-    // If we get a Match_InvalidOperand it might be some arithmetic instruction
-    // that does not update the condition codes. So try adding a CCOut operand
-    // with a value of reg0.
-    if (MatchResult == Match_InvalidOperand) {
-      Operands.insert(Operands.begin() + 1,
-                      ARMOperand::CreateCCOut(0,
-                                  ((ARMOperand*)Operands[0])->getStartLoc()));
-      MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
-      if (MatchResult2 == Match_Success)
-        MatchResult = Match_Success;
-      else {
-        ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
-        Operands.erase(Operands.begin() + 1);
-        delete CCOut;
-      }
-    }
-    // If we get a Match_MnemonicFail it might be some arithmetic instruction
-    // that updates the condition codes if it ends in 's'. So see if the
-    // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
-    // operand with a value of CPSR.
-    else if (MatchResult == Match_MnemonicFail) {
-      // Get the instruction mnemonic, which is the first token.
-      StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
-      if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
-        // removed the 's' from the mnemonic for matching.
-        StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1);
-        SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc();
-        ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
-        Operands.erase(Operands.begin());
-        delete OldMnemonic;
-        Operands.insert(Operands.begin(),
-                        ARMOperand::CreateToken(MnemonicNoS, NameLoc));
-        Operands.insert(Operands.begin() + 1,
-                        ARMOperand::CreateCCOut(ARM::CPSR, NameLoc));
-        MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
-        if (MatchResult2 == Match_Success)
-          MatchResult = Match_Success;
-        else {
-          ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
-          Operands.erase(Operands.begin());
-          delete OldMnemonic;
-          Operands.insert(Operands.begin(),
-                          ARMOperand::CreateToken(Mnemonic, NameLoc));
-          ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
-          Operands.erase(Operands.begin() + 1);
-          delete CCOut;
-        }
-      }
-    }
-  }

   switch (MatchResult) {
+  default: break;
   case Match_Success:
+    // Context-sensitive operand constraints aren't handled by the matcher,
+    // so check them here.
+    if (validateInstruction(Inst, Operands)) {
+      // Still progress the IT block, otherwise one wrong condition causes
+      // nasty cascading errors.
+      forwardITPosition();
+      return true;
+    }
+
+    // Some instructions need post-processing to, for example, tweak which
+    // encoding is selected.
+    processInstruction(Inst, Operands);
+
+    // Only move forward at the very end so that everything in validate
+    // and process gets a consistent answer about whether we're in an IT
+    // block.
+    forwardITPosition();
+
     Out.EmitInstruction(Inst);
     return false;
   case Match_MissingFeature:
@@ -2227,34 +4443,43 @@ MatchAndEmitInstruction(SMLoc IDLoc,
     return Error(ErrorLoc, "invalid operand for instruction");
   }
   case Match_MnemonicFail:
-    return Error(IDLoc, "unrecognized instruction mnemonic");
+    return Error(IDLoc, "invalid instruction");
   case Match_ConversionFail:
-    return Error(IDLoc, "unable to convert operands to instruction");
+    // The converter function will have already emitted a diagnostic.
+    return true;
+  case Match_RequiresNotITBlock:
+    return Error(IDLoc, "flag setting instruction only valid outside IT block");
+  case Match_RequiresITBlock:
+    return Error(IDLoc, "instruction only valid inside IT block");
+  case Match_RequiresV6:
+    return Error(IDLoc, "instruction variant requires ARMv6 or later");
+  case Match_RequiresThumb2:
+    return Error(IDLoc, "instruction variant requires Thumb2");
   }

   llvm_unreachable("Implement any new match types added!");
   return true;
 }

-/// ParseDirective parses the arm specific directives
+/// parseDirective parses the ARM-specific directives
 bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
   StringRef IDVal = DirectiveID.getIdentifier();
   if (IDVal == ".word")
-    return ParseDirectiveWord(4, DirectiveID.getLoc());
+    return parseDirectiveWord(4, DirectiveID.getLoc());
   else if (IDVal == ".thumb")
-    return ParseDirectiveThumb(DirectiveID.getLoc());
+    return parseDirectiveThumb(DirectiveID.getLoc());
   else if (IDVal == ".thumb_func")
-    return ParseDirectiveThumbFunc(DirectiveID.getLoc());
+    return parseDirectiveThumbFunc(DirectiveID.getLoc());
   else if (IDVal == ".code")
-    return ParseDirectiveCode(DirectiveID.getLoc());
+    return parseDirectiveCode(DirectiveID.getLoc());
   else if (IDVal == ".syntax")
-    return ParseDirectiveSyntax(DirectiveID.getLoc());
+    return parseDirectiveSyntax(DirectiveID.getLoc());
   return true;
 }

-/// ParseDirectiveWord
+/// parseDirectiveWord
 ///  ::= .word [ expression (, expression)* ]
-bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     for (;;) {
       const MCExpr *Value;
@@ -2277,9 +4502,9 @@ bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
   return false;
 }

-/// ParseDirectiveThumb
+/// parseDirectiveThumb
 ///  ::= .thumb
-bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
+bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return Error(L, "unexpected token in directive");
   Parser.Lex();
@@ -2290,9 +4515,9 @@ bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
   return false;
 }

-/// ParseDirectiveThumbFunc
+/// parseDirectiveThumbFunc
 ///  ::= .thumb_func symbol_name
-bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
+bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
   const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
   bool isMachO = MAI.hasSubsectionsViaSymbols();
   StringRef Name;
@@ -2322,9 +4547,9 @@ bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
   return false;
 }

-/// ParseDirectiveSyntax
+/// parseDirectiveSyntax
 ///  ::= .syntax unified | divided
-bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
+bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
     return Error(L, "unexpected token in .syntax directive");
@@ -2345,9 +4570,9 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
   return false;
 }

-/// ParseDirectiveCode
+/// parseDirectiveCode
 ///  ::= .code 16 | 32
-bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
+bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Integer))
     return Error(L, "unexpected token in .code directive");
@@ -2380,8 +4605,8 @@
 extern "C" void LLVMInitializeARMAsmLexer();

 /// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() { - RegisterAsmParser<ARMAsmParser> X(TheARMTarget); - RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); + RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget); + RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget); LLVMInitializeARMAsmLexer(); } diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index bdce2c4..8f2f813 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -6,584 +6,4077 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file is part of the ARM Disassembler. -// It contains code to implement the public interfaces of ARMDisassembler and -// ThumbDisassembler, both of which are instances of MCDisassembler. -// -//===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-disassembler" -#include "ARMDisassembler.h" -#include "ARMDisassemblerCore.h" - -#include "llvm/ADT/OwningPtr.h" +#include "ARM.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMMCExpr.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -//#define DEBUG(X) do { X; } while (0) - -/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from -/// ARMDecoderEmitter.cpp TableGen backend. It contains: -/// -/// o Mappings from opcode to ARM/Thumb instruction format -/// -/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function -/// for an ARM instruction. -/// -/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding -/// function for a Thumb instruction. -/// -#include "ARMGenDecoderTables.inc" +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; +namespace { +/// ARMDisassembler - ARM disassembler for all ARM platforms. +class ARMDisassembler : public MCDisassembler { +public: + /// Constructor - Initializes the disassembler. + /// + ARMDisassembler(const MCSubtargetInfo &STI) : + MCDisassembler(STI) { + } + + ~ARMDisassembler() { + } + + /// getInstruction - See MCDisassembler. + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; +private: +}; + +/// ThumbDisassembler - Thumb disassembler for all Thumb platforms. +class ThumbDisassembler : public MCDisassembler { +public: + /// Constructor - Initializes the disassembler. + /// + ThumbDisassembler(const MCSubtargetInfo &STI) : + MCDisassembler(STI) { + } + + ~ThumbDisassembler() { + } + + /// getInstruction - See MCDisassembler. + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + /// getEDInfo - See MCDisassembler. 
+ EDInstInfo *getEDInfo() const; +private: + mutable std::vector<unsigned> ITBlock; + DecodeStatus AddThumbPredicate(MCInst&) const; + void UpdateThumbVFPPredicate(MCInst&) const; +}; +} + +static bool Check(DecodeStatus &Out, DecodeStatus In) { + switch (In) { + case MCDisassembler::Success: + // Out stays the same. + return true; + case MCDisassembler::SoftFail: + Out = In; + return true; + case MCDisassembler::Fail: + Out = In; + return false; + } + return false; +} + + +// Forward declare these because the autogenerated code will reference them. +// Definitions are further down. +static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst, + unsigned Insn, + uint64_t Adddress, + const void *Decoder); 
+static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned 
Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val, + uint64_t Address, const void *Decoder); +static DecodeStatus 
DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); + + + +#include "ARMGenDisassemblerTables.inc" +#include "ARMGenInstrInfo.inc" #include "ARMGenEDInfo.inc" -using namespace llvm; +static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) { + return new ARMDisassembler(STI); +} + +static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtargetInfo &STI) { + return new ThumbDisassembler(STI); +} -/// showBitVector - Use the raw_ostream to log a diagnostic message describing -/// the inidividual bits of the instruction. -/// -static inline void showBitVector(raw_ostream &os, const uint32_t &insn) { - // Split the bit position markers into more than one lines to fit 80 columns. - os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11" - << " 10 9 8 7 6 5 4 3 2 1 0 \n"; - os << "---------------------------------------------------------------" - << "----------------------------------\n"; - os << '|'; - for (unsigned i = 32; i != 0; --i) { - if (insn >> (i - 1) & 0x01) - os << " 1"; +EDInstInfo *ARMDisassembler::getEDInfo() const { + return instInfoARM; +} + +EDInstInfo *ThumbDisassembler::getEDInfo() const { + return instInfoARM; +} + +DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + CommentStream = &cs; + + uint8_t bytes[4]; + + assert(!(STI.getFeatureBits() & ARM::ModeThumb) && + "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!"); + + // We want to read exactly 4 bytes of data. + if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + // Encoded as a little-endian 32-bit word in the stream.
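To make the byte-order comment concrete: the stream stores the least-significant byte first, so the four bytes 0x04 0x10 0x9F 0xE5 reassemble into the word 0xE59F1004 (an ldr r1, [pc, #4], chosen here purely as an illustration). The added lines that follow perform exactly this shift-and-or:

    // bytes[] = { 0x04, 0x10, 0x9F, 0xE5 }  ->  insn == 0xE59F1004
    uint32_t insn = (bytes[3] << 24) | (bytes[2] << 16) |
                    (bytes[1] << 8)  | (bytes[0] << 0);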
+ uint32_t insn = (bytes[3] << 24) | + (bytes[2] << 16) | + (bytes[1] << 8) | + (bytes[0] << 0); + + // Call the auto-generated decoder function. + DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + // VFP and NEON instructions, similarly, are shared between ARM + // and Thumb modes. + MI.clear(); + result = decodeVFPInstruction32(MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); + result = decodeNEONDataInstruction32(MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + // Add a fake predicate operand, because we share these instruction + // definitions with Thumb2 where these instructions are predicable. + if (!DecodePredicateOperand(MI, 0xE, Address, this)) + return MCDisassembler::Fail; + return result; + } + + MI.clear(); + result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + // Add a fake predicate operand, because we share these instruction + // definitions with Thumb2 where these instructions are predicable. + if (!DecodePredicateOperand(MI, 0xE, Address, this)) + return MCDisassembler::Fail; + return result; + } + + MI.clear(); + result = decodeNEONDupInstruction32(MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + // Add a fake predicate operand, because we share these instruction + // definitions with Thumb2 where these instructions are predicable. + if (!DecodePredicateOperand(MI, 0xE, Address, this)) + return MCDisassembler::Fail; + return result; + } + + MI.clear(); + + Size = 0; + return MCDisassembler::Fail; +} + +namespace llvm { +extern MCInstrDesc ARMInsts[]; +} + +/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the +/// immediate Value in the MCInst. The immediate Value has had any PC +/// adjustment made by the caller. If the instruction is a branch instruction +/// then isBranch is true, else false. If the getOpInfo() function was set as +/// part of the setupForSymbolicDisassembly() call then that function is called +/// to get any symbolic information at the Address for this instruction. If +/// that returns non-zero then the symbolic information it returns is used to +/// create an MCExpr and that is added as an operand to the MCInst. If +/// getOpInfo() returns zero and isBranch is true then a symbol lookup for +/// Value is done and if a symbol is found an MCExpr is created with that, else +/// an MCExpr with Value is created. This function returns true if it adds an +/// operand to the MCInst and false otherwise.
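Before the implementation, a sketch of the intended call-site pattern; it mirrors DecodeBranchImmInstruction further down in this hunk, whose variable names are borrowed here:

    // Prefer a symbolic branch target; otherwise fall back to the raw,
    // sign-extended immediate.
    if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
                                  /*isBranch=*/true, /*InstSize=*/4,
                                  Inst, Decoder))
      Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));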
+static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, + bool isBranch, uint64_t InstSize, + MCInst &MI, const void *Decoder) { + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); + if (!getOpInfo) + return false; + + struct LLVMOpInfo1 SymbolicOp; + SymbolicOp.Value = Value; + void *DisInfo = Dis->getDisInfoBlock(); + if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { + if (isBranch) { + LLVMSymbolLookupCallback SymbolLookUp = + Dis->getLLVMSymbolLookupCallback(); + if (SymbolLookUp) { + uint64_t ReferenceType; + ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; + const char *ReferenceName; + const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, + &ReferenceName); + if (Name) { + SymbolicOp.AddSymbol.Name = Name; + SymbolicOp.AddSymbol.Present = true; + SymbolicOp.Value = 0; + } + else { + SymbolicOp.Value = Value; + } + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) + (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; + } + else { + return false; + } + } + else { + return false; + } + } + + MCContext *Ctx = Dis->getMCContext(); + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Add = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); else - os << " 0"; - os << (i%4 == 1 ? '|' : ':'); - } - os << '\n'; - // Split the bit position markers into more than one lines to fit 80 columns. - os << "---------------------------------------------------------------" - << "----------------------------------\n"; - os << '\n'; -} - -/// decodeARMInstruction is a decorator function which tries special cases of -/// instruction matching before calling the auto-generated decoder function. -static unsigned decodeARMInstruction(uint32_t &insn) { - if (slice(insn, 31, 28) == 15) - goto AutoGenedDecoder; - - // Special case processing, if any, goes here.... - - // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB. - // The insufficient encoding information of the combined instruction confuses - // the decoder wrt BFC/BFI. Therefore, we try to recover here. - // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111. - // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111. - if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) { - if (slice(insn, 3, 0) == 15) - return ARM::BFC; + LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); else - return ARM::BFI; - } - - // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings - // A1 & A2. - // As a result, the decoder fails to deocode USAT properly. 
- if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1) - return ARM::USAT; - // As a result, the decoder fails to deocode UQADD16 properly. - if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1) - return ARM::UQADD16; - - // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8. - // As a result, the decoder fails to decode UMULL properly. - if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) { - return ARM::UMULL; - } - - // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195. - // As a result, the decoder fails to decode SBFX properly. - if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5) - return ARM::SBFX; - - // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198. - // As a result, the decoder fails to decode UBFX properly. - if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5) - return ARM::UBFX; - - // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2. - // As a result, the decoder fails to deocode SSAT properly. - if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1) - return ARM::SSAT; - - // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147. - // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT. - if (slice(insn, 27, 24) == 0) { - switch (slice(insn, 21, 20)) { - case 2: - switch (slice(insn, 7, 4)) { - case 11: - return ARM::STRHT; - default: - break; // fallthrough + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); + else + Expr = Add; + } else { + if (Off != 0) + Expr = Off; + else + Expr = MCConstantExpr::Create(0, *Ctx); + } + + if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) + MI.addOperand(MCOperand::CreateExpr(Expr)); + else + assert(0 && "bad SymbolicOp.VariantKind"); + + return true; +} + +/// tryAddingPcLoadReferenceComment - tries to add a comment as to what is being +/// referenced by a load instruction with the base register that is the PC. +/// These can often be values in a literal pool near the Address of the +/// instruction. The Address of the instruction and its immediate Value are +/// used as a possible literal pool entry. The SymbolLookUp callback will +/// return the name of a symbol referenced by the literal pool's entry if +/// the referenced address is that of a symbol. Or it will return a pointer to +/// a literal 'C' string if the referenced address of the literal pool's entry +/// is an address into a section with 'C' string literals.
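The Value handed to this helper already includes the ARM pipeline bias: for a PC-relative load at address A with a positive 12-bit offset imm, the literal pool slot sits at A + imm + 8, which is exactly what DecodeAddrModeImm12Operand passes below. A worked example with assumed numbers:

    // ldr r0, [pc, #0x24] at address 0x8000 references the word at
    // 0x8000 + 0x24 + 8 == 0x802C.
    if (Rn == 15)
      tryAddingPcLoadReferenceComment(Address, Address + imm + 8, Decoder);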
+static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value, + const void *Decoder) { + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); + if (SymbolLookUp) { + void *DisInfo = Dis->getDisInfoBlock(); + uint64_t ReferenceType; + ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; + const char *ReferenceName; + (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); + if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr || + ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) + (*Dis->CommentStream) << "literal pool for: " << ReferenceName; + } +} + +// Thumb1 instructions don't have explicit S bits. Rather, they +// implicitly set CPSR. Since it's not represented in the encoding, the +// auto-generated decoder won't inject the CPSR operand. We need to fix +// that as a post-pass. +static void AddThumb1SBit(MCInst &MI, bool InITBlock) { + const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo; + unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands; + MCInst::iterator I = MI.begin(); + for (unsigned i = 0; i < NumOps; ++i, ++I) { + if (I == MI.end()) break; + if (OpInfo[i].isOptionalDef() && OpInfo[i].RegClass == ARM::CCRRegClassID) { + if (i > 0 && OpInfo[i-1].isPredicate()) continue; + MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR)); + return; + } + } + + MI.insert(I, MCOperand::CreateReg(InITBlock ? 0 : ARM::CPSR)); +} + +// Most Thumb instructions don't have explicit predicates in the +// encoding, but rather get their predicates from IT context. We need +// to fix up the predicate operands using this context information as a +// post-pass. +MCDisassembler::DecodeStatus +ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { + MCDisassembler::DecodeStatus S = Success; + + // A few instructions actually have predicates encoded in them. Don't + // try to overwrite them when we see one of those. + switch (MI.getOpcode()) { + case ARM::tBcc: + case ARM::t2Bcc: + case ARM::tCBZ: + case ARM::tCBNZ: + case ARM::tCPS: + case ARM::t2CPS3p: + case ARM::t2CPS2p: + case ARM::t2CPS1p: + case ARM::tMOVSr: + case ARM::tSETEND: + // Some instructions (mostly conditional branches) are not + // allowed in IT blocks. + if (!ITBlock.empty()) + S = SoftFail; + else + return Success; + break; + case ARM::tB: + case ARM::t2B: + case ARM::t2TBB: + case ARM::t2TBH: + // Some instructions (mostly unconditional branches) can + // only appear at the end of, or outside of, an IT block. + if (ITBlock.size() > 1) + S = SoftFail; + break; + default: + break; + } + + // If we're in an IT block, base the predicate on that. Otherwise, + // assume a predicate of AL.
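A note on plumbing before the predicate selection itself: the DecodeStatus values these post-passes return are folded into the caller's status through the Check helper, which is defined earlier in this file (outside this hunk). Its contract is roughly the following sketch:

    static bool Check(DecodeStatus &Out, DecodeStatus In) {
      switch (In) {
      case MCDisassembler::Success:  return true;            // Out is unchanged
      case MCDisassembler::SoftFail: Out = In; return true;  // remember, go on
      case MCDisassembler::Fail:     Out = In; return false; // abort decoding
      }
      return false;
    }

so the pervasive `if (!Check(S, ...)) return MCDisassembler::Fail;` aborts only on hard failure while accumulating soft failures in S.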
+ unsigned CC; + if (!ITBlock.empty()) { + CC = ITBlock.back(); + if (CC == 0xF) + CC = ARMCC::AL; + ITBlock.pop_back(); + } else + CC = ARMCC::AL; + + const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo; + unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands; + MCInst::iterator I = MI.begin(); + for (unsigned i = 0; i < NumOps; ++i, ++I) { + if (I == MI.end()) break; + if (OpInfo[i].isPredicate()) { + I = MI.insert(I, MCOperand::CreateImm(CC)); + ++I; + if (CC == ARMCC::AL) + MI.insert(I, MCOperand::CreateReg(0)); + else + MI.insert(I, MCOperand::CreateReg(ARM::CPSR)); + return S; + } + } + + I = MI.insert(I, MCOperand::CreateImm(CC)); + ++I; + if (CC == ARMCC::AL) + MI.insert(I, MCOperand::CreateReg(0)); + else + MI.insert(I, MCOperand::CreateReg(ARM::CPSR)); + + return S; +} + +// Thumb VFP instructions are a special case. Because we share their +// encodings between ARM and Thumb modes, and they are predicable in ARM +// mode, the auto-generated decoder will give them an (incorrect) +// predicate operand. We need to rewrite these operands based on the IT +// context as a post-pass. +void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const { + unsigned CC; + if (!ITBlock.empty()) { + CC = ITBlock.back(); + ITBlock.pop_back(); + } else + CC = ARMCC::AL; + + const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo; + MCInst::iterator I = MI.begin(); + unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands; + for (unsigned i = 0; i < NumOps; ++i, ++I) { + if (OpInfo[i].isPredicate() ) { + I->setImm(CC); + ++I; + if (CC == ARMCC::AL) + I->setReg(0); + else + I->setReg(ARM::CPSR); + return; + } + } +} + +DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + CommentStream = &cs; + + uint8_t bytes[4]; + + assert((STI.getFeatureBits() & ARM::ModeThumb) && + "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!"); + + // We want to read exactly 2 bytes of data. + if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + uint16_t insn16 = (bytes[1] << 8) | bytes[0]; + DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 2; + Check(result, AddThumbPredicate(MI)); + return result; + } + + MI.clear(); + result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI); + if (result) { + Size = 2; + bool InITBlock = !ITBlock.empty(); + Check(result, AddThumbPredicate(MI)); + AddThumb1SBit(MI, InITBlock); + return result; + } + + MI.clear(); + result = decodeThumb2Instruction16(MI, insn16, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 2; + + // Nested IT blocks are UNPREDICTABLE. Must be checked before we add + // the Thumb predicate. + if (MI.getOpcode() == ARM::t2IT && !ITBlock.empty()) + result = MCDisassembler::SoftFail; + + Check(result, AddThumbPredicate(MI)); + + // If we find an IT instruction, we need to parse its condition + // code and mask operands so that we can apply them correctly + // to the subsequent instructions. + if (MI.getOpcode() == ARM::t2IT) { + + // (3 - the number of trailing zeros) is the number of then / else. 
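Spelling out that comment: the four-bit IT mask is terminated by its lowest set bit, so the trailing-zero count fixes how many then/else slots follow the initial, always-then instruction. For example (CountTrailingZeros_32 comes from llvm/Support/MathExtras.h and is used just below):

    // Mask 0b1000: 3 trailing zeros -> 0 extra slots (plain IT, one instr.)
    // Mask 0b0100: 2 trailing zeros -> 1 extra slot  (ITT or ITE)
    // Mask 0b0010: 1 trailing zero  -> 2 extra slots (ITTT, ITTE, ...)
    unsigned ExtraSlots = 3 - CountTrailingZeros_32(Mask);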
+ unsigned firstcond = MI.getOperand(0).getImm(); + unsigned Mask = MI.getOperand(1).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { + bool T = ((Mask >> Pos) & 1) == CondBit0; + if (T) + ITBlock.insert(ITBlock.begin(), firstcond); + else + ITBlock.insert(ITBlock.begin(), firstcond ^ 1); } + + ITBlock.push_back(firstcond); + } + + return result; + } + + // We want to read exactly 4 bytes of data. + if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + uint32_t insn32 = (bytes[3] << 8) | + (bytes[2] << 0) | + (bytes[1] << 24) | + (bytes[0] << 16); + MI.clear(); + result = decodeThumbInstruction32(MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + bool InITBlock = ITBlock.size(); + Check(result, AddThumbPredicate(MI)); + AddThumb1SBit(MI, InITBlock); + return result; + } + + MI.clear(); + result = decodeThumb2Instruction32(MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } + + MI.clear(); + result = decodeVFPInstruction32(MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + UpdateThumbVFPPredicate(MI); + return result; + } + + MI.clear(); + result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } + + if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) { + MI.clear(); + uint32_t NEONLdStInsn = insn32; + NEONLdStInsn &= 0xF0FFFFFF; + NEONLdStInsn |= 0x04000000; + result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } + } + + if (fieldFromInstruction32(insn32, 24, 4) == 0xF) { + MI.clear(); + uint32_t NEONDataInsn = insn32; + NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24 + NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 + NEONDataInsn |= 0x12000000; // Set bits 28 and 25 + result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } + } + + Size = 0; + return MCDisassembler::Fail; +} + + +extern "C" void LLVMInitializeARMDisassembler() { + TargetRegistry::RegisterMCDisassembler(TheARMTarget, + createARMDisassembler); + TargetRegistry::RegisterMCDisassembler(TheThumbTarget, + createThumbDisassembler); +} + +static const unsigned GPRDecoderTable[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R9, ARM::R10, ARM::R11, + ARM::R12, ARM::SP, ARM::LR, ARM::PC +}; + +static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 15) + return MCDisassembler::Fail; + + unsigned Register = GPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo == 15) return MCDisassembler::Fail; + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, 
unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 7) + return MCDisassembler::Fail; + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + unsigned Register = 0; + switch (RegNo) { + case 0: + Register = ARM::R0; + break; + case 1: + Register = ARM::R1; + break; + case 2: + Register = ARM::R2; break; case 3: - switch (slice(insn, 7, 4)) { - case 11: - return ARM::LDRHT; - case 13: - return ARM::LDRSBT; - case 15: - return ARM::LDRSHT; - default: - break; // fallthrough - } + Register = ARM::R3; + break; + case 9: + Register = ARM::R9; + break; + case 12: + Register = ARM::R12; break; default: - break; // fallthrough + return MCDisassembler::Fail; } + + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail; + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static const unsigned SPRDecoderTable[] = { + ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12, ARM::S13, ARM::S14, ARM::S15, + ARM::S16, ARM::S17, ARM::S18, ARM::S19, + ARM::S20, ARM::S21, ARM::S22, ARM::S23, + ARM::S24, ARM::S25, ARM::S26, ARM::S27, + ARM::S28, ARM::S29, ARM::S30, ARM::S31 +}; + +static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Register = SPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static const unsigned DPRDecoderTable[] = { + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7, + ARM::D8, ARM::D9, ARM::D10, ARM::D11, + ARM::D12, ARM::D13, ARM::D14, ARM::D15, + ARM::D16, ARM::D17, ARM::D18, ARM::D19, + ARM::D20, ARM::D21, ARM::D22, ARM::D23, + ARM::D24, ARM::D25, ARM::D26, ARM::D27, + ARM::D28, ARM::D29, ARM::D30, ARM::D31 +}; + +static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Register = DPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 7) + return MCDisassembler::Fail; + return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus +DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 15) + return MCDisassembler::Fail; + return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static const unsigned QPRDecoderTable[] = { + ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, + ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, + ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11, + ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15 +}; + + +static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + RegNo >>= 1; + + unsigned Register = QPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return 
MCDisassembler::Success; +} + +static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (Val == 0xF) return MCDisassembler::Fail; + // AL predicate is not allowed on Thumb1 branches. + if (Inst.getOpcode() == ARM::tBcc && Val == 0xE) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(Val)); + if (Val == ARMCC::AL) { + Inst.addOperand(MCOperand::CreateReg(0)); + } else + Inst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (Val) + Inst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + else + Inst.addOperand(MCOperand::CreateReg(0)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + uint32_t imm = Val & 0xFF; + uint32_t rot = (Val & 0xF00) >> 7; + uint32_t rot_imm = (imm >> rot) | (imm << ((32-rot) & 0x1F)); + Inst.addOperand(MCOperand::CreateImm(rot_imm)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + unsigned type = fieldFromInstruction32(Val, 5, 2); + unsigned imm = fieldFromInstruction32(Val, 7, 5); + + // Register-immediate + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + + ARM_AM::ShiftOpc Shift = ARM_AM::lsl; + switch (type) { + case 0: + Shift = ARM_AM::lsl; + break; + case 1: + Shift = ARM_AM::lsr; + break; + case 2: + Shift = ARM_AM::asr; + break; + case 3: + Shift = ARM_AM::ror; + break; } - // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153. - // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST - // properly. 
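DecodeSOImmOperand above unpacks the classic ARM modified immediate: an 8-bit value rotated right by twice the 4-bit rotate field. A worked example, with an assumed encoding:

    // Val = 0x2FF: imm == 0xFF, rotate field == 2, so rot == 4 and the
    // materialized constant is 0xFF ror 4 == 0xF000000F.
    uint32_t imm = Val & 0xFF;
    uint32_t rot = (Val & 0xF00) >> 7;   // rotate field times two
    uint32_t rot_imm = (imm >> rot) | (imm << ((32 - rot) & 0x1F));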
- if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) { - unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21); - switch (slice(insn, 7, 4)) { - case 11: - switch (PW) { - case 2: // Offset - return ARM::STRH; - case 3: // Pre-indexed - return ARM::STRH_PRE; - case 0: // Post-indexed - return ARM::STRH_POST; - default: - break; // fallthrough - } + if (Shift == ARM_AM::ror && imm == 0) + Shift = ARM_AM::rrx; + + unsigned Op = Shift | (imm << 3); + Inst.addOperand(MCOperand::CreateImm(Op)); + + return S; +} + +static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + unsigned type = fieldFromInstruction32(Val, 5, 2); + unsigned Rs = fieldFromInstruction32(Val, 8, 4); + + // Register-register + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rs, Address, Decoder))) + return MCDisassembler::Fail; + + ARM_AM::ShiftOpc Shift = ARM_AM::lsl; + switch (type) { + case 0: + Shift = ARM_AM::lsl; break; - case 13: - switch (PW) { - case 2: // Offset - return ARM::LDRD; - case 3: // Pre-indexed - return ARM::LDRD_PRE; - case 0: // Post-indexed - return ARM::LDRD_POST; - default: - break; // fallthrough - } + case 1: + Shift = ARM_AM::lsr; break; - case 15: - switch (PW) { - case 2: // Offset - return ARM::STRD; - case 3: // Pre-indexed - return ARM::STRD_PRE; - case 0: // Post-indexed - return ARM::STRD_POST; - default: - break; // fallthrough - } + case 2: + Shift = ARM_AM::asr; break; + case 3: + Shift = ARM_AM::ror; + break; + } + + Inst.addOperand(MCOperand::CreateImm(Shift)); + + return S; +} + +static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + bool writebackLoad = false; + unsigned writebackReg = 0; + switch (Inst.getOpcode()) { default: - break; // fallthrough + break; + case ARM::LDMIA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::LDMDA_UPD: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + writebackLoad = true; + writebackReg = Inst.getOperand(0).getReg(); + break; + } + + // Empty register lists are not allowed. + if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail; + for (unsigned i = 0; i < 16; ++i) { + if (Val & (1 << i)) { + if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder))) + return MCDisassembler::Fail; + // Writeback not allowed if Rn is in the target list. + if (writebackLoad && writebackReg == Inst.end()[-1].getReg()) + Check(S, MCDisassembler::SoftFail); } } - // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153. - // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST - // properly. 
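DecodeRegListOperand above walks a 16-bit mask, one bit per GPR in encoding order (via the GPRDecoderTable defined earlier), after rejecting an empty list; the real loop also flags writeback of a listed register as SoftFail. An equivalent sketch for one concrete mask:

    // Val == 0x4003 sets bits 0, 1 and 14, i.e. the list {r0, r1, lr}.
    for (unsigned i = 0; i < 16; ++i)
      if (Val & (1 << i))
        Inst.addOperand(MCOperand::CreateReg(GPRDecoderTable[i]));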
- if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) { - unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21); - switch (slice(insn, 7, 4)) { - case 11: - switch (PW) { - case 2: // Offset - return ARM::LDRH; - case 3: // Pre-indexed - return ARM::LDRH_PRE; - case 0: // Post-indexed - return ARM::LDRH_POST; - default: - break; // fallthrough - } + return S; +} + +static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Vd = fieldFromInstruction32(Val, 8, 4); + unsigned regs = Val & 0xFF; + + if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder))) + return MCDisassembler::Fail; + for (unsigned i = 0; i < (regs - 1); ++i) { + if (!Check(S, DecodeSPRRegisterClass(Inst, ++Vd, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Vd = fieldFromInstruction32(Val, 8, 4); + unsigned regs = (Val & 0xFF) / 2; + + if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder))) + return MCDisassembler::Fail; + for (unsigned i = 0; i < (regs - 1); ++i) { + if (!Check(S, DecodeDPRRegisterClass(Inst, ++Vd, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + // This operand encodes a mask of contiguous zeros between a specified MSB + // and LSB. To decode it, we create the mask of all bits MSB-and-lower, + // the mask of all bits LSB-and-lower, and then xor them to create + // the mask that's all ones on [msb, lsb]. Finally we invert it to + // create the final mask.
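Working that recipe through an example, say msb = 7 and lsb = 4:

    uint32_t msb_mask = (1U << (7 + 1)) - 1;   // 0x000000FF
    uint32_t lsb_mask = (1U << 4) - 1;         // 0x0000000F
    uint32_t ones     = msb_mask ^ lsb_mask;   // 0x000000F0: ones on [7,4]
    uint32_t mask     = ~ones;                 // 0xFFFFFF0F: the final operand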
+ unsigned msb = fieldFromInstruction32(Val, 5, 5); + unsigned lsb = fieldFromInstruction32(Val, 0, 5); + + DecodeStatus S = MCDisassembler::Success; + if (lsb > msb) Check(S, MCDisassembler::SoftFail); + + uint32_t msb_mask = 0xFFFFFFFF; + if (msb != 31) msb_mask = (1U << (msb+1)) - 1; + uint32_t lsb_mask = (1U << lsb) - 1; + + Inst.addOperand(MCOperand::CreateImm(~(msb_mask ^ lsb_mask))); + return S; +} + +static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned CRd = fieldFromInstruction32(Insn, 12, 4); + unsigned coproc = fieldFromInstruction32(Insn, 8, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 8); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned U = fieldFromInstruction32(Insn, 23, 1); + + switch (Inst.getOpcode()) { + case ARM::LDC_OFFSET: + case ARM::LDC_PRE: + case ARM::LDC_POST: + case ARM::LDC_OPTION: + case ARM::LDCL_OFFSET: + case ARM::LDCL_PRE: + case ARM::LDCL_POST: + case ARM::LDCL_OPTION: + case ARM::STC_OFFSET: + case ARM::STC_PRE: + case ARM::STC_POST: + case ARM::STC_OPTION: + case ARM::STCL_OFFSET: + case ARM::STCL_PRE: + case ARM::STCL_POST: + case ARM::STCL_OPTION: + case ARM::t2LDC_OFFSET: + case ARM::t2LDC_PRE: + case ARM::t2LDC_POST: + case ARM::t2LDC_OPTION: + case ARM::t2LDCL_OFFSET: + case ARM::t2LDCL_PRE: + case ARM::t2LDCL_POST: + case ARM::t2LDCL_OPTION: + case ARM::t2STC_OFFSET: + case ARM::t2STC_PRE: + case ARM::t2STC_POST: + case ARM::t2STC_OPTION: + case ARM::t2STCL_OFFSET: + case ARM::t2STCL_PRE: + case ARM::t2STCL_POST: + case ARM::t2STCL_OPTION: + if (coproc == 0xA || coproc == 0xB) + return MCDisassembler::Fail; + break; + default: break; - case 13: - switch (PW) { - case 2: // Offset - return ARM::LDRSB; - case 3: // Pre-indexed - return ARM::LDRSB_PRE; - case 0: // Post-indexed - return ARM::LDRSB_POST; + } + + Inst.addOperand(MCOperand::CreateImm(coproc)); + Inst.addOperand(MCOperand::CreateImm(CRd)); + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned W = fieldFromInstruction32(Insn, 21, 1); + + bool writeback = (P == 0) || (W == 1); + unsigned idx_mode = 0; + if (P && writeback) + idx_mode = ARMII::IndexModePre; + else if (!P && writeback) + idx_mode = ARMII::IndexModePost; + + switch (Inst.getOpcode()) { + case ARM::t2LDC2_OFFSET: + case ARM::t2LDC2L_OFFSET: + case ARM::t2LDC2_PRE: + case ARM::t2LDC2L_PRE: + case ARM::t2STC2_OFFSET: + case ARM::t2STC2L_OFFSET: + case ARM::t2STC2_PRE: + case ARM::t2STC2L_PRE: + case ARM::LDC2_OFFSET: + case ARM::LDC2L_OFFSET: + case ARM::LDC2_PRE: + case ARM::LDC2L_PRE: + case ARM::STC2_OFFSET: + case ARM::STC2L_OFFSET: + case ARM::STC2_PRE: + case ARM::STC2L_PRE: + case ARM::t2LDC_OFFSET: + case ARM::t2LDCL_OFFSET: + case ARM::t2LDC_PRE: + case ARM::t2LDCL_PRE: + case ARM::t2STC_OFFSET: + case ARM::t2STCL_OFFSET: + case ARM::t2STC_PRE: + case ARM::t2STCL_PRE: + case ARM::LDC_OFFSET: + case ARM::LDCL_OFFSET: + case ARM::LDC_PRE: + case ARM::LDCL_PRE: + case ARM::STC_OFFSET: + case ARM::STCL_OFFSET: + case ARM::STC_PRE: + case ARM::STCL_PRE: + imm = ARM_AM::getAM5Opc(U ? 
ARM_AM::add : ARM_AM::sub, imm); + Inst.addOperand(MCOperand::CreateImm(imm)); + break; + case ARM::t2LDC2_POST: + case ARM::t2LDC2L_POST: + case ARM::t2STC2_POST: + case ARM::t2STC2L_POST: + case ARM::LDC2_POST: + case ARM::LDC2L_POST: + case ARM::STC2_POST: + case ARM::STC2L_POST: + case ARM::t2LDC_POST: + case ARM::t2LDCL_POST: + case ARM::t2STC_POST: + case ARM::t2STCL_POST: + case ARM::LDC_POST: + case ARM::LDCL_POST: + case ARM::STC_POST: + case ARM::STCL_POST: + imm |= U << 8; + // fall through. + default: + // The 'option' variant doesn't encode 'U' in the immediate since + // the immediate is unsigned [0,255]. + Inst.addOperand(MCOperand::CreateImm(imm)); + break; + } + + switch (Inst.getOpcode()) { + case ARM::LDC_OFFSET: + case ARM::LDC_PRE: + case ARM::LDC_POST: + case ARM::LDC_OPTION: + case ARM::LDCL_OFFSET: + case ARM::LDCL_PRE: + case ARM::LDCL_POST: + case ARM::LDCL_OPTION: + case ARM::STC_OFFSET: + case ARM::STC_PRE: + case ARM::STC_POST: + case ARM::STC_OPTION: + case ARM::STCL_OFFSET: + case ARM::STCL_PRE: + case ARM::STCL_POST: + case ARM::STCL_OPTION: + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + return S; +} + +static DecodeStatus +DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 12); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned reg = fieldFromInstruction32(Insn, 25, 1); + unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned W = fieldFromInstruction32(Insn, 21, 1); + + // On stores, the writeback operand precedes Rt. + switch (Inst.getOpcode()) { + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: + case ARM::STRT_POST_REG: + case ARM::STRT_POST_IMM: + case ARM::STRBT_POST_REG: + case ARM::STRBT_POST_IMM: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + + // On loads, the writeback operand comes after Rt. 
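These addr-mode-2 decoders fold offset, shift kind, add/sub direction and index mode into a single immediate operand via ARM_AM::getAM2Opc (from ARMAddressingModes.h, as used in the code below). A sketch of the immediate leg of a post-indexed form, under those assumptions:

    Inst.addOperand(MCOperand::CreateReg(0));        // no register offset
    unsigned packed = ARM_AM::getAM2Opc(ARM_AM::add, imm, ARM_AM::lsl,
                                        ARMII::IndexModePost);
    Inst.addOperand(MCOperand::CreateImm(packed));   // offset + shift + mode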
+ switch (Inst.getOpcode()) { + case ARM::LDR_POST_IMM: + case ARM::LDR_POST_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRB_POST_REG: + case ARM::LDRBT_POST_REG: + case ARM::LDRBT_POST_IMM: + case ARM::LDRT_POST_REG: + case ARM::LDRT_POST_IMM: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + ARM_AM::AddrOpc Op = ARM_AM::add; + if (!fieldFromInstruction32(Insn, 23, 1)) + Op = ARM_AM::sub; + + bool writeback = (P == 0) || (W == 1); + unsigned idx_mode = 0; + if (P && writeback) + idx_mode = ARMII::IndexModePre; + else if (!P && writeback) + idx_mode = ARMII::IndexModePost; + + if (writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; // UNPREDICTABLE + + if (reg) { + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + ARM_AM::ShiftOpc Opc = ARM_AM::lsl; + switch( fieldFromInstruction32(Insn, 5, 2)) { + case 0: + Opc = ARM_AM::lsl; + break; + case 1: + Opc = ARM_AM::lsr; + break; + case 2: + Opc = ARM_AM::asr; + break; + case 3: + Opc = ARM_AM::ror; + break; default: - break; // fallthrough - } + return MCDisassembler::Fail; + } + unsigned amt = fieldFromInstruction32(Insn, 7, 5); + unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode); + + Inst.addOperand(MCOperand::CreateImm(imm)); + } else { + Inst.addOperand(MCOperand::CreateReg(0)); + unsigned tmp = ARM_AM::getAM2Opc(Op, imm, ARM_AM::lsl, idx_mode); + Inst.addOperand(MCOperand::CreateImm(tmp)); + } + + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 13, 4); + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + unsigned type = fieldFromInstruction32(Val, 5, 2); + unsigned imm = fieldFromInstruction32(Val, 7, 5); + unsigned U = fieldFromInstruction32(Val, 12, 1); + + ARM_AM::ShiftOpc ShOp = ARM_AM::lsl; + switch (type) { + case 0: + ShOp = ARM_AM::lsl; + break; + case 1: + ShOp = ARM_AM::lsr; + break; + case 2: + ShOp = ARM_AM::asr; + break; + case 3: + ShOp = ARM_AM::ror; + break; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + unsigned shift; + if (U) + shift = ARM_AM::getAM2Opc(ARM_AM::add, imm, ShOp); + else + shift = ARM_AM::getAM2Opc(ARM_AM::sub, imm, ShOp); + Inst.addOperand(MCOperand::CreateImm(shift)); + + return S; +} + +static DecodeStatus +DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned type = fieldFromInstruction32(Insn, 22, 1); + unsigned imm = fieldFromInstruction32(Insn, 8, 4); + unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8; + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned W = fieldFromInstruction32(Insn, 21, 1); + unsigned P = fieldFromInstruction32(Insn, 24, 1); + + bool writeback = (W == 1) | (P == 0); + + // For {LD,ST}RD, Rt must 
be even, else undefined. + switch (Inst.getOpcode()) { + case ARM::STRD: + case ARM::STRD_PRE: + case ARM::STRD_POST: + case ARM::LDRD: + case ARM::LDRD_PRE: + case ARM::LDRD_POST: + if (Rt & 0x1) return MCDisassembler::Fail; + break; + default: + break; + } + + if (writeback) { // Writeback + if (P) + U |= ARMII::IndexModePre << 9; + else + U |= ARMII::IndexModePost << 9; + + // On stores, the writeback operand precedes Rt. + switch (Inst.getOpcode()) { + case ARM::STRD: + case ARM::STRD_PRE: + case ARM::STRD_POST: + case ARM::STRH: + case ARM::STRH_PRE: + case ARM::STRH_POST: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::STRD: + case ARM::STRD_PRE: + case ARM::STRD_POST: + case ARM::LDRD: + case ARM::LDRD_PRE: + case ARM::LDRD_POST: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder))) + return MCDisassembler::Fail; break; - case 15: - switch (PW) { - case 2: // Offset - return ARM::LDRSH; - case 3: // Pre-indexed - return ARM::LDRSH_PRE; - case 0: // Post-indexed - return ARM::LDRSH_POST; + default: + break; + } + + if (writeback) { + // On loads, the writeback operand comes after Rt. + switch (Inst.getOpcode()) { + case ARM::LDRD: + case ARM::LDRD_PRE: + case ARM::LDRD_POST: + case ARM::LDRH: + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + case ARM::LDRSH: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + case ARM::LDRSB: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + case ARM::LDRHTr: + case ARM::LDRSBTr: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + if (type) { + Inst.addOperand(MCOperand::CreateReg(0)); + Inst.addOperand(MCOperand::CreateImm(U | (imm << 4) | Rm)); + } else { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(U)); + } + + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned mode = fieldFromInstruction32(Insn, 23, 2); + + switch (mode) { + case 0: + mode = ARM_AM::da; + break; + case 1: + mode = ARM_AM::ia; + break; + case 2: + mode = ARM_AM::db; + break; + case 3: + mode = ARM_AM::ib; + break; + } + + Inst.addOperand(MCOperand::CreateImm(mode)); + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst, + unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned reglist = fieldFromInstruction32(Insn, 0, 16); + + if (pred == 0xF) { + switch (Inst.getOpcode()) { + case ARM::LDMDA: + Inst.setOpcode(ARM::RFEDA); + break; + case ARM::LDMDA_UPD: + Inst.setOpcode(ARM::RFEDA_UPD); + break; + case ARM::LDMDB: + Inst.setOpcode(ARM::RFEDB); + break; + case 
ARM::LDMDB_UPD: + Inst.setOpcode(ARM::RFEDB_UPD); + break; + case ARM::LDMIA: + Inst.setOpcode(ARM::RFEIA); + break; + case ARM::LDMIA_UPD: + Inst.setOpcode(ARM::RFEIA_UPD); + break; + case ARM::LDMIB: + Inst.setOpcode(ARM::RFEIB); + break; + case ARM::LDMIB_UPD: + Inst.setOpcode(ARM::RFEIB_UPD); + break; + case ARM::STMDA: + Inst.setOpcode(ARM::SRSDA); + break; + case ARM::STMDA_UPD: + Inst.setOpcode(ARM::SRSDA_UPD); + break; + case ARM::STMDB: + Inst.setOpcode(ARM::SRSDB); + break; + case ARM::STMDB_UPD: + Inst.setOpcode(ARM::SRSDB_UPD); + break; + case ARM::STMIA: + Inst.setOpcode(ARM::SRSIA); + break; + case ARM::STMIA_UPD: + Inst.setOpcode(ARM::SRSIA_UPD); + break; + case ARM::STMIB: + Inst.setOpcode(ARM::SRSIB); + break; + case ARM::STMIB_UPD: + Inst.setOpcode(ARM::SRSIB_UPD); + break; default: - break; // fallthrough - } + if (!Check(S, MCDisassembler::Fail)) return MCDisassembler::Fail; + } + + // For stores (which become SRS instructions), the only operand is the mode. + if (fieldFromInstruction32(Insn, 20, 1) == 0) { + Inst.addOperand( + MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4))); + return S; + } + + return DecodeRFEInstruction(Inst, Insn, Address, Decoder); + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; // Tied + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeRegListOperand(Inst, reglist, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned imod = fieldFromInstruction32(Insn, 18, 2); + unsigned M = fieldFromInstruction32(Insn, 17, 1); + unsigned iflags = fieldFromInstruction32(Insn, 6, 3); + unsigned mode = fieldFromInstruction32(Insn, 0, 5); + + DecodeStatus S = MCDisassembler::Success; + + // imod == '01' --> UNPREDICTABLE + // NOTE: Even though this is technically UNPREDICTABLE, we choose to + // return failure here. The '01' imod value is unprintable, so there's + // nothing useful we could do even if we returned UNPREDICTABLE. + + if (imod == 1) return MCDisassembler::Fail; + + if (imod && M) { + Inst.setOpcode(ARM::CPS3p); + Inst.addOperand(MCOperand::CreateImm(imod)); + Inst.addOperand(MCOperand::CreateImm(iflags)); + Inst.addOperand(MCOperand::CreateImm(mode)); + } else if (imod && !M) { + Inst.setOpcode(ARM::CPS2p); + Inst.addOperand(MCOperand::CreateImm(imod)); + Inst.addOperand(MCOperand::CreateImm(iflags)); + if (mode) S = MCDisassembler::SoftFail; + } else if (!imod && M) { + Inst.setOpcode(ARM::CPS1p); + Inst.addOperand(MCOperand::CreateImm(mode)); + if (iflags) S = MCDisassembler::SoftFail; + } else { + // imod == '00' && M == '0' --> UNPREDICTABLE + Inst.setOpcode(ARM::CPS1p); + Inst.addOperand(MCOperand::CreateImm(mode)); + S = MCDisassembler::SoftFail; + } + + return S; +} + +static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned imod = fieldFromInstruction32(Insn, 9, 2); + unsigned M = fieldFromInstruction32(Insn, 8, 1); + unsigned iflags = fieldFromInstruction32(Insn, 5, 3); + unsigned mode = fieldFromInstruction32(Insn, 0, 5); + + DecodeStatus S = MCDisassembler::Success; + + // imod == '01' --> UNPREDICTABLE + // NOTE: Even though this is technically UNPREDICTABLE, we choose to + // return failure here.
The '01' imod value is unprintable, so there's + // nothing useful we could do even if we returned UNPREDICTABLE. + + if (imod == 1) return MCDisassembler::Fail; + + if (imod && M) { + Inst.setOpcode(ARM::t2CPS3p); + Inst.addOperand(MCOperand::CreateImm(imod)); + Inst.addOperand(MCOperand::CreateImm(iflags)); + Inst.addOperand(MCOperand::CreateImm(mode)); + } else if (imod && !M) { + Inst.setOpcode(ARM::t2CPS2p); + Inst.addOperand(MCOperand::CreateImm(imod)); + Inst.addOperand(MCOperand::CreateImm(iflags)); + if (mode) S = MCDisassembler::SoftFail; + } else if (!imod && M) { + Inst.setOpcode(ARM::t2CPS1p); + Inst.addOperand(MCOperand::CreateImm(mode)); + if (iflags) S = MCDisassembler::SoftFail; + } else { + // imod == '00' && M == '0' --> UNPREDICTABLE + Inst.setOpcode(ARM::t2CPS1p); + Inst.addOperand(MCOperand::CreateImm(mode)); + S = MCDisassembler::SoftFail; + } + + return S; +} + +static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 8, 4); + unsigned imm = 0; + + imm |= (fieldFromInstruction32(Insn, 0, 8) << 0); + imm |= (fieldFromInstruction32(Insn, 12, 3) << 8); + imm |= (fieldFromInstruction32(Insn, 16, 4) << 12); + imm |= (fieldFromInstruction32(Insn, 26, 1) << 11); + + if (Inst.getOpcode() == ARM::t2MOVTi16) + if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + + if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + +static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned imm = 0; + + imm |= (fieldFromInstruction32(Insn, 0, 12) << 0); + imm |= (fieldFromInstruction32(Insn, 16, 4) << 12); + + if (Inst.getOpcode() == ARM::MOVTi16) + if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + + if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(imm)); + + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 16, 4); + unsigned Rn = fieldFromInstruction32(Insn, 0, 4); + unsigned Rm = fieldFromInstruction32(Insn, 8, 4); + unsigned Ra = fieldFromInstruction32(Insn, 12, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + + if (pred == 0xF) + return DecodeCPSInstruction(Inst, Insn, Address, Decoder); + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Ra, Address, Decoder))) + return 
MCDisassembler::Fail; + + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned add = fieldFromInstruction32(Val, 12, 1); + unsigned imm = fieldFromInstruction32(Val, 0, 12); + unsigned Rn = fieldFromInstruction32(Val, 13, 4); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + if (!add) imm *= -1; + if (imm == 0 && !add) imm = INT32_MIN; + Inst.addOperand(MCOperand::CreateImm(imm)); + if (Rn == 15) + tryAddingPcLoadReferenceComment(Address, Address + imm + 8, Decoder); + + return S; +} + +static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 9, 4); + unsigned U = fieldFromInstruction32(Val, 8, 1); + unsigned imm = fieldFromInstruction32(Val, 0, 8); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + if (U) + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, imm))); + else + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, imm))); + + return S; +} + +static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); +} + +static DecodeStatus +DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2; + + if (pred == 0xF) { + Inst.setOpcode(ARM::BLXi); + imm |= fieldFromInstruction32(Insn, 24, 1) << 1; + Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); + return S; + } + + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true, + 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + + +static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(64 - Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + unsigned align = fieldFromInstruction32(Val, 4, 2); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!align) + Inst.addOperand(MCOperand::CreateImm(0)); + else + Inst.addOperand(MCOperand::CreateImm(4 << align)); + + return S; +} + +static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned wb = fieldFromInstruction32(Insn, 16, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + Rn |= fieldFromInstruction32(Insn, 4, 2) << 4; + unsigned Rm = fieldFromInstruction32(Insn, 0, 
4); + + // First output register + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + + // Second output register + switch (Inst.getOpcode()) { + case ARM::VLD1q8: + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VLD1d8T: + case ARM::VLD1d16T: + case ARM::VLD1d32T: + case ARM::VLD1d64T: + case ARM::VLD1d8T_UPD: + case ARM::VLD1d16T_UPD: + case ARM::VLD1d32T_UPD: + case ARM::VLD1d64T_UPD: + case ARM::VLD1d8Q: + case ARM::VLD1d16Q: + case ARM::VLD1d32Q: + case ARM::VLD1d64Q: + case ARM::VLD1d8Q_UPD: + case ARM::VLD1d16Q_UPD: + case ARM::VLD1d32Q_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD2d8: + case ARM::VLD2d16: + case ARM::VLD2d32: + case ARM::VLD2d8_UPD: + case ARM::VLD2d16_UPD: + case ARM::VLD2d32_UPD: + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD3d8: + case ARM::VLD3d16: + case ARM::VLD3d32: + case ARM::VLD3d8_UPD: + case ARM::VLD3d16_UPD: + case ARM::VLD3d32_UPD: + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) + return MCDisassembler::Fail; break; + case ARM::VLD2b8: + case ARM::VLD2b16: + case ARM::VLD2b32: + case ARM::VLD2b8_UPD: + case ARM::VLD2b16_UPD: + case ARM::VLD2b32_UPD: + case ARM::VLD3q8: + case ARM::VLD3q16: + case ARM::VLD3q32: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: + case ARM::VLD4q8: + case ARM::VLD4q16: + case ARM::VLD4q32: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder))) + return MCDisassembler::Fail; default: - break; // fallthrough + break; + } + + // Third output register + switch(Inst.getOpcode()) { + case ARM::VLD1d8T: + case ARM::VLD1d16T: + case ARM::VLD1d32T: + case ARM::VLD1d64T: + case ARM::VLD1d8T_UPD: + case ARM::VLD1d16T_UPD: + case ARM::VLD1d32T_UPD: + case ARM::VLD1d64T_UPD: + case ARM::VLD1d8Q: + case ARM::VLD1d16Q: + case ARM::VLD1d32Q: + case ARM::VLD1d64Q: + case ARM::VLD1d8Q_UPD: + case ARM::VLD1d16Q_UPD: + case ARM::VLD1d32Q_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD3d8: + case ARM::VLD3d16: + case ARM::VLD3d32: + case ARM::VLD3d8_UPD: + case ARM::VLD3d16_UPD: + case ARM::VLD3d32_UPD: + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VLD3q8: + case ARM::VLD3q16: + case ARM::VLD3q32: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: + case ARM::VLD4q8: + case ARM::VLD4q16: + case ARM::VLD4q32: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // Fourth output register + switch (Inst.getOpcode()) { + case ARM::VLD1d8Q: + case ARM::VLD1d16Q: + case ARM::VLD1d32Q: + case ARM::VLD1d64Q: + case ARM::VLD1d8Q_UPD: + case ARM::VLD1d16Q_UPD: + case 
ARM::VLD1d32Q_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VLD4q8: + case ARM::VLD4q16: + case ARM::VLD4q32: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // Writeback operand + switch (Inst.getOpcode()) { + case ARM::VLD1d8_UPD: + case ARM::VLD1d16_UPD: + case ARM::VLD1d32_UPD: + case ARM::VLD1d64_UPD: + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VLD1d8T_UPD: + case ARM::VLD1d16T_UPD: + case ARM::VLD1d32T_UPD: + case ARM::VLD1d64T_UPD: + case ARM::VLD1d8Q_UPD: + case ARM::VLD1d16Q_UPD: + case ARM::VLD1d32Q_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD2d8_UPD: + case ARM::VLD2d16_UPD: + case ARM::VLD2d32_UPD: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD2b8_UPD: + case ARM::VLD2b16_UPD: + case ARM::VLD2b32_UPD: + case ARM::VLD3d8_UPD: + case ARM::VLD3d16_UPD: + case ARM::VLD3d32_UPD: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // AddrMode6 Base (register+alignment) + if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + // AddrMode6 Offset (register) + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned wb = fieldFromInstruction32(Insn, 16, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + Rn |= fieldFromInstruction32(Insn, 4, 2) << 4; + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + + // Writeback Operand + switch (Inst.getOpcode()) { + case ARM::VST1d8_UPD: + case ARM::VST1d16_UPD: + case ARM::VST1d32_UPD: + case ARM::VST1d64_UPD: + case ARM::VST1q8_UPD: + case ARM::VST1q16_UPD: + case ARM::VST1q32_UPD: + case ARM::VST1q64_UPD: + case ARM::VST1d8T_UPD: + case ARM::VST1d16T_UPD: + case ARM::VST1d32T_UPD: + case ARM::VST1d64T_UPD: + case ARM::VST1d8Q_UPD: + case ARM::VST1d16Q_UPD: + case ARM::VST1d32Q_UPD: + case ARM::VST1d64Q_UPD: + case ARM::VST2d8_UPD: + case ARM::VST2d16_UPD: + case ARM::VST2d32_UPD: + case ARM::VST2q8_UPD: + case ARM::VST2q16_UPD: + case ARM::VST2q32_UPD: + case ARM::VST2b8_UPD: + case ARM::VST2b16_UPD: + case ARM::VST2b32_UPD: + case ARM::VST3d8_UPD: + case ARM::VST3d16_UPD: + case ARM::VST3d32_UPD: + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + case ARM::VST4d8_UPD: + case 
ARM::VST4d16_UPD: + case ARM::VST4d32_UPD: + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + if (!Check(S, DecodeGPRRegisterClass(Inst, wb, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // AddrMode6 Base (register+alignment) + if (!Check(S, DecodeAddrMode6Operand(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + // AddrMode6 Offset (register) + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + // First input register + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + + // Second input register + switch (Inst.getOpcode()) { + case ARM::VST1q8: + case ARM::VST1q16: + case ARM::VST1q32: + case ARM::VST1q64: + case ARM::VST1q8_UPD: + case ARM::VST1q16_UPD: + case ARM::VST1q32_UPD: + case ARM::VST1q64_UPD: + case ARM::VST1d8T: + case ARM::VST1d16T: + case ARM::VST1d32T: + case ARM::VST1d64T: + case ARM::VST1d8T_UPD: + case ARM::VST1d16T_UPD: + case ARM::VST1d32T_UPD: + case ARM::VST1d64T_UPD: + case ARM::VST1d8Q: + case ARM::VST1d16Q: + case ARM::VST1d32Q: + case ARM::VST1d64Q: + case ARM::VST1d8Q_UPD: + case ARM::VST1d16Q_UPD: + case ARM::VST1d32Q_UPD: + case ARM::VST1d64Q_UPD: + case ARM::VST2d8: + case ARM::VST2d16: + case ARM::VST2d32: + case ARM::VST2d8_UPD: + case ARM::VST2d16_UPD: + case ARM::VST2d32_UPD: + case ARM::VST2q8: + case ARM::VST2q16: + case ARM::VST2q32: + case ARM::VST2q8_UPD: + case ARM::VST2q16_UPD: + case ARM::VST2q32_UPD: + case ARM::VST3d8: + case ARM::VST3d16: + case ARM::VST3d32: + case ARM::VST3d8_UPD: + case ARM::VST3d16_UPD: + case ARM::VST3d32_UPD: + case ARM::VST4d8: + case ARM::VST4d16: + case ARM::VST4d32: + case ARM::VST4d8_UPD: + case ARM::VST4d16_UPD: + case ARM::VST4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VST2b8: + case ARM::VST2b16: + case ARM::VST2b32: + case ARM::VST2b8_UPD: + case ARM::VST2b16_UPD: + case ARM::VST2b32_UPD: + case ARM::VST3q8: + case ARM::VST3q16: + case ARM::VST3q32: + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + case ARM::VST4q8: + case ARM::VST4q16: + case ARM::VST4q32: + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // Third input register + switch (Inst.getOpcode()) { + case ARM::VST1d8T: + case ARM::VST1d16T: + case ARM::VST1d32T: + case ARM::VST1d64T: + case ARM::VST1d8T_UPD: + case ARM::VST1d16T_UPD: + case ARM::VST1d32T_UPD: + case ARM::VST1d64T_UPD: + case ARM::VST1d8Q: + case ARM::VST1d16Q: + case ARM::VST1d32Q: + case ARM::VST1d64Q: + case ARM::VST1d8Q_UPD: + case ARM::VST1d16Q_UPD: + case ARM::VST1d32Q_UPD: + case ARM::VST1d64Q_UPD: + case ARM::VST2q8: + case ARM::VST2q16: + case ARM::VST2q32: + case ARM::VST2q8_UPD: + case ARM::VST2q16_UPD: + case ARM::VST2q32_UPD: + case ARM::VST3d8: + case ARM::VST3d16: + case ARM::VST3d32: + case ARM::VST3d8_UPD: + case ARM::VST3d16_UPD: + case ARM::VST3d32_UPD: + case ARM::VST4d8: + case ARM::VST4d16: + case ARM::VST4d32: + case ARM::VST4d8_UPD: + case ARM::VST4d16_UPD: + case ARM::VST4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + case 
ARM::VST3q8: + case ARM::VST3q16: + case ARM::VST3q32: + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + case ARM::VST4q8: + case ARM::VST4q16: + case ARM::VST4q32: + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+4)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + // Fourth input register + switch (Inst.getOpcode()) { + case ARM::VST1d8Q: + case ARM::VST1d16Q: + case ARM::VST1d32Q: + case ARM::VST1d64Q: + case ARM::VST1d8Q_UPD: + case ARM::VST1d16Q_UPD: + case ARM::VST1d32Q_UPD: + case ARM::VST1d64Q_UPD: + case ARM::VST2q8: + case ARM::VST2q16: + case ARM::VST2q32: + case ARM::VST2q8_UPD: + case ARM::VST2q16_UPD: + case ARM::VST2q32_UPD: + case ARM::VST4d8: + case ARM::VST4d16: + case ARM::VST4d32: + case ARM::VST4d8_UPD: + case ARM::VST4d16_UPD: + case ARM::VST4d32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VST4q8: + case ARM::VST4q16: + case ARM::VST4q32: + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+6)%32, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + return S; +} + +static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned align = fieldFromInstruction32(Insn, 4, 1); + unsigned size = fieldFromInstruction32(Insn, 6, 2); + unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1; + + align *= (1 << size); + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (regs == 2) { + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) + return MCDisassembler::Fail; + } + if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned align = fieldFromInstruction32(Insn, 4, 1); + unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2); + unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; + align *= 2*size; + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, 
DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(0)); + + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned size = fieldFromInstruction32(Insn, 6, 2); + unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; + unsigned align = fieldFromInstruction32(Insn, 4, 1); + + if (size == 0x3) { + size = 4; + align = 16; + } else { + if (size == 2) { + size = 1 << size; + align *= 8; + } else { + size = 1 << size; + align *= 4*size; } } -AutoGenedDecoder: - // Calling the auto-generated decoder function. - return decodeInstruction(insn); + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2*inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+3*inc)%32, Address, Decoder))) + return MCDisassembler::Fail; + if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; } -// Helper function for special case handling of LDR (literal) and friends. -// See, for example, A6.3.7 Load word: Table A6-18 Load word. -// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode -// before returning it. 
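Every decoder added in this hunk pulls its operand fields out of the instruction word with fieldFromInstruction32/fieldFromInstruction16. A minimal sketch of the semantics those calls assume (shift to the field's start bit, then mask to its width; illustrative only, the real helpers live elsewhere in this file):

  #include <stdint.h>

  static inline uint32_t fieldFromInstruction32(uint32_t insn, unsigned startBit,
                                                unsigned numBits) {
    // Return the numBits-wide field of insn that begins at bit startBit.
    uint32_t mask = numBits == 32 ? ~(uint32_t)0 : (1u << numBits) - 1;
    return (insn >> startBit) & mask;
  }

  // Usage, matching the VLD decoders above: Rd is bits [15:12] plus bit 22
  // as its high bit.
  //   unsigned Rd = fieldFromInstruction32(Insn, 12, 4)
  //               | fieldFromInstruction32(Insn, 22, 1) << 4;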
-static unsigned T2Morph2LoadLiteral(unsigned Opcode) { - switch (Opcode) { - default: - return Opcode; // Return unmorphed opcode. +static DecodeStatus +DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned imm = fieldFromInstruction32(Insn, 0, 4); + imm |= fieldFromInstruction32(Insn, 16, 3) << 4; + imm |= fieldFromInstruction32(Insn, 24, 1) << 7; + imm |= fieldFromInstruction32(Insn, 8, 4) << 8; + imm |= fieldFromInstruction32(Insn, 5, 1) << 12; + unsigned Q = fieldFromInstruction32(Insn, 6, 1); + + if (Q) { + if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } else { + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } + + Inst.addOperand(MCOperand::CreateImm(imm)); + + switch (Inst.getOpcode()) { + case ARM::VORRiv4i16: + case ARM::VORRiv2i32: + case ARM::VBICiv4i16: + case ARM::VBICiv2i32: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VORRiv8i16: + case ARM::VORRiv4i32: + case ARM::VBICiv8i16: + case ARM::VBICiv4i32: + if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + break; + } + + return S; +} + +static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 18, 2); + + if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(8 << size)); + + return S; +} + +static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(8 - Val)); + return MCDisassembler::Success; +} - case ARM::t2LDR_POST: case ARM::t2LDR_PRE: - case ARM::t2LDRi12: case ARM::t2LDRi8: - case ARM::t2LDRs: case ARM::t2LDRT: - return ARM::t2LDRpci; +static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(16 - Val)); + return MCDisassembler::Success; +} - case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE: - case ARM::t2LDRBi12: case ARM::t2LDRBi8: - case ARM::t2LDRBs: case ARM::t2LDRBT: - return ARM::t2LDRBpci; +static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(32 - Val)); + return MCDisassembler::Success; +} - case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE: - case ARM::t2LDRHi12: case ARM::t2LDRHi8: - case ARM::t2LDRHs: case ARM::t2LDRHT: - return ARM::t2LDRHpci; +static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(64 - Val)); + return MCDisassembler::Success; +} - case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE: - case ARM::t2LDRSBi12: case 
ARM::t2LDRSBi8: - case ARM::t2LDRSBs: case ARM::t2LDRSBT: - return ARM::t2LDRSBpci; +static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + Rn |= fieldFromInstruction32(Insn, 7, 1) << 4; + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + unsigned op = fieldFromInstruction32(Insn, 6, 1); + unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1; + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (op) { + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; // Writeback + } - case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE: - case ARM::t2LDRSHi12: case ARM::t2LDRSHi8: - case ARM::t2LDRSHs: case ARM::t2LDRSHT: - return ARM::t2LDRSHpci; + for (unsigned i = 0; i < length; ++i) { + if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder))) + return MCDisassembler::Fail; } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + + return S; } -// Helper function for special case handling of PLD (literal) and friends. -// See A8.6.117 T1 & T2 and friends for why we morphed the opcode -// before returning it. -static unsigned T2Morph2PLDLiteral(unsigned Opcode) { - switch (Opcode) { - default: - return Opcode; // Return unmorphed opcode. - - case ARM::t2PLDi8: case ARM::t2PLDs: - case ARM::t2PLDWi12: case ARM::t2PLDWi8: - case ARM::t2PLDWs: - return ARM::t2PLDi12; - - case ARM::t2PLIi8: case ARM::t2PLIs: - return ARM::t2PLIi12; - } -} - -/// decodeThumbSideEffect is a decorator function which can potentially twiddle -/// the instruction or morph the returned opcode under Thumb2. -/// -/// First it checks whether the insn is a NEON or VFP instr; if true, bit -/// twiddling could be performed on insn to turn it into an ARM NEON/VFP -/// equivalent instruction and decodeInstruction is called with the transformed -/// insn. -/// -/// Next, there is special handling for Load byte/halfword/word instruction by -/// checking whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded -/// Thumb2 instruction. See comments below for further details. -/// -/// Finally, one last check is made to see whether the insn is a NEON/VFP and -/// decodeInstruction(insn) is invoked on the original insn. -/// -/// Otherwise, decodeThumbInstruction is called with the original insn. -static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) { - if (IsThumb2) { - uint16_t op1 = slice(insn, 28, 27); - uint16_t op2 = slice(insn, 26, 20); - - // A6.3 32-bit Thumb instruction encoding - // Table A6-9 32-bit Thumb instruction encoding - - // The coprocessor instructions of interest are transformed to their ARM - // equivalents. - - // --------- Transform Begin Marker --------- - if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) { - // A7.4 Advanced SIMD data-processing instructions - // U bit of Thumb corresponds to Inst{24} of ARM. 
- uint16_t U = slice(op1, 1, 1); - - // Inst{28-24} of ARM = {1,0,0,1,U}; - uint16_t bits28_24 = 9 << 1 | U; - DEBUG(showBitVector(errs(), insn)); - setSlice(insn, 28, 24, bits28_24); - return decodeInstruction(insn); +static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned dst = fieldFromInstruction16(Insn, 8, 3); + unsigned imm = fieldFromInstruction16(Insn, 0, 8); + + if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder))) + return MCDisassembler::Fail; + + switch(Inst.getOpcode()) { + default: + return MCDisassembler::Fail; + case ARM::tADR: + break; // tADR does not explicitly represent the PC as an operand. + case ARM::tADDrSPi: + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + break; + } + + Inst.addOperand(MCOperand::CreateImm(imm)); + return S; +} + +static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 0, 3); + unsigned Rm = fieldFromInstruction32(Val, 3, 3); + + if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodetGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 0, 3); + unsigned imm = fieldFromInstruction32(Val, 3, 5); + + if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + +static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + unsigned imm = Val << 2; + + Inst.addOperand(MCOperand::CreateImm(imm)); + tryAddingPcLoadReferenceComment(Address, (Address & ~2u) + imm + 4, Decoder); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + Inst.addOperand(MCOperand::CreateImm(Val)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 6, 4); + unsigned Rm = fieldFromInstruction32(Val, 2, 4); + unsigned imm = fieldFromInstruction32(Val, 0, 2); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecoderGPRRegisterClass(Inst, 
Rm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + +static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + switch (Inst.getOpcode()) { + case ARM::t2PLDs: + case ARM::t2PLDWs: + case ARM::t2PLIs: + break; + default: { + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; } + } - if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) { - // A7.7 Advanced SIMD element or structure load/store instructions - // Inst{27-24} of Thumb = 0b1001 - // Inst{27-24} of ARM = 0b0100 - DEBUG(showBitVector(errs(), insn)); - setSlice(insn, 27, 24, 4); - return decodeInstruction(insn); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + if (Rn == 0xF) { + switch (Inst.getOpcode()) { + case ARM::t2LDRBs: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRHs: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHs: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + case ARM::t2LDRSBs: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2PLDs: + Inst.setOpcode(ARM::t2PLDi12); + Inst.addOperand(MCOperand::CreateReg(ARM::PC)); + break; + default: + return MCDisassembler::Fail; } - // --------- Transform End Marker --------- - - unsigned unmorphed = decodeThumbInstruction(insn); - - // See, for example, A6.3.7 Load word: Table A6-18 Load word. - // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode - // before returning it to our caller. - if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1 - && slice(insn, 19, 16) == 15) { - unsigned morphed = T2Morph2LoadLiteral(unmorphed); - if (morphed != unmorphed) - return morphed; + + int imm = fieldFromInstruction32(Insn, 0, 12); + if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1; + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; + } + + unsigned addrmode = fieldFromInstruction32(Insn, 4, 2); + addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2; + addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6; + if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + int imm = Val & 0xFF; + if (!(Val & 0x100)) imm *= -1; + Inst.addOperand(MCOperand::CreateImm(imm << 2)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 9, 4); + unsigned imm = fieldFromInstruction32(Val, 0, 9); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeT2Imm8S4(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 8, 4); + unsigned imm = fieldFromInstruction32(Val, 0, 8); + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + +static DecodeStatus 
DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + int imm = Val & 0xFF; + if (Val == 0) + imm = INT32_MIN; + else if (!(Val & 0x100)) + imm *= -1; + Inst.addOperand(MCOperand::CreateImm(imm)); + + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 9, 4); + unsigned imm = fieldFromInstruction32(Val, 0, 9); + + // Some instructions always use an additive offset. + switch (Inst.getOpcode()) { + case ARM::t2LDRT: + case ARM::t2LDRBT: + case ARM::t2LDRHT: + case ARM::t2LDRSBT: + case ARM::t2LDRSHT: + case ARM::t2STRT: + case ARM::t2STRBT: + case ARM::t2STRHT: + imm |= 0x100; + break; + default: + break; + } + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeT2Imm8(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned addr = fieldFromInstruction32(Insn, 0, 8); + addr |= fieldFromInstruction32(Insn, 9, 1) << 8; + addr |= Rn << 9; + unsigned load = fieldFromInstruction32(Insn, 20, 1); + + if (!load) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + + if (load) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeT2AddrModeImm8(Inst, addr, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 13, 4); + unsigned imm = fieldFromInstruction32(Val, 0, 12); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(imm)); + + return S; +} + + +static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder) { + unsigned imm = fieldFromInstruction16(Insn, 0, 7); + + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + Inst.addOperand(MCOperand::CreateImm(imm)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (Inst.getOpcode() == ARM::tADDrSP) { + unsigned Rdm = fieldFromInstruction16(Insn, 0, 3); + Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + } else if (Inst.getOpcode() == ARM::tADDspr) { + unsigned Rm = fieldFromInstruction16(Insn, 3, 4); + + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); + if 
(!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} + +static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, + uint64_t Address, const void *Decoder) { + unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2; + unsigned flags = fieldFromInstruction16(Insn, 0, 3); + + Inst.addOperand(MCOperand::CreateImm(imod)); + Inst.addOperand(MCOperand::CreateImm(flags)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned add = fieldFromInstruction32(Insn, 4, 1); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(add)); + + return S; +} + +static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (!tryAddingSymbolicOperand(Address, + (Address & ~2u) + SignExtend32<22>(Val << 1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (Val == 0xA || Val == 0xB) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + + if (Rn == ARM::SP) S = MCDisassembler::SoftFail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus +DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned pred = fieldFromInstruction32(Insn, 22, 4); + if (pred == 0xE || pred == 0xF) { + unsigned opc = fieldFromInstruction32(Insn, 4, 28); + switch (opc) { + default: + return MCDisassembler::Fail; + case 0xf3bf8f4: + Inst.setOpcode(ARM::t2DSB); + break; + case 0xf3bf8f5: + Inst.setOpcode(ARM::t2DMB); + break; + case 0xf3bf8f6: + Inst.setOpcode(ARM::t2ISB); + break; } - // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for - // why we morphed the opcode before returning it to our caller. 
- if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF - && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1 - && slice(insn, 19, 16) == 15) { - unsigned morphed = T2Morph2PLDLiteral(unmorphed); - if (morphed != unmorphed) - return morphed; + unsigned imm = fieldFromInstruction32(Insn, 0, 4); + return DecodeMemBarrierOption(Inst, imm, Address, Decoder); + } + + unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1; + brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19; + brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18; + brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12; + brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20; + + if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +// Decode a shifted immediate operand. These basically consist +// of an 8-bit value, and a 4-bit directive that specifies either +// a splat operation or a rotation. +static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + unsigned ctrl = fieldFromInstruction32(Val, 10, 2); + if (ctrl == 0) { + unsigned byte = fieldFromInstruction32(Val, 8, 2); + unsigned imm = fieldFromInstruction32(Val, 0, 8); + switch (byte) { + case 0: + Inst.addOperand(MCOperand::CreateImm(imm)); + break; + case 1: + Inst.addOperand(MCOperand::CreateImm((imm << 16) | imm)); + break; + case 2: + Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 8))); + break; + case 3: + Inst.addOperand(MCOperand::CreateImm((imm << 24) | (imm << 16) | + (imm << 8) | imm)); + break; } + } else { + unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80; + unsigned rot = fieldFromInstruction32(Val, 7, 5); + unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31)); + Inst.addOperand(MCOperand::CreateImm(imm)); + } - // One last check for NEON/VFP instructions. - if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1) - return decodeInstruction(insn); + return MCDisassembler::Success; +} + +static DecodeStatus +DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder){ + Inst.addOperand(MCOperand::CreateImm(Val << 1)); + return MCDisassembler::Success; +} - // Fall through. +static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder){ + Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + switch (Val) { + default: + return MCDisassembler::Fail; + case 0xF: // SY + case 0xE: // ST + case 0xB: // ISH + case 0xA: // ISHST + case 0x7: // NSH + case 0x6: // NSHST + case 0x3: // OSH + case 0x2: // OSHST + break; } - return decodeThumbInstruction(insn); + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; } -// -// Public interface for the disassembler -// +static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (!Val) return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} -bool ARMDisassembler::getInstruction(MCInst &MI, - uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os) const { - // The machine instruction. 
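The splat and rotate rules that DecodeT2SOImm applies can be exercised in isolation. The sketch below assumes the same bit layout (control field in Val[11:10], splat selector in Val[9:8], rotation in Val[11:7]); the expandT2SOImm name is illustrative, not LLVM's:

  #include <stdint.h>
  #include <stdio.h>

  // Expand a 12-bit Thumb2 modified-immediate field the same way
  // DecodeT2SOImm does.
  static uint32_t expandT2SOImm(uint32_t val) {
    uint32_t imm = val & 0xFF;
    if (((val >> 10) & 0x3) == 0) {                  // splat directives
      switch ((val >> 8) & 0x3) {
      case 0:  return imm;                                    // 0x000000XY
      case 1:  return imm << 16 | imm;                        // 0x00XY00XY
      case 2:  return imm << 24 | imm << 8;                   // 0xXY00XY00
      default: return imm << 24 | imm << 16 | imm << 8 | imm; // 0xXYXYXYXY
      }
    }
    uint32_t unrot = (val & 0x7F) | 0x80;            // 7-bit value, bit 7 forced
    uint32_t rot   = (val >> 7) & 0x1F;              // 5-bit rotate amount
    return (unrot >> rot) | (unrot << ((32 - rot) & 31));
  }

  int main() {
    printf("%#010x\n", expandT2SOImm(0x1AB));        // splat:  0x00ab00ab
    printf("%#010x\n", expandT2SOImm(0x46A));        // rotate: 0xea000000
    return 0;
  }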
- uint32_t insn; - uint8_t bytes[4]; +static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; - // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) - return false; + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); - // Encoded as a small-endian 32-bit word in the stream. - insn = (bytes[3] << 24) | - (bytes[2] << 16) | - (bytes[1] << 8) | - (bytes[0] << 0); - - unsigned Opcode = decodeARMInstruction(insn); - ARMFormat Format = ARMFormats[Opcode]; - Size = 4; - - DEBUG({ - errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode) - << " Format=" << stringForARMFormat(Format) << '(' << (int)Format - << ")\n"; - showBitVector(errs(), insn); - }); - - OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format)); - if (!Builder) - return false; + if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail; - Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), - getDisInfoBlock(), getMCContext(), - Address); + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; - if (!Builder->Build(MI, insn)) - return false; + return S; +} - return true; + +static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder){ + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt = fieldFromInstruction32(Insn, 0, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + + if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + + if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail; + if (Rd == Rn || Rd == Rt || Rd == Rt+1) return MCDisassembler::Fail; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; } -bool ThumbDisassembler::getInstruction(MCInst &MI, - uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os) const { - // The Thumb instruction stream is a sequence of halfwords. - - // This represents the first halfword as well as the machine instruction - // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top - // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to - // the top half followed by the second halfword. - unsigned insn = 0; - // Possible second halfword. 
- uint16_t insn1 = 0; - - // A6.1 Thumb instruction set encoding - // - // If bits [15:11] of the halfword being decoded take any of the following - // values, the halfword is the first halfword of a 32-bit instruction: - // o 0b11101 - // o 0b11110 - // o 0b11111. - // - // Otherwise, the halfword is a 16-bit instruction. - - // Read 2 bytes of data first. - uint8_t bytes[2]; - if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) - return false; +static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 12); + imm |= fieldFromInstruction32(Insn, 16, 4) << 13; + imm |= fieldFromInstruction32(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + + if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} - // Encoded as a small-endian 16-bit halfword in the stream. - insn = (bytes[1] << 8) | bytes[0]; - unsigned bits15_11 = slice(insn, 15, 11); - bool IsThumb2 = false; +static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 12); + imm |= fieldFromInstruction32(Insn, 16, 4) << 13; + imm |= fieldFromInstruction32(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + + if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; + if (Rm == 0xF) S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} - // 32-bit instructions if the bits [15:11] of the halfword matches - // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, ob11111 /* 0x1F */ }. - if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) { - IsThumb2 = true; - if (Region.readBytes(Address + 2, 2, (uint8_t*)bytes, NULL) == -1) - return false; - // Encoded as a small-endian 16-bit halfword in the stream. - insn1 = (bytes[1] << 8) | bytes[0]; - insn = (insn << 16 | insn1); - } - - // The insn could potentially be bit-twiddled in order to be decoded as an ARM - // NEON/VFP opcode. In such case, the modified insn is later disassembled as - // an ARM NEON/VFP instruction. - // - // This is a short term solution for lack of encoding bits specified for the - // Thumb2 NEON/VFP instructions. The long term solution could be adding some - // infrastructure to have each instruction support more than one encodings. 
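For reference, the removed Thumb getInstruction above fetched little-endian halfwords and treated bits [15:11] values 0b11101, 0b11110, and 0b11111 as the start of a 32-bit encoding. That convention condenses to a few lines; fetchThumb is a hypothetical name and the sketch assumes at least four valid bytes:

  #include <stdint.h>
  #include <stddef.h>

  // Returns the instruction width in bytes (2 or 4) and assembles insn the
  // way the old disassembler did: first halfword in the high half.
  static size_t fetchThumb(const uint8_t *bytes, uint32_t &insn) {
    uint16_t hw1 = bytes[0] | (uint16_t)bytes[1] << 8;   // little-endian
    unsigned bits15_11 = hw1 >> 11;
    if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) {
      uint16_t hw2 = bytes[2] | (uint16_t)bytes[3] << 8; // second halfword
      insn = (uint32_t)hw1 << 16 | hw2;                  // 32-bit encoding
      return 4;
    }
    insn = hw1;                                          // 16-bit encoding
    return 2;
  }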
- // Which encoding is used would be based on which subtarget the compiler/ - // disassembler is working with at the time. This would allow the sharing of - // the NEON patterns between ARM and Thumb2, as well as potential greater - // sharing between the regular ARM instructions and the 32-bit wide Thumb2 - // instructions as well. - unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn); - - ARMFormat Format = ARMFormats[Opcode]; - Size = IsThumb2 ? 4 : 2; - - DEBUG({ - errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode) - << " Format=" << stringForARMFormat(Format) << '(' << (int)Format - << ")\n"; - showBitVector(errs(), insn); - }); - - OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format)); - if (!Builder) - return false; - Builder->SetSession(const_cast<Session *>(&SO)); +static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; - Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), - getDisInfoBlock(), getMCContext(), - Address); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 12); + imm |= fieldFromInstruction32(Insn, 16, 4) << 13; + imm |= fieldFromInstruction32(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction32(Insn, 28, 4); - if (!Builder->Build(MI, insn)) - return false; + if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; - return true; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeAddrModeImm12Operand(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; } -// A8.6.50 -// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition. -static unsigned short CountITSize(unsigned ITMask) { - // First count the trailing zeros of the IT mask. 
- unsigned TZ = CountTrailingZeros_32(ITMask); - if (TZ > 3) { - DEBUG(errs() << "Encoding error: IT Mask '0000'"); - return 0; +static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned imm = fieldFromInstruction32(Insn, 0, 12); + imm |= fieldFromInstruction32(Insn, 16, 4) << 13; + imm |= fieldFromInstruction32(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + + if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeSORegMemOperand(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 5, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 6, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 4, 2) != 0) + align = 4; + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); } - return (4 - TZ); + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -/// Init ITState. Note that at least one bit is always 1 in mask. 
-bool Session::InitIT(unsigned short bits7_0) { - ITCounter = CountITSize(slice(bits7_0, 3, 0)); - if (ITCounter == 0) - return false; +static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; - // A8.6.50 IT - unsigned short FirstCond = slice(bits7_0, 7, 4); - if (FirstCond == 0xF) { - DEBUG(errs() << "Encoding error: IT FirstCond '1111'"); - return false; + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 5, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 6, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 4, 2) != 0) + align = 4; } - if (FirstCond == 0xE && ITCounter != 1) { - DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'"); - return false; + + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); } - ITState = bits7_0; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); - return true; + return S; } -/// Update ITState if necessary. 
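InitIT above depends on the A8.6.50 rule that the number of trailing zeros in the 4-bit IT mask fixes the block length, with an all-zero mask being an encoding error. A self-contained sketch of that rule, with itBlockSize as an illustrative name:

  #include <cstdio>

  static unsigned itBlockSize(unsigned mask) { // mask = low 4 bits of ITState
    mask &= 0xF;
    if (mask == 0) return 0;                   // encoding error: mask '0000'
    unsigned tz = 0;
    while (!(mask & (1u << tz))) ++tz;         // count trailing zeros
    return 4 - tz;                             // valid results are 1 through 4
  }

  int main() {
    std::printf("%u %u %u %u\n", itBlockSize(0x8),  // 1 slot  (mask 1000)
                                 itBlockSize(0x4),  // 2 slots (mask 0100)
                                 itBlockSize(0x2),  // 3 slots (mask 0010)
                                 itBlockSize(0x1)); // 4 slots (mask 0001)
  }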
-void Session::UpdateIT() { - assert(ITCounter); - --ITCounter; - if (ITCounter == 0) - ITState = 0; - else { - unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1; - setSlice(ITState, 4, 0, NewITState4_0); + +static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + index = fieldFromInstruction32(Insn, 5, 3); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 2; + break; + case 1: + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 4; + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 5, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 4, 1) != 0) + align = 8; + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -static MCDisassembler *createARMDisassembler(const Target &T) { - return new ARMDisassembler; +static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + index = fieldFromInstruction32(Insn, 5, 3); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 2; + break; + case 1: + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 4, 1)) + align = 4; + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 5, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 4, 1) != 0) + align = 8; + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (Rm != 0xF) { // Writeback + if (!Check(S, 
DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -static MCDisassembler *createThumbDisassembler(const Target &T) { - return new ThumbDisassembler; + +static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 4, 2)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return MCDisassembler::Fail; + + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -extern "C" void LLVMInitializeARMDisassembler() { - // Register the disassembler. 
- TargetRegistry::RegisterMCDisassembler(TheARMTarget, - createARMDisassembler); - TargetRegistry::RegisterMCDisassembler(TheThumbTarget, - createThumbDisassembler); +static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 4, 1)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 4, 2)) + return MCDisassembler::Fail; // UNDEFINED + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -EDInstInfo *ARMDisassembler::getEDInfo() const { - return instInfoARM; + +static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + align = 4; + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 4, 1)) + align = 8; + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 4, 2)) + align = 4 << fieldFromInstruction32(Insn, 4, 2); + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return 
MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder))) + return MCDisassembler::Fail; + + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; } -EDInstInfo *ThumbDisassembler::getEDInfo() const { - return instInfoARM; +static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction32(Insn, 12, 4); + Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction32(Insn, 10, 2); + + unsigned align = 0; + unsigned index = 0; + unsigned inc = 1; + switch (size) { + default: + return MCDisassembler::Fail; + case 0: + if (fieldFromInstruction32(Insn, 4, 1)) + align = 4; + index = fieldFromInstruction32(Insn, 5, 3); + break; + case 1: + if (fieldFromInstruction32(Insn, 4, 1)) + align = 8; + index = fieldFromInstruction32(Insn, 6, 2); + if (fieldFromInstruction32(Insn, 5, 1)) + inc = 2; + break; + case 2: + if (fieldFromInstruction32(Insn, 4, 2)) + align = 4 << fieldFromInstruction32(Insn, 4, 2); + index = fieldFromInstruction32(Insn, 7, 1); + if (fieldFromInstruction32(Insn, 6, 1)) + inc = 2; + break; + } + + if (Rm != 0xF) { // Writeback + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(align)); + if (Rm != 0xF) { + if (Rm != 0xD) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } else + Inst.addOperand(MCOperand::CreateReg(0)); + } + + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+2*inc, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd+3*inc, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(index)); + + return S; +} + +static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 
4); + Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + + if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) + S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); + unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + + if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) + S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2 , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeSPRRegisterClass(Inst, Rm , Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeSPRRegisterClass(Inst, Rm+1, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned pred = fieldFromInstruction16(Insn, 4, 4); + // The InstPrinter needs to have the low bit of the predicate in + // the mask operand to be able to print it properly. + unsigned mask = fieldFromInstruction16(Insn, 0, 5); + + if (pred == 0xF) { + pred = 0xE; + S = MCDisassembler::SoftFail; + } + + if ((mask & 0xF) == 0) { + // Preserve the high bit of the mask, which is the low bit of + // the predicate. 
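// Review note (illustrative, not part of the patch): the 5-bit "mask"
// operand decoded above is firstcond[0]:mask[3:0], since bit 4 is shared
// with the predicate field. For a plain "IT EQ", Insn{7-0} = 0b00001000,
// so pred = 0b0000 and mask = 0b01000. An architectural mask[3:0] of
// 0b0000 has no trailing 1 and cannot arise from any valid IT/ITT/ITE/...
// sequence, so the recovery below keeps only bit 4 (the predicate's low
// bit), forces mask[3:0] to 0b1000 (a one-instruction IT block), and
// reports SoftFail.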
+ mask &= 0x10; + mask |= 0x8; + S = MCDisassembler::SoftFail; + } + + Inst.addOperand(MCOperand::CreateImm(pred)); + Inst.addOperand(MCOperand::CreateImm(mask)); + return S; } + +static DecodeStatus +DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned addr = fieldFromInstruction32(Insn, 0, 8); + unsigned W = fieldFromInstruction32(Insn, 21, 1); + unsigned U = fieldFromInstruction32(Insn, 23, 1); + unsigned P = fieldFromInstruction32(Insn, 24, 1); + bool writeback = (W == 1) | (P == 0); + + addr |= (U << 8) | (Rn << 9); + + if (writeback && (Rn == Rt || Rn == Rt2)) + Check(S, MCDisassembler::SoftFail); + if (Rt == Rt2) + Check(S, MCDisassembler::SoftFail); + + // Rt + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + // Rt2 + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + // Writeback operand + if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + // addr + if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus +DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned addr = fieldFromInstruction32(Insn, 0, 8); + unsigned W = fieldFromInstruction32(Insn, 21, 1); + unsigned U = fieldFromInstruction32(Insn, 23, 1); + unsigned P = fieldFromInstruction32(Insn, 24, 1); + bool writeback = (W == 1) | (P == 0); + + addr |= (U << 8) | (Rn << 9); + + if (writeback && (Rn == Rt || Rn == Rt2)) + Check(S, MCDisassembler::SoftFail); + + // Writeback operand + if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + // Rt + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + // Rt2 + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + // addr + if (!Check(S, DecodeT2AddrModeImm8s4(Inst, addr, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn, + uint64_t Address, const void *Decoder) { + unsigned sign1 = fieldFromInstruction32(Insn, 21, 1); + unsigned sign2 = fieldFromInstruction32(Insn, 23, 1); + if (sign1 != sign2) return MCDisassembler::Fail; + + unsigned Val = fieldFromInstruction32(Insn, 0, 8); + Val |= fieldFromInstruction32(Insn, 12, 3) << 8; + Val |= fieldFromInstruction32(Insn, 26, 1) << 11; + Val |= sign1 << 12; + Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val))); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, + uint64_t Address, + const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + // Shift of "asr #32" is not allowed in Thumb2 mode. 
+ if (Val == 0x20) S = MCDisassembler::SoftFail; + Inst.addOperand(MCOperand::CreateImm(Val)); + return S; +} + diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h deleted file mode 100644 index 0a74a38..0000000 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.h +++ /dev/null @@ -1,99 +0,0 @@ -//===- ARMDisassembler.h - Disassembler for ARM/Thumb ISA -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is part of the ARM Disassembler. -// It contains the header for ARMDisassembler and ThumbDisassembler, both are -// subclasses of MCDisassembler. -// -//===----------------------------------------------------------------------===// - -#ifndef ARMDISASSEMBLER_H -#define ARMDISASSEMBLER_H - -#include "llvm/MC/MCDisassembler.h" - -namespace llvm { - -class MCInst; -class MemoryObject; -class raw_ostream; - -struct EDInstInfo; - -/// ARMDisassembler - ARM disassembler for all ARM platforms. -class ARMDisassembler : public MCDisassembler { -public: - /// Constructor - Initializes the disassembler. - /// - ARMDisassembler() : - MCDisassembler() { - } - - ~ARMDisassembler() { - } - - /// getInstruction - See MCDisassembler. - bool getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream) const; - - /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; -private: -}; - -// Forward declaration. -class ARMBasicMCBuilder; - -/// Session - Keep track of the IT Block progression. -class Session { - friend class ARMBasicMCBuilder; -public: - Session() : ITCounter(0), ITState(0) {} - ~Session() {} - /// InitIT - Initializes ITCounter/ITState. - bool InitIT(unsigned short bits7_0); - /// UpdateIT - Updates ITCounter/ITState as IT Block progresses. - void UpdateIT(); - -private: - unsigned ITCounter; // Possible values: 0, 1, 2, 3, 4. - unsigned ITState; // A2.5.2 Consists of IT[7:5] and IT[4:0] initially. -}; - -/// ThumbDisassembler - Thumb disassembler for all ARM platforms. -class ThumbDisassembler : public MCDisassembler { -public: - /// Constructor - Initializes the disassembler. - /// - ThumbDisassembler() : - MCDisassembler(), SO() { - } - - ~ThumbDisassembler() { - } - - /// getInstruction - See MCDisassembler. - bool getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream) const; - - /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; -private: - Session SO; -}; - -} // namespace llvm - -#endif diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp deleted file mode 100644 index d89c80a..0000000 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ /dev/null @@ -1,3818 +0,0 @@ -//===- ARMDisassemblerCore.cpp - ARM disassembler helpers -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is part of the ARM Disassembler. 
-// It contains code to represent the core concepts of Builder and DisassembleFP -// to solve the problem of disassembling an ARM instr. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm-disassembler" - -#include "ARMDisassemblerCore.h" -#include "ARMAddressingModes.h" -#include "ARMMCExpr.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -//#define DEBUG(X) do { X; } while (0) - -/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const -/// MCInstrDesc ARMInsts[] definition and the MCOperandInfo[]'s describing the -/// operand info for each ARMInsts[i]. -/// -/// Together with an instruction's encoding format, we can take advantage of the -/// NumOperands and the OpInfo fields of the target instruction description in -/// the quest to build out the MCOperand list for an MCInst. -/// -/// The general guideline is that with a known format, the number of dst and src -/// operands are well-known. The dst is built first, followed by the src -/// operand(s). The operands not yet used at this point are for the Implicit -/// Uses and Defs by this instr. For the Uses part, the pred:$p operand is -/// defined with two components: -/// -/// def pred { // Operand PredicateOperand -/// ValueType Type = OtherVT; -/// string PrintMethod = "printPredicateOperand"; -/// string AsmOperandLowerMethod = ?; -/// dag MIOperandInfo = (ops i32imm, CCR); -/// AsmOperandClass ParserMatchClass = ImmAsmOperand; -/// dag DefaultOps = (ops (i32 14), (i32 zero_reg)); -/// } -/// -/// which is manifested by the MCOperandInfo[] of: -/// -/// { 0, 0|(1<<MCOI::Predicate), 0 }, -/// { ARM::CCRRegClassID, 0|(1<<MCOI::Predicate), 0 } -/// -/// So the first predicate MCOperand corresponds to the immediate part of the -/// ARM condition field (Inst{31-28}), and the second predicate MCOperand -/// corresponds to a register kind of ARM::CPSR. -/// -/// For the Defs part, in the simple case of only cc_out:$s, we have: -/// -/// def cc_out { // Operand OptionalDefOperand -/// ValueType Type = OtherVT; -/// string PrintMethod = "printSBitModifierOperand"; -/// string AsmOperandLowerMethod = ?; -/// dag MIOperandInfo = (ops CCR); -/// AsmOperandClass ParserMatchClass = ImmAsmOperand; -/// dag DefaultOps = (ops (i32 zero_reg)); -/// } -/// -/// which is manifested by the one MCOperandInfo of: -/// -/// { ARM::CCRRegClassID, 0|(1<<MCOI::OptionalDef), 0 } -/// - -namespace llvm { -extern MCInstrDesc ARMInsts[]; -} - -using namespace llvm; - -const char *ARMUtils::OpcodeName(unsigned Opcode) { - return ARMInsts[Opcode].Name; -} - -// Return the register enum Based on RegClass and the raw register number. -// FIXME: Auto-gened? -static unsigned -getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) { - if (RegClassID == ARM::rGPRRegClassID) { - // Check for The register numbers 13 and 15 that are not permitted for many - // Thumb register specifiers. - if (RawRegister == 13 || RawRegister == 15) { - B->SetErr(-1); - return 0; - } - // For this purpose, we can treat rGPR as if it were GPR. - RegClassID = ARM::GPRRegClassID; - } - - // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm(). - // A7.3 register encoding - // Qd -> bit[12] == 0 - // Qn -> bit[16] == 0 - // Qm -> bit[0] == 0 - // - // If one of these bits is 1, the instruction is UNDEFINED. - if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) { - B->SetErr(-1); - return 0; - } - unsigned RegNum = - RegClassID == ARM::QPRRegClassID ? 
RawRegister >> 1 : RawRegister; - - switch (RegNum) { - default: - break; - case 0: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R0; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D0; - case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - return ARM::Q0; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S0; - } - break; - case 1: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R1; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D1; - case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - return ARM::Q1; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S1; - } - break; - case 2: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R2; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D2; - case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - return ARM::Q2; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S2; - } - break; - case 3: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R3; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D3; - case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: - case ARM::QPR_VFP2RegClassID: - return ARM::Q3; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S3; - } - break; - case 4: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R4; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D4; - case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q4; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S4; - } - break; - case 5: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R5; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D5; - case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q5; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S5; - } - break; - case 6: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R6; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D6; - case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q6; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S6; - } - break; - case 7: - switch (RegClassID) { - case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R7; - case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: - case ARM::DPR_VFP2RegClassID: - return ARM::D7; - case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q7; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S7; - } - break; - case 8: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::R8; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D8; - case ARM::QPRRegClassID: return ARM::Q8; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S8; - } - break; - case 9: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::R9; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D9; - case ARM::QPRRegClassID: 
return ARM::Q9; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S9; - } - break; - case 10: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::R10; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D10; - case ARM::QPRRegClassID: return ARM::Q10; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S10; - } - break; - case 11: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::R11; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D11; - case ARM::QPRRegClassID: return ARM::Q11; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S11; - } - break; - case 12: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::R12; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D12; - case ARM::QPRRegClassID: return ARM::Q12; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S12; - } - break; - case 13: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::SP; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D13; - case ARM::QPRRegClassID: return ARM::Q13; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S13; - } - break; - case 14: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::LR; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D14; - case ARM::QPRRegClassID: return ARM::Q14; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S14; - } - break; - case 15: - switch (RegClassID) { - case ARM::GPRRegClassID: return ARM::PC; - case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D15; - case ARM::QPRRegClassID: return ARM::Q15; - case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S15; - } - break; - case 16: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D16; - case ARM::SPRRegClassID: return ARM::S16; - } - break; - case 17: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D17; - case ARM::SPRRegClassID: return ARM::S17; - } - break; - case 18: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D18; - case ARM::SPRRegClassID: return ARM::S18; - } - break; - case 19: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D19; - case ARM::SPRRegClassID: return ARM::S19; - } - break; - case 20: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D20; - case ARM::SPRRegClassID: return ARM::S20; - } - break; - case 21: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D21; - case ARM::SPRRegClassID: return ARM::S21; - } - break; - case 22: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D22; - case ARM::SPRRegClassID: return ARM::S22; - } - break; - case 23: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D23; - case ARM::SPRRegClassID: return ARM::S23; - } - break; - case 24: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D24; - case ARM::SPRRegClassID: return ARM::S24; - } - break; - case 25: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D25; - case ARM::SPRRegClassID: return ARM::S25; - } - break; - case 26: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D26; - case ARM::SPRRegClassID: return ARM::S26; - } - break; - case 27: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D27; - case ARM::SPRRegClassID: return ARM::S27; - } - break; - case 28: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D28; - case ARM::SPRRegClassID: return ARM::S28; - } - break; - 
case 29: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D29; - case ARM::SPRRegClassID: return ARM::S29; - } - break; - case 30: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D30; - case ARM::SPRRegClassID: return ARM::S30; - } - break; - case 31: - switch (RegClassID) { - case ARM::DPRRegClassID: return ARM::D31; - case ARM::SPRRegClassID: return ARM::S31; - } - break; - } - DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n"); - // Encoding error. Mark the builder with error code != 0. - B->SetErr(-1); - return 0; -} - -/////////////////////////////// -// // -// Utility Functions // -// // -/////////////////////////////// - -// Extract/Decode Rd: Inst{15-12}. -static inline unsigned decodeRd(uint32_t insn) { - return (insn >> ARMII::RegRdShift) & ARMII::GPRRegMask; -} - -// Extract/Decode Rn: Inst{19-16}. -static inline unsigned decodeRn(uint32_t insn) { - return (insn >> ARMII::RegRnShift) & ARMII::GPRRegMask; -} - -// Extract/Decode Rm: Inst{3-0}. -static inline unsigned decodeRm(uint32_t insn) { - return (insn & ARMII::GPRRegMask); -} - -// Extract/Decode Rs: Inst{11-8}. -static inline unsigned decodeRs(uint32_t insn) { - return (insn >> ARMII::RegRsShift) & ARMII::GPRRegMask; -} - -static inline unsigned getCondField(uint32_t insn) { - return (insn >> ARMII::CondShift); -} - -static inline unsigned getIBit(uint32_t insn) { - return (insn >> ARMII::I_BitShift) & 1; -} - -static inline unsigned getAM3IBit(uint32_t insn) { - return (insn >> ARMII::AM3_I_BitShift) & 1; -} - -static inline unsigned getPBit(uint32_t insn) { - return (insn >> ARMII::P_BitShift) & 1; -} - -static inline unsigned getUBit(uint32_t insn) { - return (insn >> ARMII::U_BitShift) & 1; -} - -static inline unsigned getPUBits(uint32_t insn) { - return (insn >> ARMII::U_BitShift) & 3; -} - -static inline unsigned getSBit(uint32_t insn) { - return (insn >> ARMII::S_BitShift) & 1; -} - -static inline unsigned getWBit(uint32_t insn) { - return (insn >> ARMII::W_BitShift) & 1; -} - -static inline unsigned getDBit(uint32_t insn) { - return (insn >> ARMII::D_BitShift) & 1; -} - -static inline unsigned getNBit(uint32_t insn) { - return (insn >> ARMII::N_BitShift) & 1; -} - -static inline unsigned getMBit(uint32_t insn) { - return (insn >> ARMII::M_BitShift) & 1; -} - -// See A8.4 Shifts applied to a register. -// A8.4.2 Register controlled shifts. -// -// getShiftOpcForBits - getShiftOpcForBits translates from the ARM encoding bits -// into llvm enums for shift opcode. The API clients should pass in the value -// encoded with two bits, so the assert stays to signal a wrong API usage. -// -// A8-12: DecodeRegShift() -static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) { - switch (bits) { - default: assert(0 && "No such value"); return ARM_AM::no_shift; - case 0: return ARM_AM::lsl; - case 1: return ARM_AM::lsr; - case 2: return ARM_AM::asr; - case 3: return ARM_AM::ror; - } -} - -// See A8.4 Shifts applied to a register. -// A8.4.1 Constant shifts. -// -// getImmShiftSE - getImmShiftSE translates from the raw ShiftOpc and raw Imm5 -// encodings into the intended ShiftOpc and shift amount. 
-// -// A8-11: DecodeImmShift() -static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) { - if (ShImm != 0) - return; - switch (ShOp) { - case ARM_AM::no_shift: - case ARM_AM::rrx: - break; - case ARM_AM::lsl: - ShOp = ARM_AM::no_shift; - break; - case ARM_AM::lsr: - case ARM_AM::asr: - ShImm = 32; - break; - case ARM_AM::ror: - ShOp = ARM_AM::rrx; - break; - } -} - -// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding -// bits Inst{24-23} (P(24) and U(23)) into llvm enums for AMSubMode. The API -// clients should pass in the value encoded with two bits, so the assert stays -// to signal a wrong API usage. -static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) { - switch (bits) { - default: assert(0 && "No such value"); return ARM_AM::bad_am_submode; - case 1: return ARM_AM::ia; // P=0 U=1 - case 3: return ARM_AM::ib; // P=1 U=1 - case 0: return ARM_AM::da; // P=0 U=0 - case 2: return ARM_AM::db; // P=1 U=0 - } -} - -//////////////////////////////////////////// - // // - // Disassemble function definitions // - // // -//////////////////////////////////////////// - -/// There is a separate Disassemble*Frm function entry for disassembly of an ARM -/// instr into a list of MCOperands in the appropriate order, with possible dst, -/// followed by possible src(s). -/// -/// The processing of the predicate, and the 'S' modifier bit, if MI modifies -/// the CPSR, is factored into ARMBasicMCBuilder's method named -/// TryPredicateAndSBitModifier. - -static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - - assert(0 && "Unexpected pseudo instruction!"); - return false; -} - -// A8.6.94 MLA -// if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE; -// -// A8.6.105 MUL -// if d == 15 || n == 15 || m == 15 then UNPREDICTABLE; -// -// A8.6.246 UMULL -// if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE; -// if dHi == dLo then UNPREDICTABLE; -static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) { - unsigned R19_16 = slice(insn, 19, 16); - unsigned R15_12 = slice(insn, 15, 12); - unsigned R11_8 = slice(insn, 11, 8); - unsigned R3_0 = slice(insn, 3, 0); - switch (Opcode) { - default: - // Did we miss an opcode? - DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!"); - return false; - case ARM::MLA: case ARM::MLS: case ARM::SMLABB: case ARM::SMLABT: - case ARM::SMLATB: case ARM::SMLATT: case ARM::SMLAWB: case ARM::SMLAWT: - case ARM::SMMLA: case ARM::SMMLAR: case ARM::SMMLS: case ARM::SMMLSR: - case ARM::USADA8: - if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15) - return true; - return false; - case ARM::MUL: case ARM::SMMUL: case ARM::SMMULR: - case ARM::SMULBB: case ARM::SMULBT: case ARM::SMULTB: case ARM::SMULTT: - case ARM::SMULWB: case ARM::SMULWT: case ARM::SMUAD: case ARM::SMUADX: - // A8.6.167 SMLAD & A8.6.172 SMLSD - case ARM::SMLAD: case ARM::SMLADX: case ARM::SMLSD: case ARM::SMLSDX: - case ARM::USAD8: - if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15) - return true; - return false; - case ARM::SMLAL: case ARM::SMULL: case ARM::UMAAL: case ARM::UMLAL: - case ARM::UMULL: - case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT: - case ARM::SMLALD: case ARM::SMLALDX: case ARM::SMLSLD: case ARM::SMLSLDX: - if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15) - return true; - if (R19_16 == R15_12) - return true; - return false; - } -} - -// Multiply Instructions. 
-// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR, -// SMMLS, SMMLSR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience): -// Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12} -// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is -// only for {d, n, m}. -// -// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD, -// SMUADX, and USAD8 (for convenience): -// Rd{19-16} Rn{3-0} Rm{11-8} -// -// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT, -// SMLALD, SMLALDX, SMLSLD, SMLSLDX: -// RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8} -// -// The mapping of the multiply registers to the "regular" ARM registers, where -// there are convenience decoder functions, is: -// -// Inst{15-12} => Rd -// Inst{19-16} => Rn -// Inst{3-0} => Rm -// Inst{11-8} => Rs -static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - unsigned short NumDefs = MCID.getNumDefs(); - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumDefs > 0 && "NumDefs should be greater than 0 for MulFrm"); - assert(NumOps >= 3 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && OpInfo[2].RegClass == ARM::GPRRegClassID - && "Expect three register operands"); - - // Sanity check for the register encodings. - if (BadRegsMulFrm(Opcode, insn)) - return false; - - // Instructions with two destination registers have RdLo{15-12} first. - if (NumDefs == 2) { - assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID && - "Expect 4th register operand"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - - // The destination register: RdHi{19-16} or Rd{19-16}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - - // The two src registers: Rn{3-0}, then Rm{11-8}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRs(insn)))); - OpIdx += 3; - - // Many multiply instructions (e.g., MLA) have three src registers. - // The third register operand is Ra{15-12}. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - - return true; -} - -// Helper routines for disassembly of coprocessor instructions. - -static bool LdStCopOpcode(unsigned Opcode) { - if ((Opcode >= ARM::LDC2L_OFFSET && Opcode <= ARM::LDC_PRE) || - (Opcode >= ARM::STC2L_OFFSET && Opcode <= ARM::STC_PRE)) - return true; - return false; -} -static bool CoprocessorOpcode(unsigned Opcode) { - if (LdStCopOpcode(Opcode)) - return true; - - switch (Opcode) { - default: - return false; - case ARM::CDP: case ARM::CDP2: - case ARM::MCR: case ARM::MCR2: case ARM::MRC: case ARM::MRC2: - case ARM::MCRR: case ARM::MCRR2: case ARM::MRRC: case ARM::MRRC2: - return true; - } -} -static inline unsigned GetCoprocessor(uint32_t insn) { - return slice(insn, 11, 8); -} -static inline unsigned GetCopOpc1(uint32_t insn, bool CDP) { - return CDP ? 
slice(insn, 23, 20) : slice(insn, 23, 21); -} -static inline unsigned GetCopOpc2(uint32_t insn) { - return slice(insn, 7, 5); -} -static inline unsigned GetCopOpc(uint32_t insn) { - return slice(insn, 7, 4); -} -// Most of the operands are in immediate forms, except Rd and Rn, which are ARM -// core registers. -// -// CDP, CDP2: cop opc1 CRd CRn CRm opc2 -// -// MCR, MCR2, MRC, MRC2: cop opc1 Rd CRn CRm opc2 -// -// MCRR, MCRR2, MRRC, MRRC2: cop opc Rd Rn CRm -// -// LDC_OFFSET, LDC_PRE, LDC_POST: cop CRd Rn R0 [+/-]imm8:00 -// and friends -// STC_OFFSET, STC_PRE, STC_POST: cop CRd Rn R0 [+/-]imm8:00 -// and friends -// <-- addrmode2 --> -// -// LDC_OPTION: cop CRd Rn imm8 -// and friends -// STC_OPTION: cop CRd Rn imm8 -// and friends -// -static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr"); - - unsigned &OpIdx = NumOpsAdded; - // A8.6.92 - // if coproc == '101x' then SEE "Advanced SIMD and VFP" - // But since the special instructions have more explicit encoding bits - // specified, if coproc == 10 or 11, we should reject it as invalid. - unsigned coproc = GetCoprocessor(insn); - if ((Opcode == ARM::MCR || Opcode == ARM::MCRR || - Opcode == ARM::MRC || Opcode == ARM::MRRC) && - (coproc == 10 || coproc == 11)) { - DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n"); - return false; - } - - bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 || - Opcode == ARM::MRRC || Opcode == ARM::MRRC2); - - // CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}). - bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2); - bool LdStCop = LdStCopOpcode(Opcode); - bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2); - - OpIdx = 0; - - if (RtOut) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - MI.addOperand(MCOperand::CreateImm(coproc)); - ++OpIdx; - - if (LdStCop) { - // Unindexed if P:W = 0b00 --> _OPTION variant - unsigned PW = getPBit(insn) << 1 | getWBit(insn); - - MI.addOperand(MCOperand::CreateImm(decodeRd(insn))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - OpIdx += 2; - - if (PW) { - MI.addOperand(MCOperand::CreateReg(0)); - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - const MCInstrDesc &MCID = ARMInsts[Opcode]; - unsigned IndexMode = - (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; - unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2, - ARM_AM::no_shift, IndexMode); - MI.addOperand(MCOperand::CreateImm(Offset)); - OpIdx += 2; - } else { - MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0))); - ++OpIdx; - } - } else { - MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn) - : GetCopOpc1(insn, NoGPR))); - ++OpIdx; - - if (!RtOut) { - MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) - : MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - - MI.addOperand(OneCopOpc ? MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn))) - : MCOperand::CreateImm(decodeRn(insn))); - - MI.addOperand(MCOperand::CreateImm(decodeRm(insn))); - - OpIdx += 2; - - if (!OneCopOpc) { - MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn))); - ++OpIdx; - } - } - - return true; -} - -// Branch Instructions. 
-// BL: SignExtend(Imm24:'00', 32) -// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1 -// SMC: ZeroExtend(imm4, 32) -// SVC: ZeroExtend(Imm24, 32) -// -// Various coprocessor instructions are assigned BrFrm arbitrarily. These -// delegate to the DisassembleCoprocessor() helper function. -// -// MRS/MRSsys: Rd -// MSR/MSRsys: Rm mask=Inst{19-16} -// BXJ: Rm -// MSRi/MSRsysi: so_imm -// SRSW/SRS: ldstm_mode:$amode mode_imm -// RFEW/RFE: ldstm_mode:$amode Rn -static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - if (CoprocessorOpcode(Opcode)) - return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - // MRS and MRSsys take one GPR reg Rd. - if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) { - assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - NumOpsAdded = 1; - return true; - } - // BXJ takes one GPR reg Rm. - if (Opcode == ARM::BXJ) { - assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - NumOpsAdded = 1; - return true; - } - // MSR takes a mask, followed by one GPR reg Rm. The mask contains the R Bit - // in bit 4, and the special register fields in bits 3-0. - if (Opcode == ARM::MSR) { - assert(NumOps >= 1 && OpInfo[1].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ | - slice(insn, 19, 16) /* Special Reg */ )); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - NumOpsAdded = 2; - return true; - } - // MSRi takes a mask, followed by one so_imm operand. The mask contains the - // R Bit in bit 4, and the special register fields in bits 3-0. - if (Opcode == ARM::MSRi) { - // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate) - // The hints instructions have more specific encodings, so if mask == 0, - // we should reject this as an invalid instruction. - if (slice(insn, 19, 16) == 0) - return false; - MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ | - slice(insn, 19, 16) /* Special Reg */ )); - // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0. - // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}. - // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot(). 
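// Review note (worked example, not part of the patch): with Inst{11-8} =
// 0b0010 the code below computes Rot = 2, i.e. a right-rotation by
// 2 * 2 = 4 bits, so an 8-bit immediate of 0xFF decodes to
// ARM_AM::rotr32(0xFF, 4) = 0xF000000F.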
- unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF; - unsigned Imm = insn & 0xFF; - MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot))); - NumOpsAdded = 2; - return true; - } - if (Opcode == ARM::SRSW || Opcode == ARM::SRS || - Opcode == ARM::RFEW || Opcode == ARM::RFE) { - ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); - - if (Opcode == ARM::SRSW || Opcode == ARM::SRS) - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); - else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - NumOpsAdded = 3; - return true; - } - - assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred - || Opcode == ARM::SMC || Opcode == ARM::SVC) && - "Unexpected Opcode"); - - assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected"); - - int Imm32 = 0; - if (Opcode == ARM::SMC) { - // ZeroExtend(imm4, 32) where imm4 = Inst{3-0}. - Imm32 = slice(insn, 3, 0); - } else if (Opcode == ARM::SVC) { - // ZeroExtend(imm24, 32) where imm24 = Inst{23-0}. - Imm32 = slice(insn, 23, 0); - } else { - // SignExtend(imm24:'00', 32) where imm24 = Inst{23-0}. - unsigned Imm26 = slice(insn, 23, 0) << 2; - //Imm32 = signextend<signed int, 26>(Imm26); - Imm32 = SignExtend32<26>(Imm26); - } - - MI.addOperand(MCOperand::CreateImm(Imm32)); - NumOpsAdded = 1; - - return true; -} - -// Misc. Branch Instructions. -// BX_RET, MOVPCLR -// BLX, BLX_pred, BX, BX_pred -// BLXi -static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // BX_RET and MOVPCLR have only two predicate operands; do an early return. - if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR) - return true; - - // BLX and BX take one GPR reg. - if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred || - Opcode == ARM::BX || Opcode == ARM::BX_pred) { - assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - OpIdx = 1; - return true; - } - - // BLXi takes imm32 (the PC offset). - if (Opcode == ARM::BLXi) { - assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected"); - // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}. - unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1; - int Imm32 = SignExtend32<26>(Imm26); - MI.addOperand(MCOperand::CreateImm(Imm32)); - OpIdx = 1; - return true; - } - - return false; -} - -static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { - uint32_t lsb = slice(insn, 11, 7); - uint32_t msb = slice(insn, 20, 16); - uint32_t Val = 0; - if (msb < lsb) { - DEBUG(errs() << "Encoding error: msb < lsb\n"); - return false; - } - - for (uint32_t i = lsb; i <= msb; ++i) - Val |= (1 << i); - mask = ~Val; - return true; -} - -// Standard data-processing instructions allow PC as a register specifier, -// but we should reject other DPFrm instructions with PC as registers. -static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) { - switch (Opcode) { - default: - // Did we miss an opcode? 
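// Review note: when the PC check just below does not fire, the default
// case falls through into the opcode labels that follow and shares
// their "return false".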
- if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) { - DEBUG(errs() << "DPFrm with bad reg specifier(s)\n"); - return true; - } - case ARM::ADCrr: case ARM::ADDSrr: case ARM::ADDrr: case ARM::ANDrr: - case ARM::BICrr: case ARM::CMNzrr: case ARM::CMPrr: case ARM::EORrr: - case ARM::ORRrr: case ARM::RSBrr: case ARM::RSCrr: case ARM::SBCrr: - case ARM::SUBSrr: case ARM::SUBrr: case ARM::TEQrr: case ARM::TSTrr: - return false; - } -} - -// A major complication is the fact that some of the saturating add/subtract -// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. -// They are QADD, QDADD, QDSUB, and QSUB. -static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - unsigned short NumDefs = MCID.getNumDefs(); - bool isUnary = isUnaryDP(MCID.TSFlags); - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // Disassemble register def if there is one. - if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - - // Now disassemble the src operands. - if (OpIdx >= NumOps) - return false; - - // Special-case handling of BFC/BFI/SBFX/UBFX. - if (Opcode == ARM::BFC || Opcode == ARM::BFI) { - // A8.6.17 BFC & A8.6.18 BFI - // Sanity check Rd. - if (decodeRd(insn) == 15) - return false; - MI.addOperand(MCOperand::CreateReg(0)); - if (Opcode == ARM::BFI) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - ++OpIdx; - } - uint32_t mask = 0; - if (!getBFCInvMask(insn, mask)) - return false; - - MI.addOperand(MCOperand::CreateImm(mask)); - OpIdx += 2; - return true; - } - if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) { - // Sanity check Rd and Rm. - if (decodeRd(insn) == 15 || decodeRm(insn) == 15) - return false; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7))); - MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1)); - OpIdx += 3; - return true; - } - - bool RmRn = (Opcode == ARM::QADD || Opcode == ARM::QDADD || - Opcode == ARM::QDSUB || Opcode == ARM::QSUB); - - // BinaryDP has an Rn operand. - if (!isUnary) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - RmRn ? decodeRm(insn) : decodeRn(insn)))); - ++OpIdx; - } - - // If this is a two-address operand, skip it, e.g., MOVCCr operand 1. - if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) { - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - // Now disassemble operand 2. - if (OpIdx >= NumOps) - return false; - - if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - // We have a reg/reg form. - // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are - // routed here as well. - // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form"); - if (BadRegsDPFrm(Opcode, insn)) - return false; - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - RmRn? decodeRn(insn) : decodeRm(insn)))); - ++OpIdx; - } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) { - // These two instructions don't allow d as 15. 
- if (decodeRd(insn) == 15) - return false; - // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}). - assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); - unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0); - if (!B->tryAddingSymbolicOperand(Imm16, 4, MI)) - MI.addOperand(MCOperand::CreateImm(Imm16)); - ++OpIdx; - } else { - // We have a reg/imm form. - // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0. - // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}. - // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot(). - assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); - unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF; - unsigned Imm = insn & 0xFF; - MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot))); - ++OpIdx; - } - - return true; -} - -static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - unsigned short NumDefs = MCID.getNumDefs(); - bool isUnary = isUnaryDP(MCID.TSFlags); - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // Disassemble register def if there is one. - if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - } - - // Disassemble the src operands. - if (OpIdx >= NumOps) - return false; - - // BinaryDP has an Rn operand. - if (!isUnary) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - // If this is a two-address operand, skip it, e.g., MOVCCs operand 1. - if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) { - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - // Disassemble operand 2, which consists of three components. - if (OpIdx + 2 >= NumOps) - return false; - - assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+2].RegClass < 0) && - "Expect 3 reg operands"); - - // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1. - unsigned Rs = slice(insn, 4, 4); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - if (Rs) { - // If Inst{7} != 0, we should reject this insn as an invalid encoding. - if (slice(insn, 7, 7)) - return false; - - // A8.6.3 ADC (register-shifted register) - // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE; - // - // This also accounts for shift instructions (register) where, fortunately, - // Inst{19-16} = 0b0000. - // A8.6.89 LSL (register) - // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE; - if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || - decodeRm(insn) == 15 || decodeRs(insn) == 15) - return false; - - // Register-controlled shifts: [Rm, Rs, shift]. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRs(insn)))); - // Inst{6-5} encodes the shift opcode. - ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, 0))); - } else { - // Constant shifts: [Rm, reg0, shift_imm]. - MI.addOperand(MCOperand::CreateReg(0)); // NoRegister - // Inst{6-5} encodes the shift opcode. 
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShImm = slice(insn, 11, 7); - - // A8.4.1. Possible rrx or shift amount of 32... - getImmShiftSE(ShOp, ShImm); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShImm))); - } - OpIdx += 3; - - return true; -} - -static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBack, - bool Imm) { - const StringRef Name = ARMInsts[Opcode].Name; - unsigned Rt = decodeRd(insn); - unsigned Rn = decodeRn(insn); - unsigned Rm = decodeRm(insn); - unsigned P = getPBit(insn); - unsigned W = getWBit(insn); - - if (Store) { - // Only STR (immediate, register) allows PC as the source. - if (Name.startswith("STRB") && Rt == 15) { - DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); - return true; - } - if (WBack && (Rn == 15 || Rn == Rt)) { - DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n"); - return true; - } - if (!Imm && Rm == 15) { - DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n"); - return true; - } - } else { - // Only LDR (immediate, register) allows PC as the destination. - if (Name.startswith("LDRB") && Rt == 15) { - DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); - return true; - } - if (Imm) { - // Immediate - if (Rn == 15) { - // The literal form must be in offset mode; it's an encoding error - // otherwise. - if (!(P == 1 && W == 0)) { - DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n"); - return true; - } - // LDRB (literal) does not allow PC as the destination. - if (Opcode != ARM::LDRi12 && Rt == 15) { - DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); - return true; - } - } else { - // Write back while Rn == Rt does not make sense. - if (WBack && (Rn == Rt)) { - DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n"); - return true; - } - } - } else { - // Register - if (Rm == 15) { - DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n"); - return true; - } - if (WBack && (Rn == 15 || Rn == Rt)) { - DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n"); - return true; - } - } - } - return false; -} - -static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - bool isPrePost = isPrePostLdSt(MCID.TSFlags); - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(((!isStore && MCID.getNumDefs() > 0) || - (isStore && (MCID.getNumDefs() == 0 || isPrePost))) - && "Invalid arguments"); - - // Operand 0 of a pre- and post-indexed store is the address base writeback. - if (isPrePost && isStore) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - // Disassemble the dst/src operand. - if (OpIdx >= NumOps) - return false; - - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - - // After dst of a pre- and post-indexed load is the address base writeback. 
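// Review note (illustrative, not part of the patch): for a pre-indexed
// "LDR r0, [r1, #4]!" the operand order built by this function is r0
// (the loaded value), r1 again (the base-writeback result), r1 (the
// base), and then the +4 offset operand.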
- if (isPrePost && !isStore) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - // Disassemble the base operand. - if (OpIdx >= NumOps) - return false; - - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) - && "Index mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - - // For reg/reg form, base reg is followed by +/- reg shop imm. - // For immediate form, it is followed by +/- imm12. - // See also ARMAddressingModes.h (Addressing Mode #2). - if (OpIdx + 1 >= NumOps) - return false; - - if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0)) - return false; - - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - unsigned IndexMode = - (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; - if (getIBit(insn) == 0) { - // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2). - // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already - // been populated. - if (isPrePost) { - MI.addOperand(MCOperand::CreateReg(0)); - OpIdx += 1; - } - - unsigned Imm12 = slice(insn, 11, 0); - if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 || - Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) { - // Disassemble the 12-bit immediate offset, which is the second operand in - // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm). - int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12; - MI.addOperand(MCOperand::CreateImm(Offset)); - } else { - // Disassemble the 12-bit immediate offset, which is the second operand in - // $am2offset => (ops GPR, i32imm). - unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift, - IndexMode); - MI.addOperand(MCOperand::CreateImm(Offset)); - } - OpIdx += 1; - } else { - // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid. - if (slice(insn,4,4) == 1) - return false; - - // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - // Inst{6-5} encodes the shift opcode. - ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShImm = slice(insn, 11, 7); - - // A8.4.1. Possible rrx or shift amount of 32... 
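// Review note: getImmShiftSE (defined earlier in this file) performs
// the A8.4.1 canonicalization: a raw (lsr, imm5 = 0) or (asr, imm5 = 0)
// becomes a shift of 32, (ror, imm5 = 0) becomes rrx, and (lsl,
// imm5 = 0) becomes no shift at all.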
- getImmShiftSE(ShOp, ShImm); - MI.addOperand(MCOperand::CreateImm( - ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode))); - OpIdx += 2; - } - - return true; -} - -static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B); -} - -static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); -} - -static bool HasDualReg(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: - case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST: - return true; - } -} - -static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - bool isPrePost = isPrePostLdSt(MCID.TSFlags); - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(((!isStore && MCID.getNumDefs() > 0) || - (isStore && (MCID.getNumDefs() == 0 || isPrePost))) - && "Invalid arguments"); - - // Operand 0 of a pre- and post-indexed store is the address base writeback. - if (isPrePost && isStore) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - // Disassemble the dst/src operand. - if (OpIdx >= NumOps) - return false; - - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - - // Fill in LDRD and STRD's second operand Rt operand. - if (HasDualReg(Opcode)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn) + 1))); - ++OpIdx; - } - - // After dst of a pre- and post-indexed load is the address base writeback. - if (isPrePost && !isStore) { - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - // Disassemble the base operand. - if (OpIdx >= NumOps) - return false; - - assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - "Reg operand expected"); - assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) - && "Offset mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - - // For reg/reg form, base reg is followed by +/- reg. - // For immediate form, it is followed by +/- imm8. - // See also ARMAddressingModes.h (Addressing Mode #3). - if (OpIdx + 1 >= NumOps) - return false; - - assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass < 0) && - "Expect 1 reg operand followed by 1 imm operand"); - - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - unsigned IndexMode = - (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; - if (getAM3IBit(insn) == 1) { - MI.addOperand(MCOperand::CreateReg(0)); - - // Disassemble the 8-bit immediate offset. 
- unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF; - unsigned Imm4L = insn & 0xF; - unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L, - IndexMode); - MI.addOperand(MCOperand::CreateImm(Offset)); - } else { - // Disassemble the offset reg (Rm). - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode); - MI.addOperand(MCOperand::CreateImm(Offset)); - } - OpIdx += 2; - - return true; -} - -static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, - B); -} - -static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); -} - -// The algorithm for disassembly of LdStMulFrm is different from others because -// it explicitly populates the two predicate operands after the base register. -// After that, we need to populate the reglist with each affected register -// encoded as an MCOperand. -static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4"); - NumOpsAdded = 0; - - unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); - - // Writeback to base, if necessary. - if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::STMIA_UPD || - Opcode == ARM::LDMDA_UPD || Opcode == ARM::STMDA_UPD || - Opcode == ARM::LDMDB_UPD || Opcode == ARM::STMDB_UPD || - Opcode == ARM::LDMIB_UPD || Opcode == ARM::STMIB_UPD) { - MI.addOperand(MCOperand::CreateReg(Base)); - ++NumOpsAdded; - } - - // Add the base register operand. - MI.addOperand(MCOperand::CreateReg(Base)); - - // Handling the two predicate operands before the reglist. - int64_t CondVal = getCondField(insn); - if (CondVal == 0xF) - return false; - MI.addOperand(MCOperand::CreateImm(CondVal)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - - NumOpsAdded += 3; - - // Fill the variadic part of reglist. - unsigned RegListBits = insn & ((1 << 16) - 1); - for (unsigned i = 0; i < 16; ++i) { - if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - i))); - ++NumOpsAdded; - } - } - - return true; -} - -// LDREX, LDREXB, LDREXH: Rd Rn -// LDREXD: Rd Rd+1 Rn -// STREX, STREXB, STREXH: Rd Rm Rn -// STREXD: Rd Rm Rm+1 Rn -// -// SWP, SWPB: Rd Rm Rn -static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && "Expect 2 reg operands"); - - bool isStore = slice(insn, 20, 20) == 0; - bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD); - - // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - - // Store register Exclusive needs a source operand. 
- if (isStore) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - ++OpIdx; - - if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)+1))); - ++OpIdx; - } - } else if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)+1))); - ++OpIdx; - } - - // Finally add the pointer operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - - return true; -} - -// Misc. Arithmetic Instructions. -// CLZ: Rd Rm -// PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5 -// RBIT, REV, REV16, REVSH: Rd Rm -static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && "Expect 2 reg operands"); - - bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - - // Sanity check the registers, which should not be 15. - if (decodeRd(insn) == 15 || decodeRm(insn) == 15) - return false; - if (ThreeReg && decodeRn(insn) == 15) - return false; - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - ++OpIdx; - - if (ThreeReg) { - assert(NumOps >= 4 && "Expect >= 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - ++OpIdx; - - // If there is still an operand info left which is an immediate operand, add - // an additional imm5 LSL/ASR operand. - if (ThreeReg && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // Extract the 5-bit immediate field Inst{11-7}. - unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F; - ARM_AM::ShiftOpc Opc = ARM_AM::no_shift; - if (Opcode == ARM::PKHBT) - Opc = ARM_AM::lsl; - else if (Opcode == ARM::PKHTB) - Opc = ARM_AM::asr; - getImmShiftSE(Opc, ShiftAmt); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt))); - ++OpIdx; - } - - return true; -} - -/// DisassembleSatFrm - Disassemble saturate instructions: -/// SSAT, SSAT16, USAT, and USAT16. -static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - // A8.6.183 SSAT - // if d == 15 || n == 15 then UNPREDICTABLE; - if (decodeRd(insn) == 15 || decodeRm(insn) == 15) - return false; - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands - - // Disassemble register def. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - unsigned Pos = slice(insn, 20, 16); - if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16) - Pos += 1; - MI.addOperand(MCOperand::CreateImm(Pos)); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - - if (NumOpsAdded == 4) { - ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl); - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShAmt = slice(insn, 11, 7); - if (ShAmt == 0) { - // A8.6.183. Possible ASR shift amount of 32... 
-      if (Opc == ARM_AM::asr)
-        ShAmt = 32;
-      else
-        Opc = ARM_AM::no_shift;
-    }
-    MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
-  }
-  return true;
-}
-
-// Extend instructions.
-// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
-// The 2nd operand register is Rn and the 3rd operand register is Rm for the
-// three register operand form. Otherwise, Rn=0b1111 and only Rm is used.
-static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  // A8.6.220 SXTAB
-  // if d == 15 || m == 15 then UNPREDICTABLE;
-  if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
-    return false;
-
-  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-  unsigned &OpIdx = NumOpsAdded;
-
-  OpIdx = 0;
-
-  assert(NumOps >= 2
-         && OpInfo[0].RegClass == ARM::GPRRegClassID
-         && OpInfo[1].RegClass == ARM::GPRRegClassID
-         && "Expect 2 reg operands");
-
-  bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRd(insn))));
-  ++OpIdx;
-
-  if (ThreeReg) {
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRn(insn))));
-    ++OpIdx;
-  }
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRm(insn))));
-  ++OpIdx;
-
-  // If there is still an operand info left which is an immediate operand, add
-  // an additional rotate immediate operand.
-  if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
-      && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
-    // Extract the 2-bit rotate field Inst{11-10}.
-    unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3;
-    // Rotation by 8, 16, or 24 bits.
-    MI.addOperand(MCOperand::CreateImm(rot << 3));
-    ++OpIdx;
-  }
-
-  return true;
-}
-
-/////////////////////////////////////
-//                                 //
-//    Utility Functions For VFP    //
-//                                 //
-/////////////////////////////////////
-
-// Extract/Decode Dd/Sd:
-//
-// SP => d = UInt(Vd:D)
-// DP => d = UInt(D:Vd)
-static unsigned decodeVFPRd(uint32_t insn, bool isSPVFP) {
-  return isSPVFP ? (decodeRd(insn) << 1 | getDBit(insn))
-                 : (decodeRd(insn) | getDBit(insn) << 4);
-}
-
-// Extract/Decode Dn/Sn:
-//
-// SP => n = UInt(Vn:N)
-// DP => n = UInt(N:Vn)
-static unsigned decodeVFPRn(uint32_t insn, bool isSPVFP) {
-  return isSPVFP ? (decodeRn(insn) << 1 | getNBit(insn))
-                 : (decodeRn(insn) | getNBit(insn) << 4);
-}
-
-// Extract/Decode Dm/Sm:
-//
-// SP => m = UInt(Vm:M)
-// DP => m = UInt(M:Vm)
-static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) {
-  return isSPVFP ? (decodeRm(insn) << 1 | getMBit(insn))
-                 : (decodeRm(insn) | getMBit(insn) << 4);
-}
-
-// A7.5.1
-static APInt VFPExpandImm(unsigned char byte, unsigned N) {
-  assert(N == 32 || N == 64);
-
-  uint64_t Result;
-  unsigned bit6 = slice(byte, 6, 6);
-  if (N == 32) {
-    Result = slice(byte, 7, 7) << 31 | slice(byte, 5, 0) << 19;
-    if (bit6)
-      Result |= 0x1f << 25;
-    else
-      Result |= 0x1 << 30;
-  } else {
-    Result = (uint64_t)slice(byte, 7, 7) << 63 |
-             (uint64_t)slice(byte, 5, 0) << 48;
-    if (bit6)
-      Result |= 0xffULL << 54;
-    else
-      Result |= 0x1ULL << 62;
-  }
-  return APInt(N, Result);
-}
-
-// VFP Unary Format Instructions:
-//
-// VCMP[E]ZD, VCMP[E]ZS: compares one floating-point register with zero
-// VCVTDS, VCVTSD: converts between double-precision and single-precision
-// The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers.
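The handler below (and the binary and conversion handlers after it) selects SP versus DP purely from the operand's register class, then extracts the register number with the D/Vd packings shown above. A minimal standalone check of that packing with the field positions written out; the helper name is illustrative, not part of this file:

    #include <cassert>
    #include <cstdint>

    // Sd = Vd:D for single precision, Dd = D:Vd for double precision,
    // where Vd = Inst{15-12} and D = Inst{22}.
    static unsigned vfpRd(uint32_t insn, bool isSPVFP) {
      unsigned Vd = (insn >> 12) & 0xf;
      unsigned D  = (insn >> 22) & 1;
      return isSPVFP ? (Vd << 1 | D) : (D << 4 | Vd);
    }

    int main() {
      uint32_t insn = (1u << 22) | (0x5u << 12); // D = 1, Vd = 0b0101
      assert(vfpRd(insn, true)  == 11);          // S11 = UInt(Vd:D)
      assert(vfpRd(insn, false) == 21);          // D21 = UInt(D:Vd)
    }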
-static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned RegClass = OpInfo[OpIdx].RegClass; - assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) && - "Reg operand expected"); - bool isSP = (RegClass == ARM::SPRRegClassID); - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); - ++OpIdx; - - // Early return for compare with zero instructions. - if (Opcode == ARM::VCMPEZD || Opcode == ARM::VCMPEZS - || Opcode == ARM::VCMPZD || Opcode == ARM::VCMPZS) - return true; - - RegClass = OpInfo[OpIdx].RegClass; - assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) && - "Reg operand expected"); - isSP = (RegClass == ARM::SPRRegClassID); - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); - ++OpIdx; - - return true; -} - -// All the instructions have homogeneous [VFP]Rd, [VFP]Rn, and [VFP]Rm regs. -// Some of them have operand constraints which tie the first operand in the -// InOperandList to that of the dst. As far as asm printing is concerned, this -// tied_to operand is simply skipped. -static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3"); - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned RegClass = OpInfo[OpIdx].RegClass; - assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) && - "Reg operand expected"); - bool isSP = (RegClass == ARM::SPRRegClassID); - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); - ++OpIdx; - - // Skip tied_to operand constraint. - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { - assert(NumOps >= 4 && "Expect >=4 operands"); - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP)))); - ++OpIdx; - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); - ++OpIdx; - - return true; -} - -// A8.6.295 vcvt (floating-point <-> integer) -// Int to FP: VSITOD, VSITOS, VUITOD, VUITOS -// FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S -// -// A8.6.297 vcvt (floating-point and fixed-point) -// Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i)) -static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2"); - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297 - bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297 - unsigned RegClassID = SP ? ARM::SPRRegClassID : ARM::DPRRegClassID; - - if (fixed_point) { - // A8.6.297 - assert(NumOps >= 3 && "Expect >= 3 operands"); - int size = slice(insn, 7, 7) == 0 ? 
16 : 32;
-    int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5));
-    MI.addOperand(MCOperand::CreateReg(
-                    getRegisterEnum(B, RegClassID,
-                                    decodeVFPRd(insn, SP))));
-
-    assert(MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
-           "Tied to operand expected");
-    MI.addOperand(MI.getOperand(0));
-
-    assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() &&
-           !OpInfo[2].isOptionalDef() && "Imm operand expected");
-    MI.addOperand(MCOperand::CreateImm(fbits));
-
-    NumOpsAdded = 3;
-  } else {
-    // A8.6.295
-    // The Rd (destination) and Rm (source) bits have different
-    // interpretations depending on whether the operand is single precision.
-    unsigned d, m;
-    if (slice(insn, 18, 18) == 1) { // to_integer operation
-      d = decodeVFPRd(insn, true /* Is Single Precision */);
-      MI.addOperand(MCOperand::CreateReg(
-                      getRegisterEnum(B, ARM::SPRRegClassID, d)));
-      m = decodeVFPRm(insn, SP);
-      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m)));
-    } else {
-      d = decodeVFPRd(insn, SP);
-      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d)));
-      m = decodeVFPRm(insn, true /* Is Single Precision */);
-      MI.addOperand(MCOperand::CreateReg(
-                      getRegisterEnum(B, ARM::SPRRegClassID, m)));
-    }
-    NumOpsAdded = 2;
-  }
-
-  return true;
-}
-
-// VMOVRS - A8.6.330
-// Rt => Rd; Sn => UInt(Vn:N)
-static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2");
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRd(insn))));
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
-                                                     decodeVFPRn(insn, true))));
-  NumOpsAdded = 2;
-  return true;
-}
-
-// VMOVRRD - A8.6.332
-// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
-//
-// VMOVRRS - A8.6.331
-// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
-static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3");
-
-  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-  unsigned &OpIdx = NumOpsAdded;
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRd(insn))));
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRn(insn))));
-  OpIdx = 2;
-
-  if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) {
-    unsigned Sm = decodeVFPRm(insn, true);
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
-                                                       Sm)));
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
-                                                       Sm+1)));
-    OpIdx += 2;
-  } else {
-    MI.addOperand(MCOperand::CreateReg(
-                    getRegisterEnum(B, ARM::DPRRegClassID,
-                                    decodeVFPRm(insn, false))));
-    ++OpIdx;
-  }
-  return true;
-}
-
-// VMOVSR - A8.6.330
-// Rt => Rd; Sn => UInt(Vn:N)
-static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2");
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID,
-                                                     decodeVFPRn(insn, true))));
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRd(insn))));
-  NumOpsAdded = 2;
-  return true;
-}
-
-// VMOVDRR - A8.6.332
-// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm)
-//
-// VMOVSRR - A8.6.331
-// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1
-static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode,
uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { - unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, - Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, - Sm+1))); - OpIdx += 2; - } else { - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::DPRRegClassID, - decodeVFPRm(insn, false)))); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - OpIdx += 2; - return true; -} - -// VFP Load/Store Instructions. -// VLDRD, VLDRS, VSTRD, VSTRS -static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3"); - - bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS); - unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; - - // Extract Dd/Sd for operand 0. - unsigned RegD = decodeVFPRd(insn, isSPVFP); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD))); - - unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); - MI.addOperand(MCOperand::CreateReg(Base)); - - // Next comes the AM5 Opcode. - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - unsigned char Imm8 = insn & 0xFF; - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(AddrOpcode, Imm8))); - - NumOpsAdded = 3; - - return true; -} - -// VFP Load/Store Multiple Instructions. -// We have an optional write back reg, the base, and two predicate operands. -// It is then followed by a reglist of either DPR(s) or SPR(s). -// -// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] -static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); - - // Writeback to base, if necessary. - if (Opcode == ARM::VLDMDIA_UPD || Opcode == ARM::VLDMSIA_UPD || - Opcode == ARM::VLDMDDB_UPD || Opcode == ARM::VLDMSDB_UPD || - Opcode == ARM::VSTMDIA_UPD || Opcode == ARM::VSTMSIA_UPD || - Opcode == ARM::VSTMDDB_UPD || Opcode == ARM::VSTMSDB_UPD) { - MI.addOperand(MCOperand::CreateReg(Base)); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg(Base)); - - // Handling the two predicate operands before the reglist. - int64_t CondVal = getCondField(insn); - if (CondVal == 0xF) - return false; - MI.addOperand(MCOperand::CreateImm(CondVal)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - - OpIdx += 3; - - bool isSPVFP = (Opcode == ARM::VLDMSIA || - Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD || - Opcode == ARM::VSTMSIA || - Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD); - unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; - - // Extract Dd/Sd. - unsigned RegD = decodeVFPRd(insn, isSPVFP); - - // Fill the variadic part of reglist. - unsigned char Imm8 = insn & 0xFF; - unsigned Regs = isSPVFP ? 
Imm8 : Imm8/2; - - // Apply some sanity checks before proceeding. - if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16)) - return false; - - for (unsigned i = 0; i < Regs; ++i) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, - RegD + i))); - ++OpIdx; - } - - return true; -} - -// Misc. VFP Instructions. -// FMSTAT (vmrs with Rt=0b1111, i.e., to apsr_nzcv and no register operand) -// FCONSTD (DPR and a VFPf64Imm operand) -// FCONSTS (SPR and a VFPf32Imm operand) -// VMRS/VMSR (GPR operand) -static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - if (Opcode == ARM::FMSTAT) - return true; - - assert(NumOps >= 2 && "VFPMiscFrm expects >=2 operands"); - - unsigned RegEnum = 0; - switch (OpInfo[0].RegClass) { - case ARM::DPRRegClassID: - RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false)); - break; - case ARM::SPRRegClassID: - RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true)); - break; - case ARM::GPRRegClassID: - RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)); - break; - default: - assert(0 && "Invalid reg class id"); - return false; - } - - MI.addOperand(MCOperand::CreateReg(RegEnum)); - ++OpIdx; - - // Extract/decode the f64/f32 immediate. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // The asm syntax specifies the floating point value, not the 8-bit literal. - APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0), - Opcode == ARM::FCONSTD ? 64 : 32); - APFloat immFP = APFloat(immRaw, true); - double imm = Opcode == ARM::FCONSTD ? immFP.convertToDouble() : - immFP.convertToFloat(); - MI.addOperand(MCOperand::CreateFPImm(imm)); - - ++OpIdx; - } - - return true; -} - -// DisassembleThumbFrm() is defined in ThumbDisassemblerCore.h file. -#include "ThumbDisassemblerCore.h" - -///////////////////////////////////////////////////// -// // -// Utility Functions For ARM Advanced SIMD // -// // -///////////////////////////////////////////////////// - -// The following NEON namings are based on A8.6.266 VABA, VABAL. Notice that -// A8.6.303 VDUP (ARM core register)'s D/Vd pair is the N/Vn pair of VABA/VABAL. - -// A7.3 Register encoding - -// Extract/Decode NEON D/Vd: -// -// Note that for quadword, Qd = UInt(D:Vd<3:1>) = Inst{22:15-13}, whereas for -// doubleword, Dd = UInt(D:Vd). We compensate for this difference by -// handling it in the getRegisterEnum() utility function. -// D = Inst{22}, Vd = Inst{15-12} -static unsigned decodeNEONRd(uint32_t insn) { - return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4 - | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask); -} - -// Extract/Decode NEON N/Vn: -// -// Note that for quadword, Qn = UInt(N:Vn<3:1>) = Inst{7:19-17}, whereas for -// doubleword, Dn = UInt(N:Vn). We compensate for this difference by -// handling it in the getRegisterEnum() utility function. -// N = Inst{7}, Vn = Inst{19-16} -static unsigned decodeNEONRn(uint32_t insn) { - return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4 - | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask); -} - -// Extract/Decode NEON M/Vm: -// -// Note that for quadword, Qm = UInt(M:Vm<3:1>) = Inst{5:3-1}, whereas for -// doubleword, Dm = UInt(M:Vm). 
We compensate for this difference by -// handling it in the getRegisterEnum() utility function. -// M = Inst{5}, Vm = Inst{3-0} -static unsigned decodeNEONRm(uint32_t insn) { - return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4 - | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask); -} - -namespace { -enum ElemSize { - ESizeNA = 0, - ESize8 = 8, - ESize16 = 16, - ESize32 = 32, - ESize64 = 64 -}; -} // End of unnamed namespace - -// size field -> Inst{11-10} -// index_align field -> Inst{7-4} -// -// The Lane Index interpretation depends on the Data Size: -// 8 (encoded as size = 0b00) -> Index = index_align[3:1] -// 16 (encoded as size = 0b01) -> Index = index_align[3:2] -// 32 (encoded as size = 0b10) -> Index = index_align[3] -// -// Ref: A8.6.317 VLD4 (single 4-element structure to one lane). -static unsigned decodeLaneIndex(uint32_t insn) { - unsigned size = insn >> 10 & 3; - assert((size == 0 || size == 1 || size == 2) && - "Encoding error: size should be either 0, 1, or 2"); - - unsigned index_align = insn >> 4 & 0xF; - return (index_align >> 1) >> size; -} - -// imm64 = AdvSIMDExpandImm(op, cmode, i:imm3:imm4) -// op = Inst{5}, cmode = Inst{11-8} -// i = Inst{24} (ARM architecture) -// imm3 = Inst{18-16}, imm4 = Inst{3-0} -// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions. -static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) { - unsigned char op = (insn >> 5) & 1; - unsigned char cmode = (insn >> 8) & 0xF; - unsigned char Imm8 = ((insn >> 24) & 1) << 7 | - ((insn >> 16) & 7) << 4 | - (insn & 0xF); - return (op << 12) | (cmode << 8) | Imm8; -} - -// A8.6.339 VMUL, VMULL (by scalar) -// ESize16 => m = Inst{2-0} (Vm<2:0>) D0-D7 -// ESize32 => m = Inst{3-0} (Vm<3:0>) D0-D15 -static unsigned decodeRestrictedDm(uint32_t insn, ElemSize esize) { - switch (esize) { - case ESize16: - return insn & 7; - case ESize32: - return insn & 0xF; - default: - assert(0 && "Unreachable code!"); - return 0; - } -} - -// A8.6.339 VMUL, VMULL (by scalar) -// ESize16 => index = Inst{5:3} (M:Vm<3>) D0-D7 -// ESize32 => index = Inst{5} (M) D0-D15 -static unsigned decodeRestrictedDmIndex(uint32_t insn, ElemSize esize) { - switch (esize) { - case ESize16: - return (((insn >> 5) & 1) << 1) | ((insn >> 3) & 1); - case ESize32: - return (insn >> 5) & 1; - default: - assert(0 && "Unreachable code!"); - return 0; - } -} - -// A8.6.296 VCVT (between floating-point and fixed-point, Advanced SIMD) -// (64 - <fbits>) is encoded as imm6, i.e., Inst{21-16}. 
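That is, the decoder recovers fbits with a single subtraction, fbits = 64 - imm6. A tiny self-contained check of this inverse mapping (hypothetical helper name), ahead of the actual decoder below:

    #include <cassert>
    #include <cstdint>

    // A8.6.296: fbits = 64 - imm6, with imm6 at Inst{21-16}.
    static unsigned vcvtFractionBits(uint32_t insn) {
      return 64 - ((insn >> 16) & 0x3f);
    }

    int main() {
      // An instruction word carrying imm6 = 48 encodes fbits = 16.
      assert(vcvtFractionBits(48u << 16) == 16);
    }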
-static unsigned decodeVCVTFractionBits(uint32_t insn) { - return 64 - ((insn >> 16) & 0x3F); -} - -// A8.6.302 VDUP (scalar) -// ESize8 => index = Inst{19-17} -// ESize16 => index = Inst{19-18} -// ESize32 => index = Inst{19} -static unsigned decodeNVLaneDupIndex(uint32_t insn, ElemSize esize) { - switch (esize) { - case ESize8: - return (insn >> 17) & 7; - case ESize16: - return (insn >> 18) & 3; - case ESize32: - return (insn >> 19) & 1; - default: - assert(0 && "Unspecified element size!"); - return 0; - } -} - -// A8.6.328 VMOV (ARM core register to scalar) -// A8.6.329 VMOV (scalar to ARM core register) -// ESize8 => index = Inst{21:6-5} -// ESize16 => index = Inst{21:6} -// ESize32 => index = Inst{21} -static unsigned decodeNVLaneOpIndex(uint32_t insn, ElemSize esize) { - switch (esize) { - case ESize8: - return ((insn >> 21) & 1) << 2 | ((insn >> 5) & 3); - case ESize16: - return ((insn >> 21) & 1) << 1 | ((insn >> 6) & 1); - case ESize32: - return ((insn >> 21) & 1); - default: - assert(0 && "Unspecified element size!"); - return 0; - } -} - -// Imm6 = Inst{21-16}, L = Inst{7} -// -// LeftShift == true (A8.6.367 VQSHL, A8.6.387 VSLI): -// case L:imm6 of -// '0001xxx' => esize = 8; shift_amount = imm6 - 8 -// '001xxxx' => esize = 16; shift_amount = imm6 - 16 -// '01xxxxx' => esize = 32; shift_amount = imm6 - 32 -// '1xxxxxx' => esize = 64; shift_amount = imm6 -// -// LeftShift == false (A8.6.376 VRSHR, A8.6.368 VQSHRN): -// case L:imm6 of -// '0001xxx' => esize = 8; shift_amount = 16 - imm6 -// '001xxxx' => esize = 16; shift_amount = 32 - imm6 -// '01xxxxx' => esize = 32; shift_amount = 64 - imm6 -// '1xxxxxx' => esize = 64; shift_amount = 64 - imm6 -// -static unsigned decodeNVSAmt(uint32_t insn, bool LeftShift) { - ElemSize esize = ESizeNA; - unsigned L = (insn >> 7) & 1; - unsigned imm6 = (insn >> 16) & 0x3F; - if (L == 0) { - if (imm6 >> 3 == 1) - esize = ESize8; - else if (imm6 >> 4 == 1) - esize = ESize16; - else if (imm6 >> 5 == 1) - esize = ESize32; - else - assert(0 && "Wrong encoding of Inst{7:21-16}!"); - } else - esize = ESize64; - - if (LeftShift) - return esize == ESize64 ? imm6 : (imm6 - esize); - else - return esize == ESize64 ? (esize - imm6) : (2*esize - imm6); -} - -// A8.6.305 VEXT -// Imm4 = Inst{11-8} -static unsigned decodeN3VImm(uint32_t insn) { - return (insn >> 8) & 0xF; -} - -// VLD* -// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] -// VLD*LN* -// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] TIED_TO ... imm(idx) -// VST* -// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ... -// VST*LN* -// Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ... [imm(idx)] -// -// Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it. -static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, - unsigned alignment, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - - // At least one DPR register plus addressing mode #6. - assert(NumOps >= 3 && "Expect >= 3 operands"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // We have homogeneous NEON registers for Load/Store. - unsigned RegClass = 0; - - // Double-spaced registers have increments of 2. - unsigned Inc = DblSpaced ? 2 : 1; - - unsigned Rn = decodeRn(insn); - unsigned Rm = decodeRm(insn); - unsigned Rd = decodeNEONRd(insn); - - // A7.7.1 Advanced SIMD addressing mode. - bool WB = Rm != 15; - - // LLVM Addressing Mode #6. 
-  unsigned RmEnum = 0;
-  if (WB && Rm != 13)
-    RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm);
-
-  if (Store) {
-    // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's,
-    // then possible lane index.
-    assert(OpIdx < NumOps && OpInfo[0].RegClass == ARM::GPRRegClassID &&
-           "Reg operand expected");
-
-    if (WB) {
-      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                         Rn)));
-      ++OpIdx;
-    }
-
-    assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
-           OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
-    // addrmode6 := (ops GPR:$addr, i32imm)
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       Rn)));
-    MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
-    OpIdx += 2;
-
-    if (WB) {
-      MI.addOperand(MCOperand::CreateReg(RmEnum));
-      ++OpIdx;
-    }
-
-    assert(OpIdx < NumOps &&
-           (OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
-            OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
-           "Reg operand expected");
-
-    RegClass = OpInfo[OpIdx].RegClass;
-    while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
-      MI.addOperand(MCOperand::CreateReg(
-                      getRegisterEnum(B, RegClass, Rd)));
-      Rd += Inc;
-      ++OpIdx;
-    }
-
-    // Handle possible lane index.
-    if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
-        && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
-      MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
-      ++OpIdx;
-    }
-
-  } else {
-    // Consume the DPR/QPR's, possible WB, AddrMode6, possible increment reg,
-    // possible TIED_TO DPR/QPR's (ignored), then possible lane index.
-    RegClass = OpInfo[0].RegClass;
-
-    while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
-      MI.addOperand(MCOperand::CreateReg(
-                      getRegisterEnum(B, RegClass, Rd)));
-      Rd += Inc;
-      ++OpIdx;
-    }
-
-    if (WB) {
-      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                         Rn)));
-      ++OpIdx;
-    }
-
-    assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
-           OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
-    // addrmode6 := (ops GPR:$addr, i32imm)
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       Rn)));
-    MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
-    OpIdx += 2;
-
-    if (WB) {
-      MI.addOperand(MCOperand::CreateReg(RmEnum));
-      ++OpIdx;
-    }
-
-    while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
-      assert(MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1 &&
-             "Tied to operand expected");
-      MI.addOperand(MCOperand::CreateReg(0));
-      ++OpIdx;
-    }
-
-    // Handle possible lane index.
-    if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
-        && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
-      MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
-      ++OpIdx;
-    }
-  }
-
-  // Accessing registers past the end of the NEON register file is not
-  // defined.
-  if (Rd > 32)
-    return false;
-
-  return true;
-}
-
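The Rm handling above implements the A7.7.1 addressing rules: Rm == 15 means no writeback, Rm == 13 means writeback by the transfer size with no index register, and any other Rm is a register-indexed writeback. A compact restatement of just that three-way decision, as a sketch with illustrative names:

    #include <cassert>

    enum class NEONWriteback { None, FixedIncrement, RegisterIncrement };

    // Classify the A7.7.1 writeback mode from the Rm field (Inst{3-0}).
    static NEONWriteback classifyNEONWriteback(unsigned Rm) {
      if (Rm == 15) return NEONWriteback::None;            // [Rn]
      if (Rm == 13) return NEONWriteback::FixedIncrement;  // [Rn]!
      return NEONWriteback::RegisterIncrement;             // [Rn], Rm
    }

    int main() {
      assert(classifyNEONWriteback(15) == NEONWriteback::None);
      assert(classifyNEONWriteback(13) == NEONWriteback::FixedIncrement);
      assert(classifyNEONWriteback(3)  == NEONWriteback::RegisterIncrement);
    }

DisassembleNLdSt0 folds the same three cases into its WB flag plus an RmEnum that stays 0 for the fixed-increment form.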
-// A8.6.308, A8.6.311, A8.6.314, A8.6.317.
-static bool Align4OneLaneInst(unsigned elem, unsigned size,
-                              unsigned index_align, unsigned &alignment) {
-  unsigned bits = 0;
-  switch (elem) {
-  default:
-    return false;
-  case 1:
-    // A8.6.308
-    if (size == 0)
-      return slice(index_align, 0, 0) == 0;
-    else if (size == 1) {
-      bits = slice(index_align, 1, 0);
-      if (bits != 0 && bits != 1)
-        return false;
-      if (bits == 1)
-        alignment = 16;
-      return true;
-    } else if (size == 2) {
-      bits = slice(index_align, 2, 0);
-      if (bits != 0 && bits != 3)
-        return false;
-      if (bits == 3)
-        alignment = 32;
-      return true;
-    }
-    return true;
-  case 2:
-    // A8.6.311
-    if (size == 0) {
-      if (slice(index_align, 0, 0) == 1)
-        alignment = 16;
-      return true;
-    } else if (size == 1) {
-      if (slice(index_align, 0, 0) == 1)
-        alignment = 32;
-      return true;
-    } else if (size == 2) {
-      if (slice(index_align, 1, 1) != 0)
-        return false;
-      if (slice(index_align, 0, 0) == 1)
-        alignment = 64;
-      return true;
-    }
-    return true;
-  case 3:
-    // A8.6.314
-    if (size == 0) {
-      if (slice(index_align, 0, 0) != 0)
-        return false;
-      return true;
-    } else if (size == 1) {
-      if (slice(index_align, 0, 0) != 0)
-        return false;
-      return true;
-    } else if (size == 2) {
-      if (slice(index_align, 1, 0) != 0)
-        return false;
-      return true;
-    }
-    return true;
-  case 4:
-    // A8.6.317
-    if (size == 0) {
-      if (slice(index_align, 0, 0) == 1)
-        alignment = 32;
-      return true;
-    } else if (size == 1) {
-      if (slice(index_align, 0, 0) == 1)
-        alignment = 64;
-      return true;
-    } else if (size == 2) {
-      bits = slice(index_align, 1, 0);
-      if (bits == 3)
-        return false;
-      if (bits == 1)
-        alignment = 64;
-      else if (bits == 2)
-        alignment = 128;
-      return true;
-    }
-    return true;
-  }
-}
-
-// A7.7
-// If L (Inst{21}) == 0, store instructions.
-// Find out whether the Opcode is double-spaced and pass that on to
-// DisassembleNLdSt0().
-static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  const StringRef Name = ARMInsts[Opcode].Name;
-  bool DblSpaced = false;
-  // 0 represents standard alignment, i.e., unaligned data access.
-  unsigned alignment = 0;
-
-  unsigned elem = 0; // legal values: {1, 2, 3, 4}
-  if (Name.startswith("VST1") || Name.startswith("VLD1"))
-    elem = 1;
-
-  if (Name.startswith("VST2") || Name.startswith("VLD2"))
-    elem = 2;
-
-  if (Name.startswith("VST3") || Name.startswith("VLD3"))
-    elem = 3;
-
-  if (Name.startswith("VST4") || Name.startswith("VLD4"))
-    elem = 4;
-
-  if (Name.find("LN") != std::string::npos) {
-    // To one lane instructions.
-    // See, for example, A8.6.317 VLD4 (single 4-element structure to one lane).
-
-    // Utility function takes number of elements, size, and index_align.
-    if (!Align4OneLaneInst(elem,
-                           slice(insn, 11, 10),
-                           slice(insn, 7, 4),
-                           alignment))
-      return false;
-
-    // <size> == 16 && Inst{5} == 1 --> DblSpaced = true
-    if (Name.endswith("16") || Name.endswith("16_UPD"))
-      DblSpaced = slice(insn, 5, 5) == 1;
-
-    // <size> == 32 && Inst{6} == 1 --> DblSpaced = true
-    if (Name.endswith("32") || Name.endswith("32_UPD"))
-      DblSpaced = slice(insn, 6, 6) == 1;
-  } else if (Name.find("DUP") != std::string::npos) {
-    // Single element (or structure) to all lanes.
-    // Inst{9-8} encodes the number of element(s) in the structure, with:
-    // 0b00 (VLD1DUP) (for this, the a bit makes sense only for data size 16 and 32)
-    // 0b01 (VLD2DUP)
-    // 0b10 (VLD3DUP) (for this, the a bit must be encoded as 0)
-    // 0b11 (VLD4DUP)
-    //
-    // Inst{7-6} encodes the data size, with:
-    // 0b00 => 8, 0b01 => 16, 0b10 => 32
-    //
-    // Inst{4} (the a bit) encodes the align action (0: standard alignment)
-    unsigned elem = slice(insn, 9, 8) + 1;
-    unsigned a = slice(insn, 4, 4);
-    if (elem != 3) {
-      // 0b11 is not a valid encoding for Inst{7-6}.
-      if (slice(insn, 7, 6) == 3)
-        return false;
-      unsigned data_size = 8 << slice(insn, 7, 6);
-      // For VLD1DUP, a bit makes sense only for data size of 16 and 32.
-      if (a && data_size == 8)
-        return false;
-
-      // Now we can calculate the alignment!
-      if (a)
-        alignment = elem * data_size;
-    } else {
-      if (a) {
-        // A8.6.315 VLD3 (single 3-element structure to all lanes)
-        // The a bit must be encoded as 0.
-        return false;
-      }
-    }
-  } else {
-    // Multiple n-element structures with type encoded as Inst{11-8}.
-    // See, for example, A8.6.316 VLD4 (multiple 4-element structures).
-
-    // Inst{5-4} encodes alignment.
-    unsigned align = slice(insn, 5, 4);
-    switch (align) {
-    default:
-      break;
-    case 1:
-      alignment = 64; break;
-    case 2:
-      alignment = 128; break;
-    case 3:
-      alignment = 256; break;
-    }
-
-    unsigned type = slice(insn, 11, 8);
-    // Reject UNDEFINED instructions based on type and align.
-    // Plus set DblSpaced flag where appropriate.
-    switch (elem) {
-    default:
-      break;
-    case 1:
-      // n == 1
-      // A8.6.307 & A8.6.391
-      if ((type == 7 && slice(align, 1, 1) == 1) ||
-          (type == 10 && align == 3) ||
-          (type == 6 && slice(align, 1, 1) == 1))
-        return false;
-      break;
-    case 2:
-      // n == 2 && type == 0b1001 -> DblSpaced = true
-      // A8.6.310 & A8.6.393
-      if ((type == 8 || type == 9) && align == 3)
-        return false;
-      DblSpaced = (type == 9);
-      break;
-    case 3:
-      // n == 3 && type == 0b0101 -> DblSpaced = true
-      // A8.6.313 & A8.6.395
-      if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1)
-        return false;
-      DblSpaced = (type == 5);
-      break;
-    case 4:
-      // n == 4 && type == 0b0001 -> DblSpaced = true
-      // A8.6.316 & A8.6.397
-      if (slice(insn, 7, 6) == 3)
-        return false;
-      DblSpaced = (type == 1);
-      break;
-    }
-  }
-  return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded,
-                           slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B);
-}
-
-// VMOV (immediate)
-//   Qd/Dd imm
-// VBIC (immediate)
-// VORR (immediate)
-//   Qd/Dd imm src(=Qd/Dd)
-static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
-    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  const MCInstrDesc &MCID = ARMInsts[Opcode];
-  const MCOperandInfo *OpInfo = MCID.OpInfo;
-
-  assert(NumOps >= 2 &&
-         (OpInfo[0].RegClass == ARM::DPRRegClassID ||
-          OpInfo[0].RegClass == ARM::QPRRegClassID) &&
-         (OpInfo[1].RegClass < 0) &&
-         "Expect 1 reg operand followed by 1 imm operand");
-
-  // Qd/Dd = Inst{22:15-12} => NEON Rd
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
-                                                     decodeNEONRd(insn))));
-
-  ElemSize esize = ESizeNA;
-  switch (Opcode) {
-  case ARM::VMOVv8i8:
-  case ARM::VMOVv16i8:
-    esize = ESize8;
-    break;
-  case ARM::VMOVv4i16:
-  case ARM::VMOVv8i16:
-  case ARM::VMVNv4i16:
-  case ARM::VMVNv8i16:
-  case ARM::VBICiv4i16:
-  case ARM::VBICiv8i16:
-  case ARM::VORRiv4i16:
-  case ARM::VORRiv8i16:
-    esize = ESize16;
-    break;
-  case ARM::VMOVv2i32:
-  case ARM::VMOVv4i32:
-  case ARM::VMVNv2i32:
-  case ARM::VMVNv4i32:
-  case ARM::VBICiv2i32:
-  case ARM::VBICiv4i32:
-  case ARM::VORRiv2i32:
-  case ARM::VORRiv4i32:
-    esize = ESize32;
-    break;
-  case ARM::VMOVv1i64:
-  case ARM::VMOVv2i64:
- esize = ESize64; - break; - default: - assert(0 && "Unexpected opcode!"); - return false; - } - - // One register and a modified immediate value. - // Add the imm operand. - MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize))); - - NumOpsAdded = 2; - - // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in. - if (NumOps >= 3 && - (OpInfo[2].RegClass == ARM::DPRRegClassID || - OpInfo[2].RegClass == ARM::QPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass, - decodeNEONRd(insn)))); - NumOpsAdded += 1; - } - - return true; -} - -namespace { -enum N2VFlag { - N2V_None, - N2V_VectorDupLane, - N2V_VectorConvert_Between_Float_Fixed -}; -} // End of unnamed namespace - -// Vector Convert [between floating-point and fixed-point] -// Qd/Dd Qm/Dm [fbits] -// -// Vector Duplicate Lane (from scalar to all elements) Instructions. -// VDUPLN16d, VDUPLN16q, VDUPLN32d, VDUPLN32q, VDUPLN8d, VDUPLN8q: -// Qd/Dd Dm index -// -// Vector Move Long: -// Qd Dm -// -// Vector Move Narrow: -// Dd Qm -// -// Others -static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opc]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - - assert(NumOps >= 2 && - (OpInfo[0].RegClass == ARM::DPRRegClassID || - OpInfo[0].RegClass == ARM::QPRRegClassID) && - (OpInfo[1].RegClass == ARM::DPRRegClassID || - OpInfo[1].RegClass == ARM::QPRRegClassID) && - "Expect >= 2 operands and first 2 as reg operands"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - ElemSize esize = ESizeNA; - if (Flag == N2V_VectorDupLane) { - // VDUPLN has its index embedded. Its size can be inferred from the Opcode. - assert(Opc >= ARM::VDUPLN16d && Opc <= ARM::VDUPLN8q && - "Unexpected Opcode"); - esize = (Opc == ARM::VDUPLN8d || Opc == ARM::VDUPLN8q) ? ESize8 - : ((Opc == ARM::VDUPLN16d || Opc == ARM::VDUPLN16q) ? ESize16 - : ESize32); - } - - // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeNEONRd(insn)))); - ++OpIdx; - - // VPADAL... - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { - // TIED_TO operand. - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - // Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeNEONRm(insn)))); - ++OpIdx; - - // VZIP and others have two TIED_TO reg operands. - int Idx; - while (OpIdx < NumOps && - (Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - // Add TIED_TO operand. - MI.addOperand(MI.getOperand(Idx)); - ++OpIdx; - } - - // Add the imm operand, if required. 
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - - unsigned imm = 0xFFFFFFFF; - - if (Flag == N2V_VectorDupLane) - imm = decodeNVLaneDupIndex(insn, esize); - if (Flag == N2V_VectorConvert_Between_Float_Fixed) - imm = decodeVCVTFractionBits(insn); - - assert(imm != 0xFFFFFFFF && "Internal error"); - MI.addOperand(MCOperand::CreateImm(imm)); - ++OpIdx; - } - - return true; -} - -static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_None, B); -} -static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorConvert_Between_Float_Fixed, B); -} -static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorDupLane, B); -} - -// Vector Shift [Accumulate] Instructions. -// Qd/Dd [Qd/Dd (TIED_TO)] Qm/Dm ShiftAmt -// -// Vector Shift Left Long (with maximum shift count) Instructions. -// VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size) -// -static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - - assert(NumOps >= 3 && - (OpInfo[0].RegClass == ARM::DPRRegClassID || - OpInfo[0].RegClass == ARM::QPRRegClassID) && - (OpInfo[1].RegClass == ARM::DPRRegClassID || - OpInfo[1].RegClass == ARM::QPRRegClassID) && - "Expect >= 3 operands and first 2 as reg operands"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeNEONRd(insn)))); - ++OpIdx; - - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { - // TIED_TO operand. - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - assert((OpInfo[OpIdx].RegClass == ARM::DPRRegClassID || - OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) && - "Reg operand expected"); - - // Qm/Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeNEONRm(insn)))); - ++OpIdx; - - assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected"); - - // Add the imm operand. - - // VSHLL has maximum shift count as the imm, inferred from its size. - unsigned Imm; - switch (Opcode) { - default: - Imm = decodeNVSAmt(insn, LeftShift); - break; - case ARM::VSHLLi8: - Imm = 8; - break; - case ARM::VSHLLi16: - Imm = 16; - break; - case ARM::VSHLLi32: - Imm = 32; - break; - } - MI.addOperand(MCOperand::CreateImm(Imm)); - ++OpIdx; - - return true; -} - -// Left shift instructions. -static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true, - B); -} -// Right shift instructions have different shift amount interpretation. 
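Concretely, for L:imm6 = 0:011010 the leading-bit test gives esize = 16, so the left-shift forms decode the amount as imm6 - esize = 10 while the right-shift forms decode 2*esize - imm6 = 6. A standalone sketch mirroring decodeNVSAmt above (assumes a valid encoding; helper name illustrative):

    #include <cassert>

    // Shift amount from L (Inst{7}) and imm6 (Inst{21-16}); see decodeNVSAmt.
    static unsigned nvShiftAmount(unsigned L, unsigned imm6, bool leftShift) {
      unsigned esize = L ? 64 : (imm6 >> 5 ? 32 : (imm6 >> 4 ? 16 : 8));
      if (leftShift)
        return esize == 64 ? imm6 : imm6 - esize;
      return esize == 64 ? 64 - imm6 : 2 * esize - imm6;
    }

    int main() {
      assert(nvShiftAmount(0, 26, true)  == 10); // VSHL-style left shift
      assert(nvShiftAmount(0, 26, false) == 6);  // VSHR-style right shift
    }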
-static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false, - B); -} - -namespace { -enum N3VFlag { - N3V_None, - N3V_VectorExtract, - N3V_VectorShift, - N3V_Multiply_By_Scalar -}; -} // End of unnamed namespace - -// NEON Three Register Instructions with Optional Immediate Operand -// -// Vector Extract Instructions. -// Qd/Dd Qn/Dn Qm/Dm imm4 -// -// Vector Shift (Register) Instructions. -// Qd/Dd Qm/Dm Qn/Dn (notice the order of m, n) -// -// Vector Multiply [Accumulate/Subtract] [Long] By Scalar Instructions. -// Qd/Dd Qn/Dn RestrictedDm index -// -// Others -static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - - // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs. - assert(NumOps >= 3 && - (OpInfo[0].RegClass == ARM::DPRRegClassID || - OpInfo[0].RegClass == ARM::QPRRegClassID) && - (OpInfo[1].RegClass == ARM::DPRRegClassID || - OpInfo[1].RegClass == ARM::QPRRegClassID) && - "Expect >= 3 operands and first 2 as reg operands"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - bool VdVnVm = Flag == N3V_VectorShift ? false : true; - bool IsImm4 = Flag == N3V_VectorExtract ? true : false; - bool IsDmRestricted = Flag == N3V_Multiply_By_Scalar ? true : false; - ElemSize esize = ESizeNA; - if (Flag == N3V_Multiply_By_Scalar) { - unsigned size = (insn >> 20) & 3; - if (size == 1) esize = ESize16; - if (size == 2) esize = ESize32; - assert (esize == ESize16 || esize == ESize32); - } - - // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeNEONRd(insn)))); - ++OpIdx; - - // VABA, VABAL, VBSLd, VBSLq, ... - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { - // TIED_TO operand. - MI.addOperand(MCOperand::CreateReg(0)); - ++OpIdx; - } - - // Dn = Inst{7:19-16} => NEON Rn - // or - // Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, OpInfo[OpIdx].RegClass, - VdVnVm ? decodeNEONRn(insn) - : decodeNEONRm(insn)))); - ++OpIdx; - - // Dm = Inst{5:3-0} => NEON Rm - // or - // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise - // or - // Dn = Inst{7:19-16} => NEON Rn - unsigned m = VdVnVm ? (IsDmRestricted ? decodeRestrictedDm(insn, esize) - : decodeNEONRm(insn)) - : decodeNEONRn(insn); - - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, OpInfo[OpIdx].RegClass, m))); - ++OpIdx; - - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // Add the imm operand. 
- unsigned Imm = 0; - if (IsImm4) - Imm = decodeN3VImm(insn); - else if (IsDmRestricted) - Imm = decodeRestrictedDmIndex(insn, esize); - else { - assert(0 && "Internal error: unreachable code!"); - return false; - } - - MI.addOperand(MCOperand::CreateImm(Imm)); - ++OpIdx; - } - - return true; -} - -static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_None, B); -} -static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorShift, B); -} -static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorExtract, B); -} -static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_Multiply_By_Scalar, B); -} - -// Vector Table Lookup -// -// VTBL1, VTBX1: Dd [Dd(TIED_TO)] Dn Dm -// VTBL2, VTBX2: Dd [Dd(TIED_TO)] Dn Dn+1 Dm -// VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm -// VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm -static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::DPRRegClassID && - OpInfo[1].RegClass == ARM::DPRRegClassID && - OpInfo[2].RegClass == ARM::DPRRegClassID && - "Expect >= 3 operands and first 3 as reg operands"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned Rn = decodeNEONRn(insn); - - // {Dn} encoded as len = 0b00 - // {Dn Dn+1} encoded as len = 0b01 - // {Dn Dn+1 Dn+2 } encoded as len = 0b10 - // {Dn Dn+1 Dn+2 Dn+3} encoded as len = 0b11 - unsigned Len = slice(insn, 9, 8) + 1; - - // Dd (the destination vector) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, - decodeNEONRd(insn)))); - ++OpIdx; - - // Process tied_to operand constraint. - int Idx; - if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - MI.addOperand(MI.getOperand(Idx)); - ++OpIdx; - } - - // Do the <list> now. - for (unsigned i = 0; i < Len; ++i) { - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && - "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, - Rn + i))); - ++OpIdx; - } - - // Dm (the index vector) - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && - "Reg operand (index vector) expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, - decodeNEONRm(insn)))); - ++OpIdx; - - return true; -} - -// Vector Get Lane (move scalar to ARM core register) Instructions. 
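The two lane-move handlers below share decodeNVLaneOpIndex; for byte lanes the index is assembled from Inst{21} and Inst{6-5}. A hedged standalone check of the byte case (hypothetical helper name, not part of this file):

    #include <cassert>
    #include <cstdint>

    // Byte-lane index for VMOV (scalar <-> ARM core register),
    // A8.6.328/A8.6.329: index = Inst{21} : Inst{6-5}.
    static unsigned byteLaneIndex(uint32_t insn) {
      return ((insn >> 21) & 1) << 2 | ((insn >> 5) & 3);
    }

    int main() {
      // Inst{21} = 1 and Inst{6-5} = 0b10 select byte lane 6.
      assert(byteLaneIndex((1u << 21) | (2u << 5)) == 6);
    }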
-// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index -static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - assert(MCID.getNumDefs() == 1 && NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::DPRRegClassID && - OpInfo[2].RegClass < 0 && - "Expect >= 3 operands with one dst operand"); - - ElemSize esize = - Opcode == ARM::VGETLNi32 ? ESize32 - : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? ESize16 - : ESize8); - - // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - // Dn = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, - decodeNEONRn(insn)))); - - MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); - - NumOpsAdded = 3; - return true; -} - -// Vector Set Lane (move ARM core register to scalar) Instructions. -// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index -static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - if (!OpInfo) return false; - - assert(MCID.getNumDefs() == 1 && NumOps >= 3 && - OpInfo[0].RegClass == ARM::DPRRegClassID && - OpInfo[1].RegClass == ARM::DPRRegClassID && - MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 && - OpInfo[2].RegClass == ARM::GPRRegClassID && - OpInfo[3].RegClass < 0 && - "Expect >= 3 operands with one dst operand"); - - ElemSize esize = - Opcode == ARM::VSETLNi8 ? ESize8 - : (Opcode == ARM::VSETLNi16 ? ESize16 - : ESize32); - - // Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, - decodeNEONRn(insn)))); - - // TIED_TO operand. - MI.addOperand(MCOperand::CreateReg(0)); - - // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); - - NumOpsAdded = 4; - return true; -} - -// Vector Duplicate Instructions (from ARM core register to all elements). 
-// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt -static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - - assert(NumOps >= 2 && - (OpInfo[0].RegClass == ARM::DPRRegClassID || - OpInfo[0].RegClass == ARM::QPRRegClassID) && - OpInfo[1].RegClass == ARM::GPRRegClassID && - "Expect >= 2 operands and first 2 as reg operand"); - - unsigned RegClass = OpInfo[0].RegClass; - - // Qd/Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass, - decodeNEONRn(insn)))); - - // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - NumOpsAdded = 2; - return true; -} - -static inline bool PreLoadOpcode(unsigned Opcode) { - switch(Opcode) { - case ARM::PLDi12: case ARM::PLDrs: - case ARM::PLDWi12: case ARM::PLDWrs: - case ARM::PLIi12: case ARM::PLIrs: - return true; - default: - return false; - } -} - -static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - // Preload Data/Instruction requires either 2 or 3 operands. - // PLDi12, PLDWi12, PLIi12: addrmode_imm12 - // PLDrs, PLDWrs, PLIrs: ldst_so_reg - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - - if (Opcode == ARM::PLDi12 || Opcode == ARM::PLDWi12 - || Opcode == ARM::PLIi12) { - unsigned Imm12 = slice(insn, 11, 0); - bool Negative = getUBit(insn) == 0; - - // A8.6.118 PLD (literal) PLDWi12 with Rn=PC is transformed to PLDi12. - if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) { - DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n"); - MI.setOpcode(ARM::PLDi12); - } - - // -0 is represented specially. All other values are as normal. - int Offset = Negative ? -1 * Imm12 : Imm12; - if (Imm12 == 0 && Negative) - Offset = INT32_MIN; - - MI.addOperand(MCOperand::CreateImm(Offset)); - NumOpsAdded = 2; - } else { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - - // Inst{6-5} encodes the shift opcode. - ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShImm = slice(insn, 11, 7); - - // A8.4.1. Possible rrx or shift amount of 32... - getImmShiftSE(ShOp, ShImm); - MI.addOperand(MCOperand::CreateImm( - ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp))); - NumOpsAdded = 3; - } - - return true; -} - -static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - if (Opcode == ARM::DMB || Opcode == ARM::DSB || Opcode == ARM::ISB) { - // Inst{3-0} encodes the memory barrier option for the variants. - unsigned opt = slice(insn, 3, 0); - switch (opt) { - case ARM_MB::SY: case ARM_MB::ST: - case ARM_MB::ISH: case ARM_MB::ISHST: - case ARM_MB::NSH: case ARM_MB::NSHST: - case ARM_MB::OSH: case ARM_MB::OSHST: - MI.addOperand(MCOperand::CreateImm(opt)); - NumOpsAdded = 1; - return true; - default: - return false; - } - } - - switch (Opcode) { - case ARM::CLREX: - case ARM::NOP: - case ARM::TRAP: - case ARM::YIELD: - case ARM::WFE: - case ARM::WFI: - case ARM::SEV: - return true; - case ARM::SWP: - case ARM::SWPB: - // SWP, SWPB: Rd Rm Rn - // Delegate to DisassembleLdStExFrm().... 
- return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); - default: - break; - } - - if (Opcode == ARM::SETEND) { - NumOpsAdded = 1; - MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9))); - return true; - } - - // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different - // opcodes which match the same real instruction. This is needed since there's - // no current handling of optional arguments. Fix here when a better handling - // of optional arguments is implemented. - if (Opcode == ARM::CPS3p) { // M = 1 - // Let's reject these impossible imod values by returning false: - // 1. (imod=0b01) - // - // AsmPrinter cannot handle imod=0b00, plus (imod=0b00,M=1,iflags!=0) is an - // invalid combination, so we just check for imod=0b00 here. - if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1) - return false; - MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod - MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode - NumOpsAdded = 3; - return true; - } - if (Opcode == ARM::CPS2p) { // mode = 0, M = 0 - // Let's reject these impossible imod values by returning false: - // 1. (imod=0b00,M=0) - // 2. (imod=0b01) - if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1) - return false; - MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod - MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags - NumOpsAdded = 2; - return true; - } - if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1 - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode - NumOpsAdded = 1; - return true; - } - - // DBG has its option specified in Inst{3-0}. - if (Opcode == ARM::DBG) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); - NumOpsAdded = 1; - return true; - } - - // BKPT takes an imm32 val equal to ZeroExtend(Inst{19-8:3-0}). - if (Opcode == ARM::BKPT) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 8) << 4 | - slice(insn, 3, 0))); - NumOpsAdded = 1; - return true; - } - - if (PreLoadOpcode(Opcode)) - return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); - - assert(0 && "Unexpected misc instruction!"); - return false; -} - -/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP. -/// We divide the disassembly task into different categories, with each one -/// corresponding to a specific instruction encoding format. There could be -/// exceptions when handling a specific format, and that is why the Opcode is -/// also present in the function prototype. -static const DisassembleFP FuncPtrs[] = { - &DisassemblePseudo, - &DisassembleMulFrm, - &DisassembleBrFrm, - &DisassembleBrMiscFrm, - &DisassembleDPFrm, - &DisassembleDPSoRegFrm, - &DisassembleLdFrm, - &DisassembleStFrm, - &DisassembleLdMiscFrm, - &DisassembleStMiscFrm, - &DisassembleLdStMulFrm, - &DisassembleLdStExFrm, - &DisassembleArithMiscFrm, - &DisassembleSatFrm, - &DisassembleExtFrm, - &DisassembleVFPUnaryFrm, - &DisassembleVFPBinaryFrm, - &DisassembleVFPConv1Frm, - &DisassembleVFPConv2Frm, - &DisassembleVFPConv3Frm, - &DisassembleVFPConv4Frm, - &DisassembleVFPConv5Frm, - &DisassembleVFPLdStFrm, - &DisassembleVFPLdStMulFrm, - &DisassembleVFPMiscFrm, - &DisassembleThumbFrm, - &DisassembleMiscFrm, - &DisassembleNGetLnFrm, - &DisassembleNSetLnFrm, - &DisassembleNDupFrm, - - // VLD and VST (including one lane) Instructions. 
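// From here down the table covers the NEON formats.  Each slot lines up with
// one ARMFormat enum value, so format-based dispatch reduces to a single
// indexed load, roughly (mirroring the ARMBasicMCBuilder constructor and
// BuildIt() below; shown here only as an illustration):
//
//   DisassembleFP Disasm = FuncPtrs[(unsigned)Format];
//   bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, Builder);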
-  &DisassembleNLdSt,
-
-  // A7.4.6 One register and a modified immediate value
-  // 1-Register Instructions with imm.
-  // LLVM only defines VMOVv instructions.
-  &DisassembleN1RegModImmFrm,
-
-  // 2-Register Instructions with no imm.
-  &DisassembleN2RegFrm,
-
-  // 2-Register Instructions with imm (vector convert float/fixed point).
-  &DisassembleNVCVTFrm,
-
-  // 2-Register Instructions with imm (vector dup lane).
-  &DisassembleNVecDupLnFrm,
-
-  // Vector Shift Left Instructions.
-  &DisassembleN2RegVecShLFrm,
-
-  // Vector Shift Right Instructions, which have a different interpretation of
-  // the shift amount in the imm6 field.
-  &DisassembleN2RegVecShRFrm,
-
-  // 3-Register Data-Processing Instructions.
-  &DisassembleN3RegFrm,
-
-  // Vector Shift (Register) Instructions.
-  // D:Vd M:Vm N:Vn (notice that M:Vm is the first operand)
-  &DisassembleN3RegVecShFrm,
-
-  // Vector Extract Instructions.
-  &DisassembleNVecExtractFrm,
-
-  // Vector [Saturating Rounding Doubling] Multiply [Accumulate/Subtract] [Long]
-  // By Scalar Instructions.
-  &DisassembleNVecMulScalarFrm,
-
-  // Vector Table Lookup uses byte indexes in a control vector to look up byte
-  // values in a table and generate a new vector.
-  &DisassembleNVTBLFrm,
-
-  NULL
-};
-
-/// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
-/// The general idea is to set the Opcode for the MCInst, followed by adding
-/// the appropriate MCOperands to the MCInst.  ARM Basic MC Builder delegates
-/// to the Format-specific disassemble function for disassembly, followed by
-/// TryPredicateAndSBitModifier() to do PredicateOperand and OptionalDefOperand
-/// which follow the Dst/Src Operands.
-bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) {
-  // Stage 1 sets the Opcode.
-  MI.setOpcode(Opcode);
-  // If the number of operands is zero, we're done!
-  if (NumOps == 0)
-    return true;
-
-  // Stage 2 calls the format-specific disassemble function to build the
-  // operand list.
-  if (Disasm == NULL)
-    return false;
-  unsigned NumOpsAdded = 0;
-  bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this);
-
-  if (!OK || this->Err != 0) return false;
-  if (NumOpsAdded >= NumOps)
-    return true;
-
-  // Stage 3 deals with operands unaccounted for after stage 2 is finished.
-  // FIXME: Should this be done selectively?
-  return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded);
-}
-
-// A8.3 Conditional execution
-// A8.3.1 Pseudocode details of conditional execution
-// Condition bits '111x' indicate the instruction is always executed.
-static uint32_t CondCode(uint32_t CondField) {
-  if (CondField == 0xF)
-    return ARMCC::AL;
-  return CondField;
-}
-
-/// DoPredicateOperands - DoPredicateOperands processes the predicate operands
-/// of some Thumb instructions which come before the reglist operands.  It
-/// returns true if the two predicate operands have been processed.
-bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
-    uint32_t /* insn */, unsigned short NumOpsRemaining) {
-
-  assert(NumOpsRemaining > 0 && "Invalid argument");
-
-  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-  unsigned Idx = MI.getNumOperands();
-
-  // First, we check whether this instr specifies the PredicateOperand through
-  // a pair of MCOperandInfos with isPredicate() property.
- if (NumOpsRemaining >= 2 && - OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && - OpInfo[Idx].RegClass < 0 && - OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) - { - // If we are inside an IT block, get the IT condition bits maintained via - // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). - // See also A2.5.2. - if (InITBlock()) - MI.addOperand(MCOperand::CreateImm(GetITCond())); - else - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - return true; - } - - return false; -} - -/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process -/// the possible Predicate and SBitModifier, to build the remaining MCOperand -/// constituents. -bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, - uint32_t insn, unsigned short NumOpsRemaining) { - - assert(NumOpsRemaining > 0 && "Invalid argument"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - const std::string &Name = ARMInsts[Opcode].Name; - unsigned Idx = MI.getNumOperands(); - uint64_t TSFlags = ARMInsts[Opcode].TSFlags; - - // First, we check whether this instr specifies the PredicateOperand through - // a pair of MCOperandInfos with isPredicate() property. - if (NumOpsRemaining >= 2 && - OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && - OpInfo[Idx].RegClass < 0 && - OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) - { - // If we are inside an IT block, get the IT condition bits maintained via - // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). - // See also A2.5.2. - if (InITBlock()) - MI.addOperand(MCOperand::CreateImm(GetITCond())); - else { - if (Name.length() > 1 && Name[0] == 't') { - // Thumb conditional branch instructions have their cond field embedded, - // like ARM. - // - // A8.6.16 B - // Check for undefined encodings. - unsigned cond; - if (Name == "t2Bcc") { - if ((cond = slice(insn, 25, 22)) >= 14) - return false; - MI.addOperand(MCOperand::CreateImm(CondCode(cond))); - } else if (Name == "tBcc") { - if ((cond = slice(insn, 11, 8)) == 14) - return false; - MI.addOperand(MCOperand::CreateImm(CondCode(cond))); - } else - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - } else { - // ARM instructions get their condition field from Inst{31-28}. - // We should reject Inst{31-28} = 0b1111 as invalid encoding. - if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF) - return false; - MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn)))); - } - } - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - Idx += 2; - NumOpsRemaining -= 2; - } - - if (NumOpsRemaining == 0) - return true; - - // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set. - if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0)); - --NumOpsRemaining; - } - - if (NumOpsRemaining == 0) - return true; - else - return false; -} - -/// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary -/// after BuildIt is finished. -bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, - uint32_t insn) { - - if (!SP) return Status; - - if (Opcode == ARM::t2IT) - Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false; - else if (InITBlock()) - SP->UpdateIT(); - - return Status; -} - -/// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. 
-ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format,
-                                     unsigned short num)
-  : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) {
-  unsigned Idx = (unsigned)format;
-  assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format");
-  Disasm = FuncPtrs[Idx];
-}
-
-/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
-/// infrastructure of an MCInst given the Opcode and Format of the instr.
-/// Return NULL if it fails to create/return a proper builder.  API clients
-/// are responsible for freeing the allocated memory.  Caching can be
-/// performed by the API clients to improve performance.
-ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) {
-  // For "Unknown format", fail by returning a NULL pointer.
-  if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) {
-    DEBUG(errs() << "Unknown format\n");
-    return 0;
-  }
-
-  return new ARMBasicMCBuilder(Opcode, Format,
-                               ARMInsts[Opcode].getNumOperands());
-}
-
-/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
-/// operand in place of the immediate Value in the MCInst.  The immediate
-/// Value has had any PC adjustment made by the caller.  If the getOpInfo()
-/// function was set as part of the setupBuilderForSymbolicDisassembly() call
-/// then that function is called to get any symbolic information at the
-/// builder's Address for this instruction.  If that returns non-zero then the
-/// symbolic information it returns is used to create an MCExpr and that is
-/// added as an operand to the MCInst.  This function returns true if it adds
-/// an operand to the MCInst and false otherwise.
-bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value,
-                                                 uint64_t InstSize,
-                                                 MCInst &MI) {
-  if (!GetOpInfo)
-    return false;
-
-  struct LLVMOpInfo1 SymbolicOp;
-  SymbolicOp.Value = Value;
-  if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp))
-    return false;
-
-  const MCExpr *Add = NULL;
-  if (SymbolicOp.AddSymbol.Present) {
-    if (SymbolicOp.AddSymbol.Name) {
-      StringRef Name(SymbolicOp.AddSymbol.Name);
-      MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
-      Add = MCSymbolRefExpr::Create(Sym, *Ctx);
-    } else {
-      Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
-    }
-  }
-
-  const MCExpr *Sub = NULL;
-  if (SymbolicOp.SubtractSymbol.Present) {
-    if (SymbolicOp.SubtractSymbol.Name) {
-      StringRef Name(SymbolicOp.SubtractSymbol.Name);
-      MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
-      Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
-    } else {
-      Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
-    }
-  }
-
-  const MCExpr *Off = NULL;
-  if (SymbolicOp.Value != 0)
-    Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
-
-  const MCExpr *Expr;
-  if (Sub) {
-    const MCExpr *LHS;
-    if (Add)
-      LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
-    else
-      LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
-    if (Off != 0)
-      Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
-    else
-      Expr = LHS;
-  } else if (Add) {
-    if (Off != 0)
-      Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
-    else
-      Expr = Add;
-  } else {
-    if (Off != 0)
-      Expr = Off;
-    else
-      Expr = MCConstantExpr::Create(0, *Ctx);
-  }
-
-  if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
-    MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
-  else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
-    MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
-  else if (SymbolicOp.VariantKind ==
           LLVMDisassembler_VariantKind_None)
-    MI.addOperand(MCOperand::CreateExpr(Expr));
-  else
-    assert(0 && "bad SymbolicOp.VariantKind");
-
-  return true;
-}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
deleted file mode 100644
index a7ba141..0000000
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ /dev/null
@@ -1,336 +0,0 @@
-//===- ARMDisassemblerCore.h - ARM disassembler helpers ---------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-//
-// The first part defines the enumeration type of ARM instruction format, which
-// specifies the encoding used by the instruction, as well as a helper function
-// to convert the enums to printable char strings.
-//
-// It also contains code to represent the concepts of Builder and DisassembleFP
-// to solve the problem of disassembling an ARM instr.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMDISASSEMBLERCORE_H
-#define ARMDISASSEMBLERCORE_H
-
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm-c/Disassembler.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
-#include "ARMDisassembler.h"
-
-namespace llvm {
-class MCContext;
-
-class ARMUtils {
-public:
-  static const char *OpcodeName(unsigned Opcode);
-};
-
-/////////////////////////////////////////////////////
-//                                                 //
-//  Enums and Utilities for ARM Instruction Format //
-//                                                 //
-/////////////////////////////////////////////////////
-
-#define ARM_FORMATS                   \
-  ENTRY(ARM_FORMAT_PSEUDO,         0) \
-  ENTRY(ARM_FORMAT_MULFRM,         1) \
-  ENTRY(ARM_FORMAT_BRFRM,          2) \
-  ENTRY(ARM_FORMAT_BRMISCFRM,      3) \
-  ENTRY(ARM_FORMAT_DPFRM,          4) \
-  ENTRY(ARM_FORMAT_DPSOREGFRM,     5) \
-  ENTRY(ARM_FORMAT_LDFRM,          6) \
-  ENTRY(ARM_FORMAT_STFRM,          7) \
-  ENTRY(ARM_FORMAT_LDMISCFRM,      8) \
-  ENTRY(ARM_FORMAT_STMISCFRM,      9) \
-  ENTRY(ARM_FORMAT_LDSTMULFRM,    10) \
-  ENTRY(ARM_FORMAT_LDSTEXFRM,     11) \
-  ENTRY(ARM_FORMAT_ARITHMISCFRM,  12) \
-  ENTRY(ARM_FORMAT_SATFRM,        13) \
-  ENTRY(ARM_FORMAT_EXTFRM,        14) \
-  ENTRY(ARM_FORMAT_VFPUNARYFRM,   15) \
-  ENTRY(ARM_FORMAT_VFPBINARYFRM,  16) \
-  ENTRY(ARM_FORMAT_VFPCONV1FRM,   17) \
-  ENTRY(ARM_FORMAT_VFPCONV2FRM,   18) \
-  ENTRY(ARM_FORMAT_VFPCONV3FRM,   19) \
-  ENTRY(ARM_FORMAT_VFPCONV4FRM,   20) \
-  ENTRY(ARM_FORMAT_VFPCONV5FRM,   21) \
-  ENTRY(ARM_FORMAT_VFPLDSTFRM,    22) \
-  ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
-  ENTRY(ARM_FORMAT_VFPMISCFRM,    24) \
-  ENTRY(ARM_FORMAT_THUMBFRM,      25) \
-  ENTRY(ARM_FORMAT_MISCFRM,       26) \
-  ENTRY(ARM_FORMAT_NEONGETLNFRM,  27) \
-  ENTRY(ARM_FORMAT_NEONSETLNFRM,  28) \
-  ENTRY(ARM_FORMAT_NEONDUPFRM,    29) \
-  ENTRY(ARM_FORMAT_NLdSt,         30) \
-  ENTRY(ARM_FORMAT_N1RegModImm,   31) \
-  ENTRY(ARM_FORMAT_N2Reg,         32) \
-  ENTRY(ARM_FORMAT_NVCVT,         33) \
-  ENTRY(ARM_FORMAT_NVecDupLn,     34) \
-  ENTRY(ARM_FORMAT_N2RegVecShL,   35) \
-  ENTRY(ARM_FORMAT_N2RegVecShR,   36) \
-  ENTRY(ARM_FORMAT_N3Reg,         37) \
-  ENTRY(ARM_FORMAT_N3RegVecSh,    38) \
-  ENTRY(ARM_FORMAT_NVecExtract,   39) \
-  ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
-  ENTRY(ARM_FORMAT_NVTBL,         41)
-
-// ARM instruction format specifies the encoding used by the instruction.
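// ARM_FORMATS above is an X-macro: re-defining ENTRY lets the one list expand
// into both the ARMFormat enum and the string table that follow.  A
// self-contained sketch of the same pattern (all MY_* names are illustrative,
// not part of the original file):

#define MY_FORMATS MY_ENTRY(MY_FMT_A, 0) MY_ENTRY(MY_FMT_B, 1)

#define MY_ENTRY(n, v) n = v,
enum MyFmt { MY_FORMATS MY_FMT_NA };
#undef MY_ENTRY

#define MY_ENTRY(n, v) case n: return #n;
static inline const char *myFmtName(MyFmt F) {
  switch (F) { MY_FORMATS default: return ""; }
}
#undef MY_ENTRY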
-#define ENTRY(n, v) n = v, -typedef enum { - ARM_FORMATS - ARM_FORMAT_NA -} ARMFormat; -#undef ENTRY - -// Converts enum to const char*. -static const inline char *stringForARMFormat(ARMFormat form) { -#define ENTRY(n, v) case n: return #n; - switch(form) { - ARM_FORMATS - case ARM_FORMAT_NA: - default: - return ""; - } -#undef ENTRY -} - -/// Expands on the enum definitions from ARMBaseInstrInfo.h. -/// They are being used by the disassembler implementation. -namespace ARMII { - enum { - NEONRegMask = 15, - GPRRegMask = 15, - NEON_RegRdShift = 12, - NEON_D_BitShift = 22, - NEON_RegRnShift = 16, - NEON_N_BitShift = 7, - NEON_RegRmShift = 0, - NEON_M_BitShift = 5 - }; -} - -/// Utility function for extracting [From, To] bits from a uint32_t. -static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) { - assert(From < 32 && To < 32 && From >= To); - return (Bits >> To) & ((1 << (From - To + 1)) - 1); -} - -/// Utility function for setting [From, To] bits to Val for a uint32_t. -static inline void setSlice(unsigned &Bits, unsigned From, unsigned To, - unsigned Val) { - assert(From < 32 && To < 32 && From >= To); - uint32_t Mask = ((1 << (From - To + 1)) - 1); - Bits &= ~(Mask << To); - Bits |= (Val & Mask) << To; -} - -// Return an integer result equal to the number of bits of x that are ones. -static inline uint32_t -BitCount (uint64_t x) -{ - // c accumulates the total bits set in x - uint32_t c; - for (c = 0; x; ++c) - { - x &= x - 1; // clear the least significant bit set - } - return c; -} - -static inline bool -BitIsSet (const uint64_t value, const uint64_t bit) -{ - return (value & (1ull << bit)) != 0; -} - -static inline bool -BitIsClear (const uint64_t value, const uint64_t bit) -{ - return (value & (1ull << bit)) == 0; -} - -/// Various utilities for checking the target specific flags. - -/// A unary data processing instruction doesn't have an Rn operand. -static inline bool isUnaryDP(uint64_t TSFlags) { - return (TSFlags & ARMII::UnaryDP); -} - -/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111. -static inline bool isNEONDomain(uint64_t TSFlags) { - return (TSFlags & ARMII::DomainNEON) || - (TSFlags & ARMII::DomainNEONA8); -} - -/// This four-bit field describes the addressing mode used. -/// See also ARMBaseInstrInfo.h. -static inline unsigned getAddrMode(uint64_t TSFlags) { - return (TSFlags & ARMII::AddrModeMask); -} - -/// {IndexModePre, IndexModePost} -/// Only valid for load and store ops. -/// See also ARMBaseInstrInfo.h. -static inline unsigned getIndexMode(uint64_t TSFlags) { - return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; -} - -/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList. -static inline bool isPrePostLdSt(uint64_t TSFlags) { - return (TSFlags & ARMII::IndexModeMask) != 0; -} - -// Forward declaration. -class ARMBasicMCBuilder; - -// Builder Object is mostly ignored except in some Thumb disassemble functions. -typedef ARMBasicMCBuilder *BO; - -/// DisassembleFP - DisassembleFP points to a function that disassembles an insn -/// and builds the MCOperand list upon disassembly. It returns false on failure -/// or true on success. The number of operands added is updated upon success. -typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder); - -/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC -/// infrastructure of an MCInst given the Opcode and Format of the instr. 
-/// Return NULL if it fails to create/return a proper builder.  API clients
-/// are responsible for freeing the allocated memory.  Caching can be
-/// performed by the API clients to improve performance.
-extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
-
-/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that
-/// knows how to build up the MCOperand list.
-class ARMBasicMCBuilder {
-  friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
-  unsigned Opcode;
-  ARMFormat Format;
-  unsigned short NumOps;
-  DisassembleFP Disasm;
-  Session *SP;
-  int Err; // !=0 if the builder encounters some error condition during build.
-
-private:
-  /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
-  ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num);
-
-public:
-  ARMBasicMCBuilder(ARMBasicMCBuilder &B)
-    : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
-      SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) {
-    Err = 0;
-  }
-
-  virtual ~ARMBasicMCBuilder() {}
-
-  void SetSession(Session *sp) {
-    SP = sp;
-  }
-
-  void SetErr(int ErrCode) {
-    Err = ErrCode;
-  }
-
-  /// DoPredicateOperands - DoPredicateOperands processes the predicate
-  /// operands of some Thumb instructions which come before the reglist
-  /// operands.  It returns true if the two predicate operands have been
-  /// processed.
-  bool DoPredicateOperands(MCInst& MI, unsigned Opcode,
-      uint32_t insn, unsigned short NumOpsRemaining);
-
-  /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to
-  /// process the possible Predicate and SBitModifier, to build the remaining
-  /// MCOperand constituents.
-  bool TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
-      uint32_t insn, unsigned short NumOpsRemaining);
-
-  /// InITBlock - InITBlock returns true if we are inside an IT block.
-  bool InITBlock() {
-    if (SP)
-      return SP->ITCounter > 0;
-
-    return false;
-  }
-
-  /// Build - Build delegates to BuildIt to perform the heavy lifting.  After
-  /// that, it invokes RunBuildAfterHook where some housekeeping can be done.
-  virtual bool Build(MCInst &MI, uint32_t insn) {
-    bool Status = BuildIt(MI, insn);
-    return RunBuildAfterHook(Status, MI, insn);
-  }
-
-  /// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
-  /// The general idea is to set the Opcode for the MCInst, followed by adding
-  /// the appropriate MCOperands to the MCInst.  ARM Basic MC Builder delegates
-  /// to the Format-specific disassemble function for disassembly, followed by
-  /// TryPredicateAndSBitModifier() for PredicateOperand and OptionalDefOperand
-  /// which follow the Dst/Src Operands.
-  virtual bool BuildIt(MCInst &MI, uint32_t insn);
-
-  /// RunBuildAfterHook - RunBuildAfterHook performs operations deemed
-  /// necessary after BuildIt is finished.
-  virtual bool RunBuildAfterHook(bool Status, MCInst &MI, uint32_t insn);
-
-private:
-  /// Get condition of the current IT instruction.
-  unsigned GetITCond() {
-    assert(SP);
-    return slice(SP->ITState, 7, 4);
-  }
-
-private:
-  //
-  // Hooks for symbolic disassembly via the public 'C' interface.
-  //
-  // The function to get the symbolic information for operands.
-  LLVMOpInfoCallback GetOpInfo;
-  // The pointer to the block of symbolic information for the above callback.
-  void *DisInfo;
-  // The assembly context for creating symbols and MCExprs in place of
-  // immediate operands when there is symbolic information.
-  MCContext *Ctx;
-  // The address of the instruction being disassembled.
-  uint64_t Address;
-
-public:
-  void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
-                                          void *disInfo, MCContext *ctx,
-                                          uint64_t address) {
-    GetOpInfo = getOpInfo;
-    DisInfo = disInfo;
-    Ctx = ctx;
-    Address = address;
-  }
-
-  uint64_t getBuilderAddress() const { return Address; }
-
-  /// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a
-  /// symbolic operand in place of the immediate Value in the MCInst.  The
-  /// immediate Value has had any PC adjustment made by the caller.  If the
-  /// getOpInfo() function was set as part of the
-  /// setupBuilderForSymbolicDisassembly() call then that function is called
-  /// to get any symbolic information at the builder's Address for this
-  /// instruction.  If that returns non-zero then the symbolic information it
-  /// returns is used to create an MCExpr and that is added as an operand to
-  /// the MCInst.  This function returns true if it adds an operand to the
-  /// MCInst and false otherwise.
-  bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI);
-
-};
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
deleted file mode 100644
index 834c6f6..0000000
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ /dev/null
@@ -1,2459 +0,0 @@
-//===- ThumbDisassemblerCore.h - Thumb disassembler helpers -----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains code for disassembling a Thumb instr.  It is to be included by
-// ARMDisassemblerCore.cpp because it contains the static DisassembleThumbFrm()
-// function which acts as the dispatcher to disassemble a Thumb instruction.
-//
-//===----------------------------------------------------------------------===//
-
-///////////////////////////////
-//                           //
-//     Utility Functions     //
-//                           //
-///////////////////////////////
-
-// Utilities for 16-bit Thumb instructions.
-/*
-15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-                [  tRt ]
-                         [ tRm ]  [ tRn ]  [ tRd ]
-                       D [   Rm   ]  [   Rd   ]
-
-                                  [ imm3]
-                [     imm5    ]
-             i  [     imm5    ]
-                   [        imm7        ]
-                         [     imm8     ]
-                [         imm11         ]
-
-            [ cond ]
-*/
-
-// Extract tRt: Inst{10-8}.
-static inline unsigned getT1tRt(uint32_t insn) {
-  return slice(insn, 10, 8);
-}
-
-// Extract tRm: Inst{8-6}.
-static inline unsigned getT1tRm(uint32_t insn) {
-  return slice(insn, 8, 6);
-}
-
-// Extract tRn: Inst{5-3}.
-static inline unsigned getT1tRn(uint32_t insn) {
-  return slice(insn, 5, 3);
-}
-
-// Extract tRd: Inst{2-0}.
-static inline unsigned getT1tRd(uint32_t insn) {
-  return slice(insn, 2, 0);
-}
-
-// Extract [D:Rd]: Inst{7:2-0}.
-static inline unsigned getT1Rd(uint32_t insn) {
-  return slice(insn, 7, 7) << 3 | slice(insn, 2, 0);
-}
-
-// Extract Rm: Inst{6-3}.
-static inline unsigned getT1Rm(uint32_t insn) {
-  return slice(insn, 6, 3);
-}
-
-// Extract imm3: Inst{8-6}.
-static inline unsigned getT1Imm3(uint32_t insn) {
-  return slice(insn, 8, 6);
-}
-
-// Extract imm5: Inst{10-6}.
-static inline unsigned getT1Imm5(uint32_t insn) {
-  return slice(insn, 10, 6);
-}
-
-// Extract i:imm5: Inst{9:7-3}.
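// Several Thumb immediates are scattered across non-contiguous bit ranges;
// the helper below stitches Inst{9} and Inst{7-3} back into one 6-bit value.
// Worked example (illustrative): insn = 0x288 has bit 9 set and bits 7-3 =
// 0b10001, so i:imm5 = 0b110001 = 49; tCBZ/tCBNZ later scale this by 2 for
// the branch offset (see DisassembleThumb1Misc()).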
-static inline unsigned getT1Imm6(uint32_t insn) { - return slice(insn, 9, 9) << 5 | slice(insn, 7, 3); -} - -// Extract imm7: Inst{6-0}. -static inline unsigned getT1Imm7(uint32_t insn) { - return slice(insn, 6, 0); -} - -// Extract imm8: Inst{7-0}. -static inline unsigned getT1Imm8(uint32_t insn) { - return slice(insn, 7, 0); -} - -// Extract imm11: Inst{10-0}. -static inline unsigned getT1Imm11(uint32_t insn) { - return slice(insn, 10, 0); -} - -// Extract cond: Inst{11-8}. -static inline unsigned getT1Cond(uint32_t insn) { - return slice(insn, 11, 8); -} - -static inline bool IsGPR(unsigned RegClass) { - return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID; -} - -// Utilities for 32-bit Thumb instructions. - -static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; } - -// Extract imm4: Inst{19-16}. -static inline unsigned getImm4(uint32_t insn) { - return slice(insn, 19, 16); -} - -// Extract imm3: Inst{14-12}. -static inline unsigned getImm3(uint32_t insn) { - return slice(insn, 14, 12); -} - -// Extract imm8: Inst{7-0}. -static inline unsigned getImm8(uint32_t insn) { - return slice(insn, 7, 0); -} - -// A8.6.61 LDRB (immediate, Thumb) and friends -// +/-: Inst{9} -// imm8: Inst{7-0} -static inline int decodeImm8(uint32_t insn) { - int Offset = getImm8(insn); - return slice(insn, 9, 9) ? Offset : -Offset; -} - -// Extract imm12: Inst{11-0}. -static inline unsigned getImm12(uint32_t insn) { - return slice(insn, 11, 0); -} - -// A8.6.63 LDRB (literal) and friends -// +/-: Inst{23} -// imm12: Inst{11-0} -static inline int decodeImm12(uint32_t insn) { - int Offset = getImm12(insn); - return slice(insn, 23, 23) ? Offset : -Offset; -} - -// Extract imm2: Inst{7-6}. -static inline unsigned getImm2(uint32_t insn) { - return slice(insn, 7, 6); -} - -// For BFI, BFC, t2SBFX, and t2UBFX. -// Extract lsb: Inst{14-12:7-6}. -static inline unsigned getLsb(uint32_t insn) { - return getImm3(insn) << 2 | getImm2(insn); -} - -// For BFI and BFC. -// Extract msb: Inst{4-0}. -static inline unsigned getMsb(uint32_t insn) { - return slice(insn, 4, 0); -} - -// For t2SBFX and t2UBFX. -// Extract widthminus1: Inst{4-0}. -static inline unsigned getWidthMinus1(uint32_t insn) { - return slice(insn, 4, 0); -} - -// For t2ADDri12 and t2SUBri12. -// imm12 = i:imm3:imm8; -static inline unsigned getIImm3Imm8(uint32_t insn) { - return slice(insn, 26, 26) << 11 | getImm3(insn) << 8 | getImm8(insn); -} - -// For t2MOVi16 and t2MOVTi16. -// imm16 = imm4:i:imm3:imm8; -static inline unsigned getImm16(uint32_t insn) { - return getImm4(insn) << 12 | slice(insn, 26, 26) << 11 | - getImm3(insn) << 8 | getImm8(insn); -} - -// Inst{5-4} encodes the shift type. -static inline unsigned getShiftTypeBits(uint32_t insn) { - return slice(insn, 5, 4); -} - -// Inst{14-12}:Inst{7-6} encodes the imm5 shift amount. 
-static inline unsigned getShiftAmtBits(uint32_t insn) {
-  return getImm3(insn) << 2 | getImm2(insn);
-}
-
-// A8.6.17 BFC
-// Encoding T1 ARMv6T2, ARMv7
-// LLVM-specific encoding for #<lsb> and #<width>
-static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) {
-  uint32_t lsb = getImm3(insn) << 2 | getImm2(insn);
-  uint32_t msb = getMsb(insn);
-  uint32_t Val = 0;
-  if (msb < lsb) {
-    DEBUG(errs() << "Encoding error: msb < lsb\n");
-    return false;
-  }
-  for (uint32_t i = lsb; i <= msb; ++i)
-    Val |= (1 << i);
-  mask = ~Val;
-  return true;
-}
-
-// A8.4 Shifts applied to a register
-// A8.4.1 Constant shifts
-// A8.4.3 Pseudocode details of instruction-specified shifts and rotates
-//
-// decodeImmShift() returns the shift amount and the shift opcode.
-// Note that, as of Jan-06-2010, LLVM does not support rrx shifted operands yet.
-static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
-                                      ARM_AM::ShiftOpc &ShOp) {
-
-  assert(imm5 < 32 && "Invalid imm5 argument");
-  switch (bits2) {
-  default: assert(0 && "No such value");
-  case 0:
-    ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
-    return imm5;
-  case 1:
-    ShOp = ARM_AM::lsr;
-    return (imm5 == 0 ? 32 : imm5);
-  case 2:
-    ShOp = ARM_AM::asr;
-    return (imm5 == 0 ? 32 : imm5);
-  case 3:
-    ShOp = (imm5 == 0 ? ARM_AM::rrx : ARM_AM::ror);
-    return (imm5 == 0 ? 1 : imm5);
-  }
-}
-
-// A6.3.2 Modified immediate constants in Thumb instructions
-//
-// ThumbExpandImm() returns the modified immediate constant given an imm12 for
-// Thumb data-processing instructions with modified immediate.
-// See also A6.3.1 Data-processing (modified immediate).
-static inline unsigned ThumbExpandImm(unsigned imm12) {
-  assert(imm12 <= 0xFFF && "Invalid imm12 argument");
-
-  // If the leading two bits are 0b00, the modified immediate constant is
-  // obtained by splatting the low 8 bits into the first byte, every other
-  // byte, or every byte of a 32-bit value.
-  //
-  // Otherwise, a rotate right of '1':imm12<6:0> by the amount imm12<11:7> is
-  // performed.
-
-  if (slice(imm12, 11, 10) == 0) {
-    unsigned short control = slice(imm12, 9, 8);
-    unsigned imm8 = slice(imm12, 7, 0);
-    switch (control) {
-    default:
-      assert(0 && "No such value");
-      return 0;
-    case 0:
-      return imm8;
-    case 1:
-      return imm8 << 16 | imm8;
-    case 2:
-      return imm8 << 24 | imm8 << 8;
-    case 3:
-      return imm8 << 24 | imm8 << 16 | imm8 << 8 | imm8;
-    }
-  } else {
-    // A rotate is required.
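// Worked example (illustrative): imm12 = 0x4FF has imm12<11:10> = 0b01, so
// none of the splat cases above apply.  Val = '1':imm12<6:0> = 0xFF, the
// rotate amount is imm12<11:7> = 9, and the expanded constant is
// rotr32(0xFF, 9) = 0x7F800000.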
- unsigned Val = 1 << 7 | slice(imm12, 6, 0); - unsigned Amt = slice(imm12, 11, 7); - return ARM_AM::rotr32(Val, Amt); - } -} - -static inline int decodeImm32_B_EncodingT3(uint32_t insn) { - bool S = slice(insn, 26, 26); - bool J1 = slice(insn, 13, 13); - bool J2 = slice(insn, 11, 11); - unsigned Imm21 = slice(insn, 21, 16) << 12 | slice(insn, 10, 0) << 1; - if (S) Imm21 |= 1 << 20; - if (J2) Imm21 |= 1 << 19; - if (J1) Imm21 |= 1 << 18; - - return SignExtend32<21>(Imm21); -} - -static inline int decodeImm32_B_EncodingT4(uint32_t insn) { - unsigned S = slice(insn, 26, 26); - bool I1 = slice(insn, 13, 13) == S; - bool I2 = slice(insn, 11, 11) == S; - unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1; - if (S) Imm25 |= 1 << 24; - if (I1) Imm25 |= 1 << 23; - if (I2) Imm25 |= 1 << 22; - - return SignExtend32<25>(Imm25); -} - -static inline int decodeImm32_BL(uint32_t insn) { - unsigned S = slice(insn, 26, 26); - bool I1 = slice(insn, 13, 13) == S; - bool I2 = slice(insn, 11, 11) == S; - unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1; - if (S) Imm25 |= 1 << 24; - if (I1) Imm25 |= 1 << 23; - if (I2) Imm25 |= 1 << 22; - - return SignExtend32<25>(Imm25); -} - -static inline int decodeImm32_BLX(uint32_t insn) { - unsigned S = slice(insn, 26, 26); - bool I1 = slice(insn, 13, 13) == S; - bool I2 = slice(insn, 11, 11) == S; - unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 1) << 2; - if (S) Imm25 |= 1 << 24; - if (I1) Imm25 |= 1 << 23; - if (I2) Imm25 |= 1 << 22; - - return SignExtend32<25>(Imm25); -} - -// See, for example, A8.6.221 SXTAB16. -static inline unsigned decodeRotate(uint32_t insn) { - unsigned rotate = slice(insn, 5, 4); - return rotate << 3; -} - -/////////////////////////////////////////////// -// // -// Thumb1 instruction disassembly functions. // -// // -/////////////////////////////////////////////// - -// See "Utilities for 16-bit Thumb instructions" for register naming convention. - -// A6.2.1 Shift (immediate), add, subtract, move, and compare -// -// shift immediate: tRd CPSR tRn imm5 -// add/sub register: tRd CPSR tRn tRm -// add/sub 3-bit immediate: tRd CPSR tRn imm3 -// add/sub 8-bit immediate: tRt CPSR tRt(TIED_TO) imm8 -// mov/cmp immediate: tRt [CPSR] imm8 (CPSR present for mov) -// -// Special case: -// tMOVSr: tRd tRn -static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID - && "Invalid arguments"); - - bool Imm3 = (Opcode == ARM::tADDi3 || Opcode == ARM::tSUBi3); - - // Use Rt implies use imm8. - bool UseRt = (Opcode == ARM::tADDi8 || Opcode == ARM::tSUBi8 || - Opcode == ARM::tMOVi8 || Opcode == ARM::tCMPi8); - - // Add the destination operand. - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::tGPRRegClassID, - UseRt ? getT1tRt(insn) : getT1tRd(insn)))); - ++OpIdx; - - // Check whether the next operand to be added is a CCR Register. - if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { - assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); - ++OpIdx; - } - - // Check whether the next operand to be added is a Thumb1 Register. 
- assert(OpIdx < NumOps && "More operands expected"); - if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { - // For UseRt, the reg operand is tied to the first reg operand. - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::tGPRRegClassID, - UseRt ? getT1tRt(insn) : getT1tRn(insn)))); - ++OpIdx; - } - - // Special case for tMOVSr. - if (OpIdx == NumOps) - return true; - - // The next available operand is either a reg operand or an imm operand. - if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { - // Three register operand instructions. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRm(insn)))); - } else { - assert(OpInfo[OpIdx].RegClass < 0 && - !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() - && "Pure imm operand expected"); - unsigned Imm = 0; - if (UseRt) - Imm = getT1Imm8(insn); - else if (Imm3) - Imm = getT1Imm3(insn); - else { - Imm = getT1Imm5(insn); - ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11)); - getImmShiftSE(ShOp, Imm); - } - MI.addOperand(MCOperand::CreateImm(Imm)); - } - ++OpIdx; - - return true; -} - -// A6.2.2 Data-processing -// -// tCMPr, tTST, tCMN: tRd tRn -// tMVN, tRSB: tRd CPSR tRn -// Others: tRd CPSR tRd(TIED_TO) tRn -static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass == ARM::CCRRegClassID - || OpInfo[1].RegClass == ARM::tGPRRegClassID) - && "Invalid arguments"); - - // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRd(insn)))); - ++OpIdx; - - // Check whether the next operand to be added is a CCR Register. - if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { - assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); - ++OpIdx; - } - - // We have either { tRd(TIED_TO), tRn } or { tRn } remaining. - // Process the TIED_TO operand first. - - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID - && "Thumb reg operand expected"); - int Idx; - if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - // The reg operand is tied to the first reg operand. - MI.addOperand(MI.getOperand(Idx)); - ++OpIdx; - } - - // Process possible next reg operand. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { - // Add tRn operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRn(insn)))); - ++OpIdx; - } - - return true; -} - -// A6.2.3 Special data instructions and branch and exchange -// -// tADDhirr: Rd Rd(TIED_TO) Rm -// tCMPhir: Rd Rm -// tMOVr, tMOVgpr2gpr, tMOVgpr2tgpr, tMOVtgpr2gpr: Rd|tRd Rm|tRn -// tBX: Rm -// tBX_RET: 0 operand -// tBX_RET_vararg: Rm -// tBLXr_r9: Rm -// tBRIND: Rm -static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - // tBX_RET has 0 operand. - if (NumOps == 0) - return true; - - // BX/BLX/tBRIND (indirect branch, i.e, mov pc, Rm) has 1 reg operand: Rm. - if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX || Opcode==ARM::tBRIND) { - if (Opcode == ARM::tBLXr_r9) { - // Handling the two predicate operands before the reg operand. 
- if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps)) - return false; - NumOpsAdded += 2; - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - getT1Rm(insn)))); - NumOpsAdded += 1; - - if (Opcode == ARM::tBX) { - // Handling the two predicate operands after the reg operand. - if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps)) - return false; - NumOpsAdded += 2; - } - - return true; - } - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - // Add the destination operand. - unsigned RegClass = OpInfo[OpIdx].RegClass; - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, - IsGPR(RegClass) ? getT1Rd(insn) - : getT1tRd(insn)))); - ++OpIdx; - - // We have either { Rd(TIED_TO), Rm } or { Rm|tRn } remaining. - // Process the TIED_TO operand first. - - assert(OpIdx < NumOps && "More operands expected"); - int Idx; - if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - // The reg operand is tied to the first reg operand. - MI.addOperand(MI.getOperand(Idx)); - ++OpIdx; - } - - // The next reg operand is either Rm or tRn. - assert(OpIdx < NumOps && "More operands expected"); - RegClass = OpInfo[OpIdx].RegClass; - MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, - IsGPR(RegClass) ? getT1Rm(insn) - : getT1tRn(insn)))); - ++OpIdx; - - return true; -} - -// A8.6.59 LDR (literal) -// -// tLDRpci: tRt imm8*4 -static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass < 0 && - !OpInfo[1].isPredicate() && - !OpInfo[1].isOptionalDef()) - && "Invalid arguments"); - - // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRt(insn)))); - - // And the (imm8 << 2) operand. - MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn) << 2)); - - NumOpsAdded = 2; - - return true; -} - -// Thumb specific addressing modes (see ARMInstrThumb.td): -// -// t_addrmode_rr := reg + reg -// -// t_addrmode_s4 := reg + reg -// reg + imm5 * 4 -// -// t_addrmode_s2 := reg + reg -// reg + imm5 * 2 -// -// t_addrmode_s1 := reg + reg -// reg + imm5 -// -// t_addrmode_sp := sp + imm8 * 4 -// - -// A8.6.63 LDRB (literal) -// A8.6.79 LDRSB (literal) -// A8.6.75 LDRH (literal) -// A8.6.83 LDRSH (literal) -// A8.6.59 LDR (literal) -// -// These instrs calculate an address from the PC value and an immediate offset. -// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1) -static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 2 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass < 0 && - "Expect >= 2 operands, first as reg, and second as imm operand"); - - // Build the register operand, followed by the (+/-)imm12 immediate. 
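// decodeImm12() (defined with the other 32-bit Thumb utilities above) folds
// in the U bit, Inst{23}: with imm12 = 8 it returns +8 when the bit is set
// and -8 when it is clear (illustrative values).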
- - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - MI.addOperand(MCOperand::CreateImm(decodeImm12(insn))); - - NumOpsAdded = 2; - - return true; -} - - -// A6.2.4 Load/store single data item -// -// Load/Store Register (reg|imm): tRd tRn imm5|tRm -// Load Register Signed Byte|Halfword: tRd tRn tRm -static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::tGPRRegClassID - && OpInfo[1].RegClass == ARM::tGPRRegClassID - && "Expect >= 2 operands and first two as thumb reg operands"); - - // Add the destination reg and the base reg. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRn(insn)))); - OpIdx = 2; - - // We have either { imm5 } or { tRm } remaining. - // Note that STR/LDR (register) should skip the imm5 offset operand for - // t_addrmode_s[1|2|4]. - - assert(OpIdx < NumOps && "More operands expected"); - - if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && - !OpInfo[OpIdx].isOptionalDef()) { - // Table A6-5 16-bit Thumb Load/store instructions - // opA = 0b0101 for STR/LDR (register) and friends. - // Otherwise, we have STR/LDR (immediate) and friends. - assert(opA != 5 && "Immediate operand expected for this opcode"); - MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn))); - ++OpIdx; - } else { - // The next reg operand is tRm, the offset. - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID - && "Thumb reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRm(insn)))); - ++OpIdx; - } - return true; -} - -// A6.2.4 Load/store single data item -// -// Load/Store Register SP relative: tRt ARM::SP imm8 -static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) - && "Unexpected opcode"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::tGPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && - (OpInfo[2].RegClass < 0 && - !OpInfo[2].isPredicate() && - !OpInfo[2].isOptionalDef()) - && "Invalid arguments"); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRt(insn)))); - MI.addOperand(MCOperand::CreateReg(ARM::SP)); - MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); - NumOpsAdded = 3; - return true; -} - -// Table A6-1 16-bit Thumb instruction encoding -// A8.6.10 ADR -// -// tADDrPCi: tRt imm8 -static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(Opcode == ARM::tADDrPCi && "Unexpected opcode"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass < 0 && - !OpInfo[1].isPredicate() && - !OpInfo[1].isOptionalDef()) - && "Invalid arguments"); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRt(insn)))); - 
MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); - NumOpsAdded = 2; - return true; -} - -// Table A6-1 16-bit Thumb instruction encoding -// A8.6.8 ADD (SP plus immediate) -// -// tADDrSPi: tRt ARM::SP imm8 -static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(Opcode == ARM::tADDrSPi && "Unexpected opcode"); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::tGPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && - (OpInfo[2].RegClass < 0 && - !OpInfo[2].isPredicate() && - !OpInfo[2].isOptionalDef()) - && "Invalid arguments"); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRt(insn)))); - MI.addOperand(MCOperand::CreateReg(ARM::SP)); - MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); - NumOpsAdded = 3; - return true; -} - -// tPUSH, tPOP: Pred-Imm Pred-CCR register_list -// -// where register_list = low registers + [lr] for PUSH or -// low registers + [pc] for POP -// -// "low registers" is specified by Inst{7-0} -// lr|pc is specified by Inst{8} -static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode"); - - unsigned &OpIdx = NumOpsAdded; - - // Handling the two predicate operands before the reglist. - if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) - OpIdx += 2; - else { - DEBUG(errs() << "Expected predicate operands not found.\n"); - return false; - } - - unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15) - | slice(insn, 7, 0); - - // Fill the variadic part of reglist. - for (unsigned i = 0; i < 16; ++i) { - if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - i))); - ++OpIdx; - } - } - - return true; -} - -// A6.2.5 Miscellaneous 16-bit instructions -// Delegate to DisassembleThumb1PushPop() for tPUSH & tPOP. -// -// tADDspi, tSUBspi: ARM::SP ARM::SP(TIED_TO) imm7 -// t2IT: firstcond=Inst{7-4} mask=Inst{3-0} -// tCBNZ, tCBZ: tRd imm6*2 -// tBKPT: imm8 -// tNOP, tSEV, tYIELD, tWFE, tWFI: -// no operand (except predicate pair) -// tSETENDBE, tSETENDLE, : -// no operand -// Others: tRd tRn -static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - if (NumOps == 0) - return true; - - if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP) - return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B); - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - - // Predicate operands are handled elsewhere. - if (NumOps == 2 && - OpInfo[0].isPredicate() && OpInfo[1].isPredicate() && - OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) { - return true; - } - - if (Opcode == ARM::tADDspi || Opcode == ARM::tSUBspi) { - // Special case handling for tADDspi and tSUBspi. - // A8.6.8 ADD (SP plus immediate) & A8.6.215 SUB (SP minus immediate) - MI.addOperand(MCOperand::CreateReg(ARM::SP)); - MI.addOperand(MCOperand::CreateReg(ARM::SP)); - MI.addOperand(MCOperand::CreateImm(getT1Imm7(insn))); - NumOpsAdded = 3; - return true; - } - - if (Opcode == ARM::t2IT) { - // Special case handling for If-Then. - // A8.6.50 IT - // Tag the (firstcond[0] bit << 4) along with mask. 
-
-    // firstcond
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 4)));
-
-    // firstcond[0] and mask
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));
-    NumOpsAdded = 2;
-    return true;
-  }
-
-  if (Opcode == ARM::tBKPT) {
-    MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); // breakpoint value
-    NumOpsAdded = 1;
-    return true;
-  }
-
-  // CPS has a singleton $opt operand that contains the following information:
-  // The first operand would be 0b10 for enable and 0b11 for disable in
-  // regular ARM, but in Thumb it is 0 for enable and 1 for disable, so map
-  // it to the ARM encoding.  The second gets the AIF flags from Inst{2-0}.
-  if (Opcode == ARM::tCPS) {
-    MI.addOperand(MCOperand::CreateImm(2 + slice(insn, 4, 4)));
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 2, 0)));
-    NumOpsAdded = 2;
-    return true;
-  }
-
-  assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
-         (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass == ARM::tGPRRegClassID)
-         && "Expect >= 2 operands");
-
-  // Add the destination operand.
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
-                                                     getT1tRd(insn))));
-
-  if (OpInfo[1].RegClass == ARM::tGPRRegClassID) {
-    // Two register instructions.
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
-                                                       getT1tRn(insn))));
-  } else {
-    // CBNZ, CBZ
-    assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&
-           "Unexpected opcode");
-    MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2));
-  }
-
-  NumOpsAdded = 2;
-
-  return true;
-}
-
-// A8.6.53 LDM / LDMIA
-// A8.6.189 STM / STMIA
-//
-// tLDMIA_UPD/tSTMIA_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
-// tLDMIA: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
-static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
-                                     uint32_t insn, unsigned short NumOps,
-                                     unsigned &NumOpsAdded, BO B) {
-  assert((Opcode == ARM::tLDMIA || Opcode == ARM::tLDMIA_UPD ||
-          Opcode == ARM::tSTMIA_UPD) && "Unexpected opcode");
-
-  unsigned tRt = getT1tRt(insn);
-  NumOpsAdded = 0;
-
-  // WB register, if necessary.
-  if (Opcode == ARM::tLDMIA_UPD || Opcode == ARM::tSTMIA_UPD) {
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       tRt)));
-    ++NumOpsAdded;
-  }
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     tRt)));
-  ++NumOpsAdded;
-
-  // Handling the two predicate operands before the reglist.
-  if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
-    NumOpsAdded += 2;
-  } else {
-    DEBUG(errs() << "Expected predicate operands not found.\n");
-    return false;
-  }
-
-  unsigned RegListBits = slice(insn, 7, 0);
-  if (BitCount(RegListBits) < 1) {
-    DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n");
-    return false;
-  }
-
-  // Fill the variadic part of reglist.
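// Worked example (illustrative): RegListBits = 0b01000101 expands to
// {R0, R2, R6}; the loop below adds one register operand per set bit,
// scanning upward from bit 0.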
- for (unsigned i = 0; i < 8; ++i) - if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, - i))); - ++NumOpsAdded; - } - - return true; -} - -static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded, - B); -} - -static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded, - B); -} - -// A8.6.16 B Encoding T1 -// cond = Inst{11-8} & imm8 = Inst{7-0} -// imm32 = SignExtend(imm8:'0', 32) -// -// tBcc: offset Pred-Imm Pred-CCR -// tSVC: imm8 Pred-Imm Pred-CCR -// tTRAP: 0 operand (early return) -static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - - if (Opcode == ARM::tTRAP) - return true; - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps == 3 && OpInfo[0].RegClass < 0 && - OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID - && "Exactly 3 operands expected"); - - unsigned Imm8 = getT1Imm8(insn); - MI.addOperand(MCOperand::CreateImm( - Opcode == ARM::tBcc ? SignExtend32<9>(Imm8 << 1) - : (int)Imm8)); - - // Predicate operands by ARMBasicMCBuilder::TryPredicateAndSBitModifier(). - // But note that for tBcc, if cond = '1110' then UNDEFINED. - if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) { - DEBUG(errs() << "if cond = '1110' then UNDEFINED\n"); - return false; - } - NumOpsAdded = 1; - - return true; -} - -// A8.6.16 B Encoding T2 -// imm11 = Inst{10-0} -// imm32 = SignExtend(imm11:'0', 32) -// -// tB: offset -static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - if (!OpInfo) return false; - - assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected"); - - unsigned Imm11 = getT1Imm11(insn); - - MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1))); - - NumOpsAdded = 1; - - return true; - -} - -// See A6.2 16-bit Thumb instruction encoding for instruction classes -// corresponding to op. 
-// -// Table A6-1 16-bit Thumb instruction encoding (abridged) -// op Instruction or instruction class -// ------ -------------------------------------------------------------------- -// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7 -// 010000 Data-processing on page A6-8 -// 010001 Special data instructions and branch and exchange on page A6-9 -// 01001x Load from Literal Pool, see LDR (literal) on page A8-122 -// 0101xx Load/store single data item on page A6-10 -// 011xxx -// 100xxx -// 10100x Generate PC-relative address, see ADR on page A8-32 -// 10101x Generate SP-relative address, see ADD (SP plus immediate) on -// page A8-28 -// 1011xx Miscellaneous 16-bit instructions on page A6-11 -// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374 -// 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a -// 1101xx Conditional branch, and Supervisor Call on page A6-13 -// 11100x Unconditional Branch, see B on page A8-44 -// -static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - unsigned op1 = slice(op, 5, 4); - unsigned op2 = slice(op, 3, 2); - unsigned op3 = slice(op, 1, 0); - unsigned opA = slice(op, 5, 2); - switch (op1) { - case 0: - // A6.2.1 Shift (immediate), add, subtract, move, and compare - return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B); - case 1: - switch (op2) { - case 0: - switch (op3) { - case 0: - // A6.2.2 Data-processing - return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B); - case 1: - // A6.2.3 Special data instructions and branch and exchange - return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - default: - // A8.6.59 LDR (literal) - return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } - break; - default: - // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, - B); - break; - } - break; - case 2: - switch (op2) { - case 0: - // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, - B); - case 1: - // A6.2.4 Load/store single data item - return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B); - case 2: - if (op3 <= 1) { - // A8.6.10 ADR - return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } else { - // A8.6.8 ADD (SP plus immediate) - return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - default: - // A6.2.5 Miscellaneous 16-bit instructions - return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } - break; - case 3: - switch (op2) { - case 0: - if (op3 <= 1) { - // A8.6.189 STM / STMIA / STMEA - return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } else { - // A8.6.53 LDM / LDMIA / LDMFD - return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } - case 1: - // A6.2.6 Conditional branch, and Supervisor Call - return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B); - case 2: - // Unconditional Branch, see B on page A8-44 - return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B); - default: - assert(0 && "Unreachable code"); - break; - } - break; - default: - assert(0 && "Unreachable code"); - break; - } - - return false; -} - -/////////////////////////////////////////////// -// // -// Thumb2 instruction disassembly functions. 
// -// // -/////////////////////////////////////////////// - -/////////////////////////////////////////////////////////// -// // -// Note: the register naming follows the ARM convention! // -// // -/////////////////////////////////////////////////////////// - -static inline bool Thumb2SRSOpcode(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case ARM::t2SRSDBW: case ARM::t2SRSDB: - case ARM::t2SRSIAW: case ARM::t2SRSIA: - return true; - } -} - -static inline bool Thumb2RFEOpcode(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case ARM::t2RFEDBW: case ARM::t2RFEDB: - case ARM::t2RFEIAW: case ARM::t2RFEIA: - return true; - } -} - -// t2SRS[IA|DB]W/t2SRS[IA|DB]: mode_imm = Inst{4-0} -static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); - NumOpsAdded = 1; - return true; -} - -// t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn -static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - unsigned Rn = decodeRn(insn); - if (Rn == 15) { - DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n"); - return false; - } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn))); - NumOpsAdded = 1; - return true; -} - -static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - if (Thumb2SRSOpcode(Opcode)) - return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded); - - if (Thumb2RFEOpcode(Opcode)) - return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B); - - assert((Opcode == ARM::t2LDMIA || Opcode == ARM::t2LDMIA_UPD || - Opcode == ARM::t2LDMDB || Opcode == ARM::t2LDMDB_UPD || - Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD || - Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD) - && "Unexpected opcode"); - assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4"); - - NumOpsAdded = 0; - - unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); - - // Writeback to base. - if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD || - Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB_UPD) { - MI.addOperand(MCOperand::CreateReg(Base)); - ++NumOpsAdded; - } - - MI.addOperand(MCOperand::CreateReg(Base)); - ++NumOpsAdded; - - // Handling the two predicate operands before the reglist. - if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) { - NumOpsAdded += 2; - } else { - DEBUG(errs() << "Expected predicate operands not found.\n"); - return false; - } - - unsigned RegListBits = insn & ((1 << 16) - 1); - - // Fill the variadic part of reglist. 
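-  // Illustrative example (editor's sketch): RegListBits = 0x4030 has bits 4,
-  // 5, and 14 set, so the loop below appends R4, R5, and LR.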
-  for (unsigned i = 0; i < 16; ++i)
-    if ((RegListBits >> i) & 1) {
-      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                         i)));
-      ++NumOpsAdded;
-    }
-
-  return true;
-}
-
-// t2LDREX: Rd Rn
-// t2LDREXD: Rd Rs Rn
-// t2LDREXB, t2LDREXH: Rd Rn
-// t2STREX: Rs Rd Rn
-// t2STREXD: Rm Rd Rs Rn
-// t2STREXB, t2STREXH: Rm Rd Rn
-static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-  if (!OpInfo) return false;
-
-  unsigned &OpIdx = NumOpsAdded;
-
-  OpIdx = 0;
-
-  assert(NumOps >= 2
-         && OpInfo[0].RegClass > 0
-         && OpInfo[1].RegClass > 0
-         && "Expect >=2 operands and first two as reg operands");
-
-  bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH);
-  bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX);
-  bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD);
-
-  unsigned Rt = decodeRd(insn);
-  unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX.
-  unsigned Rd = decodeRm(insn);
-  unsigned Rn = decodeRn(insn);
-
-  // Some sanity checking first.
-  if (isStore) {
-    // if d == n || d == t then UNPREDICTABLE
-    // if d == n || d == t || d == t2 then UNPREDICTABLE
-    if (isDW) {
-      if (Rd == Rn || Rd == Rt || Rd == Rt2) {
-        DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n");
-        return false;
-      }
-    } else {
-      if (isSW) {
-        if (Rt2 == Rn || Rt2 == Rt) {
-          DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
-          return false;
-        }
-      } else {
-        if (Rd == Rn || Rd == Rt) {
-          DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
-          return false;
-        }
-      }
-    }
-  } else {
-    // Load
-    // A8.6.71 LDREXD
-    // if t == t2 then UNPREDICTABLE
-    if (isDW && Rt == Rt2) {
-      DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
-      return false;
-    }
-  }
-
-  // Add the destination operand for store.
-  if (isStore) {
-    MI.addOperand(MCOperand::CreateReg(
-                    getRegisterEnum(B, OpInfo[OpIdx].RegClass,
-                                    isSW ? Rt2 : Rd)));
-    ++OpIdx;
-  }
-
-  // Source operand for store and destination operand for load.
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
-                                                     Rt)));
-  ++OpIdx;
-
-  // Thumb2 doubleword: add the extra source/destination operand.
-  if (isDW) {
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
-                                                       Rt2)));
-    ++OpIdx;
-  }
-
-  // Finally add the pointer operand.
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
-                                                     Rn)));
-  ++OpIdx;
-
-  return true;
-}
-
-// t2LDRDi8: Rd Rs Rn imm8s4 (offset mode)
-// t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version)
-// t2STRDi8: Rd Rs Rn imm8s4 (offset mode)
-//
-// Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for
-// disassembly only and do not have a tied_to writeback base register operand.
-static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
-    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-  if (!OpInfo) return false;
-
-  assert(NumOps >= 4
-         && OpInfo[0].RegClass > 0
-         && OpInfo[0].RegClass == OpInfo[1].RegClass
-         && OpInfo[2].RegClass > 0
-         && OpInfo[3].RegClass < 0
-         && "Expect >= 4 operands and first 3 as reg operands");
-
-  // Thumb allows for specifying Rt and Rt2, unlike ARM (which has Rt2==Rt+1).
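-  // For example (editor's note), "ldrd r3, r9, [r0, #8]" is encodable in
-  // Thumb2, whereas ARM mode requires an even Rt with Rt2 == Rt+1.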
- unsigned Rt = decodeRd(insn); - unsigned Rt2 = decodeRs(insn); - unsigned Rn = decodeRn(insn); - - // Some sanity checking first. - - // A8.6.67 LDRD (literal) has its W bit as (0). - if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST) { - if (Rn == 15 && slice(insn, 21, 21) != 0) - return false; - } else { - // For Dual Store, PC cannot be used as the base register. - if (Rn == 15) { - DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n"); - return false; - } - } - if (Rt == Rt2) { - DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n"); - return false; - } - if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) { - if (Rn == Rt || Rn == Rt2) { - DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n"); - return false; - } - } - - // Add the <Rt> <Rt2> operands. - unsigned RegClassPair = OpInfo[0].RegClass; - unsigned RegClassBase = OpInfo[2].RegClass; - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair, - decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair, - decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase, - decodeRn(insn)))); - - // Finally add (+/-)imm8*4, depending on the U bit. - int Offset = getImm8(insn) * 4; - if (getUBit(insn) == 0) - Offset = -Offset; - MI.addOperand(MCOperand::CreateImm(Offset)); - NumOpsAdded = 4; - - return true; -} - -// t2TBB, t2TBH: Rn Rm Pred-Imm Pred-CCR -static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - assert(NumOps >= 2 && "Expect >= 2 operands"); - - // The generic version of TBB/TBH needs a base register. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - // Add the index register. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - NumOpsAdded = 2; - - return true; -} - -static inline bool Thumb2ShiftOpcode(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case ARM::t2MOVCClsl: case ARM::t2MOVCClsr: - case ARM::t2MOVCCasr: case ARM::t2MOVCCror: - case ARM::t2LSLri: case ARM::t2LSRri: - case ARM::t2ASRri: case ARM::t2RORri: - return true; - } -} - -// A6.3.11 Data-processing (shifted register) -// -// Two register operands (Rn=0b1111 no 1st operand reg): Rs Rm -// Two register operands (Rs=0b1111 no dst operand reg): Rn Rm -// Three register operands: Rs Rn Rm -// Three register operands: (Rn=0b1111 Conditional Move) Rs Ro(TIED_TO) Rm -// -// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 -// register with shift forms: (Rm, ConstantShiftSpecifier). -// Constant shift specifier: Imm = (ShOp | ShAmt<<3). -// -// There are special instructions, like t2MOVsra_flag and t2MOVsrl_flag, which -// only require two register operands: Rd, Rm in ARM Reference Manual terms, and -// nothing else, because the shift amount is already specified. -// Similar case holds for t2MOVrx, t2ADDrr, ..., etc. -static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - // Special case handling. 
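-  // (Editor's note: t2BR_JT is an LLVM jump-table pseudo-instruction; only
-  // the index register is recoverable from the encoding, so the remaining
-  // operands are filled in with dummy values below.)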
-  if (Opcode == ARM::t2BR_JT) {
-    assert(NumOps == 4
-           && OpInfo[0].RegClass == ARM::GPRRegClassID
-           && OpInfo[1].RegClass == ARM::GPRRegClassID
-           && OpInfo[2].RegClass < 0
-           && OpInfo[3].RegClass < 0
-           && "Exactly 4 operands expected and first two as reg operands");
-    // Only need to populate the src reg operand.
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRm(insn))));
-    MI.addOperand(MCOperand::CreateReg(0));
-    MI.addOperand(MCOperand::CreateImm(0));
-    MI.addOperand(MCOperand::CreateImm(0));
-    NumOpsAdded = 4;
-    return true;
-  }
-
-  OpIdx = 0;
-
-  assert(NumOps >= 2
-         && (OpInfo[0].RegClass == ARM::GPRRegClassID ||
-             OpInfo[0].RegClass == ARM::rGPRRegClassID)
-         && (OpInfo[1].RegClass == ARM::GPRRegClassID ||
-             OpInfo[1].RegClass == ARM::rGPRRegClassID)
-         && "Expect >= 2 operands and first two as reg operands");
-
-  bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID ||
-                                  OpInfo[2].RegClass == ARM::rGPRRegClassID));
-  bool NoDstReg = (decodeRs(insn) == 0xF);
-
-  // Build the register operands, followed by the constant shift specifier.
-
-  MI.addOperand(MCOperand::CreateReg(
-                  getRegisterEnum(B, OpInfo[0].RegClass,
-                                  NoDstReg ? decodeRn(insn) : decodeRs(insn))));
-  ++OpIdx;
-
-  if (ThreeReg) {
-    int Idx;
-    if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
-      // Process tied_to operand constraint.
-      MI.addOperand(MI.getOperand(Idx));
-      ++OpIdx;
-    } else if (!NoDstReg) {
-      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass,
-                                                         decodeRn(insn))));
-      ++OpIdx;
-    } else {
-      DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n");
-      return false;
-    }
-  }
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
-                                                     decodeRm(insn))));
-  ++OpIdx;
-
-  if (NumOps == OpIdx)
-    return true;
-
-  if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
-      && !OpInfo[OpIdx].isOptionalDef()) {
-
-    if (Thumb2ShiftOpcode(Opcode)) {
-      unsigned Imm = getShiftAmtBits(insn);
-      ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4));
-      getImmShiftSE(ShOp, Imm);
-      MI.addOperand(MCOperand::CreateImm(Imm));
-    } else {
-      // Build the constant shift specifier operand.
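-      // Illustrative example (editor's sketch): bits2 = 0b10 (asr) with
-      // imm5 = 0 decodes as "asr #32" per the ARM ARM DecodeImmShift() rule.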
-      unsigned bits2 = getShiftTypeBits(insn);
-      unsigned imm5 = getShiftAmtBits(insn);
-      ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
-      unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
-      MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
-    }
-    ++OpIdx;
-  }
-
-  return true;
-}
-
-// A6.3.1 Data-processing (modified immediate)
-//
-// Two register operands: Rs Rn ModImm
-// One register operand (Rs=0b1111 no explicit dest reg): Rn ModImm
-// One register operand (Rn=0b1111 no explicit src reg): Rs ModImm
-
-// {t2MOVi, t2MVNi}
-//
-// ModImm = ThumbExpandImm(i:imm3:imm8)
-static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
-    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  const MCInstrDesc &MCID = ARMInsts[Opcode];
-  const MCOperandInfo *OpInfo = MCID.OpInfo;
-  unsigned &OpIdx = NumOpsAdded;
-
-  OpIdx = 0;
-
-  unsigned RdRegClassID = OpInfo[0].RegClass;
-  assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
-                         RdRegClassID == ARM::rGPRRegClassID)
-         && "Expect >= 2 operands and first one as reg operand");
-
-  unsigned RnRegClassID = OpInfo[1].RegClass;
-  bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
-                 || RnRegClassID == ARM::rGPRRegClassID);
-  bool NoDstReg = (decodeRs(insn) == 0xF);
-
-  // Build the register operands, followed by the modified immediate.
-
-  MI.addOperand(MCOperand::CreateReg(
-                  getRegisterEnum(B, RdRegClassID,
-                                  NoDstReg ? decodeRn(insn) : decodeRs(insn))));
-  ++OpIdx;
-
-  if (TwoReg) {
-    if (NoDstReg) {
-      DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
-      return false;
-    }
-    int Idx;
-    if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
-      // The reg operand is tied to the first reg operand.
-      MI.addOperand(MI.getOperand(Idx));
-    } else {
-      // Add second reg operand.
-      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
-                                                         decodeRn(insn))));
-    }
-    ++OpIdx;
-  }
-
-  // The modified immediate operand should come next.
-  assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
-         !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
-         && "Pure imm operand expected");
-
-  // i:imm3:imm8
-  // A6.3.2 Modified immediate constants in Thumb instructions
-  unsigned imm12 = getIImm3Imm8(insn);
-  MI.addOperand(MCOperand::CreateImm(ThumbExpandImm(imm12)));
-  ++OpIdx;
-
-  return true;
-}
-
-static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  case ARM::t2SSAT: case ARM::t2SSAT16:
-  case ARM::t2USAT: case ARM::t2USAT16:
-    return true;
-  default:
-    return false;
-  }
-}
-
-/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
-/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
-/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
-static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
-                                 unsigned &NumOpsAdded, BO B) {
-  const MCInstrDesc &MCID = ARMInsts[Opcode];
-  NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
-
-  // Disassemble the register def.
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
-                                                     decodeRs(insn))));
-
-  unsigned Pos = slice(insn, 4, 0);
-  if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
-    Pos += 1;
-  MI.addOperand(MCOperand::CreateImm(Pos));
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
-                                                     decodeRn(insn))));
-
-  if (NumOpsAdded == 4) {
-    ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
-                            ARM_AM::asr : ARM_AM::lsl);
-    // Inst{14-12:7-6} encodes the imm5 shift amount.
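-    // For instance (editor's sketch), Inst{14-12} = 0b001 and Inst{7-6} =
-    // 0b10 give ShAmt = (1 << 2) | 2 = 6.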
- unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6); - if (ShAmt == 0) { - if (Opc == ARM_AM::asr) - ShAmt = 32; - else - Opc = ARM_AM::no_shift; - } - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt))); - } - return true; -} - -// A6.3.3 Data-processing (plain binary immediate) -// -// o t2ADDri12, t2SUBri12: Rs Rn imm12 -// o t2LEApcrel (ADR): Rs imm12 -// o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm -// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm -// o t2MOVi16: Rs imm16 -// o t2MOVTi16: Rs imm16 -// o t2SBFX (SBFX): Rs Rn lsb width -// o t2UBFX (UBFX): Rs Rn lsb width -// o t2BFI (BFI): Rs Rn lsb width -static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - unsigned RdRegClassID = OpInfo[0].RegClass; - assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID || - RdRegClassID == ARM::rGPRRegClassID) - && "Expect >= 2 operands and first one as reg operand"); - - unsigned RnRegClassID = OpInfo[1].RegClass; - bool TwoReg = (RnRegClassID == ARM::GPRRegClassID - || RnRegClassID == ARM::rGPRRegClassID); - - // Build the register operand(s), followed by the immediate(s). - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID, - decodeRs(insn)))); - ++OpIdx; - - if (TwoReg) { - assert(NumOps >= 3 && "Expect >= 3 operands"); - int Idx; - if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { - // Process tied_to operand constraint. - MI.addOperand(MI.getOperand(Idx)); - } else { - // Add src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, - decodeRn(insn)))); - } - ++OpIdx; - } - - if (Opcode == ARM::t2BFI) { - // Add val reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, - decodeRn(insn)))); - ++OpIdx; - } - - assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() - && !OpInfo[OpIdx].isOptionalDef() - && "Pure imm operand expected"); - - // Pre-increment OpIdx. 
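-  // (Editor's note: OpIdx is bumped here so that it already accounts for the
-  // imm operand appended by each branch below; t2SBFX/t2UBFX append a second
-  // imm and bump OpIdx once more.)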
- ++OpIdx; - - if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12 - || Opcode == ARM::t2LEApcrel) - MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); - else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) { - if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI)) - MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { - uint32_t mask = 0; - if (getBitfieldInvMask(insn, mask)) - MI.addOperand(MCOperand::CreateImm(mask)); - else - return false; - } else { - // Handle the case of: lsb width - assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX) - && "Unexpected opcode"); - MI.addOperand(MCOperand::CreateImm(getLsb(insn))); - MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); - - ++OpIdx; - } - - return true; -} - -// A6.3.4 Table A6-15 Miscellaneous control instructions -// A8.6.41 DMB -// A8.6.42 DSB -// A8.6.49 ISB -static inline bool t2MiscCtrlInstr(uint32_t insn) { - if (slice(insn, 31, 20) == 0xf3b && slice(insn, 15, 14) == 2 && - slice(insn, 12, 12) == 0) - return true; - - return false; -} - -// A6.3.4 Branches and miscellaneous control -// -// A8.6.16 B -// Branches: t2B, t2Bcc -> imm operand -// -// Branches: t2TPsoft -> no operand -// -// A8.6.23 BL, BLX (immediate) -// Branches (defined in ARMInstrThumb.td): tBLr9, tBLXi_r9 -> imm operand -// -// A8.6.26 -// t2BXJ -> Rn -// -// Miscellaneous control: -// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants) -// -// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV -// -> no operand (except pred-imm pred-ccr) -// -// t2DBG -> imm4 = Inst{3-0} -// -// t2MRS/t2MRSsys -> Rs -// t2MSR/t2MSRsys -> Rn mask=Inst{11-8} -// t2SMC -> imm4 = Inst{19-16} -static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - if (NumOps == 0) - return true; - - if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) { - // Inst{3-0} encodes the memory barrier option for the variants. - unsigned opt = slice(insn, 3, 0); - switch (opt) { - case ARM_MB::SY: case ARM_MB::ST: - case ARM_MB::ISH: case ARM_MB::ISHST: - case ARM_MB::NSH: case ARM_MB::NSHST: - case ARM_MB::OSH: case ARM_MB::OSHST: - MI.addOperand(MCOperand::CreateImm(opt)); - NumOpsAdded = 1; - return true; - default: - return false; - } - } - - if (t2MiscCtrlInstr(insn)) - return true; - - switch (Opcode) { - case ARM::t2CLREX: - case ARM::t2NOP: - case ARM::t2YIELD: - case ARM::t2WFE: - case ARM::t2WFI: - case ARM::t2SEV: - return true; - default: - break; - } - - // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different - // opcodes which match the same real instruction. This is needed since there's - // no current handling of optional arguments. Fix here when a better handling - // of optional arguments is implemented. - if (Opcode == ARM::t2CPS3p) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod - MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode - NumOpsAdded = 3; - return true; - } - if (Opcode == ARM::t2CPS2p) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod - MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags - NumOpsAdded = 2; - return true; - } - if (Opcode == ARM::t2CPS1p) { - MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode - NumOpsAdded = 1; - return true; - } - - // DBG has its option specified in Inst{3-0}. 
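-  // For example (editor's note), Inst{3-0} = 0b1111 disassembles to "dbg #15".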
-  if (Opcode == ARM::t2DBG) {
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
-    NumOpsAdded = 1;
-    return true;
-  }
-
-  // MRS and MRSsys take one GPR reg Rs.
-  if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) {
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRs(insn))));
-    NumOpsAdded = 1;
-    return true;
-  }
-  // BXJ takes one GPR reg Rn.
-  if (Opcode == ARM::t2BXJ) {
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRn(insn))));
-    NumOpsAdded = 1;
-    return true;
-  }
-  // MSR takes a mask, followed by one GPR reg Rn. The mask contains the R Bit
-  // in bit 4, and the special register fields in bits 3-0.
-  if (Opcode == ARM::t2MSR) {
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 20) << 4 /* R Bit */ |
-                                       slice(insn, 11, 8) /* Special Reg */));
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRn(insn))));
-    NumOpsAdded = 2;
-    return true;
-  }
-  // SMC takes imm4.
-  if (Opcode == ARM::t2SMC) {
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
-    NumOpsAdded = 1;
-    return true;
-  }
-
-  // Some instructions have predicate operands before the immediate.
-  if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) {
-    // Handling the two predicate operands before the imm operand.
-    if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
-      NumOpsAdded += 2;
-    else {
-      DEBUG(errs() << "Expected predicate operands not found.\n");
-      return false;
-    }
-  }
-
-  // Add the imm operand.
-  int Offset = 0;
-
-  switch (Opcode) {
-  default:
-    assert(0 && "Unexpected opcode");
-    return false;
-  case ARM::t2B:
-    Offset = decodeImm32_B_EncodingT4(insn);
-    break;
-  case ARM::t2Bcc:
-    Offset = decodeImm32_B_EncodingT3(insn);
-    break;
-  case ARM::tBLr9:
-    Offset = decodeImm32_BL(insn);
-    break;
-  case ARM::tBLXi_r9:
-    Offset = decodeImm32_BLX(insn);
-    break;
-  }
-
-  if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI))
-    MI.addOperand(MCOperand::CreateImm(Offset));
-
-  // This is an increment as some predicate operands may have been added first.
-  NumOpsAdded += 1;
-
-  return true;
-}
-
-static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-  case ARM::t2PLDi12: case ARM::t2PLDi8:
-  case ARM::t2PLDs:
-  case ARM::t2PLDWi12: case ARM::t2PLDWi8:
-  case ARM::t2PLDWs:
-  case ARM::t2PLIi12: case ARM::t2PLIi8:
-  case ARM::t2PLIs:
-    return true;
-  }
-}
-
-static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  // Preload Data/Instruction requires either 2 or 3 operands.
-  // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8
-  // t2PLDr: Rn Rm
-  // t2PLDs: Rn Rm imm2=Inst{5-4}
-  // Same pattern applies for t2PLDW* and t2PLI*.
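-  //
-  // Illustrative example (editor's sketch): "pld [r1, #-4]" comes through
-  // t2PLDi8 and is decoded below as Rn = r1 with Offset = -4.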
-
-  const MCInstrDesc &MCID = ARMInsts[Opcode];
-  const MCOperandInfo *OpInfo = MCID.OpInfo;
-  unsigned &OpIdx = NumOpsAdded;
-
-  OpIdx = 0;
-
-  assert(NumOps >= 2 &&
-         OpInfo[0].RegClass == ARM::GPRRegClassID &&
-         "Expect >= 2 operands and first one as reg operand");
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                     decodeRn(insn))));
-  ++OpIdx;
-
-  if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) {
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
-                                                       decodeRm(insn))));
-  } else {
-    assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
-           && !OpInfo[OpIdx].isOptionalDef()
-           && "Pure imm operand expected");
-    int Offset = 0;
-    if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
-        Opcode == ARM::t2PLIi8) {
-      // A8.6.117 Encoding T2: add = FALSE
-      unsigned Imm8 = getImm8(insn);
-      Offset = -1 * Imm8;
-    } else {
-      // The i12 forms. See, for example, A8.6.117 Encoding T1.
-      // Note that currently t2PLDi12 also handles the previously named
-      // t2PLDpci opcode; that's why we use decodeImm12(insn), which returns
-      // +/- imm12.
-      Offset = decodeImm12(insn);
-    }
-    MI.addOperand(MCOperand::CreateImm(Offset));
-  }
-  ++OpIdx;
-
-  if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
-      !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
-    // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs.
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4)));
-    ++OpIdx;
-  }
-
-  return true;
-}
-
-static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load,
-    unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) {
-
-  // Inst{22-21} encodes the data item transferred for load/store.
-  // For single word, it is encoded as 0b10.
-  bool Word = (slice(insn, 22, 21) == 2);
-  bool Half = (slice(insn, 22, 21) == 1);
-  bool Byte = (slice(insn, 22, 21) == 0);
-
-  if (UseRm && BadReg(R2)) {
-    DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n");
-    return true;
-  }
-
-  if (Load) {
-    if (!Word && R0 == 13) {
-      DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n");
-      return true;
-    }
-    if (Byte) {
-      if (WB && R0 == 15 && slice(insn, 10, 8) == 3) {
-        // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0)
-        DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
-        return true;
-      }
-    }
-    // A6.3.8 Load halfword, memory hints
-    if (Half) {
-      if (WB) {
-        if (R0 == R1) {
-          // A8.6.82 LDRSH (immediate) Encoding T2
-          DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n");
-          return true;
-        }
-        if (R0 == 15 && slice(insn, 10, 8) == 3) {
-          // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0)
-          DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
-          return true;
-        }
-      } else {
-        if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) {
-          if (R0 == 15 && slice(insn, 10, 8) == 4) {
-            // A8.6.82 LDRSH (immediate) Encoding T2
-            DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE"
-                         << " \"Unallocated memory hints\"\n");
-            return true;
-          }
-        } else {
-          if (R0 == 15) {
-            // A8.6.82 LDRSH (immediate) Encoding T1
-            DEBUG(errs() << "if Rt == '1111' then SEE"
-                         << " \"Unallocated memory hints\"\n");
-            return true;
-          }
-        }
-      }
-    }
-  } else {
-    if (WB && R0 == R1) {
-      DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
-      return true;
-    }
-    if ((WB && R0 == 15) || (!WB && R1 == 15)) {
-      DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n");
-      return true;
-    }
-    if (Word) {
-      if ((WB && R1 == 15) || (!WB && R0 == 15)) {
-        DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
-        return true;
-      }
-    } else {
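-      // Byte/halfword store (editor's note): the source register must also
-      // not be SP or PC, per BadReg() in the ARM ARM pseudocode.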
- if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) { - DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n"); - return true; - } - } - } - return false; -} - -// A6.3.10 Store single data item -// A6.3.9 Load byte, memory hints -// A6.3.8 Load halfword, memory hints -// A6.3.7 Load word -// -// For example, -// -// t2LDRi12: Rd Rn (+)imm12 -// t2LDRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1) -// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also -// DisassembleThumb2DPSoReg) -// t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) -// t2LDR_PRE: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) -// -// t2STRi12: Rd Rn (+)imm12 -// t2STRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1) -// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also -// DisassembleThumb2DPSoReg) -// t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) -// t2STR_PRE: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) -// -// Note that for indexed modes, the Rn(TIED_TO) operand needs to be populated -// correctly, as LLVM AsmPrinter depends on it. For indexed stores, the first -// operand is Rn; for all the other instructions, Rd is the first operand. -// -// Delegates to DisassembleThumb2PreLoad() for preload data/instruction. -// Delegates to DisassembleThumb2Ldpci() for load * literal operations. -static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - unsigned Rn = decodeRn(insn); - - if (Thumb2PreloadOpcode(Opcode)) - return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B); - - // See, for example, A6.3.7 Load word: Table A6-18 Load word. - if (Load && Rn == 15) - return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B); - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 3 && - OpInfo[0].RegClass > 0 && - OpInfo[1].RegClass > 0 && - "Expect >= 3 operands and first two as reg operands"); - - bool ThreeReg = (OpInfo[2].RegClass > 0); - bool TIED_TO = ThreeReg && MCID.getOperandConstraint(2, MCOI::TIED_TO) != -1; - bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td - - // Build the register operands, followed by the immediate. - unsigned R0 = 0, R1 = 0, R2 = 0; - unsigned Rd = decodeRd(insn); - int Imm = 0; - - if (!Load && TIED_TO) { - R0 = Rn; - R1 = Rd; - } else { - R0 = Rd; - R1 = Rn; - } - if (ThreeReg) { - if (TIED_TO) { - R2 = Rn; - Imm = decodeImm8(insn); - } else { - R2 = decodeRm(insn); - // See, for example, A8.6.64 LDRB (register). - // And ARMAsmPrinter::printT2AddrModeSoRegOperand(). - // LSL is the default shift opc, and LLVM does not expect it to be encoded - // as part of the immediate operand. - // Imm = ARM_AM::getSORegOpc(ARM_AM::lsl, slice(insn, 5, 4)); - Imm = slice(insn, 5, 4); - } - } else { - if (Imm12) - Imm = getImm12(insn); - else - Imm = decodeImm8(insn); - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - R0))); - ++OpIdx; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - R1))); - ++OpIdx; - - if (ThreeReg) { - // This could be an offset register or a TIED_TO register. 
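-    // For example (editor's note), t2LDRs carries an offset register here,
-    // while t2LDR_PRE/t2LDR_POST carry the tied writeback base register.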
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, - R2))); - ++OpIdx; - } - - if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg & !TIED_TO, - TIED_TO)) - return false; - - assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() - && !OpInfo[OpIdx].isOptionalDef() - && "Pure imm operand expected"); - - MI.addOperand(MCOperand::CreateImm(Imm)); - ++OpIdx; - - return true; -} - -// A6.3.12 Data-processing (register) -// -// Two register operands [rotate]: Rs Rm [rotation(= (rotate:'000'))] -// Three register operands only: Rs Rn Rm -// Three register operands [rotate]: Rs Rn Rm [rotation(= (rotate:'000'))] -// -// Parallel addition and subtraction 32-bit Thumb instructions: Rs Rn Rm -// -// Miscellaneous operations: Rs [Rn] Rm -static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCInstrDesc &MCID = ARMInsts[Opcode]; - const MCOperandInfo *OpInfo = MCID.OpInfo; - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; - - assert(NumOps >= 2 && - OpInfo[0].RegClass > 0 && - OpInfo[1].RegClass > 0 && - "Expect >= 2 operands and first two as reg operands"); - - // Build the register operands, followed by the optional rotation amount. - - bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0; - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeRs(insn)))); - ++OpIdx; - - if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, - decodeRn(insn)))); - ++OpIdx; - } - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, - decodeRm(insn)))); - ++OpIdx; - - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 - && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // Add the rotation amount immediate. - MI.addOperand(MCOperand::CreateImm(decodeRotate(insn))); - ++OpIdx; - } - - return true; -} - -// A6.3.16 Multiply, multiply accumulate, and absolute difference -// -// t2MLA, t2MLS, t2SMMLA, t2SMMLS: Rs Rn Rm Ra=Inst{15-12} -// t2MUL, t2SMMUL: Rs Rn Rm -// t2SMLA[BB|BT|TB|TT|WB|WT]: Rs Rn Rm Ra=Inst{15-12} -// t2SMUL[BB|BT|TB|TT|WB|WT]: Rs Rn Rm -// -// Dual halfword multiply: t2SMUAD[X], t2SMUSD[X], t2SMLAD[X], t2SMLSD[X]: -// Rs Rn Rm Ra=Inst{15-12} -// -// Unsigned Sum of Absolute Differences [and Accumulate] -// Rs Rn Rm [Ra=Inst{15-12}] -static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - - const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; - - assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::rGPRRegClassID && - OpInfo[1].RegClass == ARM::rGPRRegClassID && - OpInfo[2].RegClass == ARM::rGPRRegClassID && - "Expect >= 3 operands and first three as reg operands"); - - // Build the register operands. - - bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID; - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRs(insn)))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRn(insn)))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRm(insn)))); - - if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, - decodeRd(insn)))); - - NumOpsAdded = FourReg ? 
-    4 : 3;
-
-  return true;
-}
-
-// A6.3.17 Long multiply, long multiply accumulate, and divide
-//
-// t2SMULL, t2UMULL, t2SMLAL, t2UMLAL, t2UMAAL: RdLo RdHi Rn Rm
-// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
-//
-// Halfword multiply accumulate long: t2SMLAL<x><y>: RdLo RdHi Rn Rm
-// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
-//
-// Dual halfword multiply: t2SMLALD[X], t2SMLSLD[X]: RdLo RdHi Rn Rm
-// where RdLo = Inst{15-12} and RdHi = Inst{11-8}
-//
-// Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm
-static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-
-  assert(NumOps >= 3 &&
-         OpInfo[0].RegClass == ARM::rGPRRegClassID &&
-         OpInfo[1].RegClass == ARM::rGPRRegClassID &&
-         OpInfo[2].RegClass == ARM::rGPRRegClassID &&
-         "Expect >= 3 operands and first three as reg operands");
-
-  bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
-
-  // Build the register operands.
-
-  if (FourReg)
-    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
-                                                       decodeRd(insn))));
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
-                                                     decodeRs(insn))));
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
-                                                     decodeRn(insn))));
-
-  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
-                                                     decodeRm(insn))));
-
-  if (FourReg)
-    NumOpsAdded = 4;
-  else
-    NumOpsAdded = 3;
-
-  return true;
-}
-
-// See A6.3 32-bit Thumb instruction encoding for instruction classes
-// corresponding to (op1, op2, op).
-//
-// Table A6-9 32-bit Thumb instruction encoding
-//   op1 op2     op Instruction class, see
-//   --- ------- -- -----------------------------------------------------------
-//   01  00xx0xx -  Load/store multiple on page A6-23
-//       00xx1xx -  Load/store dual, load/store exclusive, table branch on
-//                  page A6-24
-//       01xxxxx -  Data-processing (shifted register) on page A6-31
-//       1xxxxxx -  Coprocessor instructions on page A6-40
-//   10  x0xxxxx 0  Data-processing (modified immediate) on page A6-15
-//       x1xxxxx 0  Data-processing (plain binary immediate) on page A6-19
-//       -       1  Branches and miscellaneous control on page A6-20
-//   11  000xxx0 -  Store single data item on page A6-30
-//       001xxx0 -  Advanced SIMD element or structure load/store instructions
-//                  on page A7-27
-//       00xx001 -  Load byte, memory hints on page A6-28
-//       00xx011 -  Load halfword, memory hints on page A6-26
-//       00xx101 -  Load word on page A6-25
-//       00xx111 -  UNDEFINED
-//       010xxxx -  Data-processing (register) on page A6-33
-//       0110xxx -  Multiply, multiply accumulate, and absolute difference on
-//                  page A6-38
-//       0111xxx -  Long multiply, long multiply accumulate, and divide on
-//                  page A6-39
-//       1xxxxxx -  Coprocessor instructions on page A6-40
-//
-static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
-    MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
-    unsigned &NumOpsAdded, BO B) {
-
-  switch (op1) {
-  case 1:
-    if (slice(op2, 6, 5) == 0) {
-      if (slice(op2, 2, 2) == 0) {
-        // Load/store multiple.
-        return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded,
-                                        B);
-      }
-
-      // Load/store dual, load/store exclusive, table branch, otherwise.
-      assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!");
-      if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) ||
-          (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) {
-        // Load/store exclusive.
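-        // (Editor's note: these range tests assume the t2LDREX..t2LDREXH and
-        // t2STREX..t2STREXH opcodes are laid out contiguously in the
-        // TableGen-generated enum.)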
- return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - if (Opcode == ARM::t2LDRDi8 || - Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST || - Opcode == ARM::t2STRDi8 || - Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) { - // Load/store dual. - return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - if (Opcode == ARM::t2TBB || Opcode == ARM::t2TBH) { - // Table branch. - return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } - } else if (slice(op2, 6, 5) == 1) { - // Data-processing (shifted register). - return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } - - // FIXME: A6.3.18 Coprocessor instructions - // But see ThumbDisassembler::getInstruction(). - - break; - case 2: - if (op == 0) { - if (slice(op2, 5, 5) == 0) - // Data-processing (modified immediate) - return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - if (Thumb2SaturateOpcode(Opcode)) - return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B); - - // Data-processing (plain binary immediate) - return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - // Branches and miscellaneous control on page A6-20. - return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - case 3: - switch (slice(op2, 6, 5)) { - case 0: - // Load/store instructions... - if (slice(op2, 0, 0) == 0) { - if (slice(op2, 4, 4) == 0) { - // Store single data item on page A6-30 - return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded, - B); - } else { - // FIXME: Advanced SIMD element or structure load/store instructions. - // But see ThumbDisassembler::getInstruction(). - ; - } - } else { - // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word - return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps, - NumOpsAdded, B); - } - break; - case 1: - if (slice(op2, 4, 4) == 0) { - // A6.3.12 Data-processing (register) - return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } else if (slice(op2, 3, 3) == 0) { - // A6.3.16 Multiply, multiply accumulate, and absolute difference - return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } else { - // A6.3.17 Long multiply, long multiply accumulate, and divide - return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - break; - default: - // FIXME: A6.3.18 Coprocessor instructions - // But see ThumbDisassembler::getInstruction(). 
-      ;
-      break;
-    }
-
-    break;
-  default:
-    assert(0 && "Thumb2 encoding error!");
-    break;
-  }
-
-  return false;
-}
-
-static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
-    unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) {
-
-  uint16_t HalfWord = slice(insn, 31, 16);
-
-  if (HalfWord == 0) {
-    // A6.2 16-bit Thumb instruction encoding
-    // op = bits[15:10]
-    uint16_t op = slice(insn, 15, 10);
-    return DisassembleThumb1(op, MI, Opcode, insn, NumOps, NumOpsAdded,
-                             Builder);
-  }
-
-  unsigned bits15_11 = slice(HalfWord, 15, 11);
-
-  // A6.1 Thumb instruction set encoding
-  if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) {
-    assert(0 &&
-           "Bits[15:11] of first halfword of Thumb2 instruction out of range");
-    return false;
-  }
-
-  // A6.3 32-bit Thumb instruction encoding
-
-  uint16_t op1 = slice(HalfWord, 12, 11);
-  uint16_t op2 = slice(HalfWord, 10, 4);
-  uint16_t op = slice(insn, 15, 15);
-
-  return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded,
-                           Builder);
-}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 78d3e47..ccdac3e 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "asm-printer"
-#include "ARMBaseInfo.h"
 #include "ARMInstPrinter.h"
-#include "ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
@@ -25,6 +25,23 @@ using namespace llvm;
 #define GET_INSTRUCTION_NAME
 #include "ARMGenAsmWriter.inc"
 
+/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
+///
+/// getSORegOffset returns an integer from 0-31, representing '32' as 0.
+static unsigned translateShiftImm(unsigned imm) {
+  if (imm == 0)
+    return 32;
+  return imm;
+}
+
+
+ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
+                               const MCSubtargetInfo &STI) :
+  MCInstPrinter(MAI) {
+  // Initialize the set of available features.
+  setAvailableFeatures(STI.getFeatureBits());
+}
+
 StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
   return getInstructionName(Opcode);
 }
@@ -33,11 +50,12 @@ void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
   OS << getRegisterName(RegNo);
 }
 
-void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                               StringRef Annot) {
   unsigned Opcode = MI->getOpcode();
 
   // Check for MOVs and print canonical forms, instead.
-  if (Opcode == ARM::MOVs) {
+  if (Opcode == ARM::MOVsr) {
     // FIXME: Thumb variants?
     const MCOperand &Dst = MI->getOperand(0);
     const MCOperand &MO1 = MI->getOperand(1);
@@ -51,20 +69,36 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
     O << '\t' << getRegisterName(Dst.getReg()) << ", "
       << getRegisterName(MO1.getReg());
 
-    if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx)
-      return;
+    O << ", " << getRegisterName(MO2.getReg());
+    assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+    printAnnotation(O, Annot);
+    return;
+  }
 
-    O << ", ";
+  if (Opcode == ARM::MOVsi) {
+    // FIXME: Thumb variants?
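+    // (Editor's note: for MOVsi the shift mnemonic itself becomes the
+    // instruction, e.g. "asr r0, r1, #3" rather than "mov r0, r1, asr #3".)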
+ const MCOperand &Dst = MI->getOperand(0); + const MCOperand &MO1 = MI->getOperand(1); + const MCOperand &MO2 = MI->getOperand(2); + + O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())); + printSBitModifierOperand(MI, 5, O); + printPredicateOperand(MI, 3, O); - if (MO2.getReg()) { - O << getRegisterName(MO2.getReg()); - assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else { - O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + O << '\t' << getRegisterName(Dst.getReg()) + << ", " << getRegisterName(MO1.getReg()); + + if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) { + printAnnotation(O, Annot); + return; } + + O << ", #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); + printAnnotation(O, Annot); return; } + // A8.6.123 PUSH if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) && MI->getOperand(0).getReg() == ARM::SP) { @@ -74,6 +108,15 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { O << ".w"; O << '\t'; printRegisterList(MI, 4, O); + printAnnotation(O, Annot); + return; + } + if (Opcode == ARM::STR_PRE_IMM && MI->getOperand(2).getReg() == ARM::SP && + MI->getOperand(3).getImm() == -4) { + O << '\t' << "push"; + printPredicateOperand(MI, 4, O); + O << "\t{" << getRegisterName(MI->getOperand(1).getReg()) << "}"; + printAnnotation(O, Annot); return; } @@ -86,8 +129,18 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { O << ".w"; O << '\t'; printRegisterList(MI, 4, O); + printAnnotation(O, Annot); return; } + if (Opcode == ARM::LDR_POST_IMM && MI->getOperand(2).getReg() == ARM::SP && + MI->getOperand(4).getImm() == 4) { + O << '\t' << "pop"; + printPredicateOperand(MI, 5, O); + O << "\t{" << getRegisterName(MI->getOperand(0).getReg()) << "}"; + printAnnotation(O, Annot); + return; + } + // A8.6.355 VPUSH if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) && @@ -96,6 +149,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { printPredicateOperand(MI, 2, O); O << '\t'; printRegisterList(MI, 4, O); + printAnnotation(O, Annot); return; } @@ -106,10 +160,40 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { printPredicateOperand(MI, 2, O); O << '\t'; printRegisterList(MI, 4, O); + printAnnotation(O, Annot); + return; + } + + if (Opcode == ARM::tLDMIA) { + bool Writeback = true; + unsigned BaseReg = MI->getOperand(0).getReg(); + for (unsigned i = 3; i < MI->getNumOperands(); ++i) { + if (MI->getOperand(i).getReg() == BaseReg) + Writeback = false; + } + + O << "\tldm"; + + printPredicateOperand(MI, 1, O); + O << '\t' << getRegisterName(BaseReg); + if (Writeback) O << "!"; + O << ", "; + printRegisterList(MI, 3, O); + printAnnotation(O, Annot); + return; + } + + // Thumb1 NOP + if (Opcode == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::R8 && + MI->getOperand(1).getReg() == ARM::R8) { + O << "\tnop"; + printPredicateOperand(MI, 2, O); + printAnnotation(O, Annot); return; } printInstruction(MI, O); + printAnnotation(O, Annot); } void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, @@ -122,16 +206,38 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '#' << Op.getImm(); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); + // If a symbolic branch target was added as a constant expression then print + // that address in hex. 
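+    // For example (editor's note), a branch target attached as the constant
+    // expression 0x804c prints as "0x804c" rather than as a raw expression.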
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); + int64_t Address; + if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + O << "0x"; + O.write_hex(Address); + } + else { + // Otherwise, just print the expression. + O << *Op.getExpr(); + } } } +void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + if (MO1.isExpr()) + O << *MO1.getExpr(); + else if (MO1.isImm()) + O << "[pc, #" << MO1.getImm() << "]"; + else + llvm_unreachable("Unknown LDR label operand?"); +} + // so_reg is a 4-operand unit corresponding to register forms of the A5.1 // "Addressing Mode 1 - Data-processing operands" forms. This includes: // REG 0 0 - e.g. R5 // REG REG 0,SH_OPC - e.g. R5, ROR R3 // REG 0 IMM,SH_OPC - e.g. R5, LSL #3 -void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, +void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); @@ -144,14 +250,27 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (ShOpc == ARM_AM::rrx) return; - if (MO2.getReg()) { - O << ' ' << getRegisterName(MO2.getReg()); - assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else if (ShOpc != ARM_AM::rrx) { - O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); - } + + O << ' ' << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); +} + +void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << getRegisterName(MO1.getReg()); + + // Print the shift opc. 
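+  // (Editor's note: rrx takes no shift amount; for the other shift ops the
+  // amount is printed below, with an encoded 0 rendered as 32 by
+  // translateShiftImm().)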
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (ShOpc == ARM_AM::rrx) + return; + O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); } + //===--------------------------------------------------------------------===// // Addressing Mode #2 //===--------------------------------------------------------------------===// @@ -209,6 +328,22 @@ void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op, << " #" << ShImm; } +void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()) << ", " + << getRegisterName(MO2.getReg()) << "]"; +} + +void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()) << ", " + << getRegisterName(MO2.getReg()) << ", lsl #1]"; +} + void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); @@ -284,7 +419,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, O << '[' << getRegisterName(MO1.getReg()); if (MO2.getReg()) { - O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm()) + O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) << getRegisterName(MO2.getReg()) << ']'; return; } @@ -315,8 +450,8 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, const MCOperand &MO2 = MI->getOperand(OpNum+1); if (MO1.getReg()) { - O << (char)ARM_AM::getAM3Op(MO2.getImm()) - << getRegisterName(MO1.getReg()); + O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) + << getRegisterName(MO1.getReg()); return; } @@ -326,6 +461,31 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, << ImmOffs; } +void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + unsigned Imm = MO.getImm(); + O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff); +} + +void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << (MO2.getImm() ? "" : "-") << getRegisterName(MO1.getReg()); +} + +void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + unsigned Imm = MO.getImm(); + O << '#' << ((Imm & 256) ? 
"" : "-") << ((Imm & 0xff) << 2); +} + + void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum) @@ -345,7 +505,9 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, O << "[" << getRegisterName(MO1.getReg()); - if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { + unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); + unsigned Op = ARM_AM::getAM5Op(MO2.getImm()); + if (ImmOffs || Op == ARM_AM::sub) { O << ", #" << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) << ImmOffs * 4; @@ -402,20 +564,31 @@ void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned ShiftOp = MI->getOperand(OpNum).getImm(); - ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); - switch (Opc) { - case ARM_AM::no_shift: + bool isASR = (ShiftOp & (1 << 5)) != 0; + unsigned Amt = ShiftOp & 0x1f; + if (isASR) + O << ", asr #" << (Amt == 0 ? 32 : Amt); + else if (Amt) + O << ", lsl #" << Amt; +} + +void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + if (Imm == 0) return; - case ARM_AM::lsl: - O << ", lsl #"; - break; - case ARM_AM::asr: - O << ", asr #"; - break; - default: - assert(0 && "unexpected shift opcode for shift immediate operand"); - } - O << ARM_AM::getSORegOffset(ShiftOp); + assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!"); + O << ", lsl #" << Imm; +} + +void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + // A shift amount of 32 is encoded as 0. + if (Imm == 0) + Imm = 32; + assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!"); + O << ", asr #" << Imm; } void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, @@ -450,6 +623,9 @@ void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum, for (int i=2; i >= 0; --i) if (IFlags & (1 << i)) O << ARM_PROC::IFlagsToString(1 << i); + + if (IFlags == 0) + O << "none"; } void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, @@ -458,10 +634,43 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, unsigned SpecRegRBit = Op.getImm() >> 4; unsigned Mask = Op.getImm() & 0xf; + if (getAvailableFeatures() & ARM::FeatureMClass) { + switch (Op.getImm()) { + default: assert(0 && "Unexpected mask value!"); + case 0: O << "apsr"; return; + case 1: O << "iapsr"; return; + case 2: O << "eapsr"; return; + case 3: O << "xpsr"; return; + case 5: O << "ipsr"; return; + case 6: O << "epsr"; return; + case 7: O << "iepsr"; return; + case 8: O << "msp"; return; + case 9: O << "psp"; return; + case 16: O << "primask"; return; + case 17: O << "basepri"; return; + case 18: O << "basepri_max"; return; + case 19: O << "faultmask"; return; + case 20: O << "control"; return; + } + } + + // As special cases, CPSR_f, CPSR_s and CPSR_fs prefer printing as + // APSR_nzcvq, APSR_g and APSRnzcvqg, respectively. 
+ if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) { + O << "APSR_"; + switch (Mask) { + default: assert(0); + case 4: O << "g"; return; + case 8: O << "nzcvq"; return; + case 12: O << "nzcvqg"; return; + } + llvm_unreachable("Unexpected mask value!"); + } + if (SpecRegRBit) - O << "spsr"; + O << "SPSR"; else - O << "cpsr"; + O << "CPSR"; if (Mask) { O << '_'; @@ -501,15 +710,20 @@ void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + raw_ostream &O) { O << "p" << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + raw_ostream &O) { O << "c" << MI->getOperand(OpNum).getImm(); } +void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "{" << MI->getOperand(OpNum).getImm() << "}"; +} + void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O) { llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); @@ -517,7 +731,13 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "#" << MI->getOperand(OpNum).getImm() * 4; + O << "#" << MI->getOperand(OpNum).getImm() * 4; +} + +void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + O << "#" << (Imm == 0 ? 32 : Imm); } void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, @@ -610,7 +830,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (ShOpc != ARM_AM::rrx) - O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); + O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); } void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, @@ -647,7 +867,9 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, int32_t OffImm = (int32_t)MO2.getImm(); // Don't print +0. - if (OffImm < 0) + if (OffImm == INT32_MIN) + O << ", #-0"; + else if (OffImm < 0) O << ", #-" << -OffImm; else if (OffImm > 0) O << ", #" << OffImm; @@ -671,6 +893,18 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, O << "]"; } +void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + if (MO2.getImm()) + O << ", #" << MO2.getImm() * 4; + O << "]"; +} + void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -678,9 +912,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, int32_t OffImm = (int32_t)MO1.getImm(); // Don't print +0. if (OffImm < 0) - O << "#-" << -OffImm; - else if (OffImm > 0) - O << "#" << OffImm; + O << ", #-" << -OffImm; + else + O << ", #" << OffImm; } void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, @@ -689,10 +923,13 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm() / 4; // Don't print +0. 
- if (OffImm < 0) - O << "#-" << -OffImm * 4; - else if (OffImm > 0) - O << "#" << OffImm * 4; + if (OffImm != 0) { + O << ", "; + if (OffImm < 0) + O << "#-" << -OffImm * 4; + else if (OffImm > 0) + O << "#" << OffImm * 4; + } } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, @@ -715,39 +952,10 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, O << "]"; } -void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - O << '#'; - if (MO.isFPImm()) { - O << (float)MO.getFPImm(); - } else { - union { - uint32_t I; - float F; - } FPUnion; - - FPUnion.I = MO.getImm(); - O << FPUnion.F; - } -} - -void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); - O << '#'; - if (MO.isFPImm()) { - O << MO.getFPImm(); - } else { - // We expect the binary encoding of a floating point number here. - union { - uint64_t I; - double D; - } FPUnion; - - FPUnion.I = MO.getImm(); - O << FPUnion.D; - } + O << '#' << ARM_AM::getFPImmFloat(MO.getImm()); } void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, @@ -757,3 +965,28 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); O << "#0x" << utohexstr(Val); } + +void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + O << "#" << Imm + 1; +} + +void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNum).getImm(); + if (Imm == 0) + return; + O << ", ror #"; + switch (Imm) { + default: assert (0 && "illegal ror immediate!"); + case 1: O << "8"; break; + case 2: O << "16"; break; + case 3: O << "24"; break; + } +} + +void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "[" << MI->getOperand(OpNum).getImm() << "]"; +} diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index d5f238b..5c2173f 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -15,6 +15,7 @@ #define ARMINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" namespace llvm { @@ -22,10 +23,9 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI) - : MCInstPrinter(MAI) {} + ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI); - virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; @@ -38,8 +38,11 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrModeTBH(const MCInst *MI, unsigned OpNum, 
raw_ostream &O); void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, @@ -48,11 +51,15 @@ public: raw_ostream &O); void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, - raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O); + void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O); + void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -65,8 +72,11 @@ public: raw_ostream &O); void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -88,6 +98,8 @@ public: raw_ostream &O); void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, @@ -108,11 +120,15 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt deleted file mode 100644 index 18645c0..0000000 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ 
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARMAsmPrinter - ARMInstPrinter.cpp - ) -add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile deleted file mode 100644 index 65d372e..0000000 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARMAsmPrinter - -# Hack: we need to include 'main' arm target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 595708f..9982fa6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -14,7 +14,8 @@ #ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H #define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H -#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" #include "llvm/Support/MathExtras.h" #include <cassert> @@ -32,7 +33,8 @@ namespace ARM_AM { }; enum AddrOpc { - add = '+', sub = '-' + sub = 0, + add }; static inline const char *getAddrOpcStr(AddrOpc Op) { @@ -60,20 +62,6 @@ } } - static inline ShiftOpc getShiftOpcForNode(SDValue N) { - switch (N.getOpcode()) { - default: return ARM_AM::no_shift; - case ISD::SHL: return ARM_AM::lsl; - case ISD::SRL: return ARM_AM::lsr; - case ISD::SRA: return ARM_AM::asr; - case ISD::ROTR: return ARM_AM::ror; - //case ISD::ROTL: // Only if imm -> turn into ROTR. - // Can't handle RRX here, because it would require folding a flag into - // the addressing mode. :( This causes us to miss certain things. - //case ARMISD::RRX: return ARM_AM::rrx; - } - } - enum AMSubMode { bad_am_submode = 0, ia, @@ -588,6 +576,90 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode); + //===--------------------------------------------------------------------===// + // Floating-point Immediates + // + static inline float getFPImmFloat(unsigned Imm) { + // We expect an 8-bit binary encoding of a floating-point number here. + union { + uint32_t I; + float F; + } FPUnion; + + uint8_t Sign = (Imm >> 7) & 0x1; + uint8_t Exp = (Imm >> 4) & 0x7; + uint8_t Mantissa = Imm & 0xf; + + // 8-bit FP IEEE Float Encoding + // abcd efgh aBbbbbbc defgh000 00000000 00000000 + // + // where B = NOT(b); + + FPUnion.I = 0; + FPUnion.I |= Sign << 31; + FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30; + FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25; + FPUnion.I |= (Exp & 0x3) << 23; + FPUnion.I |= Mantissa << 19; + return FPUnion.F; + } + + /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit + /// floating-point value. If the value cannot be represented as an 8-bit + /// floating-point value, then return -1.
+ static inline int getFP32Imm(const APInt &Imm) { + uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; + int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 + int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. + if (Mantissa & 0x7ffff) + return -1; + Mantissa >>= 19; + if ((Mantissa & 0xf) != Mantissa) + return -1; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; + } + + static inline int getFP32Imm(const APFloat &FPImm) { + return getFP32Imm(FPImm.bitcastToAPInt()); + } + + /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit + /// floating-point value. If the value cannot be represented as an 8-bit + /// floating-point value, then return -1. + static inline int getFP64Imm(const APInt &Imm) { + uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; + int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 + uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL; + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. + if (Mantissa & 0xffffffffffffULL) + return -1; + Mantissa >>= 48; + if ((Mantissa & 0xf) != Mantissa) + return -1; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; + } + + static inline int getFP64Imm(const APFloat &FPImm) { + return getFP64Imm(FPImm.bitcastToAPInt()); + } + } // end namespace ARM_AM } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 5e438a9..c31c5e6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "ARM.h" -#include "ARMAddressingModes.h" -#include "ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" @@ -19,12 +20,12 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmBackend.h" -#include "llvm/Target/TargetRegistry.h" using namespace llvm; namespace { @@ -35,13 +36,24 @@ public: /*HasRelocationAddend*/ false) {} }; -class ARMAsmBackend : public TargetAsmBackend { +class ARMAsmBackend : public MCAsmBackend { + const MCSubtargetInfo* STI; bool isThumbMode; // Currently emitting Thumb code. 
public: - ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {} + ARMAsmBackend(const Target &T, const StringRef TT) + : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")), + isThumbMode(TT.startswith("thumb")) {} + + ~ARMAsmBackend() { + delete STI; + } unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; } + bool hasNOP() const { + return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; + } + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in @@ -65,9 +77,9 @@ public: { "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_blx", 7, 21, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cp", 1, 8, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. { "fixup_arm_movt_hi16", 0, 20, 0 }, @@ -81,7 +93,7 @@ public: }; if (Kind < FirstTargetFixupKind) - return TargetAsmBackend::getFixupKindInfo(Kind); + return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); @@ -123,20 +135,28 @@ void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { } bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { + const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 + const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP + const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 + const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP if (isThumb()) { - // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to - // use 0x46c0 (which is a 'mov r8, r8' insn). + const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding + : Thumb1_16bitNopEncoding; uint64_t NumNops = Count / 2; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write16(0xbf00); + OW->Write16(nopEncoding); if (Count & 1) OW->Write8(0); return true; } // ARM mode + const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding + : ARMv4_NopEncoding; uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(0xe1a00000); + OW->Write32(nopEncoding); + // FIXME: should this function return false when unable to write exactly + // 'Count' bytes with NOP encodings?
switch (Count % 4) { default: break; // No leftover bytes to write case 1: OW->Write8(0); break; @@ -163,8 +183,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_movw_lo16_pcrel: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned Lo12 = Value & 0x0FFF; - assert ((((int64_t)Value) >= -0x8000) && (((int64_t)Value) <= 0x7fff) && - "Out of range pc-relative fixup value!"); // inst{19-16} = Hi4; // inst{11-0} = Lo12; Value = (Hi4 << 16) | (Lo12); @@ -185,10 +203,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { // inst{26} = i; // inst{14-12} = Mid3; // inst{7-0} = Lo8; - // The value comes in as the whole thing, not just the portion required - // for this fixup, so we need to mask off the bits not handled by this - // portion (lo vs. hi). - Value &= 0xffff; Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8); uint64_t swapped = (Value & 0xFFFF0000) >> 16; swapped |= (Value & 0x0000FFFF) << 16; @@ -382,8 +396,9 @@ namespace { class ELFARMAsmBackend : public ARMAsmBackend { public: Triple::OSType OSType; - ELFARMAsmBackend(const Target &T, Triple::OSType _OSType) - : ARMAsmBackend(T), OSType(_OSType) { } + ELFARMAsmBackend(const Target &T, const StringRef TT, + Triple::OSType _OSType) + : ARMAsmBackend(T, TT), OSType(_OSType) { } void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; @@ -414,8 +429,9 @@ void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, class DarwinARMAsmBackend : public ARMAsmBackend { public: const object::mach::CPUSubtypeARM Subtype; - DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st) - : ARMAsmBackend(T), Subtype(st) { } + DarwinARMAsmBackend(const Target &T, const StringRef TT, + object::mach::CPUSubtypeARM st) + : ARMAsmBackend(T, TT), Subtype(st) { } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, @@ -492,25 +508,24 @@ void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, } // end anonymous namespace -TargetAsmBackend *llvm::createARMAsmBackend(const Target &T, - const std::string &TT) { +MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { if (TheTriple.getArchName() == "armv4t" || TheTriple.getArchName() == "thumbv4t") - return new DarwinARMAsmBackend(T, object::mach::CSARM_V4T); + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T); else if (TheTriple.getArchName() == "armv5e" || TheTriple.getArchName() == "thumbv5e") - return new DarwinARMAsmBackend(T, object::mach::CSARM_V5TEJ); + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ); else if (TheTriple.getArchName() == "armv6" || TheTriple.getArchName() == "thumbv6") - return new DarwinARMAsmBackend(T, object::mach::CSARM_V6); - return new DarwinARMAsmBackend(T, object::mach::CSARM_V7); + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6); + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7); } if (TheTriple.isOSWindows()) assert(0 && "Windows not supported on ARM"); - return new ELFARMAsmBackend(T, Triple(TT).getOS()); + return new ELFARMAsmBackend(T, TT, Triple(TT).getOS()); } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 458f7dd..ec4b6ff 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -17,12 +17,9 @@ #ifndef ARMBASEINFO_H #define 
ARMBASEINFO_H -#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "ARMMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" -// Note that the following auto-generated files only defined enum types, and -// so are safe to include here. - namespace llvm { // Enums corresponding to ARM condition codes @@ -191,6 +188,22 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) { } } +/// isARMLowRegister - Returns true if the register is a low register (r0-r7). +/// +static inline bool isARMLowRegister(unsigned Reg) { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + return true; + default: + return false; + } +} + +/// ARMII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// namespace ARMII { /// ARM Index Modes @@ -287,6 +300,148 @@ namespace ARMII { /// call operand. MO_PLT }; + + enum { + //===------------------------------------------------------------------===// + // Instruction Flags. + + //===------------------------------------------------------------------===// + // This five-bit field describes the addressing mode used. + AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h + + // IndexMode - Unindexed, pre-indexed, or post-indexed are valid for load + // and store ops only. Generic "updating" flag is used for ld/st multiple. + // The index mode enums are declared in ARMBaseInfo.h + IndexModeShift = 5, + IndexModeMask = 3 << IndexModeShift, + + //===------------------------------------------------------------------===// + // Instruction encoding formats. + // + FormShift = 7, + FormMask = 0x3f << FormShift, + + // Pseudo instructions + Pseudo = 0 << FormShift, + + // Multiply instructions + MulFrm = 1 << FormShift, + + // Branch instructions + BrFrm = 2 << FormShift, + BrMiscFrm = 3 << FormShift, + + // Data Processing instructions + DPFrm = 4 << FormShift, + DPSoRegFrm = 5 << FormShift, + + // Load and Store + LdFrm = 6 << FormShift, + StFrm = 7 << FormShift, + LdMiscFrm = 8 << FormShift, + StMiscFrm = 9 << FormShift, + LdStMulFrm = 10 << FormShift, + + LdStExFrm = 11 << FormShift, + + // Miscellaneous arithmetic instructions + ArithMiscFrm = 12 << FormShift, + SatFrm = 13 << FormShift, + + // Extend instructions + ExtFrm = 14 << FormShift, + + // VFP formats + VFPUnaryFrm = 15 << FormShift, + VFPBinaryFrm = 16 << FormShift, + VFPConv1Frm = 17 << FormShift, + VFPConv2Frm = 18 << FormShift, + VFPConv3Frm = 19 << FormShift, + VFPConv4Frm = 20 << FormShift, + VFPConv5Frm = 21 << FormShift, + VFPLdStFrm = 22 << FormShift, + VFPLdStMulFrm = 23 << FormShift, + VFPMiscFrm = 24 << FormShift, + + // Thumb format + ThumbFrm = 25 << FormShift, + + // Miscellaneous format + MiscFrm = 26 << FormShift, + + // NEON formats + NGetLnFrm = 27 << FormShift, + NSetLnFrm = 28 << FormShift, + NDupFrm = 29 << FormShift, + NLdStFrm = 30 << FormShift, + N1RegModImmFrm= 31 << FormShift, + N2RegFrm = 32 << FormShift, + NVCVTFrm = 33 << FormShift, + NVDupLnFrm = 34 << FormShift, + N2RegVShLFrm = 35 << FormShift, + N2RegVShRFrm = 36 << FormShift, + N3RegFrm = 37 << FormShift, + N3RegVShFrm = 38 << FormShift, + NVExtFrm = 39 << FormShift, + NVMulSLFrm = 40 << FormShift, + NVTBLFrm = 41 << FormShift, + + //===------------------------------------------------------------------===// + // Misc flags. + + // UnaryDP - Indicates this is a unary data processing instruction, i.e. + // it doesn't have a Rn operand.
+ UnaryDP = 1 << 13, + + // Xform16Bit - Indicates this Thumb2 instruction may be transformed into + // a 16-bit Thumb instruction if certain conditions are met. + Xform16Bit = 1 << 14, + + // ThumbArithFlagSetting - The instruction is a 16-bit flag setting Thumb + // instruction. Used by the parser to determine whether to require the 'S' + // suffix on the mnemonic (when not in an IT block) or preclude it (when + // in an IT block). + ThumbArithFlagSetting = 1 << 18, + + //===------------------------------------------------------------------===// + // Code domain. + DomainShift = 15, + DomainMask = 7 << DomainShift, + DomainGeneral = 0 << DomainShift, + DomainVFP = 1 << DomainShift, + DomainNEON = 2 << DomainShift, + DomainNEONA8 = 4 << DomainShift, + + //===------------------------------------------------------------------===// + // Field shifts - such shifts are used to set field while generating + // machine instructions. + // + // FIXME: This list will need adjusting/fixing as the MC code emitter + // takes shape and the ARMCodeEmitter.cpp bits go away. + ShiftTypeShift = 4, + + M_BitShift = 5, + ShiftImmShift = 5, + ShiftShift = 7, + N_BitShift = 7, + ImmHiShift = 8, + SoRotImmShift = 8, + RegRsShift = 8, + ExtRotImmShift = 10, + RegRdLoShift = 12, + RegRdShift = 12, + RegRdHiShift = 16, + RegRnShift = 16, + S_BitShift = 20, + W_BitShift = 21, + AM3_I_BitShift = 22, + D_BitShift = 22, + U_BitShift = 23, + P_BitShift = 24, + I_BitShift = 25, + CondShift = 28 + }; + } // end namespace ARMII } // end namespace llvm; diff --git a/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 350c92d..350c92d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 53b4c95..1c109e0 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -52,6 +52,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { AsmTransCBE = arm_asm_table; Data64bitsDirective = 0; CommentString = "@"; + Code16Directive = ".code\t16"; + Code32Directive = ".code\t32"; + SupportsDebugInformation = true; // Exceptions handling @@ -64,12 +67,14 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { Data64bitsDirective = 0; CommentString = "@"; - - HasLEB128 = true; PrivateGlobalPrefix = ".L"; + Code16Directive = ".code\t16"; + Code32Directive = ".code\t32"; + WeakRefDirective = "\t.weak\t"; - HasLCOMMDirective = true; + LCOMMDirectiveType = LCOMM::NoAlignment; + HasLEB128 = true; SupportsDebugInformation = true; // Exceptions handling diff --git a/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 39be3f0..865c3e2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -12,17 +12,18 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" -#include "ARM.h" -#include "ARMAddressingModes.h" -#include "ARMFixupKinds.h" -#include "ARMInstrInfo.h" -#include "ARMMCExpr.h" -#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCExpr.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include 
"llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" @@ -112,11 +113,13 @@ public: /// immediate Thumb2 direct branch target. uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; - + /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate /// branch target. uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; + uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; /// getAdrLabelOpValue - Return encoding info for 12-bit immediate /// ADR label target. @@ -142,6 +145,16 @@ public: uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; + /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2' + /// operand. + uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2' + /// operand. + uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm' /// operand as needed by load/store instructions. @@ -183,6 +196,10 @@ public: uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; + /// getPostIdxRegOpValue - Return encoding for postidx_reg operands. + uint32_t getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands. uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; @@ -251,27 +268,13 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; /// getSORegOpValue - Return an encoded so_reg shifted register value. 
- unsigned getSORegOpValue(const MCInst &MI, unsigned Op, + unsigned getSORegRegOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getSORegImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; - unsigned getRotImmOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { - switch (MI.getOperand(Op).getImm()) { - default: assert (0 && "Not a valid rot_imm value!"); - case 0: return 0; - case 8: return 1; - case 16: return 2; - case 24: return 3; - } - } - - unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { - return MI.getOperand(Op).getImm() - 1; - } - unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { return 64 - MI.getOperand(Op).getImm(); @@ -280,12 +283,6 @@ public: unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; - unsigned getMsbOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; - - unsigned getSsatBitPosValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; - unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, @@ -306,6 +303,9 @@ public: unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned NEONThumb2DataIPostEncoder(const MCInst &MI, unsigned EncodedValue) const; unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI, @@ -439,8 +439,10 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, bool isAdd = true; // Special value for #-0 - if (SImm == INT32_MIN) + if (SImm == INT32_MIN) { SImm = 0; + isAdd = false; + } // Immediate is always encoded as positive. The 'U' bit controls add vs sub. if (SImm < 0) { @@ -470,11 +472,34 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, return 0; } +// Thumb BL and BLX use a strange offset encoding where bits 22 and 21 are +// determined by negating them and XOR'ing them with bit 23. +static int32_t encodeThumbBLOffset(int32_t offset) { + offset >>= 1; + uint32_t S = (offset & 0x800000) >> 23; + uint32_t J1 = (offset & 0x400000) >> 22; + uint32_t J2 = (offset & 0x200000) >> 21; + J1 = (~J1 & 0x1); + J2 = (~J2 & 0x1); + J1 ^= S; + J2 ^= S; + + offset &= ~0x600000; + offset |= J1 << 22; + offset |= J2 << 21; + + return offset; +} + /// getThumbBLTargetOpValue - Return encoding info for immediate branch target. 
uint32_t ARMMCCodeEmitter:: getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, + Fixups); + return encodeThumbBLOffset(MO.getImm()); } /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate @@ -482,28 +507,43 @@ getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, + Fixups); + return encodeThumbBLOffset(MO.getImm()); } /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target. uint32_t ARMMCCodeEmitter:: getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, + Fixups); + return (MO.getImm() >> 1); } /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target. uint32_t ARMMCCodeEmitter:: getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, + Fixups); + return (MO.getImm() >> 1); } /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target. 
uint32_t ARMMCCodeEmitter:: getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups); + return (MO.getImm() >> 1); } /// Return true if this branch has a non-always predication @@ -513,9 +553,9 @@ static bool HasConditionalBranch(const MCInst &MI) { for (int i = 0; i < NumOp-1; ++i) { const MCOperand &MCOp1 = MI.getOperand(i); const MCOperand &MCOp2 = MI.getOperand(i + 1); - if (MCOp1.isImm() && MCOp2.isReg() && + if (MCOp1.isImm() && MCOp2.isReg() && (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) { - if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL) + if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL) return true; } } @@ -541,15 +581,32 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - if (HasConditionalBranch(MI)) + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) { + if (HasConditionalBranch(MI)) + return ::getBranchTargetOpValue(MI, OpIdx, + ARM::fixup_arm_condbranch, Fixups); return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_condbranch, Fixups); - return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_uncondbranch, Fixups); -} + ARM::fixup_arm_uncondbranch, Fixups); + } + return MO.getImm() >> 2; +} +uint32_t ARMMCCodeEmitter:: +getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) { + if (HasConditionalBranch(MI)) + return ::getBranchTargetOpValue(MI, OpIdx, + ARM::fixup_arm_condbranch, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, + ARM::fixup_arm_uncondbranch, Fixups); + } + return MO.getImm() >> 1; +} /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit /// immediate branch target. 
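The J1/J2 swizzle in encodeThumbBLOffset above is easiest to check with a round trip. Here is a minimal standalone sketch (not part of the patch; encodeThumbBL and decodeThumbBL are invented names) that mirrors the encoding and applies the inverse a disassembler would use:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Mirror of encodeThumbBLOffset: I1/I2 (offset bits 22/21) are stored as
// J1 = NOT(I1) XOR S and J2 = NOT(I2) XOR S, where S is the sign (bit 23)
// of the halfword offset. Bits above 23 are left for the instruction
// encoding to mask off.
static int32_t encodeThumbBL(int32_t ByteOffset) {
  int32_t offset = ByteOffset >> 1; // halfword granularity
  uint32_t S  = (offset & 0x800000) >> 23;
  uint32_t I1 = (offset & 0x400000) >> 22;
  uint32_t I2 = (offset & 0x200000) >> 21;
  uint32_t J1 = (~I1 & 0x1) ^ S;
  uint32_t J2 = (~I2 & 0x1) ^ S;
  offset &= ~0x600000;
  offset |= J1 << 22;
  offset |= J2 << 21;
  return offset;
}

// Inverse transform: rebuild I1/I2 from J1/J2, sign-extend the 24-bit
// halfword offset, and convert back to bytes.
static int32_t decodeThumbBL(int32_t Encoded) {
  uint32_t E  = (uint32_t)Encoded;
  uint32_t S  = (E >> 23) & 0x1;
  uint32_t I1 = ~((E >> 22) ^ S) & 0x1;
  uint32_t I2 = ~((E >> 21) ^ S) & 0x1;
  uint32_t Val = (E & 0x1fffff) | (I1 << 22) | (I2 << 21) | (S << 23);
  int32_t Halfwords =
      (Val & 0x800000) ? (int32_t)(Val | 0xff000000) : (int32_t)Val;
  return Halfwords * 2;
}

int main() {
  for (int32_t Off : {0, 2, -2, 4096, -4096, 0xfffffe, -0x1000000})
    assert(decodeThumbBL(encodeThumbBL(Off)) == Off);
  return 0;
}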
@@ -579,9 +636,18 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!"); - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12, - Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12, + Fixups); + int32_t offset = MO.getImm(); + uint32_t Val = 0x2000; + if (offset < 0) { + Val = 0x1000; + offset *= -1; + } + Val |= offset; + return Val; } /// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label @@ -589,9 +655,16 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!"); - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12, - Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12, + Fixups); + int32_t Val = MO.getImm(); + if (Val < 0) { + Val *= -1; + Val |= 0x1000; + } + return Val; } /// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label @@ -599,9 +672,11 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!"); - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10, - Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10, + Fixups); + return MO.getImm(); } /// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg' @@ -635,17 +710,26 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, Imm12 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. - assert(MO.isExpr() && "Unexpected machine operand type!"); - const MCExpr *Expr = MO.getExpr(); - - MCFixupKind Kind; - if (isThumb2()) - Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12); - else - Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); - ++MCNumCPRelocations; + MCFixupKind Kind; + if (isThumb2()) + Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12); + else + Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12); + Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + + ++MCNumCPRelocations; + } else { + Reg = ARM::PC; + int32_t Offset = MO.getImm(); + if (Offset < 0) { + Offset *= -1; + isAdd = false; + } + Imm12 = Offset; + } } else isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups); @@ -657,6 +741,37 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, return Binary; } +/// getT2Imm8s4OpValue - Return encoding info for +/// '+/- imm8<<2' operand. +uint32_t ARMMCCodeEmitter:: +getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + // FIXME: The immediate operand should have already been encoded like this + // before ever getting here. 
The encoder method should just need to combine + // the MI operands for the register and the offset into a single + // representation for the complex operand in the .td file. This isn't just + // style, unfortunately. As-is, we can't represent the distinct encoding + // for #-0. + + // {8} = (U)nsigned (add == '1', sub == '0') + // {7-0} = imm8 + int32_t Imm8 = MI.getOperand(OpIdx).getImm(); + bool isAdd = Imm8 >= 0; + + // Immediate is always encoded as positive. The 'U' bit controls add vs sub. + if (Imm8 < 0) + Imm8 = -Imm8; + + // Scaled by 4. + Imm8 /= 4; + + uint32_t Binary = Imm8 & 0xff; + // Immediate is always encoded as positive. The 'U' bit controls add vs sub. + if (isAdd) + Binary |= (1 << 8); + return Binary; +} + /// getT2AddrModeImm8s4OpValue - Return encoding info for /// 'reg +/- imm8<<2' operand. uint32_t ARMMCCodeEmitter:: @@ -683,6 +798,12 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, } else isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups); + // FIXME: The immediate operand should have already been encoded like this + // before ever getting here. The encoder method should just need to combine + // the MI operands for the register and the offset into a single + // representation for the complex operand in the .td file. This isn't just + // style, unfortunately. As-is, we can't represent the distinct encoding + // for #-0. uint32_t Binary = (Imm8 >> 2) & 0xff; // Immediate is always encoded as positive. The 'U' bit controls add vs sub. if (isAdd) @@ -691,6 +812,20 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, return Binary; } +/// getT2AddrModeImm0_1020s4OpValue - Return encoding info for +/// 'reg + imm8<<2' operand. +uint32_t ARMMCCodeEmitter:: +getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + // {11-8} = reg + // {7-0} = imm8 + const MCOperand &MO = MI.getOperand(OpIdx); + const MCOperand &MO1 = MI.getOperand(OpIdx + 1); + unsigned Reg = getARMRegisterNumbering(MO.getReg()); + unsigned Imm8 = MO1.getImm(); + return (Reg << 8) | Imm8; +} + // FIXME: This routine assumes that a binary // expression will always result in a PCRel expression // In reality, its only true if one or more subexpressions @@ -818,6 +953,17 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, } uint32_t ARMMCCodeEmitter:: +getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + // {4} isAdd + // {3-0} Rm + const MCOperand &MO = MI.getOperand(OpIdx); + const MCOperand &MO1 = MI.getOperand(OpIdx+1); + bool isAdd = MO1.getImm() != 0; + return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4); +} + +uint32_t ARMMCCodeEmitter:: getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { // {9} 1 == imm8, 0 == Rm @@ -891,7 +1037,10 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups); + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups); + return (MO.getImm() >> 2); } /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand. 
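The sign-magnitude convention used by getT2Imm8s4OpValue above (a U bit selecting add or sub, plus an 8-bit magnitude scaled by 4) is what makes #-0 unrepresentable from a plain signed offset, as the FIXME notes. A minimal standalone sketch of the convention, with invented names:

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Mirrors the layout produced by getT2Imm8s4OpValue:
// {8} = U (add == 1, sub == 0), {7-0} = |offset| / 4.
static uint32_t encodeImm8s4(int32_t Offset) {
  bool isAdd = Offset >= 0; // a plain int32_t cannot ask for '#-0'
  uint32_t Imm8 = (uint32_t)std::abs(Offset) / 4;
  return (Imm8 & 0xff) | (isAdd ? (1 << 8) : 0);
}

static int32_t decodeImm8s4(uint32_t Binary) {
  int32_t Offset = (int32_t)(Binary & 0xff) * 4;
  return (Binary & (1 << 8)) ? Offset : -Offset;
}

int main() {
  assert(encodeImm8s4(0)   == 0x100); // '#+0': U set, zero magnitude
  assert(encodeImm8s4(4)   == 0x101);
  assert(encodeImm8s4(-8)  == 0x002);
  assert(decodeImm8s4(0x0ff) == -1020);
  // '#-0' (U clear, zero magnitude) decodes fine but has no int32_t source,
  // which is exactly the representability gap the FIXME describes.
  assert(decodeImm8s4(0x000) == 0);
  return 0;
}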
@@ -934,20 +1083,17 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, } unsigned ARMMCCodeEmitter:: -getSORegOpValue(const MCInst &MI, unsigned OpIdx, +getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be - // shifted. The second is either Rs, the amount to shift by, or reg0 in which - // case the imm contains the amount to shift by. + // shifted. The second is Rs, the amount to shift by, and the third specifies + // the type of the shift. // // {3-0} = Rm. - // {4} = 1 if reg shift, 0 if imm shift + // {4} = 1 // {6-5} = type - // If reg shift: - // {11-8} = Rs - // {7} = 0 - // else (imm shift) - // {11-7} = imm + // {11-8} = Rs + // {7} = 0 const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); @@ -966,45 +1112,70 @@ getSORegOpValue(const MCInst &MI, unsigned OpIdx, // LSR - 0011 // ASR - 0101 // ROR - 0111 - // RRX - 0110 and bit[11:8] clear. switch (SOpc) { default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::lsl: SBits = 0x1; break; case ARM_AM::lsr: SBits = 0x3; break; case ARM_AM::asr: SBits = 0x5; break; case ARM_AM::ror: SBits = 0x7; break; - case ARM_AM::rrx: SBits = 0x6; break; - } - } else { - // Set shift operand (bit[6:4]). - // LSL - 000 - // LSR - 010 - // ASR - 100 - // ROR - 110 - switch (SOpc) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::lsl: SBits = 0x0; break; - case ARM_AM::lsr: SBits = 0x2; break; - case ARM_AM::asr: SBits = 0x4; break; - case ARM_AM::ror: SBits = 0x6; break; } } Binary |= SBits << 4; - if (SOpc == ARM_AM::rrx) - return Binary; - // Encode the shift operation Rs or shift_imm (except rrx). - if (Rs) { - // Encode Rs bit[11:8]. - assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + // Encode the shift operation Rs. + // Encode Rs bit[11:8]. + assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); + return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); +} + +unsigned ARMMCCodeEmitter:: +getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + // Sub-operands are [reg, imm]. The first register is Rm, the reg to be + // shifted. The second is the amount to shift by. + // + // {3-0} = Rm. + // {4} = 0 + // {6-5} = type + // {11-7} = imm + + const MCOperand &MO = MI.getOperand(OpIdx); + const MCOperand &MO1 = MI.getOperand(OpIdx + 1); + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); + + // Encode Rm. + unsigned Binary = getARMRegisterNumbering(MO.getReg()); + + // Encode the shift opcode. + unsigned SBits = 0; + + // Set shift operand (bit[6:4]). + // LSL - 000 + // LSR - 010 + // ASR - 100 + // ROR - 110 + // RRX - 110 and bit[11:8] clear. + switch (SOpc) { + default: llvm_unreachable("Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x0; break; + case ARM_AM::lsr: SBits = 0x2; break; + case ARM_AM::asr: SBits = 0x4; break; + case ARM_AM::ror: SBits = 0x6; break; + case ARM_AM::rrx: + Binary |= 0x60; + return Binary; } // Encode shift_imm bit[11:7]. 
- return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; + Binary |= SBits << 4; + unsigned Offset = ARM_AM::getSORegOffset(MO1.getImm()); + assert(Offset && "Offset must be in range 1-32!"); + if (Offset == 32) Offset = 0; + return Binary | (Offset << 7); } + unsigned ARMMCCodeEmitter:: getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, SmallVectorImpl<MCFixup> &Fixups) const { @@ -1106,6 +1277,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, case ARM_AM::lsl: SBits = 0x0; break; case ARM_AM::lsr: SBits = 0x2; break; case ARM_AM::asr: SBits = 0x4; break; + case ARM_AM::rrx: // FALLTHROUGH case ARM_AM::ror: SBits = 0x6; break; } @@ -1131,24 +1303,6 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, } unsigned ARMMCCodeEmitter:: -getMsbOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { - // MSB - 5 bits. - uint32_t lsb = MI.getOperand(Op-1).getImm(); - uint32_t width = MI.getOperand(Op).getImm(); - uint32_t msb = lsb+width-1; - assert (width != 0 && msb < 32 && "Illegal bit width!"); - return msb; -} - -unsigned ARMMCCodeEmitter:: -getSsatBitPosValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { - // For ssat instructions, the bit position should be encoded decremented by 1 - return MI.getOperand(Op).getImm()-1; -} - -unsigned ARMMCCodeEmitter:: getRegisterListOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { // VLDM/VSTM: @@ -1158,8 +1312,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, // LDM/STM: // {15-0} = Bitfield of GPRs. unsigned Reg = MI.getOperand(Op).getReg(); - bool SPRRegs = ARM::SPRRegClass.contains(Reg); - bool DPRRegs = ARM::DPRRegClass.contains(Reg); + bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); + bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); unsigned Binary = 0; @@ -1299,7 +1453,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, Size = Desc.getSize(); else llvm_unreachable("Unexpected instruction size!"); - + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); // Thumb 32-bit wide instructions need to emit the high order halfword // first. 
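getSORegImmOpValue above relies on two ARM encoding conventions: a shift amount of 32 for LSR/ASR is stored as 0, and ROR with a zero amount is reserved for RRX. A standalone sketch of those rules, with invented names and the same bit layout ({11-7} = imm, {6-5} = type, {4} = 0):

#include <cassert>
#include <cstdint>

// Shift types as they appear in bits {6-5} of an ARM so_reg_imm operand.
enum ShiftType { LSL = 0, LSR = 1, ASR = 2, ROR = 3 };

// An amount of 32 for LSR/ASR is encoded as 0, and "ROR #0" really means
// RRX, so a rotate amount of 0 must never be emitted for ROR.
static uint32_t encodeShift(ShiftType Ty, unsigned Amt, unsigned Rm) {
  assert(Rm < 16 && "Rm is a 4-bit field");
  if (Ty == LSR || Ty == ASR) {
    assert(Amt >= 1 && Amt <= 32 && "shift amount out of range");
    if (Amt == 32)
      Amt = 0; // 32 is encoded as 0 for LSR/ASR
  } else {
    assert(Amt < 32 && "shift amount out of range");
    assert(!(Ty == ROR && Amt == 0) && "ROR #0 encodes RRX");
  }
  return Rm | ((unsigned)Ty << 5) | (Amt << 7);
}

int main() {
  assert(encodeShift(LSL, 0, 1)  == 0x001);          // plain register, r1
  assert(encodeShift(ASR, 32, 2) == (2u << 5 | 2));  // asr #32, imm field 0
  assert(encodeShift(LSR, 1, 0)  == (1u << 5 | 1u << 7));
  return 0;
}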
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 2727ba8..2727ba8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index 0a2e883..0a2e883 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCExpr.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index f8fcf2b..a55c410 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -13,10 +13,16 @@ #include "ARMMCTargetDesc.h" #include "ARMMCAsmInfo.h" +#include "ARMBaseInfo.h" +#include "InstPrinter/ARMInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_REGINFO_MC_DESC #include "ARMGenRegisterInfo.inc" @@ -35,7 +41,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) { unsigned Len = TT.size(); unsigned Idx = 0; - // FIXME: Enahnce Triple helper class to extract ARM version. + // FIXME: Enhance Triple helper class to extract ARM version. bool isThumb = false; if (Len >= 5 && TT.substr(0, 4) == "armv") Idx = 4; @@ -50,18 +56,21 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) { unsigned SubVer = TT[Idx]; if (SubVer >= '7' && SubVer <= '9') { if (Len >= Idx+2 && TT[Idx+1] == 'm') { - // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv - ARMArchFeature = "+v7,+noarm,+db,+hwdiv"; + // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') { // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, - // FeatureT2XtPk - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk"; + // FeatureT2XtPk, FeatureMClass + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass"; } else - // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2 - ARMArchFeature = "+v7,+neon,+db,+t2dsp"; + // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk + ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; } else if (SubVer == '6') { if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') ARMArchFeature = "+v6t2"; + else if (Len >= Idx+2 && TT[Idx+1] == 'm') + // v6m: FeatureNoARM, FeatureMClass + ARMArchFeature = "+v6t2,+noarm,+mclass"; else ARMArchFeature = "+v6"; } else if (SubVer == '5') { @@ -80,6 +89,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) { ARMArchFeature += ",+thumb-mode"; } + Triple TheTriple(TT); + if (TheTriple.getOS() == Triple::NativeClient) { + if (ARMArchFeature.empty()) + ARMArchFeature = "+nacl-mode"; + else + ARMArchFeature += ",+nacl-mode"; + } + return ARMArchFeature; } @@ -98,36 +115,18 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -// Force static initialization.
-extern "C" void LLVMInitializeARMMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget, - ARM_MC::createARMMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget, - ARM_MC::createARMMCSubtargetInfo); -} - static MCInstrInfo *createARMMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitARMMCInstrInfo(X); return X; } -extern "C" void LLVMInitializeARMMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo); -} - -static MCRegisterInfo *createARMMCRegisterInfo() { +static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { MCRegisterInfo *X = new MCRegisterInfo(); - InitARMMCRegisterInfo(X); + InitARMMCRegisterInfo(X, ARM::LR); return X; } -extern "C" void LLVMInitializeARMMCRegInfo() { - TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo); -} - static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); @@ -137,8 +136,128 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { return new ARMELFMCAsmInfo(); } -extern "C" void LLVMInitializeARMMCAsmInfo() { - // Register the target asm info. +static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) { + Triple TheTriple(TT); + // Default relocation model on Darwin is PIC, not DynamicNoPIC. + RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; + } + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +// This is duplicated code. Refactor this. +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) + return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + + if (TheTriple.isOSWindows()) { + llvm_unreachable("ARM does not support Windows COFF format"); + return NULL; + } + + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + +static MCInstPrinter *createARMMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new ARMInstPrinter(MAI, STI); + return 0; +} + +namespace { + +class ARMMCInstrAnalysis : public MCInstrAnalysis { +public: + ARMMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + + virtual bool isUnconditionalBranch(const MCInst &Inst) const { + // BCCs with the "always" predicate are unconditional branches. + if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL) + return true; + return MCInstrAnalysis::isUnconditionalBranch(Inst); + } + + virtual bool isConditionalBranch(const MCInst &Inst) const { + // BCCs with the "always" predicate are unconditional branches. + if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL) + return false; + return MCInstrAnalysis::isConditionalBranch(Inst); + } + + uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size) const { + // We only handle PCRel branches for now. + if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL) + return -1ULL; + + int64_t Imm = Inst.getOperand(0).getImm(); + // FIXME: This is not right for thumb. 
+ return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes. + } +}; + +} + +static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) { + return new ARMMCInstrAnalysis(Info); +} + +// Force static initialization. +extern "C" void LLVMInitializeARMTargetMC() { + // Register the MC asm info. RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo); RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheARMTarget, createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheThumbTarget, createARMMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget, + ARM_MC::createARMMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget, + ARM_MC::createARMMCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(TheARMTarget, + createARMMCInstrAnalysis); + TargetRegistry::RegisterMCInstrAnalysis(TheThumbTarget, + createARMMCInstrAnalysis); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheARMTarget, createARMMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheThumbTarget, createARMMCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(TheARMTarget, createARMAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheThumbTarget, createARMAsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 74701e3..9b3d3bd 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -14,12 +14,19 @@ #ifndef ARMMCTARGETDESC_H #define ARMMCTARGETDESC_H +#include "llvm/Support/DataTypes.h" #include <string> namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; class MCSubtargetInfo; -class Target; class StringRef; +class Target; +class raw_ostream; extern Target TheARMTarget, TheThumbTarget; @@ -33,6 +40,18 @@ namespace ARM_MC { StringRef FS); } +MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT); + +/// createARMMachObjectWriter - Construct an ARM Mach-O object writer. +MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); + } // End llvm namespace // Defines symbolic names for ARM registers. 
This defines a mapping from diff --git a/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index a36e47d..352c73e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "ARM.h" -#include "ARMFixupKinds.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" @@ -19,7 +19,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; using namespace llvm::object; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 68daf42..0000000 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_llvm_library(LLVMARMDesc - ARMMCTargetDesc.cpp - ARMMCAsmInfo.cpp - ) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile deleted file mode 100644 index 448ed9d..0000000 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/ARM/TargetDesc/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMARMDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp b/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp deleted file mode 100644 index c85d1e9..0000000 --- a/contrib/llvm/lib/Target/ARM/NEONMoveFix.cpp +++ /dev/null @@ -1,149 +0,0 @@ -//===-- NEONMoveFix.cpp - Convert vfp reg-reg moves into neon ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "neon-mov-fix" -#include "ARM.h" -#include "ARMMachineFunctionInfo.h" -#include "ARMInstrInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(NumVMovs, "Number of reg-reg moves converted"); - -namespace { - struct NEONMoveFixPass : public MachineFunctionPass { - static char ID; - NEONMoveFixPass() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "NEON reg-reg move conversion"; - } - - private: - const TargetRegisterInfo *TRI; - const ARMBaseInstrInfo *TII; - bool isA8; - - typedef DenseMap<unsigned, const MachineInstr*> RegMap; - - bool InsertMoves(MachineBasicBlock &MBB); - }; - char NEONMoveFixPass::ID = 0; -} - -static bool inNEONDomain(unsigned Domain, bool isA8) { - return (Domain & ARMII::DomainNEON) || - (isA8 && (Domain & ARMII::DomainNEONA8)); -} - -bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { - RegMap Defs; - bool Modified = false; - - // Walk over MBB tracking the def points of the registers. - MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); - MachineBasicBlock::iterator NextMII; - for (; MII != E; MII = NextMII) { - NextMII = llvm::next(MII); - MachineInstr *MI = &*MII; - - if (MI->getOpcode() == ARM::VMOVD && - !TII->isPredicated(MI)) { - unsigned SrcReg = MI->getOperand(1).getReg(); - // If we do not find an instruction defining the reg, this means the - // register should be live-in for this BB. It's always to better to use - // NEON reg-reg moves. - unsigned Domain = ARMII::DomainNEON; - RegMap::iterator DefMI = Defs.find(SrcReg); - if (DefMI != Defs.end()) { - Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask; - // Instructions in general domain are subreg accesses. - // Map them to NEON reg-reg moves. - if (Domain == ARMII::DomainGeneral) - Domain = ARMII::DomainNEON; - } - - if (inNEONDomain(Domain, isA8)) { - // Convert VMOVD to VORRd - unsigned DestReg = MI->getOperand(0).getReg(); - - DEBUG({errs() << "vmov convert: "; MI->dump();}); - - // It's safe to ignore imp-defs / imp-uses here, since: - // - We're running late, no intelligent condegen passes should be run - // afterwards - // - The imp-defs / imp-uses are superregs only, we don't care about - // them. - AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(), - TII->get(ARM::VORRd), DestReg) - .addReg(SrcReg).addReg(SrcReg)); - MBB.erase(MI); - MachineBasicBlock::iterator I = prior(NextMII); - MI = &*I; - - DEBUG({errs() << " into: "; MI->dump();}); - - Modified = true; - ++NumVMovs; - } else { - assert((Domain & ARMII::DomainVFP) && "Invalid domain!"); - // Do nothing. - } - } - - // Update def information. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned MOReg = MO.getReg(); - - Defs[MOReg] = MI; - // Catch aliases as well. 
- for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R) - Defs[*R] = MI; - } - } - - return Modified; -} - -bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) { - ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>(); - const TargetMachine &TM = Fn.getTarget(); - - if (AFI->isThumb1OnlyFunction()) - return false; - - TRI = TM.getRegisterInfo(); - TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); - isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8(); - - bool Modified = false; - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - MachineBasicBlock &MBB = *MFI; - Modified |= InsertMoves(MBB); - } - - return Modified; -} - -/// createNEONMoveFixPass - Returns an instance of the NEON reg-reg moves fix -/// pass. -FunctionPass *llvm::createNEONMoveFixPass() { - return new NEONMoveFixPass(); -} diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp index 163a0a9..500e3de 100644 --- a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp @@ -9,7 +9,7 @@ #include "ARM.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheARMTarget, llvm::TheThumbTarget; diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index c258870..d848177 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -21,7 +21,7 @@ using namespace llvm; -bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { +bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the @@ -133,9 +133,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // Adjust FP so it points to the stack slot that contains the previous FP. if (hasFP(MF)) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0) - .setMIFlags(MachineInstr::FrameSetup); + .setMIFlags(MachineInstr::FrameSetup)); if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. @@ -155,6 +155,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + // Thumb1 does not currently support dynamic stack realignment. Report a + // fatal error rather than silently generate bad code. + if (RegInfo->needsStackRealignment(MF)) + report_fatal_error("Dynamic stack realignment not supported for thumb1."); + // If we need a base pointer, set it up here. It's whatever the value
Any variable size objects // will be allocated after this, so we can still use the base pointer diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp index 4eb0b6c..e8ed482 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" #include "Thumb1RegisterInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -182,7 +181,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, int Opc = 0; int ExtraOpc = 0; bool NeedCC = false; - bool NeedPred = false; if (DestReg == BaseReg && BaseReg == ARM::SP) { assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); @@ -217,7 +215,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, } else { Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; NumBits = 8; - NeedPred = NeedCC = true; + NeedCC = true; } isTwoAddr = true; } @@ -241,7 +239,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, Bytes -= ThisVal; const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3); const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg).setMIFlags(MIFlags)); + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg) + .setMIFlags(MIFlags)); AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); } else { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) @@ -262,18 +261,15 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, if (NeedCC) MIB = AddDefaultT1CC(MIB); MIB.addReg(DestReg).addImm(ThisVal); - if (NeedPred) - MIB = AddDefaultPred(MIB); + MIB = AddDefaultPred(MIB); MIB.setMIFlags(MIFlags); - } - else { + } else { bool isKill = BaseReg != ARM::SP; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (NeedCC) MIB = AddDefaultT1CC(MIB); MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); - if (NeedPred) - MIB = AddDefaultPred(MIB); + MIB = AddDefaultPred(MIB); MIB.setMIFlags(MIFlags); BaseReg = DestReg; @@ -285,7 +281,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, Scale = 1; Chunk = ((1 << NumBits) - 1) * Scale; Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - NeedPred = NeedCC = isTwoAddr = true; + NeedCC = isTwoAddr = true; } } } @@ -405,7 +401,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, unsigned Scale = 1; if (FrameReg != ARM::SP) { Opcode = ARM::tADDi3; - MI.setDesc(TII.get(Opcode)); NumBits = 3; } else { NumBits = 8; @@ -419,10 +414,9 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, // Turn it into a move. MI.setDesc(TII.get(ARM::tMOVr)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - // Remove offset and add predicate operands. 
+ // Remove offset MI.RemoveOperand(FrameRegIdx+1); MachineInstrBuilder MIB(&MI); - AddDefaultPred(MIB); return true; } @@ -431,6 +425,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, if (((Offset / Scale) & ~Mask) == 0) { // Replace the FrameIndex with sp / fp if (Opcode == ARM::tADDi3) { + MI.setDesc(TII.get(Opcode)); removeOperands(MI, FrameRegIdx); MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) @@ -459,6 +454,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, // r0 = add sp, 255*4 // r0 = add r0, (imm - 255*4) if (Opcode == ARM::tADDi3) { + MI.setDesc(TII.get(Opcode)); removeOperands(MI, FrameRegIdx); MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); @@ -479,10 +475,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, MI.setDesc(TII.get(ARM::tADDhirr)); MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true); MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false); - if (Opcode == ARM::tADDi3) { - MachineInstrBuilder MIB(&MI); - AddDefaultPred(MIB); - } } return true; } else { @@ -545,9 +537,9 @@ Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - bool Done = false; - Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII); + bool Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII); assert (Done && "Unable to resolve frame index!"); + (void)Done; } /// saveScavengerRegister - Spill the register so it can be used by the diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp index 360ec00..b627400 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -124,6 +124,27 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, if (Uses.count(DstReg) || Defs.count(SrcReg)) return false; + // If the CPSR is defined by this copy, then we don't want to move it. E.g., + // if we have: + // + // movs r1, r1 + // rsb r1, 0 + // movs r2, r2 + // rsb r2, 0 + // + // we don't want this to be converted to: + // + // movs r1, r1 + // movs r2, r2 + // itt mi + // rsb r1, 0 + // rsb r2, 0 + // + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.hasOptionalDef() && + MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR) + return false; + // Then peek at the next instruction to see if it's predicated on CC or OCC. // If not, then there is nothing to be gained by moving the copy. 
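Backing up to the Thumb2ITBlockPass hunk above: the new early-out keys off the optional CC-out operand. A compact editorial restatement follows (the helper name is invented; the patch inlines this logic in MoveCopyOutOfITBlock):

// An optional-def operand, when present, is the last operand declared by
// the MCInstrDesc. If it is CPSR, the copy sets the flags, and moving it
// would reorder flag definitions relative to the IT block's predicated
// instructions, as the movs/rsb example in the hunk illustrates.
static bool copyDefinesCPSR(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();
  return MCID.hasOptionalDef() &&
         MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR;
}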
MachineBasicBlock::iterator I = MI; ++I; diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 51b56aa..cf040c82 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -14,9 +14,9 @@ #include "Thumb2InstrInfo.h" #include "ARM.h" #include "ARMConstantPoolValue.h" -#include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "Thumb2InstrInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -122,7 +122,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || - RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) { + RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass || + RC == ARM::GPRnopcRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -149,7 +150,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || - RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) { + RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass || + RC == ARM::GPRnopcRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -233,9 +235,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) { assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?"); Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - // FIXME: Fix Thumb1 immediate encoding. 
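// Editorial note on the replacement lines that follow (not part of the
// patch): tADDspi/tSUBspi are predicable, so the BuildMI chain is wrapped
// in AddDefaultPred to append the default AL ("always") predicate
// operands; without them the instruction is missing operands its
// description declares and is malformed.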
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags)); NumBytes = 0; continue; } diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index c741a6e..89a155c 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -9,11 +9,11 @@ #define DEBUG_TYPE "t2-reduce-size" #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseRegisterInfo.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" #include "Thumb2InstrInfo.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -97,11 +97,11 @@ namespace { { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 }, { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 }, { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 }, + { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 }, { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 }, + { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 }, // FIXME: Clean this up after splitting each Thumb load / store opcode // into multiple ones. @@ -507,6 +507,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. + AddDefaultPred(MIB); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); @@ -546,6 +547,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, } case ARM::t2RSBri: case ARM::t2RSBSri: + case ARM::t2SXTB: + case ARM::t2SXTH: + case ARM::t2UXTB: + case ARM::t2UXTH: if (MI->getOperand(2).getImm() == 0) return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); break; @@ -742,7 +747,11 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; if ((MCID.getOpcode() == ARM::t2RSBSri || - MCID.getOpcode() == ARM::t2RSBri) && i == 2) + MCID.getOpcode() == ARM::t2RSBri || + MCID.getOpcode() == ARM::t2SXTB || + MCID.getOpcode() == ARM::t2SXTH || + MCID.getOpcode() == ARM::t2UXTB || + MCID.getOpcode() == ARM::t2UXTH) && i == 2) // Skip the zero immediate operand, it's now implicit. 
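// Editorial note (not part of the patch): t2SXTB/t2SXTH/t2UXTB/t2UXTH take
// a rotate-amount immediate, while the 16-bit tSXTB/tSXTH/tUXTB/tUXTH
// encodings have no rotation field. ReduceSpecial therefore narrows these
// opcodes only when getOperand(2).getImm() == 0, and the zero rotation is
// skipped here because the narrow encoding implies it.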
continue; bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); diff --git a/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp index 46ae286..5dce06a 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp @@ -26,8 +26,8 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index 7b91fea..f877c65 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -80,7 +80,7 @@ namespace { // Otherwise we don't know that the it's okay to zapnot this entire // byte. Only do this iff we can prove that the missing bits are // already null, so the bytezap doesn't need to really null them. - BitsToCheck |= ~Constant & (0xFF << 8*i); + BitsToCheck |= ~Constant & (0xFFULL << 8*i); } } } @@ -114,9 +114,8 @@ namespace { if (!x) return 0; unsigned at = CountLeadingZeros_64(x); uint64_t complow = 1ULL << (63 - at); - uint64_t comphigh = 1ULL << (64 - at); - //cerr << x << ":" << complow << ":" << comphigh << "\n"; - if (abs64(complow - x) <= abs64(comphigh - x)) + uint64_t comphigh = complow << 1; + if (x - complow <= comphigh - x) return complow; else return comphigh; diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp index de003fb..3057eb8 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp @@ -49,6 +49,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) // Set up the TargetLowering object. //I am having problems with shr n i8 1 setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 
addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass); addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass); @@ -153,6 +154,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setStackPointerRegisterToSaveRestore(Alpha::R30); setJumpBufSize(272); @@ -160,10 +164,12 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setMinFunctionAlignment(4); + setInsertFencesForAtomic(true); + computeRegisterProperties(); } -MVT::SimpleValueType AlphaTargetLowering::getSetCCResultType(EVT VT) const { +EVT AlphaTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i64; } diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h index 13383f4..80f8efa 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h +++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h @@ -66,7 +66,7 @@ namespace llvm { virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; } /// getSetCCResultType - Get the SETCC result ValueType - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(EVT VT) const; /// LowerOperation - Provide custom lowering hooks for some operations. /// diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp index 4dcec8f..8df2ed7 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -16,7 +16,6 @@ #include "AlphaMachineFunctionInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td index b201712..c8c9377 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td +++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td @@ -607,6 +607,8 @@ def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)), def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)), (MB)>; +def : Pat<(atomic_fence (imm), (imm)), (MB)>; + //Basic Floating point ops //Floats diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp index df8f157..8b6230f 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -40,8 +39,7 @@ using namespace llvm; AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii) - : AlphaGenRegisterInfo(), - TII(tii) { + : AlphaGenRegisterInfo(Alpha::R26), TII(tii) { } static long getUpper16(long l) { @@ -178,10 +176,6 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -unsigned AlphaRegisterInfo::getRARegister() const { - return Alpha::R26; -} - unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { 
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -198,16 +192,6 @@ unsigned AlphaRegisterInfo::getEHHandlerRegister() const { return 0; } -int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - llvm_unreachable("What is the dwarf register number"); - return -1; -} - -int AlphaRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, bool isEH) const { - llvm_unreachable("What is the dwarf register number"); - return -1; -} - std::string AlphaRegisterInfo::getPrettyName(unsigned reg) { std::string s(AlphaRegDesc[reg].Name); diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h index 1072bf7..e35be27 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h @@ -42,16 +42,12 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { int SPAdj, RegScavenger *RS = NULL) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - static std::string getPrettyName(unsigned reg); }; diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp index 624a5e2..bd55ce9 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaSubtarget.cpp @@ -13,7 +13,6 @@ #include "AlphaSubtarget.h" #include "Alpha.h" -#include "llvm/Target/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp index 3b65d41..fc9a677 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -14,7 +14,7 @@ #include "AlphaTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; extern "C" void LLVMInitializeAlphaTarget() { @@ -22,19 +22,17 @@ extern "C" void LLVMInitializeAlphaTarget() { RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget); } -AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), +AlphaTargetMachine::AlphaTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), DataLayout("e-f128:128:128-n64"), FrameLowering(Subtarget), Subtarget(TT, CPU, FS), TLInfo(*this), TSInfo(*this) { - setRelocationModel(Reloc::PIC_); } - //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h index cf00e58..48bb948 100644 --- a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h +++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h @@ -36,8 +36,9 @@ class AlphaTargetMachine : public LLVMTargetMachine { AlphaSelectionDAGInfo TSInfo; public: - AlphaTargetMachine(const Target &T, 
const std::string &TT, - const std::string &CPU, const std::string &FS); + AlphaTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp index 562052b..4ad021c 100644 --- a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp @@ -13,10 +13,11 @@ #include "AlphaMCTargetDesc.h" #include "AlphaMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "AlphaGenInstrInfo.inc" @@ -36,8 +37,10 @@ static MCInstrInfo *createAlphaMCInstrInfo() { return X; } -extern "C" void LLVMInitializeAlphaMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo); +static MCRegisterInfo *createAlphaMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAlphaMCRegisterInfo(X, Alpha::R26); + return X; } static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -47,11 +50,29 @@ static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeAlphaMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget, - createAlphaMCSubtargetInfo); +static MCCodeGenInfo *createAlphaMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(Reloc::PIC_, CM); + return X; } -extern "C" void LLVMInitializeAlphaMCAsmInfo() { +// Force static initialization. +extern "C" void LLVMInitializeAlphaTargetMC() { + // Register the MC asm info. RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheAlphaTarget, + createAlphaMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheAlphaTarget, createAlphaMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget, + createAlphaMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index ad0dd26..0000000 --- a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMAlphaDesc - AlphaMCTargetDesc.cpp - AlphaMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile deleted file mode 100644 index d55175f..0000000 --- a/contrib/llvm/lib/Target/Alpha/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Alpha/TargetDesc/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. 
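A pattern worth calling out, since it repeats for ARM above, Alpha here, and Blackfin below: the per-component extern "C" initializers (MCInstrInfo, MCRegInfo, MCSubtargetInfo, MCAsmInfo) are folded into a single LLVMInitialize<Target>TargetMC entry point. A minimal editorial sketch of the resulting shape for a hypothetical target Foo (all Foo names are invented; each create* factory follows the patterns shown in these hunks):

extern "C" void LLVMInitializeFooTargetMC() {
  // Asm info still uses the RAII registration helper.
  RegisterMCAsmInfo<FooMCAsmInfo> X(TheFooTarget);

  // Everything else goes through explicit TargetRegistry hooks.
  TargetRegistry::RegisterMCCodeGenInfo(TheFooTarget, createFooMCCodeGenInfo);
  TargetRegistry::RegisterMCInstrInfo(TheFooTarget, createFooMCInstrInfo);
  TargetRegistry::RegisterMCRegInfo(TheFooTarget, createFooMCRegisterInfo);
  TargetRegistry::RegisterMCSubtargetInfo(TheFooTarget,
                                          createFooMCSubtargetInfo);
}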
-# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMAlphaDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp index f7099b9..bdc69e7 100644 --- a/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp +++ b/contrib/llvm/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp @@ -9,7 +9,7 @@ #include "Alpha.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; llvm::Target llvm::TheAlphaTarget; diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp index 6ba258b..ed9844e 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp @@ -29,9 +29,9 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h index 726fa2c..169aa8e 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ALPHA_FRAMEINFO_H -#define ALPHA_FRAMEINFO_H +#ifndef BLACKFIN_FRAMEINFO_H +#define BLACKFIN_FRAMEINFO_H #include "Blackfin.h" #include "BlackfinSubtarget.h" diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp index d572832..7d4c45f 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -42,6 +42,7 @@ using namespace llvm; BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? setStackPointerRegisterToSaveRestore(BF::SP); setIntDivIsCheap(false); @@ -99,6 +100,7 @@ BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM) // Blackfin has no intrinsics for these particular operations. setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); @@ -134,7 +136,7 @@ const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const { +EVT BlackfinTargetLowering::getSetCCResultType(EVT VT) const { // SETCC always sets the CC register. Technically that is an i1 register, but // that type is not legal, so we treat it as an i32 register. 
return MVT::i32; diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h index b65775b..90908ba 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h @@ -33,7 +33,7 @@ namespace llvm { public: BlackfinTargetLowering(TargetMachine &TM); virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; } - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp index d190ae7..c06a919 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -16,10 +16,10 @@ #include "Blackfin.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_CTOR #include "BlackfinGenInstrInfo.inc" diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp index ae8ee9e..7135676 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp @@ -34,7 +34,7 @@ namespace bfinIntrinsic { } -std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, +std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, Type **Tys, unsigned numTys) const { static const char *const names[] = { #define GET_INTRINSIC_NAME_TABLE @@ -81,8 +81,8 @@ bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const { #include "BlackfinGenIntrinsics.inc" #undef GET_INTRINSIC_ATTRIBUTES -static const FunctionType *getType(LLVMContext &Context, unsigned id) { - const Type *ResultTy = NULL; +static FunctionType *getType(LLVMContext &Context, unsigned id) { + Type *ResultTy = NULL; std::vector<Type*> ArgTys; bool IsVarArg = false; @@ -94,7 +94,7 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) { } Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, - const Type **Tys, + Type **Tys, unsigned numTy) const { assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded"); AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID); diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h index 7c4b5a9..f05db5a 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinIntrinsicInfo.h @@ -19,11 +19,11 @@ namespace llvm { class BlackfinIntrinsicInfo : public TargetIntrinsicInfo { public: - std::string getName(unsigned IntrID, const Type **Tys = 0, + std::string getName(unsigned IntrID, Type **Tys = 0, unsigned numTys = 0) const; unsigned lookupName(const char *Name, unsigned Len) const; bool isOverloaded(unsigned IID) const; - Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0, + Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0, unsigned numTys = 0) const; }; diff --git 
a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 3a7c104..0d415c5 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -37,7 +36,7 @@ using namespace llvm; BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii) - : BlackfinGenRegisterInfo(), Subtarget(st), TII(tii) {} + : BlackfinGenRegisterInfo(BF::RETS), Subtarget(st), TII(tii) {} const unsigned* BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -327,10 +326,6 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -unsigned BlackfinRegisterInfo::getRARegister() const { - return BF::RETS; -} - unsigned BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -347,14 +342,3 @@ unsigned BlackfinRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); return 0; } - -int BlackfinRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - llvm_unreachable("What is the dwarf register number"); - return -1; -} - -int BlackfinRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, - bool isEH) const { - llvm_unreachable("What is the dwarf register number"); - return -1; -} diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h index 86f45c1..6ac22af 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -53,15 +53,11 @@ namespace llvm { int SPAdj, RegScavenger *RS = NULL) const; unsigned getFrameRegister(const MachineFunction &MF) const; - unsigned getRARegister() const; // Exception handling queries. 
unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - // Utility functions void adjustRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp index ec919cd..0bdce09 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinSubtarget.cpp @@ -13,7 +13,7 @@ #include "BlackfinSubtarget.h" #include "Blackfin.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp index a1c9f1c..a4ae46b 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp @@ -13,7 +13,7 @@ #include "BlackfinTargetMachine.h" #include "Blackfin.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -22,10 +22,12 @@ extern "C" void LLVMInitializeBlackfinTarget() { } BlackfinTargetMachine::BlackfinTargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), + StringRef TT, + StringRef CPU, + StringRef FS, + Reloc::Model RM, + CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), DataLayout("e-p:32:32-i64:32-f64:32-n32"), Subtarget(TT, CPU, FS), TLInfo(*this), diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h index bd7dc84..c85337fe2 100644 --- a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -35,8 +35,9 @@ namespace llvm { BlackfinFrameLowering FrameLowering; BlackfinIntrinsicInfo IntrinsicInfo; public: - BlackfinTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + BlackfinTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp index 0fa1471..272e3c2 100644 --- a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp @@ -13,10 +13,11 @@ #include "BlackfinMCTargetDesc.h" #include "BlackfinMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "BlackfinGenInstrInfo.inc" @@ -36,12 +37,12 @@ static MCInstrInfo *createBlackfinMCInstrInfo() { return X; } -extern "C" void LLVMInitializeBlackfinMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget, - createBlackfinMCInstrInfo); +static MCRegisterInfo *createBlackfinMCRegisterInfo(StringRef TT) { 
+ MCRegisterInfo *X = new MCRegisterInfo(); + InitBlackfinMCRegisterInfo(X, BF::RETS); + return X; } - static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { @@ -50,11 +51,31 @@ static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT, return X; } -extern "C" void LLVMInitializeBlackfinMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget, - createBlackfinMCSubtargetInfo); +static MCCodeGenInfo *createBlackfinMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM); + return X; } -extern "C" void LLVMInitializeBlackfinMCAsmInfo() { +// Force static initialization. +extern "C" void LLVMInitializeBlackfinTargetMC() { + // Register the MC asm info. RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheBlackfinTarget, + createBlackfinMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget, + createBlackfinMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheBlackfinTarget, + createBlackfinMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget, + createBlackfinMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 8cd924f..0000000 --- a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMBlackfinDesc - BlackfinMCTargetDesc.cpp - BlackfinMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile deleted file mode 100644 index 6b26101..0000000 --- a/contrib/llvm/lib/Target/Blackfin/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Blackfin/TargetDesc/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMBlackfinDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
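Another recurring change in these hunks: the return-address register moves out of the RegisterInfo virtual getRARegister() and into the generated MCRegisterInfo constructor (ARM::LR, Alpha::R26, BF::RETS above). An editorial sketch of the shape for a hypothetical target (Foo names invented):

static MCRegisterInfo *createFooMCRegisterInfo(StringRef TT) {
  MCRegisterInfo *X = new MCRegisterInfo();
  // The second argument is the return-address register, which used to be
  // reported separately by FooRegisterInfo::getRARegister().
  InitFooMCRegisterInfo(X, Foo::RA);
  return X;
}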
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp index 402e0af..57f1d3e 100644 --- a/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp +++ b/contrib/llvm/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp @@ -9,7 +9,7 @@ #include "Blackfin.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/CBackend/CBackend.cpp b/contrib/llvm/lib/Target/CBackend/CBackend.cpp index 415beb1..69d8c46 100644 --- a/contrib/llvm/lib/Target/CBackend/CBackend.cpp +++ b/contrib/llvm/lib/Target/CBackend/CBackend.cpp @@ -37,10 +37,11 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" @@ -48,6 +49,7 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Host.h" #include "llvm/Config/config.h" #include <algorithm> @@ -62,12 +64,6 @@ extern "C" void LLVMInitializeCBackendTarget() { RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget); } -extern "C" void LLVMInitializeCBackendMCAsmInfo() {} - -extern "C" void LLVMInitializeCBackendMCInstrInfo() {} - -extern "C" void LLVMInitializeCBackendMCSubtargetInfo() {} - namespace { class CBEMCAsmInfo : public MCAsmInfo { public: @@ -86,6 +82,8 @@ namespace { LoopInfo *LI; const Module *TheModule; const MCAsmInfo* TAsm; + const MCRegisterInfo *MRI; + const MCObjectFileInfo *MOFI; MCContext *TCtx; const TargetData* TD; @@ -99,14 +97,14 @@ namespace { /// UnnamedStructIDs - This contains a unique ID for each struct that is /// either anonymous or has no name. 
- DenseMap<const StructType*, unsigned> UnnamedStructIDs; + DenseMap<StructType*, unsigned> UnnamedStructIDs; public: static char ID; explicit CWriter(formatted_raw_ostream &o) : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0), - TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0), - NextAnonValueNumber(0) { + TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0), + OpaqueCounter(0), NextAnonValueNumber(0) { initializeLoopInfoPass(*PassRegistry::getPassRegistry()); FPCounter = 0; } @@ -145,6 +143,8 @@ namespace { delete Mang; delete TCtx; delete TAsm; + delete MRI; + delete MOFI; FPConstantMap.clear(); ByValParams.clear(); intrinsicPrototypesAlreadyGenerated.clear(); @@ -152,20 +152,20 @@ namespace { return false; } - raw_ostream &printType(raw_ostream &Out, const Type *Ty, + raw_ostream &printType(raw_ostream &Out, Type *Ty, bool isSigned = false, const std::string &VariableName = "", bool IgnoreName = false, const AttrListPtr &PAL = AttrListPtr()); - raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty, + raw_ostream &printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned, const std::string &NameSoFar = ""); void printStructReturnPointerFunctionType(raw_ostream &Out, const AttrListPtr &PAL, - const PointerType *Ty); + PointerType *Ty); - std::string getStructName(const StructType *ST); + std::string getStructName(StructType *ST); /// writeOperandDeref - Print the result of dereferencing the specified /// operand with '*'. This is equivalent to printing '*' then using @@ -188,7 +188,7 @@ namespace { void writeOperandWithCast(Value* Operand, const ICmpInst &I); bool writeInstructionCast(const Instruction &I); - void writeMemoryAccess(Value *Operand, const Type *OperandType, + void writeMemoryAccess(Value *Operand, Type *OperandType, bool IsVolatile, unsigned Alignment); private : @@ -200,7 +200,7 @@ namespace { void printIntrinsicDefinition(const Function &F, raw_ostream &Out); void printModuleTypes(); - void printContainedStructs(const Type *Ty, SmallPtrSet<const Type *, 16> &); + void printContainedStructs(Type *Ty, SmallPtrSet<Type *, 16> &); void printFloatingPointConstants(Function &F); void printFloatingPointConstants(const Constant *C); void printFunctionSignature(const Function *F, bool Prototype); @@ -209,7 +209,7 @@ namespace { void printBasicBlock(BasicBlock *BB); void printLoop(Loop *L); - void printCast(unsigned opcode, const Type *SrcTy, const Type *DstTy); + void printCast(unsigned opcode, Type *SrcTy, Type *DstTy); void printConstant(Constant *CPV, bool Static); void printConstantWithCast(Constant *CPV, unsigned Opcode); bool printConstExprCast(const ConstantExpr *CE, bool Static); @@ -288,10 +288,12 @@ namespace { void visitInvokeInst(InvokeInst &I) { llvm_unreachable("Lowerinvoke pass didn't work!"); } - void visitUnwindInst(UnwindInst &I) { llvm_unreachable("Lowerinvoke pass didn't work!"); } + void visitResumeInst(ResumeInst &I) { + llvm_unreachable("DwarfEHPrepare pass didn't work!"); + } void visitUnreachableInst(UnreachableInst &I); void visitPHINode(PHINode &I); @@ -360,8 +362,8 @@ static std::string CBEMangle(const std::string &S) { return Result; } -std::string CWriter::getStructName(const StructType *ST) { - if (!ST->isAnonymous() && !ST->getName().empty()) +std::string CWriter::getStructName(StructType *ST) { + if (!ST->isLiteral() && !ST->getName().empty()) return CBEMangle("l_"+ST->getName().str()); return "l_unnamed_" + utostr(UnnamedStructIDs[ST]); @@ -373,20 +375,20 @@ std::string CWriter::getStructName(const StructType *ST) { /// print it 
as "Struct (*)(...)", for struct return functions. void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, const AttrListPtr &PAL, - const PointerType *TheTy) { - const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType()); + PointerType *TheTy) { + FunctionType *FTy = cast<FunctionType>(TheTy->getElementType()); std::string tstr; raw_string_ostream FunctionInnards(tstr); FunctionInnards << " (*) ("; bool PrintedType = false; FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); - const Type *RetTy = cast<PointerType>(*I)->getElementType(); + Type *RetTy = cast<PointerType>(*I)->getElementType(); unsigned Idx = 1; for (++I, ++Idx; I != E; ++I, ++Idx) { if (PrintedType) FunctionInnards << ", "; - const Type *ArgTy = *I; + Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); @@ -408,7 +410,7 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, } raw_ostream & -CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, +CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned, const std::string &NameSoFar) { assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && "Invalid type for printSimpleType"); @@ -444,7 +446,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, " __attribute__((vector_size(64))) " + NameSoFar); case Type::VectorTyID: { - const VectorType *VTy = cast<VectorType>(Ty); + VectorType *VTy = cast<VectorType>(Ty); return printSimpleType(Out, VTy->getElementType(), isSigned, " __attribute__((vector_size(" + utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar); @@ -461,7 +463,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, // Pass the Type* and the variable name and this prints out the variable // declaration. // -raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, +raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty, bool isSigned, const std::string &NameSoFar, bool IgnoreName, const AttrListPtr &PAL) { if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) { @@ -471,14 +473,14 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, switch (Ty->getTypeID()) { case Type::FunctionTyID: { - const FunctionType *FTy = cast<FunctionType>(Ty); + FunctionType *FTy = cast<FunctionType>(Ty); std::string tstr; raw_string_ostream FunctionInnards(tstr); FunctionInnards << " (" << NameSoFar << ") ("; unsigned Idx = 1; for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I) { - const Type *ArgTy = *I; + Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); @@ -502,7 +504,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, return Out; } case Type::StructTyID: { - const StructType *STy = cast<StructType>(Ty); + StructType *STy = cast<StructType>(Ty); // Check to see if the type is named. 
if (!IgnoreName) @@ -523,7 +525,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, } case Type::PointerTyID: { - const PointerType *PTy = cast<PointerType>(Ty); + PointerType *PTy = cast<PointerType>(Ty); std::string ptrName = "*" + NameSoFar; if (PTy->getElementType()->isArrayTy() || @@ -537,7 +539,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, } case Type::ArrayTyID: { - const ArrayType *ATy = cast<ArrayType>(Ty); + ArrayType *ATy = cast<ArrayType>(Ty); unsigned NumElements = ATy->getNumElements(); if (NumElements == 0) NumElements = 1; // Arrays are wrapped in structs to allow them to have normal @@ -560,7 +562,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { // As a special case, print the array as a string if it is an array of // ubytes or an array of sbytes with positive values. // - const Type *ETy = CPA->getType()->getElementType(); + Type *ETy = CPA->getType()->getElementType(); bool isString = (ETy == Type::getInt8Ty(CPA->getContext()) || ETy == Type::getInt8Ty(CPA->getContext())); @@ -682,7 +684,7 @@ static bool isFPCSafeToPrint(const ConstantFP *CFP) { /// Print out the casting for a cast operation. This does the double casting /// necessary for conversion to the destination type, if necessary. /// @brief Print a cast -void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) { +void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) { // Print the destination type cast switch (opc) { case Instruction::UIToFP: @@ -917,7 +919,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { } if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { - const Type* Ty = CI->getType(); + Type* Ty = CI->getType(); if (Ty == Type::getInt1Ty(CPV->getContext())) Out << (CI->getZExtValue() ? '1' : '0'); else if (Ty == Type::getInt32Ty(CPV->getContext())) @@ -1027,7 +1029,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { printConstantArray(CA, Static); } else { assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); - const ArrayType *AT = cast<ArrayType>(CPV->getType()); + ArrayType *AT = cast<ArrayType>(CPV->getType()); Out << '{'; if (AT->getNumElements()) { Out << ' '; @@ -1054,7 +1056,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { printConstantVector(CV, Static); } else { assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); - const VectorType *VT = cast<VectorType>(CPV->getType()); + VectorType *VT = cast<VectorType>(CPV->getType()); Out << "{ "; Constant *CZ = Constant::getNullValue(VT->getElementType()); printConstant(CZ, Static); @@ -1074,7 +1076,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Out << ")"; } if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) { - const StructType *ST = cast<StructType>(CPV->getType()); + StructType *ST = cast<StructType>(CPV->getType()); Out << '{'; if (ST->getNumElements()) { Out << ' '; @@ -1123,7 +1125,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { // care of detecting that case and printing the cast for the ConstantExpr. 
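The blanket change from const Type* to Type* running through these printer signatures follows from the type-system rewrite this import tracks: struct types are now mutable objects that may be created opaque and given a body later, so type-walking code can no longer promise const-ness. A minimal sketch of the create-then-define pattern (assuming an LLVMContext &Ctx is in scope; the struct name is illustrative):

// A named struct can exist before its body does; the self-referential
// pointer field only needs the still-opaque declaration.
StructType *Node = StructType::create(Ctx, "struct.Node");
std::vector<Type*> Fields;
Fields.push_back(Type::getInt32Ty(Ctx));
Fields.push_back(PointerType::getUnqual(Node));
Node->setBody(Fields, /*isPacked=*/false);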
bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { bool NeedsExplicitCast = false; - const Type *Ty = CE->getOperand(0)->getType(); + Type *Ty = CE->getOperand(0)->getType(); bool TypeIsSigned = false; switch (CE->getOpcode()) { case Instruction::Add: @@ -1175,7 +1177,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { // Extract the operand's type, we'll need it. - const Type* OpTy = CPV->getType(); + Type* OpTy = CPV->getType(); // Indicate whether to do the cast or not. bool shouldCast = false; @@ -1267,7 +1269,7 @@ std::string CWriter::GetValueName(const Value *Operand) { void CWriter::writeInstComputationInline(Instruction &I) { // We can't currently support integer types other than 1, 8, 16, 32, 64. // Validate this. - const Type *Ty = I.getType(); + Type *Ty = I.getType(); if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) && Ty!=Type::getInt8Ty(I.getContext()) && Ty!=Type::getInt16Ty(I.getContext()) && @@ -1330,7 +1332,7 @@ void CWriter::writeOperand(Value *Operand, bool Static) { // This function takes care of detecting that case and printing the cast // for the Instruction. bool CWriter::writeInstructionCast(const Instruction &I) { - const Type *Ty = I.getOperand(0)->getType(); + Type *Ty = I.getOperand(0)->getType(); switch (I.getOpcode()) { case Instruction::Add: case Instruction::Sub: @@ -1362,7 +1364,7 @@ bool CWriter::writeInstructionCast(const Instruction &I) { void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { // Extract the operand's type, we'll need it. - const Type* OpTy = Operand->getType(); + Type* OpTy = Operand->getType(); // Indicate whether to do the cast or not. bool shouldCast = false; @@ -1430,7 +1432,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { bool castIsSigned = Cmp.isSigned(); // If the operand was a pointer, convert to a large integer type. - const Type* OpTy = Operand->getType(); + Type* OpTy = Operand->getType(); if (OpTy->isPointerTy()) OpTy = TD->getIntPtrType(Operand->getContext()); @@ -1665,7 +1667,8 @@ bool CWriter::doInitialization(Module &M) { TAsm = Match->createMCAsmInfo(Triple); #endif TAsm = new CBEMCAsmInfo(); - TCtx = new MCContext(*TAsm, NULL); + MRI = new MCRegisterInfo(); + TCtx = new MCContext(*TAsm, *MRI, NULL); Mang = new Mangler(*TCtx, *TD); // Keep track of which functions are static ctors/dtors so they can have @@ -2049,7 +2052,7 @@ void CWriter::printModuleTypes() { for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { StructType *ST = StructTypes[i]; - if (ST->isAnonymous() || ST->getName().empty()) + if (ST->isLiteral() || ST->getName().empty()) UnnamedStructIDs[ST] = NextTypeID++; std::string Name = getStructName(ST); @@ -2060,7 +2063,7 @@ void CWriter::printModuleTypes() { Out << '\n'; // Keep track of which structures have been printed so far. - SmallPtrSet<const Type *, 16> StructPrinted; + SmallPtrSet<Type *, 16> StructPrinted; // Loop over all structures then push them into the stack so they are // printed in the correct order. @@ -2077,8 +2080,8 @@ void CWriter::printModuleTypes() { // // TODO: Make this work properly with vector types // -void CWriter::printContainedStructs(const Type *Ty, - SmallPtrSet<const Type *, 16> &StructPrinted) { +void CWriter::printContainedStructs(Type *Ty, + SmallPtrSet<Type *, 16> &StructPrinted) { // Don't walk through pointers. 
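The doInitialization hunk above also shows the new MCContext contract: the context is now constructed over an MCRegisterInfo (plus an optional MCObjectFileInfo, passed as NULL here), and the caller owns both, which is why the destructor gains the matching deletes. Reduced to the construction/teardown pair, with lifetimes spelled out:

// Construction: the context only borrows TAsm and MRI, so both must
// outlive it (CBEMCAsmInfo is this file's MCAsmInfo subclass).
MCAsmInfo *TAsm = new CBEMCAsmInfo();
MCRegisterInfo *MRI = new MCRegisterInfo();
MCContext *TCtx = new MCContext(*TAsm, *MRI, /*MOFI=*/0);

// Teardown mirrors it: the context goes first, then what it referenced.
delete TCtx;
delete MRI;
delete TAsm;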
if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy()) return; @@ -2088,7 +2091,7 @@ void CWriter::printContainedStructs(const Type *Ty, E = Ty->subtype_end(); I != E; ++I) printContainedStructs(*I, StructPrinted); - if (const StructType *ST = dyn_cast<StructType>(Ty)) { + if (StructType *ST = dyn_cast<StructType>(Ty)) { // Check to see if we have already printed this struct. if (!StructPrinted.insert(Ty)) return; @@ -2120,7 +2123,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { } // Loop over the arguments, printing them... - const FunctionType *FT = cast<FunctionType>(F->getFunctionType()); + FunctionType *FT = cast<FunctionType>(F->getFunctionType()); const AttrListPtr &PAL = F->getAttributes(); std::string tstr; @@ -2150,7 +2153,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { ArgName = GetValueName(I); else ArgName = ""; - const Type *ArgTy = I->getType(); + Type *ArgTy = I->getType(); if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { ArgTy = cast<PointerType>(ArgTy)->getElementType(); ByValParams.insert(I); @@ -2177,7 +2180,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { for (; I != E; ++I) { if (PrintedArg) FunctionInnards << ", "; - const Type *ArgTy = *I; + Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); @@ -2205,7 +2208,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { FunctionInnards << ')'; // Get the return type for the function. - const Type *RetTy; + Type *RetTy; if (!isStructReturn) RetTy = F->getReturnType(); else { @@ -2222,8 +2225,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { static inline bool isFPIntBitCast(const Instruction &I) { if (!isa<BitCastInst>(I)) return false; - const Type *SrcTy = I.getOperand(0)->getType(); - const Type *DstTy = I.getType(); + Type *SrcTy = I.getOperand(0)->getType(); + Type *DstTy = I.getType(); return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); } @@ -2237,7 +2240,7 @@ void CWriter::printFunction(Function &F) { // If this is a struct return function, handle the result with magic. if (isStructReturn) { - const Type *StructTy = + Type *StructTy = cast<PointerType>(F.arg_begin()->getType())->getElementType(); Out << " "; printType(Out, StructTy, false, "StructReturn"); @@ -2380,22 +2383,29 @@ void CWriter::visitReturnInst(ReturnInst &I) { void CWriter::visitSwitchInst(SwitchInst &SI) { + Value* Cond = SI.getCondition(); + Out << " switch ("; - writeOperand(SI.getOperand(0)); + writeOperand(Cond); Out << ") {\n default:\n"; printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2); printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); Out << ";\n"; - for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) { + + unsigned NumCases = SI.getNumCases(); + // Skip the first item since that's the default case. 
+ for (unsigned i = 1; i < NumCases; ++i) { + ConstantInt* CaseVal = SI.getCaseValue(i); + BasicBlock* Succ = SI.getSuccessor(i); Out << " case "; - writeOperand(SI.getOperand(i)); + writeOperand(CaseVal); Out << ":\n"; - BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1)); printPHICopiesForSuccessor (SI.getParent(), Succ, 2); printBranchToBlock(SI.getParent(), Succ, 2); if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent()))) Out << " break;\n"; } + Out << " }\n"; } @@ -2656,7 +2666,7 @@ void CWriter::visitFCmpInst(FCmpInst &I) { Out << ")"; } -static const char * getFloatBitCastField(const Type *Ty) { +static const char * getFloatBitCastField(Type *Ty) { switch (Ty->getTypeID()) { default: llvm_unreachable("Invalid Type"); case Type::FloatTyID: return "Float"; @@ -2672,8 +2682,8 @@ static const char * getFloatBitCastField(const Type *Ty) { } void CWriter::visitCastInst(CastInst &I) { - const Type *DstTy = I.getType(); - const Type *SrcTy = I.getOperand(0)->getType(); + Type *DstTy = I.getType(); + Type *SrcTy = I.getOperand(0)->getType(); if (isFPIntBitCast(I)) { Out << '('; // These int<->float and long<->double casts need to be handled specially @@ -2719,7 +2729,7 @@ void CWriter::visitSelectInst(SelectInst &I) { // Returns the macro name or value of the max or min of an integer type // (as defined in limits.h). -static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax, +static void printLimitValue(IntegerType &Ty, bool isSigned, bool isMax, raw_ostream &Out) { const char* type; const char* sprefix = ""; @@ -2745,16 +2755,16 @@ static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax, } #ifndef NDEBUG -static bool isSupportedIntegerSize(const IntegerType &T) { +static bool isSupportedIntegerSize(IntegerType &T) { return T.getBitWidth() == 8 || T.getBitWidth() == 16 || T.getBitWidth() == 32 || T.getBitWidth() == 64; } #endif void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) { - const FunctionType *funT = F.getFunctionType(); - const Type *retT = F.getReturnType(); - const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1)); + FunctionType *funT = F.getFunctionType(); + Type *retT = F.getReturnType(); + IntegerType *elemT = cast<IntegerType>(funT->getParamType(1)); assert(isSupportedIntegerSize(*elemT) && "CBackend does not support arbitrary size integers."); @@ -2829,7 +2839,6 @@ void CWriter::lowerIntrinsics(Function &F) { if (Function *F = CI->getCalledFunction()) switch (F->getIntrinsicID()) { case Intrinsic::not_intrinsic: - case Intrinsic::memory_barrier: case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: @@ -2908,8 +2917,8 @@ void CWriter::visitCallInst(CallInst &I) { Value *Callee = I.getCalledValue(); - const PointerType *PTy = cast<PointerType>(Callee->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + PointerType *PTy = cast<PointerType>(Callee->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); // If this is a call to a struct-return function, assign to the first // parameter instead of passing it to the call. 
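The visitSwitchInst rewrite above is a pure API migration: cases are no longer reached by walking the operand list in (value, successor) pairs but through dedicated accessors, with index 0 reserved for the default destination. The idiom in isolation (SI is any SwitchInst &):

// In this revision of the API, getNumCases() counts the default case
// at index 0, so explicit cases start at index 1.
unsigned NumCases = SI.getNumCases();
for (unsigned i = 1; i < NumCases; ++i) {
  ConstantInt *CaseVal = SI.getCaseValue(i);
  BasicBlock *Succ = SI.getSuccessor(i);
  // ...emit one "case <CaseVal>: goto <Succ>;" arm...
}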
@@ -3020,9 +3029,6 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, WroteCallee = true; return false; } - case Intrinsic::memory_barrier: - Out << "__sync_synchronize()"; - return true; case Intrinsic::vastart: Out << "0; "; @@ -3217,7 +3223,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { std::vector<std::pair<Value*, int> > ResultVals; if (CI.getType() == Type::getVoidTy(CI.getContext())) ; - else if (const StructType *ST = dyn_cast<StructType>(CI.getType())) { + else if (StructType *ST = dyn_cast<StructType>(CI.getType())) { for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) ResultVals.push_back(std::make_pair(&CI, (int)i)); } else { @@ -3348,7 +3354,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, // Find out if the last index is into a vector. If so, we have to print this // specially. Since vectors can't have elements of indexable type, only the // last index could possibly be of a vector element. - const VectorType *LastIndexIsVector = 0; + VectorType *LastIndexIsVector = 0; { for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI) LastIndexIsVector = dyn_cast<VectorType>(*TmpI); @@ -3421,7 +3427,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, Out << ")"; } -void CWriter::writeMemoryAccess(Value *Operand, const Type *OperandType, +void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType, bool IsVolatile, unsigned Alignment) { bool IsUnaligned = Alignment && @@ -3463,7 +3469,7 @@ void CWriter::visitStoreInst(StoreInst &I) { Out << " = "; Value *Operand = I.getOperand(0); Constant *BitMask = 0; - if (const IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType())) + if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType())) if (!ITy->isPowerOf2ByteWidth()) // We have a bit width that doesn't match an even power-of-2 byte // size. Consequently we must & the value with the type's bit mask @@ -3492,7 +3498,7 @@ void CWriter::visitVAArgInst(VAArgInst &I) { } void CWriter::visitInsertElementInst(InsertElementInst &I) { - const Type *EltTy = I.getType()->getElementType(); + Type *EltTy = I.getType()->getElementType(); writeOperand(I.getOperand(0)); Out << ";\n "; Out << "(("; @@ -3507,7 +3513,7 @@ void CWriter::visitInsertElementInst(InsertElementInst &I) { void CWriter::visitExtractElementInst(ExtractElementInst &I) { // We know that our operand is not inlined. 
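The deleted memory_barrier arm is the C-backend half of retiring the llvm.memory_barrier intrinsic, which this release supersedes with the fence instruction (compare the ATOMIC_FENCE handling added to CellSPU below). What the backend used to emit for it was simply GCC's full-barrier builtin:

/* Former lowering of llvm.memory_barrier in the generated C: */
void fence_example(void) {
  __sync_synchronize(); /* full hardware memory barrier (GCC builtin) */
}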
Out << "(("; - const Type *EltTy = + Type *EltTy = cast<VectorType>(I.getOperand(0)->getType())->getElementType(); printType(Out, PointerType::getUnqual(EltTy)); Out << ")(&" << GetValueName(I.getOperand(0)) << "))["; @@ -3519,9 +3525,9 @@ void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Out << "("; printType(Out, SVI.getType()); Out << "){ "; - const VectorType *VT = SVI.getType(); + VectorType *VT = SVI.getType(); unsigned NumElts = VT->getNumElements(); - const Type *EltTy = VT->getElementType(); + Type *EltTy = VT->getElementType(); for (unsigned i = 0; i != NumElts; ++i) { if (i) Out << ", "; @@ -3557,9 +3563,9 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) { Out << GetValueName(&IVI); for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end(); i != e; ++i) { - const Type *IndexedTy = + Type *IndexedTy = ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), - ArrayRef<unsigned>(b, i+1)); + makeArrayRef(b, i+1)); if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else @@ -3579,9 +3585,9 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { Out << GetValueName(EVI.getOperand(0)); for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end(); i != e; ++i) { - const Type *IndexedTy = + Type *IndexedTy = ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), - ArrayRef<unsigned>(b, i+1)); + makeArrayRef(b, i+1)); if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else diff --git a/contrib/llvm/lib/Target/CBackend/CTargetMachine.h b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h index e64216b..4f1ca97 100644 --- a/contrib/llvm/lib/Target/CBackend/CTargetMachine.h +++ b/contrib/llvm/lib/Target/CBackend/CTargetMachine.h @@ -20,8 +20,9 @@ namespace llvm { struct CTargetMachine : public TargetMachine { - CTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS) + CTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : TargetMachine(T, TT, CPU, FS) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, diff --git a/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp index f7e8ff2..e8274ff 100644 --- a/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp +++ b/contrib/llvm/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp @@ -9,7 +9,7 @@ #include "CTargetMachine.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheCBackendTarget; @@ -17,3 +17,5 @@ Target llvm::TheCBackendTarget; extern "C" void LLVMInitializeCBackendTargetInfo() { RegisterTarget<> X(TheCBackendTarget, "c", "C backend"); } + +extern "C" void LLVMInitializeCBackendTargetMC() {} diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 85fb258..0000000 --- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMCellSPUDesc - SPUMCTargetDesc.cpp - SPUMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile deleted file mode 100644 index 10d9a42..0000000 --- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/CellSPU/TargetDesc/Makefile 
--------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMCellSPUDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp index 26c5a4b..d5af2a8 100644 --- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp @@ -13,10 +13,12 @@ #include "SPUMCTargetDesc.h" #include "SPUMCAsmInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "SPUGenInstrInfo.inc" @@ -35,8 +37,10 @@ static MCInstrInfo *createSPUMCInstrInfo() { return X; } -extern "C" void LLVMInitializeCellSPUMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo); +static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSPUMCRegisterInfo(X, SPU::R0); + return X; } static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -46,11 +50,43 @@ static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeCellSPUMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget, - createSPUMCSubtargetInfo); +static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) { + MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT); + + // Initial state of the frame pointer is R1. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(SPU::R1, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + // For the time being, use static relocations, since there's really no + // support for PIC yet. + X->InitMCCodeGenInfo(Reloc::Static, CM); + return X; } -extern "C" void LLVMInitializeCellSPUMCAsmInfo() { - RegisterMCAsmInfo<SPULinuxMCAsmInfo> X(TheCellSPUTarget); +// Force static initialization. +extern "C" void LLVMInitializeCellSPUTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget, + createSPUMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget, + createCellSPUMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget, + createSPUMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h index c5c037d..a3717b0 100644 --- a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h +++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h @@ -1,4 +1,4 @@ -//===-- SPUMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===// +//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file provides Alpha specific target descriptions. +// This file provides CellSPU specific target descriptions. // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp index fd96694..90b5270 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -29,10 +29,10 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp index a3e7e73..093f99f 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp @@ -181,18 +181,6 @@ void SPUFrameLowering::emitPrologue(MachineFunction &MF) const { MachineLocation FPSrc(MachineLocation::VirtualFP); Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc)); } - } else { - // This is a leaf function -- insert a branch hint iff there are - // sufficient number instructions in the basic block. Note that - // this is just a best guess based on the basic block's size. - if (MBB.size() >= (unsigned) SPUFrameLowering::branchHintPenalty()) { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - dl = MBBI->getDebugLoc(); - - // Insert terminator label - BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)) - .addSym(MMI.getContext().CreateTempSymbol()); - } } } @@ -249,14 +237,6 @@ void SPUFrameLowering::emitEpilogue(MachineFunction &MF, } } -void SPUFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) - const { - // Initial state of the frame pointer is R1. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(SPU::R1, 0); - Moves.push_back(MachineMove(0, Dst, Src)); -} - void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const{ // Mark LR and SP unused, since the prolog spills them to stack and diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h index 4fee72d..b837f2c 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h +++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h @@ -43,9 +43,6 @@ namespace llvm { void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - //! Perform target-specific stack frame setup. 
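The CellSPU MC hunks above and the frame-lowering deletions around here are two halves of one move: the initial frame-state record (virtual frame pointer = R1) leaves TargetFrameLowering::getInitialFrameState and is instead registered on the MCAsmInfo when it is created. The generic shape of the new side, with the Foo names purely illustrative stand-ins:

static MCAsmInfo *createFooMCAsmInfo(const Target &T, StringRef TT) {
  MCAsmInfo *MAI = new FooMCAsmInfo(T, TT); // illustrative subclass
  // Initial CFA state: the virtual frame pointer maps to the stack reg.
  MachineLocation Dst(MachineLocation::VirtualFP);
  MachineLocation Src(Foo::SP, 0); // Foo::SP stands in for SPU::R1
  MAI->addInitialFrameState(/*Label=*/0, Dst, Src);
  return MAI;
}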
- void getInitialFrameState(std::vector<MachineMove> &Moves) const; - //! Return a function's saved spill slots /*! For CellSPU, a function's saved spill slots is just the link register. @@ -77,17 +74,6 @@ namespace llvm { static int FItoStackOffset(int frame_index) { return frame_index * stackSlotSize(); } - //! Number of instructions required to overcome hint-for-branch latency - /*! - HBR (hint-for-branch) instructions can be inserted when, for example, - we know that a given function is going to be called, such as printf(), - in the control flow graph. HBRs are only inserted if a sufficient number - of instructions occurs between the HBR and the target. Currently, HBRs - take 6 cycles, ergo, the magic number 6. - */ - static int branchHintPenalty() { - return 6; - } }; } diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp index f0ceee2..ac33111 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp @@ -69,7 +69,7 @@ namespace { TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { EVT ArgVT = Op.getOperand(i).getValueType(); - const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; @@ -80,7 +80,7 @@ namespace { TLI.getPointerTy()); // Splice the libcall in wherever FindInputOutputChains tells us to. - const Type *RetTy = + Type *RetTy = Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, @@ -174,6 +174,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // SPU has no intrinsics for these particular operations: setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); // SPU has no division/remainder instructions setOperationAction(ISD::SREM, MVT::i8, Expand); @@ -401,6 +402,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)i; + // Set operation actions to legal types only. + if (!isTypeLegal(VT)) continue; + // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); @@ -438,6 +442,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setBooleanContents(ZeroOrNegativeOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct? setStackPointerRegisterToSaveRestore(SPU::R1); @@ -497,7 +502,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const // Return the Cell SPU's SETCC result type //===----------------------------------------------------------------------===// -MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const { +EVT SPUTargetLowering::getSetCCResultType(EVT VT) const { // i8, i16 and i32 are valid SETCC result types MVT::SimpleValueType retval; @@ -2727,6 +2732,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) // the type to extend from needs to be i64 or i32. 
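Among the SPUISelLowering changes, the isTypeLegal guard is the subtle one: the loop spans every vector MVT, including ones CellSPU has no register class for, and recording Legal actions for such types would promise the legalizer support that does not exist. The guard pattern by itself (this runs inside the TargetLowering constructor, so isTypeLegal and setOperationAction are members):

// Record vector operation actions only for types the target can hold.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
     i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
  MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
  if (!isTypeLegal(VT))
    continue; // no register class for VT on this subtarget
  setOperationAction(ISD::ADD, VT, Legal);
  setOperationAction(ISD::SUB, VT, Legal);
}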
assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && "LowerSIGN_EXTEND: input and/or output operand have wrong size"); + (void)OpVT; // Create shuffle mask unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 @@ -3216,7 +3222,7 @@ SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode. bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, - const Type *Ty) const { + Type *Ty) const { // SPU's addresses are 256K: return (V > -(1 << 18) && V < (1 << 18) - 1); } @@ -3239,7 +3245,7 @@ bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const { bool SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type * ) const{ + Type * ) const{ // A-form: 18bit absolute address. if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0) diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h index d23f6cc..aa4a168 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h +++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h @@ -107,7 +107,7 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - Return the ValueType for ISD::SETCC - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(EVT VT) const; virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } @@ -147,7 +147,7 @@ namespace llvm { /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode. - virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const; + virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const; virtual bool isLegalAddressImmediate(GlobalValue *) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -179,7 +179,7 @@ namespace llvm { virtual bool isLegalICmpImmediate(int64_t Imm) const; virtual bool isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const; + Type *Ty) const; }; } diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp index e67b10c..007bc0e 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -17,9 +17,9 @@ #include "SPUHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/MC/MCContext.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #define GET_INSTRINFO_CTOR @@ -290,6 +290,8 @@ static void removeHBR( MachineBasicBlock &MBB) { if (I->getOpcode() == SPU::HBRA || I->getOpcode() == SPU::HBR_LABEL){ I=MBB.erase(I); + if (I == MBB.end()) + break; } } } diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td index e103c9b..f76ebd7 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td +++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td @@ -1594,8 +1594,8 @@ multiclass BitwiseOrImm { def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>; - def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, u10imm_i32:$val), - [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>; + def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>; // i16i32: hacked version of 
the ori instruction to extend 16-bit quantities // to 32-bit quantities. used exclusively to match "anyext" conversions (vide @@ -3467,8 +3467,10 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { [/* no pattern */]>; // Indirect branch - def BI: - BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; + let isIndirectBranch = 1 in { + def BI: + BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; + } } // Conditional branches: diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp index 19896c0..bbac6fd 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ValueTypes.h" @@ -187,7 +186,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii) : - SPUGenRegisterInfo(), Subtarget(subtarget), TII(tii) + SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii) { } @@ -311,28 +310,12 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } unsigned -SPURegisterInfo::getRARegister() const -{ - return SPU::R0; -} - -unsigned SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SPU::R1; } int -SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - // FIXME: Most probably dwarf numbers differs for Linux and Darwin - return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -int SPURegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { - return SPUGenRegisterInfo::getLLVMRegNumFull(RegNum, 0); -} - -int SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const { switch(dFormOpcode) diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h index 5e014f8..b7818a4 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h +++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h @@ -74,8 +74,6 @@ namespace llvm { void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - //! Get return address register (LR, aka R0) - unsigned getRARegister() const; //! Get the stack frame register (SP, aka R1) unsigned getFrameRegister(const MachineFunction &MF) const; @@ -83,10 +81,6 @@ namespace llvm { // New methods added: //------------------------------------------------------------------------ - //! Get DWARF debugging register number - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - //! Convert D-form load/store to X-form load/store /*! 
Converts a register displacement load/store into a register-indexed diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp index 856dc82..43335ab 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp @@ -14,7 +14,7 @@ #include "SPUSubtarget.h" #include "SPU.h" #include "SPURegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp index 3542a2b..93a7f6e 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -16,7 +16,8 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -31,9 +32,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const { return &LR[0]; } -SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU,const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), +SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), @@ -41,9 +43,6 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT, TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { - // For the time being, use static relocations, since there's really no - // support for PIC yet. 
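The constructor churn here, as in the C and C++ backend TargetMachine stubs elsewhere in this change, is the receiving end of the relocation- and code-model plumbing: targets stop calling setRelocationModel themselves and instead take Reloc::Model and CodeModel::Model as constructor arguments, with the effective choice recorded in an MCCodeGenInfo (createSPUMCCodeGenInfo above pins CellSPU to Reloc::Static until PIC support exists). A sketch of a conforming constructor, with the Foo names illustrative:

FooTargetMachine::FooTargetMachine(const Target &T, StringRef TT,
                                   StringRef CPU, StringRef FS,
                                   Reloc::Model RM, CodeModel::Model CM)
  : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), // models forwarded, not set later
    Subtarget(TT, CPU, FS) {}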
- setRelocationModel(Reloc::Static); } //===----------------------------------------------------------------------===// @@ -59,8 +58,16 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM, // passes to run just before printing the assembly bool SPUTargetMachine:: -addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) -{ +addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + // load the TCE instruction scheduler, if available via + // loaded plugins + typedef llvm::FunctionPass* (*BuilderFunc)(const char*); + BuilderFunc schedulerCreator = + (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol( + "createTCESchedulerPass"); + if (schedulerCreator != NULL) + PM.add(schedulerCreator("cellspu")); + //align instructions with nops/lnops for dual issue PM.add(createSPUNopFillerPass(*this)); return true; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h index d96f86d..fffe77c 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h +++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h @@ -38,8 +38,9 @@ class SPUTargetMachine : public LLVMTargetMachine { SPUSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; public: - SPUTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + SPUTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); /// Return the subtarget implementation object virtual const SPUSubtarget *getSubtargetImpl() const { diff --git a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp index 049ea23..84aadfa 100644 --- a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp +++ b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp @@ -9,7 +9,7 @@ #include "SPU.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheCellSPUTarget; diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp index 10d18f6..ae0e3c4 100644 --- a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp +++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp @@ -29,7 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include <algorithm> @@ -77,22 +77,12 @@ extern "C" void LLVMInitializeCppBackendTarget() { RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget); } -extern "C" void LLVMInitializeCppBackendMCAsmInfo() {} - -extern "C" void LLVMInitializeCppBackendMCInstrInfo() { - RegisterMCInstrInfo<MCInstrInfo> X(TheCppBackendTarget); -} - -extern "C" void LLVMInitializeCppBackendMCSubtargetInfo() { - RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheCppBackendTarget); -} - namespace { - typedef std::vector<const Type*> TypeList; - typedef std::map<const Type*,std::string> TypeMap; + typedef std::vector<Type*> TypeList; + typedef std::map<Type*,std::string> TypeMap; typedef std::map<const Value*,std::string> ValueMap; typedef std::set<std::string> NameSet; - typedef std::set<const Type*> TypeSet; + typedef std::set<Type*> TypeSet; typedef std::set<const Value*> ValueSet; typedef std::map<const 
Value*,std::string> ForwardRefMap; @@ -143,14 +133,14 @@ namespace { void printEscapedString(const std::string& str); void printCFP(const ConstantFP* CFP); - std::string getCppName(const Type* val); - inline void printCppName(const Type* val); + std::string getCppName(Type* val); + inline void printCppName(Type* val); std::string getCppName(const Value* val); inline void printCppName(const Value* val); void printAttributes(const AttrListPtr &PAL, const std::string &name); - void printType(const Type* Ty); + void printType(Type* Ty); void printTypes(const Module* M); void printConstant(const Constant *CPV); @@ -164,7 +154,7 @@ namespace { void printFunctionHead(const Function *F); void printFunctionBody(const Function *F); void printInstruction(const Instruction *I, const std::string& bbname); - std::string getOpName(Value*); + std::string getOpName(const Value*); void printModuleBody(); }; @@ -184,7 +174,7 @@ static inline void sanitize(std::string &str) { str[i] = '_'; } -static std::string getTypePrefix(const Type *Ty) { +static std::string getTypePrefix(Type *Ty) { switch (Ty->getTypeID()) { case Type::VoidTyID: return "void_"; case Type::IntegerTyID: @@ -339,7 +329,7 @@ void CppWriter::printEscapedString(const std::string &Str) { } } -std::string CppWriter::getCppName(const Type* Ty) { +std::string CppWriter::getCppName(Type* Ty) { // First, handle the primitive types .. easy if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { switch (Ty->getTypeID()) { @@ -379,7 +369,7 @@ std::string CppWriter::getCppName(const Type* Ty) { // See if the type has a name in the symboltable and build accordingly std::string name; - if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (StructType *STy = dyn_cast<StructType>(Ty)) if (STy->hasName()) name = STy->getName(); @@ -393,7 +383,7 @@ std::string CppWriter::getCppName(const Type* Ty) { return TypeNames[Ty] = name; } -void CppWriter::printCppName(const Type* Ty) { +void CppWriter::printCppName(Type* Ty) { printEscapedString(getCppName(Ty)); } @@ -480,6 +470,9 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, HANDLE_ATTR(NoImplicitFloat); HANDLE_ATTR(Naked); HANDLE_ATTR(InlineHint); + HANDLE_ATTR(ReturnsTwice); + HANDLE_ATTR(UWTable); + HANDLE_ATTR(NonLazyBind); #undef HANDLE_ATTR if (attrs & Attribute::StackAlignment) Out << " | Attribute::constructStackAlignmentFromInt(" @@ -499,7 +492,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, } } -void CppWriter::printType(const Type* Ty) { +void CppWriter::printType(Type* Ty) { // We don't print definitions for primitive types if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return; @@ -514,13 +507,13 @@ void CppWriter::printType(const Type* Ty) { // Print the type definition switch (Ty->getTypeID()) { case Type::FunctionTyID: { - const FunctionType* FT = cast<FunctionType>(Ty); + FunctionType* FT = cast<FunctionType>(Ty); Out << "std::vector<Type*>" << typeName << "_args;"; nl(Out); FunctionType::param_iterator PI = FT->param_begin(); FunctionType::param_iterator PE = FT->param_end(); for (; PI != PE; ++PI) { - const Type* argTy = static_cast<const Type*>(*PI); + Type* argTy = static_cast<Type*>(*PI); printType(argTy); std::string argName(getCppName(argTy)); Out << typeName << "_args.push_back(" << argName; @@ -539,13 +532,21 @@ void CppWriter::printType(const Type* Ty) { break; } case Type::StructTyID: { - const StructType* ST = cast<StructType>(Ty); - if (!ST->isAnonymous()) { - Out << "StructType *" << typeName << " = "; - Out << "StructType::createNamed(mod->getContext(), \""; + 
StructType* ST = cast<StructType>(Ty); + if (!ST->isLiteral()) { + Out << "StructType *" << typeName << " = mod->getTypeByName(\""; + printEscapedString(ST->getName()); + Out << "\");"; + nl(Out); + Out << "if (!" << typeName << ") {"; + nl(Out); + Out << typeName << " = "; + Out << "StructType::create(mod->getContext(), \""; printEscapedString(ST->getName()); Out << "\");"; nl(Out); + Out << "}"; + nl(Out); // Indicate that this type is now defined. DefinedTypes.insert(Ty); } @@ -555,7 +556,7 @@ void CppWriter::printType(const Type* Ty) { StructType::element_iterator EI = ST->element_begin(); StructType::element_iterator EE = ST->element_end(); for (; EI != EE; ++EI) { - const Type* fieldTy = static_cast<const Type*>(*EI); + Type* fieldTy = static_cast<Type*>(*EI); printType(fieldTy); std::string fieldName(getCppName(fieldTy)); Out << typeName << "_fields.push_back(" << fieldName; @@ -563,21 +564,27 @@ void CppWriter::printType(const Type* Ty) { nl(Out); } - if (ST->isAnonymous()) { + if (ST->isLiteral()) { Out << "StructType *" << typeName << " = "; Out << "StructType::get(" << "mod->getContext(), "; } else { + Out << "if (" << typeName << "->isOpaque()) {"; + nl(Out); Out << typeName << "->setBody("; } Out << typeName << "_fields, /*isPacked=*/" << (ST->isPacked() ? "true" : "false") << ");"; nl(Out); + if (!ST->isLiteral()) { + Out << "}"; + nl(Out); + } break; } case Type::ArrayTyID: { - const ArrayType* AT = cast<ArrayType>(Ty); - const Type* ET = AT->getElementType(); + ArrayType* AT = cast<ArrayType>(Ty); + Type* ET = AT->getElementType(); printType(ET); if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); @@ -589,8 +596,8 @@ void CppWriter::printType(const Type* Ty) { break; } case Type::PointerTyID: { - const PointerType* PT = cast<PointerType>(Ty); - const Type* ET = PT->getElementType(); + PointerType* PT = cast<PointerType>(Ty); + Type* ET = PT->getElementType(); printType(ET); if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); @@ -602,8 +609,8 @@ void CppWriter::printType(const Type* Ty) { break; } case Type::VectorTyID: { - const VectorType* PT = cast<VectorType>(Ty); - const Type* ET = PT->getElementType(); + VectorType* PT = cast<VectorType>(Ty); + Type* ET = PT->getElementType(); printType(ET); if (DefinedTypes.find(Ty) == DefinedTypes.end()) { std::string elemName(getCppName(ET)); @@ -766,9 +773,7 @@ void CppWriter::printConstant(const Constant *CV) { Out << "Constant* " << constName << " = ConstantExpr::getGetElementPtr(" << getCppName(CE->getOperand(0)) << ", " - << "&" << constName << "_indices[0], " - << constName << "_indices.size()" - << ");"; + << constName << "_indices);"; } else if (CE->isCast()) { printConstant(CE->getOperand(0)); Out << "Constant* " << constName << " = ConstantExpr::getCast("; @@ -988,7 +993,7 @@ void CppWriter::printVariableBody(const GlobalVariable *GV) { } } -std::string CppWriter::getOpName(Value* V) { +std::string CppWriter::getOpName(const Value* V) { if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end()) return getCppName(V); @@ -1053,14 +1058,17 @@ void CppWriter::printInstruction(const Instruction *I, case Instruction::Switch: { const SwitchInst *SI = cast<SwitchInst>(I); Out << "SwitchInst* " << iName << " = SwitchInst::Create(" - << opNames[0] << ", " - << opNames[1] << ", " + << getOpName(SI->getCondition()) << ", " + << getOpName(SI->getDefaultDest()) << ", " << SI->getNumCases() << ", " << bbname << ");"; nl(Out); - for (unsigned i = 2; i 
!= SI->getNumOperands(); i += 2) { + unsigned NumCases = SI->getNumCases(); + for (unsigned i = 1; i < NumCases; ++i) { + const ConstantInt* CaseVal = SI->getCaseValue(i); + const BasicBlock* BB = SI->getSuccessor(i); Out << iName << "->addCase(" - << opNames[i] << ", " - << opNames[i+1] << ");"; + << getOpName(CaseVal) << ", " + << getOpName(BB) << ");"; nl(Out); } break; @@ -1076,6 +1084,11 @@ void CppWriter::printInstruction(const Instruction *I, } break; } + case Instruction::Resume: { + Out << "ResumeInst::Create(mod->getContext(), " << opNames[0] + << ", " << bbname << ");"; + break; + } case Instruction::Invoke: { const InvokeInst* inv = cast<InvokeInst>(I); Out << "std::vector<Value*> " << iName << "_params;"; @@ -1090,8 +1103,7 @@ void CppWriter::printInstruction(const Instruction *I, << getOpName(inv->getCalledFunction()) << ", " << getOpName(inv->getNormalDest()) << ", " << getOpName(inv->getUnwindDest()) << ", " - << iName << "_params.begin(), " - << iName << "_params.end(), \""; + << iName << "_params, \""; printEscapedString(inv->getName()); Out << "\", " << bbname << ");"; nl(Out) << iName << "->setCallingConv("; @@ -1252,8 +1264,7 @@ void CppWriter::printInstruction(const Instruction *I, nl(Out); } Out << "Instruction* " << iName << " = GetElementPtrInst::Create(" - << opNames[0] << ", " << iName << "_indices.begin(), " - << iName << "_indices.end()"; + << opNames[0] << ", " << iName << "_indices"; } Out << ", \""; printEscapedString(gep->getName()); @@ -1304,7 +1315,7 @@ void CppWriter::printInstruction(const Instruction *I, case Instruction::PtrToInt: Out << "PtrToIntInst"; break; case Instruction::IntToPtr: Out << "IntToPtrInst"; break; case Instruction::BitCast: Out << "BitCastInst"; break; - default: assert(!"Unreachable"); break; + default: assert(0 && "Unreachable"); break; } Out << "(" << opNames[0] << ", " << getCppName(cst->getType()) << ", \""; @@ -1331,8 +1342,7 @@ void CppWriter::printInstruction(const Instruction *I, } Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[call->getNumArgOperands()] << ", " - << iName << "_params.begin(), " - << iName << "_params.end(), \""; + << iName << "_params, \""; } else if (call->getNumArgOperands() == 1) { Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \""; @@ -1415,7 +1425,7 @@ void CppWriter::printInstruction(const Instruction *I, Out << "ExtractValueInst* " << getCppName(evi) << " = ExtractValueInst::Create(" << opNames[0] << ", " - << iName << "_indices.begin(), " << iName << "_indices.end(), \""; + << iName << "_indices, \""; printEscapedString(evi->getName()); Out << "\", " << bbname << ");"; break; @@ -1432,7 +1442,7 @@ void CppWriter::printInstruction(const Instruction *I, Out << "InsertValueInst* " << getCppName(ivi) << " = InsertValueInst::Create(" << opNames[0] << ", " << opNames[1] << ", " - << iName << "_indices.begin(), " << iName << "_indices.end(), \""; + << iName << "_indices, \""; printEscapedString(ivi->getName()); Out << "\", " << bbname << ");"; break; @@ -1542,13 +1552,12 @@ void CppWriter::printFunctionUses(const Function* F) { void CppWriter::printFunctionHead(const Function* F) { nl(Out) << "Function* " << getCppName(F); - if (is_inline) { - Out << " = mod->getFunction(\""; - printEscapedString(F->getName()); - Out << "\", " << getCppName(F->getFunctionType()) << ");"; - nl(Out) << "if (!" 
<< getCppName(F) << ") {"; - nl(Out) << getCppName(F); - } + Out << " = mod->getFunction(\""; + printEscapedString(F->getName()); + Out << "\");"; + nl(Out) << "if (!" << getCppName(F) << ") {"; + nl(Out) << getCppName(F); + Out<< " = Function::Create("; nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ","; nl(Out) << "/*Linkage=*/"; @@ -1585,10 +1594,8 @@ void CppWriter::printFunctionHead(const Function* F) { Out << "->setGC(\"" << F->getGC() << "\");"; nl(Out); } - if (is_inline) { - Out << "}"; - nl(Out); - } + Out << "}"; + nl(Out); printAttributes(F->getAttributes(), getCppName(F)); printCppName(F); Out << "->setAttributes(" << getCppName(F) << "_PAL);"; @@ -1873,7 +1880,7 @@ void CppWriter::printVariable(const std::string& fname, void CppWriter::printType(const std::string &fname, const std::string &typeName) { - const Type* Ty = TheModule->getTypeByName(typeName); + Type* Ty = TheModule->getTypeByName(typeName); if (!Ty) { error(std::string("Type '") + typeName + "' not found in input module"); return; diff --git a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h index 7322e3e..287e537 100644 --- a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h +++ b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h @@ -22,8 +22,9 @@ namespace llvm { class formatted_raw_ostream; struct CPPTargetMachine : public TargetMachine { - CPPTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS) + CPPTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : TargetMachine(T, TT, CPU, FS) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, diff --git a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp index d0aeb12..a8ac0a2 100644 --- a/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp +++ b/contrib/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp @@ -9,7 +9,7 @@ #include "CPPTargetMachine.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheCppBackendTarget; @@ -24,3 +24,5 @@ extern "C" void LLVMInitializeCppBackendTargetInfo() { "C++ backend", &CppBackend_TripleMatchQuality); } + +extern "C" void LLVMInitializeCppBackendTargetMC() {} diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt deleted file mode 100644 index 87e7cb5..0000000 --- a/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMMBlazeAsmParser - MBlazeAsmLexer.cpp - MBlazeAsmParser.cpp - ) - diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp index 1596596..2d357bb 100644 --- a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp +++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp @@ -7,8 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "MBlaze.h" -#include "MBlazeTargetMachine.h" +#include "MCTargetDesc/MBlazeBaseInfo.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ -17,10 +16,10 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCTargetAsmLexer.h" -#include "llvm/Target/TargetAsmLexer.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include <string> #include <map> @@ -29,7 +28,7 @@ using namespace llvm; namespace { - class MBlazeBaseAsmLexer : public TargetAsmLexer { + class MBlazeBaseAsmLexer : public MCTargetAsmLexer { const MCAsmInfo &AsmInfo; const AsmToken &lexDefinite() { @@ -42,7 +41,7 @@ namespace { rmap_ty RegisterMap; - void InitRegisterMap(const TargetRegisterInfo *info) { + void InitRegisterMap(const MCRegisterInfo *info) { unsigned numRegs = info->getNumRegs(); for (unsigned i = 0; i < numRegs; ++i) { @@ -76,20 +75,16 @@ namespace { } public: MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) - : TargetAsmLexer(T), AsmInfo(MAI) { + : MCTargetAsmLexer(T), AsmInfo(MAI) { } }; class MBlazeAsmLexer : public MBlazeBaseAsmLexer { public: - MBlazeAsmLexer(const Target &T, const MCAsmInfo &MAI) + MBlazeAsmLexer(const Target &T, const MCRegisterInfo &MRI, + const MCAsmInfo &MAI) : MBlazeBaseAsmLexer(T, MAI) { - std::string tripleString("mblaze-unknown-unknown"); - std::string featureString; - std::string CPU; - OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); - InitRegisterMap(targetMachine->getRegisterInfo()); + InitRegisterMap(&MRI); } }; } @@ -123,6 +118,6 @@ AsmToken MBlazeBaseAsmLexer::LexTokenUAL() { } extern "C" void LLVMInitializeMBlazeAsmLexer() { - RegisterAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget); + RegisterMCAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget); } diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index eebd9d8..97d311c 100644 --- a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -7,19 +7,16 @@ // //===----------------------------------------------------------------------===// -#include "MBlaze.h" -#include "MBlazeSubtarget.h" -#include "MBlazeRegisterInfo.h" -#include "MBlazeISelLowering.h" +#include "MCTargetDesc/MBlazeBaseInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmParser.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ 
-30,7 +27,7 @@ using namespace llvm; namespace { struct MBlazeOperand; -class MBlazeAsmParser : public TargetAsmParser { +class MBlazeAsmParser : public MCTargetAsmParser { MCAsmParser &Parser; MCAsmParser &getParser() const { return Parser; } @@ -64,7 +61,7 @@ class MBlazeAsmParser : public TargetAsmParser { public: MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) - : TargetAsmParser(), Parser(_Parser) {} + : MCTargetAsmParser(), Parser(_Parser) {} virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -286,19 +283,19 @@ void MBlazeOperand::print(raw_ostream &OS) const { break; case Register: OS << "<register R"; - OS << MBlazeRegisterInfo::getRegisterNumbering(getReg()) << ">"; + OS << getMBlazeRegisterNumbering(getReg()) << ">"; break; case Token: OS << "'" << getToken() << "'"; break; case Memory: { OS << "<memory R"; - OS << MBlazeRegisterInfo::getRegisterNumbering(getMemBase()); + OS << getMBlazeRegisterNumbering(getMemBase()); OS << ", "; unsigned RegOff = getMemOffReg(); if (RegOff) - OS << "R" << MBlazeRegisterInfo::getRegisterNumbering(RegOff); + OS << "R" << getMBlazeRegisterNumbering(RegOff); else OS << getMemOff(); OS << ">"; @@ -326,6 +323,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned ErrorInfo; switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) { + default: break; case Match_Success: Out.EmitInstruction(Inst); return false; @@ -521,7 +519,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, return false; } -/// ParseDirective parses the arm specific directives +/// ParseDirective parses the MBlaze specific directives bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") @@ -558,7 +556,7 @@ extern "C" void LLVMInitializeMBlazeAsmLexer(); /// Force static initialization. extern "C" void LLVMInitializeMBlazeAsmParser() { - RegisterAsmParser<MBlazeAsmParser> X(TheMBlazeTarget); + RegisterMCAsmParser<MBlazeAsmParser> X(TheMBlazeTarget); LLVMInitializeMBlazeAsmLexer(); } diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile b/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile deleted file mode 100644 index 611a0f4..0000000 --- a/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeAsmParser - -# Hack: we need to include 'main' MBlaze target directory for private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt deleted file mode 100644 index 9376e68..0000000 --- a/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMMBlazeDisassembler - MBlazeDisassembler.cpp - ) - -# workaround for hanging compilation on MSVC9 and 10 -if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) -set_property( - SOURCE MBlazeDisassembler.cpp - PROPERTY COMPILE_FLAGS "/Od" - ) -endif() - -add_dependencies(LLVMMBlazeDisassembler MBlazeCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index 88d80a1..fd761f1 100644 --- a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -20,9 +20,9 @@ #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" // #include "MBlazeGenDecoderTables.inc" @@ -60,27 +60,27 @@ static unsigned mblazeBinary2Opcode[] = { }; static unsigned getRD(uint32_t insn) { - if (!MBlazeRegisterInfo::isRegister((insn>>21)&0x1F)) + if (!isMBlazeRegister((insn>>21)&0x1F)) return UNSUPPORTED; - return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F); + return getMBlazeRegisterFromNumbering((insn>>21)&0x1F); } static unsigned getRA(uint32_t insn) { - if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F)) + if (!getMBlazeRegisterFromNumbering((insn>>16)&0x1F)) return UNSUPPORTED; - return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F); + return getMBlazeRegisterFromNumbering((insn>>16)&0x1F); } static unsigned getRB(uint32_t insn) { - if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F)) + if (!getMBlazeRegisterFromNumbering((insn>>11)&0x1F)) return UNSUPPORTED; - return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F); + return getMBlazeRegisterFromNumbering((insn>>11)&0x1F); } static int64_t getRS(uint32_t insn) { - if (!MBlazeRegisterInfo::isSpecialRegister(insn&0x3FFF)) + if (!isSpecialMBlazeRegister(insn&0x3FFF)) return UNSUPPORTED; - return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF); + return getSpecialMBlazeRegisterFromNumbering(insn&0x3FFF); } static int64_t getIMM(uint32_t insn) { @@ -493,11 +493,12 @@ EDInstInfo *MBlazeDisassembler::getEDInfo() const { // Public interface for the disassembler // -bool MBlazeDisassembler::getInstruction(MCInst &instr, +MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr, uint64_t &size, const MemoryObject ®ion, uint64_t address, - raw_ostream &vStream) const { + raw_ostream &vStream, + raw_ostream &cStream) const { // The machine instruction. uint32_t insn; uint64_t read; @@ -508,7 +509,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, // We want to read exactly 4 bytes of data. if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4) - return false; + return Fail; // Encoded as a big-endian 32-bit word in the stream. insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0); @@ -517,7 +518,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, // that it is a valid instruction. 
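// Note: the hunks in this file migrate getInstruction() from returning a
// plain bool to MCDisassembler::DecodeStatus (Fail/Success here), and thread
// through a second raw_ostream for per-instruction comments. A minimal
// caller, sketched under this snapshot's API -- STI, Region and Addr are
// assumed to be in scope, and nulls() is the standard discarding stream:
//
//   MCInst Inst;
//   uint64_t Size;
//   MBlazeDisassembler Dis(STI);
//   if (Dis.getInstruction(Inst, Size, Region, Addr, nulls(), nulls()) ==
//       MCDisassembler::Success) {
//     // MBlaze is a fixed-width ISA, so Size is always 4 on success.
//   }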
unsigned opcode = getOPCODE(insn); if (opcode == UNSUPPORTED) - return false; + return Fail; instr.setOpcode(opcode); @@ -529,11 +530,11 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, uint64_t tsFlags = MBlazeInsts[opcode].TSFlags; switch ((tsFlags & MBlazeII::FormMask)) { default: - return false; + return Fail; case MBlazeII::FRRRR: if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RB)); instr.addOperand(MCOperand::CreateReg(RA)); @@ -541,7 +542,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FRRR: if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateReg(RB)); @@ -550,23 +551,23 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FRI: switch (opcode) { default: - return false; + return Fail; case MBlaze::MFS: if (RD == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(insn&0x3FFF)); break; case MBlaze::MTS: if (RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateImm(insn&0x3FFF)); instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlaze::MSRSET: case MBlaze::MSRCLR: if (RD == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(insn&0x7FFF)); break; @@ -575,7 +576,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FRRI: if (RD == UNSUPPORTED || RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RA)); switch (opcode) { @@ -592,35 +593,35 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FCRR: if (RA == UNSUPPORTED || RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FCRI: if (RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); break; case MBlazeII::FRCR: if (RD == UNSUPPORTED || RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FRCI: if (RD == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); break; case MBlazeII::FCCR: if (RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RB)); break; @@ -630,7 +631,7 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FRRCI: if (RD == UNSUPPORTED || RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getSHT(insn))); @@ -638,35 +639,35 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FRRC: if (RD == UNSUPPORTED || RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlazeII::FRCX: if (RD == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); 
instr.addOperand(MCOperand::CreateImm(getFSL(insn))); break; case MBlazeII::FRCS: if (RD == UNSUPPORTED || RS == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateReg(RS)); break; case MBlazeII::FCRCS: if (RS == UNSUPPORTED || RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RS)); instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlazeII::FCRCX: if (RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getFSL(insn))); break; @@ -677,13 +678,13 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, case MBlazeII::FCR: if (RB == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FRIR: if (RD == UNSUPPORTED || RA == UNSUPPORTED) - return false; + return Fail; instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); instr.addOperand(MCOperand::CreateReg(RA)); @@ -693,11 +694,12 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, // We always consume 4 bytes of data on success size = 4; - return true; + return Success; } -static MCDisassembler *createMBlazeDisassembler(const Target &T) { - return new MBlazeDisassembler; +static MCDisassembler *createMBlazeDisassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new MBlazeDisassembler(STI); } extern "C" void LLVMInitializeMBlazeDisassembler() { diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h index d05eced..0ac0d89 100644 --- a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h +++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h @@ -32,19 +32,20 @@ class MBlazeDisassembler : public MCDisassembler { public: /// Constructor - Initializes the disassembler. /// - MBlazeDisassembler() : - MCDisassembler() { + MBlazeDisassembler(const MCSubtargetInfo &STI) : + MCDisassembler(STI) { } ~MBlazeDisassembler() { } /// getInstruction - See MCDisassembler. - bool getInstruction(MCInst &instr, + MCDisassembler::DecodeStatus getInstruction(MCInst &instr, uint64_t &size, const MemoryObject ®ion, uint64_t address, - raw_ostream &vStream) const; + raw_ostream &vStream, + raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. EDInstInfo *getEDInfo() const; diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile b/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile deleted file mode 100644 index 0530b32..0000000 --- a/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/MBlaze/Disassembler/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeDisassembler - -# Hack: we need to include 'main' MBlaze target directory to grab headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt deleted file mode 100644 index 242a573..0000000 --- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMMBlazeAsmPrinter - MBlazeInstPrinter.cpp - ) - -add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp index a7fd287..a1f1dbc 100644 --- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp +++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp @@ -25,8 +25,10 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #include "MBlazeGenAsmWriter.inc" -void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { +void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { printInstruction(MI, O); + printAnnotation(O, Annot); } void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h index eacca41..570ab08 100644 --- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h +++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h @@ -24,7 +24,7 @@ namespace llvm { MBlazeInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} - virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile b/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile deleted file mode 100644 index 9fb6e86..0000000 --- a/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeAsmPrinter - -# Hack: we need to include 'main' MBlaze target directory to grab -# private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.h b/contrib/llvm/lib/Target/MBlaze/MBlaze.h index 3390794..1399b85 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlaze.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.h @@ -15,6 +15,7 @@ #ifndef TARGET_MBLAZE_H #define TARGET_MBLAZE_H +#include "MCTargetDesc/MBlazeBaseInfo.h" #include "MCTargetDesc/MBlazeMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" @@ -22,17 +23,6 @@ namespace llvm { class MBlazeTargetMachine; class FunctionPass; class MachineCodeEmitter; - class MCCodeEmitter; - class MCInstrInfo; - class MCSubtargetInfo; - class TargetAsmBackend; - class formatted_raw_ostream; - - MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx); - - TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &); FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM); FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM); diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index 0016df5..97bd083 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -38,10 +38,10 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <cctype> @@ -136,19 +136,17 @@ void MBlazeAsmPrinter::printSavedRegsBitmask() { const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg); + unsigned RegNum = getMBlazeRegisterNumbering(Reg); if (MBlaze::GPRRegisterClass->contains(Reg)) CPUBitmask |= (1 << RegNum); } // Return Address and Frame registers must also be set in CPUBitmask. if (TFI->hasFP(*MF)) - CPUBitmask |= (1 << MBlazeRegisterInfo:: - getRegisterNumbering(RI.getFrameRegister(*MF))); + CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getFrameRegister(*MF))); if (MFI->adjustsStack()) - CPUBitmask |= (1 << MBlazeRegisterInfo:: - getRegisterNumbering(RI.getRARegister())); + CPUBitmask |= (1 << getMBlazeRegisterNumbering(RI.getRARegister())); // Print CPUBitmask OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask)); @@ -318,18 +316,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return I == Pred->end() || !I->getDesc().isBarrier(); } -static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - if (SyntaxVariant == 0) - return new MBlazeInstPrinter(MAI); - return 0; -} - // Force static initialization. 
extern "C" void LLVMInitializeMBlazeAsmPrinter() { RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget); - TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget, - createMBlazeMCInstPrinter); - } diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp index e763902..f28d5a7 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -1,4 +1,4 @@ -//=======- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====// +//===- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp index 62dfdcc..8ec548f 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -59,6 +59,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) // MBlaze does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? // Set up the register classes addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass); @@ -187,7 +188,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) computeRegisterProperties(); } -MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const { +EVT MBlazeTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i32; } @@ -964,13 +965,13 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // The last register argument that must be saved is MBlaze::R10 TargetRegisterClass *RC = MBlaze::GPRRegisterClass; - unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5); - unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1); - unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10); + unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5); + unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1); + unsigned End = getMBlazeRegisterNumbering(MBlaze::R10); unsigned StackLoc = Start - Begin + 1; for (; Start <= End; ++Start, ++StackLoc) { - unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start); + unsigned Reg = getMBlazeRegisterFromNumbering(Start); unsigned LiveReg = MF.addLiveIn(Reg, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32); @@ -1096,7 +1097,7 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight( // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. 
switch (*constraint) { default: diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h index bb128da..8b49bc3 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h @@ -102,7 +102,7 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - get the ISD::SETCC result ValueType - MVT::SimpleValueType getSetCCResultType(EVT VT) const; + EVT getSetCCResultType(EVT VT) const; private: // Subtarget Info diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 188f10a..7ae05b3 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #define GET_INSTRINFO_CTOR @@ -239,7 +239,8 @@ unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { +bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> + &Cond) const { assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!"); switch (Cond[0].getImm()) { default: return true; diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h index 79f962b..7174405 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -166,62 +166,6 @@ namespace MBlaze { } } -/// MBlazeII - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace MBlazeII { - enum { - // PseudoFrm - This represents an instruction that is a pseudo instruction - // or one that has not been implemented yet. It is illegal to code generate - // it, but tolerated for intermediate implementation stages. - FPseudo = 0, - FRRR, - FRRI, - FCRR, - FCRI, - FRCR, - FRCI, - FCCR, - FCCI, - FRRCI, - FRRC, - FRCX, - FRCS, - FCRCS, - FCRCX, - FCX, - FCR, - FRIR, - FRRRR, - FRI, - FC, - FormMask = 63 - - //===------------------------------------------------------------------===// - // MBlaze Specific MachineOperand flags. - // MO_NO_FLAG, - - /// MO_GOT - Represents the offset into the global offset table at which - /// the address the relocation entry symbol resides during execution. - // MO_GOT, - - /// MO_GOT_CALL - Represents the offset into the global offset table at - /// which the address of a call site relocation entry symbol resides - /// during execution. This is different from the above since this flag - /// can only be present in call instructions. - // MO_GOT_CALL, - - /// MO_GPREL - Represents the offset from the current gp value to be used - /// for the relocatable object file being produced. - // MO_GPREL, - - /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol - /// address. 
- // MO_ABS_HILO - - }; -} - class MBlazeInstrInfo : public MBlazeGenInstrInfo { MBlazeTargetMachine &TM; const MBlazeRegisterInfo RI; diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td index 950f2d7..1d8c987 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -442,17 +442,19 @@ let Predicates=[HasMul] in { //===----------------------------------------------------------------------===// let canFoldAsLoad = 1, isReMaterializable = 1 in { - def LBU : LoadM<0x30, 0x000, "lbu ">; - def LBUR : LoadM<0x30, 0x200, "lbur ">; + let neverHasSideEffects = 1 in { + def LBU : LoadM<0x30, 0x000, "lbu ">; + def LBUR : LoadM<0x30, 0x200, "lbur ">; - def LHU : LoadM<0x31, 0x000, "lhu ">; - def LHUR : LoadM<0x31, 0x200, "lhur ">; + def LHU : LoadM<0x31, 0x000, "lhu ">; + def LHUR : LoadM<0x31, 0x200, "lhur ">; - def LW : LoadM<0x32, 0x000, "lw ">; - def LWR : LoadM<0x32, 0x200, "lwr ">; + def LW : LoadM<0x32, 0x000, "lw ">; + def LWR : LoadM<0x32, 0x200, "lwr ">; - let Defs = [CARRY] in { - def LWX : LoadM<0x32, 0x400, "lwx ">; + let Defs = [CARRY] in { + def LWX : LoadM<0x32, 0x400, "lwx ">; + } } def LBUI : LoadMI<0x38, "lbui ", zextloadi8>; @@ -877,6 +879,9 @@ def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>; // Peepholes def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>; +// Atomic fence +def : Pat<(atomic_fence (imm), (imm)), (MEMBARRIER)>; + //===----------------------------------------------------------------------===// // Floating Point Support //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index 32d67b2..ea81dd6 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -37,7 +37,7 @@ namespace mblazeIntrinsic { #undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN } -std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, +std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, Type **Tys, unsigned numTys) const { static const char *const names[] = { #define GET_INTRINSIC_NAME_TABLE @@ -90,8 +90,8 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const { #include "MBlazeGenIntrinsics.inc" #undef GET_INTRINSIC_ATTRIBUTES -static const FunctionType *getType(LLVMContext &Context, unsigned id) { - const Type *ResultTy = NULL; +static FunctionType *getType(LLVMContext &Context, unsigned id) { + Type *ResultTy = NULL; std::vector<Type*> ArgTys; bool IsVarArg = false; @@ -103,7 +103,7 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) { } Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, - const Type **Tys, + Type **Tys, unsigned numTy) const { assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID); diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h index 9804c77..80760d8 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h @@ -19,12 +19,12 @@ namespace llvm { class MBlazeIntrinsicInfo : public TargetIntrinsicInfo { public: - std::string getName(unsigned IntrID, const Type **Tys = 0, + std::string getName(unsigned IntrID, Type **Tys 
= 0, unsigned numTys = 0) const; unsigned lookupName(const char *Name, unsigned Len) const; unsigned lookupGCCName(const char *Name) const; bool isOverloaded(unsigned IID) const; - Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0, + Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0, unsigned numTys = 0) const; }; diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index f0b201a..9788ba9 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -44,164 +43,7 @@ using namespace llvm; MBlazeRegisterInfo:: MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii) - : MBlazeGenRegisterInfo(), Subtarget(ST), TII(tii) {} - -/// getRegisterNumbering - Given the enum value for some register, e.g. -/// MBlaze::R0, return the number that it corresponds to (e.g. 0). -unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) { - switch (RegEnum) { - case MBlaze::R0 : return 0; - case MBlaze::R1 : return 1; - case MBlaze::R2 : return 2; - case MBlaze::R3 : return 3; - case MBlaze::R4 : return 4; - case MBlaze::R5 : return 5; - case MBlaze::R6 : return 6; - case MBlaze::R7 : return 7; - case MBlaze::R8 : return 8; - case MBlaze::R9 : return 9; - case MBlaze::R10 : return 10; - case MBlaze::R11 : return 11; - case MBlaze::R12 : return 12; - case MBlaze::R13 : return 13; - case MBlaze::R14 : return 14; - case MBlaze::R15 : return 15; - case MBlaze::R16 : return 16; - case MBlaze::R17 : return 17; - case MBlaze::R18 : return 18; - case MBlaze::R19 : return 19; - case MBlaze::R20 : return 20; - case MBlaze::R21 : return 21; - case MBlaze::R22 : return 22; - case MBlaze::R23 : return 23; - case MBlaze::R24 : return 24; - case MBlaze::R25 : return 25; - case MBlaze::R26 : return 26; - case MBlaze::R27 : return 27; - case MBlaze::R28 : return 28; - case MBlaze::R29 : return 29; - case MBlaze::R30 : return 30; - case MBlaze::R31 : return 31; - case MBlaze::RPC : return 0x0000; - case MBlaze::RMSR : return 0x0001; - case MBlaze::REAR : return 0x0003; - case MBlaze::RESR : return 0x0005; - case MBlaze::RFSR : return 0x0007; - case MBlaze::RBTR : return 0x000B; - case MBlaze::REDR : return 0x000D; - case MBlaze::RPID : return 0x1000; - case MBlaze::RZPR : return 0x1001; - case MBlaze::RTLBX : return 0x1002; - case MBlaze::RTLBLO : return 0x1003; - case MBlaze::RTLBHI : return 0x1004; - case MBlaze::RPVR0 : return 0x2000; - case MBlaze::RPVR1 : return 0x2001; - case MBlaze::RPVR2 : return 0x2002; - case MBlaze::RPVR3 : return 0x2003; - case MBlaze::RPVR4 : return 0x2004; - case MBlaze::RPVR5 : return 0x2005; - case MBlaze::RPVR6 : return 0x2006; - case MBlaze::RPVR7 : return 0x2007; - case MBlaze::RPVR8 : return 0x2008; - case MBlaze::RPVR9 : return 0x2009; - case MBlaze::RPVR10 : return 0x200A; - case MBlaze::RPVR11 : return 0x200B; - default: llvm_unreachable("Unknown register number!"); - } - return 0; // Not reached -} - -/// getRegisterFromNumbering - Given the enum value for some register, e.g. -/// MBlaze::R0, return the number that it corresponds to (e.g. 0). 
-unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) { - switch (Reg) { - case 0 : return MBlaze::R0; - case 1 : return MBlaze::R1; - case 2 : return MBlaze::R2; - case 3 : return MBlaze::R3; - case 4 : return MBlaze::R4; - case 5 : return MBlaze::R5; - case 6 : return MBlaze::R6; - case 7 : return MBlaze::R7; - case 8 : return MBlaze::R8; - case 9 : return MBlaze::R9; - case 10 : return MBlaze::R10; - case 11 : return MBlaze::R11; - case 12 : return MBlaze::R12; - case 13 : return MBlaze::R13; - case 14 : return MBlaze::R14; - case 15 : return MBlaze::R15; - case 16 : return MBlaze::R16; - case 17 : return MBlaze::R17; - case 18 : return MBlaze::R18; - case 19 : return MBlaze::R19; - case 20 : return MBlaze::R20; - case 21 : return MBlaze::R21; - case 22 : return MBlaze::R22; - case 23 : return MBlaze::R23; - case 24 : return MBlaze::R24; - case 25 : return MBlaze::R25; - case 26 : return MBlaze::R26; - case 27 : return MBlaze::R27; - case 28 : return MBlaze::R28; - case 29 : return MBlaze::R29; - case 30 : return MBlaze::R30; - case 31 : return MBlaze::R31; - default: llvm_unreachable("Unknown register number!"); - } - return 0; // Not reached -} - -unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) { - switch (Reg) { - case 0x0000 : return MBlaze::RPC; - case 0x0001 : return MBlaze::RMSR; - case 0x0003 : return MBlaze::REAR; - case 0x0005 : return MBlaze::RESR; - case 0x0007 : return MBlaze::RFSR; - case 0x000B : return MBlaze::RBTR; - case 0x000D : return MBlaze::REDR; - case 0x1000 : return MBlaze::RPID; - case 0x1001 : return MBlaze::RZPR; - case 0x1002 : return MBlaze::RTLBX; - case 0x1003 : return MBlaze::RTLBLO; - case 0x1004 : return MBlaze::RTLBHI; - case 0x2000 : return MBlaze::RPVR0; - case 0x2001 : return MBlaze::RPVR1; - case 0x2002 : return MBlaze::RPVR2; - case 0x2003 : return MBlaze::RPVR3; - case 0x2004 : return MBlaze::RPVR4; - case 0x2005 : return MBlaze::RPVR5; - case 0x2006 : return MBlaze::RPVR6; - case 0x2007 : return MBlaze::RPVR7; - case 0x2008 : return MBlaze::RPVR8; - case 0x2009 : return MBlaze::RPVR9; - case 0x200A : return MBlaze::RPVR10; - case 0x200B : return MBlaze::RPVR11; - default: llvm_unreachable("Unknown register number!"); - } - return 0; // Not reached -} - -bool MBlazeRegisterInfo::isRegister(unsigned Reg) { - return Reg <= 31; -} - -bool MBlazeRegisterInfo::isSpecialRegister(unsigned Reg) { - switch (Reg) { - case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 : - case 0x0007 : case 0x000B : case 0x000D : case 0x1000 : - case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 : - case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 : - case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 : - case 0x2008 : case 0x2009 : case 0x200A : case 0x200B : - return true; - - default: - return false; - } - return false; // Not reached -} + : MBlazeGenRegisterInfo(MBlaze::R15), Subtarget(ST), TII(tii) {} unsigned MBlazeRegisterInfo::getPICCallReg() { return MBlaze::R20; @@ -334,10 +176,6 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF) const { MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset()); } -unsigned MBlazeRegisterInfo::getRARegister() const { - return MBlaze::R15; -} - unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -353,11 +191,3 @@ unsigned MBlazeRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); return 0; } - -int 
MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { - return MBlazeGenRegisterInfo::getDwarfRegNumFull(RegNo,0); -} - -int MBlazeRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - return MBlazeGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); -} diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h index 7ebce21..7e4b269 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -42,14 +42,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget, const TargetInstrInfo &tii); - /// getRegisterNumbering - Given the enum value for some register, e.g. - /// MBlaze::RA, return the number that it corresponds to (e.g. 31). - static unsigned getRegisterNumbering(unsigned RegEnum); - static unsigned getRegisterFromNumbering(unsigned RegEnum); - static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum); - static bool isRegister(unsigned RegEnum); - static bool isSpecialRegister(unsigned RegEnum); - /// Get PIC indirect call register static unsigned getPICCallReg(); @@ -69,15 +61,11 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; /// Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; /// Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp index eda141d..7e5667f 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -15,7 +15,7 @@ #include "MBlaze.h" #include "MBlazeRegisterInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 7208874..7bff53e 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -16,48 +16,13 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" using namespace llvm; -static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, - MCContext &Ctx, TargetAsmBackend &TAB, - raw_ostream &_OS, - MCCodeEmitter *_Emitter, - bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) { - llvm_unreachable("MBlaze does not support Darwin MACH-O format"); - return NULL; - } - - if (TheTriple.isOSWindows()) { - llvm_unreachable("MBlaze does not support Windows COFF format"); - return NULL; - } - - return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); -} - - extern "C" void LLVMInitializeMBlazeTarget() { // Register the target. 
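// Only the TargetMachine itself stays registered here; the MC-layer pieces
// (asm info, codegen info, instr/register/subtarget info, code emitter, asm
// backend, object streamer, instruction printer) move into
// LLVMInitializeMBlazeTargetMC in MCTargetDesc/MBlazeMCTargetDesc.cpp,
// further down in this diff. A client wanting the full target calls the
// per-target entry points in order -- a sketch, assuming the usual
// generated initializer names:
//
//   LLVMInitializeMBlazeTargetInfo(); // registers TheMBlazeTarget
//   LLVMInitializeMBlazeTargetMC();   // registers the MC-layer components
//   LLVMInitializeMBlazeTarget();     // registers the TargetMachine (below)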
RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget); - - // Register the MC code emitter - TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget, - llvm::createMBlazeMCCodeEmitter); - - // Register the asm backend - TargetRegistry::RegisterAsmBackend(TheMBlazeTarget, - createMBlazeAsmBackend); - - // Register the object streamer - TargetRegistry::RegisterObjectStreamer(TheMBlazeTarget, - createMCStreamer); - } // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment @@ -67,21 +32,16 @@ extern "C" void LLVMInitializeMBlazeTarget() { // offset from the stack/frame pointer, using StackGrowsUp enables // an easier handling. MBlazeTargetMachine:: -MBlazeTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS): - LLVMTargetMachine(T, TT, CPU, FS), +MBlazeTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM): + LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), InstrInfo(*this), FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { - if (getRelocationModel() == Reloc::Default) { - setRelocationModel(Reloc::Static); - } - - if (getCodeModel() == CodeModel::Default) - setCodeModel(CodeModel::Small); } // Install an instruction selector pass using diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h index cd6caaf..c1bc08a 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -41,8 +41,9 @@ namespace llvm { InstrItineraryData InstrItins; public: - MBlazeTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + MBlazeTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const MBlazeInstrInfo *getInstrInfo() const { return &InstrInfo; } diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp index abd1b0b..f66ea30 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -69,7 +69,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (Kind.isMergeable1ByteCString()) return false; - const Type *Ty = GV->getType()->getElementType(); + Type *Ty = GV->getType()->getElementType(); return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); } diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 3d15708..0000000 --- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMMBlazeDesc - MBlazeMCTargetDesc.cpp - MBlazeMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp index 08f14c3..08f7d46a 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp @@ -7,10 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmBackend.h" -#include "MBlaze.h" -#include "MBlazeELFWriterInfo.h" -#include "llvm/ADT/Twine.h" +#include 
"MCTargetDesc/MBlazeMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -20,11 +18,11 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; static unsigned getFixupKindSize(unsigned Kind) { @@ -48,10 +46,10 @@ public: /*HasRelocationAddend*/ true) {} }; -class MBlazeAsmBackend : public TargetAsmBackend { +class MBlazeAsmBackend : public MCAsmBackend { public: MBlazeAsmBackend(const Target &T) - : TargetAsmBackend() { + : MCAsmBackend() { } unsigned getNumFixupKinds() const { @@ -148,8 +146,7 @@ void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, } } // end anonymous namespace -TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, - const std::string &TT) { +MCAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h new file mode 100644 index 0000000..776dbc4 --- /dev/null +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h @@ -0,0 +1,240 @@ +//===-- MBlazeBaseInfo.h - Top level definitions for MBlaze -- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the MBlaze target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef MBlazeBASEINFO_H +#define MBlazeBASEINFO_H + +#include "MBlazeMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +/// MBlazeII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace MBlazeII { + enum { + // PseudoFrm - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + FPseudo = 0, + FRRR, + FRRI, + FCRR, + FCRI, + FRCR, + FRCI, + FCCR, + FCCI, + FRRCI, + FRRC, + FRCX, + FRCS, + FCRCS, + FCRCX, + FCX, + FCR, + FRIR, + FRRRR, + FRI, + FC, + FormMask = 63 + + //===------------------------------------------------------------------===// + // MBlaze Specific MachineOperand flags. + // MO_NO_FLAG, + + /// MO_GOT - Represents the offset into the global offset table at which + /// the address the relocation entry symbol resides during execution. + // MO_GOT, + + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides + /// during execution. This is different from the above since this flag + /// can only be present in call instructions. 
+ // MO_GOT_CALL, + + /// MO_GPREL - Represents the offset from the current gp value to be used + /// for the relocatable object file being produced. + // MO_GPREL, + + /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol + /// address. + // MO_ABS_HILO + + }; +} + +static inline bool isMBlazeRegister(unsigned Reg) { + return Reg <= 31; +} + +static inline bool isSpecialMBlazeRegister(unsigned Reg) { + switch (Reg) { + case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 : + case 0x0007 : case 0x000B : case 0x000D : case 0x1000 : + case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 : + case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 : + case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 : + case 0x2008 : case 0x2009 : case 0x200A : case 0x200B : + return true; + + default: + return false; + } + return false; // Not reached +} + +/// getMBlazeRegisterNumbering - Given the enum value for some register, e.g. +/// MBlaze::R0, return the number that it corresponds to (e.g. 0). +static inline unsigned getMBlazeRegisterNumbering(unsigned RegEnum) { + switch (RegEnum) { + case MBlaze::R0 : return 0; + case MBlaze::R1 : return 1; + case MBlaze::R2 : return 2; + case MBlaze::R3 : return 3; + case MBlaze::R4 : return 4; + case MBlaze::R5 : return 5; + case MBlaze::R6 : return 6; + case MBlaze::R7 : return 7; + case MBlaze::R8 : return 8; + case MBlaze::R9 : return 9; + case MBlaze::R10 : return 10; + case MBlaze::R11 : return 11; + case MBlaze::R12 : return 12; + case MBlaze::R13 : return 13; + case MBlaze::R14 : return 14; + case MBlaze::R15 : return 15; + case MBlaze::R16 : return 16; + case MBlaze::R17 : return 17; + case MBlaze::R18 : return 18; + case MBlaze::R19 : return 19; + case MBlaze::R20 : return 20; + case MBlaze::R21 : return 21; + case MBlaze::R22 : return 22; + case MBlaze::R23 : return 23; + case MBlaze::R24 : return 24; + case MBlaze::R25 : return 25; + case MBlaze::R26 : return 26; + case MBlaze::R27 : return 27; + case MBlaze::R28 : return 28; + case MBlaze::R29 : return 29; + case MBlaze::R30 : return 30; + case MBlaze::R31 : return 31; + case MBlaze::RPC : return 0x0000; + case MBlaze::RMSR : return 0x0001; + case MBlaze::REAR : return 0x0003; + case MBlaze::RESR : return 0x0005; + case MBlaze::RFSR : return 0x0007; + case MBlaze::RBTR : return 0x000B; + case MBlaze::REDR : return 0x000D; + case MBlaze::RPID : return 0x1000; + case MBlaze::RZPR : return 0x1001; + case MBlaze::RTLBX : return 0x1002; + case MBlaze::RTLBLO : return 0x1003; + case MBlaze::RTLBHI : return 0x1004; + case MBlaze::RPVR0 : return 0x2000; + case MBlaze::RPVR1 : return 0x2001; + case MBlaze::RPVR2 : return 0x2002; + case MBlaze::RPVR3 : return 0x2003; + case MBlaze::RPVR4 : return 0x2004; + case MBlaze::RPVR5 : return 0x2005; + case MBlaze::RPVR6 : return 0x2006; + case MBlaze::RPVR7 : return 0x2007; + case MBlaze::RPVR8 : return 0x2008; + case MBlaze::RPVR9 : return 0x2009; + case MBlaze::RPVR10 : return 0x200A; + case MBlaze::RPVR11 : return 0x200B; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +/// getMBlazeRegisterFromNumbering - Given a register number (e.g. 0), return +/// the register enum value that it corresponds to (e.g. MBlaze::R0).
+static inline unsigned getMBlazeRegisterFromNumbering(unsigned Reg) { + switch (Reg) { + case 0 : return MBlaze::R0; + case 1 : return MBlaze::R1; + case 2 : return MBlaze::R2; + case 3 : return MBlaze::R3; + case 4 : return MBlaze::R4; + case 5 : return MBlaze::R5; + case 6 : return MBlaze::R6; + case 7 : return MBlaze::R7; + case 8 : return MBlaze::R8; + case 9 : return MBlaze::R9; + case 10 : return MBlaze::R10; + case 11 : return MBlaze::R11; + case 12 : return MBlaze::R12; + case 13 : return MBlaze::R13; + case 14 : return MBlaze::R14; + case 15 : return MBlaze::R15; + case 16 : return MBlaze::R16; + case 17 : return MBlaze::R17; + case 18 : return MBlaze::R18; + case 19 : return MBlaze::R19; + case 20 : return MBlaze::R20; + case 21 : return MBlaze::R21; + case 22 : return MBlaze::R22; + case 23 : return MBlaze::R23; + case 24 : return MBlaze::R24; + case 25 : return MBlaze::R25; + case 26 : return MBlaze::R26; + case 27 : return MBlaze::R27; + case 28 : return MBlaze::R28; + case 29 : return MBlaze::R29; + case 30 : return MBlaze::R30; + case 31 : return MBlaze::R31; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) { + switch (Reg) { + case 0x0000 : return MBlaze::RPC; + case 0x0001 : return MBlaze::RMSR; + case 0x0003 : return MBlaze::REAR; + case 0x0005 : return MBlaze::RESR; + case 0x0007 : return MBlaze::RFSR; + case 0x000B : return MBlaze::RBTR; + case 0x000D : return MBlaze::REDR; + case 0x1000 : return MBlaze::RPID; + case 0x1001 : return MBlaze::RZPR; + case 0x1002 : return MBlaze::RTLBX; + case 0x1003 : return MBlaze::RTLBLO; + case 0x1004 : return MBlaze::RTLBHI; + case 0x2000 : return MBlaze::RPVR0; + case 0x2001 : return MBlaze::RPVR1; + case 0x2002 : return MBlaze::RPVR2; + case 0x2003 : return MBlaze::RPVR3; + case 0x2004 : return MBlaze::RPVR4; + case 0x2005 : return MBlaze::RPVR5; + case 0x2006 : return MBlaze::RPVR6; + case 0x2007 : return MBlaze::RPVR7; + case 0x2008 : return MBlaze::RPVR8; + case 0x2009 : return MBlaze::RPVR9; + case 0x200A : return MBlaze::RPVR10; + case 0x200B : return MBlaze::RPVR11; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +} // end namespace llvm; + +#endif diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp index ddc636d..1514557 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" -#include "MBlaze.h" -#include "MBlazeInstrInfo.h" +#include "MCTargetDesc/MBlazeBaseInfo.h" +#include "MCTargetDesc/MBlazeMCTargetDesc.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCFixup.h" #include "llvm/ADT/Statistic.h" @@ -106,7 +108,7 @@ MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO) const { if (MO.isReg()) - return MBlazeRegisterInfo::getRegisterNumbering(MO.getReg()); + return getMBlazeRegisterNumbering(MO.getReg()); else if (MO.isImm()) return static_cast<unsigned>(MO.getImm()); else if (MO.isExpr()) diff 
--git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index 20d6c0b..43ae281 100644 --- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -13,10 +13,14 @@ #include "MBlazeMCTargetDesc.h" #include "MBlazeMCAsmInfo.h" +#include "InstPrinter/MBlazeInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "MBlazeGenInstrInfo.inc" @@ -36,8 +40,10 @@ static MCInstrInfo *createMBlazeMCInstrInfo() { return X; } -extern "C" void LLVMInitializeMBlazeMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo); +static MCRegisterInfo *createMBlazeMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitMBlazeMCRegisterInfo(X, MBlaze::R15); + return X; } static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -47,11 +53,6 @@ static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeMBlazeMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget, - createMBlazeMCSubtargetInfo); -} - static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -60,6 +61,80 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { } } -extern "C" void LLVMInitializeMBlazeMCAsmInfo() { +static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) + RM = Reloc::Static; + if (CM == CodeModel::Default) + CM = CodeModel::Small; + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &_OS, + MCCodeEmitter *_Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) { + llvm_unreachable("MBlaze does not support Darwin MACH-O format"); + return NULL; + } + + if (TheTriple.isOSWindows()) { + llvm_unreachable("MBlaze does not support Windows COFF format"); + return NULL; + } + + return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); +} + +static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new MBlazeInstPrinter(MAI); + return 0; +} + +// Force static initialization. +extern "C" void LLVMInitializeMBlazeTargetMC() { + // Register the MC asm info. RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheMBlazeTarget, + createMBlazeMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheMBlazeTarget, + createMBlazeMCRegisterInfo); + + // Register the MC subtarget info. 
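// MCSubtargetInfo carries the CPU and feature bits the MC layer needs
// without linking in CodeGen. On the consumer side, the factory registered
// on the next line is what TargetRegistry clients end up invoking -- a
// sketch, assuming this snapshot's lookup API:
//
//   std::string Err;
//   const Target *T =
//       TargetRegistry::lookupTarget("mblaze-unknown-unknown", Err);
//   OwningPtr<MCSubtargetInfo> STI(
//       T->createMCSubtargetInfo("mblaze-unknown-unknown", /*CPU=*/"",
//                                /*Features=*/""));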
+ TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget, + createMBlazeMCSubtargetInfo); + + // Register the MC code emitter + TargetRegistry::RegisterMCCodeEmitter(TheMBlazeTarget, + llvm::createMBlazeMCCodeEmitter); + + // Register the asm backend + TargetRegistry::RegisterMCAsmBackend(TheMBlazeTarget, + createMBlazeAsmBackend); + + // Register the object streamer + TargetRegistry::RegisterMCObjectStreamer(TheMBlazeTarget, + createMCStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget, + createMBlazeMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h index b14772e..deff5cb 100644 --- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h @@ -15,12 +15,23 @@ #define MBLAZEMCTARGETDESC_H namespace llvm { +class MCAsmBackend; +class MCContext; +class MCCodeEmitter; +class MCInstrInfo; class MCSubtargetInfo; class Target; class StringRef; +class formatted_raw_ostream; extern Target TheMBlazeTarget; +MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createMBlazeAsmBackend(const Target &T, StringRef TT); + } // End llvm namespace // Defines symbolic names for MBlaze registers. This defines a mapping from diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile deleted file mode 100644 index 71075ff..0000000 --- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/MBlaze/TargetDesc/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp index 16e01db..71210d8 100644 --- a/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp +++ b/contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp @@ -9,7 +9,7 @@ #include "MBlaze.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheMBlazeTarget; diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt deleted file mode 100644 index f5458d5..0000000 --- a/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMMSP430AsmPrinter - MSP430InstPrinter.cpp - ) -add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp index e10d4fe..5d6c6ad 100644 --- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp +++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp @@ -25,8 +25,10 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #include "MSP430GenAsmWriter.inc" -void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) { +void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { printInstruction(MI, O); + printAnnotation(O, Annot); } void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo, diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 50d98b7..a1984a8 100644 --- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -22,9 +22,9 @@ namespace llvm { class MSP430InstPrinter : public MCInstPrinter { public: MSP430InstPrinter(const MCAsmInfo &MAI) - : MCInstPrinter(MAI) {} + : MCInstPrinter(MAI) {} - virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile b/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile deleted file mode 100644 index a5293ab..0000000 --- a/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMSP430AsmPrinter - -# Hack: we need to include 'main' MSP430 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 0f3ebd3..0000000 --- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMMSP430Desc - MSP430MCTargetDesc.cpp - MSP430MCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index 43a704d..fda70b8 100644 --- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -13,10 +13,12 @@ #include "MSP430MCTargetDesc.h" #include "MSP430MCAsmInfo.h" +#include "InstPrinter/MSP430InstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "MSP430GenInstrInfo.inc" @@ -29,18 +31,18 @@ using namespace llvm; - static MCInstrInfo *createMSP430MCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitMSP430MCInstrInfo(X); return X; } -extern "C" void LLVMInitializeMSP430MCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo); +static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitMSP430MCRegisterInfo(X, MSP430::PCW); + return X; } - static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *X = new MCSubtargetInfo(); @@ -48,11 +50,42 @@ static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeMSP430MCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target, - createMSP430MCSubtargetInfo); +static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new MSP430InstPrinter(MAI); + return 0; } -extern "C" void LLVMInitializeMSP430MCAsmInfo() { +extern "C" void LLVMInitializeMSP430TargetMC() { + // Register the MC asm info. RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheMSP430Target, + createMSP430MCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheMSP430Target, + createMSP430MCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target, + createMSP430MCSubtargetInfo); + + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(TheMSP430Target, + createMSP430MCInstPrinter); } diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h index 0d8a6bd..35f2590 100644 --- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h +++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ALPHAMCTARGETDESC_H -#define ALPHAMCTARGETDESC_H +#ifndef MSP430MCTARGETDESC_H +#define MSP430MCTARGETDESC_H namespace llvm { class MCSubtargetInfo; diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile deleted file mode 100644 index bb85799..0000000 --- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/MSP430/TargetDesc/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMSP430Desc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp index 2042056..8836549 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -32,9 +32,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -163,17 +161,7 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); } -static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - if (SyntaxVariant == 0) - return new MSP430InstPrinter(MAI); - return 0; -} - // Force static initialization. extern "C" void LLVMInitializeMSP430AsmPrinter() { RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target); - TargetRegistry::RegisterMCInstPrinter(TheMSP430Target, - createMSP430MCInstPrinter); } diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index 0a3eab1..dc37431 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -79,6 +79,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setStackPointerRegisterToSaveRestore(MSP430::SPW); setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? setSchedulingPreference(Sched::Latency); // We have post-incremented loads / stores. 
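
The MBlaze and MSP430 hunks above apply the same refactoring: the old per-component initializers (LLVMInitializeMBlazeMCInstrInfo, LLVMInitializeMSP430MCSubtargetInfo, and so on) are folded into a single LLVMInitialize<Target>TargetMC entry point that registers one factory function per MC component with the TargetRegistry. Below is a minimal self-contained sketch of that factory-registration pattern; it mimics the shape of the code above, but every name in it is invented for illustration and none of it is LLVM API:

#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

// Stand-in for an MC-layer component (MCInstrInfo, MCRegisterInfo, ...).
struct Component {
  std::string Desc;
};

using Factory = std::function<std::unique_ptr<Component>()>;

// Tiny analogue of llvm::TargetRegistry: a map from "target.component"
// keys to factory functions.
class Registry {
  std::map<std::string, Factory> Factories;
public:
  void add(const std::string &Key, Factory F) { Factories[Key] = std::move(F); }
  std::unique_ptr<Component> create(const std::string &Key) const {
    auto It = Factories.find(Key);
    return It == Factories.end() ? nullptr : It->second();
  }
};

// Analogue of LLVMInitializeMSP430TargetMC(): one entry point registers
// every MC factory for the target in one place.
void initializeMSP430TargetMCSketch(Registry &R) {
  R.add("msp430.instrinfo",
        [] { return std::unique_ptr<Component>(new Component{"MSP430 instr info"}); });
  R.add("msp430.reginfo",
        [] { return std::unique_ptr<Component>(new Component{"MSP430 reg info, RA = PCW"}); });
  R.add("msp430.instprinter",
        [] { return std::unique_ptr<Component>(new Component{"MSP430 inst printer"}); });
}

int main() {
  Registry R;
  initializeMSP430TargetMCSketch(R);
  if (auto C = R.create("msp430.reginfo"))
    std::cout << C->Desc << "\n"; // prints: MSP430 reg info, RA = PCW
}

The payoff of the pattern is that a client only has to link the target's MC library and call its single initializer; after that, every MC-layer object for the target can be constructed through the registry without any compile-time dependency on target headers.
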
@@ -987,8 +988,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { } } -bool MSP430TargetLowering::isTruncateFree(const Type *Ty1, - const Type *Ty2) const { +bool MSP430TargetLowering::isTruncateFree(Type *Ty1, + Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; @@ -1002,7 +1003,7 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return (VT1.getSizeInBits() > VT2.getSizeInBits()); } -bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { +bool MSP430TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { // MSP430 implicitly zero-extends 8-bit results in 16-bit registers. return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16); } diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h index bd660a0..237f604 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h @@ -102,7 +102,7 @@ namespace llvm { /// isTruncateFree - Return true if it's free to truncate a value of type /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in /// register R15W to i8 by referencing its sub-register R15B. - virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; virtual bool isTruncateFree(EVT VT1, EVT VT2) const; /// isZExtFree - Return true if any actual instruction that defines a value @@ -113,7 +113,7 @@ namespace llvm { /// necessarily apply to truncate instructions. e.g. on msp430, all /// instructions that define 8-bit values implicit zero-extend the result /// out to 16 bits. - virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; virtual bool isZExtFree(EVT VT1, EVT VT2) const; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp index 846d093..ffd4318 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_CTOR #include "MSP430GenInstrInfo.inc" diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp index 1cc60bb..9049c4b 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -34,7 +34,7 @@ using namespace llvm; // FIXME: Provide proper call frame setup / destroy opcodes. MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii) - : MSP430GenRegisterInfo(), TM(tm), TII(tii) { + : MSP430GenRegisterInfo(MSP430::PCW), TM(tm), TII(tii) { StackAlign = TM.getFrameLowering()->getStackAlignment(); } @@ -233,22 +233,8 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) } } -unsigned MSP430RegisterInfo::getRARegister() const { - return MSP430::PCW; -} - unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); return TFI->hasFP(MF) ? 
MSP430::FPW : MSP430::SPW; } - -int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - llvm_unreachable("Not implemented yet!"); - return 0; -} - -int MSP430RegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { - llvm_unreachable("Not implemented yet!"); - return 0; -} diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h index fb70594..10a3d53 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h @@ -58,12 +58,7 @@ public: void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - - //! Get DWARF debugging register number - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp index b58c50a..3ee14d9 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp @@ -13,7 +13,7 @@ #include "MSP430Subtarget.h" #include "MSP430.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp index 971f512..4dd8933 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -16,7 +16,7 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; extern "C" void LLVMInitializeMSP430Target() { @@ -25,10 +25,11 @@ extern "C" void LLVMInitializeMSP430Target() { } MSP430TargetMachine::MSP430TargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), + StringRef TT, + StringRef CPU, + StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), // FIXME: Check TargetData string. 
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h index 2a9eea0..eb483dc 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h @@ -38,8 +38,9 @@ class MSP430TargetMachine : public LLVMTargetMachine { MSP430FrameLowering FrameLowering; public: - MSP430TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + MSP430TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; diff --git a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp index f9ca5c4..8b3e01e 100644 --- a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp +++ b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp @@ -9,7 +9,7 @@ #include "MSP430.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheMSP430Target; diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp index 46c687b..53ad155f 100644 --- a/contrib/llvm/lib/Target/Mangler.cpp +++ b/contrib/llvm/lib/Target/Mangler.cpp @@ -159,7 +159,7 @@ static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName, unsigned ArgWords = 0; for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) { - const Type *Ty = AI->getType(); + Type *Ty = AI->getType(); // 'Dereference' type in case of byval parameter attribute if (AI->hasByValAttr()) Ty = cast<PointerType>(Ty)->getElementType(); @@ -214,7 +214,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, // fastcall and stdcall functions usually need @42 at the end to specify // the argument info. - const FunctionType *FT = F->getFunctionType(); + FunctionType *FT = F->getFunctionType(); if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) && // "Pure" variadic functions do not receive @0 suffix. (!FT->isVarArg() || FT->getNumParams() == 0 || diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt deleted file mode 100644 index 8852fd4..0000000 --- a/contrib/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMMipsAsmPrinter - MipsInstPrinter.cpp - ) -add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile b/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile deleted file mode 100644 index 63e38ef..0000000 --- a/contrib/llvm/lib/Target/Mips/InstPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Mips/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMipsAsmPrinter - -# Hack: we need to include 'main' arm target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. 
-I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 41c1dd3..3dafc61 100644 --- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -1,4 +1,4 @@ -//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax --------===// +//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax ------===// // // The LLVM Compiler Infrastructure // @@ -69,8 +69,10 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << '$' << LowercaseString(getRegisterName(RegNo)); } -void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { +void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { printInstruction(MI, O); + printAnnotation(O, Annot); } void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 680208e..5c11165 100644 --- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -1,4 +1,4 @@ -//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax ----------===// +//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax --------===// // // The LLVM Compiler Infrastructure // @@ -86,7 +86,7 @@ public: virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 97de75d..0000000 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMMipsDesc - MipsMCTargetDesc.cpp - MipsMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile deleted file mode 100644 index 7fe2086..0000000 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Mips/TargetDesc/Makefile -----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMipsDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
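
The MSP430 and Mips InstPrinter hunks above both thread a new third parameter through printInst(): the caller now passes an annotation string, and the printer emits it via printAnnotation() after the instruction text. A self-contained sketch of that flow, with all names invented for illustration:

#include <iostream>
#include <string>

// Minimal stand-in for an MCInstPrinter that receives an annotation
// alongside the instruction, as in the new printInst() signature.
struct InstPrinterSketch {
  void printInstruction(const std::string &Inst, std::ostream &OS) {
    OS << '\t' << Inst;
  }
  void printAnnotation(std::ostream &OS, const std::string &Annot) {
    if (!Annot.empty())
      OS << "\t\t# " << Annot; // emitted as a trailing assembler comment
  }
  void printInst(const std::string &Inst, std::ostream &OS,
                 const std::string &Annot) {
    printInstruction(Inst, OS);
    printAnnotation(OS, Annot); // new step: the annotation follows the text
    OS << '\n';
  }
};

int main() {
  InstPrinterSketch P;
  P.printInst("add $2, $3, $4", std::cout, "result of a + b");
}
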
- -include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
new file mode 100644
index 0000000..f190ec4
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -0,0 +1,117 @@
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class MipsELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  MipsELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+                      bool HasRelocationAddend)
+    : MCELFObjectTargetWriter(is64Bit, OSType, EMachine,
+                              HasRelocationAddend) {}
+};
+
+class MipsAsmBackend : public MCAsmBackend {
+public:
+  MipsAsmBackend(const Target &T)
+    : MCAsmBackend() {}
+
+  unsigned getNumFixupKinds() const {
+    return 1; // TBD: report the real number of Mips fixup kinds.
+  }
+
+  /// ApplyFixup - Apply the \arg Value for the given \arg Fixup into the
+  /// provided data fragment, at the offset specified by the fixup and
+  /// following the fixup kind as appropriate.
+  void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                  uint64_t Value) const {
+  }
+
+  /// @name Target Relaxation Interfaces
+  /// @{
+
+  /// MayNeedRelaxation - Check whether the given instruction may need
+  /// relaxation.
+  ///
+  /// \param Inst - The instruction to test.
+  bool MayNeedRelaxation(const MCInst &Inst) const {
+    return false;
+  }
+
+  /// RelaxInstruction - Relax the instruction in the given fragment to the
+  /// next wider instruction.
+  ///
+  /// \param Inst - The instruction to relax, which may be the same as the
+  /// output.
+  /// \param Res [output] - On return, the relaxed instruction.
+  void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+  }
+
+  /// @}
+
+  /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the
+  /// given output. If the target cannot generate such a sequence, it should
+  /// return an error.
+  ///
+  /// \return - True on success.
+  bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+    return false;
+  }
+};
+
+class MipsEB_AsmBackend : public MipsAsmBackend {
+public:
+  Triple::OSType OSType;
+
+  MipsEB_AsmBackend(const Target &T, Triple::OSType _OSType)
+    : MipsAsmBackend(T), OSType(_OSType) {}
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return createELFObjectWriter(createELFObjectTargetWriter(),
+                                 OS, /*IsLittleEndian*/ false);
+  }
+
+  MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+    return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
+  }
+};
+
+class MipsEL_AsmBackend : public MipsAsmBackend {
+public:
+  Triple::OSType OSType;
+
+  MipsEL_AsmBackend(const Target &T, Triple::OSType _OSType)
+    : MipsAsmBackend(T), OSType(_OSType) {}
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return createELFObjectWriter(createELFObjectTargetWriter(),
+                                 OS, /*IsLittleEndian*/ true);
+  }
+
+  MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+    return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
+  }
};
+}
+
+MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, StringRef TT) {
+  Triple TheTriple(TT);
+
+  // Just return the little-endian backend for now; selecting the
+  // big-endian (EB) variant from the triple is still TBD.
+  return new MipsEL_AsmBackend(T, Triple(TT).getOS());
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
new file mode 100644
index 0000000..f7a6fa9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -0,0 +1,113 @@
+//===-- MipsBaseInfo.h - Top level definitions for Mips ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions
+// for the Mips target useful for the compiler back-end and the MC libraries.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MIPSBASEINFO_H
+#define MIPSBASEINFO_H
+
+#include "MipsMCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+/// getMipsRegisterNumbering - Given the enum value for some register,
+/// return the number that it corresponds to.
+inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) +{ + switch (RegEnum) { + case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64: + case Mips::D0: + return 0; + case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64: + return 1; + case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64: + case Mips::D1: + return 2; + case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64: + return 3; + case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64: + case Mips::D2: + return 4; + case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64: + return 5; + case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64: + case Mips::D3: + return 6; + case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64: + return 7; + case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64: + case Mips::D4: + return 8; + case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64: + return 9; + case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64: + case Mips::D5: + return 10; + case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64: + return 11; + case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64: + case Mips::D6: + return 12; + case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64: + return 13; + case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64: + case Mips::D7: + return 14; + case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64: + return 15; + case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64: + case Mips::D8: + return 16; + case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64: + return 17; + case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64: + case Mips::D9: + return 18; + case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64: + return 19; + case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64: + case Mips::D10: + return 20; + case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64: + return 21; + case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64: + case Mips::D11: + return 22; + case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64: + return 23; + case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64: + case Mips::D12: + return 24; + case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64: + return 25; + case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64: + case Mips::D13: + return 26; + case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64: + return 27; + case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64: + case Mips::D14: + return 28; + case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64: + return 29; + case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64: + case Mips::D15: + return 30; + case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64: + return 31; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} +} + +#endif diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h new file mode 100644 index 0000000..8b099ea --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -0,0 +1,90 @@ +#ifndef LLVM_Mips_MipsFIXUPKINDS_H +#define LLVM_Mips_MipsFIXUPKINDS_H + +//===-- Mips/MipsFixupKinds.h - Mips Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM 
Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace Mips {
+  enum Fixups {
+    // fixup_Mips_NONE - R_MIPS_NONE.
+    fixup_Mips_NONE = FirstTargetFixupKind,
+
+    // fixup_Mips_16 - R_MIPS_16.
+    fixup_Mips_16,
+
+    // fixup_Mips_32 - R_MIPS_32.
+    fixup_Mips_32,
+
+    // fixup_Mips_REL32 - R_MIPS_REL32.
+    fixup_Mips_REL32,
+
+    // fixup_Mips_26 - R_MIPS_26.
+    fixup_Mips_26,
+
+    // fixup_Mips_HI16 - R_MIPS_HI16.
+    fixup_Mips_HI16,
+
+    // fixup_Mips_LO16 - R_MIPS_LO16.
+    fixup_Mips_LO16,
+
+    // fixup_Mips_GPREL16 - R_MIPS_GPREL16.
+    fixup_Mips_GPREL16,
+
+    // fixup_Mips_LITERAL - R_MIPS_LITERAL.
+    fixup_Mips_LITERAL,
+
+    // fixup_Mips_GOT16 - R_MIPS_GOT16.
+    fixup_Mips_GOT16,
+
+    // fixup_Mips_PC16 - R_MIPS_PC16.
+    fixup_Mips_PC16,
+
+    // fixup_Mips_CALL16 - R_MIPS_CALL16.
+    fixup_Mips_CALL16,
+
+    // fixup_Mips_GPREL32 - R_MIPS_GPREL32.
+    fixup_Mips_GPREL32,
+
+    // fixup_Mips_SHIFT5 - R_MIPS_SHIFT5.
+    fixup_Mips_SHIFT5,
+
+    // fixup_Mips_SHIFT6 - R_MIPS_SHIFT6.
+    fixup_Mips_SHIFT6,
+
+    // fixup_Mips_64 - R_MIPS_64.
+    fixup_Mips_64,
+
+    // fixup_Mips_TLSGD - R_MIPS_TLS_GD.
+    fixup_Mips_TLSGD,
+
+    // fixup_Mips_GOTTPREL - R_MIPS_TLS_GOTTPREL.
+    fixup_Mips_GOTTPREL,
+
+    // fixup_Mips_TPREL_HI - R_MIPS_TLS_TPREL_HI16.
+    fixup_Mips_TPREL_HI,
+
+    // fixup_Mips_TPREL_LO - R_MIPS_TLS_TPREL_LO16.
+    fixup_Mips_TPREL_LO,
+
+    // fixup_Mips_Branch_PCRel - PC-relative branch fixup. This should
+    // become R_MIPS_PC16.
+    fixup_Mips_Branch_PCRel,
+
+    // Marker
+    LastTargetFixupKind,
+    NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+  };
+} // namespace Mips
+} // namespace llvm
+
+
+#endif /* LLVM_Mips_MipsFIXUPKINDS_H */
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 5d92425..71ae804 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -18,7 +18,8 @@ using namespace llvm;
 MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
-  if (TheTriple.getArch() == Triple::mips)
+  if ((TheTriple.getArch() == Triple::mips) ||
+      (TheTriple.getArch() == Triple::mips64))
     IsLittleEndian = false;
   AlignmentIsInBytes = false;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
new file mode 100644
index 0000000..d66de23
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -0,0 +1,52 @@
+//===-- MipsMCCodeEmitter.cpp - Convert Mips code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsMCCodeEmitter class.
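
The Mips::Fixups enumerators introduced above shadow the standard MIPS ELF relocation types; the first thirteen are even declared in exactly the ELF numbering order. When the stubbed-out object writer grows relocation support, the natural mapping is a switch like the sketch below; the function and enumerator names here are invented, but the numeric values are the standard R_MIPS_* relocation numbers:

#include <cstdint>

// Local copies for the sketch: a few fixup kinds from the enum above and
// the standard MIPS ELF relocation numbers they correspond to.
enum FixupSketch { NONE, FK16, FK32, REL32, FK26, HI16, LO16 };

std::uint32_t relocTypeSketch(FixupSketch F) {
  switch (F) {
  case NONE:  return 0; // R_MIPS_NONE
  case FK16:  return 1; // R_MIPS_16
  case FK32:  return 2; // R_MIPS_32
  case REL32: return 3; // R_MIPS_REL32
  case FK26:  return 4; // R_MIPS_26
  case HI16:  return 5; // R_MIPS_HI16
  case LO16:  return 6; // R_MIPS_LO16
  }
  return 0;
}
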
+// +//===----------------------------------------------------------------------===// +// +#define DEBUG_TYPE "mccodeemitter" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +#include "MCTargetDesc/MipsMCTargetDesc.h" + +using namespace llvm; + +namespace { +class MipsMCCodeEmitter : public MCCodeEmitter { + MipsMCCodeEmitter(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT + const MCInstrInfo &MCII; + const MCSubtargetInfo &STI; + +public: + MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) + : MCII(mcii), STI(sti) {} + + ~MipsMCCodeEmitter() {} + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + } +}; // class MipsMCCodeEmitter +} // namespace + +MCCodeEmitter *llvm::createMipsMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new MipsMCCodeEmitter(MCII, STI, Ctx); +} diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 06f0d0b..1f9e3dd 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -13,10 +13,14 @@ #include "MipsMCTargetDesc.h" #include "MipsMCAsmInfo.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "MipsGenInstrInfo.inc" @@ -35,11 +39,12 @@ static MCInstrInfo *createMipsMCInstrInfo() { return X; } -extern "C" void LLVMInitializeMipsMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo); +static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitMipsMCRegisterInfo(X, Mips::RA); + return X; } - static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *X = new MCSubtargetInfo(); @@ -47,12 +52,111 @@ static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeMipsMCSubtargetInfo() { +static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) { + MCAsmInfo *MAI = new MipsMCAsmInfo(T, TT); + + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(Mips::SP, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) + RM = Reloc::PIC_; + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +static MCInstPrinter *createMipsMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + return new MipsInstPrinter(MAI); +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &_OS, + MCCodeEmitter 
*_Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); +} + +extern "C" void LLVMInitializeMipsTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo); + RegisterMCAsmInfoFn Y(TheMipselTarget, createMipsMCAsmInfo); + RegisterMCAsmInfoFn A(TheMips64Target, createMipsMCAsmInfo); + RegisterMCAsmInfoFn B(TheMips64elTarget, createMipsMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheMipsTarget, + createMipsMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheMipselTarget, + createMipsMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheMips64Target, + createMipsMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheMips64elTarget, + createMipsMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget, createMipsMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheMipselTarget, createMipsMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheMips64Target, createMipsMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheMips64elTarget, + createMipsMCRegisterInfo); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget, + createMipsMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheMips64Target, + createMipsMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget, + createMipsMCCodeEmitter); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget, createMCStreamer); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, createMipsAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, createMipsAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheMips64Target, createMipsAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, createMipsAsmBackend); + + // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget, createMipsMCSubtargetInfo); -} + TargetRegistry::RegisterMCSubtargetInfo(TheMipselTarget, + createMipsMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheMips64Target, + createMipsMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheMips64elTarget, + createMipsMCSubtargetInfo); -extern "C" void LLVMInitializeMipsMCAsmInfo() { - RegisterMCAsmInfo<MipsMCAsmInfo> X(TheMipsTarget); - RegisterMCAsmInfo<MipsMCAsmInfo> Y(TheMipselTarget); + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, + createMipsMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheMipselTarget, + createMipsMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheMips64Target, + createMipsMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheMips64elTarget, + createMipsMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 3d18f11..7a0042a 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -1,4 +1,4 @@ -//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===// +//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,21 +7,32 @@ // //===----------------------------------------------------------------------===// // -// This file provides Alpha specific target descriptions. +// This file provides Mips specific target descriptions. // //===----------------------------------------------------------------------===// -#ifndef ALPHAMCTARGETDESC_H -#define ALPHAMCTARGETDESC_H +#ifndef MIPSMCTARGETDESC_H +#define MIPSMCTARGETDESC_H namespace llvm { +class MCAsmBackend; +class MCInstrInfo; +class MCCodeEmitter; +class MCContext; class MCSubtargetInfo; -class Target; class StringRef; +class Target; extern Target TheMipsTarget; extern Target TheMipselTarget; +extern Target TheMips64Target; +extern Target TheMips64elTarget; + +MCCodeEmitter *createMipsMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); +MCAsmBackend *createMipsAsmBackend(const Target &T, StringRef TT); } // End llvm namespace // Defines symbolic names for Mips registers. 
This defines a mapping from diff --git a/contrib/llvm/lib/Target/Mips/Mips.h b/contrib/llvm/lib/Target/Mips/Mips.h index 984b5ad..bacecf2 100644 --- a/contrib/llvm/lib/Target/Mips/Mips.h +++ b/contrib/llvm/lib/Target/Mips/Mips.h @@ -29,6 +29,9 @@ namespace llvm { FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM); FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM); + FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM, + JITCodeEmitter &JCE); + } // end namespace llvm; #endif diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td index 433cd57..39c2c16 100644 --- a/contrib/llvm/lib/Target/Mips/Mips.td +++ b/contrib/llvm/lib/Target/Mips/Mips.td @@ -38,6 +38,10 @@ def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat", "true", "Only supports single precision float">; def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32", "Enable o32 ABI">; +def FeatureN32 : SubtargetFeature<"n32", "MipsABI", "N32", + "Enable n32 ABI">; +def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64", + "Enable n64 ABI">; def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI", "Enable eabi ABI">; def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", @@ -54,16 +58,19 @@ def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true", "Enable 'byte/half swap' instructions.">; def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true", "Enable 'count leading bits' instructions.">; -def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", - "Mips1 ISA Support">; -def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2", - "Mips2 ISA Support">; def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32", "Mips32 ISA Support", [FeatureCondMov, FeatureBitCount]>; def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion", "Mips32r2", "Mips32r2 ISA Support", [FeatureMips32, FeatureSEInReg]>; +def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion", + "Mips64", "Mips64 ISA Support", + [FeatureGP64Bit, FeatureFP64Bit, + FeatureMips32]>; +def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion", + "Mips64r2", "Mips64r2 ISA Support", + [FeatureMips64, FeatureMips32r2]>; //===----------------------------------------------------------------------===// // Mips processors supported. @@ -72,21 +79,10 @@ def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion", class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, MipsGenericItineraries, Features>; -def : Proc<"mips1", [FeatureMips1]>; -def : Proc<"r2000", [FeatureMips1]>; -def : Proc<"r3000", [FeatureMips1]>; - -def : Proc<"mips2", [FeatureMips2]>; -def : Proc<"r6000", [FeatureMips2]>; - +def : Proc<"mips32r1", [FeatureMips32]>; def : Proc<"4ke", [FeatureMips32r2]>; - -// Allegrex is a 32bit subset of r4000, both for integer and fp registers, -// but much more similar to Mips2 than Mips3. It also contains some of -// Mips32/Mips32r2 instructions and a custom vector fpu processor. 
-def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI, - FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd, - FeatureMinMax, FeatureSwap, FeatureBitCount]>; +def : Proc<"mips64r1", [FeatureMips64]>; +def : Proc<"mips64r2", [FeatureMips64r2]>; def MipsAsmWriter : AsmWriter { string AsmWriterClassName = "InstPrinter"; diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td new file mode 100644 index 0000000..49b0223 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -0,0 +1,214 @@ +//===- Mips64InstrInfo.td - Mips64 Instruction Information -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes Mips64 instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Mips Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +// Instruction operand types +def shamt_64 : Operand<i64>; + +// Unsigned Operand +def uimm16_64 : Operand<i64> { + let PrintMethod = "printUnsignedImm"; +} + +// Transformation Function - get Imm - 32. +def Subtract32 : SDNodeXForm<imm, [{ + return getI32Imm((unsigned)N->getZExtValue() - 32); +}]>; + +// imm32_63 predicate - True if imm is in range [32, 63]. +def imm32_63 : ImmLeaf<i64, + [{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}], + Subtract32>; + +//===----------------------------------------------------------------------===// +// Instructions specific format +//===----------------------------------------------------------------------===// +// Shifts +class LogicR_shift_rotate_imm64<bits<6> func, bits<5> _rs, string instr_asm, + SDNode OpNode, PatFrag PF>: + FR<0x00, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$b, shamt_64:$c), + !strconcat(instr_asm, "\t$dst, $b, $c"), + [(set CPU64Regs:$dst, (OpNode CPU64Regs:$b, (i64 PF:$c)))], + IIAlu> { + let rs = _rs; +} + +class LogicR_shift_rotate_reg64<bits<6> func, bits<5> _shamt, string instr_asm, + SDNode OpNode>: + FR<0x00, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$c, CPU64Regs:$b), + !strconcat(instr_asm, "\t$dst, $b, $c"), + [(set CPU64Regs:$dst, (OpNode CPU64Regs:$b, CPU64Regs:$c))], IIAlu> { + let shamt = _shamt; +} + +// Mul, Div +let Defs = [HI64, LO64] in { + let isCommutable = 1 in + class Mul64<bits<6> func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPU64Regs:$a, CPU64Regs:$b), + !strconcat(instr_asm, "\t$a, $b"), [], itin>; + + class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPU64Regs:$a, CPU64Regs:$b), + !strconcat(instr_asm, "\t$$zero, $a, $b"), + [(op CPU64Regs:$a, CPU64Regs:$b)], itin>; +} + +// Move from Hi/Lo +let shamt = 0 in { +let rs = 0, rt = 0 in +class MoveFromLOHI64<bits<6> func, string instr_asm>: + FR<0x00, func, (outs CPU64Regs:$dst), (ins), + !strconcat(instr_asm, "\t$dst"), [], IIHiLo>; + +let rt = 0, rd = 0 in +class MoveToLOHI64<bits<6> func, string instr_asm>: + FR<0x00, func, (outs), (ins CPU64Regs:$src), + !strconcat(instr_asm, "\t$src"), [], IIHiLo>; +} + +// Count Leading Ones/Zeros in Word +class CountLeading64<bits<6> func, string instr_asm, 
list<dag> pattern>: + FR<0x1c, func, (outs CPU64Regs:$dst), (ins CPU64Regs:$src), + !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>, + Requires<[HasBitCount]> { + let shamt = 0; + let rt = rd; +} + +//===----------------------------------------------------------------------===// +// Instruction definition +//===----------------------------------------------------------------------===// + +/// Arithmetic Instructions (ALU Immediate) +def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16, + CPU64Regs>; +def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>; +def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>; +def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>; +def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>; +def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>; + +/// Arithmetic Instructions (3-Operand, R-Type) +def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>; +def DSUBu : ArithLogicR<0x00, 0x2f, "dsubu", sub, IIAlu, CPU64Regs>; +def SLT64 : SetCC_R<0x00, 0x2a, "slt", setlt, CPU64Regs>; +def SLTu64 : SetCC_R<0x00, 0x2b, "sltu", setult, CPU64Regs>; +def AND64 : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPU64Regs, 1>; +def OR64 : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPU64Regs, 1>; +def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>; +def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>; + +/// Shift Instructions +def DSLL : LogicR_shift_rotate_imm64<0x38, 0x00, "dsll", shl, immZExt5>; +def DSRL : LogicR_shift_rotate_imm64<0x3a, 0x00, "dsrl", srl, immZExt5>; +def DSRA : LogicR_shift_rotate_imm64<0x3b, 0x00, "dsra", sra, immZExt5>; +def DSLL32 : LogicR_shift_rotate_imm64<0x3c, 0x00, "dsll32", shl, imm32_63>; +def DSRL32 : LogicR_shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl, imm32_63>; +def DSRA32 : LogicR_shift_rotate_imm64<0x3f, 0x00, "dsra32", sra, imm32_63>; +def DSLLV : LogicR_shift_rotate_reg64<0x24, 0x00, "dsllv", shl>; +def DSRLV : LogicR_shift_rotate_reg64<0x26, 0x00, "dsrlv", srl>; +def DSRAV : LogicR_shift_rotate_reg64<0x27, 0x00, "dsrav", sra>; + +// Rotate Instructions +let Predicates = [HasMips64r2] in { + def DROTR : LogicR_shift_rotate_imm64<0x3a, 0x01, "drotr", rotr, immZExt5>; + def DROTR32 : LogicR_shift_rotate_imm64<0x3e, 0x01, "drotr32", rotr, + imm32_63>; + def DROTRV : LogicR_shift_rotate_reg64<0x16, 0x01, "drotrv", rotr>; +} + +/// Load and Store Instructions +/// aligned +defm LB64 : LoadM64<0x20, "lb", sextloadi8>; +defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>; +defm LH64 : LoadM64<0x21, "lh", sextloadi16_a>; +defm LHu64 : LoadM64<0x25, "lhu", zextloadi16_a>; +defm LW64 : LoadM64<0x23, "lw", sextloadi32_a>; +defm LWu64 : LoadM64<0x27, "lwu", zextloadi32_a>; +defm SB64 : StoreM64<0x28, "sb", truncstorei8>; +defm SH64 : StoreM64<0x29, "sh", truncstorei16_a>; +defm SW64 : StoreM64<0x2b, "sw", truncstorei32_a>; +defm LD : LoadM64<0x37, "ld", load_a>; +defm SD : StoreM64<0x3f, "sd", store_a>; + +/// unaligned +defm ULH64 : LoadM64<0x21, "ulh", sextloadi16_u, 1>; +defm ULHu64 : LoadM64<0x25, "ulhu", zextloadi16_u, 1>; +defm ULW64 : LoadM64<0x23, "ulw", sextloadi32_u, 1>; +defm USH64 : StoreM64<0x29, "ush", truncstorei16_u, 1>; +defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>; +defm ULD : LoadM64<0x37, "uld", load_u, 1>; +defm USD : StoreM64<0x3f, "usd", store_u, 1>; + +/// Jump and Branch Instructions +def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>; +def BNE64 : CBranch<0x05, 
"bne", setne, CPU64Regs>; +def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>; +def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>; +def BLEZ64 : CBranchZero<0x07, 0, "blez", setle, CPU64Regs>; +def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>; + +/// Multiply and Divide Instructions. +def DMULT : Mul64<0x1c, "dmult", IIImul>; +def DMULTu : Mul64<0x1d, "dmultu", IIImul>; +def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>; +def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>; + +let Defs = [HI64] in + def MTHI64 : MoveToLOHI64<0x11, "mthi">; +let Defs = [LO64] in + def MTLO64 : MoveToLOHI64<0x13, "mtlo">; + +let Uses = [HI64] in + def MFHI64 : MoveFromLOHI64<0x10, "mfhi">; +let Uses = [LO64] in + def MFLO64 : MoveFromLOHI64<0x12, "mflo">; + +/// Count Leading +def DCLZ : CountLeading64<0x24, "dclz", + [(set CPU64Regs:$dst, (ctlz CPU64Regs:$src))]>; +def DCLO : CountLeading64<0x25, "dclo", + [(set CPU64Regs:$dst, (ctlz (not CPU64Regs:$src)))]>; + +//===----------------------------------------------------------------------===// +// Arbitrary patterns that map to one or more instructions +//===----------------------------------------------------------------------===// + +// Small immediates +def : Pat<(i64 immSExt16:$in), + (DADDiu ZERO_64, imm:$in)>; +def : Pat<(i64 immZExt16:$in), + (ORi64 ZERO_64, imm:$in)>; + +// zextloadi32_u +def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>, + Requires<[IsN64]>; +def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>, + Requires<[NotN64]>; + +// hi/lo relocs +def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>; + +defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64, + ZERO_64>; + +// setcc patterns +defm : SeteqPats<CPU64Regs, SLTiu64, XOR64, SLTu64, ZERO_64>; +defm : SetlePats<CPU64Regs, SLT64, SLTu64>; +defm : SetgtPats<CPU64Regs, SLT64, SLTu64>; +defm : SetgePats<CPU64Regs, SLT64, SLTu64>; +defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>; diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 69e03bd..0e82681 100644 --- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -18,6 +18,7 @@ #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsMCInstLower.h" +#include "MipsMCSymbolRefExpr.h" #include "InstPrinter/MipsInstPrinter.h" #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" @@ -25,6 +26,7 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" @@ -33,15 +35,22 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/DebugInfo.h" using namespace llvm; +static bool isUnalignedLoadStore(unsigned Opc) { + return Opc == Mips::ULW || Opc == Mips::ULH || Opc == Mips::ULHu || + Opc == Mips::USW || Opc == Mips::USH || + Opc == Mips::ULW_P8 || Opc == Mips::ULH_P8 || Opc == Mips::ULHu_P8 || + Opc == Mips::USW_P8 || Opc == Mips::USH_P8; +} + void 
MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream OS(Str); @@ -52,8 +61,21 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { } MipsMCInstLower MCInstLowering(Mang, *MF, *this); + unsigned Opc = MI->getOpcode(); MCInst TmpInst0; MCInstLowering.Lower(MI, TmpInst0); + + // Enclose unaligned load or store with .macro & .nomacro directives. + if (isUnalignedLoadStore(Opc)) { + MCInst Directive; + Directive.setOpcode(Mips::MACRO); + OutStreamer.EmitInstruction(Directive); + OutStreamer.EmitInstruction(TmpInst0); + Directive.setOpcode(Mips::NOMACRO); + OutStreamer.EmitInstruction(Directive); + return; + } + OutStreamer.EmitInstruction(TmpInst0); } @@ -180,7 +202,6 @@ void MipsAsmPrinter::emitFrameDirective() { const char *MipsAsmPrinter::getCurrentABIString() const { switch (Subtarget->getTargetABI()) { case MipsSubtarget::O32: return "abi32"; - case MipsSubtarget::O64: return "abiO64"; case MipsSubtarget::N32: return "abiN32"; case MipsSubtarget::N64: return "abi64"; case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64 @@ -304,6 +325,11 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, case MipsII::MO_GOTTPREL: O << "%gottprel("; break; case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break; case MipsII::MO_TPREL_LO: O << "%tprel_lo("; break; + case MipsII::MO_GPOFF_HI: O << "%hi(%neg(%gp_rel("; break; + case MipsII::MO_GPOFF_LO: O << "%lo(%neg(%gp_rel("; break; + case MipsII::MO_GOT_DISP: O << "%got_disp("; break; + case MipsII::MO_GOT_PAGE: O << "%got_page("; break; + case MipsII::MO_GOT_OFST: O << "%got_ofst("; break; } switch (MO.getType()) { @@ -424,17 +450,9 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, } // Force static initialization. -static MCInstPrinter *createMipsMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - return new MipsInstPrinter(MAI); -} - extern "C" void LLVMInitializeMipsAsmPrinter() { RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget); RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget); - - TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, createMipsMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheMipselTarget, - createMipsMCInstPrinter); + RegisterAsmPrinter<MipsAsmPrinter> A(TheMips64Target); + RegisterAsmPrinter<MipsAsmPrinter> B(TheMips64elTarget); } diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td index 876f0fc..0ae4ef6 100644 --- a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td +++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td @@ -31,6 +31,55 @@ def RetCC_MipsO32 : CallingConv<[ ]>; //===----------------------------------------------------------------------===// +// Mips N32/64 Calling Convention +//===----------------------------------------------------------------------===// + +def CC_MipsN : CallingConv<[ + // FIXME: Handle byval, complex and float double parameters. + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // Integer arguments are passed in integer registers. + CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64, + T0_64, T1_64, T2_64, T3_64], + [D12_64, D13_64, D14_64, D15_64, + D16_64, D17_64, D18_64, D19_64]>>, + + // f32 arguments are passed in single precision FP registers. 
+ CCIfType<[f32], CCAssignToRegWithShadow<[F12, F13, F14, F15, + F16, F17, F18, F19], + [A0_64, A1_64, A2_64, A3_64, + T0_64, T1_64, T2_64, T3_64]>>, + + // f64 arguments are passed in double precision FP registers. + CCIfType<[f64], CCAssignToRegWithShadow<[D12_64, D13_64, D14_64, D15_64, + D16_64, D17_64, D18_64, D19_64], + [A0_64, A1_64, A2_64, A3_64, + T0_64, T1_64, T2_64, T3_64]>>, + + // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. + CCIfType<[i64, f64], CCAssignToStack<8, 8>>, + CCIfType<[f32], CCAssignToStack<4, 8>> +]>; + +def RetCC_MipsN : CallingConv<[ + // FIXME: Handle complex and float double return values. + + // i32 are returned in registers V0, V1 + CCIfType<[i32], CCAssignToReg<[V0, V1]>>, + + // i64 are returned in registers V0_64, V1_64 + CCIfType<[i64], CCAssignToReg<[V0_64, V1_64]>>, + + // f32 are returned in registers F0, F2 + CCIfType<[f32], CCAssignToReg<[F0, F2]>>, + + // f64 are returned in registers D0, D2 + CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>> +]>; + +//===----------------------------------------------------------------------===// // Mips EABI Calling Convention //===----------------------------------------------------------------------===// @@ -77,10 +126,14 @@ def RetCC_MipsEABI : CallingConv<[ //===----------------------------------------------------------------------===// def CC_Mips : CallingConv<[ - CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>> + CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>, + CCIfSubtarget<"isABI_N32()", CCDelegateTo<CC_MipsN>>, + CCIfSubtarget<"isABI_N64()", CCDelegateTo<CC_MipsN>> ]>; def RetCC_Mips : CallingConv<[ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>, + CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>, + CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>, CCDelegateTo<RetCC_MipsO32> ]>; diff --git a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp new file mode 100644 index 0000000..9220d9c --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp @@ -0,0 +1,245 @@ +//===-- Mips/MipsCodeEmitter.cpp - Convert Mips code to machine code -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file contains the pass that transforms the Mips machine instructions +// into relocatable machine code. 
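To make the CC_MipsN rules above concrete, here is a hand-worked assignment for a hypothetical prototype. The signature, and the assumption that each D<n>_64 register overlaps the single-precision F<n>, are illustrative only, not part of the patch:

  // Hypothetical call under CC_MipsN: void f(int a, double d, float s, long long x)
  //   a : i32 promoted to i64 -> A0_64  (shadows D12_64)
  //   d : f64                 -> D13_64 (D12_64 already shadowed; shadows A1_64)
  //   s : f32                 -> F14    (F12/F13 alias the used D12_64/D13_64;
  //                                      shadows A2_64)
  //   x : i64                 -> A3_64  (shadows D15_64)
  // Once the eight argument registers are exhausted, i64/f64 values take
  // 8-byte-aligned doubleword stack slots and f32 a 4-byte slot with 8-byte
  // alignment, per the CCAssignToStack rules above.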
+// +//===---------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "Mips.h" +#include "MipsInstrInfo.h" +#include "MipsRelocations.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#ifndef NDEBUG +#include <iomanip> +#endif + +#include "llvm/CodeGen/MachineOperand.h" + +using namespace llvm; + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + +class MipsCodeEmitter : public MachineFunctionPass { + MipsJITInfo *JTI; + const MipsInstrInfo *II; + const TargetData *TD; + const MipsSubtarget *Subtarget; + TargetMachine &TM; + JITCodeEmitter &MCE; + const std::vector<MachineConstantPoolEntry> *MCPEs; + const std::vector<MachineJumpTableEntry> *MJTEs; + bool IsPIC; + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineModuleInfo> (); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + + public: + MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : + MachineFunctionPass(ID), JTI(0), + II((const MipsInstrInfo *) tm.getInstrInfo()), + TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) { + } + + bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "Mips Machine Code Emitter"; + } + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + + void emitInstruction(const MachineInstr &MI); + + private: + + void emitWordLE(unsigned Word); + + /// Routines that handle operands which add machine relocations which are + /// fixed up by the relocation stage. + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub) const; + void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; + void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. 
+ unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const; + + }; +} + +char MipsCodeEmitter::ID = 0; + +bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { + JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo(); + II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo(); + TD = ((const MipsTargetMachine&) MF.getTarget()).getTargetData(); + Subtarget = &TM.getSubtarget<MipsSubtarget> (); + MCPEs = &MF.getConstantPool()->getConstants(); + MJTEs = 0; + if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); + JTI->Initialize(MF, IsPIC); + MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ()); + + do { + DEBUG(errs() << "JITTing function '" + << MF.getFunction()->getName() << "'\n"); + MCE.startFunction(MF); + + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB){ + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) + emitInstruction(*I); + } + } while (MCE.finishFunction(MF)); + + return false; +} + +unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const { + // NOTE: These relocations are for the static relocation model. + uint64_t TSFlags = MI.getDesc().TSFlags; + uint64_t Form = TSFlags & MipsII::FormMask; + if (Form == MipsII::FrmJ) + return Mips::reloc_mips_26; + if ((Form == MipsII::FrmI || Form == MipsII::FrmFI) + && MI.getDesc().isBranch()) + return Mips::reloc_mips_branch; + if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi) + return Mips::reloc_mips_hi; + return Mips::reloc_mips_lo; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero.
+unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + if (MO.isReg()) + return MipsRegisterInfo::getRegisterNumbering(MO.getReg()); + else if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO)); + else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO)); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + else + llvm_unreachable("Unable to encode MachineOperand!"); + return 0; +} + +void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub) const { + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + const_cast<GlobalValue *>(GV), 0, MayNeedFarStub)); +} + +void MipsCodeEmitter:: +emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES, 0, 0, false)); +} + +void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, 0, false)); +} + +void MipsCodeEmitter:: +emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTIndex, 0, false)); +} + +void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB)); +} + +void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) { + DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); + + MCE.processDebugLoc(MI.getDebugLoc(), true); + + // Skip pseudo instructions. + if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) + return; + + ++NumEmitted; // Keep track of the # of MIs emitted + + switch (MI.getOpcode()) { + default: + emitWordLE(getBinaryCodeForInstr(MI)); + break; + } + + MCE.processDebugLoc(MI.getDebugLoc(), false); +} + +void MipsCodeEmitter::emitWordLE(unsigned Word) { + DEBUG(errs() << " 0x"; + errs().write_hex(Word) << "\n"); + MCE.emitWordLE(Word); +} + +/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips +/// code to the specified MCE object. +FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM, + JITCodeEmitter &JCE) { + return new MipsCodeEmitter(TM, JCE); +} + +unsigned MipsCodeEmitter::getBinaryCodeForInstr(const MachineInstr &MI) const { + // This function will be automatically generated by the CodeEmitterGenerator + // using TableGen. + return 0; +} diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp index c3a6211..be3b7a0 100644 --- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Simple pass to fills delay slots with NOPs. +// Simple pass to fill delay slots with useful instructions.
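Stepping back to the MipsCodeEmitter::getRelocation hunk above, the form-based classification amounts to the following table (the assembly spellings are illustrative):

  // j/jal target           (FrmJ)                -> Mips::reloc_mips_26
  // beq/bne/... target     (FrmI/FrmFI + branch) -> Mips::reloc_mips_branch
  // lui   $2, %hi(sym)     (LUi)                 -> Mips::reloc_mips_hi
  // addiu $2, $2, %lo(sym) and everything else   -> Mips::reloc_mips_lo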
// //===----------------------------------------------------------------------===// @@ -17,18 +17,31 @@ #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(FilledSlots, "Number of delay slots filled"); +STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" + " are not NOPs."); + +static cl::opt<bool> EnableDelaySlotFiller( + "enable-mips-delay-filler", + cl::init(false), + cl::desc("Fill the Mips delay slots with useful instructions."), + cl::Hidden); namespace { struct Filler : public MachineFunctionPass { TargetMachine &TM; const TargetInstrInfo *TII; + MachineBasicBlock::iterator LastFiller; static char ID; Filler(TargetMachine &tm) @@ -47,31 +60,61 @@ namespace { return Changed; } + bool isDelayFiller(MachineBasicBlock &MBB, + MachineBasicBlock::iterator candidate); + + void insertCallUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses); + + void insertDefsUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses); + + bool IsRegInSet(SmallSet<unsigned, 32>& RegSet, + unsigned Reg); + + bool delayHasHazard(MachineBasicBlock::iterator candidate, + bool &sawLoad, bool &sawStore, + SmallSet<unsigned, 32> &RegDefs, + SmallSet<unsigned, 32> &RegUses); + + bool + findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot, + MachineBasicBlock::iterator &Filler); + + }; char Filler::ID = 0; } // end of anonymous namespace /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. -/// Currently, we fill delay slots with NOPs. We assume there is only one -/// delay slot per delayed instruction. +/// We assume there is only one delay slot per delayed instruction. bool Filler:: -runOnMachineBasicBlock(MachineBasicBlock &MBB) -{ +runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { - const MCInstrDesc& MCid = I->getDesc(); - if (MCid.hasDelaySlot() && - (TM.getSubtarget<MipsSubtarget>().isMips1() || - MCid.isCall() || MCid.isBranch() || MCid.isReturn())) { - MachineBasicBlock::iterator J = I; - ++J; - BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP)); + LastFiller = MBB.end(); + + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) + if (I->getDesc().hasDelaySlot()) { ++FilledSlots; Changed = true; - } - } + MachineBasicBlock::iterator D; + + if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) { + MBB.splice(llvm::next(I), &MBB, D); + ++UsefulSlots; + } + else + BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); + + // Record the filler instruction that filled the delay slot. + // The instruction after it will be visited in the next iteration.
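A minimal before/after sketch of what this filler does when -enable-mips-delay-filler is passed (instructions and registers invented for illustration):

  // Before the pass:                After the pass:
  //   addiu $4, $4, 1                 beq   $2, $3, $BB0_2
  //   beq   $2, $3, $BB0_2            addiu $4, $4, 1   # spliced into the slot
  //   <delay slot>                  # no nop is emitted; UsefulSlots is bumped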
+ LastFiller = ++I; + } return Changed; + } /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay @@ -80,3 +123,134 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { return new Filler(tm); } +bool Filler::findDelayInstr(MachineBasicBlock &MBB, + MachineBasicBlock::iterator slot, + MachineBasicBlock::iterator &Filler) { + SmallSet<unsigned, 32> RegDefs; + SmallSet<unsigned, 32> RegUses; + + insertDefsUses(slot, RegDefs, RegUses); + + bool sawLoad = false; + bool sawStore = false; + + for (MachineBasicBlock::reverse_iterator I(slot); I != MBB.rend(); ++I) { + // Skip debug values. + if (I->isDebugValue()) + continue; + + // Convert to forward iterator. + MachineBasicBlock::iterator FI(llvm::next(I).base()); + + if (I->hasUnmodeledSideEffects() + || I->isInlineAsm() + || I->isLabel() + || FI == LastFiller + || I->getDesc().isPseudo() + // Should not allow ERET, DERET, WAIT or PAUSE either; these need + // to be added to the instruction list. TBD. + ) + break; + + if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) { + insertDefsUses(FI, RegDefs, RegUses); + continue; + } + + Filler = FI; + return true; + } + + return false; +} + +bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, + bool &sawLoad, + bool &sawStore, + SmallSet<unsigned, 32> &RegDefs, + SmallSet<unsigned, 32> &RegUses) { + if (candidate->isImplicitDef() || candidate->isKill()) + return true; + + MCInstrDesc MCID = candidate->getDesc(); + // Loads or stores cannot be moved past a store into the delay slot, + // and stores cannot be moved past a load. + if (MCID.mayLoad()) { + if (sawStore) + return true; + sawLoad = true; + } + + if (MCID.mayStore()) { + if (sawStore) + return true; + sawStore = true; + if (sawLoad) + return true; + } + + assert((!MCID.isCall() && !MCID.isReturn()) && + "Cannot put calls or returns in delay slot."); + + for (unsigned i = 0, e = candidate->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = candidate->getOperand(i); + unsigned Reg; + + if (!MO.isReg() || !(Reg = MO.getReg())) + continue; // skip + + if (MO.isDef()) { + // Check whether Reg is defined or used before the delay slot. + if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg)) + return true; + } + if (MO.isUse()) { + // Check whether Reg is defined before the delay slot. + if (IsRegInSet(RegDefs, Reg)) + return true; + } + } + return false; +} + +// Insert Defs and Uses of MI into the sets RegDefs and RegUses. +void Filler::insertDefsUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses) { + // If MI is a call or return, just examine the explicit non-variadic operands. + MCInstrDesc MCID = MI->getDesc(); + unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() : + MI->getNumOperands(); + + // Add RA to RegDefs to prevent users of RA from going into the delay slot. + if (MCID.isCall()) + RegDefs.insert(Mips::RA); + + for (unsigned i = 0; i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + unsigned Reg; + + if (!MO.isReg() || !(Reg = MO.getReg())) + continue; + + if (MO.isDef()) + RegDefs.insert(Reg); + else if (MO.isUse()) + RegUses.insert(Reg); + } +} + +// Returns true if Reg or one of its aliases is in RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) { + if (RegSet.count(Reg)) + return true; + // check Aliased Registers + for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg); + *Alias; ++Alias) + if (RegSet.count(*Alias)) + return true; + + return false; +} diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp index a0f90a0..22d1e47 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp @@ -254,9 +254,15 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { } // Restore GP from the saved stack location - if (MipsFI->needGPSaveRestore()) - BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)) - .addImm(MFI->getObjectOffset(MipsFI->getGPFI())); + if (MipsFI->needGPSaveRestore()) { + unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI()); + BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset); + + if (Offset >= 0x8000) { + BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::MACRO)); + BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); + } + } } void MipsFrameLowering::emitEpilogue(MachineFunction &MF, @@ -300,13 +306,6 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, } } -void -MipsFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const { - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(Mips::SP, 0); - Moves.push_back(MachineMove(0, Dst, Src)); -} - void MipsFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h index 78c78ee..c249756 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h @@ -27,7 +27,8 @@ protected: public: explicit MipsFrameLowering(const MipsSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) { + : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0), + STI(sti) { } bool targetHandlesStackFrameRounding() const; @@ -39,8 +40,6 @@ public: bool hasFP(const MachineFunction &MF) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const; }; diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index 90aaeb6..9c831ed 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -86,9 +86,6 @@ private: // Complex Pattern. bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset); - SDNode *SelectLoadFp64(SDNode *N); - SDNode *SelectStoreFp64(SDNode *N); - // getI32Imm - Return a target constant with the specified // value, of type i32. inline SDValue getI32Imm(unsigned Imm) { @@ -114,17 +111,20 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel:: SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { + EVT ValTy = Addr.getValueType(); + unsigned GPReg = ValTy == MVT::i32 ? Mips::GP : Mips::GP_64; + // if Address is FI, get the TargetFrameIndex. 
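A note on the MACRO/NOMACRO bracketing added to emitPrologue above: once the .cprestore offset no longer fits in a signed 16-bit immediate (>= 0x8000), the assembler has to expand the directive into a multi-instruction macro, so it must run with macros enabled. A plausible expansion, shown only as an assumption about assembler behavior:

  // .set macro
  // .cprestore 32768        # offset >= 0x8000; may expand to something like
  //                         #   lui  $1, 1
  //                         #   addu $1, $1, $sp
  //                         #   sw   $gp, 0($1)
  // .set nomacro

The same reasoning applies to the unaligned ulw/ulh/usw/ush instructions wrapped in MipsAsmPrinter::EmitInstruction earlier: they are assembler macros (typically lwl/lwr or swl/swr pairs) and would be rejected under .set nomacro.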
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); return true; } // on PIC code Load GA if (TM.getRelocationModel() == Reloc::PIC_) { if (Addr.getOpcode() == MipsISD::WrapperPIC) { - Base = CurDAG->getRegister(Mips::GP, MVT::i32); + Base = CurDAG->getRegister(GPReg, ValTy); Offset = Addr.getOperand(0); return true; } @@ -133,7 +133,7 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { Addr.getOpcode() == ISD::TargetGlobalAddress)) return false; else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) { - Base = CurDAG->getRegister(Mips::GP, MVT::i32); + Base = CurDAG->getRegister(GPReg, ValTy); Offset = Addr; return true; } @@ -147,11 +147,11 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> (Addr.getOperand(0))) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); else Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); return true; } } @@ -180,134 +180,10 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { } Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); + Offset = CurDAG->getTargetConstant(0, ValTy); return true; } -SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { - MVT::SimpleValueType NVT = - N->getValueType(0).getSimpleVT().SimpleTy; - - if (!Subtarget.isMips1() || NVT != MVT::f64) - return NULL; - - LoadSDNode *LN = cast<LoadSDNode>(N); - if (LN->getExtensionType() != ISD::NON_EXTLOAD || - LN->getAddressingMode() != ISD::UNINDEXED) - return NULL; - - SDValue Chain = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDValue Offset0, Offset1, Base; - - if (!SelectAddr(N1, Base, Offset0) || - N1.getValueType() != MVT::i32) - return NULL; - - MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); - MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); - DebugLoc dl = N->getDebugLoc(); - - // The second load should start after for 4 bytes. 
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0)) - Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32); - else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0)) - Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(), - MVT::i32, - CP->getAlignment(), - CP->getOffset()+4, - CP->getTargetFlags()); - else - return NULL; - - // Choose the offsets depending on the endianess - if (TM.getTargetData()->isBigEndian()) - std::swap(Offset0, Offset1); - - // Instead of: - // ldc $f0, X($3) - // Generate: - // lwc $f0, X($3) - // lwc $f1, X+4($3) - SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, - MVT::Other, Base, Offset0, Chain); - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, NVT), 0); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, - MVT::f64, Undef, SDValue(LD0, 0)); - - SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, - MVT::Other, Base, Offset1, SDValue(LD0, 1)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, - MVT::f64, I0, SDValue(LD1, 0)); - - ReplaceUses(SDValue(N, 0), I1); - ReplaceUses(SDValue(N, 1), Chain); - cast<MachineSDNode>(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1); - cast<MachineSDNode>(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1); - return I1.getNode(); -} - -SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { - - if (!Subtarget.isMips1() || - N->getOperand(1).getValueType() != MVT::f64) - return NULL; - - SDValue Chain = N->getOperand(0); - - StoreSDNode *SN = cast<StoreSDNode>(N); - if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED) - return NULL; - - SDValue N1 = N->getOperand(1); - SDValue N2 = N->getOperand(2); - SDValue Offset0, Offset1, Base; - - if (!SelectAddr(N2, Base, Offset0) || - N1.getValueType() != MVT::f64 || - N2.getValueType() != MVT::i32) - return NULL; - - MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); - MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); - DebugLoc dl = N->getDebugLoc(); - - // Get the even and odd part from the f64 register - SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd, - dl, MVT::f32, N1); - SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven, - dl, MVT::f32, N1); - - // The second store should start after for 4 bytes. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0)) - Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32); - else - return NULL; - - // Choose the offsets depending on the endianess - if (TM.getTargetData()->isBigEndian()) - std::swap(Offset0, Offset1); - - // Instead of: - // sdc $f0, X($3) - // Generate: - // swc $f0, X($3) - // swc $f1, X+4($3) - SDValue Ops0[] = { FPEven, Base, Offset0, Chain }; - Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl, - MVT::Other, Ops0, 4), 0); - cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1); - - SDValue Ops1[] = { FPOdd, Base, Offset1, Chain }; - Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl, - MVT::Other, Ops1, 4), 0); - cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1); - - ReplaceUses(SDValue(N, 0), Chain); - return Chain.getNode(); -} - /// Select instructions not customized! 
Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { @@ -364,6 +240,8 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { /// Mul with two results case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { + assert(Node->getValueType(0) != MVT::i64 && + "64-bit multiplication with two results not handled."); SDValue Op1 = Node->getOperand(0); SDValue Op2 = Node->getOperand(1); @@ -389,21 +267,29 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { /// Special Muls case ISD::MUL: - if (Subtarget.isMips32()) + // Mips32 has a 32-bit three operand mul instruction. + if (Subtarget.hasMips32() && Node->getValueType(0) == MVT::i32) break; case ISD::MULHS: case ISD::MULHU: { + assert((Opcode == ISD::MUL || Node->getValueType(0) != MVT::i64) && + "64-bit MULH* not handled."); + EVT Ty = Node->getValueType(0); SDValue MulOp1 = Node->getOperand(0); SDValue MulOp2 = Node->getOperand(1); - unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); + unsigned MulOp = (Opcode == ISD::MULHU ? + Mips::MULTu : + (Ty == MVT::i32 ? Mips::MULT : Mips::DMULT)); SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, MVT::Glue, MulOp1, MulOp2); SDValue InFlag = SDValue(MulNode, 0); - if (Opcode == ISD::MUL) - return CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, InFlag); + if (Opcode == ISD::MUL) { + unsigned Opc = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); + return CurDAG->getMachineNode(Opc, dl, Ty, InFlag); + } else return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); } @@ -417,31 +303,12 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, Mips::ZERO, MVT::i32); - SDValue Undef = SDValue( - CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0); - SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, - MVT::f64, Undef, SDValue(MTC, 0)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, - MVT::f64, I0, SDValue(MTC, 0)); - ReplaceUses(SDValue(Node, 0), I1); - return I1.getNode(); + return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero, + Zero); } break; } - case ISD::LOAD: - if (SDNode *ResNode = SelectLoadFp64(Node)) - return ResNode; - // Other cases are autogenerated. - break; - - case ISD::STORE: - if (SDNode *ResNode = SelectStoreFp64(Node)) - return ResNode; - // Other cases are autogenerated. - break; - case MipsISD::ThreadPointer: { unsigned SrcReg = Mips::HWR29; unsigned DestReg = Mips::V1; diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index b4f4b1b..1932e74 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -35,6 +35,18 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; +// If I is a shifted mask, set the size (Size) and the first bit of the +// mask (Pos), and return true. +// For example, if I is 0x003ff800, (Pos, Size) = (11, 11). 
+static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { + if (!isUInt<32>(I) || !isShiftedMask_32(I)) + return false; + + Size = CountPopulation_32(I); + Pos = CountTrailingZeros_32(I); + return true; +} + const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { case MipsISD::JmpLink: return "MipsISD::JmpLink"; @@ -61,27 +73,38 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC"; case MipsISD::DynAlloc: return "MipsISD::DynAlloc"; + case MipsISD::Sync: return "MipsISD::Sync"; + case MipsISD::Ext: return "MipsISD::Ext"; + case MipsISD::Ins: return "MipsISD::Ins"; default: return NULL; } } MipsTargetLowering:: MipsTargetLowering(MipsTargetMachine &TM) - : TargetLowering(TM, new MipsTargetObjectFile()) { - Subtarget = &TM.getSubtarget<MipsSubtarget>(); + : TargetLowering(TM, new MipsTargetObjectFile()), + Subtarget(&TM.getSubtarget<MipsSubtarget>()), + HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()) { // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? // Set up the register classes addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass); addRegisterClass(MVT::f32, Mips::FGR32RegisterClass); + if (HasMips64) + addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass); + // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) - if (!Subtarget->isFP64bit()) + if (!Subtarget->isSingleFloat()) { + if (HasMips64) + addRegisterClass(MVT::f64, Mips::FGR64RegisterClass); + else addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass); + } // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); @@ -100,6 +123,7 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips Custom Operations setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); @@ -115,6 +139,10 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); // Operations not directly supported by Mips. 
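A self-contained sketch of the IsShiftedMask contract introduced at the top of this file, using compiler builtins in place of LLVM's isShiftedMask_32/CountPopulation_32/CountTrailingZeros_32 helpers (hypothetical stand-alone code, not part of the patch):

  #include <cstdint>

  // True iff I is a contiguous run of ones that fits in 32 bits; Pos gets the
  // index of the lowest set bit, Size the length of the run.
  static bool isShiftedMaskSketch(uint64_t I, uint64_t &Pos, uint64_t &Size) {
    if (I == 0 || I > 0xffffffffu)
      return false;
    uint32_t V = static_cast<uint32_t>(I);
    uint32_t Run = V >> __builtin_ctz(V); // shift the run down to bit 0
    if (Run & (Run + 1))                  // not of the form 2^n - 1: has holes
      return false;
    Pos = __builtin_ctz(V);
    Size = __builtin_popcount(V);
    return true;
  }

  // The example from the comment above: 0x003ff800 sets bits 11..21, so
  // isShiftedMaskSketch(0x003ff800, P, S) returns true with P == 11, S == 11.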
setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -126,10 +154,14 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); - if (!Subtarget->isMips32r2()) + if (!Subtarget->hasMips32r2()) setOperationAction(ISD::ROTR, MVT::i32, Expand); + if (!Subtarget->hasMips64r2()) + setOperationAction(ISD::ROTR, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); @@ -159,7 +191,14 @@ MipsTargetLowering(MipsTargetMachine &TM) // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + + setInsertFencesForAtomic(true); if (Subtarget->isSingleFloat()) setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); @@ -180,6 +219,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SDIVREM); setTargetDAGCombine(ISD::UDIVREM); setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); setMinFunctionAlignment(2); @@ -190,7 +231,12 @@ MipsTargetLowering(MipsTargetMachine &TM) setExceptionSelectorRegister(Mips::A1); } -MVT::SimpleValueType MipsTargetLowering::getSetCCResultType(EVT VT) const { +bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { + MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + return SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16; +} + +EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i32; } @@ -348,7 +394,7 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->isMips32() && SelectMadd(N, &DAG)) + if (Subtarget->hasMips32() && SelectMadd(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -360,7 +406,7 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->isMips32() && SelectMsub(N, &DAG)) + if (Subtarget->hasMips32() && SelectMsub(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -372,6 +418,9 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); + EVT Ty = N->getValueType(0); + unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64; + unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64; unsigned opc = N->getOpcode() == ISD::SDIVREM ? 
MipsISD::DivRem : MipsISD::DivRemU; DebugLoc dl = N->getDebugLoc(); @@ -383,7 +432,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, // insert MFLO if (N->hasAnyUseOfValue(0)) { - SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32, + SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, LO, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo); InChain = CopyFromLo.getValue(1); @@ -393,7 +442,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, // insert MFHI if (N->hasAnyUseOfValue(1)) { SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl, - Mips::HI, MVT::i32, InGlue); + HI, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi); } @@ -490,6 +539,101 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG, return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc()); } +static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + // Pattern match EXT. + // $dst = and ((sra or srl) $src , pos), (2**size - 1) + // => ext $dst, $src, size, pos + if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2()) + return SDValue(); + + SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1); + + // Op's first operand must be a shift right. + if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL) + return SDValue(); + + // The second operand of the shift must be an immediate. + uint64_t Pos; + ConstantSDNode *CN; + if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1)))) + return SDValue(); + + Pos = CN->getZExtValue(); + + uint64_t SMPos, SMSize; + // Op's second operand must be a shifted mask. + if (!(CN = dyn_cast<ConstantSDNode>(Mask)) || + !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize)) + return SDValue(); + + // Return if the shifted mask does not start at bit 0 or the sum of its size + // and Pos exceeds the word's size. + if (SMPos != 0 || Pos + SMSize > 32) + return SDValue(); + + return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32, + ShiftRight.getOperand(0), + DAG.getConstant(Pos, MVT::i32), + DAG.getConstant(SMSize, MVT::i32)); +} + +static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + // Pattern match INS. + // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1), + // where mask1 = (2**size - 1) << pos, mask0 = ~mask1 + // => ins $dst, $src, size, pos, $src1 + if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2()) + return SDValue(); + + SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); + uint64_t SMPos0, SMSize0, SMPos1, SMSize1; + ConstantSDNode *CN; + + // See if Op's first operand matches (and $src1 , mask0). + if (And0.getOpcode() != ISD::AND) + return SDValue(); + + if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) || + !IsShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0)) + return SDValue(); + + // See if Op's second operand matches (and (shl $src, pos), mask1). + if (And1.getOpcode() != ISD::AND) + return SDValue(); + + if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) || + !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) + return SDValue(); + + // The shift masks must have the same position and size. 
+ if (SMPos0 != SMPos1 || SMSize0 != SMSize1) + return SDValue(); + + SDValue Shl = And1.getOperand(0); + if (Shl.getOpcode() != ISD::SHL) + return SDValue(); + + if (!(CN = dyn_cast<ConstantSDNode>(Shl.getOperand(1)))) + return SDValue(); + + unsigned Shamt = CN->getZExtValue(); + + // Return if the shift amount and the first bit position of mask are not the + // same. + if (Shamt != SMPos0) + return SDValue(); + + return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32, + Shl.getOperand(0), + DAG.getConstant(SMPos0, MVT::i32), + DAG.getConstant(SMSize0, MVT::i32), + And0.getOperand(0)); +} + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -506,6 +650,10 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return PerformDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); + case ISD::AND: + return PerformANDCombine(N, DAG, DCI, Subtarget); + case ISD::OR: + return PerformORCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -527,6 +675,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); + case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); } return SDValue(); } @@ -733,13 +883,13 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); - unsigned Dest = MI->getOperand(0).getReg(); + unsigned OldVal = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); unsigned Incr = MI->getOperand(2).getReg(); - unsigned Oldval = RegInfo.createVirtualRegister(RC); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned AndRes = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -758,61 +908,38 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // thisMBB: // ... - // sw incr, fi(sp) // store incr to stack (when BinOpcode == 0) // fallthrough --> loopMBB - - // Note: for atomic.swap (when BinOpcode == 0), storing incr to stack before - // the loop and then loading it from stack in block loopMBB is necessary to - // prevent MachineLICM pass to hoist "or" instruction out of the block - // loopMBB. - - int fi = 0; - if (BinOpcode == 0 && !Nand) { - // Get or create a temporary stack location. 
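Worked instances of the two combines above, with values chosen for illustration:

  // EXT (PerformANDCombine), i32 code:
  //   %d = and (srl %s, 3), 255       ; Pos = 3, mask 0xff -> SMPos = 0, SMSize = 8
  // SMPos == 0 and Pos + SMSize == 11 <= 32, so this becomes
  //   (MipsISD::Ext %s, 3, 8)         ; i.e. "ext $d, $s, 3, 8"
  //
  // INS (PerformORCombine), i32 code:
  //   %d = or (and %a, 0xffff00ff), (and (shl %b, 8), 0x0000ff00)
  // mask1 = 0x0000ff00 gives SMPos1 = 8, SMSize1 = 8; mask0 = ~mask1 matches
  // with SMPos0 = 8, SMSize0 = 8; the shl amount is also 8, so this becomes
  //   (MipsISD::Ins %b, 8, 8, %a)     ; i.e. "ins $a, $b, 8, 8"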
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Incr).addFrameIndex(fi).addImm(0); - } BB->addSuccessor(loopMBB); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(exitMBB); // loopMBB: // ll oldval, 0(ptr) - // or dest, $0, oldval - // <binop> tmp1, oldval, incr - // sc tmp1, 0(ptr) - // beq tmp1, $0, loopMBB + // <binop> storeval, oldval, incr + // sc success, storeval, 0(ptr) + // beq success, $0, loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval); + BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(Ptr).addImm(0); if (Nand) { - // and tmp2, oldval, incr - // nor tmp1, $0, tmp2 - BuildMI(BB, dl, TII->get(Mips::AND), Tmp2).addReg(Oldval).addReg(Incr); - BuildMI(BB, dl, TII->get(Mips::NOR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); + // and andres, oldval, incr + // nor storeval, $0, andres + BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr); + BuildMI(BB, dl, TII->get(Mips::NOR), StoreVal) + .addReg(Mips::ZERO).addReg(AndRes); } else if (BinOpcode) { - // <binop> tmp1, oldval, incr - BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr); + // <binop> storeval, oldval, incr + BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr); } else { - // lw tmp2, fi(sp) // load incr from stack - // or tmp1, $zero, tmp2 - BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); + StoreVal = Incr; } - BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(StoreVal).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB); - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); + .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); MI->eraseFromParent(); // The instruction is gone now. 
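With the stack temporary gone, EmitAtomicBinary now emits the canonical ll/sc retry loop. For a word-sized atomic add the output looks roughly like this (register numbers illustrative):

  // $BB0_1:                     # loopMBB
  //   ll    $2, 0($4)           # OldVal = *Ptr
  //   addu  $1, $2, $5          # StoreVal = OldVal <binop> Incr
  //   sc    $1, 0($4)           # Success: $1 = 1 if the store hit, else 0
  //   beq   $1, $zero, $BB0_1   # retry until the sc succeeds
  // # fallthrough into exitMBB; $2 still holds the value before the update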
- return BB; + return exitMBB; } MachineBasicBlock * @@ -833,33 +960,34 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, unsigned Ptr = MI->getOperand(1).getReg(); unsigned Incr = MI->getOperand(2).getReg(); - unsigned Addr = RegInfo.createVirtualRegister(RC); - unsigned Shift = RegInfo.createVirtualRegister(RC); + unsigned AlignedAddr = RegInfo.createVirtualRegister(RC); + unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned Newval = RegInfo.createVirtualRegister(RC); - unsigned Oldval = RegInfo.createVirtualRegister(RC); + unsigned NewVal = RegInfo.createVirtualRegister(RC); + unsigned OldVal = RegInfo.createVirtualRegister(RC); unsigned Incr2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp3 = RegInfo.createVirtualRegister(RC); - unsigned Tmp4 = RegInfo.createVirtualRegister(RC); - unsigned Tmp5 = RegInfo.createVirtualRegister(RC); - unsigned Tmp6 = RegInfo.createVirtualRegister(RC); - unsigned Tmp7 = RegInfo.createVirtualRegister(RC); - unsigned Tmp8 = RegInfo.createVirtualRegister(RC); - unsigned Tmp9 = RegInfo.createVirtualRegister(RC); - unsigned Tmp10 = RegInfo.createVirtualRegister(RC); - unsigned Tmp11 = RegInfo.createVirtualRegister(RC); - unsigned Tmp12 = RegInfo.createVirtualRegister(RC); + unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC); + unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); + unsigned MaskUpper = RegInfo.createVirtualRegister(RC); + unsigned AndRes = RegInfo.createVirtualRegister(RC); + unsigned BinOpRes = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); + unsigned SrlRes = RegInfo.createVirtualRegister(RC); + unsigned SllRes = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = BB; ++It; MF->insert(It, loopMBB); + MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. 
@@ -868,111 +996,104 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(loopMBB); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(sinkMBB); + sinkMBB->addSuccessor(exitMBB); + // thisMBB: - // addiu tmp1,$0,-4 # 0xfffffffc - // and addr,ptr,tmp1 - // andi tmp2,ptr,3 - // sll shift,tmp2,3 - // ori tmp3,$0,255 # 0xff - // sll mask,tmp3,shift + // addiu masklsb2,$0,-4 # 0xfffffffc + // and alignedaddr,ptr,masklsb2 + // andi ptrlsb2,ptr,3 + // sll shiftamt,ptrlsb2,3 + // ori maskupper,$0,255 # 0xff + // sll mask,maskupper,shiftamt // nor mask2,$0,mask - // andi tmp4,incr,255 - // sll incr2,tmp4,shift - // sw incr2, fi(sp) // store incr2 to stack (when BinOpcode == 0) - - // Note: for atomic.swap (when BinOpcode == 0), storing incr2 to stack before - // the loop and then loading it from stack in block loopMBB is necessary to - // prevent MachineLICM pass to hoist "or" instruction out of the block - // loopMBB. + // sll incr2,incr,shiftamt int64_t MaskImm = (Size == 1) ? 255 : 65535; - BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4); - BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3); - BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3); - BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift); + BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2) + .addReg(Mips::ZERO).addImm(-4); + BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr) + .addReg(Ptr).addReg(MaskLSB2); + BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); + BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); + BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper) + .addReg(Mips::ZERO).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), Mask) + .addReg(ShiftAmt).addReg(MaskUpper); BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); - if (BinOpcode != Mips::SUBu) { - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Incr).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp4).addReg(Shift); - } else { - BuildMI(BB, dl, TII->get(Mips::SUBu), Tmp4).addReg(Mips::ZERO).addReg(Incr); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Tmp4).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift); - } + BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr); - int fi = 0; - if (BinOpcode == 0 && !Nand) { - // Get or create a temporary stack location. 
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Incr2).addFrameIndex(fi).addImm(0); - } - BB->addSuccessor(loopMBB); + // atomic.load.binop + // loopMBB: + // ll oldval,0(alignedaddr) + // binop binopres,oldval,incr2 + // and newval,binopres,mask + // and maskedoldval0,oldval,mask2 + // or storeval,maskedoldval0,newval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loopMBB + + // atomic.swap // loopMBB: - // ll oldval,0(addr) - // binop tmp7,oldval,incr2 - // and newval,tmp7,mask - // and tmp8,oldval,mask2 - // or tmp9,tmp8,newval - // sc tmp9,0(addr) - // beq tmp9,$0,loopMBB + // ll oldval,0(alignedaddr) + // and newval,incr2,mask + // and maskedoldval0,oldval,mask2 + // or storeval,maskedoldval0,newval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loopMBB + BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); if (Nand) { - // and tmp6, oldval, incr2 - // nor tmp7, $0, tmp6 - BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval).addReg(Incr2); - BuildMI(BB, dl, TII->get(Mips::NOR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6); - } else if (BinOpcode == Mips::SUBu) { - // addu tmp7, oldval, incr2 - BuildMI(BB, dl, TII->get(Mips::ADDu), Tmp7).addReg(Oldval).addReg(Incr2); + // and andres, oldval, incr2 + // nor binopres, $0, andres + // and newval, binopres, mask + BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2); + BuildMI(BB, dl, TII->get(Mips::NOR), BinOpRes) + .addReg(Mips::ZERO).addReg(AndRes); + BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); } else if (BinOpcode) { - // <binop> tmp7, oldval, incr2 - BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2); - } else { - // lw tmp6, fi(sp) // load incr2 from stack - // or tmp7, $zero, tmp6 - BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6); + // <binop> binopres, oldval, incr2 + // and newval, binopres, mask + BuildMI(BB, dl, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2); + BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); + } else {// atomic.swap + // and newval, incr2, mask + BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask); } - BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addReg(Addr).addImm(0); + + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) + .addReg(OldVal).addReg(Mask2); + BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) + .addReg(MaskedOldVal0).addReg(NewVal); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB); - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // and tmp10,oldval,mask - // srl tmp11,tmp10,shift - // sll tmp12,tmp11,24 - // sra dest,tmp12,24 - BB = exitMBB; + .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); + + // sinkMBB: + // and 
maskedoldval1,oldval,mask + // srl srlres,maskedoldval1,shiftamt + // sll sllres,srlres,24 + // sra dest,sllres,24 + BB = sinkMBB; int64_t ShiftImm = (Size == 1) ? 24 : 16; - // reverse order - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest) - .addReg(Tmp12).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp12) - .addReg(Tmp11).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp11) - .addReg(Tmp10).addReg(Shift); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::AND), Tmp10) - .addReg(Oldval).addReg(Mask); + + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1) + .addReg(OldVal).addReg(Mask); + BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes) + .addReg(ShiftAmt).addReg(MaskedOldVal1); + BuildMI(BB, dl, TII->get(Mips::SLL), SllRes) + .addReg(SrlRes).addImm(ShiftImm); + BuildMI(BB, dl, TII->get(Mips::SRA), Dest) + .addReg(SllRes).addImm(ShiftImm); MI->eraseFromParent(); // The instruction is gone now. - return BB; + return exitMBB; } MachineBasicBlock * @@ -989,11 +1110,10 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); - unsigned Oldval = MI->getOperand(2).getReg(); - unsigned Newval = MI->getOperand(3).getReg(); + unsigned OldVal = MI->getOperand(2).getReg(); + unsigned NewVal = MI->getOperand(3).getReg(); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -1012,26 +1132,14 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - // Get or create a temporary stack location. - MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - int fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - // thisMBB: // ... - // sw newval, fi(sp) // store newval to stack // fallthrough --> loop1MBB - - // Note: storing newval to stack before the loop and then loading it from - // stack in block loop2MBB is necessary to prevent MachineLICM pass to - // hoist "or" instruction out of the block loop2MBB. 
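Likewise, EmitAtomicCmpSwap now builds a two-block ll/sc loop with no stack traffic. For a word-sized cmpxchg the shape is roughly the following, modulo a copy the register allocator inserts because sc overwrites its data register (registers illustrative):

  // $BB0_1:                     # loop1MBB
  //   ll    $2, 0($4)           # Dest = *Ptr
  //   bne   $2, $5, $BB0_3      # exit if Dest != OldVal (the compare value)
  // $BB0_2:                     # loop2MBB
  //   sc    $6, 0($4)           # try to store NewVal; $6 becomes the success flag
  //   beq   $6, $zero, $BB0_1   # retry if the sc failed
  // $BB0_3:                     # exitMBB; $2 holds the value that was observed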
- - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Newval).addFrameIndex(fi).addImm(0); BB->addSuccessor(loop1MBB); + loop1MBB->addSuccessor(exitMBB); + loop1MBB->addSuccessor(loop2MBB); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(exitMBB); // loop1MBB: // ll dest, 0(ptr) @@ -1039,27 +1147,20 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, BB = loop1MBB; BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BNE)) - .addReg(Dest).addReg(Oldval).addMBB(exitMBB); - BB->addSuccessor(exitMBB); - BB->addSuccessor(loop2MBB); + .addReg(Dest).addReg(OldVal).addMBB(exitMBB); // loop2MBB: - // lw tmp2, fi(sp) // load newval from stack - // or tmp1, $0, tmp2 - // sc tmp1, 0(ptr) - // beq tmp1, $0, loop1MBB + // sc success, newval, 0(ptr) + // beq success, $0, loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(NewVal).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); + .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); MI->eraseFromParent(); // The instruction is gone now. - return BB; + return exitMBB; } MachineBasicBlock * @@ -1077,36 +1178,39 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); - unsigned Oldval = MI->getOperand(2).getReg(); - unsigned Newval = MI->getOperand(3).getReg(); + unsigned CmpVal = MI->getOperand(2).getReg(); + unsigned NewVal = MI->getOperand(3).getReg(); - unsigned Addr = RegInfo.createVirtualRegister(RC); - unsigned Shift = RegInfo.createVirtualRegister(RC); + unsigned AlignedAddr = RegInfo.createVirtualRegister(RC); + unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned Oldval2 = RegInfo.createVirtualRegister(RC); - unsigned Oldval3 = RegInfo.createVirtualRegister(RC); - unsigned Oldval4 = RegInfo.createVirtualRegister(RC); - unsigned Newval2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); - unsigned Tmp3 = RegInfo.createVirtualRegister(RC); - unsigned Tmp4 = RegInfo.createVirtualRegister(RC); - unsigned Tmp5 = RegInfo.createVirtualRegister(RC); - unsigned Tmp6 = RegInfo.createVirtualRegister(RC); - unsigned Tmp7 = RegInfo.createVirtualRegister(RC); - unsigned Tmp8 = RegInfo.createVirtualRegister(RC); - unsigned Tmp9 = RegInfo.createVirtualRegister(RC); + unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned OldVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC); + unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); + unsigned MaskLSB2 = RegInfo.createVirtualRegister(RC); + unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); + unsigned MaskUpper = RegInfo.createVirtualRegister(RC); + unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned SrlRes = 
RegInfo.createVirtualRegister(RC); + unsigned SllRes = RegInfo.createVirtualRegister(RC); + unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = BB; ++It; MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); + MF->insert(It, sinkMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -1115,76 +1219,90 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(loop1MBB); + loop1MBB->addSuccessor(sinkMBB); + loop1MBB->addSuccessor(loop2MBB); + loop2MBB->addSuccessor(loop1MBB); + loop2MBB->addSuccessor(sinkMBB); + sinkMBB->addSuccessor(exitMBB); + + // FIXME: computation of newval2 can be moved to loop2MBB. // thisMBB: - // addiu tmp1,$0,-4 # 0xfffffffc - // and addr,ptr,tmp1 - // andi tmp2,ptr,3 - // sll shift,tmp2,3 - // ori tmp3,$0,255 # 0xff - // sll mask,tmp3,shift + // addiu masklsb2,$0,-4 # 0xfffffffc + // and alignedaddr,ptr,masklsb2 + // andi ptrlsb2,ptr,3 + // sll shiftamt,ptrlsb2,3 + // ori maskupper,$0,255 # 0xff + // sll mask,maskupper,shiftamt // nor mask2,$0,mask - // andi tmp4,oldval,255 - // sll oldval2,tmp4,shift - // andi tmp5,newval,255 - // sll newval2,tmp5,shift + // andi maskedcmpval,cmpval,255 + // sll shiftedcmpval,maskedcmpval,shiftamt + // andi maskednewval,newval,255 + // sll shiftednewval,maskednewval,shiftamt int64_t MaskImm = (Size == 1) ? 
255 : 65535; - BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4); - BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3); - BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3); - BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift); + BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2) + .addReg(Mips::ZERO).addImm(-4); + BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr) + .addReg(Ptr).addReg(MaskLSB2); + BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); + BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); + BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper) + .addReg(Mips::ZERO).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), Mask) + .addReg(ShiftAmt).addReg(MaskUpper); BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Oldval).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Oldval2).addReg(Tmp4).addReg(Shift); - BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Newval).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLL), Newval2).addReg(Tmp5).addReg(Shift); - BB->addSuccessor(loop1MBB); + BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal) + .addReg(CmpVal).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal) + .addReg(ShiftAmt).addReg(MaskedCmpVal); + BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal) + .addReg(NewVal).addImm(MaskImm); + BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal) + .addReg(ShiftAmt).addReg(MaskedNewVal); // loop1MBB: - // ll oldval3,0(addr) - // and oldval4,oldval3,mask - // bne oldval4,oldval2,exitMBB + // ll oldval,0(alignedaddr) + // and maskedoldval0,oldval,mask + // bne maskedoldval0,shiftedcmpval,sinkMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask); + BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) + .addReg(OldVal).addReg(Mask); BuildMI(BB, dl, TII->get(Mips::BNE)) - .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB); - BB->addSuccessor(exitMBB); - BB->addSuccessor(loop2MBB); + .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB); // loop2MBB: - // and tmp6,oldval3,mask2 - // or tmp7,tmp6,newval2 - // sc tmp7,0(addr) - // beq tmp7,$0,loop1MBB + // and maskedoldval1,oldval,mask2 + // or storeval,maskedoldval1,shiftednewval + // sc success,storeval,0(alignedaddr) + // beq success,$0,loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp7) - .addReg(Tmp7).addReg(Addr).addImm(0); + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1) + .addReg(OldVal).addReg(Mask2); + BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) + .addReg(MaskedOldVal1).addReg(ShiftedNewVal); + BuildMI(BB, dl, TII->get(Mips::SC), Success) + .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); + .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); - // exitMBB: - // srl tmp8,oldval4,shift - // sll tmp9,tmp8,24 - // sra
dest,tmp9,24 - BB = exitMBB; + // sinkMBB: + // srl srlres,maskedoldval0,shiftamt + // sll sllres,srlres,24 + // sra dest,sllres,24 + BB = sinkMBB; int64_t ShiftImm = (Size == 1) ? 24 : 16; - // reverse order - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest) - .addReg(Tmp9).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp9) - .addReg(Tmp8).addImm(ShiftImm); - BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp8) - .addReg(Oldval4).addReg(Shift); + + BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes) + .addReg(ShiftAmt).addReg(MaskedOldVal0); + BuildMI(BB, dl, TII->get(Mips::SLL), SllRes) + .addReg(SrlRes).addImm(ShiftImm); + BuildMI(BB, dl, TII->get(Mips::SRA), Dest) + .addReg(SllRes).addImm(ShiftImm); MI->eraseFromParent(); // The instruction is gone now. - return BB; + return exitMBB; } //===----------------------------------------------------------------------===// @@ -1267,9 +1385,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { SDVTList VTs = DAG.getVTList(MVT::i32); MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering(); @@ -1292,21 +1410,26 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_GOT); - GA = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, GA); - SDValue ResNode = DAG.getLoad(MVT::i32, dl, + EVT ValTy = Op.getValueType(); + bool HasGotOfst = (GV->hasInternalLinkage() || + (GV->hasLocalLinkage() && !isa<Function>(GV))); + unsigned GotFlag = IsN64 ? + (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) : + MipsII::MO_GOT; + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag); + GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA); + SDValue ResNode = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), GA, MachinePointerInfo(), false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. - if (!GV->hasInternalLinkage() && - (!GV->hasLocalLinkage() || isa<Function>(GV))) + if (!HasGotOfst) return ResNode; - SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_ABS_LO); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo); - return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo); + SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, + IsN64 ? 
MipsII::MO_GOT_OFST : + MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo); + return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo); } SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, @@ -1361,11 +1484,11 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; - Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); + Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); std::pair<SDValue, SDValue> CallResult = LowerCallTo(DAG.getEntryNode(), - (const Type *) Type::getInt32Ty(*DAG.getContext()), + (Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); @@ -1557,6 +1680,25 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +// TODO: set SType according to the desired memory barrier behavior. +SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op, + SelectionDAG& DAG) const { + unsigned SType = 0; + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(SType, MVT::i32)); +} + +SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG& DAG) const { + // FIXME: Need pseudo-fence for 'singlethread' fences + // FIXME: Set SType for weaker fences where supported/appropriate. + unsigned SType = 0; + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(SType, MVT::i32)); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1679,55 +1821,109 @@ static const unsigned O32IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; +// Return next O32 integer argument register. +static unsigned getNextIntArgReg(unsigned Reg) { + assert((Reg == Mips::A0) || (Reg == Mips::A2)); + return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; +} + // Write ByVal Arg to arg registers and stack. static void -WriteByValArg(SDValue& Chain, DebugLoc dl, +WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass, SmallVector<SDValue, 8>& MemOpChains, int& LastFI, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, - MVT PtrType) { - unsigned FirstWord = VA.getLocMemOffset() / 4; - unsigned NumWords = (Flags.getByValSize() + 3) / 4; - unsigned LastWord = FirstWord + NumWords; - unsigned CurWord; - - // copy the first 4 words of byval arg to registers A0 - A3 - for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize); - ++CurWord) { + MVT PtrType, bool isLittle) { + unsigned LocMemOffset = VA.getLocMemOffset(); + unsigned Offset = 0; + uint32_t RemainingSize = Flags.getByValSize(); + unsigned ByValAlign = Flags.getByValAlign(); + + // Copy the first 4 words of byval arg to registers A0 - A3. + // FIXME: Use a stricter alignment if it enables better optimization in passes + // run later. 
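Editorial aside: the loop that follows leans on the O32 convention that the first 16 bytes of a byval aggregate travel in A0..A3, indexed by word offset. A hypothetical helper, mirroring the O32IntRegs table defined earlier in this file, that spells the mapping out (illustrative only, not part of the patch; it assumes the surrounding LLVM headers with Mips::A0 and friends are in scope):

    #include <cassert>

    // Which argument register receives the word at byte offset LocMemOffset of
    // the byval area; offsets of 16 and above stay on the stack.
    static unsigned getO32ByValReg(unsigned LocMemOffset) {
      static const unsigned Regs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
      assert(LocMemOffset < 4 * 4 && "word is passed on the stack");
      return Regs[LocMemOffset / 4];
    }

This is exactly the O32IntRegs[LocMemOffset / 4] indexing the loop below performs when it picks DstReg.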
+ for (; RemainingSize >= 4 && LocMemOffset < 4 * 4; + Offset += 4, RemainingSize -= 4, LocMemOffset += 4) { SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant((CurWord - FirstWord) * 4, - MVT::i32)); + DAG.getConstant(Offset, MVT::i32)); SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr, MachinePointerInfo(), - false, false, 0); + false, false, std::min(ByValAlign, + (unsigned )4)); MemOpChains.push_back(LoadVal.getValue(1)); - unsigned DstReg = O32IntRegs[CurWord]; + unsigned DstReg = O32IntRegs[LocMemOffset / 4]; RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); } - // copy remaining part of byval arg to stack. - if (CurWord < LastWord) { - unsigned SizeInBytes = (LastWord - CurWord) * 4; - SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant((CurWord - FirstWord) * 4, - MVT::i32)); - LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); - Chain = DAG.getMemcpy(Chain, dl, Dst, Src, - DAG.getConstant(SizeInBytes, MVT::i32), - /*Align*/4, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); - MemOpChains.push_back(Chain); + if (RemainingSize == 0) + return; + + // If there still is a register available for argument passing, write the + // remaining part of the structure to it using subword loads and shifts. + if (LocMemOffset < 4 * 4) { + assert(RemainingSize <= 3 && RemainingSize >= 1 && + "There must be one to three bytes remaining."); + unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize); + SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + unsigned Alignment = std::min(ByValAlign, (unsigned )4); + SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + LoadPtr, MachinePointerInfo(), + MVT::getIntegerVT(LoadSize * 8), false, + false, Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + + // If target is big endian, shift it to the most significant half-word or + // byte. + if (!isLittle) + LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal, + DAG.getConstant(32 - LoadSize * 8, MVT::i32)); + + Offset += LoadSize; + RemainingSize -= LoadSize; + + // Read second subword if necessary. + if (RemainingSize != 0) { + assert(RemainingSize == 1 && "There must be one byte remaining."); + LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + unsigned Alignment = std::min(ByValAlign, (unsigned )2); + SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + LoadPtr, MachinePointerInfo(), + MVT::i8, false, false, Alignment); + MemOpChains.push_back(Subword.getValue(1)); + // Insert the loaded byte to LoadVal. + // FIXME: Use INS if supported by target. + unsigned ShiftAmt = isLittle ? 16 : 8; + SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword, + DAG.getConstant(ShiftAmt, MVT::i32)); + LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift); + } + + unsigned DstReg = O32IntRegs[LocMemOffset / 4]; + RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); + return; } + + // Create a fixed object on stack at offset LocMemOffset and copy + // remaining part of byval arg to it using memcpy. 
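Editorial aside: a sketch of the subword packing performed above when three bytes remain, with the endian-dependent shifts written out. The function is hypothetical; P points at the three trailing bytes in memory order, and the shift amounts (32 - 16 for the big-endian halfword, then 16 or 8 for the spliced-in byte) are the ones chosen in WriteByValArg:

    #include <cstdint>

    // Model of the RemainingSize == 3 path: one zero-extended halfword load,
    // one zero-extended byte load, combined into a single argument register.
    uint32_t packTailBytes(const uint8_t *P, bool IsLittle) {
      uint32_t Half = IsLittle ? (uint32_t(P[1]) << 8) | P[0]    // lhu, LE layout
                               : (uint32_t(P[0]) << 8) | P[1];   // lhu, BE layout
      uint32_t LoadVal = IsLittle ? Half : Half << (32 - 16);    // BE: to MSBs
      return LoadVal | (uint32_t(P[2]) << (IsLittle ? 16 : 8));  // or in the byte
    }

On little-endian the register ends up holding 0x00b2b1b0; on big-endian, 0xb0b1b200. In both cases the bytes sit where a whole-word load of the padded aggregate would have put them.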
+ SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant(Offset, MVT::i32)); + LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true); + SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); + ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, + DAG.getConstant(RemainingSize, MVT::i32), + std::min(ByValAlign, (unsigned)4), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); } /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: isTailCall. SDValue -MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, +MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, @@ -1757,8 +1953,13 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NextStackOffset, - true)); + // Chain is the output chain of the last Load/Store or CopyToReg node. + // ByValChain is the output chain of the last Memcpy node created for copying + // byval arguments to the stack. + SDValue Chain, CallSeqStart, ByValChain; + SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); + Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal); + ByValChain = InChain; // If this is the first call, create a stack frame object that points to // a location to which .cprestore saves $gp. @@ -1818,8 +2019,10 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Arg, DAG.getConstant(1, MVT::i32)); if (!Subtarget->isLittle()) std::swap(Lo, Hi); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo)); - RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi)); + unsigned LocRegLo = VA.getLocReg(); + unsigned LocRegHigh = getNextIntArgReg(LocRegLo); + RegsToPass.push_back(std::make_pair(LocRegLo, Lo)); + RegsToPass.push_back(std::make_pair(LocRegHigh, Hi)); continue; } } @@ -1852,8 +2055,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, "No support for ByVal args by ABIs other than O32 yet."); assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); - WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg, - VA, Flags, getPointerTy()); + WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, + DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); continue; } @@ -1875,6 +2078,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (LastFI) MipsFI->extendOutArgFIRange(FirstFI, LastFI); + // If a memcpy has been created to copy a byval arg to a stack, replace the + // chain input of CallSeqStart with ByValChain. + if (InChain != ByValChain) + DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain, + NextStackOffsetVal); + // Transform all store nodes into one single node because all store // nodes are independent of each other. 
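Editorial aside: the "single node" the comment above refers to is, in SelectionDAG of this vintage, conventionally a TokenFactor that joins the collected chains. The unchanged context elided by the diff is along these lines (a sketch, not copied from the file):

    // Merge the independent load/store chains so the call depends on all of
    // the argument stores at once.
    if (!MemOpChains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOpChains[0], MemOpChains.size());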
if (!MemOpChains.empty()) @@ -2071,12 +2280,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (RegVT == MVT::i32) RC = Mips::CPURegsRegisterClass; + else if (RegVT == MVT::i64) + RC = Mips::CPU64RegsRegisterClass; else if (RegVT == MVT::f32) RC = Mips::FGR32RegisterClass; - else if (RegVT == MVT::f64) { - if (!Subtarget->isSingleFloat()) - RC = Mips::AFGR64RegisterClass; - } else + else if (RegVT == MVT::f64) + RC = HasMips64 ? Mips::FGR64RegisterClass : Mips::AFGR64RegisterClass; + else llvm_unreachable("RegVT not supported by FormalArguments Lowering"); // Transform the arguments stored on @@ -2105,7 +2315,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue); if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) { unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), - VA.getLocReg()+1, RC); + getNextIntArgReg(ArgReg), RC); SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); if (!Subtarget->isLittle()) std::swap(ArgValue, ArgValue2); @@ -2313,7 +2523,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight( // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h index bda26a2..4be3fed5 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h @@ -81,7 +81,12 @@ namespace llvm { WrapperPIC, - DynAlloc + DynAlloc, + + Sync, + + Ext, + Ins }; } @@ -93,6 +98,8 @@ namespace llvm { public: explicit MipsTargetLowering(MipsTargetMachine &TM); + virtual bool allowsUnalignedMemoryAccesses (EVT VT) const; + /// LowerOperation - Provide custom lowering hooks for some operations. 
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -101,13 +108,14 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - get the ISD::SETCC result ValueType - MVT::SimpleValueType getSetCCResultType(EVT VT) const; + EVT getSetCCResultType(EVT VT) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; private: // Subtarget Info const MipsSubtarget *Subtarget; - + + bool HasMips64, IsN64; // Lower Operand helpers SDValue LowerCallResult(SDValue Chain, SDValue InFlag, @@ -128,6 +136,8 @@ namespace llvm { SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td index 021c167..2fb9d18 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td @@ -27,7 +27,7 @@ def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>, - SDTCisInt<2>]>; + SDTCisVT<2, i32>]>; def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, @@ -35,12 +35,11 @@ def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>]>; def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, f64>, - SDTCisVT<0, i32>]>; + SDTCisVT<2, i32>]>; def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>; def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>; def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>; -def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>; def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, [SDNPHasChain, SDNPOptInGlue]>; def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>; @@ -55,10 +54,10 @@ let PrintMethod = "printFCCOperand" in // Feature predicates. //===----------------------------------------------------------------------===// -def In32BitMode : Predicate<"!Subtarget.isFP64bit()">; +def IsFP64bit : Predicate<"Subtarget.isFP64bit()">; +def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">; def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">; def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">; -def IsNotMipsI : Predicate<"!Subtarget.isMips1()">; //===----------------------------------------------------------------------===// // Instruction Class Templates @@ -74,97 +73,87 @@ def IsNotMipsI : Predicate<"!Subtarget.isMips1()">; // Only S32 and D32 are supported right now. //===----------------------------------------------------------------------===// -multiclass FFR1_1<bits<6> funct, string asmstr> -{ - def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), - !strconcat(asmstr, ".s\t$fd, $fs"), []>; - - def _D32 : FFR<0x11, funct, 0x1, (outs FGR32:$fd), (ins AFGR64:$fs), - !strconcat(asmstr, ".d\t$fd, $fs"), []>, Requires<[In32BitMode]>; +// FP load. 
+class FPLoad<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC, + Operand MemOpnd>: + FFI<op, (outs RC:$ft), (ins MemOpnd:$base), + !strconcat(opstr, "\t$ft, $base"), [(set RC:$ft, (FOp addr:$base))]>; + +// FP store. +class FPStore<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC, + Operand MemOpnd>: + FFI<op, (outs), (ins RC:$ft, MemOpnd:$base), + !strconcat(opstr, "\t$ft, $base"), [(store RC:$ft, addr:$base)]>; + +// Instructions that convert an FP value to 32-bit fixed point. +multiclass FFR1_W_M<bits<6> funct, string opstr> { + def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>; + def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>, + Requires<[NotFP64bit]>; + def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>, + Requires<[IsFP64bit]>; } -multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp> -{ - def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), - !strconcat(asmstr, ".s\t$fd, $fs"), - [(set FGR32:$fd, (FOp FGR32:$fs))]>; - - def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), - !strconcat(asmstr, ".d\t$fd, $fs"), - [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>; +// Instructions that convert an FP value to 64-bit fixed point. +let Predicates = [IsFP64bit] in +multiclass FFR1_L_M<bits<6> funct, string opstr> { + def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>; + def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>; } -class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc, - RegisterClass RcDst, string asmstr>: - FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs), - !strconcat(asmstr, "\t$fd, $fs"), []>; - +// FP-to-FP conversion instructions. +multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> { + def _S : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>; + def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>, + Requires<[NotFP64bit]>; + def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>, + Requires<[IsFP64bit]>; +} -multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp, bit isComm = 0> { +multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> { let isCommutable = isComm in { - def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), - (ins FGR32:$fs, FGR32:$ft), - !strconcat(asmstr, ".s\t$fd, $fs, $ft"), - [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>; - - def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), - (ins AFGR64:$fs, AFGR64:$ft), - !strconcat(asmstr, ".d\t$fd, $fs, $ft"), - [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>, - Requires<[In32BitMode]>; + def _S : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>; + def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>, + Requires<[NotFP64bit]>; + def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>, + Requires<[IsFP64bit]>; } } //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// +defm ROUND_W : FFR1_W_M<0xc, "round">; +defm ROUND_L : FFR1_L_M<0x8, "round">; +defm TRUNC_W : FFR1_W_M<0xd, "trunc">; +defm TRUNC_L : FFR1_L_M<0x9, "trunc">; +defm CEIL_W : FFR1_W_M<0xe, "ceil">; +defm CEIL_L : FFR1_L_M<0xa, "ceil">; +defm FLOOR_W : FFR1_W_M<0xf, "floor">; +defm FLOOR_L : FFR1_L_M<0xb, "floor">; +defm CVT_W : FFR1_W_M<0x24, "cvt">; +defm CVT_L : FFR1_L_M<0x25, "cvt">; + +def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>; + +let Predicates = [NotFP64bit] in { + def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>; + def 
CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>; + def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>; +} -let ft = 0 in { - defm FLOOR_W : FFR1_1<0b001111, "floor.w">; - defm CEIL_W : FFR1_1<0b001110, "ceil.w">; - defm ROUND_W : FFR1_1<0b001100, "round.w">; - defm TRUNC_W : FFR1_1<0b001101, "trunc.w">; - defm CVTW : FFR1_1<0b100100, "cvt.w">; - - defm FABS : FFR1_2<0b000101, "abs", fabs>; - defm FNEG : FFR1_2<0b000111, "neg", fneg>; - defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>; - - /// Convert to Single Precison - def CVTS_W32 : FFR1_3<0b100000, 0x2, FGR32, FGR32, "cvt.s.w">; - - let Predicates = [IsNotSingleFloat] in { - /// Ceil to long signed integer - def CEIL_LS : FFR1_3<0b001010, 0x0, FGR32, FGR32, "ceil.l">; - def CEIL_LD : FFR1_3<0b001010, 0x1, AFGR64, AFGR64, "ceil.l">; - - /// Round to long signed integer - def ROUND_LS : FFR1_3<0b001000, 0x0, FGR32, FGR32, "round.l">; - def ROUND_LD : FFR1_3<0b001000, 0x1, AFGR64, AFGR64, "round.l">; - - /// Floor to long signed integer - def FLOOR_LS : FFR1_3<0b001011, 0x0, FGR32, FGR32, "floor.l">; - def FLOOR_LD : FFR1_3<0b001011, 0x1, AFGR64, AFGR64, "floor.l">; - - /// Trunc to long signed integer - def TRUNC_LS : FFR1_3<0b001001, 0x0, FGR32, FGR32, "trunc.l">; - def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">; - - /// Convert to long signed integer - def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">; - def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">; - - /// Convert to Double Precison - def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">; - def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">; - def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">; - - /// Convert to Single Precison - def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">; - def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">; - } +let Predicates = [IsFP64bit] in { + def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>; + def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>; + def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>; + def CVT_D64_S : FFR1<0x21, 16, "cvt", "d.s", FGR64, FGR32>; + def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>; } +defm FABS : FFR1P_M<0x5, "abs", fabs>; +defm FNEG : FFR1P_M<0x7, "neg", fneg>; +defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>; + // The odd-numbered registers are only referenced when doing loads, // stores, and moves between floating-point and integer registers. 
// When defining instructions, we reference all 32-bit registers, @@ -178,37 +167,46 @@ let fd = 0 in { "ctc1\t$fs, $rt", []>; def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs), - "mfc1\t$rt, $fs", []>; + "mfc1\t$rt, $fs", + [(set CPURegs:$rt, (bitconvert FGR32:$fs))]>; def MTC1 : FFR<0x11, 0x00, 0x04, (outs FGR32:$fs), (ins CPURegs:$rt), - "mtc1\t$rt, $fs", []>; + "mtc1\t$rt, $fs", + [(set FGR32:$fs, (bitconvert CPURegs:$rt))]>; } -def FMOV_S32 : FFR<0x11, 0b000110, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), - "mov.s\t$fd, $fs", []>; -def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), - "mov.d\t$fd, $fs", []>; +def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>; +def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>, + Requires<[NotFP64bit]>; +def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>, + Requires<[IsFP64bit]>; /// Floating Point Memory Instructions -let Predicates = [IsNotSingleFloat, IsNotMipsI] in { - def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), - "ldc1\t$ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>; - - def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr), - "sdc1\t$ft, $addr", [(store AFGR64:$ft, addr:$addr)]>; +let Predicates = [IsN64] in { + def LWC1_P8 : FPLoad<0x31, "lwc1", load, FGR32, mem64>; + def SWC1_P8 : FPStore<0x39, "swc1", store, FGR32, mem64>; + def LDC164_P8 : FPLoad<0x35, "ldc1", load, FGR64, mem64>; + def SDC164_P8 : FPStore<0x3d, "sdc1", store, FGR64, mem64>; } -// LWC1 and SWC1 can always be emitted with odd registers. -def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1\t$ft, $addr", - [(set FGR32:$ft, (load addr:$addr))]>; -def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr), - "swc1\t$ft, $addr", [(store FGR32:$ft, addr:$addr)]>; +let Predicates = [NotN64] in { + def LWC1 : FPLoad<0x31, "lwc1", load, FGR32, mem>; + def SWC1 : FPStore<0x39, "swc1", store, FGR32, mem>; + let Predicates = [HasMips64] in { + def LDC164 : FPLoad<0x35, "ldc1", load, FGR64, mem>; + def SDC164 : FPStore<0x3d, "sdc1", store, FGR64, mem>; + } + let Predicates = [NotMips64] in { + def LDC1 : FPLoad<0x35, "ldc1", load, AFGR64, mem>; + def SDC1 : FPStore<0x3d, "sdc1", store, AFGR64, mem>; + } +} /// Floating-point Arithmetic -defm FADD : FFR1_4<0x10, "add", fadd, 1>; -defm FDIV : FFR1_4<0x03, "div", fdiv>; -defm FMUL : FFR1_4<0x02, "mul", fmul, 1>; -defm FSUB : FFR1_4<0x01, "sub", fsub>; +defm FADD : FFR2P_M<0x10, "add", fadd, 1>; +defm FDIV : FFR2P_M<0x03, "div", fdiv>; +defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>; +defm FSUB : FFR2P_M<0x01, "sub", fsub>; //===----------------------------------------------------------------------===// // Floating Point Branch Codes @@ -217,8 +215,6 @@ defm FSUB : FFR1_4<0x01, "sub", fsub>; //===----------------------------------------------------------------------===// // They must be kept in synch.
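Editorial aside: the "kept in synch" warning concerns the integer encodings behind the PatLeafs that follow. A hypothetical C++ mirror of the two branch-code values (taken from the (i32 0)/(i32 1) PatLeaf definitions below), illustrating what the TableGen side must agree with:

    // Illustrative only: 0 selects bc1f (branch on FP condition false),
    // 1 selects bc1t (branch on FP condition true).
    enum MipsFPBranchCode {
      MIPS_BRANCH_F = 0,
      MIPS_BRANCH_T = 1
    };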
def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; -def MIPS_BRANCH_FL : PatLeaf<(i32 2)>; -def MIPS_BRANCH_TL : PatLeaf<(i32 3)>; /// Floating Point Branch of False/True (Likely) let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in @@ -228,8 +224,6 @@ let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">; def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">; -def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">; -def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">; //===----------------------------------------------------------------------===// // Floating Point Flag Conditions @@ -254,7 +248,7 @@ def MIPS_FCOND_LE : PatLeaf<(i32 14)>; def MIPS_FCOND_NGT : PatLeaf<(i32 15)>; /// Floating Point Compare -let hasDelaySlot = 1, Defs=[FCR31] in { +let Defs=[FCR31] in { def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc), "c.$cc.s\t$fs, $ft", [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>; @@ -262,7 +256,7 @@ let hasDelaySlot = 1, Defs=[FCR31] in { def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc), "c.$cc.d\t$fs, $ft", [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>, - Requires<[In32BitMode]>; + Requires<[NotFP64bit]>; } @@ -280,7 +274,7 @@ class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func, def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">; def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">; -let Predicates = [In32BitMode] in { +let Predicates = [NotFP64bit] in { def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">; def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">; } @@ -288,7 +282,7 @@ let Predicates = [In32BitMode] in { defm : MovzPats<FGR32, MOVZ_S>; defm : MovnPats<FGR32, MOVN_S>; -let Predicates = [In32BitMode] in { +let Predicates = [NotFP64bit] in { defm : MovzPats<AFGR64, MOVZ_D>; defm : MovnPats<AFGR64, MOVN_D>; } @@ -313,7 +307,7 @@ def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">; def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">; def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">; -let Predicates = [In32BitMode] in { +let Predicates = [NotFP64bit] in { def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">; def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">; } @@ -353,22 +347,16 @@ def fpimm0neg : PatLeaf<(fpimm), [{ }]>; def : Pat<(f32 fpimm0), (MTC1 ZERO)>; -def : Pat<(f32 fpimm0neg), (FNEG_S32 (MTC1 ZERO))>; +def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; -def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>; -def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>; +def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>; +def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>; -def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>; +def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>; def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>; -def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>; -def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>; - -let Predicates = [In32BitMode] in { - def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>; - def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>; +let Predicates = [NotFP64bit] in { + def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>; + def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>; } -// MipsFPRound is only emitted for MipsI targets. 
-def : Pat<(f32 (MipsFPRound AFGR64:$src)), (CVTW_D32 AFGR64:$src)>; - diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td index 9f55fb3..d246a26 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td @@ -44,7 +44,9 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, // Mips Pseudo Instructions Format class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst<outs, ins, asmstr, pattern, IIPseudo>; + MipsInst<outs, ins, asmstr, pattern, IIPseudo> { + let isPseudo = 1; +} //===----------------------------------------------------------------------===// // Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|> @@ -88,6 +90,21 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, let Inst{15-0} = imm16; } +class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin>: + MipsInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rs; + bits<5> rt; + bits<16> imm16; + + let opcode = op; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + //===----------------------------------------------------------------------===// // Format J instruction class in Mips : <|opcode|address|> //===----------------------------------------------------------------------===// @@ -224,4 +241,27 @@ class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr, let Inst{15-11} = fs; let Inst{10-6} = fd; let Inst{5-0} = 17; -}
\ No newline at end of file +} + +// FP unary instructions without patterns. +class FFR1<bits<6> funct, bits<5> fmt, string opstr, string fmtstr, + RegisterClass DstRC, RegisterClass SrcRC> : + FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs), + !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), []> { + let ft = 0; +} + +// FP unary instructions with patterns. +class FFR1P<bits<6> funct, bits<5> fmt, string opstr, string fmtstr, + RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode> : + FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs), + !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), + [(set DstRC:$fd, (OpNode SrcRC:$fs))]> { + let ft = 0; +} + +class FFR2P<bits<6> funct, bits<5> fmt, string opstr, + string fmtstr, RegisterClass RC, SDNode OpNode> : + FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft), + !strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"), + [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>; diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp index 0a7a7f2..559943a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -17,8 +17,8 @@ #include "InstPrinter/MipsInstPrinter.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #define GET_INSTRINFO_CTOR @@ -28,7 +28,8 @@ using namespace llvm; MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), - TM(tm), RI(*TM.getSubtargetImpl(), *this) {} + TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()), + RI(*TM.getSubtargetImpl(), *this) {} const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { @@ -47,8 +48,12 @@ static bool isZeroImm(const MachineOperand &op) { unsigned MipsInstrInfo:: isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { - if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) || - (MI->getOpcode() == Mips::LDC1)) { + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || + (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || + (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || + (Opc == Mips::LDC164_P8)) { if ((MI->getOperand(1).isFI()) && // is a stack slot (MI->getOperand(2).isImm()) && // the imm is zero (isZeroImm(MI->getOperand(2)))) { @@ -68,8 +73,12 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const unsigned MipsInstrInfo:: isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { - if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) || - (MI->getOpcode() == Mips::SDC1)) { + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || + (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || + (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || + (Opc == Mips::SDC164_P8)) { if ((MI->getOperand(1).isFI()) && // is a stack slot (MI->getOperand(2).isImm()) && // the imm is zero (isZeroImm(MI->getOperand(2)))) { @@ -94,70 +103,63 @@ copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - bool DestCPU = Mips::CPURegsRegClass.contains(DestReg); - bool SrcCPU = Mips::CPURegsRegClass.contains(SrcReg); - - // CPU-CPU is the most common. 
- if (DestCPU && SrcCPU) { - BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } + unsigned Opc = 0, ZeroReg = 0; - // Copy to CPU from other registers. - if (DestCPU) { - if (Mips::CCRRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. + if (Mips::CPURegsRegClass.contains(SrcReg)) + Opc = Mips::ADDu, ZeroReg = Mips::ZERO; + else if (Mips::CCRRegClass.contains(SrcReg)) + Opc = Mips::CFC1; else if (Mips::FGR32RegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + Opc = Mips::MFC1; else if (SrcReg == Mips::HI) - BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg); + Opc = Mips::MFHI, SrcReg = 0; else if (SrcReg == Mips::LO) - BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg); - else - llvm_unreachable("Copy to CPU from invalid register"); - return; + Opc = Mips::MFLO, SrcReg = 0; } - - // Copy to other registers from CPU. - if (SrcCPU) { + else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. if (Mips::CCRRegClass.contains(DestReg)) - BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + Opc = Mips::CTC1; else if (Mips::FGR32RegClass.contains(DestReg)) - BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + Opc = Mips::MTC1; else if (DestReg == Mips::HI) - BuildMI(MBB, I, DL, get(Mips::MTHI)) - .addReg(SrcReg, getKillRegState(KillSrc)); + Opc = Mips::MTHI, DestReg = 0; else if (DestReg == Mips::LO) - BuildMI(MBB, I, DL, get(Mips::MTLO)) - .addReg(SrcReg, getKillRegState(KillSrc)); - else - llvm_unreachable("Copy from CPU to invalid register"); - return; + Opc = Mips::MTLO, DestReg = 0; } - - if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; + else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_S; + else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D32; + else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) + Opc = Mips::MOVCCRToCCR; + else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. + if (Mips::CPU64RegsRegClass.contains(SrcReg)) + Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; + else if (SrcReg == Mips::HI64) + Opc = Mips::MFHI64, SrcReg = 0; + else if (SrcReg == Mips::LO64) + Opc = Mips::MFLO64, SrcReg = 0; } - - if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; + else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. 
+ if (DestReg == Mips::HI64) + Opc = Mips::MTHI64, DestReg = 0; + else if (DestReg == Mips::LO64) + Opc = Mips::MTLO64, DestReg = 0; } - if (Mips::CCRRegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - llvm_unreachable("Cannot copy registers"); + assert(Opc && "Cannot copy registers"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); + + if (DestReg) + MIB.addReg(DestReg, RegState::Define); + + if (ZeroReg) + MIB.addReg(ZeroReg); + + if (SrcReg) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); } void MipsInstrInfo:: @@ -167,31 +169,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); + unsigned Opc = 0; if (RC == Mips::CPURegsRegisterClass) - BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); + Opc = IsN64 ? Mips::SW_P8 : Mips::SW; + else if (RC == Mips::CPU64RegsRegisterClass) + Opc = IsN64 ? Mips::SD_P8 : Mips::SD; else if (RC == Mips::FGR32RegisterClass) - BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); - else if (RC == Mips::AFGR64RegisterClass) { - if (!TM.getSubtarget<MipsSubtarget>().isMips1()) { - BuildMI(MBB, I, DL, get(Mips::SDC1)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); - } else { - const TargetRegisterInfo *TRI = - MBB.getParent()->getTarget().getRegisterInfo(); - const unsigned *SubSet = TRI->getSubRegisters(SrcReg); - BuildMI(MBB, I, DL, get(Mips::SWC1)) - .addReg(SubSet[0], getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); - BuildMI(MBB, I, DL, get(Mips::SWC1)) - .addReg(SubSet[1], getKillRegState(isKill)) - .addFrameIndex(FI).addImm(4); - } - } else - llvm_unreachable("Register class not handled!"); + Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; + else if (RC == Mips::AFGR64RegisterClass) + Opc = Mips::SDC1; + else if (RC == Mips::FGR64RegisterClass) + Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); } void MipsInstrInfo:: @@ -202,25 +195,21 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); + unsigned Opc = 0; if (RC == Mips::CPURegsRegisterClass) - BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addFrameIndex(FI).addImm(0); + Opc = IsN64 ? Mips::LW_P8 : Mips::LW; + else if (RC == Mips::CPU64RegsRegisterClass) + Opc = IsN64 ? Mips::LD_P8 : Mips::LD; else if (RC == Mips::FGR32RegisterClass) - BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addFrameIndex(FI).addImm(0); - else if (RC == Mips::AFGR64RegisterClass) { - if (!TM.getSubtarget<MipsSubtarget>().isMips1()) { - BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addFrameIndex(FI).addImm(0); - } else { - const TargetRegisterInfo *TRI = - MBB.getParent()->getTarget().getRegisterInfo(); - const unsigned *SubSet = TRI->getSubRegisters(DestReg); - BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0]) - .addFrameIndex(FI).addImm(0); - BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1]) - .addFrameIndex(FI).addImm(4); - } - } else - llvm_unreachable("Register class not handled!"); + Opc = IsN64 ? 
Mips::LWC1_P8 : Mips::LWC1; + else if (RC == Mips::AFGR64RegisterClass) + Opc = Mips::LDC1; + else if (RC == Mips::FGR64RegisterClass) + Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0); } MachineInstr* @@ -237,9 +226,12 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, //===----------------------------------------------------------------------===// static unsigned GetAnalyzableBrOpc(unsigned Opc) { - return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || - Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || - Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? Opc : 0; + return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || + Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || + Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || + Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || + Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? + Opc : 0; } /// GetOppositeBranchOpc - Return the inverse of the specified @@ -248,14 +240,20 @@ unsigned Mips::GetOppositeBranchOpc(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Illegal opcode!"); - case Mips::BEQ : return Mips::BNE; - case Mips::BNE : return Mips::BEQ; - case Mips::BGTZ : return Mips::BLEZ; - case Mips::BGEZ : return Mips::BLTZ; - case Mips::BLTZ : return Mips::BGEZ; - case Mips::BLEZ : return Mips::BGTZ; - case Mips::BC1T : return Mips::BC1F; - case Mips::BC1F : return Mips::BC1T; + case Mips::BEQ : return Mips::BNE; + case Mips::BNE : return Mips::BEQ; + case Mips::BGTZ : return Mips::BLEZ; + case Mips::BGEZ : return Mips::BLTZ; + case Mips::BLTZ : return Mips::BGEZ; + case Mips::BLEZ : return Mips::BGTZ; + case Mips::BEQ64 : return Mips::BNE64; + case Mips::BNE64 : return Mips::BEQ64; + case Mips::BGTZ64 : return Mips::BLEZ64; + case Mips::BGEZ64 : return Mips::BLTZ64; + case Mips::BLTZ64 : return Mips::BGEZ64; + case Mips::BLEZ64 : return Mips::BGTZ64; + case Mips::BC1T : return Mips::BC1F; + case Mips::BC1F : return Mips::BC1T; } } diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h index 4421c48..271d248 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h @@ -72,12 +72,47 @@ namespace MipsII { /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from // the thread pointer (Local Exec TLS). MO_TPREL_HI, - MO_TPREL_LO + MO_TPREL_LO, + + // N32/64 Flags. + MO_GPOFF_HI, + MO_GPOFF_LO, + MO_GOT_DISP, + MO_GOT_PAGE, + MO_GOT_OFST + }; + + enum { + //===------------------------------------------------------------------===// + // Instruction encodings. These are the standard/most common forms for + // Mips instructions. + // + + // Pseudo - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + Pseudo = 0, + + /// FrmR - This form is for instructions of the format R. + FrmR = 1, + /// FrmI - This form is for instructions of the format I. + FrmI = 2, + /// FrmJ - This form is for instructions of the format J. + FrmJ = 3, + /// FrmFR - This form is for instructions of the format FR. + FrmFR = 4, + /// FrmFI - This form is for instructions of the format FI. 
+ FrmFI = 5, + /// FrmOther - This form is for instructions that have no specific format. + FrmOther = 6, + + FormMask = 15 }; } class MipsInstrInfo : public MipsGenInstrInfo { MipsTargetMachine &TM; + bool IsN64; const MipsRegisterInfo RI; public: explicit MipsInstrInfo(MipsTargetMachine &TM); diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td index d1a0587..06b7de7 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -34,13 +34,20 @@ def SDT_MipsMAddMSub : SDTypeProfile<0, 4, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>]>; def SDT_MipsDivRem : SDTypeProfile<0, 2, - [SDTCisVT<0, i32>, + [SDTCisInt<0>, SDTCisSameAs<0, 1>]>; def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, iPTR>]>; +def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; + +def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>; +def SDT_Ins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, SDTCisSameAs<2, 3>, + SDTCisSameAs<0, 4>]>; // Call def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, @@ -106,6 +113,11 @@ def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>; def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc, [SDNPHasChain, SDNPInGlue]>; +def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>; + +def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>; +def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>; + //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. //===----------------------------------------------------------------------===// @@ -113,8 +125,13 @@ def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">; def HasBitCount : Predicate<"Subtarget.hasBitCount()">; def HasSwap : Predicate<"Subtarget.hasSwap()">; def HasCondMov : Predicate<"Subtarget.hasCondMov()">; -def IsMips32 : Predicate<"Subtarget.isMips32()">; -def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">; +def HasMips32 : Predicate<"Subtarget.hasMips32()">; +def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">; +def HasMips64 : Predicate<"Subtarget.hasMips64()">; +def NotMips64 : Predicate<"!Subtarget.hasMips64()">; +def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">; +def IsN64 : Predicate<"Subtarget.isABI_N64()">; +def NotN64 : Predicate<"!Subtarget.isABI_N64()">; //===----------------------------------------------------------------------===// // Mips Operand, Complex Patterns and Transformations Definitions. 
@@ -124,6 +141,7 @@ def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">; def brtarget : Operand<OtherVT>; def calltarget : Operand<i32>; def simm16 : Operand<i32>; +def simm16_64 : Operand<i64>; def shamt : Operand<i32>; // Unsigned Operand @@ -137,6 +155,11 @@ def mem : Operand<i32> { let MIOperandInfo = (ops CPURegs, simm16); } +def mem64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops CPU64Regs, simm16_64); +} + def mem_ea : Operand<i32> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops CPURegs, simm16); @@ -177,36 +200,85 @@ def immZExt5 : PatLeaf<(imm), [{ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>; //===----------------------------------------------------------------------===// +// Pattern fragment for load/store +//===----------------------------------------------------------------------===// +class UnalignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{ + LoadSDNode *LD = cast<LoadSDNode>(N); + return LD->getMemoryVT().getSizeInBits()/8 > LD->getAlignment(); +}]>; + +class AlignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{ + LoadSDNode *LD = cast<LoadSDNode>(N); + return LD->getMemoryVT().getSizeInBits()/8 <= LD->getAlignment(); +}]>; + +class UnalignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr), + (Node node:$val, node:$ptr), [{ + StoreSDNode *SD = cast<StoreSDNode>(N); + return SD->getMemoryVT().getSizeInBits()/8 > SD->getAlignment(); +}]>; + +class AlignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr), + (Node node:$val, node:$ptr), [{ + StoreSDNode *SD = cast<StoreSDNode>(N); + return SD->getMemoryVT().getSizeInBits()/8 <= SD->getAlignment(); +}]>; + +// Load/Store PatFrags. +def sextloadi16_a : AlignedLoad<sextloadi16>; +def zextloadi16_a : AlignedLoad<zextloadi16>; +def extloadi16_a : AlignedLoad<extloadi16>; +def load_a : AlignedLoad<load>; +def sextloadi32_a : AlignedLoad<sextloadi32>; +def zextloadi32_a : AlignedLoad<zextloadi32>; +def extloadi32_a : AlignedLoad<extloadi32>; +def truncstorei16_a : AlignedStore<truncstorei16>; +def store_a : AlignedStore<store>; +def truncstorei32_a : AlignedStore<truncstorei32>; +def sextloadi16_u : UnalignedLoad<sextloadi16>; +def zextloadi16_u : UnalignedLoad<zextloadi16>; +def extloadi16_u : UnalignedLoad<extloadi16>; +def load_u : UnalignedLoad<load>; +def sextloadi32_u : UnalignedLoad<sextloadi32>; +def zextloadi32_u : UnalignedLoad<zextloadi32>; +def extloadi32_u : UnalignedLoad<extloadi32>; +def truncstorei16_u : UnalignedStore<truncstorei16>; +def store_u : UnalignedStore<store>; +def truncstorei32_u : UnalignedStore<truncstorei32>; + +//===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// -// Arithmetic 3 register operands -class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode, - InstrItinClass itin, bit isComm = 0>: - FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin> { +// Arithmetic and logical instructions with 3 register operands. 
+class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode, + InstrItinClass itin, RegisterClass RC, bit isComm = 0>: + FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt), + !strconcat(instr_asm, "\t$rd, $rs, $rt"), + [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> { + let shamt = 0; let isCommutable = isComm; } class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm, - bit isComm = 0>: - FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu> { + InstrItinClass itin, RegisterClass RC, bit isComm = 0>: + FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt), + !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], itin> { + let shamt = 0; let isCommutable = isComm; } -// Arithmetic 2 register operands -class ArithI<bits<6> op, string instr_asm, SDNode OpNode, - Operand Od, PatLeaf imm_type> : - FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>; +// Arithmetic and logical instructions with 2 register operands. +class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type, RegisterClass RC> : + FI<op, (outs RC:$rt), (ins RC:$rs, Od:$i), + !strconcat(instr_asm, "\t$rt, $rs, $i"), + [(set RC:$rt, (OpNode RC:$rs, imm_type:$i))], IIAlu>; class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode, - Operand Od, PatLeaf imm_type> : - FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>; + Operand Od, PatLeaf imm_type, RegisterClass RC> : + FI<op, (outs RC:$rt), (ins RC:$rs, Od:$i), + !strconcat(instr_asm, "\t$rt, $rs, $i"), [], IIAlu>; // Arithmetic Multiply ADD/SUB let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in @@ -214,92 +286,134 @@ class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> : FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt), !strconcat(instr_asm, "\t$rs, $rt"), [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> { + let rd = 0; + let shamt = 0; let isCommutable = isComm; } // Logical -let isCommutable = 1 in -class LogicR<bits<6> func, string instr_asm, SDNode OpNode>: - FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>; - -class LogicI<bits<6> op, string instr_asm, SDNode OpNode>: - FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>; - -let isCommutable = 1 in -class LogicNOR<bits<6> op, bits<6> func, string instr_asm>: - FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>; +class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>: + FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt), + !strconcat(instr_asm, "\t$rd, $rs, $rt"), + [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu> { + let shamt = 0; + let isCommutable = 1; +} // Shifts class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm, SDNode OpNode>: - FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu> { + FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rt, shamt:$shamt), + !strconcat(instr_asm, "\t$rd, $rt, $shamt"), 
+     [(set CPURegs:$rd, (OpNode CPURegs:$rt, (i32 immZExt5:$shamt)))], IIAlu> {
   let rs = _rs;
 }
 
-class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm,
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
                               SDNode OpNode>:
-  FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
-     !strconcat(instr_asm, "\t$dst, $b, $c"),
-     [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu> {
-  let shamt = _shamt;
+  FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rs, CPURegs:$rt),
+     !strconcat(instr_asm, "\t$rd, $rt, $rs"),
+     [(set CPURegs:$rd, (OpNode CPURegs:$rt, CPURegs:$rs))], IIAlu> {
+  let shamt = isRotate;
 }
 
 // Load Upper Immediate
 class LoadUpper<bits<6> op, string instr_asm>:
-  FI< op,
-      (outs CPURegs:$dst),
-      (ins uimm16:$imm),
-      !strconcat(instr_asm, "\t$dst, $imm"),
-      [], IIAlu>;
+  FI<op, (outs CPURegs:$rt), (ins uimm16:$imm),
+     !strconcat(instr_asm, "\t$rt, $imm"), [], IIAlu> {
+  let rs = 0;
+}
 
 // Memory Load/Store
-let canFoldAsLoad = 1, hasDelaySlot = 1 in
-class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
-  FI<op, (outs CPURegs:$dst), (ins mem:$addr),
-     !strconcat(instr_asm, "\t$dst, $addr"),
-     [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+let canFoldAsLoad = 1 in
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
+            Operand MemOpnd, bit Pseudo>:
+  FI<op, (outs RC:$rt), (ins MemOpnd:$addr),
+     !strconcat(instr_asm, "\t$rt, $addr"),
+     [(set RC:$rt, (OpNode addr:$addr))], IILoad> {
+  let isPseudo = Pseudo;
+}
 
-class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
-  FI<op, (outs), (ins CPURegs:$dst, mem:$addr),
-     !strconcat(instr_asm, "\t$dst, $addr"),
-     [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
+             Operand MemOpnd, bit Pseudo>:
+  FI<op, (outs), (ins RC:$rt, MemOpnd:$addr),
+     !strconcat(instr_asm, "\t$rt, $addr"),
+     [(OpNode RC:$rt, addr:$addr)], IIStore> {
+  let isPseudo = Pseudo;
+}
+
+// 32-bit load.
+multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
+                   bit Pseudo = 0> {
+  def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
+               Requires<[NotN64]>;
+  def _P8    : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
+               Requires<[IsN64]>;
+}
+
+// 64-bit load.
+multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
+                   bit Pseudo = 0> {
+  def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
+               Requires<[NotN64]>;
+  def _P8    : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
+               Requires<[IsN64]>;
+}
+
+// 32-bit store.
+multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
+                    bit Pseudo = 0> {
+  def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
+               Requires<[NotN64]>;
+  def _P8    : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
+               Requires<[IsN64]>;
+}
+
+// 64-bit store.
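+// As with the load multiclasses, each memory operation is defined twice:
+// the plain variant (NotN64) addresses memory through the 32-bit mem
+// operand, while the _P8 variant (IsN64) uses mem64 for 64-bit pointers.
+// The _a/_u fragments defined earlier then decide whether the aligned
+// instruction (e.g. lh) or its unaligned counterpart (ulh) is selected
+// for halfword and word accesses.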
+multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode, + bit Pseudo = 0> { + def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>, + Requires<[NotN64]>; + def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>, + Requires<[IsN64]>; +} // Conditional Branch -let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in { -class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>: - FI<op, (outs), (ins CPURegs:$a, CPURegs:$b, brtarget:$offset), - !strconcat(instr_asm, "\t$a, $b, $offset"), - [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)], - IIBranch>; +class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: + CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$offset), + !strconcat(instr_asm, "\t$rs, $rt, $offset"), + [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$offset)], IIBranch> { + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 1; +} -class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>: - FI<op, (outs), (ins CPURegs:$src, brtarget:$offset), - !strconcat(instr_asm, "\t$src, $offset"), - [(brcond (cond_op CPURegs:$src, 0), bb:$offset)], - IIBranch>; +class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op, + RegisterClass RC>: + CBranchBase<op, (outs), (ins RC:$rs, brtarget:$offset), + !strconcat(instr_asm, "\t$rs, $offset"), + [(brcond (i32 (cond_op RC:$rs, 0)), bb:$offset)], IIBranch> { + let rt = _rt; + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 1; } // SetCC -class SetCC_R<bits<6> op, bits<6> func, string instr_asm, - PatFrag cond_op>: - FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))], - IIAlu>; +class SetCC_R<bits<6> op, bits<6> func, string instr_asm, PatFrag cond_op, + RegisterClass RC>: + FR<op, func, (outs CPURegs:$rd), (ins RC:$rs, RC:$rt), + !strconcat(instr_asm, "\t$rd, $rs, $rt"), + [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))], + IIAlu> { + let shamt = 0; +} -class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, - Operand Od, PatLeaf imm_type>: - FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))], +class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od, + PatLeaf imm_type, RegisterClass RC>: + FI<op, (outs CPURegs:$rd), (ins RC:$rs, Od:$i), + !strconcat(instr_asm, "\t$rd, $rs, $i"), + [(set CPURegs:$rd, (cond_op RC:$rs, imm_type:$i))], IIAlu>; // Unconditional branch @@ -310,8 +424,12 @@ class JumpFJ<bits<6> op, string instr_asm>: let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in class JumpFR<bits<6> op, bits<6> func, string instr_asm>: - FR<op, func, (outs), (ins CPURegs:$target), - !strconcat(instr_asm, "\t$target"), [(brind CPURegs:$target)], IIBranch>; + FR<op, func, (outs), (ins CPURegs:$rs), + !strconcat(instr_asm, "\t$rs"), [(brind CPURegs:$rs)], IIBranch> { + let rt = 0; + let rd = 0; + let shamt = 0; +} // Jump and Link (Call) let isCall=1, hasDelaySlot=1, @@ -323,76 +441,124 @@ let isCall=1, hasDelaySlot=1, !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)], IIBranch>; - let rd=31 in class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>: FR<op, func, (outs), (ins CPURegs:$rs, variable_ops), - !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch>; + !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch> { + let rt = 0; 
+ let rd = 31; + let shamt = 0; + } class BranchLink<string instr_asm>: FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$target, variable_ops), - !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch>; + !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch> { + let rt = 0; + } } // Mul, Div -let Defs = [HI, LO] in { - let isCommutable = 1 in - class Mul<bits<6> func, string instr_asm, InstrItinClass itin>: - FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), - !strconcat(instr_asm, "\t$a, $b"), [], itin>; +class Mul<bits<6> func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt), + !strconcat(instr_asm, "\t$rs, $rt"), [], itin> { + let rd = 0; + let shamt = 0; + let isCommutable = 1; + let Defs = [HI, LO]; +} - class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: - FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), - !strconcat(instr_asm, "\t$$zero, $a, $b"), - [(op CPURegs:$a, CPURegs:$b)], itin>; +class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt), + !strconcat(instr_asm, "\t$$zero, $rs, $rt"), + [(op CPURegs:$rs, CPURegs:$rt)], itin> { + let rd = 0; + let shamt = 0; + let Defs = [HI, LO]; } // Move from Hi/Lo class MoveFromLOHI<bits<6> func, string instr_asm>: - FR<0x00, func, (outs CPURegs:$dst), (ins), - !strconcat(instr_asm, "\t$dst"), [], IIHiLo>; + FR<0x00, func, (outs CPURegs:$rd), (ins), + !strconcat(instr_asm, "\t$rd"), [], IIHiLo> { + let rs = 0; + let rt = 0; + let shamt = 0; +} class MoveToLOHI<bits<6> func, string instr_asm>: - FR<0x00, func, (outs), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$src"), [], IIHiLo>; + FR<0x00, func, (outs), (ins CPURegs:$rs), + !strconcat(instr_asm, "\t$rs"), [], IIHiLo> { + let rt = 0; + let rd = 0; + let shamt = 0; +} class EffectiveAddress<string instr_asm> : - FI<0x09, (outs CPURegs:$dst), (ins mem_ea:$addr), - instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>; + FI<0x09, (outs CPURegs:$rt), (ins mem_ea:$addr), + instr_asm, [(set CPURegs:$rt, addr:$addr)], IIAlu>; // Count Leading Ones/Zeros in Word class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>: - FR<0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>, + FR<0x1c, func, (outs CPURegs:$rd), (ins CPURegs:$rs), + !strconcat(instr_asm, "\t$rd, $rs"), pattern, IIAlu>, Requires<[HasBitCount]> { let shamt = 0; let rt = rd; } // Sign Extend in Register. 
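 // seb and seh share opcode 0x3f with funct 0x20; the sa parameter of the
 // class below (0x10 for seb, 0x18 for seh) selects the variant, and the
 // HasSEInReg predicate now lives on the class itself.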
-class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>: - FR<0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$dst, $src"), - [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>; +class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt>: + FR<0x3f, 0x20, (outs CPURegs:$rd), (ins CPURegs:$rt), + !strconcat(instr_asm, "\t$rd, $rt"), + [(set CPURegs:$rd, (sext_inreg CPURegs:$rt, vt))], NoItinerary> { + let rs = 0; + let shamt = sa; + let Predicates = [HasSEInReg]; +} // Byte Swap -class ByteSwap<bits<6> func, string instr_asm>: - FR<0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$dst, $src"), - [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>; - -// Conditional Move -class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>: - FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T, - CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"), - [], NoItinerary>; +class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>: + FR<0x1f, func, (outs CPURegs:$rd), (ins CPURegs:$rt), + !strconcat(instr_asm, "\t$rd, $rt"), + [(set CPURegs:$rd, (bswap CPURegs:$rt))], NoItinerary> { + let rs = 0; + let shamt = sa; + let Predicates = [HasSwap]; +} // Read Hardware -class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$dst), (ins HWRegs:$src), - "rdhwr\t$dst, $src", [], IIAlu> { +class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd), + "rdhwr\t$rt, $rd", [], IIAlu> { let rs = 0; let shamt = 0; } +// Ext and Ins +class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins, + list<dag> pattern, InstrItinClass itin>: + FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), + pattern, itin>, Requires<[HasMips32r2]> { + bits<5> pos; + bits<5> sz; + let rd = sz; + let shamt = pos; +} + +// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). +class Atomic2Ops<PatFrag Op, string Opstr> : + MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), + !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), + [(set CPURegs:$dst, + (Op CPURegs:$ptr, CPURegs:$incr))]>; + +// Atomic Compare & Swap. 
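+// Like Atomic2Ops, this is only a pseudo: usesCustomInserter is set on all
+// of these below, and the custom inserter is expected to expand them into
+// ll/sc retry loops.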
+class AtomicCmpSwap<PatFrag Op, string Width> : + MipsPseudo<(outs CPURegs:$dst), + (ins CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap), + !strconcat("atomic_cmp_swap_", Width, + "\t$dst, $ptr, $cmp, $swap"), + [(set CPURegs:$dst, + (Op CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap))]>; + //===----------------------------------------------------------------------===// // Pseudo instructions //===----------------------------------------------------------------------===// @@ -427,112 +593,32 @@ def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>; let usesCustomInserter = 1 in { - def ATOMIC_LOAD_ADD_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_add_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_add_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_ADD_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_add_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_add_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_ADD_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_add_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_add_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_LOAD_SUB_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_sub_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_sub_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_SUB_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_sub_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_sub_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_SUB_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_sub_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_sub_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_LOAD_AND_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_and_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_and_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_AND_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_and_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_and_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_AND_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_and_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_and_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_LOAD_OR_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_or_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_or_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_OR_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_or_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_or_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_OR_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_or_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_or_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_LOAD_XOR_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_xor_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_xor_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_XOR_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins 
CPURegs:$ptr, CPURegs:$incr), - "atomic_load_xor_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_xor_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_XOR_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_xor_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_xor_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_LOAD_NAND_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_nand_8\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_nand_8 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_NAND_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_nand_16\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_nand_16 CPURegs:$ptr, CPURegs:$incr))]>; - def ATOMIC_LOAD_NAND_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), - "atomic_load_nand_32\t$dst, $ptr, $incr", - [(set CPURegs:$dst, (atomic_load_nand_32 CPURegs:$ptr, CPURegs:$incr))]>; - - def ATOMIC_SWAP_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val), - "atomic_swap_8\t$dst, $ptr, $val", - [(set CPURegs:$dst, (atomic_swap_8 CPURegs:$ptr, CPURegs:$val))]>; - def ATOMIC_SWAP_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val), - "atomic_swap_16\t$dst, $ptr, $val", - [(set CPURegs:$dst, (atomic_swap_16 CPURegs:$ptr, CPURegs:$val))]>; - def ATOMIC_SWAP_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val), - "atomic_swap_32\t$dst, $ptr, $val", - [(set CPURegs:$dst, (atomic_swap_32 CPURegs:$ptr, CPURegs:$val))]>; - - def ATOMIC_CMP_SWAP_I8 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval), - "atomic_cmp_swap_8\t$dst, $ptr, $oldval, $newval", - [(set CPURegs:$dst, - (atomic_cmp_swap_8 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>; - def ATOMIC_CMP_SWAP_I16 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval), - "atomic_cmp_swap_16\t$dst, $ptr, $oldval, $newval", - [(set CPURegs:$dst, - (atomic_cmp_swap_16 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>; - def ATOMIC_CMP_SWAP_I32 : MipsPseudo< - (outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval), - "atomic_cmp_swap_32\t$dst, $ptr, $oldval, $newval", - [(set CPURegs:$dst, - (atomic_cmp_swap_32 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>; + def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, "load_add_8">; + def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, "load_add_16">; + def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, "load_add_32">; + def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, "load_sub_8">; + def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, "load_sub_16">; + def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, "load_sub_32">; + def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, "load_and_8">; + def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, "load_and_16">; + def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, "load_and_32">; + def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, "load_or_8">; + def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, "load_or_16">; + def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, "load_or_32">; + def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, "load_xor_8">; + def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, "load_xor_16">; + def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, "load_xor_32">; + def ATOMIC_LOAD_NAND_I8 : 
Atomic2Ops<atomic_load_nand_8, "load_nand_8">; + def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, "load_nand_16">; + def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, "load_nand_32">; + + def ATOMIC_SWAP_I8 : Atomic2Ops<atomic_swap_8, "swap_8">; + def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, "swap_16">; + def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, "swap_32">; + + def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, "8">; + def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, "16">; + def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, "32">; } //===----------------------------------------------------------------------===// @@ -544,26 +630,26 @@ let usesCustomInserter = 1 in { //===----------------------------------------------------------------------===// /// Arithmetic Instructions (ALU Immediate) -def ADDiu : ArithI<0x09, "addiu", add, simm16, immSExt16>; -def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16>; -def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16>; -def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16>; -def ANDi : LogicI<0x0c, "andi", and>; -def ORi : LogicI<0x0d, "ori", or>; -def XORi : LogicI<0x0e, "xori", xor>; +def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>; +def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>; +def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>; +def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>; +def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>; +def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>; +def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>; def LUi : LoadUpper<0x0f, "lui">; /// Arithmetic Instructions (3-Operand, R-Type) -def ADDu : ArithR<0x00, 0x21, "addu", add, IIAlu, 1>; -def SUBu : ArithR<0x00, 0x23, "subu", sub, IIAlu>; -def ADD : ArithOverflowR<0x00, 0x20, "add", 1>; -def SUB : ArithOverflowR<0x00, 0x22, "sub">; -def SLT : SetCC_R<0x00, 0x2a, "slt", setlt>; -def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult>; -def AND : LogicR<0x24, "and", and>; -def OR : LogicR<0x25, "or", or>; -def XOR : LogicR<0x26, "xor", xor>; -def NOR : LogicNOR<0x00, 0x27, "nor">; +def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>; +def SUBu : ArithLogicR<0x00, 0x23, "subu", sub, IIAlu, CPURegs>; +def ADD : ArithOverflowR<0x00, 0x20, "add", IIAlu, CPURegs, 1>; +def SUB : ArithOverflowR<0x00, 0x22, "sub", IIAlu, CPURegs>; +def SLT : SetCC_R<0x00, 0x2a, "slt", setlt, CPURegs>; +def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult, CPURegs>; +def AND : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPURegs, 1>; +def OR : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPURegs, 1>; +def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>; +def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>; /// Shift Instructions def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>; @@ -574,45 +660,58 @@ def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>; def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>; // Rotate Instructions -let Predicates = [IsMips32r2] in { +let Predicates = [HasMips32r2] in { def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>; def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>; } /// Load and Store Instructions -def LB : LoadM<0x20, "lb", sextloadi8>; -def LBu : LoadM<0x24, "lbu", zextloadi8>; -def LH : LoadM<0x21, "lh", sextloadi16>; -def LHu : LoadM<0x25, "lhu", zextloadi16>; -def LW : LoadM<0x23, 
"lw", load>; -def SB : StoreM<0x28, "sb", truncstorei8>; -def SH : StoreM<0x29, "sh", truncstorei16>; -def SW : StoreM<0x2b, "sw", store>; +/// aligned +defm LB : LoadM32<0x20, "lb", sextloadi8>; +defm LBu : LoadM32<0x24, "lbu", zextloadi8>; +defm LH : LoadM32<0x21, "lh", sextloadi16_a>; +defm LHu : LoadM32<0x25, "lhu", zextloadi16_a>; +defm LW : LoadM32<0x23, "lw", load_a>; +defm SB : StoreM32<0x28, "sb", truncstorei8>; +defm SH : StoreM32<0x29, "sh", truncstorei16_a>; +defm SW : StoreM32<0x2b, "sw", store_a>; + +/// unaligned +defm ULH : LoadM32<0x21, "ulh", sextloadi16_u, 1>; +defm ULHu : LoadM32<0x25, "ulhu", zextloadi16_u, 1>; +defm ULW : LoadM32<0x23, "ulw", load_u, 1>; +defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>; +defm USW : StoreM32<0x2b, "usw", store_u, 1>; + +let hasSideEffects = 1 in +def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", + [(MipsSync imm:$stype)], NoItinerary> +{ + let opcode = 0; + let Inst{25-11} = 0; + let Inst{5-0} = 15; +} /// Load-linked, Store-conditional -let hasDelaySlot = 1 in +let mayLoad = 1 in def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr), "ll\t$dst, $addr", [], IILoad>; -let Constraints = "$src = $dst" in +let mayStore = 1, Constraints = "$src = $dst" in def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr), "sc\t$src, $addr", [], IIStore>; /// Jump and Branch Instructions def J : JumpFJ<0x02, "j">; -def JR : JumpFR<0x00, 0x08, "jr">; +let isIndirectBranch = 1 in + def JR : JumpFR<0x00, 0x08, "jr">; def JAL : JumpLink<0x03, "jal">; def JALR : JumpLinkReg<0x00, 0x09, "jalr">; -def BEQ : CBranch<0x04, "beq", seteq>; -def BNE : CBranch<0x05, "bne", setne>; - -let rt=1 in - def BGEZ : CBranchZero<0x01, "bgez", setge>; - -let rt=0 in { - def BGTZ : CBranchZero<0x07, "bgtz", setgt>; - def BLEZ : CBranchZero<0x07, "blez", setle>; - def BLTZ : CBranchZero<0x01, "bltz", setlt>; -} +def BEQ : CBranch<0x04, "beq", seteq, CPURegs>; +def BNE : CBranch<0x05, "bne", setne, CPURegs>; +def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>; +def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>; +def BLEZ : CBranchZero<0x07, 0, "blez", setle, CPURegs>; +def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>; def BGEZAL : BranchLink<"bgezal">; def BLTZAL : BranchLink<"bltzal">; @@ -639,40 +738,31 @@ let Uses = [LO] in def MFLO : MoveFromLOHI<0x12, "mflo">; /// Sign Ext In Register Instructions. -let Predicates = [HasSEInReg] in { - let shamt = 0x10, rs = 0 in - def SEB : SignExtInReg<0x21, "seb", i8>; - - let shamt = 0x18, rs = 0 in - def SEH : SignExtInReg<0x20, "seh", i16>; -} +def SEB : SignExtInReg<0x10, "seb", i8>; +def SEH : SignExtInReg<0x18, "seh", i16>; /// Count Leading -def CLZ : CountLeading<0b100000, "clz", - [(set CPURegs:$dst, (ctlz CPURegs:$src))]>; -def CLO : CountLeading<0b100001, "clo", - [(set CPURegs:$dst, (ctlz (not CPURegs:$src)))]>; +def CLZ : CountLeading<0x20, "clz", + [(set CPURegs:$rd, (ctlz CPURegs:$rs))]>; +def CLO : CountLeading<0x21, "clo", + [(set CPURegs:$rd, (ctlz (not CPURegs:$rs)))]>; /// Byte Swap -let Predicates = [HasSwap] in { - let shamt = 0x3, rs = 0 in - def WSBW : ByteSwap<0x20, "wsbw">; -} - -/// Conditional Move -def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>; -def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>; +def WSBW : ByteSwap<0x20, 0x2, "wsbw">; // Conditional moves: // These instructions are expanded in // MipsISelLowering::EmitInstrWithCustomInserter if target does not have // conditional move instructions. 
 // flag:int, data:int
-let usesCustomInserter = 1, shamt = 0, Constraints = "$F = $dst" in
-  class CondMovIntInt<bits<6> funct, string instr_asm> :
-    FR<0, funct, (outs CPURegs:$dst),
-       (ins CPURegs:$T, CPURegs:$cond, CPURegs:$F),
-       !strconcat(instr_asm, "\t$dst, $T, $cond"), [], NoItinerary>;
+class CondMovIntInt<bits<6> funct, string instr_asm> :
+  FR<0, funct, (outs CPURegs:$rd),
+     (ins CPURegs:$rs, CPURegs:$rt, CPURegs:$F),
+     !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
+  let shamt = 0;
+  let usesCustomInserter = 1;
+  let Constraints = "$F = $rd";
+}
 
 def MOVZ_I : CondMovIntInt<0x0a, "movz">;
 def MOVN_I : CondMovIntInt<0x0b, "movn">;
@@ -685,13 +775,13 @@ let addr=0 in
 // instructions. The same does not happen for stack address copies, so an
 // add op with mem ComplexPattern is used and the stack address copy
 // can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, $addr">;
+def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr">;
 
 // DynAlloc node points to dynamically allocated stack space.
 // $sp is added to the list of implicitly used registers to prevent dead code
 // elimination from removing instructions that modify $sp.
 let Uses = [SP] in
-def DynAlloc : EffectiveAddress<"addiu\t$dst, $addr">;
+def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr">;
 
 // MADD*/MSUB*
 def MADD : MArithR<0, "madd", MipsMAdd, 1>;
@@ -701,10 +791,25 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>;
 
 // MUL is an assembly macro in the currently used ISAs. In recent ISAs
 // it is a real instruction.
-def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul, 1>, Requires<[IsMips32]>;
+def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
+          Requires<[HasMips32]>;
 
 def RDHWR : ReadHardware;
 
+def EXT : ExtIns<0, "ext", (outs CPURegs:$rt),
+                 (ins CPURegs:$rs, uimm16:$pos, uimm16:$sz),
+                 [(set CPURegs:$rt,
+                   (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))],
+                 NoItinerary>;
+
+let Constraints = "$src = $rt" in
+def INS : ExtIns<4, "ins", (outs CPURegs:$rt),
+                 (ins CPURegs:$rs, uimm16:$pos, uimm16:$sz, CPURegs:$src),
+                 [(set CPURegs:$rt,
+                   (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz,
+                    CPURegs:$src))],
+                 NoItinerary>;
+
 //===----------------------------------------------------------------------===//
 // Arbitrary patterns that map to one or more instructions
 //===----------------------------------------------------------------------===//
@@ -738,16 +843,20 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
 
 // hi/lo relocs
 def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
 def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
+def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
 def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
           (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
 def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
           (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
 
 def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
 def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
           (ADDiu CPURegs:$hi, tjumptable:$lo)>;
 
 def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
 def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
           (ADDiu CPURegs:$hi, tconstpool:$lo)>;
 
@@ -763,6 +872,7 @@ def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
 
 // tprel hi/lo
 def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi
tglobaltlsaddr:$in)>; +def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)), (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; @@ -784,60 +894,67 @@ def : Pat<(not CPURegs:$in), // extended load and stores def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>; def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>; -def : Pat<(extloadi16 addr:$src), (LHu addr:$src)>; +def : Pat<(extloadi16_a addr:$src), (LHu addr:$src)>; +def : Pat<(extloadi16_u addr:$src), (ULHu addr:$src)>; // peepholes def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; // brcond patterns -def : Pat<(brcond (setne CPURegs:$lhs, 0), bb:$dst), - (BNE CPURegs:$lhs, ZERO, bb:$dst)>; -def : Pat<(brcond (seteq CPURegs:$lhs, 0), bb:$dst), - (BEQ CPURegs:$lhs, ZERO, bb:$dst)>; - -def : Pat<(brcond (setge CPURegs:$lhs, CPURegs:$rhs), bb:$dst), - (BEQ (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (setuge CPURegs:$lhs, CPURegs:$rhs), bb:$dst), - (BEQ (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (setge CPURegs:$lhs, immSExt16:$rhs), bb:$dst), - (BEQ (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (setuge CPURegs:$lhs, immSExt16:$rhs), bb:$dst), - (BEQ (SLTiu CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; - -def : Pat<(brcond (setle CPURegs:$lhs, CPURegs:$rhs), bb:$dst), - (BEQ (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>; -def : Pat<(brcond (setule CPURegs:$lhs, CPURegs:$rhs), bb:$dst), - (BEQ (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>; - -def : Pat<(brcond CPURegs:$cond, bb:$dst), - (BNE CPURegs:$cond, ZERO, bb:$dst)>; +multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp, + Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp, + Instruction SLTiuOp, Register ZEROReg> { +def : Pat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), + (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; +def : Pat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), + (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; + +def : Pat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; +def : Pat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; +def : Pat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst), + (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; +def : Pat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst), + (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; + +def : Pat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; +def : Pat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; + +def : Pat<(brcond RC:$cond, bb:$dst), + (BNEOp RC:$cond, ZEROReg, bb:$dst)>; +} + +defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>; // select patterns multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> { - def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; - def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setuge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; - def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setge CPURegs:$lhs, immSExt16:$rhs)), RC:$T, RC:$F), 
(MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>; - def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), RC:$T, RC:$F), + def : Pat<(select (i32 (setuge CPURegs:$lh, immSExt16:$rh)), RC:$T, RC:$F), (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>; - def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setle CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>; - def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setule CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>; - def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (seteq CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; - def : Pat<(select (seteq CPURegs:$lhs, 0), RC:$T, RC:$F), + def : Pat<(select (i32 (seteq CPURegs:$lhs, 0)), RC:$T, RC:$F), (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>; } multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> { - def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + def : Pat<(select (i32 (setne CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F), (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; def : Pat<(select CPURegs:$cond, RC:$T, RC:$F), (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>; - def : Pat<(select (setne CPURegs:$lhs, 0), RC:$T, RC:$F), + def : Pat<(select (i32 (setne CPURegs:$lhs, 0)), RC:$T, RC:$F), (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>; } @@ -845,30 +962,48 @@ defm : MovzPats<CPURegs, MOVZ_I>; defm : MovnPats<CPURegs, MOVN_I>; // setcc patterns -def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs), - (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>; -def : Pat<(setne CPURegs:$lhs, CPURegs:$rhs), - (SLTu ZERO, (XOR CPURegs:$lhs, CPURegs:$rhs))>; - -def : Pat<(setle CPURegs:$lhs, CPURegs:$rhs), - (XORi (SLT CPURegs:$rhs, CPURegs:$lhs), 1)>; -def : Pat<(setule CPURegs:$lhs, CPURegs:$rhs), - (XORi (SLTu CPURegs:$rhs, CPURegs:$lhs), 1)>; - -def : Pat<(setgt CPURegs:$lhs, CPURegs:$rhs), - (SLT CPURegs:$rhs, CPURegs:$lhs)>; -def : Pat<(setugt CPURegs:$lhs, CPURegs:$rhs), - (SLTu CPURegs:$rhs, CPURegs:$lhs)>; - -def : Pat<(setge CPURegs:$lhs, CPURegs:$rhs), - (XORi (SLT CPURegs:$lhs, CPURegs:$rhs), 1)>; -def : Pat<(setuge CPURegs:$lhs, CPURegs:$rhs), - (XORi (SLTu CPURegs:$lhs, CPURegs:$rhs), 1)>; - -def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs), - (XORi (SLTi CPURegs:$lhs, immSExt16:$rhs), 1)>; -def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs), - (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>; +multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp, + Instruction SLTuOp, Register ZEROReg> { + def : Pat<(seteq RC:$lhs, RC:$rhs), + (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; + def : Pat<(setne RC:$lhs, RC:$rhs), + (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; +} + +multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { + def : Pat<(setle RC:$lhs, RC:$rhs), + (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>; + def : Pat<(setule RC:$lhs, RC:$rhs), + (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>; +} + +multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { + def : Pat<(setgt RC:$lhs, RC:$rhs), + (SLTOp RC:$rhs, RC:$lhs)>; + def : Pat<(setugt RC:$lhs, RC:$rhs), + (SLTuOp RC:$rhs, RC:$lhs)>; +} + +multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { + def : Pat<(setge RC:$lhs, RC:$rhs), + (XORi (SLTOp RC:$lhs, 
RC:$rhs), 1)>; + def : Pat<(setuge RC:$lhs, RC:$rhs), + (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>; +} + +multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp, + Instruction SLTiuOp> { + def : Pat<(setge RC:$lhs, immSExt16:$rhs), + (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>; + def : Pat<(setuge RC:$lhs, immSExt16:$rhs), + (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>; +} + +defm : SeteqPats<CPURegs, SLTiu, XOR, SLTu, ZERO>; +defm : SetlePats<CPURegs, SLT, SLTu>; +defm : SetgtPats<CPURegs, SLT, SLTu>; +defm : SetgePats<CPURegs, SLT, SLTu>; +defm : SetgeImmPats<CPURegs, SLTi, SLTiu>; // select MipsDynAlloc def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; @@ -878,4 +1013,5 @@ def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; //===----------------------------------------------------------------------===// include "MipsInstrFPU.td" +include "Mips64InstrInfo.td" diff --git a/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp new file mode 100644 index 0000000..28c2b48 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp @@ -0,0 +1,230 @@ +//===- MipsJITInfo.cpp - Implement the JIT interfaces for the Mips target -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the Mips target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "MipsJITInfo.h" +#include "MipsInstrInfo.h" +#include "MipsRelocations.h" +#include "MipsSubtarget.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Memory.h" +#include <cstdlib> +using namespace llvm; + + +void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); +} + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +// Get the ASMPREFIX for the current host. This is often '_'. +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ +#endif +#define GETASMPREFIX2(X) #X +#define GETASMPREFIX(X) GETASMPREFIX2(X) +#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) + +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. This code saves registers, calls +// MipsCompilationCallbackC and restores registers. +extern "C" { +#if defined (__mips__) +void MipsCompilationCallback(); + + asm( + ".text\n" + ".align 2\n" + ".globl " ASMPREFIX "MipsCompilationCallback\n" + ASMPREFIX "MipsCompilationCallback:\n" + ".ent " ASMPREFIX "MipsCompilationCallback\n" + ".frame $29, 32, $31\n" + ".set noreorder\n" + ".cpload $t9\n" + + "addiu $sp, $sp, -60\n" + ".cprestore 16\n" + + // Save argument registers a0, a1, a2, a3, f12, f14 since they may contain + // stuff for the real target function right now. We have to act as if this + // whole compilation callback doesn't exist as far as the caller is + // concerned. 
We also need to save the ra register since it contains the + // original return address, and t8 register since it contains the address + // of the end of function stub. + "sw $a0, 20($sp)\n" + "sw $a1, 24($sp)\n" + "sw $a2, 28($sp)\n" + "sw $a3, 32($sp)\n" + "sw $ra, 36($sp)\n" + "sw $t8, 40($sp)\n" + "sdc1 $f12, 44($sp)\n" + "sdc1 $f14, 52($sp)\n" + + // t8 points at the end of function stub. Pass the beginning of the stub + // to the MipsCompilationCallbackC. + "addiu $a0, $t8, -16\n" + "jal " ASMPREFIX "MipsCompilationCallbackC\n" + "nop\n" + + // Restore registers. + "lw $a0, 20($sp)\n" + "lw $a1, 24($sp)\n" + "lw $a2, 28($sp)\n" + "lw $a3, 32($sp)\n" + "lw $ra, 36($sp)\n" + "lw $t8, 40($sp)\n" + "ldc1 $f12, 44($sp)\n" + "ldc1 $f14, 52($sp)\n" + "addiu $sp, $sp, 60\n" + + // Jump to the (newly modified) stub to invoke the real function. + "addiu $t8, $t8, -16\n" + "jr $t8\n" + "nop\n" + + ".set reorder\n" + ".end " ASMPREFIX "MipsCompilationCallback\n" + ); +#else // host != Mips + void MipsCompilationCallback() { + llvm_unreachable( + "Cannot call MipsCompilationCallback() on a non-Mips arch!"); + } +#endif +} + +/// MipsCompilationCallbackC - This is the target-specific function invoked +/// by the function stub when we did not know the real target of a call. +/// This function must locate the start of the stub or call site and pass +/// it into the JIT compiler function. +extern "C" void MipsCompilationCallbackC(intptr_t StubAddr) { + // Get the address of the compiled code for this function. + intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr); + + // Rewrite the function stub so that we don't end up here every time we + // execute the call. We're replacing the first four instructions of the + // stub with code that jumps to the compiled function: + // lui $t9, %hi(NewVal) + // addiu $t9, $t9, %lo(NewVal) + // jr $t9 + // nop + + int Hi = ((unsigned)NewVal & 0xffff0000) >> 16; + if ((NewVal & 0x8000) != 0) + Hi++; + int Lo = (int)(NewVal & 0xffff); + + *(intptr_t *)(StubAddr) = 0xf << 26 | 25 << 16 | Hi; + *(intptr_t *)(StubAddr + 4) = 9 << 26 | 25 << 21 | 25 << 16 | Lo; + *(intptr_t *)(StubAddr + 8) = 25 << 21 | 8; + *(intptr_t *)(StubAddr + 12) = 0; + + sys::Memory::InvalidateInstructionCache((void*) StubAddr, 16); +} + +TargetJITInfo::LazyResolverFn MipsJITInfo::getLazyResolverFunction( + JITCompilerFn F) { + JITCompilerFunction = F; + return MipsCompilationCallback; +} + +TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() { + // The stub contains 4 4-byte instructions, aligned at 4 bytes. See + // emitFunctionStub for details. 
+ StubLayout Result = { 4*4, 4 }; + return Result; +} + +void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + JCE.emitAlignment(4); + void *Addr = (void*) (JCE.getCurrentPCValue()); + if (!sys::Memory::setRangeWritable(Addr, 16)) + llvm_unreachable("ERROR: Unable to mark stub writable."); + + intptr_t EmittedAddr; + if (Fn != (void*)(intptr_t)MipsCompilationCallback) + EmittedAddr = (intptr_t)Fn; + else + EmittedAddr = (intptr_t)MipsCompilationCallback; + + + int Hi = ((unsigned)EmittedAddr & 0xffff0000) >> 16; + if ((EmittedAddr & 0x8000) != 0) + Hi++; + int Lo = (int)(EmittedAddr & 0xffff); + + // lui t9, %hi(EmittedAddr) + // addiu t9, t9, %lo(EmittedAddr) + // jalr t8, t9 + // nop + JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi); + JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo); + JCE.emitWordLE(25 << 21 | 24 << 11 | 9); + JCE.emitWordLE(0); + + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) + llvm_unreachable("ERROR: Unable to mark stub executable."); + + return Addr; +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void MipsJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + + void *RelocPos = (char*) Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = (intptr_t) MR->getResultPointer(); + + switch ((Mips::RelocationType) MR->getRelocationType()) { + case Mips::reloc_mips_branch: + ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_26: + ResultPtr = (ResultPtr & 0x0fffffff) >> 2; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_hi: + ResultPtr = ResultPtr >> 16; + if ((((intptr_t) (MR->getResultPointer()) & 0xffff) >> 15) == 1) { + ResultPtr += 1; + } + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_lo: + ResultPtr = ResultPtr & 0xffff; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + default: + llvm_unreachable("ERROR: Unknown Mips relocation."); + } + } +} diff --git a/contrib/llvm/lib/Target/Mips/MipsJITInfo.h b/contrib/llvm/lib/Target/Mips/MipsJITInfo.h new file mode 100644 index 0000000..41f32a3 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsJITInfo.h @@ -0,0 +1,70 @@ +//===- MipsJITInfo.h - Mips implementation of the JIT interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MipsJITInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MIPSJITINFO_H +#define MIPSJITINFO_H + +#include "MipsMachineFunction.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { +class MipsTargetMachine; + +class MipsJITInfo : public TargetJITInfo { + + bool IsPIC; + + public: + explicit MipsJITInfo() : + IsPIC(false) {} + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + virtual void replaceMachineCodeForFunction(void *Old, void *New); + + // getStubLayout - Returns the size and alignment of the largest call stub + // on Mips. + virtual StubLayout getStubLayout(); + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + virtual void *emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE); + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + virtual void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase); + + /// Initialize - Initialize internal stage for the function being JITted. 
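+  /// Only the PIC flag is recorded; the MachineFunction argument is unused.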
+ void Initialize(const MachineFunction &MF, bool isPIC) { + IsPIC = isPIC; + } + +}; +} + +#endif diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp index f5cc3aa..608a7d2 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp @@ -29,10 +29,10 @@ MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf, : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {} MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, - MachineOperandType MOTy) const { + MachineOperandType MOTy, + unsigned Offset) const { MipsMCSymbolRefExpr::VariantKind Kind; const MCSymbol *Symbol; - int Offset = 0; switch(MO.getTargetFlags()) { default: assert(0 && "Invalid target flag!"); @@ -46,6 +46,11 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break; case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break; case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break; + case MipsII::MO_GPOFF_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_HI; break; + case MipsII::MO_GPOFF_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_LO; break; + case MipsII::MO_GOT_DISP: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_DISP; break; + case MipsII::MO_GOT_PAGE: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_PAGE; break; + case MipsII::MO_GOT_OFST: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_OFST; break; } switch (MOTy) { @@ -72,7 +77,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, case MachineOperand::MO_ConstantPoolIndex: Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); if (MO.getOffset()) - Offset = MO.getOffset(); + Offset += MO.getOffset(); break; default: @@ -83,36 +88,39 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, Ctx)); } +MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const { + MachineOperandType MOTy = MO.getType(); + + switch (MOTy) { + default: + assert(0 && "unknown operand type"); + break; + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) break; + return MCOperand::CreateReg(MO.getReg()); + case MachineOperand::MO_Immediate: + return MCOperand::CreateImm(MO.getImm()); + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand(MO, MOTy, 0); + } + + return MCOperand(); +} + void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - MCOperand MCOp; - MachineOperandType MOTy = MO.getType(); + MCOperand MCOp = LowerOperand(MO); - switch (MOTy) { - default: - MI->dump(); - llvm_unreachable("unknown operand type"); - case MachineOperand::MO_Register: - // Ignore all implicit register operands. 
- if (MO.isImplicit()) continue; - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_MachineBasicBlock: - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_ExternalSymbol: - case MachineOperand::MO_JumpTableIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand(MO, MOTy); - break; - } - - OutMI.addOperand(MCOp); + if (MCOp.isValid()) + OutMI.addOperand(MCOp); } } diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h index ec5201b..223f23a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h +++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h @@ -1,4 +1,4 @@ -//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------===// +//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------==// // // The LLVM Compiler Infrastructure // @@ -36,7 +36,8 @@ public: void Lower(const MachineInstr *MI, MCInst &OutMI) const; private: MCOperand LowerSymbolOperand(const MachineOperand &MO, - MachineOperandType MOTy) const; + MachineOperandType MOTy, unsigned Offset) const; + MCOperand LowerOperand(const MachineOperand& MO) const; }; } diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp index 9a2bdae..a0a242c 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.cpp @@ -33,6 +33,11 @@ void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { case VK_Mips_GOTTPREL: OS << "%gottprel("; break; case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; + case VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break; + case VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break; + case VK_Mips_GOT_DISP: OS << "%got_disp("; break; + case VK_Mips_GOT_PAGE: OS << "%got_page("; break; + case VK_Mips_GOT_OFST: OS << "%got_ofst("; break; } OS << *Symbol; @@ -43,7 +48,9 @@ void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { OS << Offset; } - if (Kind != VK_Mips_None) + if (Kind == VK_Mips_GPOFF_HI || Kind == VK_Mips_GPOFF_LO) + OS << ")))"; + else if (Kind != VK_Mips_None) OS << ')'; } diff --git a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h index 3e69596..55e85a7 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h +++ b/contrib/llvm/lib/Target/Mips/MipsMCSymbolRefExpr.h @@ -25,7 +25,12 @@ public: VK_Mips_TLSGD, VK_Mips_GOTTPREL, VK_Mips_TPREL_HI, - VK_Mips_TPREL_LO + VK_Mips_TPREL_LO, + VK_Mips_GPOFF_HI, + VK_Mips_GPOFF_LO, + VK_Mips_GOT_DISP, + VK_Mips_GOT_PAGE, + VK_Mips_GOT_OFST }; private: diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h index dbb7a67..bc30b6b 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h +++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h @@ -51,16 +51,12 @@ private: mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; - /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap - /// intrinsics, it is necessary to use a temporary stack location. - /// This field holds the frame index of this location. 
- int AtomicFrameIndex; public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), - MaxCallFrameSize(0), AtomicFrameIndex(-1) + MaxCallFrameSize(0) {} bool isInArgFI(int FI) const { @@ -104,9 +100,6 @@ public: unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } - - int getAtomicFrameIndex() const { return AtomicFrameIndex; } - void setAtomicFrameIndex(int Index) { AtomicFrameIndex = Index; } }; } // end of namespace llvm diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp index 24390da..f8c0fda 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -44,7 +43,7 @@ using namespace llvm; MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, const TargetInstrInfo &tii) - : MipsGenRegisterInfo(), Subtarget(ST), TII(tii) {} + : MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {} /// getRegisterNumbering - Given the enum value for some register, e.g. /// Mips::RA, return the number that it corresponds to (e.g. 31). @@ -52,39 +51,87 @@ unsigned MipsRegisterInfo:: getRegisterNumbering(unsigned RegEnum) { switch (RegEnum) { - case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0; - case Mips::AT : case Mips::F1 : return 1; - case Mips::V0 : case Mips::F2 : case Mips::D1 : return 2; - case Mips::V1 : case Mips::F3 : return 3; - case Mips::A0 : case Mips::F4 : case Mips::D2 : return 4; - case Mips::A1 : case Mips::F5 : return 5; - case Mips::A2 : case Mips::F6 : case Mips::D3 : return 6; - case Mips::A3 : case Mips::F7 : return 7; - case Mips::T0 : case Mips::F8 : case Mips::D4 : return 8; - case Mips::T1 : case Mips::F9 : return 9; - case Mips::T2 : case Mips::F10: case Mips::D5: return 10; - case Mips::T3 : case Mips::F11: return 11; - case Mips::T4 : case Mips::F12: case Mips::D6: return 12; - case Mips::T5 : case Mips::F13: return 13; - case Mips::T6 : case Mips::F14: case Mips::D7: return 14; - case Mips::T7 : case Mips::F15: return 15; - case Mips::S0 : case Mips::F16: case Mips::D8: return 16; - case Mips::S1 : case Mips::F17: return 17; - case Mips::S2 : case Mips::F18: case Mips::D9: return 18; - case Mips::S3 : case Mips::F19: return 19; - case Mips::S4 : case Mips::F20: case Mips::D10: return 20; - case Mips::S5 : case Mips::F21: return 21; - case Mips::S6 : case Mips::F22: case Mips::D11: return 22; - case Mips::S7 : case Mips::F23: return 23; - case Mips::T8 : case Mips::F24: case Mips::D12: return 24; - case Mips::T9 : case Mips::F25: return 25; - case Mips::K0 : case Mips::F26: case Mips::D13: return 26; - case Mips::K1 : case Mips::F27: return 27; - case Mips::GP : case Mips::F28: case Mips::D14: return 28; - case Mips::SP : case Mips::F29: return 29; - case Mips::FP : case Mips::F30: case Mips::D15: return 30; - case Mips::RA : case Mips::F31: return 31; - default: llvm_unreachable("Unknown register number!"); + case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64: + case Mips::D0: + 
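// (Each case group collapses every view of the same hardware register --
// 32-bit, 64-bit, single- and double-precision FP -- onto one hardware
// encoding; e.g. ZERO, ZERO_64, F0, D0_64 and D0 all map to 0.)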
return 0; + case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64: + return 1; + case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64: + case Mips::D1: + return 2; + case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64: + return 3; + case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64: + case Mips::D2: + return 4; + case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64: + return 5; + case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64: + case Mips::D3: + return 6; + case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64: + return 7; + case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64: + case Mips::D4: + return 8; + case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64: + return 9; + case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64: + case Mips::D5: + return 10; + case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64: + return 11; + case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64: + case Mips::D6: + return 12; + case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64: + return 13; + case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64: + case Mips::D7: + return 14; + case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64: + return 15; + case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64: + case Mips::D8: + return 16; + case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64: + return 17; + case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64: + case Mips::D9: + return 18; + case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64: + return 19; + case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64: + case Mips::D10: + return 20; + case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64: + return 21; + case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64: + case Mips::D11: + return 22; + case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64: + return 23; + case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64: + case Mips::D12: + return 24; + case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64: + return 25; + case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64: + case Mips::D13: + return 26; + case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64: + return 27; + case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64: + case Mips::D14: + return 28; + case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64: + return 29; + case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64: + case Mips::D15: + return 30; + case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64: + return 31; + default: llvm_unreachable("Unknown register number!"); } return 0; // Not reached } @@ -101,7 +148,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const { // Mips callee-save register range is $16-$23, $f20-$f30 static const unsigned SingleFloatOnlyCalleeSavedRegs[] = { - Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26, + Mips::F31, Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26, Mips::F25, Mips::F24, Mips::F23, Mips::F22, Mips::F21, Mips::F20, Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4, Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0 @@ -113,31 +160,71 @@ getCalleeSavedRegs(const MachineFunction *MF) const Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0 }; + static const unsigned N32CalleeSavedRegs[] = { 
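// (As the tables show, N32 preserves only the odd-numbered 64-bit FP
// registers, while N64 preserves $f24-$f31; both add GP_64 to the integer
// callee-saved set.)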
+ Mips::D31_64, Mips::D29_64, Mips::D27_64, Mips::D25_64, Mips::D23_64, + Mips::D21_64, + Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64, + Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64, + Mips::S0_64, 0 + }; + + static const unsigned N64CalleeSavedRegs[] = { + Mips::D31_64, Mips::D30_64, Mips::D29_64, Mips::D28_64, Mips::D27_64, + Mips::D26_64, Mips::D25_64, Mips::D24_64, + Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64, + Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64, + Mips::S0_64, 0 + }; + if (Subtarget.isSingleFloat()) return SingleFloatOnlyCalleeSavedRegs; - else + else if (!Subtarget.hasMips64()) return Mips32CalleeSavedRegs; + else if (Subtarget.isABI_N32()) + return N32CalleeSavedRegs; + + assert(Subtarget.isABI_N64()); + return N64CalleeSavedRegs; } BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { + static const unsigned ReservedCPURegs[] = { + Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, + Mips::GP, Mips::SP, Mips::FP, Mips::RA, 0 + }; + + static const unsigned ReservedCPU64Regs[] = { + Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, + Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64, 0 + }; + BitVector Reserved(getNumRegs()); - Reserved.set(Mips::ZERO); - Reserved.set(Mips::AT); - Reserved.set(Mips::K0); - Reserved.set(Mips::K1); - Reserved.set(Mips::GP); - Reserved.set(Mips::SP); - Reserved.set(Mips::FP); - Reserved.set(Mips::RA); - Reserved.set(Mips::F31); - Reserved.set(Mips::D15); - - // SRV4 requires that odd register can't be used. - if (!Subtarget.isSingleFloat() && !Subtarget.isMips32()) - for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2) - Reserved.set(FReg); + typedef TargetRegisterClass::iterator RegIter; + + for (const unsigned *Reg = ReservedCPURegs; *Reg; ++Reg) + Reserved.set(*Reg); + if (Subtarget.hasMips64()) { + for (const unsigned *Reg = ReservedCPU64Regs; *Reg; ++Reg) + Reserved.set(*Reg); + + // Reserve all registers in AFGR64. + for (RegIter Reg = Mips::AFGR64RegisterClass->begin(); + Reg != Mips::AFGR64RegisterClass->end(); ++Reg) + Reserved.set(*Reg); + } + else { + // Reserve all registers in CPU64Regs & FGR64. 
+ for (RegIter Reg = Mips::CPU64RegsRegisterClass->begin(); + Reg != Mips::CPU64RegsRegisterClass->end(); ++Reg) + Reserved.set(*Reg); + + for (RegIter Reg = Mips::FGR64RegisterClass->begin(); + Reg != Mips::FGR64RegisterClass->end(); ++Reg) + Reserved.set(*Reg); + } + return Reserved; } @@ -245,11 +332,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } unsigned MipsRegisterInfo:: -getRARegister() const { - return Mips::RA; -} - -unsigned MipsRegisterInfo:: getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -267,12 +349,3 @@ getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); return 0; } - -int MipsRegisterInfo:: -getDwarfRegNum(unsigned RegNum, bool isEH) const { - return MipsGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -int MipsRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - return MipsGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); -} diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h index 646369b..67e57dd 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h @@ -57,15 +57,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; /// Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; /// Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td index f0db518..925ad9e 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td @@ -10,6 +10,11 @@ //===----------------------------------------------------------------------===// // Declarations that describe the MIPS register file //===----------------------------------------------------------------------===// +let Namespace = "Mips" in { +def sub_fpeven : SubRegIndex; +def sub_fpodd : SubRegIndex; +def sub_32 : SubRegIndex; +} // We have banks of 32 registers each. 
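// The sub_32 index introduced above is what ties each new 64-bit register
// to its 32-bit half. A rough illustration in pass code (hypothetical,
// assuming a TargetRegisterInfo *TRI is at hand):
//
//   unsigned Lo = TRI->getSubReg(Mips::V0_64, Mips::sub_32);
//   assert(Lo == Mips::V0 && "sub_32 maps a 64-bit GPR to its low half");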
class MipsReg<string n> : Register<n> {
@@ -28,22 +33,31 @@ class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
   let Num = num;
 }
+// Mips 64-bit CPU Registers
+class Mips64GPRReg<bits<5> num, string n, list<Register> subregs>
+  : MipsRegWithSubRegs<n, subregs> {
+  let Num = num;
+  let SubRegIndices = [sub_32];
+}
+
 // Mips 32-bit FPU Registers
 class FPR<bits<5> num, string n> : MipsReg<n> {
   let Num = num;
 }
 // Mips 64-bit (aliased) FPU Registers
-let Namespace = "Mips" in {
-def sub_fpeven : SubRegIndex;
-def sub_fpodd  : SubRegIndex;
-}
 class AFPR<bits<5> num, string n, list<Register> subregs>
   : MipsRegWithSubRegs<n, subregs> {
   let Num = num;
   let SubRegIndices = [sub_fpeven, sub_fpodd];
 }
+class AFPR64<bits<5> num, string n, list<Register> subregs>
+  : MipsRegWithSubRegs<n, subregs> {
+  let Num = num;
+  let SubRegIndices = [sub_32];
+}
+
 // Mips Hardware Registers
 class HWR<bits<5> num, string n> : MipsReg<n> {
   let Num = num;
@@ -54,6 +68,7 @@ class HWR<bits<5> num, string n> : MipsReg<n> {
//===----------------------------------------------------------------------===//
 let Namespace = "Mips" in {
+  // FIXME: Fix DwarfRegNum.
   // General Purpose Registers
   def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
@@ -89,6 +104,40 @@ let Namespace = "Mips" in {
   def FP   : MipsGPRReg< 30, "FP">,   DwarfRegNum<[30]>;
   def RA   : MipsGPRReg< 31, "RA">,   DwarfRegNum<[31]>;
+  // General Purpose 64-bit Registers
+  def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>;
+  def AT_64   : Mips64GPRReg< 1, "AT",   [AT]>;
+  def V0_64   : Mips64GPRReg< 2, "2",    [V0]>;
+  def V1_64   : Mips64GPRReg< 3, "3",    [V1]>;
+  def A0_64   : Mips64GPRReg< 4, "4",    [A0]>;
+  def A1_64   : Mips64GPRReg< 5, "5",    [A1]>;
+  def A2_64   : Mips64GPRReg< 6, "6",    [A2]>;
+  def A3_64   : Mips64GPRReg< 7, "7",    [A3]>;
+  def T0_64   : Mips64GPRReg< 8, "8",    [T0]>;
+  def T1_64   : Mips64GPRReg< 9, "9",    [T1]>;
+  def T2_64   : Mips64GPRReg< 10, "10",  [T2]>;
+  def T3_64   : Mips64GPRReg< 11, "11",  [T3]>;
+  def T4_64   : Mips64GPRReg< 12, "12",  [T4]>;
+  def T5_64   : Mips64GPRReg< 13, "13",  [T5]>;
+  def T6_64   : Mips64GPRReg< 14, "14",  [T6]>;
+  def T7_64   : Mips64GPRReg< 15, "15",  [T7]>;
+  def S0_64   : Mips64GPRReg< 16, "16",  [S0]>;
+  def S1_64   : Mips64GPRReg< 17, "17",  [S1]>;
+  def S2_64   : Mips64GPRReg< 18, "18",  [S2]>;
+  def S3_64   : Mips64GPRReg< 19, "19",  [S3]>;
+  def S4_64   : Mips64GPRReg< 20, "20",  [S4]>;
+  def S5_64   : Mips64GPRReg< 21, "21",  [S5]>;
+  def S6_64   : Mips64GPRReg< 22, "22",  [S6]>;
+  def S7_64   : Mips64GPRReg< 23, "23",  [S7]>;
+  def T8_64   : Mips64GPRReg< 24, "24",  [T8]>;
+  def T9_64   : Mips64GPRReg< 25, "25",  [T9]>;
+  def K0_64   : Mips64GPRReg< 26, "26",  [K0]>;
+  def K1_64   : Mips64GPRReg< 27, "27",  [K1]>;
+  def GP_64   : Mips64GPRReg< 28, "GP",  [GP]>;
+  def SP_64   : Mips64GPRReg< 29, "SP",  [SP]>;
+  def FP_64   : Mips64GPRReg< 30, "FP",  [FP]>;
+  def RA_64   : Mips64GPRReg< 31, "RA",  [RA]>;
+
   /// Mips Single point precision FPU Registers
   def F0  : FPR< 0,  "F0">, DwarfRegNum<[32]>;
   def F1  : FPR< 1,  "F1">, DwarfRegNum<[33]>;
@@ -142,10 +191,49 @@
 def D14 : AFPR<28, "F28", [F28, F29]>;
 def D15 : AFPR<30, "F30", [F30, F31]>;
+  /// Mips double-precision FPU registers in MFP64 mode.
+ def D0_64 : AFPR64<0, "F0", [F0]>; + def D1_64 : AFPR64<1, "F1", [F1]>; + def D2_64 : AFPR64<2, "F2", [F2]>; + def D3_64 : AFPR64<3, "F3", [F3]>; + def D4_64 : AFPR64<4, "F4", [F4]>; + def D5_64 : AFPR64<5, "F5", [F5]>; + def D6_64 : AFPR64<6, "F6", [F6]>; + def D7_64 : AFPR64<7, "F7", [F7]>; + def D8_64 : AFPR64<8, "F8", [F8]>; + def D9_64 : AFPR64<9, "F9", [F9]>; + def D10_64 : AFPR64<10, "F10", [F10]>; + def D11_64 : AFPR64<11, "F11", [F11]>; + def D12_64 : AFPR64<12, "F12", [F12]>; + def D13_64 : AFPR64<13, "F13", [F13]>; + def D14_64 : AFPR64<14, "F14", [F14]>; + def D15_64 : AFPR64<15, "F15", [F15]>; + def D16_64 : AFPR64<16, "F16", [F16]>; + def D17_64 : AFPR64<17, "F17", [F17]>; + def D18_64 : AFPR64<18, "F18", [F18]>; + def D19_64 : AFPR64<19, "F19", [F19]>; + def D20_64 : AFPR64<20, "F20", [F20]>; + def D21_64 : AFPR64<21, "F21", [F21]>; + def D22_64 : AFPR64<22, "F22", [F22]>; + def D23_64 : AFPR64<23, "F23", [F23]>; + def D24_64 : AFPR64<24, "F24", [F24]>; + def D25_64 : AFPR64<25, "F25", [F25]>; + def D26_64 : AFPR64<26, "F26", [F26]>; + def D27_64 : AFPR64<27, "F27", [F27]>; + def D28_64 : AFPR64<28, "F28", [F28]>; + def D29_64 : AFPR64<29, "F29", [F29]>; + def D30_64 : AFPR64<30, "F30", [F30]>; + def D31_64 : AFPR64<31, "F31", [F31]>; + // Hi/Lo registers def HI : Register<"hi">, DwarfRegNum<[64]>; def LO : Register<"lo">, DwarfRegNum<[65]>; + let SubRegIndices = [sub_32] in { + def HI64 : RegisterWithSubRegs<"hi", [HI]>; + def LO64 : RegisterWithSubRegs<"lo", [LO]>; + } + // Status flags register def FCR31 : Register<"31">; @@ -167,6 +255,18 @@ def CPURegs : RegisterClass<"Mips", [i32], 32, (add // Reserved ZERO, AT, K0, K1, GP, SP, FP, RA)>; +def CPU64Regs : RegisterClass<"Mips", [i64], 64, (add + // Return Values and Arguments + V0_64, V1_64, A0_64, A1_64, A2_64, A3_64, + // Not preserved across procedure calls + T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, T9_64, + // Callee save + S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64, + // Reserved + ZERO_64, AT_64, K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)> { + let SubRegClasses = [(CPURegs sub_32)]; +} + // 64bit fp: // * FGR64 - 32 64-bit registers // * AFGR64 - 16 32-bit even registers (32-bit FP Mode) @@ -182,17 +282,22 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add // Not preserved across procedure calls D2, D3, D4, D5, D8, D9, // Callee save - D10, D11, D12, D13, D14, - // Reserved - D15)> { + D10, D11, D12, D13, D14, D15)> { let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)]; } +def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)> { + let SubRegClasses = [(FGR32 sub_32)]; +} + // Condition Register for floating point operations def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>; // Hi/Lo Registers def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>; +def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> { + let SubRegClasses = [(HILO sub_32)]; +} // Hardware registers def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; diff --git a/contrib/llvm/lib/Target/Mips/MipsRelocations.h b/contrib/llvm/lib/Target/Mips/MipsRelocations.h new file mode 100644 index 0000000..66d1bfd --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsRelocations.h @@ -0,0 +1,41 @@ +//===- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===---------------------------------------------------------------------===// +// +// This file defines the Mips target-specific relocation types +// (for relocation-model=static). +// +//===---------------------------------------------------------------------===// + +#ifndef MIPSRELOCATIONS_H_ +#define MIPSRELOCATIONS_H_ + +#include "llvm/CodeGen/MachineRelocation.h" + +namespace llvm { + namespace Mips{ + enum RelocationType { + // reloc_mips_branch - pc relative relocation for branches. The lower 18 + // bits of the difference between the branch target and the branch + // instruction, shifted right by 2. + reloc_mips_branch = 1, + + // reloc_mips_hi - upper 16 bits of the address (modified by +1 if the + // lower 16 bits of the address is negative). + reloc_mips_hi = 2, + + // reloc_mips_lo - lower 16 bits of the address. + reloc_mips_lo = 3, + + // reloc_mips_26 - lower 28 bits of the address, shifted right by 2. + reloc_mips_26 = 4 + }; + } +} + +#endif /* MIPSRELOCATIONS_H_ */ diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp index 6eee333..016d449 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -13,7 +13,7 @@ #include "MipsSubtarget.h" #include "Mips.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -24,15 +24,14 @@ using namespace llvm; MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool little) : MipsGenSubtargetInfo(TT, CPU, FS), - MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false), - IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), - HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false), - HasSwap(false), HasBitCount(false) + MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), + IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), + IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), + HasMinMax(false), HasSwap(false), HasBitCount(false) { std::string CPUName = CPU; if (CPUName.empty()) - CPUName = "mips1"; - MipsArchVersion = Mips1; + CPUName = "mips32r1"; // Parse features string. ParseSubtargetFeatures(CPUName, FS); @@ -40,23 +39,16 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); + // Set MipsABI if it hasn't been set yet. + if (MipsABI == UnknownABI) + MipsABI = hasMips64() ? N64 : O32; + + // Check if Architecture and ABI are compatible. + assert(((!hasMips64() && (isABI_O32() || isABI_EABI())) || + (hasMips64() && (isABI_N32() || isABI_N64()))) && + "Invalid Arch & ABI pair."); + // Is the target system Linux ? if (TT.find("linux") == std::string::npos) IsLinux = false; - - // When only the target triple is specified and is - // a allegrex target, set the features. 
We also match - // big and little endian allegrex cores (dont really - // know if a big one exists) - if (TT.find("mipsallegrex") != std::string::npos || - TT.find("psp") != std::string::npos) { - MipsABI = EABI; - IsSingleFloat = true; - MipsArchVersion = Mips2; - HasVFPU = true; // Enables Allegrex Vector FPU (not supported yet) - HasSEInReg = true; - HasBitCount = true; - HasSwap = true; - HasCondMov = true; - } } diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h index 533d4af..d9dddad 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h @@ -27,14 +27,15 @@ class StringRef; class MipsSubtarget : public MipsGenSubtargetInfo { public: + // NOTE: O64 will not be supported. enum MipsABIEnum { - O32, O64, N32, N64, EABI + UnknownABI, O32, N32, N64, EABI }; protected: enum MipsArchEnum { - Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2 + Mips32, Mips32r2, Mips64, Mips64r2 }; // Mips architecture version @@ -90,6 +91,8 @@ public: /// Only O32 and EABI supported right now. bool isABI_EABI() const { return MipsABI == EABI; } + bool isABI_N64() const { return MipsABI == N64; } + bool isABI_N32() const { return MipsABI == N32; } bool isABI_O32() const { return MipsABI == O32; } unsigned getTargetABI() const { return MipsABI; } @@ -102,9 +105,11 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - bool isMips1() const { return MipsArchVersion == Mips1; } - bool isMips32() const { return MipsArchVersion >= Mips32; } - bool isMips32r2() const { return MipsArchVersion == Mips32r2; } + bool hasMips32() const { return MipsArchVersion >= Mips32; } + bool hasMips32r2() const { return MipsArchVersion == Mips32r2 || + MipsArchVersion == Mips64r2; } + bool hasMips64() const { return MipsArchVersion >= Mips64; } + bool hasMips64r2() const { return MipsArchVersion == Mips64r2; } bool isLittle() const { return IsLittle; } bool isFP64bit() const { return IsFP64bit; } diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp index 20b9f4e..6480da3 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -14,13 +14,15 @@ #include "Mips.h" #include "MipsTargetMachine.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; extern "C" void LLVMInitializeMipsTarget() { // Register the target. - RegisterTargetMachine<MipsTargetMachine> X(TheMipsTarget); + RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget); RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget); + RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target); + RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget); } // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment @@ -31,30 +33,47 @@ extern "C" void LLVMInitializeMipsTarget() { // an easier handling. // Using CodeModel::Large enables different CALL behavior. 
MipsTargetMachine:: -MipsTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, - bool isLittle=false): - LLVMTargetMachine(T, TT, CPU, FS), +MipsTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, + bool isLittle): + LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS, isLittle), - DataLayout(isLittle ? - std::string("e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : - std::string("E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), + DataLayout(isLittle ? + (Subtarget.isABI_N64() ? + "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : + "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : + (Subtarget.isABI_N64() ? + "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : + "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), InstrInfo(*this), FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this) { - // Abicall enables PIC by default - if (getRelocationModel() == Reloc::Default) { - if (Subtarget.isABI_O32()) - setRelocationModel(Reloc::PIC_); - else - setRelocationModel(Reloc::Static); - } + TLInfo(*this), TSInfo(*this), JITInfo() { } +MipsebTargetMachine:: +MipsebTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {} + MipselTargetMachine:: -MipselTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS) : - MipsTargetMachine(T, TT, CPU, FS, true) {} +MipselTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {} + +Mips64ebTargetMachine:: +Mips64ebTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {} + +Mips64elTargetMachine:: +Mips64elTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {} // Install an instruction selector pass using // the ISelDag to gen Mips code. @@ -77,7 +96,10 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) bool MipsTargetMachine:: addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - PM.add(createMipsEmitGPRestorePass(*this)); + // Do not restore $gp if target is Mips64. + // In N32/64, $gp is a callee-saved register. + if (!Subtarget.hasMips64()) + PM.add(createMipsEmitGPRestorePass(*this)); return true; } @@ -86,3 +108,12 @@ addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { PM.add(createMipsExpandPseudoPass(*this)); return true; } + +bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + JITCodeEmitter &JCE) { + // Machine code emitter pass for Mips. 
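// (Returning false below signals success to LLVMTargetMachine. A JIT
// client would reach this hook through something like the following
// hypothetical driver code, where M is the Module being compiled:
//   ExecutionEngine *EE =
//       EngineBuilder(M).setEngineKind(EngineKind::JIT).create();
// )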
+ PM.add(createMipsJITCodeEmitterPass(*this, JCE)); + return false; +} + diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h index a021af2..118ed10 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h @@ -22,6 +22,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" +#include "MipsJITInfo.h" namespace llvm { class formatted_raw_ostream; @@ -33,9 +34,12 @@ namespace llvm { MipsFrameLowering FrameLowering; MipsTargetLowering TLInfo; MipsSelectionDAGInfo TSInfo; + MipsJITInfo JITInfo; + public: - MipsTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, + MipsTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool isLittle); virtual const MipsInstrInfo *getInstrInfo() const @@ -46,6 +50,9 @@ namespace llvm { { return &Subtarget; } virtual const TargetData *getTargetData() const { return &DataLayout;} + virtual MipsJITInfo *getJITInfo() + { return &JITInfo; } + virtual const MipsRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); @@ -67,16 +74,47 @@ namespace llvm { virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level); + virtual bool addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + JITCodeEmitter &JCE); + }; -/// MipselTargetMachine - Mipsel target machine. +/// MipsebTargetMachine - Mips32 big endian target machine. +/// +class MipsebTargetMachine : public MipsTargetMachine { +public: + MipsebTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); +}; + +/// MipselTargetMachine - Mips32 little endian target machine. /// class MipselTargetMachine : public MipsTargetMachine { public: - MipselTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + MipselTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; +/// Mips64ebTargetMachine - Mips64 big endian target machine. +/// +class Mips64ebTargetMachine : public MipsTargetMachine { +public: + Mips64ebTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); +}; + +/// Mips64elTargetMachine - Mips64 little endian target machine. 
+/// +class Mips64elTargetMachine : public MipsTargetMachine { +public: + Mips64elTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); +}; } // End llvm namespace #endif diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp index cf5d1b5..05c46f5 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -79,7 +79,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (Kind.isMergeable1ByteCString()) return false; - const Type *Ty = GV->getType()->getElementType(); + Type *Ty = GV->getType()->getElementType(); return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); } diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp index a8d6fe9..243632b 100644 --- a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp @@ -9,13 +9,23 @@ #include "Mips.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheMipsTarget, llvm::TheMipselTarget; +Target llvm::TheMips64Target, llvm::TheMips64elTarget; extern "C" void LLVMInitializeMipsTargetInfo() { - RegisterTarget<Triple::mips> X(TheMipsTarget, "mips", "Mips"); + RegisterTarget<Triple::mips, + /*HasJIT=*/true> X(TheMipsTarget, "mips", "Mips"); - RegisterTarget<Triple::mipsel> Y(TheMipselTarget, "mipsel", "Mipsel"); + RegisterTarget<Triple::mipsel, + /*HasJIT=*/true> Y(TheMipselTarget, "mipsel", "Mipsel"); + + RegisterTarget<Triple::mips64, + /*HasJIT=*/false> A(TheMips64Target, "mips64", "Mips64 [experimental]"); + + RegisterTarget<Triple::mips64el, + /*HasJIT=*/false> B(TheMips64elTarget, + "mips64el", "Mips64el [experimental]"); } diff --git a/contrib/llvm/lib/Target/PTX/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/CMakeLists.txt deleted file mode 100644 index 331266d..0000000 --- a/contrib/llvm/lib/Target/PTX/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS PTX.td) - -tablegen(PTXGenAsmWriter.inc -gen-asm-writer) -tablegen(PTXGenDAGISel.inc -gen-dag-isel) -tablegen(PTXGenInstrInfo.inc -gen-instr-desc) -tablegen(PTXGenInstrNames.inc -gen-instr-enums) -tablegen(PTXGenRegisterInfo.inc -gen-register-desc) -tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PTXGenRegisterNames.inc -gen-register-enums) -tablegen(PTXGenSubtarget.inc -gen-subtarget) - -add_llvm_target(PTXCodeGen - PTXAsmPrinter.cpp - PTXISelDAGToDAG.cpp - PTXISelLowering.cpp - PTXInstrInfo.cpp - PTXFrameLowering.cpp - PTXMCAsmInfo.cpp - PTXMCAsmStreamer.cpp - PTXMFInfoExtract.cpp - PTXRegisterInfo.cpp - PTXSubtarget.cpp - PTXTargetMachine.cpp - ) - -add_subdirectory(TargetInfo) diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp new file mode 100644 index 0000000..aabb404 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -0,0 +1,192 @@ +//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This class prints a PTX MCInst to a .ptx file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "PTXInstPrinter.h" +#include "MCTargetDesc/PTXBaseInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#include "PTXGenAsmWriter.inc" + +PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) : + MCInstPrinter(MAI) { + // Initialize the set of available features. + setAvailableFeatures(STI.getFeatureBits()); +} + +StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + +void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << getRegisterName(RegNo); +} + +void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + printPredicate(MI, O); + switch (MI->getOpcode()) { + default: + printInstruction(MI, O); + break; + case PTX::CALL: + printCall(MI, O); + } + O << ";"; + printAnnotation(O, Annot); +} + +void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) { + // The last two operands are the predicate operands + int RegIndex; + int OpIndex; + + if (MI->getOpcode() == PTX::CALL) { + RegIndex = 0; + OpIndex = 1; + } else { + RegIndex = MI->getNumOperands()-2; + OpIndex = MI->getNumOperands()-1; + } + + int PredOp = MI->getOperand(OpIndex).getImm(); + if (PredOp == PTXPredicate::None) + return; + + if (PredOp == PTXPredicate::Negate) + O << '!'; + else + O << '@'; + + printOperand(MI, RegIndex, O); +} + +void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) { + O << "\tcall.uni\t"; + // The first two operands are the predicate slot + unsigned Index = 2; + unsigned NumRets = MI->getOperand(Index++).getImm(); + + if (NumRets > 0) { + O << "("; + printOperand(MI, Index++, O); + for (unsigned i = 1; i < NumRets; ++i) { + O << ", "; + printOperand(MI, Index++, O); + } + O << "), "; + } + + O << *(MI->getOperand(Index++).getExpr()) << ", ("; + + unsigned NumArgs = MI->getOperand(Index++).getImm(); + if (NumArgs > 0) { + printOperand(MI, Index++, O); + for (unsigned i = 1; i < NumArgs; ++i) { + O << ", "; + printOperand(MI, Index++, O); + } + } + O << ")"; +} + +void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << Op.getImm(); + } else if (Op.isFPImm()) { + double Imm = Op.getFPImm(); + APFloat FPImm(Imm); + APInt FPIntImm = FPImm.bitcastToAPInt(); + O << "0D"; + // PTX requires us to output the full 64 bits, even if the number is zero + if (FPIntImm.getZExtValue() > 0) { + O << FPIntImm.toString(16, false); + } else { + O << "0000000000000000"; + } + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + const MCExpr *Expr = Op.getExpr(); + if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) { + const MCSymbol &Sym = SymRefExpr->getSymbol(); + O << Sym.getName(); + } else { + O << *Op.getExpr(); + } + } +} + +void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + // By definition, operand OpNo+1 is an i32imm + const MCOperand &Op2 = 
MI->getOperand(OpNo+1);
+  printOperand(MI, OpNo, O);
+  if (Op2.getImm() == 0)
+    return; // don't print "+0"
+  O << "+" << Op2.getImm();
+}
+
+void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  assert (Op.isImm() && "Rounding modes must be immediate values");
+  switch (Op.getImm()) {
+  default:
+    llvm_unreachable("Unknown rounding mode!");
+  case PTXRoundingMode::RndDefault:
+    llvm_unreachable("FP rounding-mode pass did not handle instruction!");
+    break;
+  case PTXRoundingMode::RndNone:
+    // Do not print anything.
+    break;
+  case PTXRoundingMode::RndNearestEven:
+    O << ".rn";
+    break;
+  case PTXRoundingMode::RndTowardsZero:
+    O << ".rz";
+    break;
+  case PTXRoundingMode::RndNegInf:
+    O << ".rm";
+    break;
+  case PTXRoundingMode::RndPosInf:
+    O << ".rp";
+    break;
+  case PTXRoundingMode::RndApprox:
+    O << ".approx";
+    break;
+  case PTXRoundingMode::RndNearestEvenInt:
+    O << ".rni";
+    break;
+  case PTXRoundingMode::RndTowardsZeroInt:
+    O << ".rzi";
+    break;
+  case PTXRoundingMode::RndNegInfInt:
+    O << ".rmi";
+    break;
+  case PTXRoundingMode::RndPosInfInt:
+    O << ".rpi";
+    break;
+  }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
new file mode 100644
index 0000000..86dfd48
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
@@ -0,0 +1,47 @@
+//===-- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXINSTPRINTER_H
+#define PTXINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PTXInstPrinter : public MCInstPrinter {
+public:
+  PTXInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+
+  virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
+  virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+  static const char *getInstructionName(unsigned Opcode);
+
+  // Autogenerated by tblgen.
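// (printInstruction and getRegisterName below are emitted into
// PTXGenAsmWriter.inc by TableGen. Driving the printer by hand looks
// roughly like this hypothetical sketch; the MCStreamer normally does it:
//
//   PTXInstPrinter P(MAI, STI);
//   P.printInst(&Inst, OS, ""); // predicate, then opcode/operands, then ';'
// )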
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + void printPredicate(const MCInst *MI, raw_ostream &O); + void printCall(const MCInst *MI, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O); +}; +} + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index df0f63f..0000000 --- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMPTXDesc - PTXMCTargetDesc.cpp - PTXMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile deleted file mode 100644 index 35f5a7b..0000000 --- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMPTXDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h new file mode 100644 index 0000000..c6094be --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h @@ -0,0 +1,63 @@ +//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the PTX target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXBASEINFO_H +#define PTXBASEINFO_H + +#include "PTXMCTargetDesc.h" + +namespace llvm { + namespace PTXStateSpace { + enum { + Global = 0, // default to global state space + Constant = 1, + Local = 2, + Parameter = 3, + Shared = 4 + }; + } // namespace PTXStateSpace + + namespace PTXPredicate { + enum { + Normal = 0, + Negate = 1, + None = 2 + }; + } // namespace PTXPredicate + + /// Namespace to hold all target-specific flags. 
+  namespace PTXRoundingMode {
+    // Instruction Flags
+    enum {
+      // Rounding Mode Flags
+      RndMask           = 15,
+      RndDefault        =  0, // ---
+      RndNone           =  1, // <NONE>
+      RndNearestEven    =  2, // .rn
+      RndTowardsZero    =  3, // .rz
+      RndNegInf         =  4, // .rm
+      RndPosInf         =  5, // .rp
+      RndApprox         =  6, // .approx
+      RndNearestEvenInt =  7, // .rni
+      RndTowardsZeroInt =  8, // .rzi
+      RndNegInfInt      =  9, // .rmi
+      RndPosInfInt      = 10  // .rpi
+    };
+  } // namespace PTXRoundingMode
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
index 23f70bd..a5af3b8 100644
--- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -13,10 +13,12 @@
 #include "PTXMCTargetDesc.h"
 #include "PTXMCAsmInfo.h"
+#include "InstPrinter/PTXInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
 
 #define GET_INSTRINFO_MC_DESC
 #include "PTXGenInstrInfo.inc"
@@ -35,9 +37,11 @@ static MCInstrInfo *createPTXMCInstrInfo() {
   return X;
 }
 
-extern "C" void LLVMInitializePTXMCInstrInfo() {
-  TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
-  TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) {
+  MCRegisterInfo *X = new MCRegisterInfo();
+  // PTX does not have a return address register.
+  InitPTXMCRegisterInfo(X, 0);
+  return X;
 }
 
 static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -47,14 +51,45 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
   return X;
 }
 
-extern "C" void LLVMInitializePTXMCSubtargetInfo() {
+static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+                                             CodeModel::Model CM) {
+  MCCodeGenInfo *X = new MCCodeGenInfo();
+  X->InitMCCodeGenInfo(RM, CM);
+  return X;
+}
+
+static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
+                                             unsigned SyntaxVariant,
+                                             const MCAsmInfo &MAI,
+                                             const MCSubtargetInfo &STI) {
+  assert(SyntaxVariant == 0 && "We only have one syntax variant");
+  return new PTXInstPrinter(MAI, STI);
+}
+
+extern "C" void LLVMInitializePTXTargetMC() {
+  // Register the MC asm info.
+  RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
+  RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+
+  // Register the MC codegen info.
+  TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo);
+  TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo);
+
+  // Register the MC instruction info.
+  TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
+  TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+
+  // Register the MC register info.
+  TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo);
+  TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo);
+
+  // Register the MC subtarget info.
   TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
                                           createPTXMCSubtargetInfo);
   TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
                                           createPTXMCSubtargetInfo);
-}
 
-extern "C" void LLVMInitializePTXMCAsmInfo() {
-  RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
-  RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+  // Register the MCInstPrinter.
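// (Once registered, clients can go through the registry rather than naming
// PTXInstPrinter directly -- a sketch, assuming a const Target *T looked up
// via TargetRegistry::lookupTarget and MAI/STI already constructed:
//
//   MCInstPrinter *IP = T->createMCInstPrinter(/*SyntaxVariant=*/0, MAI, STI);
// )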
+ TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h index 28cab24..7d46cce 100644 --- a/contrib/llvm/lib/Target/PTX/PTX.h +++ b/contrib/llvm/lib/Target/PTX/PTX.h @@ -15,34 +15,30 @@ #ifndef PTX_H #define PTX_H -#include "MCTargetDesc/PTXMCTargetDesc.h" +#include "MCTargetDesc/PTXBaseInfo.h" #include "llvm/Target/TargetMachine.h" namespace llvm { + class MachineInstr; + class MCInst; + class PTXAsmPrinter; class PTXTargetMachine; class FunctionPass; - namespace PTX { - enum StateSpace { - GLOBAL = 0, // default to global state space - CONSTANT = 1, - LOCAL = 2, - PARAMETER = 3, - SHARED = 4 - }; - - enum Predicate { - PRED_NORMAL = 0, - PRED_NEGATE = 1 - }; - } // namespace PTX - FunctionPass *createPTXISelDag(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); + FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel); + + FunctionPass *createPTXRegisterAllocator(); + + void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + PTXAsmPrinter &AP); + } // namespace llvm; #endif // PTX_H diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td index f6fbe9f..693bb9c 100644 --- a/contrib/llvm/lib/Target/PTX/PTX.td +++ b/contrib/llvm/lib/Target/PTX/PTX.td @@ -52,13 +52,13 @@ def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2", def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3", "Use Shader Model 1.3">; def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0", - "Use Shader Model 2.0">; + "Use Shader Model 2.0", [FeatureDouble]>; def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1", - "Use Shader Model 2.1">; + "Use Shader Model 2.1", [FeatureDouble]>; def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2", - "Use Shader Model 2.2">; + "Use Shader Model 2.2", [FeatureDouble]>; def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3", - "Use Shader Model 2.3">; + "Use Shader Model 2.3", [FeatureDouble]>; def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget", "PTX_COMPUTE_1_0", @@ -74,7 +74,8 @@ def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget", "Use Compute Compatibility 1.3">; def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget", "PTX_COMPUTE_2_0", - "Use Compute Compatibility 2.0">; + "Use Compute Compatibility 2.0", + [FeatureDouble]>; //===----------------------------------------------------------------------===// // PTX supported processors @@ -113,12 +114,6 @@ def : Proc<"fermi", [FeatureSM20, FeatureDouble]>; include "PTXRegisterInfo.td" //===----------------------------------------------------------------------===// -// Calling Conventions -//===----------------------------------------------------------------------===// - -include "PTXCallingConv.td" - -//===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// @@ -127,9 +122,20 @@ include "PTXInstrInfo.td" def PTXInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// +// PTX 
uses the MC printer for asm output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. +def PTXAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// // Target Declaration //===----------------------------------------------------------------------===// def PTX : Target { let InstructionSet = PTXInstrInfo; + let AssemblyWriters = [PTXAsmWriter]; } diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp index 2848d54..733744b 100644 --- a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp @@ -15,9 +15,14 @@ #define DEBUG_TYPE "ptx-asm-printer" #include "PTX.h" +#include "PTXAsmPrinter.h" #include "PTXMachineFunctionInfo.h" +#include "PTXParamManager.h" +#include "PTXRegisterInfo.h" #include "PTXTargetMachine.h" +#include "llvm/Argument.h" #include "llvm/DerivedTypes.h" +#include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -28,69 +33,32 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -namespace { -class PTXAsmPrinter : public AsmPrinter { -public: - explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - const char *getPassName() const { return "PTX Assembly Printer"; } - - bool doFinalization(Module &M); - - virtual void EmitStartOfAsmFile(Module &M); - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); } - - virtual void EmitInstruction(const MachineInstr *MI); - - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); - void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); - void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); - void printPredicateOperand(const MachineInstr *MI, raw_ostream &O); - - unsigned GetOrCreateSourceID(StringRef FileName, - StringRef DirName); - - // autogen'd. 
- void printInstruction(const MachineInstr *MI, raw_ostream &OS); - static const char *getRegisterName(unsigned RegNo); - -private: - void EmitVariableDeclaration(const GlobalVariable *gv); - void EmitFunctionDeclaration(); - - StringMap<unsigned> SourceIdMap; -}; // class PTXAsmPrinter -} // namespace - static const char PARAM_PREFIX[] = "__param_"; static const char RETURN_PREFIX[] = "__ret_"; -static const char *getRegisterTypeName(unsigned RegNo) { -#define TEST_REGCLS(cls, clsstr) \ - if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr; +static const char *getRegisterTypeName(unsigned RegNo, + const MachineRegisterInfo& MRI) { + const TargetRegisterClass *TRC = MRI.getRegClass(RegNo); + +#define TEST_REGCLS(cls, clsstr) \ + if (PTX::cls ## RegisterClass == TRC) return # clsstr; + TEST_REGCLS(RegPred, pred); TEST_REGCLS(RegI16, b16); TEST_REGCLS(RegI32, b32); @@ -106,16 +74,16 @@ static const char *getRegisterTypeName(unsigned RegNo) { static const char *getStateSpaceName(unsigned addressSpace) { switch (addressSpace) { default: llvm_unreachable("Unknown state space"); - case PTX::GLOBAL: return "global"; - case PTX::CONSTANT: return "const"; - case PTX::LOCAL: return "local"; - case PTX::PARAMETER: return "param"; - case PTX::SHARED: return "shared"; + case PTXStateSpace::Global: return "global"; + case PTXStateSpace::Constant: return "const"; + case PTXStateSpace::Local: return "local"; + case PTXStateSpace::Parameter: return "param"; + case PTXStateSpace::Shared: return "shared"; } return NULL; } -static const char *getTypeName(const Type* type) { +static const char *getTypeName(Type* type) { while (true) { switch (type->getTypeID()) { default: llvm_unreachable("Unknown type"); @@ -130,7 +98,7 @@ static const char *getTypeName(const Type* type) { } case Type::ArrayTyID: case Type::PointerTyID: - type = dyn_cast<const SequentialType>(type)->getElementType(); + type = dyn_cast<SequentialType>(type)->getElementType(); break; } } @@ -170,6 +138,7 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) { const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + // Emit the PTX .version and .target attributes OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString())); OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() + (ST.supportsDouble() ? 
"" @@ -203,177 +172,118 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) EmitVariableDeclaration(i); } -bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - SetupMachineFunction(MF); - EmitFunctionDeclaration(); - EmitFunctionBody(); - return false; -} - void PTXAsmPrinter::EmitFunctionBodyStart() { OutStreamer.EmitRawText(Twine("{")); const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const PTXParamManager &PM = MFI->getParamManager(); + + // Print register definitions + std::string regDefs; + unsigned numRegs; + + // pred + numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .pred %p<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // i16 + numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .b16 %rh<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // i32 + numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .b32 %r<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // i64 + numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .b64 %rd<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // f32 + numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .f32 %f<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // f64 + numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .f64 %fd<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } - // Print local variable definition - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) { - unsigned reg = *i; - - std::string def = "\t.reg ."; - def += getRegisterTypeName(reg); - def += ' '; - def += getRegisterName(reg); - def += ';'; - OutStreamer.EmitRawText(Twine(def)); + // Local params + for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end(); + i != e; ++i) { + regDefs += "\t.param .b"; + regDefs += utostr(PM.getParamSize(*i)); + regDefs += " "; + regDefs += PM.getParamName(*i); + regDefs += ";\n"; } + OutStreamer.EmitRawText(Twine(regDefs)); + + const MachineFrameInfo* FrameInfo = MF->getFrameInfo(); DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() << " frame object(s)\n"); for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); if (FrameInfo->getObjectSize(i) > 0) { - std::string def = "\t.reg .b"; - def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits - def += " s"; + std::string def = "\t.local .align "; + def += utostr(FrameInfo->getObjectAlignment(i)); + def += " .b8"; + def += " __local"; def += utostr(i); + def += "["; + def += utostr(FrameInfo->getObjectSize(i)); // Convert to bits + def += "]"; def += ";"; OutStreamer.EmitRawText(Twine(def)); } } -} - -void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { - std::string str; - str.reserve(64); - - raw_string_ostream OS(str); - - DebugLoc DL = MI->getDebugLoc(); - if (!DL.isUnknown()) { - - const MDNode *S = DL.getScope(MF->getFunction()->getContext()); - - // This is taken from DwarfDebug.cpp, which is conveniently not a public - // LLVM class. 
- StringRef Fn; - StringRef Dir; - unsigned Src = 1; - if (S) { - DIDescriptor Scope(S); - if (Scope.isCompileUnit()) { - DICompileUnit CU(S); - Fn = CU.getFilename(); - Dir = CU.getDirectory(); - } else if (Scope.isFile()) { - DIFile F(S); - Fn = F.getFilename(); - Dir = F.getDirectory(); - } else if (Scope.isSubprogram()) { - DISubprogram SP(S); - Fn = SP.getFilename(); - Dir = SP.getDirectory(); - } else if (Scope.isLexicalBlock()) { - DILexicalBlock DB(S); - Fn = DB.getFilename(); - Dir = DB.getDirectory(); - } else - assert(0 && "Unexpected scope info"); - - Src = GetOrCreateSourceID(Fn, Dir); - } - OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(), - 0, 0, 0, Fn); - - const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc(); - - OS << "\t.loc "; - OS << utostr(MDL.getFileNum()); - OS << " "; - OS << utostr(MDL.getLine()); - OS << " "; - OS << utostr(MDL.getColumn()); - OS << "\n"; - } - - - // Emit predicate - printPredicateOperand(MI, OS); - - // Write instruction to str - printInstruction(MI, OS); - OS << ';'; - OS.flush(); - - StringRef strref = StringRef(str); - OutStreamer.EmitRawText(strref); -} - -void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS) { - const MachineOperand &MO = MI->getOperand(opNum); - - switch (MO.getType()) { - default: - llvm_unreachable("<unknown operand type>"); - break; - case MachineOperand::MO_GlobalAddress: - OS << *Mang->getSymbol(MO.getGlobal()); - break; - case MachineOperand::MO_Immediate: - OS << (long) MO.getImm(); - break; - case MachineOperand::MO_MachineBasicBlock: - OS << *MO.getMBB()->getSymbol(); - break; - case MachineOperand::MO_Register: - OS << getRegisterName(MO.getReg()); - break; - case MachineOperand::MO_FPImmediate: - APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt(); - bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID; - // Emit 0F for 32-bit floats and 0D for 64-bit doubles. - if (isFloat) { - OS << "0F"; - } - else { - OS << "0D"; - } - // Emit the encoded floating-point value. - if (constFP.getZExtValue() > 0) { - OS << constFP.toString(16, false); - } - else { - OS << "00000000"; - // If We have a double-precision zero, pad to 8-bytes. 
- if (!isFloat) { - OS << "00000000"; - } - } - break; - } -} - -void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS, const char *Modifier) { - printOperand(MI, opNum, OS); - if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) - return; // don't print "+0" - - OS << "+"; - printOperand(MI, opNum+1, OS); + //unsigned Index = 1; + // Print parameter passing params + //for (PTXMachineFunctionInfo::param_iterator + // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) { + // std::string def = "\t.param .b"; + // def += utostr(*i); + // def += " __ret_"; + // def += utostr(Index); + // Index++; + // def += ";"; + // OutStreamer.EmitRawText(Twine(def)); + //} } -void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS, const char *Modifier) { - OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; +void PTXAsmPrinter::EmitFunctionBodyEnd() { + OutStreamer.EmitRawText(Twine("}")); } -void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS, const char *Modifier) { - OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; +void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { + MCInst TmpInst; + LowerPTXMachineInstrToMCInst(MI, TmpInst, *this); + OutStreamer.EmitInstruction(TmpInst); } void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { @@ -400,14 +310,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { unsigned alignment = gv->getAlignment(); if (alignment != 0) { decl += ".align "; - decl += utostr(Log2_32(gv->getAlignment())); + decl += utostr(gv->getAlignment()); decl += " "; } if (PointerType::classof(gv->getType())) { - const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType()); - const Type* elementTy = pointerTy->getElementType(); + PointerType* pointerTy = dyn_cast<PointerType>(gv->getType()); + Type* elementTy = pointerTy->getElementType(); decl += ".b8 "; decl += gvsym->getName(); @@ -417,14 +327,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { { assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); - const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy); + ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy); elementTy = arrayTy->getElementType(); unsigned numElements = arrayTy->getNumElements(); while (elementTy->isArrayTy()) { - arrayTy = dyn_cast<const ArrayType>(elementTy); + arrayTy = dyn_cast<ArrayType>(elementTy); elementTy = arrayTy->getElementType(); numElements *= arrayTy->getNumElements(); @@ -447,7 +357,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { if (gv->hasInitializer()) { - const Constant *C = gv->getInitializer(); + const Constant *C = gv->getInitializer(); if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) { decl += " = {"; @@ -484,7 +394,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { OutStreamer.AddBlankLine(); } -void PTXAsmPrinter::EmitFunctionDeclaration() { +void PTXAsmPrinter::EmitFunctionEntryLabel() { // The function label could have already been emitted if two symbols end up // conflicting due to asm renaming. Detect this and emit an error. 
if (!CurrentFnSym->isUndefined()) { @@ -494,25 +404,39 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { } const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const PTXParamManager &PM = MFI->getParamManager(); const bool isKernel = MFI->isKernel(); const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + const MachineRegisterInfo& MRI = MF->getRegInfo(); std::string decl = isKernel ? ".entry" : ".func"; - unsigned cnt = 0; - if (!isKernel) { decl += " ("; - for (PTXMachineFunctionInfo::ret_iterator - i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i; - i != e; ++i) { - if (i != b) { - decl += ", "; + if (ST.useParamSpaceForDeviceArgs()) { + for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(), + b = i; i != e; ++i) { + if (i != b) { + decl += ", "; + } + + decl += ".param .b"; + decl += utostr(PM.getParamSize(*i)); + decl += " "; + decl += PM.getParamName(*i); + } + } else { + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".reg ."; + decl += getRegisterTypeName(*i, MRI); + decl += " "; + decl += MFI->getRegisterName(*i); } - decl += ".reg ."; - decl += getRegisterTypeName(*i); - decl += " "; - decl += getRegisterName(*i); } decl += ")"; } @@ -523,26 +447,65 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { decl += " ("; - cnt = 0; + const Function *F = MF->getFunction(); // Print parameters - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - i != e; ++i) { - if (i != b) { - decl += ", "; - } - if (isKernel || ST.useParamSpaceForDeviceArgs()) { + if (isKernel || ST.useParamSpaceForDeviceArgs()) { + /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(), + b = i; i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".param .b"; - decl += utostr(*i); + decl += utostr(PM.getParamSize(*i)); decl += " "; - decl += PARAM_PREFIX; - decl += utostr(++cnt); - } else { + decl += PM.getParamName(*i); + }*/ + int Counter = 1; + for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(), + b = i; i != e; ++i) { + if (i != b) + decl += ", "; + const Type *ArgType = (*i).getType(); + decl += ".param .b"; + if (ArgType->isPointerTy()) { + if (ST.is64Bit()) + decl += "64"; + else + decl += "32"; + } else { + decl += utostr(ArgType->getPrimitiveSizeInBits()); + } + if (ArgType->isPointerTy() && ST.emitPtrAttribute()) { + const PointerType *PtrType = dyn_cast<const PointerType>(ArgType); + decl += " .ptr"; + switch (PtrType->getAddressSpace()) { + default: + llvm_unreachable("Unknown address space in argument"); + case PTXStateSpace::Global: + decl += " .global"; + break; + case PTXStateSpace::Shared: + decl += " .shared"; + break; + } + } + decl += " __param_"; + decl += utostr(Counter++); + } + } else { + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".reg ."; - decl += getRegisterTypeName(*i); + decl += getRegisterTypeName(*i, MRI); decl += " "; - decl += getRegisterName(*i); + decl += MFI->getRegisterName(*i); } } decl += ")"; @@ -550,25 +513,6 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { OutStreamer.EmitRawText(Twine(decl)); } -void PTXAsmPrinter:: -printPredicateOperand(const MachineInstr *MI, raw_ostream &O) { - int i = MI->findFirstPredOperandIdx(); - if (i == -1) - llvm_unreachable("missing predicate operand"); - - unsigned 
reg = MI->getOperand(i).getReg(); - int predOp = MI->getOperand(i+1).getImm(); - - DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n"); - - if (reg != PTX::NoRegister) { - O << '@'; - if (predOp == PTX::PRED_NEGATE) - O << '!'; - O << getRegisterName(reg); - } -} - unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, StringRef DirName) { // If FE did not provide a file name, then assume stdin. @@ -596,10 +540,58 @@ unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, return SrcId; } -#include "PTXGenAsmWriter.inc" +MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, + const MCSymbol *Symbol) { + const MCExpr *Expr; + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + return MCOperand::CreateExpr(Expr); +} + +MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) { + MCOperand MCOp; + const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const MCExpr *Expr; + const char *RegSymbolName; + switch (MO.getType()) { + default: + llvm_unreachable("Unknown operand type"); + case MachineOperand::MO_Register: + // We create register operands as symbols, since the PTXInstPrinter class + // has no way to map virtual registers back to a name without some ugly + // hacks. + // FIXME: Figure out a better way to handle virtual register naming. + RegSymbolName = MFI->getRegisterName(MO.getReg()); + Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None, + OutContext); + MCOp = MCOperand::CreateExpr(Expr); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal())); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); + break; + case MachineOperand::MO_FPImmediate: + APFloat Val = MO.getFPImm()->getValueAPF(); + bool ignored; + Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); + break; + } + + return MCOp; +} // Force static initialization. extern "C" void LLVMInitializePTXAsmPrinter() { RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target); RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target); } + diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h new file mode 100644 index 0000000..538c080 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h @@ -0,0 +1,57 @@ +//===-- PTXAsmPrinter.h - Print machine code to a PTX file ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// PTX Assembly printer class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef PTXASMPRINTER_H +#define PTXASMPRINTER_H + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class MCOperand; + +class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter { +public: + explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) {} + + const char *getPassName() const { return "PTX Assembly Printer"; } + + bool doFinalization(Module &M); + + virtual void EmitStartOfAsmFile(Module &M); + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); + virtual void EmitFunctionEntryLabel(); + virtual void EmitInstruction(const MachineInstr *MI); + + unsigned GetOrCreateSourceID(StringRef FileName, + StringRef DirName); + + MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); + MCOperand lowerOperand(const MachineOperand &MO); + +private: + void EmitVariableDeclaration(const GlobalVariable *gv); + void EmitFunctionDeclaration(); + + StringMap<unsigned> SourceIdMap; +}; // class PTXAsmPrinter +} // namespace llvm + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td deleted file mode 100644 index 3e3ff48..0000000 --- a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td +++ /dev/null @@ -1,29 +0,0 @@ - -//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for the PTX architecture. 
-// -//===----------------------------------------------------------------------===// - -// PTX Formal Parameter Calling Convention -def CC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>, - CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>, - CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>, - CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>> -]>; - -// PTX Return Value Calling Convention -def RetCC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>, - CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>, - CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>, - CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>> -]>; diff --git a/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp 
new file mode 100644 index 0000000..0b653e0 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp @@ -0,0 +1,179 @@ +//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a machine function pass that sets appropriate FP rounding +// modes for all relevant instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-fp-rounding-mode" + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +// NOTE: PTXFPRoundingModePass should be executed just before emission. + +namespace llvm { + /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to + /// all FP instructions. Essentially, this pass just looks for all FP + /// instructions that have a rounding mode set to RndDefault, and sets an + /// appropriate rounding mode based on the target device. + /// + class PTXFPRoundingModePass : public MachineFunctionPass { + private: + static char ID; + + typedef std::pair<unsigned, unsigned> RndModeDesc; + + PTXTargetMachine& TargetMachine; + DenseMap<unsigned, RndModeDesc> Instrs; + + public: + PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) + : MachineFunctionPass(ID), + TargetMachine(TM) { + initializeMap(); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "PTX FP Rounding Mode Pass"; + } + + private: + + void initializeMap(); + void processInstruction(MachineInstr &MI); + }; // class PTXFPRoundingModePass +} // namespace llvm + +using namespace llvm; + +char PTXFPRoundingModePass::ID = 0; + +bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) { + // Look at each basic block + for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe; + ++bbi) { + MachineBasicBlock &MBB = *bbi; + // Look at each instruction + for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end(); + ii != ie; ++ii) { + MachineInstr &MI = *ii; + processInstruction(MI); + } + } + return false; +} + +void PTXFPRoundingModePass::initializeMap() { + using namespace PTXRoundingMode; + const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>(); + + // Build a map of default rounding mode for all instructions that need a + // rounding mode. 
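+  // A note on the table below (illustrative, using an entry that follows):
+  // each value is a pair of
+  //   (index of the rounding-mode operand, mode to substitute for RndDefault).
+  // For example,
+  //   Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+  // records that operand 1 of FADDrr32 carries its rounding mode and should
+  // become round-to-nearest-even when left at RndDefault; processInstruction()
+  // performs exactly this substitution.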
+ Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone); + + unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone; + Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode); + + unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone; + Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode); + + Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox); + + Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu64f64] = std::make_pair(1U, 
(unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + + Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven); +} + +void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) { + // Is this an instruction that needs a rounding mode? + if (Instrs.count(MI.getOpcode())) { + const RndModeDesc &Desc = Instrs[MI.getOpcode()]; + // Get the rounding mode operand + MachineOperand &Op = MI.getOperand(Desc.first); + // Update the rounding mode if needed + if (Op.getImm() == PTXRoundingMode::RndDefault) { + Op.setImm(Desc.second); + } + } +} + +FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXFPRoundingModePass(TM, OptLevel); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp index 9adfa62..5c7ee29 100644 --- a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -12,7 +12,9 @@ //===----------------------------------------------------------------------===// #include "PTX.h" +#include "PTXMachineFunctionInfo.h" #include "PTXTargetMachine.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/DerivedTypes.h" #include "llvm/Support/Debug.h" @@ -37,6 +39,7 @@ class PTXDAGToDAGISel : public SelectionDAGISel { bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2); bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset); bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset); // Include the pieces auto'gened from the target description #include "PTXGenDAGISel.inc" @@ -46,6 +49,10 @@ class PTXDAGToDAGISel : public SelectionDAGISel { // pattern (PTXbrcond bb:$d, ...) 
in PTXInstrInfo.td
   SDNode *SelectBRCOND(SDNode *Node);
 
+  SDNode *SelectREADPARAM(SDNode *Node);
+  SDNode *SelectWRITEPARAM(SDNode *Node);
+  SDNode *SelectFrameIndex(SDNode *Node);
+
   bool isImm(const SDValue &operand);
   bool SelectImm(const SDValue &operand, SDValue &imm);
 
@@ -68,6 +75,12 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
   switch (Node->getOpcode()) {
     case ISD::BRCOND:
       return SelectBRCOND(Node);
+    case PTXISD::READ_PARAM:
+      return SelectREADPARAM(Node);
+    case PTXISD::WRITE_PARAM:
+      return SelectWRITEPARAM(Node);
+    case ISD::FrameIndex:
+      return SelectFrameIndex(Node);
     default:
       return SelectCode(Node);
   }
@@ -79,7 +92,7 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
   SDValue Chain = Node->getOperand(0);
   SDValue Pred = Node->getOperand(1);
   SDValue Target = Node->getOperand(2); // branch target
-  SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+  SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32);
   DebugLoc dl = Node->getDebugLoc();
 
   assert(Target.getOpcode() == ISD::BasicBlock);
@@ -90,6 +103,97 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
   return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
 }
 
+SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
+  SDValue Chain = Node->getOperand(0);
+  SDValue Index = Node->getOperand(1);
+
+  int OpCode;
+
+  // Get the type of parameter we are reading
+  EVT VT = Node->getValueType(0);
+  assert(VT.isSimple() && "READ_PARAM only implemented for MVT types");
+
+  MVT Type = VT.getSimpleVT();
+
+  if (Type == MVT::i1)
+    OpCode = PTX::READPARAMPRED;
+  else if (Type == MVT::i16)
+    OpCode = PTX::READPARAMI16;
+  else if (Type == MVT::i32)
+    OpCode = PTX::READPARAMI32;
+  else if (Type == MVT::i64)
+    OpCode = PTX::READPARAMI64;
+  else if (Type == MVT::f32)
+    OpCode = PTX::READPARAMF32;
+  else {
+    assert(Type == MVT::f64 && "Unexpected type!");
+    OpCode = PTX::READPARAMF64;
+  }
+
+  SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+  SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+  DebugLoc dl = Node->getDebugLoc();
+
+  SDValue Ops[] = { Index, Pred, PredOp, Chain };
+  return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4);
+}
+
+SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
+
+  SDValue Chain = Node->getOperand(0);
+  SDValue Value = Node->getOperand(1);
+
+  int OpCode;
+
+  // Get the type of parameter we are writing
+  EVT VT = Value->getValueType(0);
+  assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types");
+
+  MVT Type = VT.getSimpleVT();
+
+  if (Type == MVT::i1)
+    OpCode = PTX::WRITEPARAMPRED;
+  else if (Type == MVT::i16)
+    OpCode = PTX::WRITEPARAMI16;
+  else if (Type == MVT::i32)
+    OpCode = PTX::WRITEPARAMI32;
+  else if (Type == MVT::i64)
+    OpCode = PTX::WRITEPARAMI64;
+  else if (Type == MVT::f32)
+    OpCode = PTX::WRITEPARAMF32;
+  else if (Type == MVT::f64)
+    OpCode = PTX::WRITEPARAMF64;
+  else
+    llvm_unreachable("Invalid type in SelectWRITEPARAM");
+
+  SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+  SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+  DebugLoc dl = Node->getDebugLoc();
+
+  SDValue Ops[] = { Value, Pred, PredOp, Chain };
+  SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4);
+
+  return Ret;
+}
+
+SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) {
+  int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+
+  PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
+  SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI),
+                                                        Node->getValueType(0));
+
+  return FrameSymbol.getNode();
+}
+
 // Match memory operand of the form [reg+reg]
 bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
   if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
@@ -107,14 +211,54 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
 // Match memory operand of the form [reg], [imm+reg], and [reg+imm]
 bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
                                    SDValue &Offset) {
-  if (Addr.getOpcode() != ISD::ADD) {
+  // FrameIndex addresses are handled separately
+  if (isa<FrameIndexSDNode>(Addr)) {
+    return false;
+  }
+
+  if (CurDAG->isBaseWithConstantOffset(Addr)) {
+    Base = Addr.getOperand(0);
+    if (isa<FrameIndexSDNode>(Base)) {
+      return false;
+    }
+    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+    return true;
+  }
+
+  if (isImm(Addr)) {
+    return false;
+  }
+
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+  return true;
+
+  /*if (Addr.getOpcode() != ISD::ADD) {
     // let SelectADDRii handle the [imm] case
     if (isImm(Addr))
       return false;
    // it is [reg]
    assert(Addr.getValueType().isSimple() && "Type must be simple");
-
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
@@ -136,7 +280,7 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
  }
 
  // neither [reg+imm] nor [imm+reg]
-  return false;
+  return false;*/
}
 
 // Match memory operand of the form [imm+imm] and [imm]
@@ -160,6 +304,36 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
   return false;
 }
 
+// Match frame-index memory operands of the form [fi] and [fi+imm]
+bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
+                                      SDValue &Offset) {
+  if (isa<FrameIndexSDNode>(Addr)) {
+    Base = Addr;
+    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+    return true;
+  }
+
+  if (CurDAG->isBaseWithConstantOffset(Addr)) {
+    Base = Addr.getOperand(0);
+    if (!isa<FrameIndexSDNode>(Base)) {
+      return false;
+    }
+    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+    return true;
+  }
+
+  return false;
+}
+
 bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
   return ConstantSDNode::classof(operand.getNode());
 }
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
index 6fcf710..3307d91 100644
---
a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp @@ -16,23 +16,19 @@ #include "PTXMachineFunctionInfo.h" #include "PTXRegisterInfo.h" #include "PTXSubtarget.h" +#include "llvm/Function.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// - -#include "PTXGenCallingConv.inc" - -//===----------------------------------------------------------------------===// // TargetLowering Implementation //===----------------------------------------------------------------------===// @@ -47,57 +43,58 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) addRegisterClass(MVT::f64, PTX::RegF64RegisterClass); setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? setMinFunctionAlignment(2); - + //////////////////////////////////// /////////// Expansion ////////////// //////////////////////////////////// - + // (any/zero/sign) extload => load + (any/zero/sign) extend - + setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); - + // f32 extload => load + fextend - - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + // f64 truncstore => trunc + store - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // sign_extend_inreg => sign_extend - + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - + // br_cc => brcond - + setOperationAction(ISD::BR_CC, MVT::Other, Expand); // select_cc => setcc - + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - + //////////////////////////////////// //////////// Legal ///////////////// //////////////////////////////////// - + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - + //////////////////////////////////// //////////// Custom //////////////// //////////////////////////////////// - + // customise setcc to use bitwise logic if possible - + setOperationAction(ISD::SETCC, MVT::i1, Custom); // customize translation of memory addresses - + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); @@ -105,7 +102,7 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) computeRegisterProperties(); } -MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const { +EVT PTXTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i1; } @@ -130,10 +127,16 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "PTXISD::LOAD_PARAM"; case PTXISD::STORE_PARAM: return "PTXISD::STORE_PARAM"; + case PTXISD::READ_PARAM: + return "PTXISD::READ_PARAM"; + case PTXISD::WRITE_PARAM: + return "PTXISD::WRITE_PARAM"; case PTXISD::EXIT: return "PTXISD::EXIT"; case PTXISD::RET: 
return "PTXISD::RET"; + case PTXISD::CALL: + return "PTXISD::CALL"; } } @@ -149,7 +152,7 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - // Look for X == 0, X == 1, X != 0, or X != 1 + // Look for X == 0, X == 1, X != 0, or X != 1 // We can simplify these to bitwise logic if (Op1.getOpcode() == ISD::Constant && @@ -197,6 +200,7 @@ SDValue PTXTargetLowering:: MachineFunction &MF = DAG.getMachineFunction(); const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + PTXParamManager &PM = MFI->getParamManager(); switch (CallConv) { default: @@ -216,68 +220,34 @@ SDValue PTXTargetLowering:: if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) { // We just need to emit the proper LOAD_PARAM ISDs for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) && "Kernels cannot take pred operands"); + unsigned ParamSize = Ins[i].VT.getStoreSizeInBits(); + unsigned Param = PM.addArgumentParam(ParamSize); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, - DAG.getTargetConstant(i, MVT::i32)); + ParamValue); InVals.push_back(ArgValue); - - // Instead of storing a physical register in our argument list, we just - // store the total size of the parameter, in bits. The ASM printer - // knows how to process this. - MFI->addArgReg(Ins[i].VT.getStoreSizeInBits()); } } else { - // For device functions, we use the PTX calling convention to do register - // assignments then create CopyFromReg ISDs for the allocated registers - - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs, - *DAG.getContext()); - - CCInfo.AnalyzeFormalArguments(Ins, CC_PTX); - - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - - CCValAssign& VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - TargetRegisterClass* TRC = 0; - - assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); - - // Determine which register class we need - if (RegVT == MVT::i1) { - TRC = PTX::RegPredRegisterClass; - } - else if (RegVT == MVT::i16) { - TRC = PTX::RegI16RegisterClass; - } - else if (RegVT == MVT::i32) { - TRC = PTX::RegI32RegisterClass; - } - else if (RegVT == MVT::i64) { - TRC = PTX::RegI64RegisterClass; - } - else if (RegVT == MVT::f32) { - TRC = PTX::RegF32RegisterClass; - } - else if (RegVT == MVT::f64) { - TRC = PTX::RegF64RegisterClass; - } - else { - llvm_unreachable("Unknown parameter type"); - } + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT RegVT = Ins[i].VT; + TargetRegisterClass* TRC = getRegClassFor(RegVT); + // Use a unique index in the instruction to prevent instruction folding. + // Yes, this is a hack. 
+ SDValue Index = DAG.getTargetConstant(i, MVT::i32); unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); - MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg); + SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain, + Index); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); InVals.push_back(ArgValue); - MFI->addArgReg(VA.getLocReg()); + MFI->addArgReg(Reg); } } @@ -301,41 +271,66 @@ SDValue PTXTargetLowering:: assert(Outs.size() == 0 && "Kernel must return void."); return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain); case CallingConv::PTX_Device: - //assert(Outs.size() <= 1 && "Can at most return one value."); + assert(Outs.size() <= 1 && "Can at most return one value."); break; } MachineFunction& MF = DAG.getMachineFunction(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + PTXParamManager &PM = MFI->getParamManager(); SDValue Flag; + const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); - // Even though we could use the .param space for return arguments for - // device functions if SM >= 2.0 and the number of return arguments is - // only 1, we just always use registers since this makes the codegen - // easier. - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - - CCInfo.AnalyzeReturn(Outs, RetCC_PTX); - - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - CCValAssign& VA = RVLocs[i]; - - assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); + if (ST.useParamSpaceForDeviceArgs()) { + assert(Outs.size() < 2 && "Device functions can return at most one value"); + + if (Outs.size() == 1) { + unsigned ParamSize = OutVals[0].getValueType().getSizeInBits(); + unsigned Param = PM.addReturnParam(ParamSize); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, + ParamValue, OutVals[0]); + } + } else { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT RegVT = Outs[i].VT; + TargetRegisterClass* TRC = 0; - unsigned Reg = VA.getLocReg(); + // Determine which register class we need + if (RegVT == MVT::i1) { + TRC = PTX::RegPredRegisterClass; + } + else if (RegVT == MVT::i16) { + TRC = PTX::RegI16RegisterClass; + } + else if (RegVT == MVT::i32) { + TRC = PTX::RegI32RegisterClass; + } + else if (RegVT == MVT::i64) { + TRC = PTX::RegI64RegisterClass; + } + else if (RegVT == MVT::f32) { + TRC = PTX::RegF32RegisterClass; + } + else if (RegVT == MVT::f64) { + TRC = PTX::RegF64RegisterClass; + } + else { + llvm_unreachable("Unknown parameter type"); + } - DAG.getMachineFunction().getRegInfo().addLiveOut(Reg); + unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); - Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag); + SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/); + SDValue OutReg = DAG.getRegister(Reg, RegVT); - // Guarantee that all emitted copies are stuck together, - // avoiding something bad - Flag = Chain.getValue(1); + Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg); - MFI->addRetReg(Reg); + MFI->addRetReg(Reg); + } } if (Flag.getNode() == 0) { @@ -345,3 +340,83 @@ SDValue PTXTargetLowering:: return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); } } + +SDValue +PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + 
const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + + MachineFunction& MF = DAG.getMachineFunction(); + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + PTXParamManager &PM = MFI->getParamManager(); + + assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() && + "Calls are not handled for the target device"); + + std::vector<SDValue> Ops; + // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs] + Ops.resize(Outs.size() + Ins.size() + 4); + + Ops[0] = Chain; + + // Identify the callee function + const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); + assert(cast<Function>(GV)->getCallingConv() == CallingConv::PTX_Device && + "PTX function calls must be to PTX device functions"); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); + Ops[Ins.size()+2] = Callee; + + // Generate STORE_PARAM nodes for each function argument. In PTX, function + // arguments are explicitly stored into .param variables and passed as + // arguments. There is no register/stack-based calling convention in PTX. + Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32); + for (unsigned i = 0; i != OutVals.size(); ++i) { + unsigned Size = OutVals[i].getValueType().getSizeInBits(); + unsigned Param = PM.addLocalParam(Size); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, + ParamValue, OutVals[i]); + Ops[i+Ins.size()+4] = ParamValue; + } + + std::vector<SDValue> InParams; + + // Generate list of .param variables to hold the return value(s). + Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32); + for (unsigned i = 0; i < Ins.size(); ++i) { + unsigned Size = Ins[i].VT.getStoreSizeInBits(); + unsigned Param = PM.addLocalParam(Size); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Ops[i+2] = ParamValue; + InParams.push_back(ParamValue); + } + + Ops[0] = Chain; + + // Create the CALL node. + Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size()); + + // Create the LOAD_PARAM nodes that retrieve the function return value(s). + for (unsigned i = 0; i < Ins.size(); ++i) { + SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, + InParams[i]); + InVals.push_back(Load); + } + + return Chain; +} + +unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) { + // All arguments consist of one "register," regardless of the type. 
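+  // For reference (the parameter and register names here are illustrative,
+  // not taken from the code above): the STORE_PARAM/CALL/LOAD_PARAM nodes
+  // built by LowerCall correspond to PTX of roughly this shape:
+  //   .param .b32 __localparam0;
+  //   st.param.b32  [__localparam0], %r1;
+  //   call.uni (__localparam1), device_fn, (__localparam0);
+  //   ld.param.b32  %r2, [__localparam1];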
+  return 1;
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
index 4318541..4d25665 100644
--- a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
@@ -26,9 +26,12 @@ namespace PTXISD {
     FIRST_NUMBER = ISD::BUILTIN_OP_END,
     LOAD_PARAM,
     STORE_PARAM,
+    READ_PARAM,
+    WRITE_PARAM,
     EXIT,
     RET,
-    COPY_ADDRESS
+    COPY_ADDRESS,
+    CALL
   };
 } // namespace PTXISD
@@ -60,7 +63,19 @@ class PTXTargetLowering : public TargetLowering {
               DebugLoc dl,
               SelectionDAG &DAG) const;
 
-    virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+    virtual SDValue
+      LowerCall(SDValue Chain, SDValue Callee,
+                CallingConv::ID CallConv, bool isVarArg,
+                bool &isTailCall,
+                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<SDValue> &OutVals,
+                const SmallVectorImpl<ISD::InputArg> &Ins,
+                DebugLoc dl, SelectionDAG &DAG,
+                SmallVectorImpl<SDValue> &InVals) const;
+
+    virtual EVT getSetCCResultType(EVT VT) const;
+
+    virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT);
 
   private:
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
index 8cee351..397fdc3 100644
--- a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
@@ -7,12 +7,39 @@
 //
 //===----------------------------------------------------------------------===//
 
-// PTX Predicate operand, default to (0, 0) = (zero-reg, always).
+
+// Rounding Mode Specifier
+/*class RoundingMode<bits<3> val> {
+  bits<3> Value = val;
+}
+
+def RndDefault : RoundingMode<0>;
+def RndNearestEven : RoundingMode<1>;
+def RndNearestZero : RoundingMode<2>;
+def RndNegInf : RoundingMode<3>;
+def RndPosInf : RoundingMode<4>;
+def RndApprox : RoundingMode<5>;*/
+
+
+// Rounding Mode Operand
+def RndMode : Operand<i32> {
+  let PrintMethod = "printRoundingMode";
+}
+
+def RndDefault : PatLeaf<(i32 0)>;
+
+// PTX Predicate operand, default to (0, 2) = (zero-reg, none).
// Leave PrintMethod empty; predicate printing is defined elsewhere.
def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm), - (ops (i1 zero_reg), (i32 0))>; + (ops (i1 zero_reg), (i32 2))>; +def RndModeOperand : Operand<OtherVT> { + let MIOperandInfo = (ops i32imm); +} + +// Instruction Types let Namespace = "PTX" in { + class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern> : Instruction { dag OutOperandList = oops; diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp index 425265a..1b947a5 100644 --- a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp @@ -16,10 +16,11 @@ #include "PTX.h" #include "PTXInstrInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #define GET_INSTRINFO_CTOR @@ -47,8 +48,13 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DstReg, unsigned SrcReg, bool KillSrc) const { - for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) { - if (map[i].cls->contains(DstReg, SrcReg)) { + + const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo(); + //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) && + // "Invalid register copy between two register classes"); + + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) { + if (map[i].cls == MRI.getRegClass(DstReg)) { const MCInstrDesc &MCID = get(map[i].opcode); MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg). addReg(SrcReg, getKillRegState(KillSrc)); @@ -161,7 +167,7 @@ DefinesPredicate(MachineInstr *MI, return false; Pred.push_back(MO); - Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None)); return true; } @@ -277,7 +283,7 @@ InsertBranch(MachineBasicBlock &MBB, BuildMI(&MBB, DL, get(PTX::BRAdp)) .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); BuildMI(&MBB, DL, get(PTX::BRAd)) - .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None); return 2; } else if (Cond.size()) { BuildMI(&MBB, DL, get(PTX::BRAdp)) @@ -285,7 +291,7 @@ InsertBranch(MachineBasicBlock &MBB, return 1; } else { BuildMI(&MBB, DL, get(PTX::BRAd)) - .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None); return 1; } } @@ -296,34 +302,7 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - MachineInstr& MI = *MII; - DebugLoc DL = MI.getDebugLoc(); - - DEBUG(dbgs() << "storeRegToStackSlot: " << MI); - - int OpCode; - - // Select the appropriate opcode based on the register class - if (RC == PTX::RegI16RegisterClass) { - OpCode = PTX::STACKSTOREI16; - } else if (RC == PTX::RegI32RegisterClass) { - OpCode = PTX::STACKSTOREI32; - } else if (RC == PTX::RegI64RegisterClass) { - OpCode = PTX::STACKSTOREI32; - } else if (RC == PTX::RegF32RegisterClass) { - OpCode = PTX::STACKSTOREF32; - } else if (RC == PTX::RegF64RegisterClass) { - OpCode = PTX::STACKSTOREF64; - } else { - llvm_unreachable("Unknown PTX register class!"); - } - - // Build the store instruction (really a mov) - MachineInstrBuilder 
MIB = BuildMI(MBB, MII, DL, get(OpCode)); - MIB.addFrameIndex(FrameIdx); - MIB.addReg(SrcReg); - - AddDefaultPredicate(MIB); + assert(false && "storeRegToStackSlot should not be called for PTX"); } void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, @@ -331,34 +310,7 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - MachineInstr& MI = *MII; - DebugLoc DL = MI.getDebugLoc(); - - DEBUG(dbgs() << "loadRegToStackSlot: " << MI); - - int OpCode; - - // Select the appropriate opcode based on the register class - if (RC == PTX::RegI16RegisterClass) { - OpCode = PTX::STACKLOADI16; - } else if (RC == PTX::RegI32RegisterClass) { - OpCode = PTX::STACKLOADI32; - } else if (RC == PTX::RegI64RegisterClass) { - OpCode = PTX::STACKLOADI32; - } else if (RC == PTX::RegF32RegisterClass) { - OpCode = PTX::STACKLOADF32; - } else if (RC == PTX::RegF64RegisterClass) { - OpCode = PTX::STACKLOADF64; - } else { - llvm_unreachable("Unknown PTX register class!"); - } - - // Build the load instruction (really a mov) - MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode)); - MIB.addReg(DestReg); - MIB.addFrameIndex(FrameIdx); - - AddDefaultPredicate(MIB); + assert(false && "loadRegFromStackSlot should not be called for PTX"); } // static helper routines @@ -367,7 +319,7 @@ MachineSDNode *PTXInstrInfo:: GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32); SDValue ops[] = { Op1, predReg, predOp }; return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); } @@ -376,7 +328,7 @@ MachineSDNode *PTXInstrInfo:: GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32); SDValue ops[] = { Op1, Op2, predReg, predOp }; return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); } @@ -384,7 +336,7 @@ GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { if (MI->findFirstPredOperandIdx() == -1) { MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); - MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None)); } } diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td index 6bfe906..a3fcea9 100644 --- a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td @@ -21,10 +21,6 @@ include "PTXInstrFormats.td" // Code Generation Predicates //===----------------------------------------------------------------------===// -// Addressing -def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; -def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; - // Shader Model Support def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">; def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">; @@ -43,130 +39,19 @@ def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">; def SupportsFMA : 
Predicate<"getSubtarget().supportsFMA()">; def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">; -//===----------------------------------------------------------------------===// -// Instruction Pattern Stuff -//===----------------------------------------------------------------------===// -def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::GLOBAL; - return false; -}]>; - -def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::CONSTANT; - return false; -}]>; - -def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::LOCAL; - return false; -}]>; - -def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::PARAMETER; - return false; -}]>; - -def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::SHARED; - return false; -}]>; - -def store_global - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::GLOBAL; - return false; -}]>; - -def store_local - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::LOCAL; - return false; -}]>; - -def store_parameter - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::PARAMETER; - return false; -}]>; - -def store_shared - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::SHARED; - return false; -}]>; - -// Addressing modes. 
-def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; -def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; -def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; -def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; -def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; -def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; - -// Address operands -def MEMri32 : Operand<i32> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RegI32, i32imm); -} -def MEMri64 : Operand<i64> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RegI64, i64imm); -} -def MEMii32 : Operand<i32> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops i32imm, i32imm); -} -def MEMii64 : Operand<i64> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops i64imm, i64imm); -} -// The operand here does not correspond to an actual address, so we -// can use i32 in 64-bit address modes. -def MEMpi : Operand<i32> { - let PrintMethod = "printParamOperand"; - let MIOperandInfo = (ops i32imm); -} -def MEMret : Operand<i32> { - let PrintMethod = "printReturnOperand"; - let MIOperandInfo = (ops i32imm); -} + +// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart, +// [SDNPHasChain, SDNPOutGlue]>; +// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd, +// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def PTXcall : SDNode<"PTXISD::CALL", SDTNone, + [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>; + // Branch & call targets have OtherVT type. def brtarget : Operand<OtherVT>; @@ -189,87 +74,73 @@ def PTXret def PTXcopyaddress : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; -// Load/store .param space -def PTXloadparam - : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; -def PTXstoreparam - : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// +// For floating-point instructions, we cannot just embed the pattern into the +// instruction definition since we need to muck around with the rounding mode, +// and I do not know how to insert constants into instructions directly from +// pattern matches. 
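In concrete terms, the rounding mode becomes an explicit first input operand on every floating-point instruction, and standalone selection patterns later in this patch pin it to the default mode. A minimal sketch of the pairing, written as the expanded form of what PTX_FLOAT_3OP generates (RndMode and RndDefault are assumed to come from PTXInstrFormats.td, which is not part of this hunk):

  // The instruction carries the rounding mode as an ordinary operand.
  def FADDrr32 : InstPTX<(outs RegF32:$d),
                         (ins RndMode:$r, RegF32:$a, RegF32:$b),
                         "add$r.f32\t$d, $a, $b", []>;
  // A separate pattern supplies the default mode at selection time.
  def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)),
            (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>;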
+ //===- Floating-Point Instructions - 2 Operand Form -----------------------===// -multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> { +multiclass PTX_FLOAT_2OP<string opcstr> { def rr32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - !strconcat(opcstr, ".f32\t$d, $a"), - [(set RegF32:$d, (opnode RegF32:$a))]>; + (ins RndMode:$r, RegF32:$a), + !strconcat(opcstr, "$r.f32\t$d, $a"), []>; def ri32 : InstPTX<(outs RegF32:$d), - (ins f32imm:$a), - !strconcat(opcstr, ".f32\t$d, $a"), - [(set RegF32:$d, (opnode fpimm:$a))]>; + (ins RndMode:$r, f32imm:$a), + !strconcat(opcstr, "$r.f32\t$d, $a"), []>; def rr64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - !strconcat(opcstr, ".f64\t$d, $a"), - [(set RegF64:$d, (opnode RegF64:$a))]>; + (ins RndMode:$r, RegF64:$a), + !strconcat(opcstr, "$r.f64\t$d, $a"), []>; def ri64 : InstPTX<(outs RegF64:$d), - (ins f64imm:$a), - !strconcat(opcstr, ".f64\t$d, $a"), - [(set RegF64:$d, (opnode fpimm:$a))]>; + (ins RndMode:$r, f64imm:$a), + !strconcat(opcstr, "$r.f64\t$d, $a"), []>; } //===- Floating-Point Instructions - 3 Operand Form -----------------------===// -multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> { +multiclass PTX_FLOAT_3OP<string opcstr> { def rr32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b), - !strconcat(opcstr, ".f32\t$d, $a, $b"), - [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>; + (ins RndMode:$r, RegF32:$a, RegF32:$b), + !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>; def ri32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, f32imm:$b), - !strconcat(opcstr, ".f32\t$d, $a, $b"), - [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>; + (ins RndMode:$r, RegF32:$a, f32imm:$b), + !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>; def rr64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b), - !strconcat(opcstr, ".f64\t$d, $a, $b"), - [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>; + (ins RndMode:$r, RegF64:$a, RegF64:$b), + !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>; def ri64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, f64imm:$b), - !strconcat(opcstr, ".f64\t$d, $a, $b"), - [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>; + (ins RndMode:$r, RegF64:$a, f64imm:$b), + !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>; } //===- Floating-Point Instructions - 4 Operand Form -----------------------===// -multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> { +multiclass PTX_FLOAT_4OP<string opcstr> { def rrr32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b, RegF32:$c), - !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), - [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, - RegF32:$b), - RegF32:$c))]>; + (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c), + !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; def rri32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b, f32imm:$c), - !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), - [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, - RegF32:$b), - fpimm:$c))]>; + (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c), + !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; + def rii32 : InstPTX<(outs RegF32:$d), + (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c), + !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; def rrr64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b, RegF64:$c), - !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), - [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, - RegF64:$b), - RegF64:$c))]>; + (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c), + !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; def rri64 : InstPTX<(outs RegF64:$d), - (ins 
RegF64:$a, RegF64:$b, f64imm:$c), - !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), - [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, - RegF64:$b), - fpimm:$c))]>; + (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c), + !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; + def rii64 : InstPTX<(outs RegF64:$d), + (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c), + !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; } -multiclass INT3<string opcstr, SDNode opnode> { +//===- Integer Instructions - 3 Operand Form ------------------------------===// +multiclass PTX_INT3<string opcstr, SDNode opnode> { def rr16 : InstPTX<(outs RegI16:$d), (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, ".u16\t$d, $a, $b"), @@ -296,6 +167,35 @@ multiclass INT3<string opcstr, SDNode opnode> { [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; } +//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===// +multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> { + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), + !strconcat(opcstr, ".s16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), + !strconcat(opcstr, ".s16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), + !strconcat(opcstr, ".s32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), + !strconcat(opcstr, ".s32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), + !strconcat(opcstr, ".s64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), + !strconcat(opcstr, ".s64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; +} + +//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===// multiclass PTX_LOGIC<string opcstr, SDNode opnode> { def ripreds : InstPTX<(outs RegPred:$d), (ins RegPred:$a, i1imm:$b), @@ -331,7 +231,8 @@ multiclass PTX_LOGIC<string opcstr, SDNode opnode> { [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; } -multiclass INT3ntnc<string opcstr, SDNode opnode> { +//===- Integer Shift Instructions - 3 Operand Form ------------------------===// +multiclass PTX_INT3ntnc<string opcstr, SDNode opnode> { def rr16 : InstPTX<(outs RegI16:$d), (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, "16\t$d, $a, $b"), @@ -370,6 +271,7 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> { [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>; } +//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===// multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, CondCode cmp, string cmpstr> { // TODO support 5-operand format: p|q, a, b, c @@ -385,56 +287,77 @@ multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, def rr_and_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_and_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, 
".and.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), + RegPred:$c))]>; def rr_or_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_or_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; def rr_xor_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_xor_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), + RegPred:$c))]>; def rr_and_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), + (not RegPred:$c)))]>; def ri_and_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), + (not RegPred:$c)))]>; def rr_or_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), + (not RegPred:$c)))]>; def ri_or_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), + (not RegPred:$c)))]>; def rr_xor_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), + (not RegPred:$c)))]>; def ri_xor_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc 
RC:$a, imm:$b, cmp), + (not RegPred:$c)))]>; } -multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, +//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===// +multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls, CondCode ucmp, CondCode ocmp, string cmpstr> { // TODO support 5-operand format: p|q, a, b, c @@ -447,137 +370,110 @@ multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>; + def ri_u + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), + !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), + [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>; + def ri_o + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>; + def rr_and_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, "u.and.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), + RegPred:$c))]>; def rr_and_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), + RegPred:$c))]>; def rr_or_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, "u.or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; def rr_or_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; def rr_xor_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, "u.xor.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), + RegPred:$c))]>; def rr_xor_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), + RegPred:$c))]>; def rr_and_not_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, "u.and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), + (not RegPred:$c)))]>; def rr_and_not_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not 
RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), + (not RegPred:$c)))]>; def rr_or_not_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, "u.or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), + (not RegPred:$c)))]>; def rr_or_not_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), + (not RegPred:$c)))]>; def rr_xor_not_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, "u.xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), + (not RegPred:$c)))]>; def rr_xor_not_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), + (not RegPred:$c)))]>; } -multiclass PTX_SELP<RegisterClass RC, string regclsname> { +//===- Select Predicate Instructions - 4 Operand Form ---------------------===// +multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls, + SDNode immnode> { def rr : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c), !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>; + def ri + : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c), + !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), + [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>; + def ii + : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c), + !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), + [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>; } -multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> { - def rr32 : InstPTX<(outs RC:$d), - (ins MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>; - def rr64 : InstPTX<(outs RC:$d), - (ins MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>; - def ri32 : InstPTX<(outs RC:$d), - (ins MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>; - def ri64 : InstPTX<(outs RC:$d), - (ins MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>; - def ii32 : InstPTX<(outs RC:$d), - (ins MEMii32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>; - def ii64 : InstPTX<(outs RC:$d), - (ins MEMii64:$a), - !strconcat(opstr, 
!strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>; -} - -multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { - defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; - defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; - defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; - defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; - defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; -} - -multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> { - def rr32 : InstPTX<(outs), - (ins RC:$d, MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>; - def rr64 : InstPTX<(outs), - (ins RC:$d, MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>; - def ri32 : InstPTX<(outs), - (ins RC:$d, MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>; - def ri64 : InstPTX<(outs), - (ins RC:$d, MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>; - def ii32 : InstPTX<(outs), - (ins RC:$d, MEMii32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>; - def ii64 : InstPTX<(outs), - (ins RC:$d, MEMii64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>; -} -multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { - defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; - defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; - defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; - defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; - defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; -} //===----------------------------------------------------------------------===// // Instructions @@ -585,118 +481,61 @@ multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { ///===- Integer Arithmetic Instructions -----------------------------------===// -defm ADD : INT3<"add", add>; -defm SUB : INT3<"sub", sub>; -defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies -defm DIV : INT3<"div", udiv>; -defm REM : INT3<"rem", urem>; +defm ADD : PTX_INT3<"add", add>; +defm SUB : PTX_INT3<"sub", sub>; +defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies +defm DIV : PTX_INT3<"div", udiv>; +defm SDIV : PTX_INT3_SIGNED<"div", sdiv>; +defm REM : PTX_INT3<"rem", urem>; ///===- Floating-Point Arithmetic Instructions ----------------------------===// -// Standard Unary Operations -defm FNEG : PTX_FLOAT_2OP<"neg", fneg>; +// FNEG +defm FNEG : PTX_FLOAT_2OP<"neg">; // Standard Binary Operations -defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>; -defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>; -defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>; - -// For floating-point division: -// SM_13+ defaults to .rn for f32 and f64, -// SM10 must *not* provide a rounding - -// TODO: -// - Allow user selection of rounding modes for fdiv -// - Add support for -prec-div=false (.approx) - -def FDIVrr32SM13 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b), - "div.rn.f32\t$d, $a, $b", - [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVri32SM13 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, f32imm:$b), - "div.rn.f32\t$d, $a, $b", - 
[(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVrr32SM10 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b), - "div.f32\t$d, $a, $b", - [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, - Requires<[FDivNoRoundingMode]>; -def FDIVri32SM10 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, f32imm:$b), - "div.f32\t$d, $a, $b", - [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, - Requires<[FDivNoRoundingMode]>; - -def FDIVrr64SM13 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b), - "div.rn.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVri64SM13 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, f64imm:$b), - "div.rn.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVrr64SM10 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b), - "div.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, - Requires<[FDivNoRoundingMode]>; -def FDIVri64SM10 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, f64imm:$b), - "div.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, - Requires<[FDivNoRoundingMode]>; - - +defm FADD : PTX_FLOAT_3OP<"add">; +defm FSUB : PTX_FLOAT_3OP<"sub">; +defm FMUL : PTX_FLOAT_3OP<"mul">; +defm FDIV : PTX_FLOAT_3OP<"div">; // Multi-operation hybrid instructions +defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>; -// The selection of mad/fma is tricky. In some cases, they are the *same* -// instruction, but in other cases we may prefer one or the other. Also, -// different PTX versions differ on whether rounding mode flags are required. -// In the short term, mad is supported on all PTX versions and we use a -// default rounding mode no matter what shader model or PTX version. -// TODO: Allow the rounding mode to be selectable through llc. 
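With the rounding mode expressed as an operand, the SM13/SM10 duplication removed below becomes unnecessary: the single FMAD multiclass instantiation above covers both, and the mul+add tree is matched by separate patterns, as the FMUL+FADD patterns further down in this patch show. For the register-only f32 case:

  def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)),
            (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>;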
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, - Requires<[FMadNeedsRoundingMode, SupportsFMA]>; -defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, - Requires<[FMadNoRoundingMode, SupportsFMA]>; ///===- Floating-Point Intrinsic Instructions -----------------------------===// -def FSQRT32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - "sqrt.rn.f32\t$d, $a", - [(set RegF32:$d, (fsqrt RegF32:$a))]>; - -def FSQRT64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - "sqrt.rn.f64\t$d, $a", - [(set RegF64:$d, (fsqrt RegF64:$a))]>; - -def FSIN32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - "sin.approx.f32\t$d, $a", - [(set RegF32:$d, (fsin RegF32:$a))]>; +// SQRT +def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), + "sqrt$r.f32\t$d, $a", []>; +def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), + "sqrt$r.f32\t$d, $a", []>; +def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), + "sqrt$r.f64\t$d, $a", []>; +def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), + "sqrt$r.f64\t$d, $a", []>; + +// SIN +def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), + "sin$r.f32\t$d, $a", []>; +def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), + "sin$r.f32\t$d, $a", []>; +def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), + "sin$r.f64\t$d, $a", []>; +def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), + "sin$r.f64\t$d, $a", []>; + +// COS +def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), + "cos$r.f32\t$d, $a", []>; +def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), + "cos$r.f32\t$d, $a", []>; +def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), + "cos$r.f64\t$d, $a", []>; +def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), + "cos$r.f64\t$d, $a", []>; -def FSIN64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - "sin.approx.f64\t$d, $a", - [(set RegF64:$d, (fsin RegF64:$a))]>; -def FCOS32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - "cos.approx.f32\t$d, $a", - [(set RegF32:$d, (fcos RegF32:$a))]>; - -def FCOS64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - "cos.approx.f64\t$d, $a", - [(set RegF64:$d, (fcos RegF64:$a))]>; ///===- Comparison and Selection Instructions -----------------------------===// @@ -744,35 +583,35 @@ defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">; // Compare f32 -defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">; -defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">; -defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">; -defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">; -defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">; -defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">; +defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">; +defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">; +defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">; +defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">; +defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">; +defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">; // Compare f64 -defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">; -defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">; -defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">; -defm SETPLEf64 : 
PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">; -defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">; -defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">; +defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">; +defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">; +defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">; +defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">; +defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">; +defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">; // .selp -defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">; -defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">; -defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">; -defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">; -defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">; +defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>; +defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>; +defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>; +defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>; +defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>; ///===- Logic and Shift Instructions --------------------------------------===// -defm SHL : INT3ntnc<"shl.b", PTXshl>; -defm SRL : INT3ntnc<"shr.u", PTXsrl>; -defm SRA : INT3ntnc<"shr.s", PTXsra>; +defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>; +defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>; +defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>; defm AND : PTX_LOGIC<"and", and>; defm OR : PTX_LOGIC<"or", or>; @@ -780,6 +619,24 @@ defm XOR : PTX_LOGIC<"xor", xor>; ///===- Data Movement and Conversion Instructions -------------------------===// +// any_extend +// Implement the anyext instruction in terms of the PTX cvt instructions. +//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>; +//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>; +//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>; + +// bitconvert +// These instructions implement the bit-wise conversion between integer and +// floating-point types. +def MOVi32f32 + : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>; +def MOVf32i32 + : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>; +def MOVi64f64 + : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>; +def MOVf64i64 + : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>; + let neverHasSideEffects = 1 in { def MOVPREDrr : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>; @@ -825,278 +682,332 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; } -// Loads -defm LDg : PTX_LD_ALL<"ld.global", load_global>; -defm LDc : PTX_LD_ALL<"ld.const", load_constant>; -defm LDl : PTX_LD_ALL<"ld.local", load_local>; -defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; +// PTX cvt instructions +// Note all of these may actually be used, we just define all possible patterns +// here (that make sense). +// FIXME: Can we collapse this somehow into a multiclass def? 
+ +// To i16 +def CVTu16u32 + : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>; +def CVTu16u64 + : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>; +def CVTu16f32 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u16.f32\t$d, $a", []>; +def CVTs16f32 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s16.f32\t$d, $a", []>; +def CVTu16f64 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u16.f64\t$d, $a", []>; +def CVTs16f64 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s16.f64\t$d, $a", []>; + +// To i32 +def CVTu32u16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>; +def CVTs32s16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>; +def CVTu32u64 + : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>; +def CVTu32f32 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u32.f32\t$d, $a", []>; +def CVTs32f32 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s32.f32\t$d, $a", []>; +def CVTu32f64 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u32.f64\t$d, $a", []>; +def CVTs32f64 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s32.f64\t$d, $a", []>; + +// To i64 +def CVTu64u16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>; +def CVTs64s16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>; +def CVTu64u32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>; +def CVTs64s32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>; +def CVTu64f32 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u64.f32\t$d, $a", []>; +def CVTs64f32 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s64.f32\t$d, $a", []>; +def CVTu64f64 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u64.f64\t$d, $a", []>; +def CVTs64f64 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s64.f64\t$d, $a", []>; + +// To f32 +def CVTf32u16 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f32.u16\t$d, $a", []>; +def CVTf32s16 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f32.s16\t$d, $a", []>; +def CVTf32u32 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f32.u32\t$d, $a", []>; +def CVTf32s32 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f32.s32\t$d, $a", []>; +def CVTf32u64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f32.u64\t$d, $a", []>; +def CVTf32s64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f32.s64\t$d, $a", []>; +def CVTf32f64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.f32.f64\t$d, $a", []>; + +// To f64 +def CVTf64u16 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f64.u16\t$d, $a", []>; +def CVTf64s16 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f64.s16\t$d, $a", []>; +def CVTf64u32 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f64.u32\t$d, $a", []>; +def CVTf64s32 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f64.s32\t$d, $a", []>; +def CVTf64u64 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f64.u64\t$d, $a", []>; +def CVTf64s64 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f64.s64\t$d, $a", []>; +def CVTf64f32 + : 
InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>; + + ///===- Control Flow Instructions -----------------------------------------===// -// These instructions are used to load/store from the .param space for -// device and kernel parameters +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { + def BRAd + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>; +} -let hasSideEffects = 1 in { - def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a), - "ld.param.pred\t$d, [$a]", - [(set RegPred:$d, (PTXloadparam timm:$a))]>; - def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a), - "ld.param.u16\t$d, [$a]", - [(set RegI16:$d, (PTXloadparam timm:$a))]>; - def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a), - "ld.param.u32\t$d, [$a]", - [(set RegI32:$d, (PTXloadparam timm:$a))]>; - def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a), - "ld.param.u64\t$d, [$a]", - [(set RegI64:$d, (PTXloadparam timm:$a))]>; - def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), - "ld.param.f32\t$d, [$a]", - [(set RegF32:$d, (PTXloadparam timm:$a))]>; - def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a), - "ld.param.f64\t$d, [$a]", - [(set RegF64:$d, (PTXloadparam timm:$a))]>; - - def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a), - "st.param.pred\t[$d], $a", - [(PTXstoreparam timm:$d, RegPred:$a)]>; - def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a), - "st.param.u16\t[$d], $a", - [(PTXstoreparam timm:$d, RegI16:$a)]>; - def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a), - "st.param.u32\t[$d], $a", - [(PTXstoreparam timm:$d, RegI32:$a)]>; - def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a), - "st.param.u64\t[$d], $a", - [(PTXstoreparam timm:$d, RegI64:$a)]>; - def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a), - "st.param.f32\t[$d], $a", - [(PTXstoreparam timm:$d, RegF32:$a)]>; - def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a), - "st.param.f64\t[$d], $a", - [(PTXstoreparam timm:$d, RegF64:$a)]>; +let isBranch = 1, isTerminator = 1 in { + // FIXME: The pattern part is blank because I cannot (or do not yet know + // how to) use the first operand of PredicateOperand (a RegPred register) here + def BRAdp + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", + [/*(brcond pred:$_p, bb:$d)*/]>; } -// Stores -defm STg : PTX_ST_ALL<"st.global", store_global>; -defm STl : PTX_ST_ALL<"st.local", store_local>; -defm STs : PTX_ST_ALL<"st.shared", store_shared>; +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; + def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; +} -// defm STp : PTX_ST_ALL<"st.param", store_parameter>; -// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; -// TODO: Do something with st.param if/when it is needed. +let hasSideEffects = 1 in { + def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>; +} -// Conversion to pred -// PTX does not directly support converting to a predicate type, so we fake it -// by performing a greater-than test between the value and zero. This follows -// the C convention that any non-zero value is equivalent to 'true'. 
-def CVT_pred_u16 - : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0", - [(set RegPred:$d, (trunc RegI16:$a))]>; +///===- Parameter Passing Pseudo-Instructions -----------------------------===// + +def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b), + "mov.pred\t$a, %param$b", []>; +def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b), + "mov.b16\t$a, %param$b", []>; +def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b), + "mov.b32\t$a, %param$b", []>; +def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b), + "mov.b64\t$a, %param$b", []>; +def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b), + "mov.f32\t$a, %param$b", []>; +def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b), + "mov.f64\t$a, %param$b", []>; + +def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>; +def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>; +def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>; +def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>; +def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>; +def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>; -def CVT_pred_u32 - : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0", - [(set RegPred:$d, (trunc RegI32:$a))]>; -def CVT_pred_u64 - : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0", - [(set RegPred:$d, (trunc RegI64:$a))]>; +//===----------------------------------------------------------------------===// +// Instruction Selection Patterns +//===----------------------------------------------------------------------===// -def CVT_pred_f32 - : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0", - [(set RegPred:$d, (fp_to_uint RegF32:$a))]>; +// FADD +def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)), + (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)), + (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)), + (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)), + (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FSUB +def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)), + (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)), + (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)), + (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)), + (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FMUL +def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)), + (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)), + (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)), + (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)), + (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FDIV +def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)), + (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)), + (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)), + (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)), + (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FMUL+FADD +def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)), + (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>; +def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)), + (FMADrri32 RndDefault, 
RegF32:$a, RegF32:$b, fpimm:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)),
+          (FMADrii32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)),
+          (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)),
+          (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)),
+          (FMADrii64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>;
+
+// FNEG
+def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>;
+
+// FSQRT
+def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>;
+
+// FSIN
+def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>;
+
+// FCOS
+def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 RndDefault, fpimm:$a)>;
+
+// Type conversion notes:
+// - PTX does not directly support converting a predicate to a value, so we
+//   use a select instruction to select either 0 or 1 (integer or fp) based
+//   on the truth value of the predicate.
+// - PTX does not directly support converting to a predicate type, so we fake
+//   it by performing a greater-than test between the value and zero. This
+//   follows the C convention that any non-zero value is equivalent to 'true'.
-def CVT_pred_f64
-  : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0",
-            [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
+// Conversion to pred
+def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>;
+def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>;
+def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>;
+def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>;
+def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>;
 // Conversion to u16
-// PTX does not directly support converting a predicate to a value, so we
-// use a select instruction to select either 0 or 1 (integer or fp) based
-// on the truth value of the predicate.
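That select now comes in through the SELP*ii instructions generated by the extended PTX_SELP multiclass; for example, zero-extending a predicate to u16 selects to selp.u16 $r, 1, 0, $a:

  def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;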
-def CVT_u16_preda - : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", - [(set RegI16:$d, (anyext RegPred:$a))]>; - -def CVT_u16_pred - : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", - [(set RegI16:$d, (zext RegPred:$a))]>; - -def CVT_u16_preds - : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", - [(set RegI16:$d, (sext RegPred:$a))]>; - -def CVT_u16_u32 - : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", - [(set RegI16:$d, (trunc RegI32:$a))]>; - -def CVT_u16_u64 - : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", - [(set RegI16:$d, (trunc RegI64:$a))]>; - -def CVT_u16_f32 - : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a", - [(set RegI16:$d, (fp_to_uint RegF32:$a))]>; - -def CVT_u16_f64 - : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a", - [(set RegI16:$d, (fp_to_uint RegF64:$a))]>; +def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; +def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>; +def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; +def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>; +def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>; +def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>; +def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>; +def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>; +def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>; // Conversion to u32 - -def CVT_u32_pred - : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", - [(set RegI32:$d, (zext RegPred:$a))]>; - -def CVT_u32_b16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", - [(set RegI32:$d, (anyext RegI16:$a))]>; - -def CVT_u32_u16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", - [(set RegI32:$d, (zext RegI16:$a))]>; - -def CVT_u32_preds - : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", - [(set RegI32:$d, (sext RegPred:$a))]>; - -def CVT_u32_s16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a", - [(set RegI32:$d, (sext RegI16:$a))]>; - -def CVT_u32_u64 - : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", - [(set RegI32:$d, (trunc RegI64:$a))]>; - -def CVT_u32_f32 - : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a", - [(set RegI32:$d, (fp_to_uint RegF32:$a))]>; - -def CVT_u32_f64 - : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a", - [(set RegI32:$d, (fp_to_uint RegF64:$a))]>; +def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; +def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>; +def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; +def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>; +def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>; +def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>; +def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>; +def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>; +def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>; +def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>; +def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>; +def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>; // Conversion to u64 - -def CVT_u64_pred - : InstPTX<(outs 
RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", - [(set RegI64:$d, (zext RegPred:$a))]>; - -def CVT_u64_preds - : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", - [(set RegI64:$d, (sext RegPred:$a))]>; - -def CVT_u64_u16 - : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", - [(set RegI64:$d, (zext RegI16:$a))]>; - -def CVT_u64_s16 - : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a", - [(set RegI64:$d, (sext RegI16:$a))]>; - -def CVT_u64_u32 - : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", - [(set RegI64:$d, (zext RegI32:$a))]>; - -def CVT_u64_s32 - : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a", - [(set RegI64:$d, (sext RegI32:$a))]>; - -def CVT_u64_f32 - : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a", - [(set RegI64:$d, (fp_to_uint RegF32:$a))]>; - -def CVT_u64_f64 - : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a", - [(set RegI64:$d, (fp_to_uint RegF64:$a))]>; +def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>; +def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a, + 0xFFFFFFFFFFFFFFFF, 0)>; +def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>; +def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>; +def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>; +def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>; +def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>; +def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>; +def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>; +def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>; +def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>; +def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>; +def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>; +def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>; // Conversion to f32 - -def CVT_f32_pred - : InstPTX<(outs RegF32:$d), (ins RegPred:$a), - "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0 - [(set RegF32:$d, (uint_to_fp RegPred:$a))]>; - -def CVT_f32_u16 - : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a", - [(set RegF32:$d, (uint_to_fp RegI16:$a))]>; - -def CVT_f32_u32 - : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a", - [(set RegF32:$d, (uint_to_fp RegI32:$a))]>; - -def CVT_f32_u64 - : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a", - [(set RegF32:$d, (uint_to_fp RegI64:$a))]>; - -def CVT_f32_f64 - : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a", - [(set RegF32:$d, (fround RegF64:$a))]>; +def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a, + (MOVf32i32 0x3F800000), (MOVf32i32 0))>; +def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>; +def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>; +def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>; +def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>; +def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>; +def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>; +def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>; +def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>; // Conversion to f64 +def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a, + (MOVf64i64 0x3F80000000000000), 
(MOVf64i64 0))>; +def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>; +def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>; +def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>; +def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>; +def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>; +def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>; +def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>; +def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>; -def CVT_f64_pred - : InstPTX<(outs RegF64:$d), (ins RegPred:$a), - "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0 - [(set RegF64:$d, (uint_to_fp RegPred:$a))]>; - -def CVT_f64_u16 - : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a", - [(set RegF64:$d, (uint_to_fp RegI16:$a))]>; - -def CVT_f64_u32 - : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a", - [(set RegF64:$d, (uint_to_fp RegI32:$a))]>; - -def CVT_f64_u64 - : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a", - [(set RegF64:$d, (uint_to_fp RegI64:$a))]>; - -def CVT_f64_f32 - : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", - [(set RegF64:$d, (fextend RegF32:$a))]>; - -///===- Control Flow Instructions -----------------------------------------===// - -let isBranch = 1, isTerminator = 1, isBarrier = 1 in { - def BRAd - : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>; -} - -let isBranch = 1, isTerminator = 1 in { - // FIXME: The pattern part is blank because I cannot (or do not yet know - // how to) use the first operand of PredicateOperand (a RegPred register) here - def BRAdp - : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", - [/*(brcond pred:$_p, bb:$d)*/]>; -} - -let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; - def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; -} - -///===- Spill Instructions ------------------------------------------------===// -// Special instructions used for stack spilling -def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a), - "mov.u16\ts$d, $a", []>; -def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a), - "mov.u32\ts$d, $a", []>; -def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a), - "mov.u64\ts$d, $a", []>; -def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a), - "mov.f32\ts$d, $a", []>; -def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a), - "mov.f64\ts$d, $a", []>; - -def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a), - "mov.u16\t$d, s$a", []>; -def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a), - "mov.u32\t$d, s$a", []>; -def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a), - "mov.u64\t$d, s$a", []>; -def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a), - "mov.f32\t$d, s$a", []>; -def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a), - "mov.f64\t$d, s$a", []>; ///===- Intrinsic Instructions --------------------------------------------===// - include "PTXIntrinsicInstrInfo.td" + +///===- Load/Store Instructions -------------------------------------------===// +include "PTXInstrLoadStore.td" + diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td new file mode 100644 index 0000000..9b4f56c --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td @@ -0,0 +1,278 
@@ +//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the PTX load/store instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + + +// Addressing Predicates +// We have to differentiate between 32- and 64-bit pointer types +def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; +def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; + +//===----------------------------------------------------------------------===// +// Pattern Fragments for Loads/Stores +//===----------------------------------------------------------------------===// + +def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Global; + return false; +}]>; + +def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Constant; + return false; +}]>; + +def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Shared; + return false; +}]>; + +def store_global + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Global; + return false; +}]>; + +def store_shared + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Shared; + return false; +}]>; + +// Addressing modes. 
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; +def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; +def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; +def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; +def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; +def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; +def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>; +def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>; + +// Address operands +def MEMri32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops RegI32, i32imm); +} +def MEMri64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops RegI64, i64imm); +} +def LOCALri32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i32imm, i32imm); +} +def LOCALri64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i64imm, i64imm); +} +def MEMii32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i32imm, i32imm); +} +def MEMii64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i64imm, i64imm); +} +// The operand here does not correspond to an actual address, so we +// can use i32 in 64-bit address modes. +def MEMpi : Operand<i32> { + let PrintMethod = "printParamOperand"; + let MIOperandInfo = (ops i32imm); +} +def MEMret : Operand<i32> { + let PrintMethod = "printReturnOperand"; + let MIOperandInfo = (ops i32imm); +} + + +// Load/store .param space +def PTXloadparam + : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; +def PTXstoreparam + : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + +def PTXreadparam + : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; +def PTXwriteparam + : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + + + +//===----------------------------------------------------------------------===// +// Classes for loads/stores +//===----------------------------------------------------------------------===// +multiclass PTX_LD<string opstr, string typestr, + RegisterClass RC, PatFrag pat_load> { + def rr32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr32:$a))]>, + Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr64:$a))]>, + Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri32:$a))]>, + Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri64:$a))]>, + Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs RC:$d), + (ins MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii32:$a))]>, + Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs RC:$d), + (ins MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii64:$a))]>, + Requires<[Use64BitAddresses]>; 
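+ // Note: the rr and ri forms above share the same MEMri operand; they differ
+ // only in which C++ selector (SelectADDRrr vs. SelectADDRri) is allowed to
+ // match the address.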
+} + +multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, + PatFrag pat_store> { + def rr32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr32:$a)]>, + Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr64:$a)]>, + Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri32:$a)]>, + Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri64:$a)]>, + Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs), + (ins RC:$d, MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii32:$a)]>, + Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs), + (ins RC:$d, MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii64:$a)]>, + Requires<[Use64BitAddresses]>; +} + +multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> { + def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a), + !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (load_global ADDRlocal32:$a))]>; + def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a), + !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (load_global ADDRlocal64:$a))]>; + def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a), + !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), + [(store_global RC:$d, ADDRlocal32:$a)]>; + def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a), + !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), + [(store_global RC:$d, ADDRlocal64:$a)]>; +} + +multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> { + let hasSideEffects = 1 in { + def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a), + !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (PTXloadparam texternalsym:$a))]>; + def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a), + !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")), + [(PTXstoreparam texternalsym:$d, RC:$a)]>; + } +} + +multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { + defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; + defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; + defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; + defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; + defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; +} + +multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { + defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; + defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; + defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; + defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; + defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; +} + + + +//===----------------------------------------------------------------------===// +// Instruction definitions for loads/stores +//===----------------------------------------------------------------------===// + +// Global/shared stores +defm STg : PTX_ST_ALL<"st.global", store_global>; +defm STs : PTX_ST_ALL<"st.shared", store_shared>; + +// Global/shared/constant loads +defm LDg : PTX_LD_ALL<"ld.global", load_global>; +defm LDc : PTX_LD_ALL<"ld.const", load_constant>; +defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; + +// Param 
loads/stores +defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>; +defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>; +defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>; +defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>; +defm PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>; +defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>; + +// Local loads/stores +defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>; +defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>; +defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>; +defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>; +defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>; +defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>; + diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td index 8d97909..9de1cb6 100644 --- a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td +++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td @@ -25,37 +25,63 @@ class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop> // TODO Add read vector-version of special registers -//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>; -def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>; -def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>; -def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>; -def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>; +//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", +// int_ptx_read_tid_r64>; +def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", + int_ptx_read_tid_x>; +def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", + int_ptx_read_tid_y>; +def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", + int_ptx_read_tid_z>; +def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", + int_ptx_read_tid_w>; -//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>; -def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>; -def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>; -def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>; -def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>; +//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", +// int_ptx_read_ntid_r64>; +def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", + int_ptx_read_ntid_x>; +def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", + int_ptx_read_ntid_y>; +def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", + int_ptx_read_ntid_z>; +def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", + int_ptx_read_ntid_w>; -def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>; -def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>; -def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>; +def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", + int_ptx_read_laneid>; +def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", + int_ptx_read_warpid>; +def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", + int_ptx_read_nwarpid>; -//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>; -def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>; -def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", 
int_ptx_read_ctaid_y>; -def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>; -def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>; +//def PTX_READ_CTAID_R64 : +//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>; +def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", + int_ptx_read_ctaid_x>; +def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", + int_ptx_read_ctaid_y>; +def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", + int_ptx_read_ctaid_z>; +def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", + int_ptx_read_ctaid_w>; -//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>; -def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>; -def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>; -def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>; -def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>; +//def PTX_READ_NCTAID_R64 : +//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>; +def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", + int_ptx_read_nctaid_x>; +def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", + int_ptx_read_nctaid_y>; +def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", + int_ptx_read_nctaid_z>; +def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", + int_ptx_read_nctaid_w>; -def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>; -def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>; -def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>; +def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", + int_ptx_read_smid>; +def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", + int_ptx_read_nsmid>; +def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", + int_ptx_read_gridid>; def PTX_READ_LANEMASK_EQ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>; diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp index b13a3da..468ce93 100644 --- a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -100,7 +100,7 @@ public: /// @{ virtual void ChangeSection(const MCSection *Section); - virtual void InitSections() {} + virtual void InitSections() { /* PTX does not use sections */ } virtual void EmitLabel(MCSymbol *Symbol); @@ -132,7 +132,9 @@ public: /// /// @param Symbol - The common symbol to emit. /// @param Size - The size of the common symbol. - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + /// @param ByteAlignment - The alignment of the common symbol in bytes. 
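+ /// PTX emits no data sections, so the streamer implements this as a no-op.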
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0); @@ -233,7 +235,7 @@ void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) { void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - //assert(getCurrentSection() && "Cannot emit before setting section!"); + assert(getCurrentSection() && "Cannot emit before setting section!"); OS << *Symbol << MAI.getLabelSuffix(); EmitEOL(); @@ -283,7 +285,8 @@ void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} -void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} +void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, unsigned Size, unsigned ByteAlignment) {} @@ -510,7 +513,7 @@ void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) { // If we have an AsmPrinter, use that to print, otherwise print the MCInst. if (InstPrinter) - InstPrinter->printInst(&Inst, OS); + InstPrinter->printInst(&Inst, OS, ""); else Inst.print(OS, &MAI); EmitEOL(); @@ -533,7 +536,7 @@ namespace llvm { formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, bool useCFI, MCInstPrinter *IP, - MCCodeEmitter *CE, TargetAsmBackend *TAB, + MCCodeEmitter *CE, MCAsmBackend *MAB, bool ShowInst) { return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc, IP, CE, ShowInst); diff --git a/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp new file mode 100644 index 0000000..142e639 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp @@ -0,0 +1,32 @@ +//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower PTX MachineInstrs to their corresponding +// MCInst records. 
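+// Lowering is a straight 1:1 opcode copy; per-operand lowering is delegated
+// to PTXAsmPrinter::lowerOperand.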
+// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXAsmPrinter.h" +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + PTXAsmPrinter &AP) { + OutMI.setOpcode(MI->getOpcode()); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp; + OutMI.addOperand(AP.lowerOperand(MO)); + } +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp index 6fe9e6c..b33a273 100644 --- a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -52,36 +52,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n"); - - DEBUG(dbgs() - << "PTX::NoRegister == " << PTX::NoRegister << "\n" - << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n"); - - DEBUG(for (unsigned reg = PTX::NoRegister + 1; - reg < PTX::NUM_TARGET_REGS; ++reg) - if (MRI.isPhysRegUsed(reg)) - dbgs() << "Used Reg: " << reg << "\n";); - - // FIXME: This is a slow linear scanning - for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg) - if (MRI.isPhysRegUsed(reg) && - !MFI->isRetReg(reg) && - (MFI->isKernel() || !MFI->isArgReg(reg))) - MFI->addLocalVarReg(reg); - - // Notify MachineFunctionInfo that I've done adding local var reg - MFI->doneAddLocalVar(); - - DEBUG(for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(); - i != e; ++i) - dbgs() << "Arg Reg: " << *i << "\n";); - - DEBUG(for (PTXMachineFunctionInfo::reg_iterator - i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); - i != e; ++i) - dbgs() << "Local Var Reg: " << *i << "\n";); + // Generate list of all virtual registers used in this function + for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + const TargetRegisterClass *TRC = MRI.getRegClass(Reg); + MFI->addVirtualRegister(TRC, Reg); + } return false; } diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h index 9d65f5b..3b985f7 100644 --- a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -15,75 +15,148 @@ #define PTX_MACHINE_FUNCTION_INFO_H #include "PTX.h" +#include "PTXParamManager.h" +#include "PTXRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { + /// PTXMachineFunctionInfo - This class is derived from MachineFunction and /// contains private PTX target-specific information for each MachineFunction. 
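+/// In particular, it records every virtual register used by the function and
+/// assigns each one the name it will carry in the emitted PTX.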
/// class PTXMachineFunctionInfo : public MachineFunctionInfo { private: - bool is_kernel; - std::vector<unsigned> reg_arg, reg_local_var; - std::vector<unsigned> reg_ret; - bool _isDoneAddArg; + bool IsKernel; + DenseSet<unsigned> RegArgs; + DenseSet<unsigned> RegRets; + + typedef std::vector<unsigned> RegisterList; + typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap; + typedef DenseMap<unsigned, std::string> RegisterNameMap; + typedef DenseMap<int, std::string> FrameMap; + + RegisterMap UsedRegs; + RegisterNameMap RegNames; + FrameMap FrameSymbols; + + PTXParamManager ParamManager; public: + typedef DenseSet<unsigned>::const_iterator reg_iterator; + PTXMachineFunctionInfo(MachineFunction &MF) - : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) { - reg_arg.reserve(8); - reg_local_var.reserve(32); + : IsKernel(false) { + UsedRegs[PTX::RegPredRegisterClass] = RegisterList(); + UsedRegs[PTX::RegI16RegisterClass] = RegisterList(); + UsedRegs[PTX::RegI32RegisterClass] = RegisterList(); + UsedRegs[PTX::RegI64RegisterClass] = RegisterList(); + UsedRegs[PTX::RegF32RegisterClass] = RegisterList(); + UsedRegs[PTX::RegF64RegisterClass] = RegisterList(); } - void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; } - - void addArgReg(unsigned reg) { reg_arg.push_back(reg); } - void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); } - void addRetReg(unsigned reg) { - if (!isRetReg(reg)) { - reg_ret.push_back(reg); + /// getParamManager - Returns the PTXParamManager instance for this function. + PTXParamManager& getParamManager() { return ParamManager; } + const PTXParamManager& getParamManager() const { return ParamManager; } + + /// setKernel/isKernel - Gets/sets a flag that indicates if this function is + /// a PTX kernel function. + void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; } + bool isKernel() const { return IsKernel; } + + /// argreg_begin/argreg_end - Returns iterators to the set of registers + /// containing function arguments. + reg_iterator argreg_begin() const { return RegArgs.begin(); } + reg_iterator argreg_end() const { return RegArgs.end(); } + + /// retreg_begin/retreg_end - Returns iterators to the set of registers + /// containing the function return values. + reg_iterator retreg_begin() const { return RegRets.begin(); } + reg_iterator retreg_end() const { return RegRets.end(); } + + /// addRetReg - Adds a register to the set of return-value registers. + void addRetReg(unsigned Reg) { + if (!RegRets.count(Reg)) { + RegRets.insert(Reg); + std::string name; + name = "%ret"; + name += utostr(RegRets.size() - 1); + RegNames[Reg] = name; } } - void doneAddArg(void) { - _isDoneAddArg = true; + /// addArgReg - Adds a register to the set of function argument registers. 
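+ /// Argument registers are named %param0, %param1, ... in the order they
+ /// are added.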
+ void addArgReg(unsigned Reg) { + RegArgs.insert(Reg); + std::string name; + name = "%param"; + name += utostr(RegArgs.size() - 1); + RegNames[Reg] = name; } - void doneAddLocalVar(void) {} - - bool isKernel() const { return is_kernel; } - typedef std::vector<unsigned>::const_iterator reg_iterator; - typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator; - typedef std::vector<unsigned>::const_iterator ret_iterator; - - bool argRegEmpty() const { return reg_arg.empty(); } - int getNumArg() const { return reg_arg.size(); } - reg_iterator argRegBegin() const { return reg_arg.begin(); } - reg_iterator argRegEnd() const { return reg_arg.end(); } - reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); } - reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); } - - bool localVarRegEmpty() const { return reg_local_var.empty(); } - reg_iterator localVarRegBegin() const { return reg_local_var.begin(); } - reg_iterator localVarRegEnd() const { return reg_local_var.end(); } - - bool retRegEmpty() const { return reg_ret.empty(); } - int getNumRet() const { return reg_ret.size(); } - ret_iterator retRegBegin() const { return reg_ret.begin(); } - ret_iterator retRegEnd() const { return reg_ret.end(); } + /// addVirtualRegister - Adds a virtual register to the set of all used + /// registers in the function. + void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) { + std::string name; + + // Do not count registers that are argument/return registers. + if (!RegRets.count(Reg) && !RegArgs.count(Reg)) { + UsedRegs[TRC].push_back(Reg); + if (TRC == PTX::RegPredRegisterClass) + name = "%p"; + else if (TRC == PTX::RegI16RegisterClass) + name = "%rh"; + else if (TRC == PTX::RegI32RegisterClass) + name = "%r"; + else if (TRC == PTX::RegI64RegisterClass) + name = "%rd"; + else if (TRC == PTX::RegF32RegisterClass) + name = "%f"; + else if (TRC == PTX::RegF64RegisterClass) + name = "%fd"; + else + llvm_unreachable("Invalid register class"); + + name += utostr(UsedRegs[TRC].size() - 1); + RegNames[Reg] = name; + } + } - bool isArgReg(unsigned reg) const { - return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end(); + /// getRegisterName - Returns the name of the specified virtual register. This + /// name is used during PTX emission. + const char *getRegisterName(unsigned Reg) const { + if (RegNames.count(Reg)) + return RegNames.find(Reg)->second.c_str(); + else if (Reg == PTX::NoRegister) + return "%noreg"; + else + llvm_unreachable("Register not in register name map"); } - bool isRetReg(unsigned reg) const { - return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end(); + /// getNumRegistersForClass - Returns the number of virtual registers that are + /// used for the specified register class. + unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const { + return UsedRegs.lookup(TRC).size(); } - bool isLocalVarReg(unsigned reg) const { - return std::find(reg_local_var.begin(), reg_local_var.end(), reg) - != reg_local_var.end(); + /// getFrameSymbol - Returns the symbol name for the given FrameIndex. + const char* getFrameSymbol(int FrameIndex) { + if (FrameSymbols.count(FrameIndex)) { + return FrameSymbols.lookup(FrameIndex).c_str(); + } else { + std::string Name = "__local"; + Name += utostr(FrameIndex); + // The whole point of caching this name is to ensure the pointer we pass + // to any getExternalSymbol() calls will remain valid for the lifetime of + // the back-end instance. 
This is to work around an issue in SelectionDAG + // where symbol names are expected to be life-long strings. + FrameSymbols[FrameIndex] = Name; + return FrameSymbols[FrameIndex].c_str(); + } } }; // class PTXMachineFunctionInfo } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp new file mode 100644 index 0000000..7753787 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp @@ -0,0 +1,73 @@ +//===- PTXParamManager.cpp - Manager for .param variables -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXParamManager class. +// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXParamManager.h" +#include "llvm/ADT/StringExtras.h" + +using namespace llvm; + +PTXParamManager::PTXParamManager() { +} + +unsigned PTXParamManager::addArgumentParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_ARGUMENT; + Param.Size = Size; + + std::string Name; + Name = "__param_"; + Name += utostr(ArgumentParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + ArgumentParams.push_back(Index); + + return Index; +} + +unsigned PTXParamManager::addReturnParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_RETURN; + Param.Size = Size; + + std::string Name; + Name = "__ret_"; + Name += utostr(ReturnParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + ReturnParams.push_back(Index); + + return Index; +} + +unsigned PTXParamManager::addLocalParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_LOCAL; + Param.Size = Size; + + std::string Name; + Name = "__localparam_"; + Name += utostr(LocalParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + LocalParams.push_back(Index); + + return Index; +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.h b/contrib/llvm/lib/Target/PTX/PTXParamManager.h new file mode 100644 index 0000000..9fd2de5 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.h @@ -0,0 +1,86 @@ +//===- PTXParamManager.h - Manager for .param variables ----------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PTXParamManager class, which manages all defined .param +// variables for a particular function. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_PARAM_MANAGER_H +#define PTX_PARAM_MANAGER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +/// PTXParamManager - This class manages all .param variables defined for a +/// particular function. 
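+/// Each addArgumentParam/addReturnParam/addLocalParam call returns a stable
+/// index that the getParamName/getParamSize queries accept.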
+class PTXParamManager { +private: + + /// PTXParamType - Type of a .param variable + enum PTXParamType { + PTX_PARAM_TYPE_ARGUMENT, + PTX_PARAM_TYPE_RETURN, + PTX_PARAM_TYPE_LOCAL + }; + + /// PTXParam - Definition of a PTX .param variable + struct PTXParam { + PTXParamType Type; + unsigned Size; + std::string Name; + }; + + DenseMap<unsigned, PTXParam> AllParams; + SmallVector<unsigned, 4> ArgumentParams; + SmallVector<unsigned, 4> ReturnParams; + SmallVector<unsigned, 4> LocalParams; + +public: + + typedef SmallVector<unsigned, 4>::const_iterator param_iterator; + + PTXParamManager(); + + param_iterator arg_begin() const { return ArgumentParams.begin(); } + param_iterator arg_end() const { return ArgumentParams.end(); } + param_iterator ret_begin() const { return ReturnParams.begin(); } + param_iterator ret_end() const { return ReturnParams.end(); } + param_iterator local_begin() const { return LocalParams.begin(); } + param_iterator local_end() const { return LocalParams.end(); } + + /// addArgumentParam - Returns a new .param used as an argument. + unsigned addArgumentParam(unsigned Size); + + /// addReturnParam - Returns a new .param used as a return argument. + unsigned addReturnParam(unsigned Size); + + /// addLocalParam - Returns a new .param used as a local .param variable. + unsigned addLocalParam(unsigned Size); + + /// getParamName - Returns the name of the parameter as a string. + const std::string &getParamName(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return AllParams.find(Param)->second.Name; + } + + /// getParamSize - Returns the size of the parameter in bits. + unsigned getParamSize(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return AllParams.find(Param)->second.Size; + } + +}; + +} + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp new file mode 100644 index 0000000..2d2d5c3 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp @@ -0,0 +1,58 @@ +//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a register allocator for PTX code. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-reg-alloc" + +#include "PTX.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegAllocRegistry.h" + +using namespace llvm; + +namespace { + // Special register allocator for PTX. + class PTXRegAlloc : public MachineFunctionPass { + public: + static char ID; + PTXRegAlloc() : MachineFunctionPass(ID) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } + + virtual const char* getPassName() const { + return "PTX Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(PHIEliminationID); + AU.addRequiredID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF) { + // We do not actually do anything (at least not yet). 
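+ // Keeping every register virtual is deliberate: PTX is itself a virtual
+ // ISA, and PTXMFInfoExtract later assigns each virtual register a PTX name.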
+ return false; + } + }; + + char PTXRegAlloc::ID = 0; + + static RegisterRegAlloc + ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator); +} + +FunctionPass *llvm::createPTXRegisterAllocator() { + return new PTXRegAlloc(); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp index cb56ea9..c806266 100644 --- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp @@ -14,6 +14,9 @@ #include "PTX.h" #include "PTXRegisterInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -23,15 +26,23 @@ using namespace llvm; PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII) - : PTXGenRegisterInfo() { + const TargetInstrInfo &tii) + // PTX does not have a return address register. + : PTXGenRegisterInfo(0), TII(tii) { } void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { unsigned Index; - MachineInstr& MI = *II; + MachineInstr &MI = *II; + //MachineBasicBlock &MBB = *MI.getParent(); + //DebugLoc dl = MI.getDebugLoc(); + //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + + //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass); + + llvm_unreachable("FrameIndex should have been previously eliminated!"); Index = 0; while (!MI.getOperand(Index).isFI()) { @@ -46,6 +57,18 @@ void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n"); DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n"); + //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32)) + //.addReg(Reg, RegState::Define).addImm(FrameIndex); + //if (MI2->findFirstPredOperandIdx() == -1) { + // MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); + // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + //} + //MI2->dump(); + + //MachineOperand ESOp = MachineOperand::CreateES("__local__"); + // This frame index is post stack slot re-use assignments + //MI.getOperand(Index).ChangeToRegister(Reg, false); MI.getOperand(Index).ChangeToImmediate(FrameIndex); + //MI.getOperand(Index) = ESOp; } diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h index 0b63cb6..55fafe4 100644 --- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h @@ -25,8 +25,12 @@ class PTXTargetMachine; class MachineFunction; struct PTXRegisterInfo : public PTXGenRegisterInfo { +private: + const TargetInstrInfo &TII; + +public: PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII); + const TargetInstrInfo &tii); virtual const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const { @@ -47,18 +51,6 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo { llvm_unreachable("PTX does not have a frame register"); return 0; } - - virtual unsigned getRARegister() const { - llvm_unreachable("PTX does not have a return address register"); - return 0; - } - - virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const { - return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); - } - virtual int getLLVMRegNum(unsigned RegNum, bool isEH) const { - return PTXGenRegisterInfo::getLLVMRegNumFull(RegNum, 0); - } 
}; // struct PTXRegisterInfo } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td index 1313d24..6ed6d3f 100644 --- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td @@ -20,536 +20,18 @@ class PTXReg<string n> : Register<n> { // Registers //===----------------------------------------------------------------------===// -///===- Predicate Registers -----------------------------------------------===// - -def P0 : PTXReg<"p0">; -def P1 : PTXReg<"p1">; -def P2 : PTXReg<"p2">; -def P3 : PTXReg<"p3">; -def P4 : PTXReg<"p4">; -def P5 : PTXReg<"p5">; -def P6 : PTXReg<"p6">; -def P7 : PTXReg<"p7">; -def P8 : PTXReg<"p8">; -def P9 : PTXReg<"p9">; -def P10 : PTXReg<"p10">; -def P11 : PTXReg<"p11">; -def P12 : PTXReg<"p12">; -def P13 : PTXReg<"p13">; -def P14 : PTXReg<"p14">; -def P15 : PTXReg<"p15">; -def P16 : PTXReg<"p16">; -def P17 : PTXReg<"p17">; -def P18 : PTXReg<"p18">; -def P19 : PTXReg<"p19">; -def P20 : PTXReg<"p20">; -def P21 : PTXReg<"p21">; -def P22 : PTXReg<"p22">; -def P23 : PTXReg<"p23">; -def P24 : PTXReg<"p24">; -def P25 : PTXReg<"p25">; -def P26 : PTXReg<"p26">; -def P27 : PTXReg<"p27">; -def P28 : PTXReg<"p28">; -def P29 : PTXReg<"p29">; -def P30 : PTXReg<"p30">; -def P31 : PTXReg<"p31">; -def P32 : PTXReg<"p32">; -def P33 : PTXReg<"p33">; -def P34 : PTXReg<"p34">; -def P35 : PTXReg<"p35">; -def P36 : PTXReg<"p36">; -def P37 : PTXReg<"p37">; -def P38 : PTXReg<"p38">; -def P39 : PTXReg<"p39">; -def P40 : PTXReg<"p40">; -def P41 : PTXReg<"p41">; -def P42 : PTXReg<"p42">; -def P43 : PTXReg<"p43">; -def P44 : PTXReg<"p44">; -def P45 : PTXReg<"p45">; -def P46 : PTXReg<"p46">; -def P47 : PTXReg<"p47">; -def P48 : PTXReg<"p48">; -def P49 : PTXReg<"p49">; -def P50 : PTXReg<"p50">; -def P51 : PTXReg<"p51">; -def P52 : PTXReg<"p52">; -def P53 : PTXReg<"p53">; -def P54 : PTXReg<"p54">; -def P55 : PTXReg<"p55">; -def P56 : PTXReg<"p56">; -def P57 : PTXReg<"p57">; -def P58 : PTXReg<"p58">; -def P59 : PTXReg<"p59">; -def P60 : PTXReg<"p60">; -def P61 : PTXReg<"p61">; -def P62 : PTXReg<"p62">; -def P63 : PTXReg<"p63">; -def P64 : PTXReg<"p64">; -def P65 : PTXReg<"p65">; -def P66 : PTXReg<"p66">; -def P67 : PTXReg<"p67">; -def P68 : PTXReg<"p68">; -def P69 : PTXReg<"p69">; -def P70 : PTXReg<"p70">; -def P71 : PTXReg<"p71">; -def P72 : PTXReg<"p72">; -def P73 : PTXReg<"p73">; -def P74 : PTXReg<"p74">; -def P75 : PTXReg<"p75">; -def P76 : PTXReg<"p76">; -def P77 : PTXReg<"p77">; -def P78 : PTXReg<"p78">; -def P79 : PTXReg<"p79">; -def P80 : PTXReg<"p80">; -def P81 : PTXReg<"p81">; -def P82 : PTXReg<"p82">; -def P83 : PTXReg<"p83">; -def P84 : PTXReg<"p84">; -def P85 : PTXReg<"p85">; -def P86 : PTXReg<"p86">; -def P87 : PTXReg<"p87">; -def P88 : PTXReg<"p88">; -def P89 : PTXReg<"p89">; -def P90 : PTXReg<"p90">; -def P91 : PTXReg<"p91">; -def P92 : PTXReg<"p92">; -def P93 : PTXReg<"p93">; -def P94 : PTXReg<"p94">; -def P95 : PTXReg<"p95">; -def P96 : PTXReg<"p96">; -def P97 : PTXReg<"p97">; -def P98 : PTXReg<"p98">; -def P99 : PTXReg<"p99">; -def P100 : PTXReg<"p100">; -def P101 : PTXReg<"p101">; -def P102 : PTXReg<"p102">; -def P103 : PTXReg<"p103">; -def P104 : PTXReg<"p104">; -def P105 : PTXReg<"p105">; -def P106 : PTXReg<"p106">; -def P107 : PTXReg<"p107">; -def P108 : PTXReg<"p108">; -def P109 : PTXReg<"p109">; -def P110 : PTXReg<"p110">; -def P111 : PTXReg<"p111">; -def P112 : PTXReg<"p112">; -def P113 : PTXReg<"p113">; -def P114 : PTXReg<"p114">; -def P115 : 
PTXReg<"p115">; -def P116 : PTXReg<"p116">; -def P117 : PTXReg<"p117">; -def P118 : PTXReg<"p118">; -def P119 : PTXReg<"p119">; -def P120 : PTXReg<"p120">; -def P121 : PTXReg<"p121">; -def P122 : PTXReg<"p122">; -def P123 : PTXReg<"p123">; -def P124 : PTXReg<"p124">; -def P125 : PTXReg<"p125">; -def P126 : PTXReg<"p126">; -def P127 : PTXReg<"p127">; - -///===- 16-Bit Registers --------------------------------------------------===// - -def RH0 : PTXReg<"rh0">; -def RH1 : PTXReg<"rh1">; -def RH2 : PTXReg<"rh2">; -def RH3 : PTXReg<"rh3">; -def RH4 : PTXReg<"rh4">; -def RH5 : PTXReg<"rh5">; -def RH6 : PTXReg<"rh6">; -def RH7 : PTXReg<"rh7">; -def RH8 : PTXReg<"rh8">; -def RH9 : PTXReg<"rh9">; -def RH10 : PTXReg<"rh10">; -def RH11 : PTXReg<"rh11">; -def RH12 : PTXReg<"rh12">; -def RH13 : PTXReg<"rh13">; -def RH14 : PTXReg<"rh14">; -def RH15 : PTXReg<"rh15">; -def RH16 : PTXReg<"rh16">; -def RH17 : PTXReg<"rh17">; -def RH18 : PTXReg<"rh18">; -def RH19 : PTXReg<"rh19">; -def RH20 : PTXReg<"rh20">; -def RH21 : PTXReg<"rh21">; -def RH22 : PTXReg<"rh22">; -def RH23 : PTXReg<"rh23">; -def RH24 : PTXReg<"rh24">; -def RH25 : PTXReg<"rh25">; -def RH26 : PTXReg<"rh26">; -def RH27 : PTXReg<"rh27">; -def RH28 : PTXReg<"rh28">; -def RH29 : PTXReg<"rh29">; -def RH30 : PTXReg<"rh30">; -def RH31 : PTXReg<"rh31">; -def RH32 : PTXReg<"rh32">; -def RH33 : PTXReg<"rh33">; -def RH34 : PTXReg<"rh34">; -def RH35 : PTXReg<"rh35">; -def RH36 : PTXReg<"rh36">; -def RH37 : PTXReg<"rh37">; -def RH38 : PTXReg<"rh38">; -def RH39 : PTXReg<"rh39">; -def RH40 : PTXReg<"rh40">; -def RH41 : PTXReg<"rh41">; -def RH42 : PTXReg<"rh42">; -def RH43 : PTXReg<"rh43">; -def RH44 : PTXReg<"rh44">; -def RH45 : PTXReg<"rh45">; -def RH46 : PTXReg<"rh46">; -def RH47 : PTXReg<"rh47">; -def RH48 : PTXReg<"rh48">; -def RH49 : PTXReg<"rh49">; -def RH50 : PTXReg<"rh50">; -def RH51 : PTXReg<"rh51">; -def RH52 : PTXReg<"rh52">; -def RH53 : PTXReg<"rh53">; -def RH54 : PTXReg<"rh54">; -def RH55 : PTXReg<"rh55">; -def RH56 : PTXReg<"rh56">; -def RH57 : PTXReg<"rh57">; -def RH58 : PTXReg<"rh58">; -def RH59 : PTXReg<"rh59">; -def RH60 : PTXReg<"rh60">; -def RH61 : PTXReg<"rh61">; -def RH62 : PTXReg<"rh62">; -def RH63 : PTXReg<"rh63">; -def RH64 : PTXReg<"rh64">; -def RH65 : PTXReg<"rh65">; -def RH66 : PTXReg<"rh66">; -def RH67 : PTXReg<"rh67">; -def RH68 : PTXReg<"rh68">; -def RH69 : PTXReg<"rh69">; -def RH70 : PTXReg<"rh70">; -def RH71 : PTXReg<"rh71">; -def RH72 : PTXReg<"rh72">; -def RH73 : PTXReg<"rh73">; -def RH74 : PTXReg<"rh74">; -def RH75 : PTXReg<"rh75">; -def RH76 : PTXReg<"rh76">; -def RH77 : PTXReg<"rh77">; -def RH78 : PTXReg<"rh78">; -def RH79 : PTXReg<"rh79">; -def RH80 : PTXReg<"rh80">; -def RH81 : PTXReg<"rh81">; -def RH82 : PTXReg<"rh82">; -def RH83 : PTXReg<"rh83">; -def RH84 : PTXReg<"rh84">; -def RH85 : PTXReg<"rh85">; -def RH86 : PTXReg<"rh86">; -def RH87 : PTXReg<"rh87">; -def RH88 : PTXReg<"rh88">; -def RH89 : PTXReg<"rh89">; -def RH90 : PTXReg<"rh90">; -def RH91 : PTXReg<"rh91">; -def RH92 : PTXReg<"rh92">; -def RH93 : PTXReg<"rh93">; -def RH94 : PTXReg<"rh94">; -def RH95 : PTXReg<"rh95">; -def RH96 : PTXReg<"rh96">; -def RH97 : PTXReg<"rh97">; -def RH98 : PTXReg<"rh98">; -def RH99 : PTXReg<"rh99">; -def RH100 : PTXReg<"rh100">; -def RH101 : PTXReg<"rh101">; -def RH102 : PTXReg<"rh102">; -def RH103 : PTXReg<"rh103">; -def RH104 : PTXReg<"rh104">; -def RH105 : PTXReg<"rh105">; -def RH106 : PTXReg<"rh106">; -def RH107 : PTXReg<"rh107">; -def RH108 : PTXReg<"rh108">; -def RH109 : PTXReg<"rh109">; -def RH110 : PTXReg<"rh110">; -def 
RH111 : PTXReg<"rh111">; -def RH112 : PTXReg<"rh112">; -def RH113 : PTXReg<"rh113">; -def RH114 : PTXReg<"rh114">; -def RH115 : PTXReg<"rh115">; -def RH116 : PTXReg<"rh116">; -def RH117 : PTXReg<"rh117">; -def RH118 : PTXReg<"rh118">; -def RH119 : PTXReg<"rh119">; -def RH120 : PTXReg<"rh120">; -def RH121 : PTXReg<"rh121">; -def RH122 : PTXReg<"rh122">; -def RH123 : PTXReg<"rh123">; -def RH124 : PTXReg<"rh124">; -def RH125 : PTXReg<"rh125">; -def RH126 : PTXReg<"rh126">; -def RH127 : PTXReg<"rh127">; - -///===- 32-Bit Registers --------------------------------------------------===// - -def R0 : PTXReg<"r0">; -def R1 : PTXReg<"r1">; -def R2 : PTXReg<"r2">; -def R3 : PTXReg<"r3">; -def R4 : PTXReg<"r4">; -def R5 : PTXReg<"r5">; -def R6 : PTXReg<"r6">; -def R7 : PTXReg<"r7">; -def R8 : PTXReg<"r8">; -def R9 : PTXReg<"r9">; -def R10 : PTXReg<"r10">; -def R11 : PTXReg<"r11">; -def R12 : PTXReg<"r12">; -def R13 : PTXReg<"r13">; -def R14 : PTXReg<"r14">; -def R15 : PTXReg<"r15">; -def R16 : PTXReg<"r16">; -def R17 : PTXReg<"r17">; -def R18 : PTXReg<"r18">; -def R19 : PTXReg<"r19">; -def R20 : PTXReg<"r20">; -def R21 : PTXReg<"r21">; -def R22 : PTXReg<"r22">; -def R23 : PTXReg<"r23">; -def R24 : PTXReg<"r24">; -def R25 : PTXReg<"r25">; -def R26 : PTXReg<"r26">; -def R27 : PTXReg<"r27">; -def R28 : PTXReg<"r28">; -def R29 : PTXReg<"r29">; -def R30 : PTXReg<"r30">; -def R31 : PTXReg<"r31">; -def R32 : PTXReg<"r32">; -def R33 : PTXReg<"r33">; -def R34 : PTXReg<"r34">; -def R35 : PTXReg<"r35">; -def R36 : PTXReg<"r36">; -def R37 : PTXReg<"r37">; -def R38 : PTXReg<"r38">; -def R39 : PTXReg<"r39">; -def R40 : PTXReg<"r40">; -def R41 : PTXReg<"r41">; -def R42 : PTXReg<"r42">; -def R43 : PTXReg<"r43">; -def R44 : PTXReg<"r44">; -def R45 : PTXReg<"r45">; -def R46 : PTXReg<"r46">; -def R47 : PTXReg<"r47">; -def R48 : PTXReg<"r48">; -def R49 : PTXReg<"r49">; -def R50 : PTXReg<"r50">; -def R51 : PTXReg<"r51">; -def R52 : PTXReg<"r52">; -def R53 : PTXReg<"r53">; -def R54 : PTXReg<"r54">; -def R55 : PTXReg<"r55">; -def R56 : PTXReg<"r56">; -def R57 : PTXReg<"r57">; -def R58 : PTXReg<"r58">; -def R59 : PTXReg<"r59">; -def R60 : PTXReg<"r60">; -def R61 : PTXReg<"r61">; -def R62 : PTXReg<"r62">; -def R63 : PTXReg<"r63">; -def R64 : PTXReg<"r64">; -def R65 : PTXReg<"r65">; -def R66 : PTXReg<"r66">; -def R67 : PTXReg<"r67">; -def R68 : PTXReg<"r68">; -def R69 : PTXReg<"r69">; -def R70 : PTXReg<"r70">; -def R71 : PTXReg<"r71">; -def R72 : PTXReg<"r72">; -def R73 : PTXReg<"r73">; -def R74 : PTXReg<"r74">; -def R75 : PTXReg<"r75">; -def R76 : PTXReg<"r76">; -def R77 : PTXReg<"r77">; -def R78 : PTXReg<"r78">; -def R79 : PTXReg<"r79">; -def R80 : PTXReg<"r80">; -def R81 : PTXReg<"r81">; -def R82 : PTXReg<"r82">; -def R83 : PTXReg<"r83">; -def R84 : PTXReg<"r84">; -def R85 : PTXReg<"r85">; -def R86 : PTXReg<"r86">; -def R87 : PTXReg<"r87">; -def R88 : PTXReg<"r88">; -def R89 : PTXReg<"r89">; -def R90 : PTXReg<"r90">; -def R91 : PTXReg<"r91">; -def R92 : PTXReg<"r92">; -def R93 : PTXReg<"r93">; -def R94 : PTXReg<"r94">; -def R95 : PTXReg<"r95">; -def R96 : PTXReg<"r96">; -def R97 : PTXReg<"r97">; -def R98 : PTXReg<"r98">; -def R99 : PTXReg<"r99">; -def R100 : PTXReg<"r100">; -def R101 : PTXReg<"r101">; -def R102 : PTXReg<"r102">; -def R103 : PTXReg<"r103">; -def R104 : PTXReg<"r104">; -def R105 : PTXReg<"r105">; -def R106 : PTXReg<"r106">; -def R107 : PTXReg<"r107">; -def R108 : PTXReg<"r108">; -def R109 : PTXReg<"r109">; -def R110 : PTXReg<"r110">; -def R111 : PTXReg<"r111">; -def R112 : PTXReg<"r112">; -def R113 : 
PTXReg<"r113">; -def R114 : PTXReg<"r114">; -def R115 : PTXReg<"r115">; -def R116 : PTXReg<"r116">; -def R117 : PTXReg<"r117">; -def R118 : PTXReg<"r118">; -def R119 : PTXReg<"r119">; -def R120 : PTXReg<"r120">; -def R121 : PTXReg<"r121">; -def R122 : PTXReg<"r122">; -def R123 : PTXReg<"r123">; -def R124 : PTXReg<"r124">; -def R125 : PTXReg<"r125">; -def R126 : PTXReg<"r126">; -def R127 : PTXReg<"r127">; - -///===- 64-Bit Registers --------------------------------------------------===// - -def RD0 : PTXReg<"rd0">; -def RD1 : PTXReg<"rd1">; -def RD2 : PTXReg<"rd2">; -def RD3 : PTXReg<"rd3">; -def RD4 : PTXReg<"rd4">; -def RD5 : PTXReg<"rd5">; -def RD6 : PTXReg<"rd6">; -def RD7 : PTXReg<"rd7">; -def RD8 : PTXReg<"rd8">; -def RD9 : PTXReg<"rd9">; -def RD10 : PTXReg<"rd10">; -def RD11 : PTXReg<"rd11">; -def RD12 : PTXReg<"rd12">; -def RD13 : PTXReg<"rd13">; -def RD14 : PTXReg<"rd14">; -def RD15 : PTXReg<"rd15">; -def RD16 : PTXReg<"rd16">; -def RD17 : PTXReg<"rd17">; -def RD18 : PTXReg<"rd18">; -def RD19 : PTXReg<"rd19">; -def RD20 : PTXReg<"rd20">; -def RD21 : PTXReg<"rd21">; -def RD22 : PTXReg<"rd22">; -def RD23 : PTXReg<"rd23">; -def RD24 : PTXReg<"rd24">; -def RD25 : PTXReg<"rd25">; -def RD26 : PTXReg<"rd26">; -def RD27 : PTXReg<"rd27">; -def RD28 : PTXReg<"rd28">; -def RD29 : PTXReg<"rd29">; -def RD30 : PTXReg<"rd30">; -def RD31 : PTXReg<"rd31">; -def RD32 : PTXReg<"rd32">; -def RD33 : PTXReg<"rd33">; -def RD34 : PTXReg<"rd34">; -def RD35 : PTXReg<"rd35">; -def RD36 : PTXReg<"rd36">; -def RD37 : PTXReg<"rd37">; -def RD38 : PTXReg<"rd38">; -def RD39 : PTXReg<"rd39">; -def RD40 : PTXReg<"rd40">; -def RD41 : PTXReg<"rd41">; -def RD42 : PTXReg<"rd42">; -def RD43 : PTXReg<"rd43">; -def RD44 : PTXReg<"rd44">; -def RD45 : PTXReg<"rd45">; -def RD46 : PTXReg<"rd46">; -def RD47 : PTXReg<"rd47">; -def RD48 : PTXReg<"rd48">; -def RD49 : PTXReg<"rd49">; -def RD50 : PTXReg<"rd50">; -def RD51 : PTXReg<"rd51">; -def RD52 : PTXReg<"rd52">; -def RD53 : PTXReg<"rd53">; -def RD54 : PTXReg<"rd54">; -def RD55 : PTXReg<"rd55">; -def RD56 : PTXReg<"rd56">; -def RD57 : PTXReg<"rd57">; -def RD58 : PTXReg<"rd58">; -def RD59 : PTXReg<"rd59">; -def RD60 : PTXReg<"rd60">; -def RD61 : PTXReg<"rd61">; -def RD62 : PTXReg<"rd62">; -def RD63 : PTXReg<"rd63">; -def RD64 : PTXReg<"rd64">; -def RD65 : PTXReg<"rd65">; -def RD66 : PTXReg<"rd66">; -def RD67 : PTXReg<"rd67">; -def RD68 : PTXReg<"rd68">; -def RD69 : PTXReg<"rd69">; -def RD70 : PTXReg<"rd70">; -def RD71 : PTXReg<"rd71">; -def RD72 : PTXReg<"rd72">; -def RD73 : PTXReg<"rd73">; -def RD74 : PTXReg<"rd74">; -def RD75 : PTXReg<"rd75">; -def RD76 : PTXReg<"rd76">; -def RD77 : PTXReg<"rd77">; -def RD78 : PTXReg<"rd78">; -def RD79 : PTXReg<"rd79">; -def RD80 : PTXReg<"rd80">; -def RD81 : PTXReg<"rd81">; -def RD82 : PTXReg<"rd82">; -def RD83 : PTXReg<"rd83">; -def RD84 : PTXReg<"rd84">; -def RD85 : PTXReg<"rd85">; -def RD86 : PTXReg<"rd86">; -def RD87 : PTXReg<"rd87">; -def RD88 : PTXReg<"rd88">; -def RD89 : PTXReg<"rd89">; -def RD90 : PTXReg<"rd90">; -def RD91 : PTXReg<"rd91">; -def RD92 : PTXReg<"rd92">; -def RD93 : PTXReg<"rd93">; -def RD94 : PTXReg<"rd94">; -def RD95 : PTXReg<"rd95">; -def RD96 : PTXReg<"rd96">; -def RD97 : PTXReg<"rd97">; -def RD98 : PTXReg<"rd98">; -def RD99 : PTXReg<"rd99">; -def RD100 : PTXReg<"rd100">; -def RD101 : PTXReg<"rd101">; -def RD102 : PTXReg<"rd102">; -def RD103 : PTXReg<"rd103">; -def RD104 : PTXReg<"rd104">; -def RD105 : PTXReg<"rd105">; -def RD106 : PTXReg<"rd106">; -def RD107 : PTXReg<"rd107">; -def RD108 : PTXReg<"rd108">; -def RD109 
: PTXReg<"rd109">; -def RD110 : PTXReg<"rd110">; -def RD111 : PTXReg<"rd111">; -def RD112 : PTXReg<"rd112">; -def RD113 : PTXReg<"rd113">; -def RD114 : PTXReg<"rd114">; -def RD115 : PTXReg<"rd115">; -def RD116 : PTXReg<"rd116">; -def RD117 : PTXReg<"rd117">; -def RD118 : PTXReg<"rd118">; -def RD119 : PTXReg<"rd119">; -def RD120 : PTXReg<"rd120">; -def RD121 : PTXReg<"rd121">; -def RD122 : PTXReg<"rd122">; -def RD123 : PTXReg<"rd123">; -def RD124 : PTXReg<"rd124">; -def RD125 : PTXReg<"rd125">; -def RD126 : PTXReg<"rd126">; -def RD127 : PTXReg<"rd127">; +// The generated register info code throws warnings for empty register classes +// (e.g. zero-length arrays), so we use a dummy register here just to prevent +// these warnings. +def DUMMY_REG : PTXReg<"R0">; //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// -def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>; -def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>; -def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>; -def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>; -def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>; -def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>; +def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>; +def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>; +def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>; +def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>; +def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>; +def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>; + diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp new file mode 100644 index 0000000..50ef14a --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp @@ -0,0 +1,149 @@ +//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-selectiondag-info" +#include "PTXTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/SelectionDAG.h" +using namespace llvm; + +PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<PTXSubtarget>()) { +} + +PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { +} + +SDValue +PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. 
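+ // (No limit is enforced here yet; every constant-size memcpy is inlined,
+ // as noted below.)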
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + // Always inline memcpys. In PTX, we do not have a C library that provides + // a memcpy function. + //if (!AlwaysInline) + // return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on. + while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, PointerType, Src, + DAG.getConstant(SrcOff, PointerType)), + SrcPtrInfo.getWithOffset(SrcOff), isVolatile, + false, 0); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, PointerType, Dst, + DAG.getConstant(DstOff, PointerType)), + DstPtrInfo.getWithOffset(DstOff), + isVolatile, false, 0); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, PointerType, Src, + DAG.getConstant(SrcOff, PointerType)), + SrcPtrInfo.getWithOffset(SrcOff), false, false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, PointerType, Dst, + DAG.getConstant(DstOff, PointerType)), + DstPtrInfo.getWithOffset(DstOff), false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} + +SDValue PTXSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { + llvm_unreachable("memset lowering not implemented for PTX yet"); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h new file mode 100644 index 0000000..e0c7167 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h @@ -0,0 +1,53 @@ +//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PTX subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXSELECTIONDAGINFO_H +#define PTXSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target. +/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo. +class PTXSelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can + /// make the right decision when generating code for different targets. + const PTXSubtarget *Subtarget; + +public: + explicit PTXSelectionDAGInfo(const TargetMachine &TM); + ~PTXSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; + + virtual + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, + bool isVolatile, + MachinePointerInfo DstPtrInfo) const; +}; + +} + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp index 8ec646e..1eb57d2 100644 --- a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp @@ -14,7 +14,7 @@ #include "PTXSubtarget.h" #include "PTX.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h index 0921f1f..b946d7c 100644 --- a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h +++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h @@ -114,7 +114,16 @@ class StringRef; (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); } - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + bool callsAreHandled() const { + return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool emitPtrAttribute() const { + return PTXVersion >= PTX_VERSION_2_2; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); }; // class PTXSubtarget } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp index ab926e0..449a3d9 100644 --- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp @@ -14,8 +14,32 @@ #include "PTX.h" #include "PTXTargetMachine.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" #include 
"llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" + using namespace llvm; @@ -25,7 +49,7 @@ namespace llvm { bool useCFI, MCInstPrinter *InstPrint, MCCodeEmitter *CE, - TargetAsmBackend *TAB, + MCAsmBackend *MAB, bool ShowInst); } @@ -43,34 +67,47 @@ namespace { "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; const char* DataLayout64 = "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; + + // Copied from LLVMTargetMachine.cpp + void printNoVerify(PassManagerBase &PM, const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + } + + void printAndVerify(PassManagerBase &PM, + const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + + //if (VerifyMachineCode) + // PM.add(createMachineVerifierPass(Banner)); + } } // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS, + StringRef TT, StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), DataLayout(is64Bit ? DataLayout64 : DataLayout32), Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), InstrInfo(*this), + TSInfo(*this), TLInfo(*this) { } -PTX32TargetMachine::PTX32TargetMachine(const Target &T, - const std::string& TT, - const std::string& CPU, - const std::string& FS) - : PTXTargetMachine(T, TT, CPU, FS, false) { +PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) { } -PTX64TargetMachine::PTX64TargetMachine(const Target &T, - const std::string& TT, - const std::string& CPU, - const std::string& FS) - : PTXTargetMachine(T, TT, CPU, FS, true) { +PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) { } bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, @@ -82,6 +119,255 @@ bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // PTXMFInfoExtract must after register allocation! + //PM.add(createPTXMFInfoExtract(*this, OptLevel)); + return false; +} + +bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { + // This is mostly based on LLVMTargetMachine::addPassesToEmitFile + + // Add common CodeGen passes. 
+ MCContext *Context = 0; + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + return true; + assert(Context != 0 && "Failed to get MCContext"); + + if (hasMCSaveTempLabels()) + Context->setAllowTemporaryLabels(false); + + const MCAsmInfo &MAI = *getMCAsmInfo(); + const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + OwningPtr<MCStreamer> AsmStreamer; + + switch (FileType) { + default: return true; + case CGFT_AssemblyFile: { + MCInstPrinter *InstPrinter = + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI); + + // Create a code emitter if asked to show the encoding. + MCCodeEmitter *MCE = 0; + MCAsmBackend *MAB = 0; + + MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, + true, /* verbose asm */ + hasMCUseLoc(), + hasMCUseCFI(), + InstPrinter, + MCE, MAB, + false /* show MC encoding */); + AsmStreamer.reset(S); + break; + } + case CGFT_ObjectFile: { + llvm_unreachable("Object file emission is not supported with PTX"); + } + case CGFT_Null: + // The Null output is intended for performance analysis and testing, + // not for real users. + AsmStreamer.reset(createNullStreamer(*Context)); + break; + } + + // MC Logging + //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. + FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + if (Printer == 0) + return true; + + // If successful, createAsmPrinter took ownership of AsmStreamer. + AsmStreamer.take(); + + PM.add(Printer); + + PM.add(createGCInfoDeleter()); + return false; +} + +bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DisableVerify, + MCContext *&OutContext) { + // Add standard LLVM codegen passes. + // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some + // modifications for the PTX target. + + // Standard LLVM-Level Passes. + + // Basic AliasAnalysis support. + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); + + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Run loop strength reduction before anything else. + if (OptLevel != CodeGenOpt::None) { + PM.add(createLoopStrengthReducePass(getTargetLowering())); + //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + } + + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); + + PM.add(createLowerInvokePass(getTargetLowering())); + // The lower invoke pass may create unreachable code. Remove it. + PM.add(createUnreachableBlockEliminationPass()); + + if (OptLevel != CodeGenOpt::None) + PM.add(createCodeGenPreparePass(getTargetLowering())); + + PM.add(createStackProtectorPass(getTargetLowering())); + + addPreISel(PM, OptLevel); + + //PM.add(createPrintFunctionPass("\n\n" + // "*** Final LLVM Code input to ISel ***\n", + // &dbgs())); + + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Standard Lower-Level Passes.
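+ // Everything from here on operates on the machine-level representation rather than on LLVM IR.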
+ + // Install a MachineModuleInfo class, which is an immutable pass that holds + // all the per-module stuff we're generating, including MCContext. + MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), + *getRegisterInfo(), + &getTargetLowering()->getObjFileLowering()); + PM.add(MMI); + OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. + + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + + // Ask the target for an isel. + if (addInstSelector(PM, OptLevel)) + return true; + + // Print the instruction selected machine code... + printAndVerify(PM, "After Instruction Selection"); + + // Expand pseudo-instructions emitted by ISel. + PM.add(createExpandISelPseudosPass()); + + // Pre-ra tail duplication. + if (OptLevel != CodeGenOpt::None) { + PM.add(createTailDuplicatePass(true)); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); + } + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + if (OptLevel != CodeGenOpt::None) + PM.add(createOptimizePHIsPass()); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + PM.add(createLocalStackSlotAllocationPass()); + + if (OptLevel != CodeGenOpt::None) { + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + PM.add(createDeadMachineInstructionElimPass()); + printAndVerify(PM, "After codegen DCE pass"); + + PM.add(createMachineLICMPass()); + PM.add(createMachineCSEPass()); + PM.add(createMachineSinkingPass()); + printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); + + PM.add(createPeepholeOptimizerPass()); + printAndVerify(PM, "After codegen peephole optimization pass"); + } + + // Run pre-ra passes. + if (addPreRegAlloc(PM, OptLevel)) + printAndVerify(PM, "After PreRegAlloc passes"); + + // Perform register allocation. + PM.add(createPTXRegisterAllocator()); + printAndVerify(PM, "After Register Allocation"); + + // Perform stack slot coloring and post-ra machine LICM. + if (OptLevel != CodeGenOpt::None) { + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + PM.add(createStackSlotColoringPass(false)); + + // FIXME: Post-RA LICM has asserts that fire on virtual registers. + // Run post-ra machine LICM to hoist reloads / remats. + //if (!DisablePostRAMachineLICM) + // PM.add(createMachineLICMPass(false)); + + printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); + } + + // Run post-ra passes. + if (addPostRegAlloc(PM, OptLevel)) + printAndVerify(PM, "After PostRegAlloc passes"); + + PM.add(createExpandPostRAPseudosPass()); + printAndVerify(PM, "After ExpandPostRAPseudos"); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + PM.add(createPrologEpilogCodeInserter()); + printAndVerify(PM, "After PrologEpilogCodeInserter"); + + // Run pre-sched2 passes. + if (addPreSched2(PM, OptLevel)) + printAndVerify(PM, "After PreSched2 passes"); + + // Second pass scheduler. + if (OptLevel != CodeGenOpt::None) { + PM.add(createPostRAScheduler(OptLevel)); + printAndVerify(PM, "After PostRAScheduler"); + } + + // Branch folding must be run after regalloc and prolog/epilog insertion. 
+ if (OptLevel != CodeGenOpt::None) { + PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); + printNoVerify(PM, "After BranchFolding"); + } + + // Tail duplication. + if (OptLevel != CodeGenOpt::None) { + PM.add(createTailDuplicatePass(false)); + printNoVerify(PM, "After TailDuplicate"); + } + + PM.add(createGCMachineCodeAnalysisPass()); + + //if (PrintGCInfo) + // PM.add(createGCInfoPrinter(dbgs())); + + if (OptLevel != CodeGenOpt::None) { + PM.add(createCodePlacementOptPass()); + printNoVerify(PM, "After CodePlacementOpt"); + } + + if (addPreEmitPass(PM, OptLevel)) + printNoVerify(PM, "After PreEmit passes"); + PM.add(createPTXMFInfoExtract(*this, OptLevel)); + PM.add(createPTXFPRoundingModePass(*this, OptLevel)); + return false; } diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h index ae42153..5b7c82b 100644 --- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h +++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h @@ -17,6 +17,7 @@ #include "PTXISelLowering.h" #include "PTXInstrInfo.h" #include "PTXFrameLowering.h" +#include "PTXSelectionDAGInfo.h" #include "PTXSubtarget.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" @@ -25,15 +26,17 @@ namespace llvm { class PTXTargetMachine : public LLVMTargetMachine { private: - const TargetData DataLayout; - PTXSubtarget Subtarget; // has to be initialized before FrameLowering - PTXFrameLowering FrameLowering; - PTXInstrInfo InstrInfo; - PTXTargetLowering TLInfo; + const TargetData DataLayout; + PTXSubtarget Subtarget; // has to be initialized before FrameLowering + PTXFrameLowering FrameLowering; + PTXInstrInfo InstrInfo; + PTXSelectionDAGInfo TSInfo; + PTXTargetLowering TLInfo; public: - PTXTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, + PTXTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit); virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -49,27 +52,62 @@ class PTXTargetMachine : public LLVMTargetMachine { virtual const PTXTargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + + // We override this method to supply our own set of codegen passes. + virtual bool addPassesToEmitFile(PassManagerBase &, + formatted_raw_ostream &, + CodeGenFileType, + CodeGenOpt::Level, + bool = true); + + // Emission of machine code through JITCodeEmitter is not supported. + virtual bool addPassesToEmitMachineCode(PassManagerBase &, + JITCodeEmitter &, + CodeGenOpt::Level, + bool = true) { + return true; + } + + // Emission of machine code through MCJIT is not supported. 
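+ // As with addPassesToEmitMachineCode above, the unconditional 'return true' follows the LLVMTargetMachine convention that returning true signals failure to the caller.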
+ virtual bool addPassesToEmitMC(PassManagerBase &, + MCContext *&, + raw_ostream &, + CodeGenOpt::Level, + bool = true) { + return true; + } + + private: + + bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, + bool DisableVerify, MCContext *&OutCtx); }; // class PTXTargetMachine class PTX32TargetMachine : public PTXTargetMachine { public: - PTX32TargetMachine(const Target &T, const std::string &TT, - const std::string& CPU, const std::string& FS); + PTX32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; // class PTX32TargetMachine class PTX64TargetMachine : public PTXTargetMachine { public: - PTX64TargetMachine(const Target &T, const std::string &TT, - const std::string& CPU, const std::string& FS); + PTX64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; // class PTX64TargetMachine } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt deleted file mode 100644 index 4b09cf5..0000000 --- a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPTXInfo - PTXTargetInfo.cpp - ) - -add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile deleted file mode 100644 index 8619785..0000000 --- a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPTXInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp index 9df6c75..09a2735 100644 --- a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp +++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp @@ -9,7 +9,7 @@ #include "PTX.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/PTX/generate-register-td.py b/contrib/llvm/lib/Target/PTX/generate-register-td.py deleted file mode 100755 index 1528690..0000000 --- a/contrib/llvm/lib/Target/PTX/generate-register-td.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python -##===- generate-register-td.py --------------------------------*-python-*--===## -## -## The LLVM Compiler Infrastructure -## -## This file is distributed under the University of Illinois Open Source -## License. See LICENSE.TXT for details. -## -##===----------------------------------------------------------------------===## -## -## This file describes the PTX register file generator.
-## -##===----------------------------------------------------------------------===## - -from sys import argv, exit, stdout - - -if len(argv) != 5: - print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>') - exit(1) - -try: - num_pred = int(argv[1]) - num_16bit = int(argv[2]) - num_32bit = int(argv[3]) - num_64bit = int(argv[4]) -except: - print('ERROR: Invalid integer parameter') - exit(1) - -## Print the register definition file -td_file = open('PTXRegisterInfo.td', 'w') - -td_file.write(''' -//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Declarations that describe the PTX register file -//===----------------------------------------------------------------------===// - -class PTXReg<string n> : Register<n> { - let Namespace = "PTX"; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// -''') - - -# Print predicate registers -td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n') -for r in range(0, num_pred): - td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r)) - -# Print 16-bit registers -td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n') -for r in range(0, num_16bit): - td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r)) - -# Print 32-bit registers -td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n') -for r in range(0, num_32bit): - td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r)) - -# Print 64-bit registers -td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n') -for r in range(0, num_64bit): - td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r)) - - -td_file.write(''' -//===----------------------------------------------------------------------===// -// Register classes -//===----------------------------------------------------------------------===// -''') - - -# Print register classes - -td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1)) -td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1)) -td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) -td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) -td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) -td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) - - -td_file.close() - -## Now write the PTXCallingConv.td file -td_file = open('PTXCallingConv.td', 'w') - -# Reserve 10% of the available registers for return values, and the other 90% -# for parameters -num_ret_pred = int(0.1 * num_pred) -num_ret_16bit = int(0.1 * num_16bit) -num_ret_32bit = int(0.1 * num_32bit) -num_ret_64bit = int(0.1 * num_64bit) -num_param_pred = num_pred - num_ret_pred -num_param_16bit = num_16bit - num_ret_16bit 
-num_param_32bit = num_32bit - num_ret_32bit -num_param_64bit = num_64bit - num_ret_64bit - -param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)] -ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)] -param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)] -ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)] -param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)] -ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)] -param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)] -ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)] - -param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred) -ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred) -param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit) -ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit) -param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit) -ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit) -param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit) -ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit) - -td_file.write(''' -//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for the PTX architecture. -// -//===----------------------------------------------------------------------===// - -// PTX Formal Parameter Calling Convention -def CC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[%s]>>, - CCIfType<[i16], CCAssignToReg<[%s]>>, - CCIfType<[i32,f32], CCAssignToReg<[%s]>>, - CCIfType<[i64,f64], CCAssignToReg<[%s]>> -]>; - -// PTX Return Value Calling Convention -def RetCC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[%s]>>, - CCIfType<[i16], CCAssignToReg<[%s]>>, - CCIfType<[i32,f32], CCAssignToReg<[%s]>>, - CCIfType<[i64,f64], CCAssignToReg<[%s]>> -]>; -''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit, - ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit)) - - -td_file.close() diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt deleted file mode 100644 index 389ea77..0000000 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPowerPCAsmPrinter - PPCInstPrinter.cpp - ) -add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile b/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile deleted file mode 100644 index f097e84..0000000 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. 
-LIBRARYNAME = LLVMPowerPCAsmPrinter - -# Hack: we need to include 'main' powerpc target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 1a9bd76..b6a0835 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -13,7 +13,8 @@ #define DEBUG_TYPE "asm-printer" #include "PPCInstPrinter.h" -#include "PPCPredicates.h" +#include "MCTargetDesc/PPCBaseInfo.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" @@ -30,7 +31,8 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } -void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { +void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); @@ -49,6 +51,8 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { O << ", "; printOperand(MI, 1, O); O << ", " << (unsigned int)SH; + + printAnnotation(O, Annot); return; } } @@ -59,6 +63,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { printOperand(MI, 0, O); O << ", "; printOperand(MI, 1, O); + printAnnotation(O, Annot); return; } @@ -72,11 +77,13 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { O << ", "; printOperand(MI, 1, O); O << ", " << (unsigned int)SH; + printAnnotation(O, Annot); return; } } printInstruction(MI, O); + printAnnotation(O, Annot); } diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index d022a44..4ed4b76 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -32,7 +32,7 @@ public: } virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; static const char *getInstructionName(unsigned Opcode); diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index a1b8166..0000000 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMPowerPCDesc - PPCMCTargetDesc.cpp - PPCMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile deleted file mode 100644 index 9db6662..0000000 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/PowerPC/TargetDesc/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. 
-LIBRARYNAME = LLVMPowerPCDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 4b8cbb7..9f2fd6d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -7,17 +7,43 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmBackend.h" -#include "PPC.h" -#include "PPCFixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; +static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + return Value; + case PPC::fixup_ppc_brcond14: + return Value & 0x3ffc; + case PPC::fixup_ppc_br24: + return Value & 0x3fffffc; +#if 0 + case PPC::fixup_ppc_hi16: + return (Value >> 16) & 0xffff; +#endif + case PPC::fixup_ppc_ha16: + return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff; + case PPC::fixup_ppc_lo16: + return Value & 0xffff; + } +} + namespace { class PPCMachObjectWriter : public MCMachObjectTargetWriter { public: @@ -31,10 +57,17 @@ public: MCValue Target, uint64_t &FixedValue) {} }; -class PPCAsmBackend : public TargetAsmBackend { +class PPCELFObjectWriter : public MCELFObjectTargetWriter { +public: + PPCELFObjectWriter(bool Is64Bit, Triple::OSType OSType, uint16_t EMachine, + bool HasRelocationAddend, bool isLittleEndian) + : MCELFObjectTargetWriter(Is64Bit, OSType, EMachine, HasRelocationAddend) {} +}; + +class PPCAsmBackend : public MCAsmBackend { const Target &TheTarget; public: - PPCAsmBackend(const Target &T) : TargetAsmBackend(), TheTarget(T) {} + PPCAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {} unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; } @@ -49,7 +82,7 @@ public: }; if (Kind < FirstTargetFixupKind) - return TargetAsmBackend::getFixupKindInfo(Kind); + return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); @@ -109,15 +142,50 @@ namespace { return false; } }; + + class ELFPPCAsmBackend : public PPCAsmBackend { + Triple::OSType OSType; + public: + ELFPPCAsmBackend(const Target &T, Triple::OSType OSType) : + PPCAsmBackend(T), OSType(OSType) { } + + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const { + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + + // For each byte of the fragment that the fixup touches, mask in the bits from + // the fixup value. The Value has been "split up" into the appropriate + // bitfields above. 
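+ // For example, an adjusted Value of 0x00012345 ORs the bytes 0x00, 0x01, 0x23, 0x45 into Data[Offset] through Data[Offset + 3], most-significant byte first, matching the big-endian PowerPC encoding.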
+ for (unsigned i = 0; i != 4; ++i) + Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + bool is64 = getPointerSize() == 8; + return createELFObjectWriter(new PPCELFObjectWriter( + /*Is64Bit=*/is64, + OSType, + is64 ? ELF::EM_PPC64 : ELF::EM_PPC, + /*addend*/ true, /*isLittleEndian*/ false), + OS, /*IsLittleEndian=*/false); + } + + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + return false; + } + }; + } // end anonymous namespace -TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T, - const std::string &TT) { +MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) { if (Triple(TT).isOSDarwin()) return new DarwinPPCAsmBackend(T); - return 0; + return new ELFPPCAsmBackend(T, Triple(TT).getOS()); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h new file mode 100644 index 0000000..369bbdc --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h @@ -0,0 +1,70 @@ +//===-- PPCBaseInfo.h - Top level definitions for PPC -------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the PPC target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCBASEINFO_H +#define PPCBASEINFO_H + +#include "PPCMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +/// getPPCRegisterNumbering - Given the enum value for some register, e.g. +/// PPC::F14, return the number that it corresponds to (e.g. 14). 
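+/// Likewise, getPPCRegisterNumbering(PPC::X31) returns 31 and getPPCRegisterNumbering(PPC::CR5EQ) returns 22.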
+inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) { + using namespace PPC; + switch (RegEnum) { + case 0: return 0; + case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0; + case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1; + case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2; + case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3; + case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4; + case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5; + case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6; + case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7; + case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8; + case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9; + case R10: case X10: case F10: case V10: case CR2EQ: return 10; + case R11: case X11: case F11: case V11: case CR2UN: return 11; + case R12: case X12: case F12: case V12: case CR3LT: return 12; + case R13: case X13: case F13: case V13: case CR3GT: return 13; + case R14: case X14: case F14: case V14: case CR3EQ: return 14; + case R15: case X15: case F15: case V15: case CR3UN: return 15; + case R16: case X16: case F16: case V16: case CR4LT: return 16; + case R17: case X17: case F17: case V17: case CR4GT: return 17; + case R18: case X18: case F18: case V18: case CR4EQ: return 18; + case R19: case X19: case F19: case V19: case CR4UN: return 19; + case R20: case X20: case F20: case V20: case CR5LT: return 20; + case R21: case X21: case F21: case V21: case CR5GT: return 21; + case R22: case X22: case F22: case V22: case CR5EQ: return 22; + case R23: case X23: case F23: case V23: case CR5UN: return 23; + case R24: case X24: case F24: case V24: case CR6LT: return 24; + case R25: case X25: case F25: case V25: case CR6GT: return 25; + case R26: case X26: case F26: case V26: case CR6EQ: return 26; + case R27: case X27: case F27: case V27: case CR6UN: return 27; + case R28: case X28: case F28: case V28: case CR7LT: return 28; + case R29: case X29: case F29: case V29: case CR7GT: return 29; + case R30: case X30: case F30: case V30: case CR7EQ: return 30; + case R31: case X31: case F31: case V31: case CR7UN: return 31; + default: + llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!"); + } +} + +} // end namespace llvm; + +#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index b3c889e..b3c889e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index b6dca83..e9424d8 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -31,6 +31,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { + if (is64Bit) + PointerSize = 8; + IsLittleEndian = false; + // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; @@ -56,7 +60,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; - HasLCOMMDirective = true; + LCOMMDirectiveType = LCOMM::NoAlignment; AssemblerDialect = 0; // Old-Style mnemonics. 
} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index cf73d86..262f97c3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -12,9 +12,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" -#include "PPC.h" -#include "PPCRegisterInfo.h" -#include "PPCFixupKinds.h" +#include "MCTargetDesc/PPCBaseInfo.h" +#include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCInst.h" #include "llvm/ADT/Statistic.h" @@ -170,7 +169,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo, const MCOperand &MO = MI.getOperand(OpNo); assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg()); + return 0x80 >> getPPCRegisterNumbering(MO.getReg()); } @@ -182,7 +181,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, // The GPR operand should come through here though. assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return PPCRegisterInfo::getRegisterNumbering(MO.getReg()); + return getPPCRegisterNumbering(MO.getReg()); } assert(MO.isImm() && diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 02b887f..d5c8a9e 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -13,10 +13,14 @@ #include "PPCMCTargetDesc.h" #include "PPCMCAsmInfo.h" +#include "InstPrinter/PPCInstPrinter.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "PPCGenInstrInfo.inc" @@ -35,11 +39,16 @@ static MCInstrInfo *createPPCMCInstrInfo() { return X; } -extern "C" void LLVMInitializePowerPCMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo); -} +static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) { + Triple TheTriple(TT); + bool isPPC64 = (TheTriple.getArch() == Triple::ppc64); + unsigned Flavour = isPPC64 ? 0 : 1; + unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR; + MCRegisterInfo *X = new MCRegisterInfo(); + InitPPCMCRegisterInfo(X, RA, Flavour, Flavour); + return X; +} static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { @@ -48,23 +57,95 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializePowerPCMCSubtargetInfo() { +static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + bool isPPC64 = TheTriple.getArch() == Triple::ppc64; + + MCAsmInfo *MAI; + if (TheTriple.isOSDarwin()) + MAI = new PPCMCAsmInfoDarwin(isPPC64); + else + MAI = new PPCLinuxMCAsmInfo(isPPC64); + + // Initial state of the frame pointer is R1. 
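+ // That is, at function entry the virtual frame pointer is defined to be R1 (the PPC stack pointer) at offset 0.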
+ MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(PPC::R1, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + if (RM == Reloc::Default) { + Triple T(TT); + if (T.isOSDarwin()) + RM = Reloc::DynamicNoPIC; + else + RM = Reloc::Static; + } + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +// This is duplicated code. Refactor this. +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + if (Triple(TT).isOSDarwin()) + return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + +static MCInstPrinter *createPPCMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + return new PPCInstPrinter(MAI, SyntaxVariant); +} + +extern "C" void LLVMInitializePowerPCTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo); + RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo); + + // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target, createPPCMCSubtargetInfo); TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target, createPPCMCSubtargetInfo); -} -static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { - Triple TheTriple(TT); - bool isPPC64 = TheTriple.getArch() == Triple::ppc64; - if (TheTriple.isOSDarwin()) - return new PPCMCAsmInfoDarwin(isPPC64); - return new PPCLinuxMCAsmInfo(isPPC64); + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter); -} + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend); + TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer); -extern "C" void LLVMInitializePowerPCMCAsmInfo() { - RegisterMCAsmInfoFn C(ThePPC32Target, createMCAsmInfo); - RegisterMCAsmInfoFn D(ThePPC64Target, createMCAsmInfo); + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index cee2350..e5bf2a9 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -15,6 +15,10 @@ #define PPCMCTARGETDESC_H namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; class MCSubtargetInfo; class Target; class StringRef; @@ -22,6 +26,12 @@ class StringRef; extern Target ThePPC32Target; extern Target ThePPC64Target; +MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT); + } // End llvm namespace // Defines symbolic names for PowerPC registers. This defines a mapping from diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 12bb0a1..12bb0a1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index b2c8315..f872e86 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCPredicates.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -14,8 +14,6 @@ #ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H #define LLVM_TARGET_POWERPC_PPCPREDICATES_H -#include "PPC.h" - namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. 
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h index 7191dd1..5dc1863 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.h +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h @@ -15,6 +15,7 @@ #ifndef LLVM_TARGET_POWERPC_H #define LLVM_TARGET_POWERPC_H +#include "MCTargetDesc/PPCBaseInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include <string> @@ -30,22 +31,12 @@ namespace llvm { class MachineInstr; class AsmPrinter; class MCInst; - class MCCodeEmitter; - class MCContext; - class MCInstrInfo; - class MCSubtargetInfo; class TargetMachine; - class TargetAsmBackend; FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, JITCodeEmitter &MCE); - MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx); - TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &); - void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td index aabf494..2d5d302 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.td +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td @@ -43,9 +43,9 @@ def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true", "Enable GPUL instructions">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", - "Enable the fsqrt instruction">; + "Enable the fsqrt instruction">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", - "Enable the stfiwx instruction">; + "Enable the stfiwx instruction">; //===----------------------------------------------------------------------===// // Register File Description diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 9de2200..9528459 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -18,9 +18,9 @@ #define DEBUG_TYPE "asmprinter" #include "PPC.h" -#include "PPCPredicates.h" #include "PPCTargetMachine.h" #include "PPCSubtarget.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -43,11 +43,11 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" @@ -679,18 +679,8 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm, return new PPCLinuxAsmPrinter(tm, Streamer); } -static MCInstPrinter *createPPCMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - return new PPCInstPrinter(MAI, SyntaxVariant); -} - - // Force static initialization. 
extern "C" void LLVMInitializePowerPCAsmPrinter() { TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass); TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass); - - TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index e161d23..475edf3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -19,7 +19,7 @@ #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" -#include "PPCPredicates.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/Statistic.h" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp index 42232a0..4a1f182 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -140,7 +140,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, const MachineOperand &MO = MI.getOperand(OpNo); assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg()); + return 0x80 >> getPPCRegisterNumbering(MO.getReg()); } MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, @@ -250,7 +250,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, // The GPR operand should come through here though. assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return PPCRegisterInfo::getRegisterNumbering(MO.getReg()); + return getPPCRegisterNumbering(MO.getReg()); } assert(MO.isImm() && diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 375e000..7dead10 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -109,14 +109,14 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { for (MachineRegisterInfo::livein_iterator I = MF->getRegInfo().livein_begin(), E = MF->getRegInfo().livein_end(); I != E; ++I) { - unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first); + unsigned RegNo = getPPCRegisterNumbering(I->first); if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } for (MachineRegisterInfo::liveout_iterator I = MF->getRegInfo().liveout_begin(), E = MF->getRegInfo().liveout_end(); I != E; ++I) { - unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I); + unsigned RegNo = getPPCRegisterNumbering(*I); if (VRRegNo[RegNo] == *I) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } @@ -712,13 +712,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } -void PPCFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const { - // Initial state of the frame pointer is R1. 
- MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(PPC::R1, 0); - Moves.push_back(MachineMove(0, Dst, Src)); -} - static bool spillsCR(const MachineFunction &MF) { const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); return FuncInfo->isCRSpilled(); @@ -885,7 +878,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } - LowerBound -= (31 - PPCRegisterInfo::getRegisterNumbering(MinFPR) + 1) * 8; + LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8; } // Check whether the frame pointer register is allocated. If so, make sure it @@ -919,8 +912,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } unsigned MinReg = - std::min<unsigned>(PPCRegisterInfo::getRegisterNumbering(MinGPR), - PPCRegisterInfo::getRegisterNumbering(MinG8R)); + std::min<unsigned>(getPPCRegisterNumbering(MinGPR), + getPPCRegisterNumbering(MinG8R)); if (Subtarget.isPPC64()) { LowerBound -= (31 - MinReg + 1) * 8; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h index 0c18de1..20faa71 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -40,7 +40,6 @@ public: bool hasFP(const MachineFunction &MF) const; bool needsFP(const MachineFunction &MF) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 2176c02..6f204cc 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -14,8 +14,8 @@ #define DEBUG_TYPE "ppc-codegen" #include "PPC.h" -#include "PPCPredicates.h" #include "PPCTargetMachine.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9741a39..d6b8a9e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14,8 +14,8 @@ #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" -#include "PPCPredicates.h" #include "PPCTargetMachine.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -211,7 +211,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::TRAP, MVT::Other, Legal); // TRAMPOLINE is custom lowered. 
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); @@ -365,7 +366,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { setStackPointerRegisterToSaveRestore(PPC::X1); @@ -401,12 +406,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) if (PPCSubTarget.isDarwin()) setPrefFunctionAlignment(4); + setInsertFencesForAtomic(true); + computeRegisterProperties(); } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. -unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const { +unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { const TargetMachine &TM = getTargetMachine(); // Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) @@ -463,7 +470,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const { +EVT PPCTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i32; } @@ -1368,8 +1375,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0); } -SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) const { +SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -1378,7 +1390,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); - const Type *IntPtrTy = + Type *IntPtrTy = DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType( *DAG.getContext()); @@ -1398,16 +1410,13 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair<SDValue, SDValue> CallResult = - LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()), + LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); - SDValue Ops[] = - { CallResult.first, CallResult.second }; - - return DAG.getMergeValues(Ops, 2, dl); + return CallResult.second; } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, @@ -2550,7 +2559,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { unsigned OpFlags = 0; if (DAG.getTarget().getRelocationModel() != Reloc::Static && - (!PPCSubTarget.getTargetTriple().isMacOSX() || + 
(PPCSubTarget.getTargetTriple().isMacOSX() && PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && (G->getGlobal()->isDeclaration() || G->getGlobal()->isWeakForLinker())) { @@ -2574,7 +2583,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned char OpFlags = 0; if (DAG.getTarget().getRelocationModel() != Reloc::Static && - (!PPCSubTarget.getTargetTriple().isMacOSX() || + (PPCSubTarget.getTargetTriple().isMacOSX() && PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which @@ -2941,6 +2950,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, SmallVector<TailCallArgumentInfo, 8> TailCallArguments; SmallVector<SDValue, 8> MemOpChains; + bool seenFloatArg = false; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; @@ -2985,6 +2995,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, } if (VA.isRegLoc()) { + seenFloatArg |= VA.getLocVT().isFloatingPoint(); // Put argument in a physical register. RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { @@ -3011,9 +3022,11 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Set CR6 to true if this is a vararg call. + // Set CR6 to true if this is a vararg call with floating args passed in + // registers. if (isVarArg) { - SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0); + SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET, + dl, MVT::i32), 0); RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR)); } @@ -3403,6 +3416,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, Ins, InVals); } +bool +PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), + RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_PPC); +} + SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -4490,7 +4514,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC"); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, PPCSubTarget); @@ -5504,7 +5529,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight( // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: @@ -5634,7 +5659,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. 
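The LowerCall_SVR4 hunk above implements the SVR4 convention for CR6 on vararg calls: the bit tells the variadic callee whether any argument actually landed in a floating-point register, so it knows whether the parameter FPRs must be dumped for later va_arg traversal. A self-contained sketch of the flag computation (ArgLoc is a hypothetical stand-in for CCValAssign):

    #include <vector>

    // Hypothetical reduction of CCValAssign to the two bits the test needs.
    struct ArgLoc { bool IsRegLoc; bool IsFloat; };

    // True -> emit CRSET (creqv), false -> emit CRUNSET (crxor), as above.
    bool computeSeenFloatArg(const std::vector<ArgLoc> &ArgLocs) {
      bool seenFloatArg = false;
      for (const ArgLoc &VA : ArgLocs)
        if (VA.IsRegLoc && VA.IsFloat)
          seenFloatArg = true;
      return seenFloatArg;
    }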
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const { + Type *Ty) const { // FIXME: PPC does not allow r+i addressing modes for vectors! // PPC allows a sign-extended 16-bit immediate field. @@ -5670,7 +5695,7 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. -bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{ +bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{ // PPC allows a sign-extended 16-bit immediate field. return (V > -(1 << 16) && V < (1 << 16)-1); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 986b4e7..430e45e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -246,7 +246,7 @@ namespace llvm { virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(EVT VT) const; /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address @@ -323,7 +323,7 @@ namespace llvm { /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. - unsigned getByValTypeAlignment(const Type *Ty) const; + unsigned getByValTypeAlignment(Type *Ty) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. @@ -334,12 +334,12 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; + virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. - virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const; + virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const; /// isLegalAddressImmediate - Return true if the GlobalValue can be used as /// the offset of the target addressing mode. 
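isLegalAddressingMode and isLegalAddressImmediate above reduce to the same displacement test: PPC D-form loads and stores carry a sign-extended 16-bit offset. A standalone copy of the predicate, bounds taken verbatim from the hunk (they admit a slightly wider range than a strict [-32768, 32767] check; that quirk predates this change and is preserved here):

    #include <cstdint>

    // Displacement legality for PPC D-form memory operations.
    bool isLegalPPCAddrImm(int64_t V) {
      return V > -(1 << 16) && V < (1 << 16) - 1;
    }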
@@ -390,7 +390,8 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, @@ -444,6 +445,12 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + virtual bool + CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const; + virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 143444f..2bc109c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -15,18 +15,18 @@ #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" -#include "PPCPredicates.h" #include "PPCTargetMachine.h" #include "PPCHazardRecognizers.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/STLExtras.h" @@ -334,7 +334,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const{ DebugLoc DL; - if (RC == PPC::GPRCRegisterClass) { + if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) { if (SrcReg != PPC::LR) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) .addReg(SrcReg, @@ -350,7 +350,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } - } else if (RC == PPC::G8RCRegisterClass) { + } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) { if (SrcReg != PPC::LR8) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) .addReg(SrcReg, @@ -366,17 +366,17 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } - } else if (RC == PPC::F8RCRegisterClass) { + } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); - } else if (RC == PPC::F4RCRegisterClass) { + } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); - } else if (RC == PPC::CRRCRegisterClass) { + } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { // FIXME (64-bit): Enable @@ -402,7 +402,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // If the saved register wasn't CR0, shift the bits left so that they are 
// in CR0's slot. if (SrcReg != PPC::CR0) { - unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4; + unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; // rlwinm scratch, scratch, ShiftBits, 0, 31. NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) .addReg(ScratchReg).addImm(ShiftBits) @@ -414,7 +414,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } - } else if (RC == PPC::CRBITRCRegisterClass) { + } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { // FIXME: We use CRi here because there is no mtcrf on a bit. Since the // backend currently only uses CR1EQ as an individual bit, this should // not cause any bug. If we need other uses of CR bits, the following @@ -448,7 +448,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, PPC::CRRCRegisterClass, NewMIs); - } else if (RC == PPC::VRRCRegisterClass) { + } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) { // We don't have indexed addressing for vector loads. Emit: // R0 = ADDI FI# // STVX VAL, 0, R0 @@ -499,7 +499,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs)const{ - if (RC == PPC::GPRCRegisterClass) { + if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) { if (DestReg != PPC::LR) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), DestReg), FrameIdx)); @@ -508,7 +508,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, PPC::R11), FrameIdx)); NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11)); } - } else if (RC == PPC::G8RCRegisterClass) { + } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) { if (DestReg != PPC::LR8) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), FrameIdx)); @@ -517,13 +517,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, PPC::R11), FrameIdx)); NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11)); } - } else if (RC == PPC::F8RCRegisterClass) { + } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), FrameIdx)); - } else if (RC == PPC::F4RCRegisterClass) { + } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); - } else if (RC == PPC::CRRCRegisterClass) { + } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { // FIXME: We need a scratch reg here. The trouble with using R0 is that // it's possible for the stack frame to be so big the save location is // out of range of immediate offsets, necessitating another register. @@ -537,7 +537,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. if (DestReg != PPC::CR0) { - unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4; + unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31.
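Worked rotate amounts for the CR spill/reload pair in these hunks, assuming the field is CR2 (field number 2); the rlwinm described by the comment above follows below. The spill rotates left by 4*2 = 8 bits to park the field in CR0's slot, and the reload rotates left by 32 - 8 = 24 bits, i.e. right by 8, the exact inverse:

    #include <cassert>

    int main() {
      unsigned n = 2;                    // CR field number (CR2)
      unsigned SpillRot  = n * 4;        // rlwinm ..., 8, 0, 31
      unsigned ReloadRot = 32 - n * 4;   // rlwinm ..., 24, 0, 31
      assert((SpillRot + ReloadRot) % 32 == 0); // the two rotates cancel
      return 0;
    }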
NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) @@ -546,7 +546,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) .addReg(ScratchReg)); - } else if (RC == PPC::CRBITRCRegisterClass) { + } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { unsigned Reg = 0; if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || @@ -577,7 +577,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, PPC::CRRCRegisterClass, NewMIs); - } else if (RC == PPC::VRRCRegisterClass) { + } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) { // We don't have indexed addressing for vector loads. Emit: // R0 = ADDI FI# // Dest = LVX 0, R0 diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 773578c..f248b5b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1053,6 +1053,10 @@ def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins), "creqv $dst, $dst, $dst", BrCR, []>; +def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), + "crxor $dst, $dst, $dst", BrCR, + []>; + // XFX-Form instructions. Instructions that deal with SPRs. // let Uses = [CTR] in { @@ -1472,5 +1476,7 @@ def : Pat<(membarrier (i32 imm /*ll*/), (i32 imm /*device*/)), (SYNC)>; +def : Pat<(atomic_fence (imm), (imm)), (SYNC)>; + include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 9c2428b..2e90b7a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetFrameLowering.h" @@ -68,52 +67,12 @@ PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { (EnablePPC64RS && Subtarget.isPPC64())); } -/// getRegisterNumbering - Given the enum value for some register, e.g. -/// PPC::F14, return the number that it corresponds to (e.g. 14). 
-unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) { - using namespace PPC; - switch (RegEnum) { - case 0: return 0; - case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0; - case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1; - case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2; - case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3; - case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4; - case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5; - case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6; - case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7; - case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8; - case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9; - case R10: case X10: case F10: case V10: case CR2EQ: return 10; - case R11: case X11: case F11: case V11: case CR2UN: return 11; - case R12: case X12: case F12: case V12: case CR3LT: return 12; - case R13: case X13: case F13: case V13: case CR3GT: return 13; - case R14: case X14: case F14: case V14: case CR3EQ: return 14; - case R15: case X15: case F15: case V15: case CR3UN: return 15; - case R16: case X16: case F16: case V16: case CR4LT: return 16; - case R17: case X17: case F17: case V17: case CR4GT: return 17; - case R18: case X18: case F18: case V18: case CR4EQ: return 18; - case R19: case X19: case F19: case V19: case CR4UN: return 19; - case R20: case X20: case F20: case V20: case CR5LT: return 20; - case R21: case X21: case F21: case V21: case CR5GT: return 21; - case R22: case X22: case F22: case V22: case CR5EQ: return 22; - case R23: case X23: case F23: case V23: case CR5UN: return 23; - case R24: case X24: case F24: case V24: case CR6LT: return 24; - case R25: case X25: case F25: case V25: case CR6GT: return 25; - case R26: case X26: case F26: case V26: case CR6EQ: return 26; - case R27: case X27: case F27: case V27: case CR6UN: return 27; - case R28: case X28: case F28: case V28: case CR7LT: return 28; - case R29: case X29: case F29: case V29: case CR7GT: return 29; - case R30: case X30: case F30: case V30: case CR7EQ: return 30; - case R31: case X31: case F31: case V31: case CR7UN: return 31; - default: - llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!"); - } -} - PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) - : PPCGenRegisterInfo(), Subtarget(ST), TII(tii) { + : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, + ST.isPPC64() ? 0 : 1, + ST.isPPC64() ? 0 : 1), + Subtarget(ST), TII(tii) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -519,7 +478,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // rlwinm rA, rA, ShiftBits, 0, 31. BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) .addReg(Reg, RegState::Kill) - .addImm(PPCRegisterInfo::getRegisterNumbering(SrcReg) * 4) + .addImm(getPPCRegisterNumbering(SrcReg) * 4) .addImm(0) .addImm(31); @@ -668,10 +627,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); } -unsigned PPCRegisterInfo::getRARegister() const { - return !Subtarget.isPPC64() ? 
PPC::LR : PPC::LR8; -} - unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -688,27 +643,3 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const { unsigned PPCRegisterInfo::getEHHandlerRegister() const { return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4; } - -/// DWARFFlavour - Flavour of dwarf regnumbers -/// -namespace DWARFFlavour { - enum { - PPC64 = 0, PPC32 = 1 - }; -} - -int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - // FIXME: Most probably dwarf numbers differs for Linux and Darwin - unsigned Flavour = Subtarget.isPPC64() ? - DWARFFlavour::PPC64 : DWARFFlavour::PPC32; - - return PPCGenRegisterInfo::getDwarfRegNumFull(RegNum, Flavour); -} - -int PPCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { - // FIXME: Most probably dwarf numbers differs for Linux and Darwin - unsigned Flavour = Subtarget.isPPC64() ? - DWARFFlavour::PPC64 : DWARFFlavour::PPC32; - - return PPCGenRegisterInfo::getLLVMRegNumFull(RegNum, Flavour); -} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 33fe5eb..1cc7213 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -33,10 +33,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { public: PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); - /// getRegisterNumbering - Given the enum value for some register, e.g. - /// PPC::F14, return the number that it corresponds to (e.g. 14). - static unsigned getRegisterNumbering(unsigned RegEnum); - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const; @@ -62,15 +58,11 @@ public: int SPAdj, RegScavenger *RS = NULL) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 5ea9b0f..cf194de 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -15,7 +15,7 @@ #include "PPC.h" #include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include <cstdlib> #define GET_SUBTARGETINFO_TARGET_DESC diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index e0ea5ad..f5744b8 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -16,76 +16,43 @@ #include "llvm/PassManager.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; -// This is duplicated code. Refactor this. 
-static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, - MCContext &Ctx, TargetAsmBackend &TAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll, - bool NoExecStack) { - if (Triple(TT).isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll); - - return NULL; -} - extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target); - - // Register the MC Code Emitter - TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter); - TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter); - - - // Register the asm backend. - TargetRegistry::RegisterAsmBackend(ThePPC32Target, createPPCAsmBackend); - TargetRegistry::RegisterAsmBackend(ThePPC64Target, createPPCAsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterObjectStreamer(ThePPC32Target, createMCStreamer); - TargetRegistry::RegisterObjectStreamer(ThePPC64Target, createMCStreamer); } - -PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS), +PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, + bool is64Bit) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { - - if (getRelocationModel() == Reloc::Default) { - if (Subtarget.isDarwin()) - setRelocationModel(Reloc::DynamicNoPIC); - else - setRelocationModel(Reloc::Static); - } } /// Override this for PowerPC. Tail merging happily breaks up instruction issue /// groups, which typically degrades performance. bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } -PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : PPCTargetMachine(T, TT, CPU, FS, false) { +PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) { } -PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : PPCTargetMachine(T, TT, CPU, FS, true) { +PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) { } @@ -110,19 +77,11 @@ bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM, bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { - // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. // FIXME: This should be moved to TargetJITInfo!! - if (Subtarget.isPPC64()) { - // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many - // instructions to materialize arbitrary global variable + function + - // constant pool addresses. - setRelocationModel(Reloc::PIC_); + if (Subtarget.isPPC64()) // Temporary workaround for the inability of PPC64 JIT to handle jump // tables. 
DisableJumpTables = true; - } else { - setRelocationModel(Reloc::Static); - } // Inform the subtarget that we are in JIT mode. FIXME: does this break macho // writing? diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h index baf07e3..d06f084 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -40,9 +40,9 @@ class PPCTargetMachine : public LLVMTargetMachine { InstrItineraryData InstrItins; public: - PPCTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, - bool is64Bit); + PPCTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit); virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameLowering *getFrameLowering() const { @@ -77,16 +77,18 @@ public: /// class PPC32TargetMachine : public PPCTargetMachine { public: - PPC32TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + PPC32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; /// PPC64TargetMachine - PowerPC 64-bit target machine. /// class PPC64TargetMachine : public PPCTargetMachine { public: - PPC64TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + PPC64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp index ad607d0..5dc8568 100644 --- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp @@ -9,7 +9,7 @@ #include "PPC.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::ThePPC32Target, llvm::ThePPC64Target; diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 1e8c029..0000000 --- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMSparcDesc - SparcMCTargetDesc.cpp - SparcMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile deleted file mode 100644 index abcbe2d..0000000 --- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Sparc/TargetDesc/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMSparcDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index cb92a2b..cb2a7df 100644 --- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -13,10 +13,11 @@ #include "SparcMCTargetDesc.h" #include "SparcMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "SparcGenInstrInfo.inc" @@ -35,8 +36,10 @@ static MCInstrInfo *createSparcMCInstrInfo() { return X; } -extern "C" void LLVMInitializeSparcMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo); +static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSparcMCRegisterInfo(X, SP::I7); + return X; } static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -46,12 +49,31 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeSparcMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget, - createSparcMCSubtargetInfo); +static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM); + return X; } -extern "C" void LLVMInitializeSparcMCAsmInfo() { +extern "C" void LLVMInitializeSparcTargetMC() { + // Register the MC asm info. RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget); RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget, + createSparcMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target, + createSparcMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheSparcTarget, createSparcMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget, + createSparcMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index edde842..345e1bc 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -22,9 +22,9 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 6f30d3f..d70b163 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -631,8 +631,8 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const assert(CalleeFn->hasStructRetAttr() && "Callee does not have the StructRet attribute."); - const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); - const Type *ElementTy = Ty->getElementType(); + PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); + Type *ElementTy = Ty->getElementType(); return getTargetData()->getTypeAllocSize(ElementTy); } @@ -748,8 +748,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - // SPARC has no intrinsics for these particular operations. + // FIXME: There are instructions available for ATOMIC_FENCE + // on SparcV8 and later. setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp index 4e3ddf8..7a6bf50 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp @@ -17,8 +17,8 @@ #include "SparcSubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp index 0acdd2c..8c16251 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Type.h" @@ -31,7 +30,7 @@ using namespace llvm; SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii) - : SparcGenRegisterInfo(), Subtarget(st), TII(tii) { + : SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) { } const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) @@ -113,10 +112,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, void 
SparcRegisterInfo:: processFunctionBeforeFrameFinalized(MachineFunction &MF) const {} -unsigned SparcRegisterInfo::getRARegister() const { - return SP::I7; -} - unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } @@ -130,11 +125,3 @@ unsigned SparcRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); return 0; } - -int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - return SparcGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -int SparcRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - return SparcGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); -} diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h index ec9e63a..f845667 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h +++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h @@ -46,15 +46,11 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp index de647e8..6c501cf 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp @@ -13,7 +13,7 @@ #include "SparcSubtarget.h" #include "Sparc.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp index cbe6d87..3d7b4a4 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp @@ -13,7 +13,7 @@ #include "Sparc.h" #include "SparcTargetMachine.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; extern "C" void LLVMInitializeSparcTarget() { @@ -24,10 +24,11 @@ extern "C" void LLVMInitializeSparcTarget() { /// SparcTargetMachine ctor - Create an ILP32 architecture model /// -SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS, bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS), +SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, + bool is64bit) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS, is64bit), DataLayout(Subtarget.getDataLayout()), TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), @@ -51,15 +52,15 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, } SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : SparcTargetMachine(T, TT, CPU, FS, false) { + StringRef TT, StringRef CPU, + StringRef FS, Reloc::Model RM, + CodeModel::Model CM) + : SparcTargetMachine(T, TT, CPU, FS, RM, CM, false) { } 
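The Sparc hunks above show the consolidation this import applies to every backend: the old per-component entry points (LLVMInitializeSparcMCInstrInfo, ...MCSubtargetInfo, ...MCAsmInfo) collapse into one LLVMInitializeSparcTargetMC that registers all MC-layer pieces together. The shape of that function, sketched for a hypothetical target Foo; TheFooTarget, FooMCAsmInfo, and the createFoo* factories are assumptions mirroring their Sparc counterparts:

    #include "llvm/MC/MCCodeGenInfo.h"
    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Support/TargetRegistry.h"
    using namespace llvm;

    // "Foo" is hypothetical: TheFooTarget, FooMCAsmInfo, and the createFoo*
    // factories are assumed to exist with the same shapes as Sparc's above.
    extern "C" void LLVMInitializeFooTargetMC() {
      // Asm info (allocator registered by the template's constructor).
      RegisterMCAsmInfo<FooMCAsmInfo> X(TheFooTarget);
      // Everything else goes through explicit TargetRegistry hooks.
      TargetRegistry::RegisterMCCodeGenInfo(TheFooTarget, createFooMCCodeGenInfo);
      TargetRegistry::RegisterMCInstrInfo(TheFooTarget, createFooMCInstrInfo);
      TargetRegistry::RegisterMCRegInfo(TheFooTarget, createFooMCRegisterInfo);
      TargetRegistry::RegisterMCSubtargetInfo(TheFooTarget,
                                              createFooMCSubtargetInfo);
    }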
SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : SparcTargetMachine(T, TT, CPU, FS, true) { + StringRef TT, StringRef CPU, + StringRef FS, Reloc::Model RM, + CodeModel::Model CM) + : SparcTargetMachine(T, TT, CPU, FS, RM, CM, true) { } diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h index 799fc49..3c907dd 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h +++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h @@ -33,9 +33,9 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcInstrInfo InstrInfo; SparcFrameLowering FrameLowering; public: - SparcTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, - bool is64bit); + SparcTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { @@ -62,16 +62,18 @@ public: /// class SparcV8TargetMachine : public SparcTargetMachine { public: - SparcV8TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + SparcV8TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; /// SparcV9TargetMachine - Sparc 64-bit target machine /// class SparcV9TargetMachine : public SparcTargetMachine { public: - SparcV9TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + SparcV9TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp index 5c06f07..c9d5b7b 100644 --- a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp @@ -9,7 +9,7 @@ #include "Sparc.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheSparcTarget; diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 2ac9016..0000000 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_llvm_library(LLVMSystemZDesc - SystemZMCTargetDesc.cpp - SystemZMCAsmInfo.cpp - ) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile deleted file mode 100644 index 08f1a9d..0000000 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. 
-LIBRARYNAME = LLVMSystemZDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 5a826a6..23fb1e0 100644 --- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -13,10 +13,11 @@ #include "SystemZMCTargetDesc.h" #include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "SystemZGenInstrInfo.inc" @@ -35,9 +36,10 @@ static MCInstrInfo *createSystemZMCInstrInfo() { return X; } -extern "C" void LLVMInitializeSystemZMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, - createSystemZMCInstrInfo); +static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSystemZMCRegisterInfo(X, 0); + return X; } static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT, @@ -48,11 +50,32 @@ static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT, return X; } -extern "C" void LLVMInitializeSystemZMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget, - createSystemZMCSubtargetInfo); +static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) + RM = Reloc::Static; + X->InitMCCodeGenInfo(RM, CM); + return X; } -extern "C" void LLVMInitializeSystemZMCAsmInfo() { +extern "C" void LLVMInitializeSystemZTargetMC() { + // Register the MC asm info. RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget, + createSystemZMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, + createSystemZMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheSystemZTarget, + createSystemZMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget, + createSystemZMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index fd4d8b7..43dcdfc 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -28,10 +28,8 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 871c297..48ca99f 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -81,6 +81,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : setSchedulingPreference(Sched::RegPressure); setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Expand); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 99e2730..5f3dd80 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_CTOR #include "SystemZGenInstrInfo.inc" diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 11a39fc..580d65b 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -478,7 +478,7 @@ def MOV64rmm : RSYI<0x04EB, "lmg\t{$from, $to, $dst}", []>; -let isReMaterializable = 1, isAsCheapAsAMove = 1, +let isReMaterializable = 1, neverHasSideEffects = 1, isAsCheapAsAMove = 1, Constraints = "$src = $dst" in { def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src), "lhi\t${dst:subreg_even}, 0", diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 59692e8..b1050d4 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -33,7 +33,7 @@ using namespace llvm; SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii) - : SystemZGenRegisterInfo(), TM(tm), TII(tii) { + : SystemZGenRegisterInfo(0), TM(tm), TII(tii) { } const unsigned* @@ -126,11 +126,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i+1).ChangeToImmediate(Offset); } -unsigned SystemZRegisterInfo::getRARegister() const { - assert(0 && "What is the return address register"); - return 0; -} - unsigned SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { assert(0 && "What is the frame register"); @@ -146,13 +141,3 @@ 
unsigned SystemZRegisterInfo::getEHHandlerRegister() const { assert(0 && "What is the exception handler register"); return 0; } - -int SystemZRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - assert(0 && "What is the dwarf register number"); - return -1; -} - -int SystemZRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - assert(0 && "What is the dwarf register number"); - return -1; -} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 2e262e1..03935b2 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -48,15 +48,11 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { int SPAdj, RegScavenger *RS = NULL) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index b3ed066..0845510 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -15,7 +15,7 @@ #include "SystemZ.h" #include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index 48298cc..e390f06 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -10,7 +10,7 @@ #include "SystemZTargetMachine.h" #include "SystemZ.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; extern "C" void LLVMInitializeSystemZTarget() { @@ -21,18 +21,15 @@ extern "C" void LLVMInitializeSystemZTarget() { /// SystemZTargetMachine ctor - Create an ILP64 architecture model /// SystemZTargetMachine::SystemZTargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), + StringRef TT, StringRef CPU, + StringRef FS, Reloc::Model RM, + CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" "-f64:64:64-f128:128:128-a0:16:16-n32:64"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { - - if (getRelocationModel() == Reloc::Default) - setRelocationModel(Reloc::Static); } bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM, diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h index e40b556..43dce4b 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -37,8 +37,9 @@ class SystemZTargetMachine : public LLVMTargetMachine { SystemZSelectionDAGInfo TSInfo; SystemZFrameLowering FrameLowering; public: - SystemZTargetMachine(const Target &T, 
const std::string &TT, - const std::string &CPU, const std::string &FS); + SystemZTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp index 8272b11..da99282 100644 --- a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp @@ -9,7 +9,7 @@ #include "SystemZ.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheSystemZTarget; diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp index a42ce54..a2b83bc 100644 --- a/contrib/llvm/lib/Target/Target.cpp +++ b/contrib/llvm/lib/Target/Target.cpp @@ -17,6 +17,7 @@ #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/LLVMContext.h" #include <cstring> @@ -39,6 +40,11 @@ void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) { unwrap(PM)->add(new TargetData(*unwrap(TD))); } +void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI, + LLVMPassManagerRef PM) { + unwrap(PM)->add(new TargetLibraryInfo(*unwrap(TLI))); +} + char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) { std::string StringRep = unwrap(TD)->getStringRepresentation(); return strdup(StringRep.c_str()); @@ -87,13 +93,13 @@ unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD, unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy, unsigned long long Offset) { - const StructType *STy = unwrap<StructType>(StructTy); + StructType *STy = unwrap<StructType>(StructTy); return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset); } unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy, unsigned Element) { - const StructType *STy = unwrap<StructType>(StructTy); + StructType *STy = unwrap<StructType>(StructTy); return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element); } diff --git a/contrib/llvm/lib/Target/TargetAsmInfo.cpp b/contrib/llvm/lib/Target/TargetAsmInfo.cpp deleted file mode 100644 index a97b0e8..0000000 --- a/contrib/llvm/lib/Target/TargetAsmInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- llvm/Target/TargetAsmInfo.cpp - Target Assembly Info --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -using namespace llvm; - -TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) { - TLOF = &TM.getTargetLowering()->getObjFileLowering(); - TFI = TM.getFrameLowering(); - TRI = TM.getRegisterInfo(); - TFI->getInitialFrameState(InitialFrameState); -} diff --git a/contrib/llvm/lib/Target/TargetAsmLexer.cpp b/contrib/llvm/lib/Target/TargetAsmLexer.cpp deleted file mode 100644 index d4893ff..0000000 --- a/contrib/llvm/lib/Target/TargetAsmLexer.cpp +++ /dev/null @@ -1,14 +0,0 @@ -//===-- llvm/Target/TargetAsmLexer.cpp - Target Assembly Lexer ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetAsmLexer.h" -using namespace llvm; - -TargetAsmLexer::TargetAsmLexer(const Target &T) : TheTarget(T), Lexer(NULL) {} -TargetAsmLexer::~TargetAsmLexer() {} diff --git a/contrib/llvm/lib/Target/TargetData.cpp b/contrib/llvm/lib/Target/TargetData.cpp index 17d022a..bd6a6b6 100644 --- a/contrib/llvm/lib/Target/TargetData.cpp +++ b/contrib/llvm/lib/Target/TargetData.cpp @@ -41,7 +41,7 @@ char TargetData::ID = 0; // Support for StructLayout //===----------------------------------------------------------------------===// -StructLayout::StructLayout(const StructType *ST, const TargetData &TD) { +StructLayout::StructLayout(StructType *ST, const TargetData &TD) { assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); StructAlignment = 0; StructSize = 0; @@ -49,7 +49,7 @@ StructLayout::StructLayout(const StructType *ST, const TargetData &TD) { // Loop over each of the elements, placing them in memory. for (unsigned i = 0, e = NumElements; i != e; ++i) { - const Type *Ty = ST->getElementType(i); + Type *Ty = ST->getElementType(i); unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty); // Add padding if necessary to align the data element properly. @@ -139,6 +139,7 @@ void TargetData::init(StringRef Desc) { PointerMemSize = 8; PointerABIAlign = 8; PointerPrefAlign = PointerABIAlign; + StackNaturalAlign = 0; // Default alignments setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1 @@ -218,7 +219,12 @@ void TargetData::init(StringRef Desc) { Token = Split.second; } while (!Specifier.empty() || !Token.empty()); break; - + case 'S': // Stack natural alignment. + StackNaturalAlign = getInt(Specifier.substr(1)); + StackNaturalAlign /= 8; + // FIXME: Should we really be truncating these alignments and + // sizes silently? + break; default: break; } @@ -261,7 +267,7 @@ TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align, /// preferred if ABIInfo = false) the target wants for the specified datatype. unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, uint32_t BitWidth, bool ABIInfo, - const Type *Ty) const { + Type *Ty) const { // Check to see if we have an exact match and remember the best match we see.
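As an aside on the new 'S' (stack natural alignment) specifier handled in the case 'S' hunk above: a minimal, self-contained C++ sketch of the arithmetic, with a made-up datalayout token standing in for the file-local getInt helper and the StringRef plumbing.

#include <cassert>
#include <cstdlib>
#include <string>

int main() {
  // Hypothetical datalayout token, as split out of e.g. "e-p:64:64:64-S128".
  std::string Specifier = "S128";
  unsigned StackNaturalAlign = std::atoi(Specifier.c_str() + 1); // 128 bits
  StackNaturalAlign /= 8;                                        // 16 bytes
  assert(StackNaturalAlign == 16);
  return 0;
}

The value travels in bits in the string but is stored in bytes, which is why getStringRepresentation() further down multiplies by 8 again when it re-emits the "-S" component.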
int BestMatchIdx = -1; int LargestInt = -1; @@ -315,7 +321,7 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, namespace { class StructLayoutMap { - typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy; + typedef DenseMap<StructType*, StructLayout*> LayoutInfoTy; LayoutInfoTy LayoutInfo; public: @@ -329,7 +335,7 @@ public: } } - StructLayout *&operator[](const StructType *STy) { + StructLayout *&operator[](StructType *STy) { return LayoutInfo[STy]; } @@ -343,7 +349,7 @@ TargetData::~TargetData() { delete static_cast<StructLayoutMap*>(LayoutMap); } -const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { +const StructLayout *TargetData::getStructLayout(StructType *Ty) const { if (!LayoutMap) LayoutMap = new StructLayoutMap(); @@ -372,7 +378,9 @@ std::string TargetData::getStringRepresentation() const { OS << (LittleEndian ? "e" : "E") << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8 - << ':' << PointerPrefAlign*8; + << ':' << PointerPrefAlign*8 + << "-S" << StackNaturalAlign*8; + for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { const TargetAlignElem &AI = Alignments[i]; OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':' @@ -389,14 +397,14 @@ std::string TargetData::getStringRepresentation() const { } -uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { +uint64_t TargetData::getTypeSizeInBits(Type *Ty) const { assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); switch (Ty->getTypeID()) { case Type::LabelTyID: case Type::PointerTyID: return getPointerSizeInBits(); case Type::ArrayTyID: { - const ArrayType *ATy = cast<ArrayType>(Ty); + ArrayType *ATy = cast<ArrayType>(Ty); return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements(); } case Type::StructTyID: @@ -435,7 +443,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref == false) for the requested type \a Ty. 
*/ -unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { +unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const { int AlignType = -1; assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); @@ -485,7 +493,7 @@ unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { abi_or_pref, Ty); } -unsigned TargetData::getABITypeAlignment(const Type *Ty) const { +unsigned TargetData::getABITypeAlignment(Type *Ty) const { return getAlignment(Ty, true); } @@ -496,7 +504,7 @@ unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const { } -unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const { +unsigned TargetData::getCallFrameTypeAlignment(Type *Ty) const { for (unsigned i = 0, e = Alignments.size(); i != e; ++i) if (Alignments[i].AlignType == STACK_ALIGN) return Alignments[i].ABIAlign; @@ -504,11 +512,11 @@ unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const { return getABITypeAlignment(Ty); } -unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const { +unsigned TargetData::getPrefTypeAlignment(Type *Ty) const { return getAlignment(Ty, false); } -unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const { +unsigned TargetData::getPreferredTypeAlignmentShift(Type *Ty) const { unsigned Align = getPrefTypeAlignment(Ty); assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); return Log2_32(Align); @@ -521,16 +529,17 @@ IntegerType *TargetData::getIntPtrType(LLVMContext &C) const { } -uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, - unsigned NumIndices) const { - const Type *Ty = ptrTy; +uint64_t TargetData::getIndexedOffset(Type *ptrTy, + ArrayRef<Value *> Indices) const { + Type *Ty = ptrTy; assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()"); uint64_t Result = 0; generic_gep_type_iterator<Value* const*> - TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices); - for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) { - if (const StructType *STy = dyn_cast<StructType>(*TI)) { + TI = gep_type_begin(ptrTy, Indices); + for (unsigned CurIDX = 0, EndIDX = Indices.size(); CurIDX != EndIDX; + ++CurIDX, ++TI) { + if (StructType *STy = dyn_cast<StructType>(*TI)) { assert(Indices[CurIDX]->getType() == Type::getInt32Ty(ptrTy->getContext()) && "Illegal struct idx"); @@ -561,7 +570,7 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, /// global. This includes an explicitly requested alignment (if the global /// has one). unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const { - const Type *ElemType = GV->getType()->getElementType(); + Type *ElemType = GV->getType()->getElementType(); unsigned Alignment = getPrefTypeAlignment(ElemType); unsigned GVAlignment = GV->getAlignment(); if (GVAlignment >= Alignment) { diff --git a/contrib/llvm/lib/Target/TargetFrameLowering.cpp b/contrib/llvm/lib/Target/TargetFrameLowering.cpp index 19fd581..122f869 100644 --- a/contrib/llvm/lib/Target/TargetFrameLowering.cpp +++ b/contrib/llvm/lib/Target/TargetFrameLowering.cpp @@ -23,14 +23,6 @@ using namespace llvm; TargetFrameLowering::~TargetFrameLowering() { } -/// getInitialFrameState - Returns a list of machine moves that are assumed -/// on entry to a function. -void -TargetFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) - const { - // Default is to do nothing. 
-} - -/// getFrameIndexOffset - Returns the displacement from the frame register to /// the stack frame of the specified index. This is the default implementation /// which is overridden for some targets. diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp index 703431b..56b7b69 100644 --- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -35,34 +35,16 @@ using namespace llvm; // Generic Code //===----------------------------------------------------------------------===// -TargetLoweringObjectFile::TargetLoweringObjectFile() : - Ctx(0), - TextSection(0), - DataSection(0), - BSSSection(0), - ReadOnlySection(0), - StaticCtorSection(0), - StaticDtorSection(0), - LSDASection(0), - CompactUnwindSection(0), - DwarfAbbrevSection(0), - DwarfInfoSection(0), - DwarfLineSection(0), - DwarfFrameSection(0), - DwarfPubNamesSection(0), - DwarfPubTypesSection(0), - DwarfDebugInlineSection(0), - DwarfStrSection(0), - DwarfLocSection(0), - DwarfARangesSection(0), - DwarfRangesSection(0), - DwarfMacroInfoSection(0), - TLSExtraDataSection(0), - CommDirectiveSupportsAlignment(true), - SupportsWeakOmittedEHFrame(true), - IsFunctionEHFrameSymbolPrivate(true) { +/// Initialize - this method must be called before any actual lowering is +/// done. This specifies the current context for codegen, and gives the +/// lowering implementations a chance to set up their default sections. +void TargetLoweringObjectFile::Initialize(MCContext &ctx, + const TargetMachine &TM) { + Ctx = &ctx; + InitMCObjectFileInfo(TM.getTargetTriple(), + TM.getRelocationModel(), TM.getCodeModel(), *Ctx); } - + TargetLoweringObjectFile::~TargetLoweringObjectFile() { } @@ -93,7 +75,7 @@ static bool isSuitableForBSS(const GlobalVariable *GV) { /// known to have a type that is an array of 1/2/4 byte elements) ends with a /// nul value and contains no other nuls in it. static bool IsNullTerminatedString(const Constant *C) { - const ArrayType *ATy = cast<ArrayType>(C->getType()); + ArrayType *ATy = cast<ArrayType>(C->getType()); // First check: if we have a constant array of i8 terminated with zero if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) { @@ -188,8 +170,8 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // If initializer is a null-terminated string, put it in a "cstring" // section of the right width.
- if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { - if (const IntegerType *ITy = + if (ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { + if (IntegerType *ITy = dyn_cast<IntegerType>(ATy->getElementType())) { if ((ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16 || ITy->getBitWidth() == 32) && @@ -341,20 +323,3 @@ getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding, } } } - -unsigned TargetLoweringObjectFile::getPersonalityEncoding() const { - return dwarf::DW_EH_PE_absptr; -} - -unsigned TargetLoweringObjectFile::getLSDAEncoding() const { - return dwarf::DW_EH_PE_absptr; -} - -unsigned TargetLoweringObjectFile::getFDEEncoding(bool CFI) const { - return dwarf::DW_EH_PE_absptr; -} - -unsigned TargetLoweringObjectFile::getTTypeEncoding() const { - return dwarf::DW_EH_PE_absptr; -} - diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp index 74a1f4e..fe8a7ce 100644 --- a/contrib/llvm/lib/Target/TargetMachine.cpp +++ b/contrib/llvm/lib/Target/TargetMachine.cpp @@ -40,8 +40,6 @@ namespace llvm { bool JITExceptionHandling; bool JITEmitDebugInfo; bool JITEmitDebugInfoToDisk; - Reloc::Model RelocationModel; - CodeModel::Model CMModel; bool GuaranteedTailCallOpt; unsigned StackAlignmentOverride; bool RealignStack; @@ -49,6 +47,7 @@ namespace llvm { bool StrongPHIElim; bool HasDivModLibcall; bool AsmVerbosityDefault(false); + bool EnableSegmentedStacks; } static cl::opt<bool, true> @@ -143,38 +142,6 @@ EmitJitDebugInfoToDisk("jit-emit-debug-to-disk", cl::location(JITEmitDebugInfoToDisk), cl::init(false)); -static cl::opt<llvm::Reloc::Model, true> -DefRelocationModel("relocation-model", - cl::desc("Choose relocation model"), - cl::location(RelocationModel), - cl::init(Reloc::Default), - cl::values( - clEnumValN(Reloc::Default, "default", - "Target default relocation model"), - clEnumValN(Reloc::Static, "static", - "Non-relocatable code"), - clEnumValN(Reloc::PIC_, "pic", - "Fully relocatable, position independent code"), - clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", - "Relocatable external references, non-relocatable code"), - clEnumValEnd)); -static cl::opt<llvm::CodeModel::Model, true> -DefCodeModel("code-model", - cl::desc("Choose code model"), - cl::location(CMModel), - cl::init(CodeModel::Default), - cl::values( - clEnumValN(CodeModel::Default, "default", - "Target default code model"), - clEnumValN(CodeModel::Small, "small", - "Small code model"), - clEnumValN(CodeModel::Kernel, "kernel", - "Kernel code model"), - clEnumValN(CodeModel::Medium, "medium", - "Medium code model"), - clEnumValN(CodeModel::Large, "large", - "Large code model"), - clEnumValEnd)); static cl::opt<bool, true> EnableGuaranteedTailCallOpt("tailcallopt", cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."), @@ -212,13 +179,20 @@ static cl::opt<bool> FunctionSections("ffunction-sections", cl::desc("Emit functions into separate sections"), cl::init(false)); +static cl::opt<bool, true> +SegmentedStacks("segmented-stacks", + cl::desc("Use segmented stacks if possible."), + cl::location(EnableSegmentedStacks), + cl::init(false)); + //--------------------------------------------------------------------------- // TargetMachine Class // TargetMachine::TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS) - : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), AsmInfo(0), + : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), + CodeGenInfo(0), AsmInfo(0), MCRelaxAll(false), 
MCNoExecStack(false), MCSaveTempLabels(false), @@ -231,29 +205,24 @@ TargetMachine::TargetMachine(const Target &T, } TargetMachine::~TargetMachine() { + delete CodeGenInfo; delete AsmInfo; } /// getRelocationModel - Returns the code generation relocation model. The /// choices are static, PIC, and dynamic-no-pic, and target default. -Reloc::Model TargetMachine::getRelocationModel() { - return RelocationModel; -} - -/// setRelocationModel - Sets the code generation relocation model. -void TargetMachine::setRelocationModel(Reloc::Model Model) { - RelocationModel = Model; +Reloc::Model TargetMachine::getRelocationModel() const { + if (!CodeGenInfo) + return Reloc::Default; + return CodeGenInfo->getRelocationModel(); } /// getCodeModel - Returns the code model. The choices are small, kernel, /// medium, large, and target default. -CodeModel::Model TargetMachine::getCodeModel() { - return CMModel; -} - -/// setCodeModel - Sets the code model. -void TargetMachine::setCodeModel(CodeModel::Model Model) { - CMModel = Model; +CodeModel::Model TargetMachine::getCodeModel() const { + if (!CodeGenInfo) + return CodeModel::Default; + return CodeGenInfo->getCodeModel(); } bool TargetMachine::getAsmVerbosityDefault() { diff --git a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp index 90a8f8d..67239b8 100644 --- a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp @@ -98,44 +98,25 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, } const TargetRegisterClass * -llvm::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) { - // First take care of the trivial cases +TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, + const TargetRegisterClass *B) const { + // First take care of the trivial cases. if (A == B) return A; if (!A || !B) return 0; - // If B is a subclass of A, it will be handled in the loop below - if (B->hasSubClass(A)) - return A; + // Register classes are ordered topologically, so the largest common + // sub-class is the common sub-class with the smallest ID. + const unsigned *SubA = A->getSubClassMask(); + const unsigned *SubB = B->getSubClassMask(); - const TargetRegisterClass *Best = 0; - for (TargetRegisterClass::sc_iterator I = A->subclasses_begin(); - const TargetRegisterClass *X = *I; ++I) { - if (X == B) - return B; // B is a subclass of A - - // X must be a common subclass of A and B - if (!B->hasSubClass(X)) - continue; - - // A superclass is definitely better. - if (!Best || Best->hasSuperClass(X)) { - Best = X; - continue; - } - - // A subclass is definitely worse - if (Best->hasSubClass(X)) - continue; - - // Best and *I have no super/sub class relation - pick the larger class, or - // the smaller spill size. - int nb = std::distance(Best->begin(), Best->end()); - int ni = std::distance(X->begin(), X->end()); - if (ni>nb || (ni==nb && X->getSize() < Best->getSize())) - Best = X; - } - return Best; + // We could start the search from max(A.ID, B.ID), but we are only going to + // execute 2-3 iterations anyway. + for (unsigned Base = 0, BaseE = getNumRegClasses(); Base < BaseE; Base += 32) + if (unsigned Common = *SubA++ & *SubB++) + return getRegClass(Base + CountTrailingZeros_32(Common)); + + // No common sub-class exists.
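To make the new bit-mask walk concrete before the function returns below: a hedged, self-contained sketch with invented one-word sub-class masks (real masks come from TableGen and may span several 32-bit words).

#include <cassert>

// Stand-in for CountTrailingZeros_32 (index of the lowest set bit).
static unsigned ctz32(unsigned V) {
  unsigned N = 0;
  for (; !(V & 1); V >>= 1) ++N;
  return N;
}

int main() {
  // Made-up sub-class masks: A covers class IDs 4 and 5, B covers 0 and 5.
  const unsigned SubA[] = {0x30};
  const unsigned SubB[] = {0x21};
  const unsigned NumRegClasses = 32;
  for (unsigned Base = 0; Base < NumRegClasses; Base += 32)
    if (unsigned Common = SubA[Base / 32] & SubB[Base / 32]) {
      assert(Base + ctz32(Common) == 5); // smallest shared ID wins
      return 0;
    }
  return 1; // no common sub-class
}

Because class IDs are assigned in topological order, the lowest set bit in the intersection is necessarily the largest common sub-class, which is what lets the old pairwise hasSubClass search be dropped; when the intersection is empty the function falls through to the return in the hunk below.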
+ return NULL; } diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp index ec73087..1eaccff 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -7,20 +7,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Target/TargetAsmLexer.h" -#include "llvm/Target/TargetRegistry.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "X86.h" +#include "llvm/MC/MCTargetAsmLexer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; namespace { -class X86AsmLexer : public TargetAsmLexer { +class X86AsmLexer : public MCTargetAsmLexer { const MCAsmInfo &AsmInfo; bool tentativeIsValid; @@ -60,8 +60,8 @@ protected: } } public: - X86AsmLexer(const Target &T, const MCAsmInfo &MAI) - : TargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) { + X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI) + : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) { } }; @@ -160,6 +160,6 @@ AsmToken X86AsmLexer::LexTokenIntel() { } extern "C" void LLVMInitializeX86AsmLexer() { - RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target); - RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target); + RegisterMCAsmLexer<X86AsmLexer> X(TheX86_32Target); + RegisterMCAsmLexer<X86AsmLexer> Y(TheX86_64Target); } diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index d45dd35..cb4f15f 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -7,14 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmParser.h" -#include "X86.h" -#include "X86Subtarget.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmParser.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" @@ -26,6 +24,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -33,7 +32,7 @@ using namespace llvm; namespace { struct X86Operand; -class X86ATTAsmParser : public TargetAsmParser { +class X86ATTAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; @@ -48,6 +47,7 @@ private: X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); bool ParseDirectiveWord(unsigned Size, SMLoc L); + bool ParseDirectiveCode(StringRef IDVal, SMLoc L); bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, @@ -65,6 +65,10 @@ private: // FIXME: Can tablegen auto-generate this? 
return (STI.getFeatureBits() & X86::Mode64Bit) != 0; } + void SwitchMode() { + unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit)); + setAvailableFeatures(FB); + } /// @name Auto-generated Matcher Functions /// { @@ -76,7 +80,7 @@ private: public: X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : TargetAsmParser(), STI(sti), Parser(parser) { + : MCTargetAsmParser(), STI(sti), Parser(parser) { // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -223,6 +227,21 @@ struct X86Operand : public MCParsedAsmOperand { (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } + bool isImmZExtu32u8() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (Value <= 0x00000000000000FFULL); + } bool isImmSExti64i8() const { if (!isImm()) return false; @@ -382,19 +401,25 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, if (Tok.isNot(AsmToken::Identifier)) return Error(Tok.getLoc(), "invalid register name"); - // FIXME: Validate register for the current architecture; we have to do - // validation later, so maybe there is no need for this here. RegNo = MatchRegisterName(Tok.getString()); // If the match failed, try the register name as lowercase. if (RegNo == 0) RegNo = MatchRegisterName(LowercaseString(Tok.getString())); - // FIXME: This should be done using Requires<In32BitMode> and - // Requires<In64BitMode> so "eiz" usage in 64-bit instructions - // can be also checked. - if (RegNo == X86::RIZ && !is64BitMode()) - return Error(Tok.getLoc(), "riz register in 64-bit mode only"); + if (!is64BitMode()) { + // FIXME: This should be done using Requires<In32BitMode> and + // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also + // checked. + // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a + // REX prefix. + if (RegNo == X86::RIZ || + X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || + X86II::isX86_64NonExtLowByteReg(RegNo) || + X86II::isX86_64ExtendedReg(RegNo)) + return Error(Tok.getLoc(), "register %" + + Tok.getString() + " is only available in 64-bit mode"); + } // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { @@ -472,7 +497,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() { SMLoc Start, End; if (ParseRegister(RegNo, Start, End)) return 0; if (RegNo == X86::EIZ || RegNo == X86::RIZ) { - Error(Start, "eiz and riz can only be used as index registers"); + Error(Start, "%eiz and %riz can only be used as index registers"); return 0; } @@ -956,6 +981,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, // First, try a direct match. switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) { + default: break; case Match_Success: Out.EmitInstruction(Inst); return false; @@ -994,7 +1020,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, // Check for the various suffix matches. 
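For orientation, the retry loop continuing below appends each size suffix to the failed mnemonic and re-runs the matcher; here is a self-contained sketch of that idea (the stand-in matcher and mnemonic are hypothetical, and the real parser also keeps the per-suffix results to diagnose ambiguity).

#include <string>

// Stand-in matcher: pretend the instruction table only knows "movl".
static bool matches(const std::string &Mnemonic) { return Mnemonic == "movl"; }

int main() {
  std::string Base = "mov";         // unsuffixed mnemonic that failed to match
  std::string Tmp = Base + ' ';     // one spare slot for the suffix character
  const char Suffixes[4] = {'b', 'w', 'l', 'q'};
  unsigned NumMatches = 0;
  for (unsigned i = 0; i != 4; ++i) {
    Tmp[Base.size()] = Suffixes[i]; // "movb", "movw", "movl", "movq"
    if (matches(Tmp))
      ++NumMatches;
  }
  return NumMatches == 1 ? 0 : 1;   // exactly one match is an unambiguous hit
}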
Tmp[Base.size()] = Suffixes[0]; unsigned ErrorInfoIgnore; - MatchResultTy Match1, Match2, Match3, Match4; + unsigned Match1, Match2, Match3, Match4; Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); Tmp[Base.size()] = Suffixes[1]; @@ -1096,6 +1122,8 @@ bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") return ParseDirectiveWord(2, DirectiveID.getLoc()); + else if (IDVal.startswith(".code")) + return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); return true; } @@ -1124,15 +1152,35 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } +/// ParseDirectiveCode +/// ::= .code32 | .code64 +bool X86ATTAsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { + if (IDVal == ".code32") { + Parser.Lex(); + if (is64BitMode()) { + SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); + } + } else if (IDVal == ".code64") { + Parser.Lex(); + if (!is64BitMode()) { + SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64); + } + } else { + return Error(L, "unexpected directive " + IDVal); + } + return false; +} extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { - RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); - RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); + RegisterMCAsmParser<X86ATTAsmParser> X(TheX86_32Target); + RegisterMCAsmParser<X86ATTAsmParser> Y(TheX86_64Target); LLVMInitializeX86AsmLexer(); } diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 4a0d2ec..3aacb20 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -21,13 +21,16 @@ #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #define GET_REGINFO_ENUM #include "X86GenRegisterInfo.inc" +#define GET_INSTRINFO_ENUM +#include "X86GenInstrInfo.inc" #include "X86GenEDInfo.inc" using namespace llvm; @@ -64,8 +67,8 @@ extern Target TheX86_32Target, TheX86_64Target; static bool translateInstruction(MCInst &target, InternalInstruction &source); -X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) : - MCDisassembler(), +X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) : + MCDisassembler(STI), fMode(mode) { } @@ -106,28 +109,34 @@ static void logger(void* arg, const char* log) { // Public interface for the disassembler // -bool X86GenericDisassembler::getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject &region, - uint64_t address, - raw_ostream &vStream) const { +MCDisassembler::DecodeStatus +X86GenericDisassembler::getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const { InternalInstruction internalInstr; + + dlog_t loggerFn = logger; + if (&vStream == &nulls()) + loggerFn = 0; // Disable logging completely if it's going to nulls().
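Before the decode call resumes below, note how clients consume the tri-state DecodeStatus this function now returns instead of a bool; a hedged usage sketch, with the disassembler, instruction, memory object, and stream objects elided and their names purely illustrative.

uint64_t Size = 0;
MCDisassembler::DecodeStatus S =
    Disassembler->getInstruction(Inst, Size, Region, Address,
                                 nulls(), nulls()); // nulls() mutes the logger
if (S == MCDisassembler::Success)
  Printer->printInst(&Inst, OS, /*Annot=*/"");
else
  Size = 1; // skip one byte and resume, a common recovery policy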
int ret = decodeInstruction(&internalInstr, regionReader, (void*)&region, - logger, + loggerFn, (void*)&vStream, address, fMode); if (ret) { size = internalInstr.readerCursor - address; - return false; + return Fail; } else { size = internalInstr.length; - return !translateInstruction(instr, internalInstr); + return (!translateInstruction(instr, internalInstr)) ? Success : Fail; } } @@ -183,8 +192,46 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, break; } } + // By default sign-extend all X86 immediates based on their encoding. + else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || + type == TYPE_IMM64) { + uint32_t Opcode = mcInst.getOpcode(); + switch (operand.encoding) { + default: + break; + case ENCODING_IB: + // Special case those X86 instructions that use the imm8 as a set of + // bits, bit count, etc. and are not sign-extended. + if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && + Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && + Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && + Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && + Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && + Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && + Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && + Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && + Opcode != X86::VINSERTPSrr) + type = TYPE_MOFFS8; + break; + case ENCODING_IW: + type = TYPE_MOFFS16; + break; + case ENCODING_ID: + type = TYPE_MOFFS32; + break; + case ENCODING_IO: + type = TYPE_MOFFS64; + break; + } + } switch (type) { + case TYPE_XMM128: + mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); + return; + case TYPE_XMM256: + mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); + return; case TYPE_MOFFS8: case TYPE_REL8: if(immediate & 0x80) @@ -543,12 +590,12 @@ static bool translateInstruction(MCInst &mcInst, return false; } -static MCDisassembler *createX86_32Disassembler(const Target &T) { - return new X86Disassembler::X86_32Disassembler; +static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) { + return new X86Disassembler::X86_32Disassembler(STI); } -static MCDisassembler *createX86_64Disassembler(const Target &T) { - return new X86Disassembler::X86_64Disassembler; +static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) { + return new X86Disassembler::X86_64Disassembler(STI); } extern "C" void LLVMInitializeX86Disassembler() { diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h index 550cf9d..6ac9a0f 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h @@ -92,6 +92,7 @@ struct InternalInstruction; namespace llvm { class MCInst; +class MCSubtargetInfo; class MemoryObject; class raw_ostream; @@ -107,16 +108,17 @@ protected: /// Constructor - Initializes the disassembler. /// /// @param mode - The X86 architecture mode to decode for. - X86GenericDisassembler(DisassemblerMode mode); + X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode); public: ~X86GenericDisassembler(); /// getInstruction - See MCDisassembler.
- bool getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject &region, - uint64_t address, - raw_ostream &vStream) const; + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. EDInstInfo *getEDInfo() const; @@ -127,24 +129,24 @@ private: /// X86_16Disassembler - 16-bit X86 disassembler. class X86_16Disassembler : public X86GenericDisassembler { public: - X86_16Disassembler() : - X86GenericDisassembler(MODE_16BIT) { + X86_16Disassembler(const MCSubtargetInfo &STI) : + X86GenericDisassembler(STI, MODE_16BIT) { } }; /// X86_32Disassembler - 32-bit X86 disassembler. class X86_32Disassembler : public X86GenericDisassembler { public: - X86_32Disassembler() : - X86GenericDisassembler(MODE_32BIT) { + X86_32Disassembler(const MCSubtargetInfo &STI) : + X86GenericDisassembler(STI, MODE_32BIT) { } }; /// X86_64Disassembler - 64-bit X86 disassembler. class X86_64Disassembler : public X86GenericDisassembler { public: - X86_64Disassembler() : - X86GenericDisassembler(MODE_64BIT) { + X86_64Disassembler(const MCSubtargetInfo &STI) : + X86GenericDisassembler(STI, MODE_64BIT) { } }; diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index de1610b..f9b0fe5 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -58,8 +58,8 @@ static InstructionContext contextForAttrs(uint8_t attrMask) { * @return - TRUE if the ModR/M byte is required, FALSE otherwise. */ static int modRMRequired(OpcodeType type, - InstructionContext insnContext, - uint8_t opcode) { + InstructionContext insnContext, + uint8_t opcode) { const struct ContextDecision* decision = 0; switch (type) { @@ -391,7 +391,7 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if (insn->mode == MODE_64BIT || byte1 & 0x8) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 3; insn->necessaryPrefixLocation = insn->readerCursor - 1; } @@ -406,12 +406,14 @@ static int readPrefixes(struct InternalInstruction* insn) { consumeByte(insn, &insn->vexPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ - - insn->rexPrefix = 0x40 - | (wFromVEX3of3(insn->vexPrefix[2]) << 3) - | (rFromVEX2of3(insn->vexPrefix[1]) << 2) - | (xFromVEX2of3(insn->vexPrefix[1]) << 1) - | (bFromVEX2of3(insn->vexPrefix[1]) << 0); + + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (wFromVEX3of3(insn->vexPrefix[2]) << 3) + | (rFromVEX2of3(insn->vexPrefix[1]) << 2) + | (xFromVEX2of3(insn->vexPrefix[1]) << 1) + | (bFromVEX2of3(insn->vexPrefix[1]) << 0); + } switch (ppFromVEX3of3(insn->vexPrefix[2])) { @@ -433,7 +435,7 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if (insn->mode == MODE_64BIT || byte1 & 0x8) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 2; } else { @@ -444,8 +446,10 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); - insn->rexPrefix = 0x40 - | (rFromVEX2of2(insn->vexPrefix[1]) << 2); + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (rFromVEX2of2(insn->vexPrefix[1]) << 2); + } switch (ppFromVEX2of2(insn->vexPrefix[1])) { @@ -700,34 +704,6 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv)
{ } /* - * is64BitEquivalent - Determines whether two instruction names refer to - * equivalent instructions but one is 64-bit whereas the other is not. - * - * @param orig - The instruction that is not 64-bit - * @param equiv - The instruction that is 64-bit - */ -static BOOL is64BitEquivalent(const char* orig, const char* equiv) { - off_t i; - - for (i = 0;; i++) { - if (orig[i] == '\0' && equiv[i] == '\0') - return TRUE; - if (orig[i] == '\0' || equiv[i] == '\0') - return FALSE; - if (orig[i] != equiv[i]) { - if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q') - continue; - if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6') - continue; - if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4') - continue; - return FALSE; - } - } -} - - -/* * getID - Determines the ID of an instruction, consuming the ModR/M byte as * appropriate for extended and escape opcodes. Determines the attributes and * context for the instruction before doing so. @@ -763,8 +739,6 @@ static int getID(struct InternalInstruction* insn) { break; } - if (wFromVEX3of3(insn->vexPrefix[2])) - attrMask |= ATTR_REXW; if (lFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_VEXL; } @@ -789,63 +763,55 @@ static int getID(struct InternalInstruction* insn) { } } else { - if (insn->rexPrefix & 0x08) - attrMask |= ATTR_REXW; - if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) attrMask |= ATTR_XS; else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) attrMask |= ATTR_XD; - } + if (insn->rexPrefix & 0x08) + attrMask |= ATTR_REXW; + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; /* The following clauses compensate for limitations of the tables. */ - if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) { + if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) { /* - * Although for SSE instructions it is usually necessary to treat REX.W+F2 - * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is - * an occasional instruction where F2 is incidental and REX.W is the more - * significant. If the decoded instruction is 32-bit and adding REX.W - * instead of F2 changes a 32 to a 64, we adopt the new encoding. + * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit + * has precedence since there are no L-bit with W-bit entries in the tables. + * So if the L-bit isn't significant we should use the W-bit instead. */ - + const struct InstructionSpecifier *spec; - uint16_t instructionIDWithREXw; - const struct InstructionSpecifier *specWithREXw; - + uint16_t instructionIDWithWBit; + const struct InstructionSpecifier *specWithWBit; + spec = specifierForUID(instructionID); - - if (getIDWithAttrMask(&instructionIDWithREXw, + + if (getIDWithAttrMask(&instructionIDWithWBit, insn, - attrMask & (~ATTR_XD))) { - /* - * Decoding with REX.w would yield nothing; give up and return original - * decode. 
- */ - + (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { insn->instructionID = instructionID; insn->spec = spec; return 0; } - - specWithREXw = specifierForUID(instructionIDWithREXw); - - if (is64BitEquivalent(spec->name, specWithREXw->name)) { - insn->instructionID = instructionIDWithREXw; - insn->spec = specWithREXw; + + specWithWBit = specifierForUID(instructionIDWithWBit); + + if (instructionID != instructionIDWithWBit) { + insn->instructionID = instructionIDWithWBit; + insn->spec = specWithWBit; } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } - + if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { /* * The instruction tables make no distinction between instructions that @@ -885,6 +851,43 @@ static int getID(struct InternalInstruction* insn) { } return 0; } + + if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && + insn->rexPrefix & 0x01) { + /* + * NOOP shouldn't decode as NOOP if REX.b is set. Instead + * it should decode as XCHG %r8, %eax. + */ + + const struct InstructionSpecifier *spec; + uint16_t instructionIDWithNewOpcode; + const struct InstructionSpecifier *specWithNewOpcode; + + spec = specifierForUID(instructionID); + + /* Borrow opcode from one of the other XCHGar opcodes */ + insn->opcode = 0x91; + + if (getIDWithAttrMask(&instructionIDWithNewOpcode, + insn, + attrMask)) { + insn->opcode = 0x90; + + insn->instructionID = instructionID; + insn->spec = spec; + return 0; + } + + specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); + + /* Change back */ + insn->opcode = 0x90; + + insn->instructionID = instructionIDWithNewOpcode; + insn->spec = specWithNewOpcode; + + return 0; + } insn->instructionID = instructionID; insn->spec = specifierForUID(insn->instructionID); @@ -1434,11 +1437,10 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { } /* - * readVVVV - Consumes an immediate operand from an instruction, given the - * desired operand size. + * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. * * @param insn - The instruction whose operand is to be read. - * @return - 0 if the immediate was successfully consumed; nonzero + * @return - 0 if the vvvv was successfully consumed; nonzero * otherwise. */ static int readVVVV(struct InternalInstruction* insn) { @@ -1451,6 +1453,9 @@ static int readVVVV(struct InternalInstruction* insn) { else return -1; + if (insn->mode != MODE_64BIT) + insn->vvvv &= 0x7; + return 0; } @@ -1463,8 +1468,14 @@ static int readVVVV(struct InternalInstruction* insn) { */ static int readOperands(struct InternalInstruction* insn) { int index; + int hasVVVV, needVVVV; dbgprintf(insn, "readOperands()"); + + /* If non-zero vvvv specified, need to make sure one of the operands + uses it. */ + hasVVVV = !readVVVV(insn); + needVVVV = hasVVVV && (insn->vvvv != 0); for (index = 0; index < X86_MAX_OPERANDS; ++index) { switch (insn->spec->operands[index].encoding) { @@ -1537,7 +1548,8 @@ static int readOperands(struct InternalInstruction* insn) { return -1; break; case ENCODING_VVVV: - if (readVVVV(insn)) + needVVVV = 0; /* Mark that we have found a VVVV operand. 
*/ + if (!hasVVVV) return -1; if (fixupReg(insn, &insn->spec->operands[index])) return -1; @@ -1549,6 +1561,9 @@ static int readOperands(struct InternalInstruction* insn) { return -1; } } + + /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ + if (needVVVV) return -1; return 0; } diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 70315ed..8b79335 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -81,12 +81,18 @@ enum attributeBits { "but not the operands") \ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \ "but not the operands") \ + ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \ + "operands change width") \ + ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \ + "operands change width") \ ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\ "change width; overrides IC_OPSIZE") \ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \ ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \ "secondary") \ ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \ + ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \ + ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \ ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \ "opcode") \ ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \ @@ -104,7 +110,7 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ - ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XS prefix")\ + ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt deleted file mode 100644 index 033973e..0000000 --- a/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMX86AsmPrinter - X86ATTInstPrinter.cpp - X86IntelInstPrinter.cpp - X86InstComments.cpp - ) -add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/Makefile b/contrib/llvm/lib/Target/X86/InstPrinter/Makefile deleted file mode 100644 index c82aa33..0000000 --- a/contrib/llvm/lib/Target/X86/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86AsmPrinter - -# Hack: we need to include 'main' x86 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index c37d879..029d491 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -39,14 +39,17 @@ void X86ATTInstPrinter::printRegName(raw_ostream &OS, OS << '%' << getRegisterName(RegNo); } -void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { +void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot) { // Try to print any aliases first. if (!printAliasInstr(MI, OS)) printInstruction(MI, OS); // If verbose assembly is enabled, we can print some informative comments. - if (CommentStream) + if (CommentStream) { + printAnnotation(OS, Annot); EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); + } } StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { @@ -90,7 +93,8 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { O << '%' << getRegisterName(Op.getReg()); } else if (Op.isImm()) { - O << '$' << Op.getImm(); + // Print X86 immediates as signed values. + O << '$' << (int64_t)Op.getImm(); if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm()); diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 5426e5c..0293869 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -25,7 +25,7 @@ public: X86ATTInstPrinter(const MCAsmInfo &MAI); virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &OS); + virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen, returns true if we successfully printed an diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp index 4e28dfe..8d85b95 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -14,9 +14,9 @@ #include "X86InstComments.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "Utils/X86ShuffleDecode.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" -#include "../Utils/X86ShuffleDecode.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -136,9 +136,11 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::SHUFPDrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::SHUFPDrmi: DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); - Src2Name = getRegName(MI->getOperand(2).getReg()); break; case X86::SHUFPSrri: @@ -205,6 +207,31 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHPMask(4, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VPERMILPSri: + DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VPERMILPSYri: + DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VPERMILPDri: + DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VPERMILPDYri: + DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VPERM2F128rr: + DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + break; } diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 506e26c..f9ab5ae 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -32,12 +32,15 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } -void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { +void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot) { printInstruction(MI, OS); // If verbose assembly is enabled, we can print some informative comments. - if (CommentStream) + if (CommentStream) { + printAnnotation(OS, Annot); EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); + } } StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index e84a194..6d5ec62 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -27,7 +27,7 @@ public: : MCInstPrinter(MAI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &OS); + virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen. diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index ca88f8f..0000000 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_llvm_library(LLVMX86Desc - X86MCTargetDesc.cpp - X86MCAsmInfo.cpp - ) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
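For the VPERMILPS cases added above, the 8-bit immediate encodes one 2-bit source selector per 32-bit lane; below is a self-contained sketch of the expansion for the 128-bit form. The in-tree decoder is DecodeVPERMILPSMask in Utils/X86ShuffleDecode; this merely mirrors the idea, and for the 256-bit form the same selectors repeat per 128-bit half, with the in-tree code adding the lane offset.

#include <cassert>
#include <vector>

// Expand a VPERMILPS-style immediate into per-element source indices.
static std::vector<unsigned> decodeVPermilPS(unsigned NElts, unsigned Imm) {
  std::vector<unsigned> Mask;
  for (unsigned i = 0; i != NElts; ++i)
    Mask.push_back((Imm >> (2 * (i % 4))) & 0x3); // 2 selector bits per lane
  return Mask;
}

int main() {
  // Imm 0x1B = 0b00011011 reverses a 4 x f32 vector: sources 3, 2, 1, 0.
  std::vector<unsigned> M = decodeVPermilPS(4, 0x1B);
  assert(M[0] == 3 && M[1] == 2 && M[2] == 1 && M[3] == 0);
  return 0;
}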
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile deleted file mode 100644 index b19774e..0000000 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/X86/TargetDesc/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86Desc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 9b556a5..69ad7d7 100644 --- a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmBackend.h" -#include "X86.h" -#include "X86FixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86FixupKinds.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -24,9 +24,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; // Option to allow disabling arithmetic relaxation to workaround PR9807, which @@ -63,10 +62,10 @@ public: : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {} }; -class X86AsmBackend : public TargetAsmBackend { +class X86AsmBackend : public MCAsmBackend { public: X86AsmBackend(const Target &T) - : TargetAsmBackend() {} + : MCAsmBackend() {} unsigned getNumFixupKinds() const { return X86::NumTargetFixupKinds; } @@ -81,7 +80,7 @@ public: }; if (Kind < FirstTargetFixupKind) - return TargetAsmBackend::getFixupKindInfo(Kind); + return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); @@ -94,6 +93,14 @@ public: assert(Fixup.getOffset() + Size <= DataSize && "Invalid fixup offset!"); + + // Check that upper bits are either all zeros or all ones. + // Specifically ignore overflow/underflow as long as the leakage is + // limited to the lower bits. This is to remain compatible with + // other assemblers.
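Concretely, the widened acceptance range the comment above describes admits both signed and unsigned interpretations of the fixup field; the assert that follows uses isIntN(Size * 8 + 1, Value), and the snippet below is an illustrative, self-contained mirror of llvm::isIntN with example values for a one-byte fixup.

#include <cassert>
#include <cstdint>

// Mirror of llvm::isIntN: does V fit in a signed N-bit quantity? With
// N = width + 1, this also admits the full unsigned range of the field.
static bool isIntN(unsigned N, int64_t V) {
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}

int main() {
  unsigned Size = 1;                   // a 1-byte fixup field
  assert(isIntN(Size * 8 + 1, 255));   // 0xFF as an unsigned imm8: accepted
  assert(isIntN(Size * 8 + 1, -128));  // signed imm8 lower bound: accepted
  assert(!isIntN(Size * 8 + 1, 512));  // leaks past the low byte: rejected
  return 0;
}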
+ assert(isIntN(Size * 8 + 1, Value) && + "Value does not fit in the Fixup field"); + for (unsigned i = 0; i != Size; ++i) Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); } @@ -426,8 +433,7 @@ public: } // end anonymous namespace -TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, - const std::string &TT) { +MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) @@ -439,8 +445,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, return new ELFX86_32AsmBackend(T, TheTriple.getOS()); } -TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, - const std::string &TT) { +MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h new file mode 100644 index 0000000..e6ba705 --- /dev/null +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -0,0 +1,548 @@ +//===-- X86BaseInfo.h - Top level definitions for X86 -------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the X86 target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef X86BASEINFO_H +#define X86BASEINFO_H + +#include "X86MCTargetDesc.h" +#include "llvm/Support/DataTypes.h" +#include <cassert> + +namespace llvm { + +namespace X86 { + // Enums for memory operand decoding. Each memory operand is represented with + // a 5 operand sequence in the form: + // [BaseReg, ScaleAmt, IndexReg, Disp, Segment] + // These enums help decode this. + enum { + AddrBaseReg = 0, + AddrScaleAmt = 1, + AddrIndexReg = 2, + AddrDisp = 3, + + /// AddrSegmentReg - The operand # of the segment in the memory operand. + AddrSegmentReg = 4, + + /// AddrNumOperands - Total number of operands in a memory reference. + AddrNumOperands = 5 + }; +} // end namespace X86; + + +/// X86II - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace X86II { + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // X86 Specific MachineOperand flags. + + MO_NO_FLAG, + + /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a + /// relocation of: + /// SYMBOL_LABEL + [. - PICBASELABEL] + MO_GOT_ABSOLUTE_ADDRESS, + + /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the + /// immediate should get the value of the symbol minus the PIC base label: + /// SYMBOL_LABEL - PICBASELABEL + MO_PIC_BASE_OFFSET, + + /// MO_GOT - On a symbol operand this indicates that the immediate is the + /// offset to the GOT entry for the symbol name from the base of the GOT. + /// + /// See the X86-64 ELF ABI supplement for more details. 
+ /// SYMBOL_LABEL @GOT + MO_GOT, + + /// MO_GOTOFF - On a symbol operand this indicates that the immediate is + /// the offset to the location of the symbol name from the base of the GOT. + /// + /// See the X86-64 ELF ABI supplement for more details. + /// SYMBOL_LABEL @GOTOFF + MO_GOTOFF, + + /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is + /// offset to the GOT entry for the symbol name from the current code + /// location. + /// + /// See the X86-64 ELF ABI supplement for more details. + /// SYMBOL_LABEL @GOTPCREL + MO_GOTPCREL, + + /// MO_PLT - On a symbol operand this indicates that the immediate is + /// offset to the PLT entry of symbol name from the current code location. + /// + /// See the X86-64 ELF ABI supplement for more details. + /// SYMBOL_LABEL @PLT + MO_PLT, + + /// MO_TLSGD - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TLSGD + MO_TLSGD, + + /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @GOTTPOFF + MO_GOTTPOFF, + + /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @INDNTPOFF + MO_INDNTPOFF, + + /// MO_TPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TPOFF + MO_TPOFF, + + /// MO_NTPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @NTPOFF + MO_NTPOFF, + + /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "__imp_FOO" symbol. This is used for + /// dllimport linkage on windows. + MO_DLLIMPORT, + + /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$stub" symbol. This is used for calls + /// and jumps to external functions on Tiger and earlier. + MO_DARWIN_STUB, + + /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a + /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub. + MO_DARWIN_NONLAZY, + + /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates + /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is + /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub. + MO_DARWIN_NONLAZY_PIC_BASE, + + /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this + /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", + /// which is a PIC-base-relative reference to a hidden dyld lazy pointer + /// stub. + MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, + + /// MO_TLVP - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// This is the TLS offset for the Darwin TLS mechanism. + MO_TLVP, + + /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate + /// is some TLS offset from the picbase. + /// + /// This is the 32-bit TLS offset for Darwin TLS in PIC mode. 
+ MO_TLVP_PIC_BASE + }; + + enum { + //===------------------------------------------------------------------===// + // Instruction encodings. These are the standard/most common forms for X86 + // instructions. + // + + // PseudoFrm - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + Pseudo = 0, + + /// Raw - This form is for instructions that don't have any operands, so + /// they are just a fixed opcode value, like 'leave'. + RawFrm = 1, + + /// AddRegFrm - This form is used for instructions like 'push r32' that have + /// their one register operand added to their opcode. + AddRegFrm = 2, + + /// MRMDestReg - This form is used for instructions that use the Mod/RM byte + /// to specify a destination, which in this case is a register. + /// + MRMDestReg = 3, + + /// MRMDestMem - This form is used for instructions that use the Mod/RM byte + /// to specify a destination, which in this case is memory. + /// + MRMDestMem = 4, + + /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte + /// to specify a source, which in this case is a register. + /// + MRMSrcReg = 5, + + /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte + /// to specify a source, which in this case is memory. + /// + MRMSrcMem = 6, + + /// MRM[0-7][rm] - These forms are used to represent instructions that use + /// a Mod/RM byte, and use the middle field to hold extended opcode + /// information. In the intel manual these are represented as /0, /1, ... + /// + + // First, instructions that operate on a register r/m operand... + MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3 + MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7 + + // Next, instructions that operate on a memory r/m operand... + MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3 + MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7 + + // MRMInitReg - This form is used for instructions whose source and + // destinations are the same register. + MRMInitReg = 32, + + //// MRM_C1 - A mod/rm byte of exactly 0xC1. + MRM_C1 = 33, + MRM_C2 = 34, + MRM_C3 = 35, + MRM_C4 = 36, + MRM_C8 = 37, + MRM_C9 = 38, + MRM_E8 = 39, + MRM_F0 = 40, + MRM_F8 = 41, + MRM_F9 = 42, + MRM_D0 = 45, + MRM_D1 = 46, + + /// RawFrmImm8 - This is used for the ENTER instruction, which has two + /// immediates, the first of which is a 16-bit immediate (specified by + /// the imm encoding) and the second is a 8-bit fixed value. + RawFrmImm8 = 43, + + /// RawFrmImm16 - This is used for CALL FAR instructions, which have two + /// immediates, the first of which is a 16 or 32-bit immediate (specified by + /// the imm encoding) and the second is a 16-bit fixed value. In the AMD + /// manual, this operand is described as pntr16:32 and pntr16:16 + RawFrmImm16 = 44, + + FormMask = 63, + + //===------------------------------------------------------------------===// + // Actual flags... + + // OpSize - Set if this instruction requires an operand size prefix (0x66), + // which most often indicates that the instruction operates on 16 bit data + // instead of 32 bit data. + OpSize = 1 << 6, + + // AsSize - Set if this instruction requires an operand size prefix (0x67), + // which most often indicates that the instruction address 16 bit address + // instead of 32 bit address (or 32 bit address in 64 bit mode). 
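FormMask = 63 reserves the low six bits of TSFlags for the form, and the MRM0r..MRM7r / MRM0m..MRM7m runs are laid out so that the ModRM reg-field opcode extension (the /0../7 "digit" of the Intel manual) falls out by subtraction. A sketch of that decoding, assuming only the enum values listed above:

    // Recover the /digit opcode extension from a packed TSFlags word.
    // Returns -1 if the form carries no digit. The constants mirror the
    // X86II enum above (MRM0r = 16, MRM0m = 24, FormMask = 63).
    static int getOpcodeDigit(uint64_t TSFlags) {
      unsigned Form = TSFlags & 63;   // X86II::FormMask
      if (Form >= 16 && Form <= 23)   // MRM0r..MRM7r: register r/m operand
        return Form - 16;             // e.g. MRM4r -> /4
      if (Form >= 24 && Form <= 31)   // MRM0m..MRM7m: memory r/m operand
        return Form - 24;
      return -1;                      // RawFrm, MRMSrcReg, etc.
    }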
+ AdSize = 1 << 7, + + //===------------------------------------------------------------------===// + // Op0Mask - There are several prefix bytes that are used to form two byte + // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is + // used to obtain the setting of this field. If no bits in this field is + // set, there is no prefix byte for obtaining a multibyte opcode. + // + Op0Shift = 8, + Op0Mask = 0x1F << Op0Shift, + + // TB - TwoByte - Set if this instruction has a two byte opcode, which + // starts with a 0x0F byte before the real opcode. + TB = 1 << Op0Shift, + + // REP - The 0xF3 prefix byte indicating repetition of the following + // instruction. + REP = 2 << Op0Shift, + + // D8-DF - These escape opcodes are used by the floating point unit. These + // values must remain sequential. + D8 = 3 << Op0Shift, D9 = 4 << Op0Shift, + DA = 5 << Op0Shift, DB = 6 << Op0Shift, + DC = 7 << Op0Shift, DD = 8 << Op0Shift, + DE = 9 << Op0Shift, DF = 10 << Op0Shift, + + // XS, XD - These prefix codes are for single and double precision scalar + // floating point operations performed in the SSE registers. + XD = 11 << Op0Shift, XS = 12 << Op0Shift, + + // T8, TA, A6, A7 - Prefix after the 0x0F prefix. + T8 = 13 << Op0Shift, TA = 14 << Op0Shift, + A6 = 15 << Op0Shift, A7 = 16 << Op0Shift, + + // TF - Prefix before and after 0x0F + TF = 17 << Op0Shift, + + //===------------------------------------------------------------------===// + // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. + // They are used to specify GPRs and SSE registers, 64-bit operand size, + // etc. We only cares about REX.W and REX.R bits and only the former is + // statically determined. + // + REXShift = Op0Shift + 5, + REX_W = 1 << REXShift, + + //===------------------------------------------------------------------===// + // This three-bit field describes the size of an immediate operand. Zero is + // unused so that we can tell if we forgot to set a value. + ImmShift = REXShift + 1, + ImmMask = 7 << ImmShift, + Imm8 = 1 << ImmShift, + Imm8PCRel = 2 << ImmShift, + Imm16 = 3 << ImmShift, + Imm16PCRel = 4 << ImmShift, + Imm32 = 5 << ImmShift, + Imm32PCRel = 6 << ImmShift, + Imm64 = 7 << ImmShift, + + //===------------------------------------------------------------------===// + // FP Instruction Classification... Zero is non-fp instruction. + + // FPTypeMask - Mask for all of the FP types... + FPTypeShift = ImmShift + 3, + FPTypeMask = 7 << FPTypeShift, + + // NotFP - The default, set for instructions that do not use FP registers. + NotFP = 0 << FPTypeShift, + + // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0 + ZeroArgFP = 1 << FPTypeShift, + + // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst + OneArgFP = 2 << FPTypeShift, + + // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a + // result back to ST(0). For example, fcos, fsqrt, etc. + // + OneArgFPRW = 3 << FPTypeShift, + + // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an + // explicit argument, storing the result to either ST(0) or the implicit + // argument. For example: fadd, fsub, fmul, etc... + TwoArgFP = 4 << FPTypeShift, + + // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an + // explicit argument, but have no destination. Example: fucom, fucomi, ... + CompareFP = 5 << FPTypeShift, + + // CondMovFP - "2 operand" floating point conditional move instructions. 
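Each field here is positioned relative to the previous one (Op0Shift = 8, REXShift = Op0Shift + 5, ImmShift = REXShift + 1, FPTypeShift = ImmShift + 3), so the packing survives any single field growing. A small sketch that unpacks a TSFlags word into these fixed fields; the struct is illustrative, not an LLVM type:

    #include <cstdint>

    struct X86FlagFields {
      unsigned Form;    // bits 0-5   (FormMask)
      bool     OpSize;  // bit  6     (0x66 operand-size prefix)
      bool     AdSize;  // bit  7     (0x67 address-size prefix)
      unsigned Op0;     // bits 8-12  (TB, REP, D8-DF, XS/XD, T8, TA, ...)
      bool     REX_W;   // bit  13
      unsigned ImmType; // bits 14-16 (Imm8 .. Imm64, 0 = no immediate)
      unsigned FPType;  // bits 17-19 (NotFP .. SpecialFP)
    };

    static X86FlagFields unpack(uint64_t TSFlags) {
      X86FlagFields F;
      F.Form    =  TSFlags        & 63;
      F.OpSize  = (TSFlags >> 6)  & 1;
      F.AdSize  = (TSFlags >> 7)  & 1;
      F.Op0     = (TSFlags >> 8)  & 0x1F;
      F.REX_W   = (TSFlags >> 13) & 1;
      F.ImmType = (TSFlags >> 14) & 7;
      F.FPType  = (TSFlags >> 17) & 7;
      return F;
    }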
+ CondMovFP = 6 << FPTypeShift, + + // SpecialFP - Special instruction forms. Dispatch by opcode explicitly. + SpecialFP = 7 << FPTypeShift, + + // Lock prefix + LOCKShift = FPTypeShift + 3, + LOCK = 1 << LOCKShift, + + // Segment override prefixes. Currently we just need ability to address + // stuff in gs and fs segments. + SegOvrShift = LOCKShift + 1, + SegOvrMask = 3 << SegOvrShift, + FS = 1 << SegOvrShift, + GS = 2 << SegOvrShift, + + // Execution domain for SSE instructions in bits 23, 24. + // 0 in bits 23-24 means normal, non-SSE instruction. + SSEDomainShift = SegOvrShift + 2, + + OpcodeShift = SSEDomainShift + 2, + + //===------------------------------------------------------------------===// + /// VEX - The opcode prefix used by AVX instructions + VEXShift = OpcodeShift + 8, + VEX = 1U << 0, + + /// VEX_W - Has a opcode specific functionality, but is used in the same + /// way as REX_W is for regular SSE instructions. + VEX_W = 1U << 1, + + /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2 + /// address instructions in SSE are represented as 3 address ones in AVX + /// and the additional register is encoded in VEX_VVVV prefix. + VEX_4V = 1U << 2, + + /// VEX_I8IMM - Specifies that the last register used in a AVX instruction, + /// must be encoded in the i8 immediate field. This usually happens in + /// instructions with 4 operands. + VEX_I8IMM = 1U << 3, + + /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current + /// instruction uses 256-bit wide registers. This is usually auto detected + /// if a VR256 register is used, but some AVX instructions also have this + /// field marked when using a f256 memory references. + VEX_L = 1U << 4, + + // VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX + // prefix. Usually used for scalar instructions. Needed by disassembler. + VEX_LIG = 1U << 5, + + /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the + /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents + /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction + /// storing a classifier in the imm8 field. To simplify our implementation, + /// we handle this by storeing the classifier in the opcode field and using + /// this flag to indicate that the encoder should do the wacky 3DNow! thing. + Has3DNow0F0FOpcode = 1U << 6 + }; + + // getBaseOpcodeFor - This function returns the "base" X86 opcode for the + // specified machine instruction. + // + static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { + return TSFlags >> X86II::OpcodeShift; + } + + static inline bool hasImm(uint64_t TSFlags) { + return (TSFlags & X86II::ImmMask) != 0; + } + + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field + /// of the specified instruction. + static inline unsigned getSizeOfImm(uint64_t TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8: + case X86II::Imm8PCRel: return 1; + case X86II::Imm16: + case X86II::Imm16PCRel: return 2; + case X86II::Imm32: + case X86II::Imm32PCRel: return 4; + case X86II::Imm64: return 8; + } + } + + /// isImmPCRel - Return true if the immediate of the specified instruction's + /// TSFlags indicates that it is pc relative. 
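hasImm and getSizeOfImm make the trailing-immediate length a pure function of TSFlags. The hypothetical wrapper below combines them with the RawFrmImm8/RawFrmImm16 special cases documented earlier, whose second fixed-size immediate is not covered by getSizeOfImm:

    // Bytes of trailing immediate data an instruction encodes, or 0 if
    // none. Assumes the X86II helpers defined above.
    static unsigned immediateBytes(uint64_t TSFlags) {
      if (!X86II::hasImm(TSFlags))
        return 0;
      unsigned Bytes = X86II::getSizeOfImm(TSFlags); // 1, 2, 4 or 8
      // ENTER and far CALL carry a second fixed immediate after the
      // primary one, per the RawFrmImm8/RawFrmImm16 form comments above.
      switch (TSFlags & X86II::FormMask) {
      case X86II::RawFrmImm8:  Bytes += 1; break;
      case X86II::RawFrmImm16: Bytes += 2; break;
      }
      return Bytes;
    }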
+ static inline unsigned isImmPCRel(uint64_t TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm16PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; + } + } + + /// getMemoryOperandNo - The function returns the MCInst operand # for the + /// first field of the memory operand. If the instruction doesn't have a + /// memory operand, this returns -1. + /// + /// Note that this ignores tied operands. If there is a tied register which + /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only + /// counted as one operand. + /// + static inline int getMemoryOperandNo(uint64_t TSFlags) { + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form"); + default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!"); + case X86II::Pseudo: + case X86II::RawFrm: + case X86II::AddRegFrm: + case X86II::MRMDestReg: + case X86II::MRMSrcReg: + case X86II::RawFrmImm8: + case X86II::RawFrmImm16: + return -1; + case X86II::MRMDestMem: + return 0; + case X86II::MRMSrcMem: { + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + unsigned FirstMemOp = 1; + if (HasVEX_4V) + ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). + + // FIXME: Maybe lea should have its own form? This is a horrible hack. + //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + // Opcode == X86::LEA16r || Opcode == X86::LEA32r) + return FirstMemOp; + } + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + return -1; + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + return 0; + case X86II::MRM_C1: + case X86II::MRM_C2: + case X86II::MRM_C3: + case X86II::MRM_C4: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + case X86II::MRM_F8: + case X86II::MRM_F9: + case X86II::MRM_D0: + case X86II::MRM_D1: + return -1; + } + } + + /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or + /// higher) register? e.g. r8, xmm8, xmm13, etc. 
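getMemoryOperandNo pairs with the X86::Addr* offsets from the top of this header to locate the full five-operand memory reference inside an MCInst. A sketch of a consumer; note that the real emitter also adds the number of leading operands (CurOp) to the returned index, which this sketch assumes to be zero:

    #include "llvm/MC/MCInst.h"

    // Fetch the base register of MI's memory operand, or 0 if MI has none.
    static unsigned memBaseReg(const llvm::MCInst &MI, uint64_t TSFlags) {
      int MemOp = llvm::X86II::getMemoryOperandNo(TSFlags);
      if (MemOp < 0)
        return 0; // instruction has no memory reference
      // The five operands are laid out as
      // [BaseReg, ScaleAmt, IndexReg, Disp, Segment] starting at MemOp.
      return MI.getOperand(MemOp + llvm::X86::AddrBaseReg).getReg();
    }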
+ static inline bool isX86_64ExtendedReg(unsigned RegNo) { + switch (RegNo) { + default: break; + case X86::R8: case X86::R9: case X86::R10: case X86::R11: + case X86::R12: case X86::R13: case X86::R14: case X86::R15: + case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: + case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: + case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: + case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: + case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: + case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: + case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: + case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: + case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: + case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: + case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11: + case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15: + return true; + } + return false; + } + + static inline bool isX86_64NonExtLowByteReg(unsigned reg) { + return (reg == X86::SPL || reg == X86::BPL || + reg == X86::SIL || reg == X86::DIL); + } +} + +} // end namespace llvm; + +#endif diff --git a/contrib/llvm/lib/Target/X86/X86FixupKinds.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h index 17d242a..17d242a 100644 --- a/contrib/llvm/lib/Target/X86/X86FixupKinds.h +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86FixupKinds.h diff --git a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index ce8ef49..2eee112 100644 --- a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -12,12 +12,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" -#include "X86.h" -#include "X86InstrInfo.h" -#include "X86FixupKinds.h" +#include "MCTargetDesc/X86MCTargetDesc.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86FixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" @@ -45,7 +47,7 @@ public: } static unsigned GetX86RegNum(const MCOperand &MO) { - return X86RegisterInfo::getX86RegNum(MO.getReg()); + return X86_MC::getX86RegNum(MO.getReg()); } // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range @@ -159,9 +161,11 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) { static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) { const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); - - if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) || - (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg()))) + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg()))) return true; return false; } @@ -191,11 +195,11 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { const MCExpr *Expr = NULL; if (DispOp.isImm()) { - // If this is a simple integer 
displacement that doesn't require a relocation, - // emit it now. + // If this is a simple integer displacement that doesn't require a + // relocation, emit it now. if (FixupKind != FK_PCRel_1 && - FixupKind != FK_PCRel_2 && - FixupKind != FK_PCRel_4) { + FixupKind != FK_PCRel_2 && + FixupKind != FK_PCRel_4) { EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); return; } @@ -205,7 +209,9 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, } // If we have an immoffset, add it to the expression. - if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) { + if ((FixupKind == FK_Data_4 || + FixupKind == MCFixupKind(X86::reloc_signed_4byte)) && + StartsWithGlobalOffsetTable(Expr)) { assert(ImmOffset == 0); FixupKind = MCFixupKind(X86::reloc_global_offset_table); @@ -346,7 +352,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, } // Calculate what the SS field value should be... - static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 }; unsigned SS = SSTable[Scale.getImm()]; if (BaseReg == 0) { @@ -486,71 +492,100 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_L = 1; } - unsigned NumOps = MI.getNumOperands(); + // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned CurOp = 0; - bool IsDestMem = false; - switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); - case X86II::MRMDestMem: - IsDestMem = true; - // The important info for the VEX prefix is never beyond the address - // registers. Don't check beyond that. - NumOps = CurOp = X86::AddrNumOperands; + case X86II::MRMDestMem: { + // MRMDestMem instructions forms: + // MemAddr, src1(ModR/M) + // MemAddr, src1(VEX_4V), src2(ModR/M) + // MemAddr, src1(ModR/M), imm8 + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + + CurOp = X86::AddrNumOperands; + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + const MCOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) + VEX_R = 0x0; + break; + } + case X86II::MRMSrcMem: { + // MRMSrcMem instructions forms: + // src1(ModR/M), MemAddr + // src1(ModR/M), src2(VEX_4V), MemAddr + // src1(ModR/M), MemAddr, imm8 + // src1(ModR/M), MemAddr, src2(VEX_I8IMM) + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + VEX_R = 0x0; + + unsigned MemAddrOffset = 1; + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, 1); + MemAddrOffset++; + } + + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemAddrOffset+X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemAddrOffset+X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + break; + } case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: - case X86II::MRMSrcMem: + // MRM[0-9]m instructions forms: + // MemAddr + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + break; case X86II::MRMSrcReg: - if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + // MRMSrcReg instructions forms: + // 
dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M), src1(ModR/M) + // dst(ModR/M), src1(ModR/M), imm8 + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; CurOp++; - if (HasVEX_4V) { - VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp); - CurOp++; - } - - // To only check operands before the memory address ones, start - // the search from the beginning - if (IsDestMem) - CurOp = 0; - - // If the last register should be encoded in the immediate field - // do not use any bit from VEX prefix to this register, ignore it - if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) - NumOps--; - - for (; CurOp != NumOps; ++CurOp) { - const MCOperand &MO = MI.getOperand(CurOp); - if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - VEX_B = 0x0; - if (!VEX_B && MO.isReg() && - ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) && - X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - VEX_X = 0x0; - } - break; - default: // MRMDestReg, MRM0r-MRM7r, RawFrm - if (!MI.getNumOperands()) - break; - - if (MI.getOperand(CurOp).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) - VEX_B = 0; - if (HasVEX_4V) - VEX_4V = getVEXRegisterEncoding(MI, CurOp); - - CurOp++; - for (; CurOp != NumOps; ++CurOp) { - const MCOperand &MO = MI.getOperand(CurOp); - if (MO.isReg() && !HasVEX_4V && - X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - VEX_R = 0x0; - } + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; + break; + case X86II::MRMDestReg: + // MRMDestReg instructions forms: + // dst(ModR/M), src(ModR/M) + // dst(ModR/M), src(ModR/M), imm8 + if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + VEX_R = 0x0; + break; + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + // MRM0r-MRM7r instructions forms: + // dst(VEX_4V), src(ModR/M), imm8 + VEX_4V = getVEXRegisterEncoding(MI, 0); + if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + VEX_B = 0x0; + break; + default: // RawFrm break; } @@ -604,7 +639,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, const MCOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue; + if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue; // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything // that returns non-zero. REX |= 0x40; // REX fixed encoding prefix @@ -615,25 +650,25 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); case X86II::MRMSrcReg: if (MI.getOperand(0).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) REX |= 1 << 2; // set REX.R i = isTwoAddr ? 
2 : 1; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); - if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) REX |= 1 << 0; // set REX.B } break; case X86II::MRMSrcMem: { if (MI.getOperand(0).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) REX |= 1 << 2; // set REX.R unsigned Bit = 0; i = isTwoAddr ? 2 : 1; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { - if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + if (X86II::isX86_64ExtendedReg(MO.getReg())) REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1) Bit++; } @@ -648,13 +683,13 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); i = isTwoAddr ? 1 : 0; if (NumOps > e && MI.getOperand(e).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg())) + X86II::isX86_64ExtendedReg(MI.getOperand(e).getReg())) REX |= 1 << 2; // set REX.R unsigned Bit = 0; for (; i != e; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { - if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + if (X86II::isX86_64ExtendedReg(MO.getReg())) REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1) Bit++; } @@ -663,12 +698,12 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, } default: if (MI.getOperand(0).isReg() && - X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) REX |= 1 << 0; // set REX.B i = isTwoAddr ? 2 : 1; for (unsigned e = NumOps; i != e; ++i) { const MCOperand &MO = MI.getOperand(i); - if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) REX |= 1 << 2; // set REX.R } break; @@ -731,7 +766,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if ((TSFlags & X86II::AdSize) || (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand))) EmitByte(0x67, CurByte, OS); - + // Emit the operand size opcode prefix as needed. if (TSFlags & X86II::OpSize) EmitByte(0x66, CurByte, OS); @@ -834,7 +869,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V) HasVEX_4V = true; - // Determine where the memory operand starts, if present. int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); if (MemoryOperand != -1) MemoryOperand += CurOp; @@ -844,12 +878,11 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); - unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); - + if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode) BaseOpcode = 0x0F; // Weird 3DNow! encoding. - + unsigned SrcRegNum = 0; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: @@ -861,7 +894,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; - case X86II::RawFrmImm8: EmitByte(BaseOpcode, CurByte, OS); EmitImmediate(MI.getOperand(CurOp++), @@ -1006,8 +1038,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // in bits[7:4] of a immediate byte, and bits[3:0] are ignored. 
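The hunk that follows implements this packing; as a standalone sketch, with the extension bit and the 3-bit register number folded into a single 0-15 value:

    #include <cstdint>

    // Pack a register into the high nibble of an 8-bit immediate, as the
    // VEX_I8IMM forms require: bit 7 is the x86-64 extension bit
    // (register >= 8), bits 6-4 are the low three bits of the number.
    static uint8_t vexImm8Register(unsigned RegNum /* 0-15 */) {
      uint8_t Imm = 0;
      if (RegNum >= 8)
        Imm |= 1 << 7;          // extended register (XMM8-XMM15, ...)
      Imm |= (RegNum & 7) << 4; // bits[7:4] hold the register
      return Imm;               // bits[3:0] are ignored by the hardware
    }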
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { const MCOperand &MO = MI.getOperand(CurOp++); - bool IsExtReg = - X86InstrInfo::isX86_64ExtendedReg(MO.getReg()); + bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg()); unsigned RegNum = (IsExtReg ? (1 << 7) : 0); RegNum |= GetX86RegNum(MO) << 4; EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS, @@ -1030,7 +1061,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode) EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS); - #ifndef NDEBUG // FIXME: Verify. diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index b77f37b..f98d5e3 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -13,12 +13,18 @@ #include "X86MCTargetDesc.h" #include "X86MCAsmInfo.h" +#include "InstPrinter/X86ATTInstPrinter.h" +#include "InstPrinter/X86IntelInstPrinter.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Host.h" +#include "llvm/Support/TargetRegistry.h" #define GET_REGINFO_MC_DESC #include "X86GenRegisterInfo.inc" @@ -34,9 +40,16 @@ using namespace llvm; std::string X86_MC::ParseX86Triple(StringRef TT) { Triple TheTriple(TT); + std::string FS; if (TheTriple.getArch() == Triple::x86_64) - return "+64bit-mode"; - return "-64bit-mode"; + FS = "+64bit-mode"; + else + FS = "-64bit-mode"; + if (TheTriple.getOS() == Triple::NativeClient) + FS += ",+nacl-mode"; + else + FS += ",-nacl-mode"; + return FS; } /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the @@ -107,6 +120,135 @@ void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family, } } +unsigned X86_MC::getDwarfRegFlavour(StringRef TT, bool isEH) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::x86_64) + return DWARFFlavour::X86_64; + + if (TheTriple.isOSDarwin()) + return isEH ? DWARFFlavour::X86_32_DarwinEH : DWARFFlavour::X86_32_Generic; + if (TheTriple.getOS() == Triple::MinGW32 || + TheTriple.getOS() == Triple::Cygwin) + // Unsupported by now, just quick fallback + return DWARFFlavour::X86_32_Generic; + return DWARFFlavour::X86_32_Generic; +} + +/// getX86RegNum - This function maps LLVM register identifiers to their X86 +/// specific numbering, which is used in various places encoding instructions. 
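ParseX86Triple now always emits both mode features, so a NaCl triple alone is enough to light up the new +nacl-mode bit before any user feature string is appended (createX86MCSubtargetInfo prepends the parsed string, as its definition below shows). A hypothetical caller; the include path and the exact triple spelling are assumptions:

    #include "X86MCTargetDesc.h" // assumed local include, as in this file

    // The triple alone selects the 64-bit and NaCl feature bits; CPU and
    // user feature string are left empty here.
    llvm::MCSubtargetInfo *makeNaClSTI() {
      // ParseX86Triple yields "+64bit-mode,+nacl-mode" for this triple.
      return llvm::X86_MC::createX86MCSubtargetInfo("x86_64-unknown-nacl",
                                                    /*CPU=*/"", /*FS=*/"");
    }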
+unsigned X86_MC::getX86RegNum(unsigned RegNo) { + switch(RegNo) { + case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; + case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; + case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; + case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; + case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH: + return N86::ESP; + case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH: + return N86::EBP; + case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH: + return N86::ESI; + case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH: + return N86::EDI; + + case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: + return N86::EAX; + case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: + return N86::ECX; + case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: + return N86::EDX; + case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: + return N86::EBX; + case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: + return N86::ESP; + case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: + return N86::EBP; + case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: + return N86::ESI; + case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: + return N86::EDI; + + case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3: + case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: + return RegNo-X86::ST0; + + case X86::XMM0: case X86::XMM8: + case X86::YMM0: case X86::YMM8: case X86::MM0: + return 0; + case X86::XMM1: case X86::XMM9: + case X86::YMM1: case X86::YMM9: case X86::MM1: + return 1; + case X86::XMM2: case X86::XMM10: + case X86::YMM2: case X86::YMM10: case X86::MM2: + return 2; + case X86::XMM3: case X86::XMM11: + case X86::YMM3: case X86::YMM11: case X86::MM3: + return 3; + case X86::XMM4: case X86::XMM12: + case X86::YMM4: case X86::YMM12: case X86::MM4: + return 4; + case X86::XMM5: case X86::XMM13: + case X86::YMM5: case X86::YMM13: case X86::MM5: + return 5; + case X86::XMM6: case X86::XMM14: + case X86::YMM6: case X86::YMM14: case X86::MM6: + return 6; + case X86::XMM7: case X86::XMM15: + case X86::YMM7: case X86::YMM15: case X86::MM7: + return 7; + + case X86::ES: return 0; + case X86::CS: return 1; + case X86::SS: return 2; + case X86::DS: return 3; + case X86::FS: return 4; + case X86::GS: return 5; + + case X86::CR0: case X86::CR8 : case X86::DR0: return 0; + case X86::CR1: case X86::CR9 : case X86::DR1: return 1; + case X86::CR2: case X86::CR10: case X86::DR2: return 2; + case X86::CR3: case X86::CR11: case X86::DR3: return 3; + case X86::CR4: case X86::CR12: case X86::DR4: return 4; + case X86::CR5: case X86::CR13: case X86::DR5: return 5; + case X86::CR6: case X86::CR14: case X86::DR6: return 6; + case X86::CR7: case X86::CR15: case X86::DR7: return 7; + + // Pseudo index registers are equivalent to a "none" + // scaled index (See Intel Manual 2A, table 2-3) + case X86::EIZ: + case X86::RIZ: + return 4; + + default: + assert((int(RegNo) > 0) && "Unknown physical register!"); + return 0; + } +} + +void X86_MC::InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI) { + // FIXME: TableGen these. 
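getX86RegNum deliberately returns only the low three bits of the hardware register number; the fourth bit travels separately in a REX (or inverted VEX) prefix bit. A sketch of how the two halves recombine in the encoder, using isX86_64ExtendedReg from X86BaseInfo.h earlier in this diff; the struct is illustrative:

    #include "MCTargetDesc/X86BaseInfo.h"     // X86II::isX86_64ExtendedReg
    #include "MCTargetDesc/X86MCTargetDesc.h" // X86_MC::getX86RegNum

    // Split an LLVM register into the 3-bit ModRM field and the prefix
    // extension bit, the way the code emitter consumes getX86RegNum.
    struct EncodedReg {
      unsigned Low3; // goes into ModRM.reg / ModRM.rm / SIB
      bool     Ext;  // goes into the matching REX bit (or inverted VEX bit)
    };

    static EncodedReg encodeReg(unsigned LLVMReg) {
      EncodedReg E;
      E.Low3 = llvm::X86_MC::getX86RegNum(LLVMReg);        // 0-7
      E.Ext  = llvm::X86II::isX86_64ExtendedReg(LLVMReg);  // r8-r15, xmm8-15
      return E;
    }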
+ for (unsigned Reg = X86::NoRegister+1; Reg < X86::NUM_TARGET_REGS; ++Reg) { + int SEH = X86_MC::getX86RegNum(Reg); + switch (Reg) { + case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: + case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: + case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: + case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: + case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: + case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: + case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: + case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: + case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: + case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: + case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: + case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: + SEH += 8; + break; + } + MRI->mapLLVMRegToSEHReg(Reg, SEH); + } +} + MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { std::string ArchFS = X86_MC::ParseX86Triple(TT); @@ -131,55 +273,191 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -// Force static initialization. -extern "C" void LLVMInitializeX86MCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target, - X86_MC::createX86MCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target, - X86_MC::createX86MCSubtargetInfo); -} - static MCInstrInfo *createX86MCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitX86MCInstrInfo(X); return X; } -extern "C" void LLVMInitializeX86MCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo); -} +static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { + Triple TheTriple(TT); + unsigned RA = (TheTriple.getArch() == Triple::x86_64) + ? X86::RIP // Should have dwarf #16. + : X86::EIP; // Should have dwarf #8. -static MCRegisterInfo *createX86MCRegisterInfo() { MCRegisterInfo *X = new MCRegisterInfo(); - InitX86MCRegisterInfo(X); + InitX86MCRegisterInfo(X, RA, + X86_MC::getDwarfRegFlavour(TT, false), + X86_MC::getDwarfRegFlavour(TT, true)); + X86_MC::InitLLVM2SEHRegisterMapping(X); return X; } -extern "C" void LLVMInitializeX86MCRegInfo() { - TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo); -} - - static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); + bool is64Bit = TheTriple.getArch() == Triple::x86_64; + MCAsmInfo *MAI; if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) { - if (TheTriple.getArch() == Triple::x86_64) - return new X86_64MCAsmInfoDarwin(TheTriple); + if (is64Bit) + MAI = new X86_64MCAsmInfoDarwin(TheTriple); else - return new X86MCAsmInfoDarwin(TheTriple); + MAI = new X86MCAsmInfoDarwin(TheTriple); + } else if (TheTriple.isOSWindows()) { + MAI = new X86MCAsmInfoCOFF(TheTriple); + } else { + MAI = new X86ELFMCAsmInfo(TheTriple); } + // Initialize initial frame state. + // Calculate amount of bytes used for return address storing + int stackGrowth = is64Bit ? -8 : -4; + + // Initial state of the frame pointer is esp+stackGrowth. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(is64Bit ? 
X86::RSP : X86::ESP, stackGrowth); + MAI->addInitialFrameState(0, Dst, Src); + + // Add return address to move list + MachineLocation CSDst(is64Bit ? X86::RSP : X86::ESP, stackGrowth); + MachineLocation CSSrc(is64Bit ? X86::RIP : X86::EIP); + MAI->addInitialFrameState(0, CSDst, CSSrc); + + return MAI; +} + +static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + Triple T(TT); + bool is64Bit = T.getArch() == Triple::x86_64; + + if (RM == Reloc::Default) { + // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode. + // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we + // use static relocation model by default. + if (T.isOSDarwin()) { + if (is64Bit) + RM = Reloc::PIC_; + else + RM = Reloc::DynamicNoPIC; + } else if (T.isOSWindows() && is64Bit) + RM = Reloc::PIC_; + else + RM = Reloc::Static; + } + + // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC + // is defined as a model for code which may be used in static or dynamic + // executables but not necessarily a shared library. On X86-32 we just + // compile in -static mode, in x86-64 we use PIC. + if (RM == Reloc::DynamicNoPIC) { + if (is64Bit) + RM = Reloc::PIC_; + else if (!T.isOSDarwin()) + RM = Reloc::Static; + } + + // If we are on Darwin, disallow static relocation model in X86-64 mode, since + // the Mach-O file format doesn't support it. + if (RM == Reloc::Static && T.isOSDarwin() && is64Bit) + RM = Reloc::PIC_; + + // For static codegen, if we're not already set, use Small codegen. + if (CM == CodeModel::Default) + CM = CodeModel::Small; + else if (CM == CodeModel::JITDefault) + // 64-bit JIT places everything in the same buffer except external funcs. + CM = is64Bit ? CodeModel::Large : CodeModel::Small; + + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &_OS, + MCCodeEmitter *_Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) + return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll); + if (TheTriple.isOSWindows()) - return new X86MCAsmInfoCOFF(TheTriple); + return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll); + + return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); +} + +static MCInstPrinter *createX86MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return new X86ATTInstPrinter(MAI); + if (SyntaxVariant == 1) + return new X86IntelInstPrinter(MAI); + return 0; +} - return new X86ELFMCAsmInfo(TheTriple); +static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) { + return new MCInstrAnalysis(Info); } -extern "C" void LLVMInitializeX86MCAsmInfo() { - // Register the target asm info. +// Force static initialization. +extern "C" void LLVMInitializeX86TargetMC() { + // Register the MC asm info. RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo); RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo); + + // Register the MC codegen info. + RegisterMCCodeGenInfoFn C(TheX86_32Target, createX86MCCodeGenInfo); + RegisterMCCodeGenInfoFn D(TheX86_64Target, createX86MCCodeGenInfo); + + // Register the MC instruction info. 
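The relocation-model defaulting in createX86MCCodeGenInfo condenses to a small decision table. A sketch of those defaults, assuming the Reloc::Model enum that MCCodeGenInfo consumes in this revision:

    #include "llvm/ADT/Triple.h"
    #include "llvm/Support/CodeGen.h" // assumed home of Reloc::Model here

    // Default relocation model for x86 when the user picked none,
    // condensing the Reloc::Default handling above.
    static llvm::Reloc::Model defaultRelocModel(const llvm::Triple &T) {
      bool is64Bit = T.getArch() == llvm::Triple::x86_64;
      if (T.isOSDarwin()) // Mach-O x86-64 cannot be static
        return is64Bit ? llvm::Reloc::PIC_ : llvm::Reloc::DynamicNoPIC;
      if (T.isOSWindows() && is64Bit)
        return llvm::Reloc::PIC_; // Win64 requires rip-relative addressing
      return llvm::Reloc::Static;
    }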
+ TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target, + X86_MC::createX86MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target, + X86_MC::createX86MCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(TheX86_32Target, + createX86MCInstrAnalysis); + TargetRegistry::RegisterMCInstrAnalysis(TheX86_64Target, + createX86MCInstrAnalysis); + + // Register the code emitter. + TargetRegistry::RegisterMCCodeEmitter(TheX86_32Target, + createX86MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheX86_64Target, + createX86MCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(TheX86_32Target, + createX86_32AsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheX86_64Target, + createX86_64AsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterMCObjectStreamer(TheX86_32Target, + createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheX86_64Target, + createMCStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheX86_32Target, + createX86MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheX86_64Target, + createX86MCInstPrinter); } diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 89ea22b..c144c51 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -14,15 +14,39 @@ #ifndef X86MCTARGETDESC_H #define X86MCTARGETDESC_H +#include "llvm/Support/DataTypes.h" #include <string> namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; +class raw_ostream; extern Target TheX86_32Target, TheX86_64Target; +/// DWARFFlavour - Flavour of dwarf regnumbers +/// +namespace DWARFFlavour { + enum { + X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2 + }; +} + +/// N86 namespace - Native X86 register numbers +/// +namespace N86 { + enum { + EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7 + }; +} + namespace X86_MC { std::string ParseX86Triple(StringRef TT); @@ -33,13 +57,32 @@ namespace X86_MC { void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model); - /// createARMMCSubtargetInfo - Create a X86 MCSubtargetInfo instance. + unsigned getDwarfRegFlavour(StringRef TT, bool isEH); + + unsigned getX86RegNum(unsigned RegNo); + + void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI); + + /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance. /// This is exposed so Asm parser, etc. do not need to go through /// TargetRegistry. 
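After LLVMInitializeX86TargetMC runs, a standalone MC tool can obtain every piece through the registry without linking libTarget. A hypothetical consumer; the lookupTarget/createMC* calls are the registry entry points whose factories are registered above, and the triple string is an assumption:

    #include <string>
    #include "llvm/Support/TargetRegistry.h"

    extern "C" void LLVMInitializeX86TargetMC(); // defined in this file

    // Build the MC-layer objects an assembler-style tool needs for x86-64.
    bool buildMCPieces() {
      LLVMInitializeX86TargetMC();
      std::string Err;
      const llvm::Target *T =
          llvm::TargetRegistry::lookupTarget("x86_64-unknown-linux-gnu", Err);
      if (!T)
        return false; // triple not registered
      llvm::MCRegisterInfo *MRI =
          T->createMCRegInfo("x86_64-unknown-linux-gnu");
      llvm::MCAsmInfo *MAI = T->createMCAsmInfo("x86_64-unknown-linux-gnu");
      return MRI && MAI;
    }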
MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS); } +MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT); +MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT); + +/// createX86MachObjectWriter - Construct an X86 Mach-O object writer. +MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); + } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 3711038..f0f1982 100644 --- a/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// -#include "X86.h" -#include "X86FixupKinds.h" -#include "llvm/ADT/Twine.h" +#include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Object/MachOFormat.h" diff --git a/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp b/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp deleted file mode 100644 index 13680c5..0000000 --- a/contrib/llvm/lib/Target/X86/SSEDomainFix.cpp +++ /dev/null @@ -1,506 +0,0 @@ -//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the SSEDomainFix pass. -// -// Some SSE instructions like mov, and, or, xor are available in different -// variants for different operand types. These variant instructions are -// equivalent, but on Nehalem and newer cpus there is extra latency -// transferring data between integer and floating point domains. -// -// This pass changes the variant instructions to minimize domain crossings. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "sse-domain-fix" -#include "X86InstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track -/// of execution domains. -/// -/// An open DomainValue represents a set of instructions that can still switch -/// execution domain. Multiple registers may refer to the same open -/// DomainValue - they will eventually be collapsed to the same execution -/// domain. -/// -/// A collapsed DomainValue represents a single register that has been forced -/// into one of more execution domains. There is a separate collapsed -/// DomainValue for each register, but it may contain multiple execution -/// domains. 
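The pass removed below tracked, per XMM register, the set of execution domains a value is available in, exactly as the DomainValue comment describes. A minimal sketch of that bitmask bookkeeping, detached from the pass machinery:

    // One bit per execution domain (e.g. integer vs. floating point for
    // SSE); a value is "collapsed" once a single bit remains, mirroring
    // the DomainValue helpers in the deleted pass.
    struct DomainSet {
      unsigned Available; // bitmask of domains the value is live in

      bool has(unsigned D) const  { return Available & (1u << D); }
      void add(unsigned D)        { Available |= 1u << D; }
      void restrictTo(unsigned D) { Available = 1u << D; }
      unsigned common(unsigned Mask) const { return Available & Mask; }
      bool collapsed() const {
        // exactly one domain left: a power of two
        return Available && (Available & (Available - 1)) == 0;
      }
    };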
A register value is initially created in a single execution -/// domain, but if we were forced to pay the penalty of a domain crossing, we -/// keep track of the fact the the register is now available in multiple -/// domains. -namespace { -struct DomainValue { - // Basic reference counting. - unsigned Refs; - - // Bitmask of available domains. For an open DomainValue, it is the still - // possible domains for collapsing. For a collapsed DomainValue it is the - // domains where the register is available for free. - unsigned AvailableDomains; - - // Position of the last defining instruction. - unsigned Dist; - - // Twiddleable instructions using or defining these registers. - SmallVector<MachineInstr*, 8> Instrs; - - // A collapsed DomainValue has no instructions to twiddle - it simply keeps - // track of the domains where the registers are already available. - bool isCollapsed() const { return Instrs.empty(); } - - // Is domain available? - bool hasDomain(unsigned domain) const { - return AvailableDomains & (1u << domain); - } - - // Mark domain as available. - void addDomain(unsigned domain) { - AvailableDomains |= 1u << domain; - } - - // Restrict to a single domain available. - void setSingleDomain(unsigned domain) { - AvailableDomains = 1u << domain; - } - - // Return bitmask of domains that are available and in mask. - unsigned getCommonDomains(unsigned mask) const { - return AvailableDomains & mask; - } - - // First domain available. - unsigned getFirstDomain() const { - return CountTrailingZeros_32(AvailableDomains); - } - - DomainValue() { clear(); } - - void clear() { - Refs = AvailableDomains = Dist = 0; - Instrs.clear(); - } -}; -} - -static const unsigned NumRegs = 16; - -namespace { -class SSEDomainFixPass : public MachineFunctionPass { - static char ID; - SpecificBumpPtrAllocator<DomainValue> Allocator; - SmallVector<DomainValue*,16> Avail; - - MachineFunction *MF; - const X86InstrInfo *TII; - const TargetRegisterInfo *TRI; - MachineBasicBlock *MBB; - DomainValue **LiveRegs; - typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; - LiveOutMap LiveOuts; - unsigned Distance; - -public: - SSEDomainFixPass() : MachineFunctionPass(ID) {} - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "SSE execution domain fixup"; - } - -private: - // Register mapping. - int RegIndex(unsigned Reg); - - // DomainValue allocation. - DomainValue *Alloc(int domain = -1); - void Recycle(DomainValue*); - - // LiveRegs manipulations. - void SetLiveReg(int rx, DomainValue *DV); - void Kill(int rx); - void Force(int rx, unsigned domain); - void Collapse(DomainValue *dv, unsigned domain); - bool Merge(DomainValue *A, DomainValue *B); - - void enterBasicBlock(); - void visitGenericInstr(MachineInstr*); - void visitSoftInstr(MachineInstr*, unsigned mask); - void visitHardInstr(MachineInstr*, unsigned domain); -}; -} - -char SSEDomainFixPass::ID = 0; - -/// Translate TRI register number to an index into our smaller tables of -/// interesting registers. Return -1 for boring registers. -int SSEDomainFixPass::RegIndex(unsigned reg) { - assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort"); - reg -= X86::XMM0; - return reg < NumRegs ? (int) reg : -1; -} - -DomainValue *SSEDomainFixPass::Alloc(int domain) { - DomainValue *dv = Avail.empty() ? 
- new(Allocator.Allocate()) DomainValue : - Avail.pop_back_val(); - dv->Dist = Distance; - if (domain >= 0) - dv->addDomain(domain); - return dv; -} - -void SSEDomainFixPass::Recycle(DomainValue *dv) { - assert(dv && "Cannot recycle NULL"); - dv->clear(); - Avail.push_back(dv); -} - -/// Set LiveRegs[rx] = dv, updating reference counts. -void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) { - assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs) { - LiveRegs = new DomainValue*[NumRegs]; - std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); - } - - if (LiveRegs[rx] == dv) - return; - if (LiveRegs[rx]) { - assert(LiveRegs[rx]->Refs && "Bad refcount"); - if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]); - } - LiveRegs[rx] = dv; - if (dv) ++dv->Refs; -} - -// Kill register rx, recycle or collapse any DomainValue. -void SSEDomainFixPass::Kill(int rx) { - assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs || !LiveRegs[rx]) return; - - // Before killing the last reference to an open DomainValue, collapse it to - // the first available domain. - if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed()) - Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain()); - else - SetLiveReg(rx, 0); -} - -/// Force register rx into domain. -void SSEDomainFixPass::Force(int rx, unsigned domain) { - assert(unsigned(rx) < NumRegs && "Invalid index"); - DomainValue *dv; - if (LiveRegs && (dv = LiveRegs[rx])) { - if (dv->isCollapsed()) - dv->addDomain(domain); - else if (dv->hasDomain(domain)) - Collapse(dv, domain); - else { - // This is an incompatible open DomainValue. Collapse it to whatever and force - // the new value into domain. This costs a domain crossing. - Collapse(dv, dv->getFirstDomain()); - assert(LiveRegs[rx] && "Not live after collapse?"); - LiveRegs[rx]->addDomain(domain); - } - } else { - // Set up basic collapsed DomainValue. - SetLiveReg(rx, Alloc(domain)); - } -} - -/// Collapse open DomainValue into given domain. If there are multiple -/// registers using dv, they each get a unique collapsed DomainValue. -void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) { - assert(dv->hasDomain(domain) && "Cannot collapse"); - - // Collapse all the instructions. - while (!dv->Instrs.empty()) - TII->SetSSEDomain(dv->Instrs.pop_back_val(), domain); - dv->setSingleDomain(domain); - - // If there are multiple users, give them new, unique DomainValues. - if (LiveRegs && dv->Refs > 1) - for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == dv) - SetLiveReg(rx, Alloc(domain)); -} - -/// Merge - All instructions and registers in B are moved to A, and B is -/// released. -bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { - assert(!A->isCollapsed() && "Cannot merge into collapsed"); - assert(!B->isCollapsed() && "Cannot merge from collapsed"); - if (A == B) - return true; - // Restrict to the domains that A and B have in common. - unsigned common = A->getCommonDomains(B->AvailableDomains); - if (!common) - return false; - A->AvailableDomains = common; - A->Dist = std::max(A->Dist, B->Dist); - A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); - for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == B) - SetLiveReg(rx, A); - return true; -} - -void SSEDomainFixPass::enterBasicBlock() { - // Try to coalesce live-out registers from predecessors. 
- for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), - e = MBB->livein_end(); i != e; ++i) { - int rx = RegIndex(*i); - if (rx < 0) continue; - for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), - pe = MBB->pred_end(); pi != pe; ++pi) { - LiveOutMap::const_iterator fi = LiveOuts.find(*pi); - if (fi == LiveOuts.end()) continue; - DomainValue *pdv = fi->second[rx]; - if (!pdv) continue; - if (!LiveRegs || !LiveRegs[rx]) { - SetLiveReg(rx, pdv); - continue; - } - - // We have a live DomainValue from more than one predecessor. - if (LiveRegs[rx]->isCollapsed()) { - // We are already collapsed, but predecessor is not. Force him. - unsigned domain = LiveRegs[rx]->getFirstDomain(); - if (!pdv->isCollapsed() && pdv->hasDomain(domain)) - Collapse(pdv, domain); - continue; - } - - // Currently open, merge in predecessor. - if (!pdv->isCollapsed()) - Merge(LiveRegs[rx], pdv); - else - Force(rx, pdv->getFirstDomain()); - } - } -} - -// A hard instruction only works in one domain. All input registers will be -// forced into that domain. -void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) { - // Collapse all uses. - for (unsigned i = mi->getDesc().getNumDefs(), - e = mi->getDesc().getNumOperands(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); - if (rx < 0) continue; - Force(rx, domain); - } - - // Kill all defs and force them. - for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); - if (rx < 0) continue; - Kill(rx); - Force(rx, domain); - } -} - -// A soft instruction can be changed to work in other domains given by mask. -void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) { - // Bitmask of available domains for this instruction after taking collapsed - // operands into account. - unsigned available = mask; - - // Scan the explicit use operands for incoming domains. - SmallVector<int, 4> used; - if (LiveRegs) - for (unsigned i = mi->getDesc().getNumDefs(), - e = mi->getDesc().getNumOperands(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); - if (rx < 0) continue; - if (DomainValue *dv = LiveRegs[rx]) { - // Bitmask of domains that dv and available have in common. - unsigned common = dv->getCommonDomains(available); - // Is it possible to use this collapsed register for free? - if (dv->isCollapsed()) { - // Restrict available domains to the ones in common with the operand. - // If there are no common domains, we must pay the cross-domain - // penalty for this operand. - if (common) available = common; - } else if (common) - // Open DomainValue is compatible, save it for merging. - used.push_back(rx); - else - // Open DomainValue is not compatible with instruction. It is useless - // now. - Kill(rx); - } - } - - // If the collapsed operands force a single domain, propagate the collapse. - if (isPowerOf2_32(available)) { - unsigned domain = CountTrailingZeros_32(available); - TII->SetSSEDomain(mi, domain); - visitHardInstr(mi, domain); - return; - } - - // Kill off any remaining uses that don't match available, and build a list of - // incoming DomainValues that we want to merge. 
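The pivotal test in visitSoftInstr above is isPowerOf2_32(available): once the compatible-domain mask is down to a single bit, the instruction is no longer soft and is re-dispatched to visitHardInstr. A sketch of that test; __builtin_ctz stands in for LLVM's CountTrailingZeros_32 and is a GCC/Clang builtin:

    // Decide whether a soft instruction has been forced into one domain.
    // Mirrors the isPowerOf2_32 / CountTrailingZeros_32 pair used above.
    static bool forcedDomain(unsigned Available, unsigned &Domain) {
      if (Available == 0 || (Available & (Available - 1)) != 0)
        return false; // zero or more than one candidate domain
      Domain = __builtin_ctz(Available); // index of the single set bit
      return true;
    }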
- SmallVector<DomainValue*,4> doms; - for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { - int rx = *i; - DomainValue *dv = LiveRegs[rx]; - // This useless DomainValue could have been missed above. - if (!dv->getCommonDomains(available)) { - Kill(*i); - continue; - } - // sorted, uniqued insert. - bool inserted = false; - for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end(); - i != e && !inserted; ++i) { - if (dv == *i) - inserted = true; - else if (dv->Dist < (*i)->Dist) { - inserted = true; - doms.insert(i, dv); - } - } - if (!inserted) - doms.push_back(dv); - } - - // doms are now sorted in order of appearance. Try to merge them all, giving - // priority to the latest ones. - DomainValue *dv = 0; - while (!doms.empty()) { - if (!dv) { - dv = doms.pop_back_val(); - continue; - } - - DomainValue *latest = doms.pop_back_val(); - if (Merge(dv, latest)) continue; - - // If latest didn't merge, it is useless now. Kill all registers using it. - for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) - if (LiveRegs[*i] == latest) - Kill(*i); - } - - // dv is the DomainValue we are going to use for this instruction. - if (!dv) - dv = Alloc(); - dv->Dist = Distance; - dv->AvailableDomains = available; - dv->Instrs.push_back(mi); - - // Finally set all defs and non-collapsed uses to dv. - for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); - if (rx < 0) continue; - if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { - Kill(rx); - SetLiveReg(rx, dv); - } - } -} - -void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) { - // Process explicit defs, kill any XMM registers redefined. - for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); - if (rx < 0) continue; - Kill(rx); - } -} - -bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { - MF = &mf; - TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo()); - TRI = MF->getTarget().getRegisterInfo(); - MBB = 0; - LiveRegs = 0; - Distance = 0; - assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass"); - - // If no XMM registers are used in the function, we can skip it completely. - bool anyregs = false; - for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(), - E = X86::VR128RegClass.end(); I != E; ++I) - if (MF->getRegInfo().isPhysRegUsed(*I)) { - anyregs = true; - break; - } - if (!anyregs) return false; - - MachineBasicBlock *Entry = MF->begin(); - SmallPtrSet<MachineBasicBlock*, 16> Visited; - for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> > - DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); - DFI != DFE; ++DFI) { - MBB = *DFI; - enterBasicBlock(); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - MachineInstr *mi = I; - if (mi->isDebugValue()) continue; - ++Distance; - std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi); - if (domp.first) - if (domp.second) - visitSoftInstr(mi, domp.second); - else - visitHardInstr(mi, domp.first); - else if (LiveRegs) - visitGenericInstr(mi); - } - - // Save live registers at end of MBB - used by enterBasicBlock(). 
- if (LiveRegs) - LiveOuts.insert(std::make_pair(MBB, LiveRegs)); - LiveRegs = 0; - } - - // Clear the LiveOuts vectors. Should we also collapse any remaining - // DomainValues? - for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); - i != e; ++i) - delete[] i->second; - LiveOuts.clear(); - Avail.clear(); - Allocator.DestroyAll(); - - return false; -} - -FunctionPass *llvm::createSSEDomainFixPass() { - return new SSEDomainFixPass(); -} diff --git a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp index 08d4d84..52a67f7 100644 --- a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp +++ b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp @@ -9,7 +9,7 @@ #include "X86.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheX86_32Target, llvm::TheX86_64Target; diff --git a/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt b/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt deleted file mode 100644 index 3ad5f99..0000000 --- a/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMX86Utils - X86ShuffleDecode.cpp - ) -add_dependencies(LLVMX86Utils X86CodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/X86/Utils/Makefile b/contrib/llvm/lib/Target/X86/Utils/Makefile deleted file mode 100644 index 1df6f0f..0000000 --- a/contrib/llvm/lib/Target/X86/Utils/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86Utils - -# Hack: we need to include 'main' x86 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index cd06060..aeb3309 100644 --- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -167,24 +167,77 @@ void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); - // Handle vector lengths > 128 bits. Define a "section" as a set of - // 128 bits. AVX defines UNPCK* to operate independently on 128-bit - // sections. - unsigned NumSections = VT.getSizeInBits() / 128; - if (NumSections == 0 ) NumSections = 1; // Handle MMX - unsigned NumSectionElts = NumElts / NumSections; + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. 
+  unsigned NumLanes = VT.getSizeInBits() / 128;
+  if (NumLanes == 0) NumLanes = 1;  // Handle MMX
+  unsigned NumLaneElts = NumElts / NumLanes;
 
   unsigned Start = 0;
-  unsigned End = NumSectionElts / 2;
-  for (unsigned s = 0; s < NumSections; ++s) {
+  unsigned End = NumLaneElts / 2;
+  for (unsigned s = 0; s < NumLanes; ++s) {
     for (unsigned i = Start; i != End; ++i) {
       ShuffleMask.push_back(i);                 // Reads from dest/src1
-      ShuffleMask.push_back(i+NumSectionElts);  // Reads from src/src2
+      ShuffleMask.push_back(i+NumLaneElts);     // Reads from src/src2
     }
     // Process the next 128 bits.
-    Start += NumSectionElts;
-    End += NumSectionElts;
+    Start += NumLaneElts;
+    End += NumLaneElts;
   }
 }
 
+// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for vectors of 32-bit
+// elements. For 256-bit vectors, the shuffle is treated as two 128-bit lanes;
+// the referenced elements can't cross lanes, and the mask of the first lane
+// must be the same as that of the second.
+void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm,
+                         SmallVectorImpl<unsigned> &ShuffleMask) {
+  unsigned NumLanes = (NumElts*32)/128;
+  unsigned LaneSize = NumElts/NumLanes;
+
+  for (unsigned l = 0; l != NumLanes; ++l) {
+    for (unsigned i = 0; i != LaneSize; ++i) {
+      unsigned Idx = (Imm >> (i*2)) & 0x3;
+      ShuffleMask.push_back(Idx+(l*LaneSize));
+    }
+  }
+}
+
+// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for vectors of 64-bit
+// elements. For 256-bit vectors, the shuffle is treated as two 128-bit lanes;
+// the referenced elements can't cross lanes, but the mask of the first lane
+// can differ from that of the second (unlike VPERMILPS).
+void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
+                         SmallVectorImpl<unsigned> &ShuffleMask) {
+  unsigned NumLanes = (NumElts*64)/128;
+  unsigned LaneSize = NumElts/NumLanes;
+
+  for (unsigned l = 0; l < NumLanes; ++l) {
+    for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+      unsigned Idx = (Imm >> i) & 0x1;
+      ShuffleMask.push_back(Idx+(l*LaneSize));
+    }
+  }
+}
+
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+                          SmallVectorImpl<unsigned> &ShuffleMask) {
+  unsigned HalfSize = VT.getVectorNumElements()/2;
+  unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
+  unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
+
+  for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
+    ShuffleMask.push_back(i);
+  for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
+    ShuffleMask.push_back(i);
+}
+
+void DecodeVPERM2F128Mask(unsigned Imm,
+                          SmallVectorImpl<unsigned> &ShuffleMask) {
+  // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only
+  // has information about the instruction and not the types. So for
+  // instruction comment purposes, assume the 256-bit vector is v4i64.
+  return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask);
+}
+
 } // llvm namespace
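A worked example may help with the immediate decoding added above. The following is a standalone sketch mirroring DecodeVPERMILPSMask outside LLVM's types (it is not part of the patch): each 2-bit field of the immediate picks one of the four 32-bit elements within a 128-bit lane, and a 256-bit vector reuses the same fields for both lanes.

// Standalone sketch of the VPERMILPS decode above, outside LLVM's types.
// For a v8f32 vector (NumElts = 8): two 128-bit lanes, four elements each.
#include <cstdio>
#include <vector>

static std::vector<unsigned> decodeVPERMILPS(unsigned NumElts, unsigned Imm) {
  std::vector<unsigned> Mask;
  unsigned NumLanes = (NumElts * 32) / 128;
  unsigned LaneSize = NumElts / NumLanes;
  for (unsigned l = 0; l != NumLanes; ++l)
    for (unsigned i = 0; i != LaneSize; ++i)
      Mask.push_back(((Imm >> (i * 2)) & 0x3) + l * LaneSize);
  return Mask;
}

int main() {
  // Imm = 0x1B (0b00011011) reverses each lane independently.
  for (unsigned Idx : decodeVPERMILPS(8, 0x1B))
    std::printf("%u ", Idx);        // prints: 3 2 1 0 7 6 5 4
  std::printf("\n");
  return 0;
}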
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index b18f670..58193e6 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -82,6 +82,26 @@ void DecodeUNPCKLPDMask(unsigned NElts,
 void DecodeUNPCKLPMask(EVT VT,
                        SmallVectorImpl<unsigned> &ShuffleMask);
 
+
+// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for vectors of 32-bit
+// elements. For 256-bit vectors, the shuffle is treated as two 128-bit lanes;
+// the referenced elements can't cross lanes, and the mask of the first lane
+// must be the same as that of the second.
+void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
+                         SmallVectorImpl<unsigned> &ShuffleMask);
+
+// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for vectors of 64-bit
+// elements. For 256-bit vectors, the shuffle is treated as two 128-bit lanes;
+// the referenced elements can't cross lanes, but the mask of the first lane
+// can differ from that of the second (unlike VPERMILPS).
+void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+                         SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeVPERM2F128Mask(unsigned Imm,
+                          SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
+                          SmallVectorImpl<unsigned> &ShuffleMask);
+
 } // llvm namespace
 
 #endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index ec52dfb..81e9422 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_X86_H
 #define TARGET_X86_H
 
+#include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Target/TargetMachine.h"
@@ -24,16 +25,8 @@ namespace llvm {
 
 class FunctionPass;
 class JITCodeEmitter;
 class MachineCodeEmitter;
-class MCCodeEmitter;
-class MCContext;
-class MCInstrInfo;
-class MCObjectWriter;
-class MCSubtargetInfo;
 class Target;
-class TargetAsmBackend;
 class X86TargetMachine;
-class formatted_raw_ostream;
-class raw_ostream;
 
 /// createX86ISelDag - This pass converts a legalized DAG into a
 /// X86-specific DAG, ready for instruction scheduling.
@@ -51,22 +44,16 @@ FunctionPass* createGlobalBaseRegPass();
 ///
 FunctionPass *createX86FloatingPointStackifierPass();
 
-/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
-/// crossings.
-FunctionPass *createSSEDomainFixPass();
+/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
+/// before each call to avoid the transition penalty between functions encoded
+/// with AVX and SSE.
+FunctionPass *createX86IssueVZeroUpperPass();
 
 /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
 /// to the specified MCE object.
 FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
                                           JITCodeEmitter &JCE);
 
-MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
-                                      const MCSubtargetInfo &STI,
-                                      MCContext &Ctx);
-
-TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &);
-TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &);
-
 /// createX86EmitCodeToMemory - Returns a pass that converts a register
 /// allocated function into raw machine code in a dynamically
 /// allocated chunk of memory.
@@ -79,13 +66,6 @@ FunctionPass *createEmitX86CodeToMemory();
 ///
 FunctionPass *createX86MaxStackAlignmentHeuristicPass();
 
-
-/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
-MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
-                                          bool Is64Bit,
-                                          uint32_t CPUType,
-                                          uint32_t CPUSubtype);
-
 } // End llvm namespace
 
 #endif
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 4ccb43f..104b91f 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This is a target description file for the Intel i386 architecture, referred to
-// here as the "X86" architecture.
+// This is a target description file for the Intel i386 architecture, referred
+// to here as the "X86" architecture.
// //===----------------------------------------------------------------------===// @@ -23,6 +23,9 @@ include "llvm/Target/Target.td" def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", "64-bit mode (x86_64)">; +def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true", + "Native Client mode">; + //===----------------------------------------------------------------------===// // X86 Subtarget features. //===----------------------------------------------------------------------===// @@ -68,6 +71,9 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions", [FeatureCMOV]>; +def FeatureCMPXCHG16B : SubtargetFeature<"cmpxchg16b", "HasCmpxchg16b", "true", + "64-bit with cmpxchg16b", + [Feature64Bit]>; def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", "Bit testing of memory is slow">; def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", @@ -90,6 +96,16 @@ def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "Allow unaligned memory operands on vector/SIMD instructions">; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES instructions">; +def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", + "Support MOVBE instruction">; +def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true", + "Support RDRAND instruction">; +def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", + "Support 16-bit floating point conversion instructions">; +def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true", + "Support LZCNT instruction">; +def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true", + "Support BMI instructions">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -112,27 +128,43 @@ def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>; def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"pentium4", [FeatureSSE2]>; def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>; -def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>; def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, + FeatureSlowBTMem]>; +def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, + FeatureSlowBTMem]>; +def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, + FeatureSlowBTMem]>; +def : Proc<"atom", [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE, + FeatureSlowBTMem]>; // "Arrandale" along with corei3 and corei5 -def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem, FeatureAES]>; -def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem]>; +def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, + FeatureSlowBTMem, FeatureFastUAMem, FeatureAES]>; +def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B, + FeatureSlowBTMem, FeatureFastUAMem]>; // Westmere is a similar machine to nehalem with some additional features. 
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge -def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem, FeatureAES, FeatureCLMUL]>; +def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B, + FeatureSlowBTMem, FeatureFastUAMem, FeatureAES, + FeatureCLMUL]>; +// Sandy Bridge // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. // FIXME: Disabling AVX for now since it's not ready. -def : Proc<"corei7-avx", [FeatureSSE42, Feature64Bit, +def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES, FeatureCLMUL]>; +// Ivy Bridge +def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B, + FeatureAES, FeatureCLMUL, + FeatureRDRAND, FeatureF16C]>; + +// Haswell +def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES, + FeatureCLMUL, FeatureRDRAND, FeatureF16C, + FeatureFMA3, FeatureMOVBE, FeatureLZCNT, + FeatureBMI]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; @@ -150,19 +182,21 @@ def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, +def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, +def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit, +def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, - Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; + Feature3DNowA, FeatureCMPXCHG16B, + FeatureSlowBTMem]>; def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A, - Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"istanbul", [Feature3DNowA, Feature64Bit, FeatureSSE4A, - Feature3DNowA]>; -def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A, + Feature3DNowA, FeatureCMPXCHG16B, + FeatureSlowBTMem]>; +def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B, + FeatureSSE4A, Feature3DNowA]>; +def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureMMX]>; diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp index 99b4479..4c3ff02 100644 --- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -35,12 +35,12 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/SmallString.h" using namespace llvm; @@ -504,8 +504,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // .indirect_symbol _foo OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(), MCSA_IndirectSymbol); - // hlt; hlt; hlt; hlt; hlt hlt = 0xf4 = -12. - const char HltInsts[] = { -12, -12, -12, -12, -12 }; + // hlt; hlt; hlt; hlt; hlt hlt = 0xf4. 
+ const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4"; OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); } @@ -708,21 +708,8 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, // Target Registry Stuff //===----------------------------------------------------------------------===// -static MCInstPrinter *createX86MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI) { - if (SyntaxVariant == 0) - return new X86ATTInstPrinter(MAI); - if (SyntaxVariant == 1) - return new X86IntelInstPrinter(MAI); - return 0; -} - // Force static initialization. extern "C" void LLVMInitializeX86AsmPrinter() { RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target); RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target); - - TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter); } diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp index 4b11db7..aeff03a 100644 --- a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp @@ -98,8 +98,6 @@ namespace { void emitMemModRMByte(const MachineInstr &MI, unsigned Op, unsigned RegOpcodeField, intptr_t PCAdj = 0); - - unsigned getX86RegNum(unsigned RegNo) const; }; template<class CodeEmitter> @@ -169,7 +167,7 @@ static unsigned determineREX(const MachineInstr &MI) { const MachineOperand& MO = MI.getOperand(i); if (MO.isReg()) { unsigned Reg = MO.getReg(); - if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) + if (X86II::isX86_64NonExtLowByteReg(Reg)) REX |= 0x40; } } @@ -346,11 +344,6 @@ void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc, MCE.emitWordLE(0); } -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const { - return X86RegisterInfo::getX86RegNum(RegNo); -} - inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) { assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); @@ -360,7 +353,7 @@ inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, template<class CodeEmitter> void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeFld){ - MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg))); + MCE.emitByte(ModRMByte(3, RegOpcodeFld, X86_MC::getX86RegNum(ModRMReg))); } template<class CodeEmitter> @@ -498,7 +491,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // 2-7) and absolute references. unsigned BaseRegNo = -1U; if (BaseReg != 0 && BaseReg != X86::RIP) - BaseRegNo = getX86RegNum(BaseReg); + BaseRegNo = X86_MC::getX86RegNum(BaseReg); if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && @@ -566,7 +559,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, } // Calculate what the SS field value should be... - static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 }; unsigned SS = SSTable[Scale.getImm()]; if (BaseReg == 0) { @@ -574,15 +567,15 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // Manual 2A, table 2-7. The displacement has already been output. 
unsigned IndexRegNo; if (IndexReg.getReg()) - IndexRegNo = getX86RegNum(IndexReg.getReg()); + IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg()); else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) IndexRegNo = 4; emitSIBByte(SS, IndexRegNo, 5); } else { - unsigned BaseRegNo = getX86RegNum(BaseReg); + unsigned BaseRegNo = X86_MC::getX86RegNum(BaseReg); unsigned IndexRegNo; if (IndexReg.getReg()) - IndexRegNo = getX86RegNum(IndexReg.getReg()); + IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg()); else IndexRegNo = 4; // For example [ESP+1*<noreg>+4] emitSIBByte(SS, IndexRegNo, BaseRegNo); @@ -809,7 +802,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, } case X86II::AddRegFrm: { - MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg())); + MCE.emitByte(BaseOpcode + + X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg())); if (CurOp == NumOps) break; @@ -844,7 +838,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp+1).getReg())); + X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg())); CurOp += 2; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), @@ -854,7 +848,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestMem: { MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, - getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands) + X86_MC::getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands) .getReg())); CurOp += X86::AddrNumOperands + 1; if (CurOp != NumOps) @@ -866,7 +860,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMSrcReg: MCE.emitByte(BaseOpcode); emitRegModRMByte(MI.getOperand(CurOp+1).getReg(), - getX86RegNum(MI.getOperand(CurOp).getReg())); + X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); CurOp += 2; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), @@ -880,8 +874,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, X86II::getSizeOfImm(Desc->TSFlags) : 0; MCE.emitByte(BaseOpcode); - emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()), - PCAdj); + emitMemModRMByte(MI, CurOp+1, + X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj); CurOp += AddrOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), @@ -968,7 +962,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, MCE.emitByte(BaseOpcode); // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). 
emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp).getReg())); + X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp index f1d7ede..4a72d15 100644 --- a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp @@ -147,7 +147,7 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset, if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32) return SymOffset - (RelOffset + 4); else - assert("computeRelocation unknown for this relocation type"); + assert(0 && "computeRelocation unknown for this relocation type"); return 0; } diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index 21e163a..f912b28 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -22,6 +22,7 @@ #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" @@ -59,8 +60,8 @@ public: explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) { Subtarget = &TM.getSubtarget<X86Subtarget>(); StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; - X86ScalarSSEf64 = Subtarget->hasSSE2(); - X86ScalarSSEf32 = Subtarget->hasSSE1(); + X86ScalarSSEf64 = Subtarget->hasSSE2() || Subtarget->hasAVX(); + X86ScalarSSEf32 = Subtarget->hasSSE1() || Subtarget->hasAVX(); } virtual bool TargetSelectInstruction(const Instruction *I); @@ -134,7 +135,7 @@ private: (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } - bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false); + bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false); bool IsMemcpySmall(uint64_t Len); @@ -144,7 +145,7 @@ private: } // end anonymous namespace. -bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) { +bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); if (evt == MVT::Other || !evt.isSimple()) // Unhandled type. Halt "fast" selection and bail. @@ -198,8 +199,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, RC = X86::GR64RegisterClass; break; case MVT::f32: - if (Subtarget->hasSSE1()) { - Opc = X86::MOVSSrm; + if (X86ScalarSSEf32) { + Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm; RC = X86::FR32RegisterClass; } else { Opc = X86::LD_Fp32m; @@ -207,8 +208,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, } break; case MVT::f64: - if (Subtarget->hasSSE2()) { - Opc = X86::MOVSDrm; + if (X86ScalarSSEf64) { + Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; RC = X86::FR64RegisterClass; } else { Opc = X86::LD_Fp64m; @@ -250,10 +251,12 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { case MVT::i32: Opc = X86::MOV32mr; break; case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode. case MVT::f32: - Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m; + Opc = X86ScalarSSEf32 ? + (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m; break; case MVT::f64: - Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m; + Opc = X86ScalarSSEf64 ? + (Subtarget->hasAVX() ? 
X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m; break; } @@ -336,7 +339,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { U = C; } - if (const PointerType *Ty = dyn_cast<PointerType>(V->getType())) + if (PointerType *Ty = dyn_cast<PointerType>(V->getType())) if (Ty->getAddressSpace() > 255) // Fast instruction selection doesn't support the special // address spaces. @@ -399,7 +402,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { const Value *Op = *i; - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD.getStructLayout(STy); Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue()); continue; @@ -465,14 +468,23 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // Handle constant address. if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - // Can't handle alternate code models or TLS yet. + // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return false; + // Can't handle TLS yet. if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->isThreadLocal()) return false; + // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how + // it works...). + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + if (const GlobalVariable *GVar = + dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false))) + if (GVar->isThreadLocal()) + return false; + // RIP-relative addresses can't have additional register operands, so if // we've already folded stuff into the addressing mode, just force the // global value into its own register, which we can use as the basereg. @@ -658,6 +670,10 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { /// X86SelectStore - Select and emit code to implement store instructions. bool X86FastISel::X86SelectStore(const Instruction *I) { + // Atomic stores need special handling. + if (cast<StoreInst>(I)->isAtomic()) + return false; + MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -780,6 +796,10 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { /// X86SelectLoad - Select and emit code to implement load instructions. /// bool X86FastISel::X86SelectLoad(const Instruction *I) { + // Atomic loads need special handling. + if (cast<LoadInst>(I)->isAtomic()) + return false; + MVT VT; if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) return false; @@ -797,14 +817,20 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) { } static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { + bool HasAVX = Subtarget->hasAVX(); + bool X86ScalarSSEf32 = HasAVX || Subtarget->hasSSE1(); + bool X86ScalarSSEf64 = HasAVX || Subtarget->hasSSE2(); + switch (VT.getSimpleVT().SimpleTy) { default: return 0; case MVT::i8: return X86::CMP8rr; case MVT::i16: return X86::CMP16rr; case MVT::i32: return X86::CMP32rr; case MVT::i64: return X86::CMP64rr; - case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0; - case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0; + case MVT::f32: + return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0; + case MVT::f64: + return X86ScalarSSEf64 ? (HasAVX ? 
X86::VUCOMISDrr : X86::UCOMISDrr) : 0; } } @@ -1207,7 +1233,7 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) { bool X86FastISel::X86SelectFPExt(const Instruction *I) { // fpext from float to double. - if (Subtarget->hasSSE2() && + if (X86ScalarSSEf64 && I->getType()->isDoubleTy()) { const Value *V = I->getOperand(0); if (V->getType()->isFloatTy()) { @@ -1226,7 +1252,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) { } bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { - if (Subtarget->hasSSE2()) { + if (X86ScalarSSEf64) { if (I->getType()->isFloatTy()) { const Value *V = I->getOperand(0); if (V->getType()->isDoubleTy()) { @@ -1365,6 +1391,9 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { case Intrinsic::memset: { const MemSetInst &MSI = cast<MemSetInst>(I); + if (MSI.isVolatile()) + return false; + unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32; if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth)) return false; @@ -1411,7 +1440,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // Replace "add with overflow" intrinsics with an "add" instruction followed // by a seto/setc instruction. const Function *Callee = I.getCalledFunction(); - const Type *RetTy = + Type *RetTy = cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); MVT VT; @@ -1484,8 +1513,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (CC == CallingConv::Fast && GuaranteedTailCallOpt) return false; - const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); bool isVarArg = FTy->isVarArg(); // Don't know how to handle Win64 varargs yet. Nothing special needed for @@ -1547,8 +1576,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { Flags.setZExt(); if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) { - const PointerType *Ty = cast<PointerType>(ArgVal->getType()); - const Type *ElementTy = Ty->getElementType(); + PointerType *Ty = cast<PointerType>(ArgVal->getType()); + Type *ElementTy = Ty->getElementType(); unsigned FrameSize = TD.getTypeAllocSize(ElementTy); unsigned FrameAlign = CS.getParamAlignment(AttrInd); if (!FrameAlign) @@ -1600,7 +1629,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (ArgReg == 0) return false; - const Type *ArgTy = ArgVal->getType(); + Type *ArgTy = ArgVal->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; @@ -1709,7 +1738,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { assert(Res && "memcpy length already checked!"); (void)Res; } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) { // If this is a really simple value, emit this with the Value* version - //of X86FastEmitStore. If it isn't simple, we don't want to do this, + // of X86FastEmitStore. If it isn't simple, we don't want to do this, // as it can cause us to reevaluate the argument. X86FastEmitStore(ArgVT, ArgVal, AM); } else { @@ -1965,8 +1994,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { RC = X86::GR64RegisterClass; break; case MVT::f32: - if (Subtarget->hasSSE1()) { - Opc = X86::MOVSSrm; + if (X86ScalarSSEf32) { + Opc = Subtarget->hasAVX() ? 
X86::VMOVSSrm : X86::MOVSSrm; RC = X86::FR32RegisterClass; } else { Opc = X86::LD_Fp32m; @@ -1974,8 +2003,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { } break; case MVT::f64: - if (Subtarget->hasSSE2()) { - Opc = X86::MOVSDrm; + if (X86ScalarSSEf64) { + Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; RC = X86::FR64RegisterClass; } else { Opc = X86::LD_Fp64m; @@ -2070,8 +2099,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { switch (VT.SimpleTy) { default: return false; case MVT::f32: - if (Subtarget->hasSSE1()) { - Opc = X86::FsFLD0SS; + if (X86ScalarSSEf32) { + Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS; RC = X86::FR32RegisterClass; } else { Opc = X86::LD_Fp032; @@ -2079,8 +2108,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { } break; case MVT::f64: - if (Subtarget->hasSSE2()) { - Opc = X86::FsFLD0SD; + if (X86ScalarSSEf64) { + Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD; RC = X86::FR64RegisterClass; } else { Opc = X86::LD_Fp064; diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp index 6eed6abd..e3461c8 100644 --- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -260,6 +260,21 @@ namespace { BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); } + /// duplicatePendingSTBeforeKill - The instruction at I is about to kill + /// RegNo. If any PendingST registers still need the RegNo value, duplicate + /// them to new scratch registers. + void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) { + for (unsigned i = 0; i != NumPendingSTs; ++i) { + if (PendingST[i] != RegNo) + continue; + unsigned SR = getScratchReg(); + DEBUG(dbgs() << "Duplicating pending ST" << i + << " in FP" << RegNo << " to FP" << SR << '\n'); + duplicateToTop(RegNo, SR, I); + PendingST[i] = SR; + } + } + /// popStackAfter - Pop the current value off of the top of the FP stack /// after the specified instruction. void popStackAfter(MachineBasicBlock::iterator &I); @@ -406,6 +421,10 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { if (MI->isCopy() && isFPCopy(MI)) FPInstClass = X86II::SpecialFP; + if (MI->isImplicitDef() && + X86::RFP80RegClass.contains(MI->getOperand(0).getReg())) + FPInstClass = X86II::SpecialFP; + if (FPInstClass == X86II::NotFP) continue; // Efficiently ignore non-fp insts! @@ -461,6 +480,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { } dumpStack(); ); + (void)PrevMI; Changed = true; } @@ -969,6 +989,9 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { unsigned Reg = getFPReg(MI->getOperand(NumOps-1)); bool KillsSrc = MI->killsRegister(X86::FP0+Reg); + if (KillsSrc) + duplicatePendingSTBeforeKill(Reg, I); + // FISTP64m is strange because there isn't a non-popping versions. // If we have one _and_ we don't want to pop the operand, duplicate the value // on the stack instead of moving it. This ensure that popping the value is @@ -1032,6 +1055,7 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { bool KillsSrc = MI->killsRegister(X86::FP0+Reg); if (KillsSrc) { + duplicatePendingSTBeforeKill(Reg, I); // If this is the last use of the source register, just make sure it's on // the top of the stack. moveToTop(Reg, I); @@ -1318,6 +1342,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // When the source is killed, allocate a scratch FP register. 
if (KillsSrc) { + duplicatePendingSTBeforeKill(SrcFP, I); unsigned Slot = getSlot(SrcFP); unsigned SR = getScratchReg(); PendingST[DstST] = SR; @@ -1369,6 +1394,15 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { break; } + case TargetOpcode::IMPLICIT_DEF: { + // All FP registers must be explicitly defined, so load a 0 instead. + unsigned Reg = MI->getOperand(0).getReg() - X86::FP0; + DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n'); + BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0)); + pushReg(Reg); + break; + } + case X86::FpPOP_RETVAL: { // The FpPOP_RETVAL instruction is used after calls that return a value on // the floating point stack. We cannot model this with ST defs since CALL diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index ed45a9a..d54f4ae 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -15,6 +15,7 @@ #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" +#include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -91,12 +92,12 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, return 0; static const unsigned CallerSavedRegs32Bit[] = { - X86::EAX, X86::EDX, X86::ECX + X86::EAX, X86::EDX, X86::ECX, 0 }; static const unsigned CallerSavedRegs64Bit[] = { X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, - X86::R8, X86::R9, X86::R10, X86::R11 + X86::R8, X86::R9, X86::R10, X86::R11, 0 }; unsigned Opc = MBBI->getOpcode(); @@ -283,8 +284,8 @@ static bool isEAXLiveIn(MachineFunction &MF) { } void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, - MCSymbol *Label, - unsigned FramePtr) const { + MCSymbol *Label, + unsigned FramePtr) const { MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); @@ -346,6 +347,247 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, } } +/// getCompactUnwindRegNum - Get the compact unwind number for a given +/// register. The number corresponds to the enum lists in +/// compact_unwind_encoding.h. +static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) { + int Idx = 1; + for (; *CURegs; ++CURegs, ++Idx) + if (*CURegs == Reg) + return Idx; + + return -1; +} + +/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding +/// used with frameless stacks. It is passed the number of registers to be saved +/// and an array of the registers saved. +static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6], + unsigned RegCount, + bool Is64Bit) { + // The saved registers are numbered from 1 to 6. In order to encode the order + // in which they were saved, we re-number them according to their place in the + // register order. The re-numbering is relative to the last re-numbered + // register. E.g., if we have registers {6, 2, 4, 5} saved in that order: + // + // Orig Re-Num + // ---- ------ + // 6 6 + // 2 2 + // 4 3 + // 5 3 + // + static const unsigned CU32BitRegs[] = { + X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 + }; + static const unsigned CU64BitRegs[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + const unsigned *CURegs = (Is64Bit ? 
CU64BitRegs : CU32BitRegs);
+
+  uint32_t RenumRegs[6];
+  for (unsigned i = 6 - RegCount; i < 6; ++i) {
+    int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
+    if (CUReg == -1) return ~0U;
+    SavedRegs[i] = CUReg;
+
+    unsigned Countless = 0;
+    for (unsigned j = 6 - RegCount; j < i; ++j)
+      if (SavedRegs[j] < SavedRegs[i])
+        ++Countless;
+
+    RenumRegs[i] = SavedRegs[i] - Countless - 1;
+  }
+
+  // Take the renumbered values and encode them into a 10-bit number.
+  uint32_t permutationEncoding = 0;
+  switch (RegCount) {
+  case 6:
+    permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+                           + 6 * RenumRegs[2] +  2 * RenumRegs[3]
+                           +     RenumRegs[4];
+    break;
+  case 5:
+    permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+                           + 6 * RenumRegs[3] +  2 * RenumRegs[4]
+                           +     RenumRegs[5];
+    break;
+  case 4:
+    permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
+                           + 3 * RenumRegs[4] +      RenumRegs[5];
+    break;
+  case 3:
+    permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
+                           +     RenumRegs[5];
+    break;
+  case 2:
+    permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
+    break;
+  case 1:
+    permutationEncoding |=       RenumRegs[5];
+    break;
+  }
+
+  assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+         "Invalid compact register encoding!");
+  return permutationEncoding;
+}
+
+/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
+/// compact encoding with a frame pointer.
+static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
+                                                      bool Is64Bit) {
+  static const unsigned CU32BitRegs[] = {
+    X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+  };
+  static const unsigned CU64BitRegs[] = {
+    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+  };
+  const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
+
+  // Encode the registers in the order they were saved, 3 bits per register.
+  // The registers are numbered from 1 to 6.
+  uint32_t RegEnc = 0;
+  for (int I = 5; I >= 0; --I) {
+    unsigned Reg = SavedRegs[I];
+    if (Reg == 0) break;
+    int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
+    if (CURegNum == -1)
+      return ~0U;
+    RegEnc |= (CURegNum & 0x7) << (5 - I);
+  }
+
+  assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
+  return RegEnc;
+}
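The renumbering above is easy to misread, so here is a standalone sketch (not part of the patch) that feeds the frameless encoder the save order {6, 2, 4, 5} from the comment's table. The zero-based renumbering comes out as {5, 1, 2, 2}, one less than the table's values, and packs into ten bits using the RegCount = 4 coefficients from the switch.

// Standalone model of the frameless permutation encoding above. It takes
// compact-unwind register numbers (1-6) directly instead of LLVM register
// enums, but keeps the tail-filled SavedRegs[6] layout and the RegCount = 4
// coefficients {60, 12, 3, 1}.
#include <cstdio>

static unsigned encodeFrameless4(const unsigned SavedRegs[6]) {
  unsigned RenumRegs[6] = {0, 0, 0, 0, 0, 0};
  for (unsigned i = 2; i < 6; ++i) {            // 6 - RegCount == 2
    unsigned Smaller = 0;                       // earlier saves with a
    for (unsigned j = 2; j < i; ++j)            // smaller register number
      if (SavedRegs[j] < SavedRegs[i])
        ++Smaller;
    RenumRegs[i] = SavedRegs[i] - Smaller - 1;  // zero-based renumbering
  }
  return 60 * RenumRegs[2] + 12 * RenumRegs[3]
         + 3 * RenumRegs[4] + RenumRegs[5];
}

int main() {
  // Save order {6, 2, 4, 5}: renumbers to {5, 1, 2, 2}.
  const unsigned Regs[6] = {0, 0, 6, 2, 4, 5};
  std::printf("0x%03X\n", encodeFrameless4(Regs)); // prints 0x140 (320)
  return 0;
}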
+uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
+  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+  unsigned StackPtr = RegInfo->getStackRegister();
+
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+  bool Is64Bit = STI.is64Bit();
+  bool HasFP = hasFP(MF);
+
+  unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
+  int SavedRegIdx = 6;
+
+  unsigned OffsetSize = (Is64Bit ? 8 : 4);
+
+  unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r);
+  unsigned PushInstrSize = 1;
+  unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+  unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
+  unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta);
+  unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
+
+  unsigned StackDivide = (Is64Bit ? 8 : 4);
+
+  unsigned InstrOffset = 0;
+  unsigned CFAOffset = 0;
+  unsigned StackAdjust = 0;
+
+  MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
+  bool ExpectEnd = false;
+  for (MachineBasicBlock::iterator
+         MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) {
+    MachineInstr &MI = *MBBI;
+    unsigned Opc = MI.getOpcode();
+    if (Opc == X86::PROLOG_LABEL) continue;
+    if (!MI.getFlag(MachineInstr::FrameSetup)) break;
+
+    // We don't expect any more prologue instructions.
+    if (ExpectEnd) return 0;
+
+    if (Opc == PushInstr) {
+      // If there are too many saved registers, we cannot use compact encoding.
+      if (--SavedRegIdx < 0) return 0;
+
+      SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg();
+      CFAOffset += OffsetSize;
+      InstrOffset += PushInstrSize;
+    } else if (Opc == MoveInstr) {
+      unsigned SrcReg = MI.getOperand(1).getReg();
+      unsigned DstReg = MI.getOperand(0).getReg();
+
+      if (DstReg != FramePtr || SrcReg != StackPtr)
+        return 0;
+
+      CFAOffset = 0;
+      memset(SavedRegs, 0, sizeof(SavedRegs));
+      InstrOffset += MoveInstrSize;
+    } else if (Opc == SubtractInstr) {
+      if (StackAdjust)
+        // We already have a stack pointer adjustment.
+        return 0;
+
+      if (!MI.getOperand(0).isReg() ||
+          MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
+          MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm())
+        // We need this to be a stack adjustment pointer. Something like:
+        //
+        //   %RSP<def> = SUB64ri8 %RSP, 48
+        return 0;
+
+      StackAdjust = MI.getOperand(2).getImm() / StackDivide;
+      SubtractInstrIdx += InstrOffset;
+      ExpectEnd = true;
+    }
+  }
+
+  // Encode that we are using EBP/RBP as the frame pointer.
+  uint32_t CompactUnwindEncoding = 0;
+  CFAOffset /= StackDivide;
+  if (HasFP) {
+    if ((CFAOffset & 0xFF) != CFAOffset)
+      // Offset was too big for compact encoding.
+      return 0;
+
+    // Get the encoding of the saved registers when we have a frame pointer.
+    uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
+    if (RegEnc == ~0U)
+      return 0;
+
+    CompactUnwindEncoding |= 0x01000000;
+    CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16;
+    CompactUnwindEncoding |= RegEnc & 0x7FFF;
+  } else {
+    unsigned FullOffset = CFAOffset + StackAdjust;
+    if ((FullOffset & 0xFF) == FullOffset) {
+      // Frameless stack.
+      CompactUnwindEncoding |= 0x02000000;
+      CompactUnwindEncoding |= (FullOffset & 0xFF) << 16;
+    } else {
+      if ((CFAOffset & 0x7) != CFAOffset)
+        // The extra stack adjustments are too big for us to handle.
+        return 0;
+
+      // Frameless stack with an offset too large for us to encode compactly.
+      CompactUnwindEncoding |= 0x03000000;
+
+      // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
+      // instruction.
+      CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
+
+      // Encode any extra stack changes (done via push instructions).
+      CompactUnwindEncoding |= (CFAOffset & 0x7) << 13;
+    }
+
+    // Get the encoding of the saved registers when we don't have a frame
+    // pointer.
+    uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs,
+                                                               6 - SavedRegIdx,
+                                                               Is64Bit);
+    if (RegEnc == ~0U) return 0;
+    CompactUnwindEncoding |= RegEnc & 0x3FF;
+  }
+
+  return CompactUnwindEncoding;
+}
+
 /// emitPrologue - Push callee-saved registers onto the stack, which
 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
 /// space for local variables.
Also emit labels used by the exception handler to @@ -370,7 +612,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); - DebugLoc DL; // If we're forcing a stack realignment we can't rely on just the frame @@ -398,7 +639,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. - !IsWin64) { // Win64 has no Red Zone + !IsWin64 && // Win64 has no Red Zone + !EnableSegmentedStacks) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); @@ -459,7 +701,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark the place where EBP/RBP was saved. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) + .addSym(FrameLabel); // Define the current CFA rule to use the provided offset. if (StackSize) { @@ -478,7 +721,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); } - // Update EBP with the new base value... + // Update EBP with the new base value. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) @@ -487,7 +730,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) + .addSym(FrameLabel); // Define the current CFA to use the EBP/RBP register. MachineLocation FPDst(FramePtr); @@ -504,8 +748,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (RegInfo->needsStackRealignment(MF)) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), - StackPtr).addReg(StackPtr).addImm(-MaxAlign); + TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr) + .addReg(StackPtr) + .addImm(-MaxAlign) + .setMIFlag(MachineInstr::FrameSetup); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); @@ -522,6 +768,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; + MBBI->setFlag(MachineInstr::FrameSetup); ++MBBI; if (!HasFP && needsFrameMoves) { @@ -530,8 +777,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); // Define the current CFA rule to use the provided offset. - unsigned Ptr = StackSize ? - MachineLocation::VirtualFP : StackPtr; + unsigned Ptr = StackSize ? 
MachineLocation::VirtualFP : StackPtr; MachineLocation SPDst(Ptr); MachineLocation SPSrc(Ptr, StackOffset); Moves.push_back(MachineMove(Label, SPDst, SPSrc)); @@ -586,26 +832,30 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill); + .addReg(X86::EAX, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); } if (Is64Bit) { // Handle the 64-bit Windows ABI case where we need to call __chkstk. // Function prologue is responsible for adjusting the stack pointer. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) - .addImm(NumBytes); + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); } else { // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. // We'll also use 4 already allocated bytes for EAX. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(isEAXAlive ? NumBytes - 4 : NumBytes); + .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) + .setMIFlag(MachineInstr::FrameSetup); } BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32)) .addExternalSymbol(StackProbeSymbol) .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) + .setMIFlag(MachineInstr::FrameSetup); // MSVC x64's __chkstk needs to adjust %rsp. // FIXME: %rax preserves the offset and should be available. @@ -618,6 +868,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), StackPtr, false, NumBytes - 4); + MI->setFlag(MachineInstr::FrameSetup); MBB.insert(MBBI, MI); } } else if (NumBytes) @@ -627,7 +878,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) + .addSym(Label); if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. @@ -647,6 +899,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (PushedRegs) emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); } + + // Darwin 10.7 and greater has support for compact unwind encoding. + if (STI.getTargetTriple().isMacOSX() && + !STI.getTargetTriple().isMacOSXVersionLT(10, 7)) + MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF)); } void X86FrameLowering::emitEpilogue(MachineFunction &MF, @@ -844,23 +1101,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } -void -X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const { - // Calculate amount of bytes used for return address storing - int stackGrowth = (STI.is64Bit() ? -8 : -4); - const X86RegisterInfo *RI = TM.getRegisterInfo(); - - // Initial state of the frame pointer is esp+stackGrowth. 
- MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(RI->getStackRegister(), stackGrowth); - Moves.push_back(MachineMove(0, Dst, Src)); - - // Add return address to move list - MachineLocation CSDst(RI->getStackRegister(), stackGrowth); - MachineLocation CSSrc(RI->getRARegister()); - Moves.push_back(MachineMove(0, CSDst, CSSrc)); -} - int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { const X86RegisterInfo *RI = static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); @@ -873,9 +1113,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con // Skip the saved EBP. Offset += RI->getSlotSize(); } else { - unsigned Align = MFI->getObjectAlignment(FI); - assert((-(Offset + StackSize)) % Align == 0); - Align = 0; + assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); return Offset + StackSize; } // FIXME: Support tail calls @@ -1027,184 +1265,183 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, true); assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); - FrameIdx = 0; + (void)FrameIdx; } } -/// permuteEncode - Create the permutation encoding used with frameless -/// stacks. It is passed the number of registers to be saved and an array of the -/// registers saved. -static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) { - // The saved registers are numbered from 1 to 6. In order to encode the order - // in which they were saved, we re-number them according to their place in the - // register order. The re-numbering is relative to the last re-numbered - // register. E.g., if we have registers {6, 2, 4, 5} saved in that order: - // - // Orig Re-Num - // ---- ------ - // 6 6 - // 2 2 - // 4 3 - // 5 3 - // - bool Used[7] = { false, false, false, false, false, false, false }; - uint32_t RenumRegs[6]; - for (unsigned I = 0; I < SavedCount; ++I) { - uint32_t Renum = 0; - for (unsigned U = 1; U < 7; ++U) { - if (U == Registers[I]) - break; - if (!Used[U]) - ++Renum; - } - - Used[Registers[I]] = true; - RenumRegs[I] = Renum; +static bool +HasNestArgument(const MachineFunction *MF) { + const Function *F = MF->getFunction(); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; I++) { + if (I->hasNestAttr()) + return true; } + return false; +} - // Take the renumbered values and encode them into a 10-bit number. 
- uint32_t permutationEncoding = 0; - switch (SavedCount) { - case 6: - permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] - + 6 * RenumRegs[2] + 2 * RenumRegs[3] - + RenumRegs[4]; - break; - case 5: - permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] - + 6 * RenumRegs[2] + 2 * RenumRegs[3] - + RenumRegs[4]; - break; - case 4: - permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1] - + 3 * RenumRegs[2] + RenumRegs[3]; - break; - case 3: - permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1] - + RenumRegs[2]; - break; - case 2: - permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1]; - break; - case 1: - permutationEncoding |= RenumRegs[0]; - break; +static unsigned +GetScratchRegister(bool Is64Bit, const MachineFunction &MF) { + if (Is64Bit) { + return X86::R11; + } else { + CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); + bool IsNested = HasNestArgument(&MF); + + if (CallingConvention == CallingConv::X86_FastCall) { + if (IsNested) { + report_fatal_error("Segmented stacks does not support fastcall with " + "nested function."); + return -1; + } else { + return X86::EAX; + } + } else { + if (IsNested) + return X86::EDX; + else + return X86::ECX; + } } - - return permutationEncoding; } -uint32_t X86FrameLowering:: -getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, - int DataAlignmentFactor, bool IsEH) const { - uint32_t Encoding = 0; - int CFAOffset = 0; - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 }; - unsigned SavedRegIdx = 0; - int FramePointerReg = -1; +void +X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { + MachineBasicBlock &prologueMBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86InstrInfo &TII = *TM.getInstrInfo(); + uint64_t StackSize; + bool Is64Bit = STI.is64Bit(); + unsigned TlsReg, TlsOffset; + DebugLoc DL; + const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>(); - for (ArrayRef<MCCFIInstruction>::const_iterator - I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { - const MCCFIInstruction &Inst = *I; - MCSymbol *Label = Inst.getLabel(); + unsigned ScratchReg = GetScratchRegister(Is64Bit, MF); + assert(!MF.getRegInfo().isLiveIn(ScratchReg) && + "Scratch register is live-in"); - // Ignore invalid labels. - if (Label && !Label->isDefined()) continue; + if (MF.getFunction()->isVarArg()) + report_fatal_error("Segmented stacks do not support vararg functions."); + if (!ST->isTargetLinux()) + report_fatal_error("Segmented stacks supported only on linux."); - unsigned Operation = Inst.getOperation(); - if (Operation != MCCFIInstruction::Move && - Operation != MCCFIInstruction::RelMove) - // FIXME: We can't handle this frame just yet. - return 0; - - const MachineLocation &Dst = Inst.getDestination(); - const MachineLocation &Src = Inst.getSource(); - const bool IsRelative = (Operation == MCCFIInstruction::RelMove); - - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (Src.getReg() != MachineLocation::VirtualFP) { - // DW_CFA_def_cfa - assert(FramePointerReg == -1 &&"Defining more than one frame pointer?"); - if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP && - TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP) - // The frame pointer isn't EBP/RBP. Cannot make unwind information - // compact. 
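GetScratchRegister above reduces to a small decision table; a mirror of it as a standalone sketch (hypothetical enum, illustrative only):

enum ScratchReg { R11, EAX, ECX, EDX };
static ScratchReg pickScratch(bool Is64Bit, bool IsFastCall, bool HasNest) {
  if (Is64Bit)
    return R11;               // always usable in the 64-bit prologue
  if (IsFastCall)
    return EAX;               // ECX/EDX carry fastcall arguments
                              // (fastcall + nest is rejected as fatal above)
  return HasNest ? EDX : ECX; // ECX holds the nest pointer when present
}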
- return 0; - FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH); - } // else DW_CFA_def_cfa_offset - - if (IsRelative) - CFAOffset += Src.getOffset(); - else - CFAOffset -= Src.getOffset(); + MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + bool IsNested = false; - continue; - } + // We need to know if the function has a nest argument only in 64 bit mode. + if (Is64Bit) + IsNested = HasNestArgument(&MF); - if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - // DW_CFA_def_cfa_register - assert(FramePointerReg == -1 && "Defining more than one frame pointer?"); + // The MOV R10, RAX needs to be in a different block, since the RET we emit in + // allocMBB needs to be last (terminating) instruction. + MachineBasicBlock *restoreR10MBB = NULL; + if (IsNested) + restoreR10MBB = MF.CreateMachineBasicBlock(); - if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP && - TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP) - // The frame pointer isn't EBP/RBP. Cannot make unwind information - // compact. - return 0; + for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), + e = prologueMBB.livein_end(); i != e; i++) { + allocMBB->addLiveIn(*i); + checkMBB->addLiveIn(*i); - FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH); - if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg)) - return 0; + if (IsNested) + restoreR10MBB->addLiveIn(*i); + } - SavedRegs[0] = 0; - SavedRegIdx = 0; - continue; - } + if (IsNested) { + allocMBB->addLiveIn(X86::R10); + restoreR10MBB->addLiveIn(X86::RAX); + } - unsigned Reg = Src.getReg(); - int Offset = Dst.getOffset(); - if (IsRelative) - Offset -= CFAOffset; - Offset /= DataAlignmentFactor; + if (IsNested) + MF.push_front(restoreR10MBB); + MF.push_front(allocMBB); + MF.push_front(checkMBB); + + // Eventually StackSize will be calculated by a link-time pass; which will + // also decide whether checking code needs to be injected into this particular + // prologue. + StackSize = MFI->getStackSize(); + + // Read the limit off the current stacklet off the stack_guard location. + if (Is64Bit) { + TlsReg = X86::FS; + TlsOffset = 0x70; + + BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP) + .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg) + .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); + } else { + TlsReg = X86::GS; + TlsOffset = 0x30; - if (Offset < 0) { - // FIXME: Handle? - // DW_CFA_offset_extended_sf - return 0; - } else if (Reg < 64) { - // DW_CFA_offset + Reg - if (SavedRegIdx >= 6) return 0; - int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH); - if (CURegNum == -1) return 0; - SavedRegs[SavedRegIdx++] = CURegNum; - } else { - // FIXME: Handle? - // DW_CFA_offset_extended - return 0; - } + BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) + .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) + .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); } - // Bail if there are too many registers to encode. - if (SavedRegIdx > 6) return 0; + // This jump is taken if SP >= (Stacklet Limit + Stack Space required). + // It jumps to normal execution of the function body. 
+ BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB); + + // On 32 bit we first push the arguments size and then the frame size. On 64 + // bit, we pass the stack frame size in r10 and the argument size in r11. + if (Is64Bit) { + // Functions with nested arguments use R10, so it needs to be saved across + // the call to _morestack + + if (IsNested) + BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10); + + BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10) + .addImm(StackSize); + BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11) + .addImm(X86FI->getArgumentStackSize()); + MF.getRegInfo().setPhysRegUsed(X86::R10); + MF.getRegInfo().setPhysRegUsed(X86::R11); + } else { + // Since we'll call __morestack, stack alignment needs to be preserved. + BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP) + .addImm(8); + BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) + .addImm(X86FI->getArgumentStackSize()); + BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) + .addImm(StackSize); + } - // Check if the offset is too big. - CFAOffset /= 4; - if ((CFAOffset & 0xFF) != CFAOffset) - return 0; - Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding. - - if (FramePointerReg != -1) { - Encoding |= 0x01000000; // EBP/RBP Unwind Frame - for (unsigned I = 0; I != SavedRegIdx; ++I) { - unsigned Reg = SavedRegs[I]; - if (Reg == unsigned(FramePointerReg)) continue; - Encoding |= (Reg & 0x7) << (I * 3); // Register encoding - } + // __morestack is in libgcc + if (Is64Bit) + BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32)) + .addExternalSymbol("__morestack"); + else + BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("__morestack"); + + // __morestack only seems to remove 8 bytes off the stack. Add back the + // additional 8 bytes we added before pushing the arguments. 
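The byte counts around the 32-bit __morestack call balance out as follows; a small sketch of the %esp bookkeeping, assuming the behavior stated in the comment above (illustrative only):

// Bytes reserved below the entry %esp across allocMBB on 32-bit.
static int espDelta32() {
  int Delta = 0;
  Delta += 8;  // SUB32ri: padding so the call site stays 16-byte aligned
  Delta += 4;  // PUSHi32: argument area size
  Delta += 4;  // PUSHi32: frame (stacklet) size
  Delta -= 8;  // __morestack pops only its two 4-byte arguments
  Delta -= 8;  // the ADD32ri below removes the padding again
  return Delta; // 0: the stack pointer is back where it started
}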
+ if (!Is64Bit) + BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP) + .addImm(8); + BuildMI(allocMBB, DL, TII.get(X86::RET)); + + if (IsNested) + BuildMI(restoreR10MBB, DL, TII.get(X86::MOV64rr), X86::R10) + .addReg(X86::RAX); + + if (IsNested) { + allocMBB->addSuccessor(restoreR10MBB); + restoreR10MBB->addSuccessor(&prologueMBB); } else { - Encoding |= 0x02000000; // Frameless unwind with small stack - Encoding |= (SavedRegIdx & 0x7) << 10; - Encoding |= permuteEncode(SavedRegIdx, SavedRegs); + allocMBB->addSuccessor(&prologueMBB); } - return Encoding; + checkMBB->addSuccessor(allocMBB); + checkMBB->addSuccessor(&prologueMBB); + +#ifdef XDEBUG + MF.verify(); +#endif } diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h index 14c31ed..6f49064 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h @@ -41,6 +41,8 @@ public: void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void adjustForSegmentedStacks(MachineFunction &MF) const; + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; @@ -57,11 +59,8 @@ public: bool hasFP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - - uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, - int DataAlignmentFactor, bool IsEH) const; + uint32_t getCompactUnwindEncoding(MachineFunction &MF) const; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 2b0f283..02b0ff2 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -474,10 +474,15 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; - // If the source and destination are SSE registers, then this is a legal - // conversion that should not be lowered. EVT SrcVT = N->getOperand(0).getValueType(); EVT DstVT = N->getValueType(0); + + // If any of the sources are vectors, no fp stack involved. + if (SrcVT.isVector() || DstVT.isVector()) + continue; + + // If the source and destination are SSE registers, then this is a legal + // conversion that should not be lowered. bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); if (SrcIsSSE && DstIsSSE) @@ -2168,9 +2173,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, Reg); - // Emit a testb. No special NOREX tricks are needed since there's - // only one GPR operand! - return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only + // target GR8_NOREX registers, so make sure the register class is + // forced. 
+ return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32, Subreg, ShiftedImm); } diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 5096d9a..7c8ce17 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; using namespace dwarf; @@ -71,9 +72,6 @@ static SDValue Extract128BitVector(SDValue Vec, SelectionDAG &DAG, DebugLoc dl); -static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG); - - /// Generate a DAG to grab 128-bits from a vector > 128 bits. This /// sets things up to match to an AVX VEXTRACTF128 instruction or a /// simple subregister reference. Idx is an index in the 128 bits we @@ -85,14 +83,10 @@ static SDValue Extract128BitVector(SDValue Vec, DebugLoc dl) { EVT VT = Vec.getValueType(); assert(VT.getSizeInBits() == 256 && "Unexpected vector size!"); - EVT ElVT = VT.getVectorElementType(); - - int Factor = VT.getSizeInBits() / 128; - - EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), - ElVT, - VT.getVectorNumElements() / Factor); + int Factor = VT.getSizeInBits()/128; + EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, + VT.getVectorNumElements()/Factor); // Extract from UNDEF is UNDEF. if (Vec.getOpcode() == ISD::UNDEF) @@ -111,7 +105,6 @@ static SDValue Extract128BitVector(SDValue Vec, * ElemsPerChunk); SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); - SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); @@ -136,21 +129,18 @@ static SDValue Insert128BitVector(SDValue Result, assert(VT.getSizeInBits() == 128 && "Unexpected vector size!"); EVT ElVT = VT.getVectorElementType(); - unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - EVT ResultVT = Result.getValueType(); // Insert the relevant 128 bits. - unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits(); + unsigned ElemsPerChunk = 128/ElVT.getSizeInBits(); // This is the index of the first element of the 128-bit chunk // we want. - unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) + unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128) * ElemsPerChunk); SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); - Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); return Result; @@ -159,34 +149,6 @@ static SDValue Insert128BitVector(SDValue Result, return SDValue(); } -/// Given two vectors, concat them. -static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) { - DebugLoc dl = Lower.getDebugLoc(); - - assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!"); - - EVT VT = EVT::getVectorVT(*DAG.getContext(), - Lower.getValueType().getVectorElementType(), - Lower.getValueType().getVectorNumElements() * 2); - - // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors). - assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!"); - - // Insert the upper subvector. - SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper, - DAG.getConstant( - // This is half the length of the result - // vector. Start inserting the upper 128 - // bits here. - Lower.getValueType().getVectorNumElements(), - MVT::i32), - DAG, dl); - - // Insert the lower subvector. 
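The index arithmetic in Extract128BitVector/Insert128BitVector above only ever produces chunk-aligned positions; for example, with a v8i32 source (32-bit elements, ElemsPerChunk = 4), indices 0-3 normalize to 0 and 4-7 to 4. A one-line recomputation:

// Snap an element index to the start of its 128-bit chunk.
static unsigned normalizeIdx(unsigned IdxVal, unsigned EltBits) {
  unsigned ElemsPerChunk = 128 / EltBits;            // 4 for i32, 2 for i64
  return ((IdxVal * EltBits) / 128) * ElemsPerChunk; // e.g. 5 -> 4, 3 -> 0
}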
- Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl); - return Vec; -} - static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); bool is64Bit = Subtarget->is64Bit(); @@ -197,11 +159,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { return new TargetLoweringObjectFileMachO(); } - if (Subtarget->isTargetELF()) { - if (is64Bit) - return new X8664_ELFTargetObjectFile(TM); - return new X8632_ELFTargetObjectFile(TM); - } + if (Subtarget->isTargetELF()) + return new TargetLoweringObjectFileELF(); if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) return new TargetLoweringObjectFileCOFF(); llvm_unreachable("unknown subtarget type"); @@ -222,6 +181,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // X86 is weird, it always uses i8 for shift amounts and setcc results. setBooleanContents(ZeroOrOneBooleanContent); + // X86-SSE is even stranger. It uses -1 or 0 for vector masks. + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // For 64-bit since we have so many registers use the ILP scheduler, for // 32-bit code use the register pressure specific scheduling. @@ -354,7 +315,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); } else if (!UseSoftFloat) { - if (X86ScalarSSEf32 && !Subtarget->hasSSE3()) + // Since AVX is a superset of SSE3, only check for SSE here. + if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) // Expand FP_TO_UINT into a select. // FIXME: We would like to use a Custom expander here eventually to do // the optimal thing for SSE vs. the default expansion in the legalizer. @@ -417,15 +379,24 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); - setOperationAction(ISD::CTTZ , MVT::i8 , Custom); - setOperationAction(ISD::CTLZ , MVT::i8 , Custom); - setOperationAction(ISD::CTTZ , MVT::i16 , Custom); - setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - setOperationAction(ISD::CTTZ , MVT::i32 , Custom); - setOperationAction(ISD::CTLZ , MVT::i32 , Custom); - if (Subtarget->is64Bit()) { - setOperationAction(ISD::CTTZ , MVT::i64 , Custom); - setOperationAction(ISD::CTLZ , MVT::i64 , Custom); + if (Subtarget->hasBMI()) { + setOperationAction(ISD::CTTZ , MVT::i8 , Promote); + } else { + setOperationAction(ISD::CTTZ , MVT::i8 , Custom); + setOperationAction(ISD::CTTZ , MVT::i16 , Custom); + setOperationAction(ISD::CTTZ , MVT::i32 , Custom); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTTZ , MVT::i64 , Custom); + } + + if (Subtarget->hasLZCNT()) { + setOperationAction(ISD::CTLZ , MVT::i8 , Promote); + } else { + setOperationAction(ISD::CTLZ , MVT::i8 , Custom); + setOperationAction(ISD::CTLZ , MVT::i16 , Custom); + setOperationAction(ISD::CTLZ , MVT::i32 , Custom); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTLZ , MVT::i64 , Custom); } if (Subtarget->hasPOPCNT()) { @@ -491,8 +462,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasXMM()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); - // We may not have a libcall for MEMBARRIER so we should lower this. 
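The Promote choices for the narrow bit-count operations above rely on the usual legalizer fix-ups: CTTZ is widened with a guard bit so a zero input still yields the narrow width, and CTLZ subtracts the extra leading zeros introduced by widening. Scalar equivalents of those fix-ups, assuming BMI TZCNT/LZCNT semantics (a sketch, not the legalizer code):

#include <cstdint>
static unsigned cttz8(uint8_t X) {
  // Setting bit 8 caps the count at 8 when X == 0.
  return __builtin_ctz((unsigned)X | (1u << 8));
}
static unsigned ctlz8(uint8_t X) {
  // The widened value has 24 extra leading zeros; remove them.
  // NB: __builtin_clz(0) is undefined in C; hardware LZCNT returns 32,
  // which would give 8 here.
  return __builtin_clz((unsigned)X) - 24;
}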
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); // On X86 and X86-64, atomic operations are lowered to locked instructions. // Locked instructions, in turn, have implicit fence semantics (all memory @@ -506,9 +477,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) MVT VT = IntVTs[i]; setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + setOperationAction(ISD::ATOMIC_STORE, VT, Custom); } if (!Subtarget->is64Bit()) { + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); @@ -518,6 +491,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); } + if (Subtarget->hasCmpxchg16b()) { + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); + } + // FIXME - use subtarget debug flags if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() && @@ -539,7 +516,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); - setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::TRAP, MVT::Other, Legal); @@ -556,11 +534,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, - (Subtarget->is64Bit() ? MVT::i64 : MVT::i32), - (Subtarget->isTargetCOFF() - && !Subtarget->isTargetEnvMacho() - ? Custom : Expand)); + + if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + MVT::i64 : MVT::i32, Custom); + else if (EnableSegmentedStacks) + setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + MVT::i64 : MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + MVT::i64 : MVT::i32, Expand); if (!UseSoftFloat && X86ScalarSSEf64) { // f32 and f64 use SSE. 
@@ -739,7 +722,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SETCC, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand); @@ -754,6 +737,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand); for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) setTruncStoreAction((MVT::SimpleValueType)VT, @@ -816,7 +800,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::SELECT, MVT::v4f32, Custom); - setOperationAction(ISD::VSETCC, MVT::v4f32, Custom); + setOperationAction(ISD::SETCC, MVT::v4f32, Custom); } if (!UseSoftFloat && Subtarget->hasXMMInt()) { @@ -846,10 +830,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Custom); - setOperationAction(ISD::VSETCC, MVT::v2f64, Custom); - setOperationAction(ISD::VSETCC, MVT::v16i8, Custom); - setOperationAction(ISD::VSETCC, MVT::v8i16, Custom); - setOperationAction(ISD::VSETCC, MVT::v4i32, Custom); + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + setOperationAction(ISD::SETCC, MVT::v16i8, Custom); + setOperationAction(ISD::SETCC, MVT::v8i16, Custom); + setOperationAction(ISD::SETCC, MVT::v4i32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); @@ -925,7 +909,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); } - if (Subtarget->hasSSE41()) { + if (Subtarget->hasSSE41() || Subtarget->hasAVX()) { setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); @@ -944,6 +928,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SHL, MVT::v4i32, Custom); setOperationAction(ISD::SHL, MVT::v16i8, Custom); + setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); + setOperationAction(ISD::VSELECT, MVT::v2i64, Legal); + setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); + setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); + setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); + // i8 and i16 vectors are custom , because the source register and source // source memory operand types are not the same width. 
f32 vectors are // custom since the immediate controlling the insert encodes additional @@ -964,10 +954,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE2()) { + if (Subtarget->hasXMMInt()) { setOperationAction(ISD::SRL, MVT::v2i64, Custom); setOperationAction(ISD::SRL, MVT::v4i32, Custom); setOperationAction(ISD::SRL, MVT::v16i8, Custom); + setOperationAction(ISD::SRL, MVT::v8i16, Custom); setOperationAction(ISD::SHL, MVT::v2i64, Custom); setOperationAction(ISD::SHL, MVT::v4i32, Custom); @@ -977,15 +968,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v8i16, Custom); } - if (Subtarget->hasSSE42()) - setOperationAction(ISD::VSETCC, MVT::v2i64, Custom); + if (Subtarget->hasSSE42() || Subtarget->hasAVX()) + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); if (!UseSoftFloat && Subtarget->hasAVX()) { - addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); - addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); - addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); - addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); - addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); + addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); + addRegisterClass(MVT::v16i16, X86::VR256RegisterClass); + addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); + addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); + addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); setOperationAction(ISD::LOAD, MVT::v8f32, Legal); setOperationAction(ISD::LOAD, MVT::v4f64, Legal); @@ -1005,6 +997,59 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FNEG, MVT::v4f64, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom); + + setOperationAction(ISD::SRL, MVT::v4i64, Custom); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); + setOperationAction(ISD::SRL, MVT::v16i16, Custom); + setOperationAction(ISD::SRL, MVT::v32i8, Custom); + + setOperationAction(ISD::SHL, MVT::v4i64, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SHL, MVT::v16i16, Custom); + setOperationAction(ISD::SHL, MVT::v32i8, Custom); + + setOperationAction(ISD::SRA, MVT::v8i32, Custom); + setOperationAction(ISD::SRA, MVT::v16i16, Custom); + + setOperationAction(ISD::SETCC, MVT::v32i8, Custom); + setOperationAction(ISD::SETCC, MVT::v16i16, Custom); + setOperationAction(ISD::SETCC, MVT::v8i32, Custom); + setOperationAction(ISD::SETCC, MVT::v4i64, Custom); + + setOperationAction(ISD::SELECT, MVT::v4f64, Custom); + setOperationAction(ISD::SELECT, MVT::v4i64, Custom); + setOperationAction(ISD::SELECT, MVT::v8f32, Custom); + + setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); + setOperationAction(ISD::VSELECT, MVT::v4i64, Legal); + setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); + setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + + setOperationAction(ISD::ADD, MVT::v4i64, 
Custom); + setOperationAction(ISD::ADD, MVT::v8i32, Custom); + setOperationAction(ISD::ADD, MVT::v16i16, Custom); + setOperationAction(ISD::ADD, MVT::v32i8, Custom); + + setOperationAction(ISD::SUB, MVT::v4i64, Custom); + setOperationAction(ISD::SUB, MVT::v8i32, Custom); + setOperationAction(ISD::SUB, MVT::v16i16, Custom); + setOperationAction(ISD::SUB, MVT::v32i8, Custom); + + setOperationAction(ISD::MUL, MVT::v4i64, Custom); + setOperationAction(ISD::MUL, MVT::v8i32, Custom); + setOperationAction(ISD::MUL, MVT::v16i16, Custom); + // Don't lower v32i8 because there is no 128-bit byte mul + // Custom lower several nodes for 256-bit types. for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { @@ -1093,6 +1138,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::BUILD_VECTOR); + setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); @@ -1100,7 +1146,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::FADD); + setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::SINT_TO_FP); @@ -1124,25 +1173,26 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } -MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const { - return MVT::i8; +EVT X86TargetLowering::getSetCCResultType(EVT VT) const { + if (!VT.isVector()) return MVT::i8; + return VT.changeVectorElementTypeToInteger(); } /// getMaxByValAlign - Helper for getByValTypeAlignment to determine /// the desired ByVal argument alignment. -static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) { +static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { if (MaxAlign == 16) return; - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { if (VTy->getBitWidth() == 128) MaxAlign = 16; - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { unsigned EltAlign = 0; getMaxByValAlign(ATy->getElementType(), EltAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; - } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { + } else if (StructType *STy = dyn_cast<StructType>(Ty)) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { unsigned EltAlign = 0; getMaxByValAlign(STy->getElementType(i), EltAlign); @@ -1159,7 +1209,7 @@ static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) { /// function arguments in the caller parameter area. For X86, aggregates /// that contain SSE vectors are placed at 16-byte boundaries while the rest /// are at 4-byte boundaries. -unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { +unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { if (Subtarget->is64Bit()) { // Max of 8 and alignment of type. 
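With the getSetCCResultType change above, vector compares now produce an integer vector of the same shape rather than a bare i8 (e.g. v4f32 -> v4i32), and each lane follows the ZeroOrNegativeOneBooleanContent convention set earlier. A scalar picture of one lane-wise compare (illustrative):

#include <cstdint>
static void setccLT_v4f32(const float A[4], const float B[4],
                          int32_t Mask[4]) {
  for (int i = 0; i < 4; ++i)
    Mask[i] = (A[i] < B[i]) ? -1 : 0; // all-ones or all-zeros per lane
}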
unsigned TyAlign = TD->getABITypeAlignment(Ty); @@ -1203,9 +1253,12 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, ((DstAlign == 0 || DstAlign >= 16) && (SrcAlign == 0 || SrcAlign >= 16))) && Subtarget->getStackAlignment() >= 16) { - if (Subtarget->hasSSE2()) + if (Subtarget->hasAVX() && + Subtarget->getStackAlignment() >= 32) + return MVT::v8f32; + if (Subtarget->hasXMMInt()) return MVT::v4i32; - if (Subtarget->hasSSE1()) + if (Subtarget->hasXMM()) return MVT::v4f32; } else if (!MemcpyStrSrc && Size >= 8 && !Subtarget->is64Bit() && @@ -1408,7 +1461,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. - if (!Subtarget->hasSSE2()) + if (!Subtarget->hasXMMInt()) ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy); } } @@ -1700,6 +1753,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // places. assert(VA.getValNo() != LastVal && "Don't support value assigned to multiple locs yet"); + (void)LastVal; LastVal = VA.getValNo(); if (VA.isRegLoc()) { @@ -1917,6 +1971,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } + FuncInfo->setArgumentStackSize(StackSize); + return Chain; } @@ -2744,8 +2800,6 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: @@ -2754,10 +2808,17 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPERMILPS: + case X86ISD::VPERMILPSY: + case X86ISD::VPERMILPD: + case X86ISD::VPERMILPDY: + case X86ISD::VPERM2F128: return true; } return false; @@ -2783,6 +2844,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: + case X86ISD::VPERMILPS: + case X86ISD::VPERMILPSY: + case X86ISD::VPERMILPD: + case X86ISD::VPERMILPDY: return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); } @@ -2796,6 +2861,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PALIGN: case X86ISD::SHUFPD: case X86ISD::SHUFPS: + case X86ISD::VPERM2F128: return DAG.getNode(Opc, dl, VT, V1, V2, DAG.getConstant(TargetMask, MVT::i8)); } @@ -2815,8 +2881,6 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: @@ -2825,6 +2889,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: @@ -3026,6 +3092,17 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) { return (Val < 0) || (Val >= Low && Val < Hi); } +/// isUndefOrInRange - Return true if every element in Mask, begining +/// from position Pos and ending in Pos+Size, falls within the specified +/// range (L, L+Pos]. or is undef. 
+static bool isUndefOrInRange(const SmallVectorImpl<int> &Mask,
+                             int Pos, int Size, int Low, int Hi) {
+  for (int i = Pos, e = Pos+Size; i != e; ++i)
+    if (!isUndefOrInRange(Mask[i], Low, Hi))
+      return false;
+  return true;
+}
+
 /// isUndefOrEqual - Val is either less than zero (undef) or equal to the
 /// specified value.
 static bool isUndefOrEqual(int Val, int CmpVal) {
@@ -3034,6 +3111,17 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
   return false;
 }
 
+/// isSequentialOrUndefInRange - Return true if every element in Mask,
+/// beginning at position Pos and ending at Pos+Size, either matches the
+/// sequential values Low, Low+1, ... or is undef.
+static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask,
+                                       int Pos, int Size, int Low) {
+  for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+    if (!isUndefOrEqual(Mask[i], Low))
+      return false;
+  return true;
+}
+
 /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
 /// the second operand.
@@ -3104,11 +3192,13 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
 /// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PALIGNR.
 static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
-                          bool hasSSSE3) {
+                          bool hasSSSE3OrAVX) {
   int i, e = VT.getVectorNumElements();
+  if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64)
+    return false;
 
   // Do not handle v2i64 / v2f64 shuffles with palignr.
-  if (e < 4 || !hasSSSE3)
+  if (e < 4 || !hasSSSE3OrAVX)
     return false;
 
   for (i = 0; i != e; ++i)
@@ -3119,42 +3209,176 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
   if (i == e)
     return false;
 
-  // Determine if it's ok to perform a palignr with only the LHS, since we
-  // don't have access to the actual shuffle elements to see if RHS is undef.
-  bool Unary = Mask[i] < (int)e;
-  bool NeedsUnary = false;
+  // Make sure we're shifting in the right direction.
+  if (Mask[i] <= i)
+    return false;
 
   int s = Mask[i] - i;
 
   // Check the rest of the elements to see if they are consecutive.
   for (++i; i != e; ++i) {
     int m = Mask[i];
-    if (m < 0)
-      continue;
+    if (m >= 0 && m != s+i)
+      return false;
+  }
+  return true;
+}
+
+/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// VSHUFPSY.
+static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+                           const X86Subtarget *Subtarget) {
+  int NumElems = VT.getVectorNumElements();
+
+  if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+    return false;
+
+  if (NumElems != 8)
+    return false;
-
+  // VSHUFPSY divides the resulting vector into 4 chunks.
+  // The sources are also split into 4 chunks, and each destination
+  // chunk must come from a different source chunk.
+  //
+  //  SRC1 =>  X7 X6 X5 X4 X3 X2 X1 X0
+  //  SRC2 =>  Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
+  //
+  //  DST  =>  Y7..Y4,  Y7..Y4,  X7..X4,  X7..X4,
+  //           Y3..Y0,  Y3..Y0,  X3..X0,  X3..X0
+  //
+  int QuarterSize = NumElems/4;
+  int HalfSize = QuarterSize*2;
+  for (int i = 0; i < QuarterSize; ++i)
+    if (!isUndefOrInRange(Mask[i], 0, HalfSize))
+      return false;
+  for (int i = QuarterSize; i < QuarterSize*2; ++i)
+    if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
+      return false;
-    Unary = Unary && (m < (int)e);
-    NeedsUnary = NeedsUnary || (m < s);
+  // The mask of the second half must be the same as the first but with
+  // the appropriate offsets. This works in the same way as VPERMILPS
+  // works with masks.
+  for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
+    if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
+      return false;
+    int FstHalfIdx = i-HalfSize;
+    if (Mask[FstHalfIdx] < 0)
+      continue;
+    if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
+      return false;
-    if (NeedsUnary && !Unary)
+  }
+  for (int i = QuarterSize*3; i < NumElems; ++i) {
+    if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
      return false;
-    if (Unary && m != ((s+i) & (e-1)))
+    int FstHalfIdx = i-HalfSize;
+    if (Mask[FstHalfIdx] < 0)
+      continue;
+    if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
      return false;
-    if (!Unary && m != (s+i))
+  }
+
   return true;
 }
 
-bool X86::isPALIGNRMask(ShuffleVectorSDNode *N) {
-  SmallVector<int, 8> M;
-  N->getMask(M);
-  return ::isPALIGNRMask(M, N->getValueType(0), true);
+/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with the VSHUFPSY instruction.
+static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+  EVT VT = SVOp->getValueType(0);
+  int NumElems = VT.getVectorNumElements();
+
+  assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
+         "Only supports v8i32 and v8f32 types");
+
+  int HalfSize = NumElems/2;
+  unsigned Mask = 0;
+  for (int i = 0; i != NumElems ; ++i) {
+    if (SVOp->getMaskElt(i) < 0)
+      continue;
+    // The mask of the first half must be equal to the second one.
+    unsigned Shamt = (i%HalfSize)*2;
+    unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
+    Mask |= Elt << Shamt;
+  }
+
+  return Mask;
+}
+
+/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 256-bit
+/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
+/// version: the mask of the second half isn't bound to the first
+/// one.
+static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+                           const X86Subtarget *Subtarget) {
+  int NumElems = VT.getVectorNumElements();
+
+  if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+    return false;
+
+  if (NumElems != 4)
+    return false;
+
+  // VSHUFPDY divides the resulting vector into 4 chunks.
+  // The sources are also split into 4 chunks, and each destination
+  // chunk must come from a different source chunk.
+ // + // SRC1 => X3 X2 X1 X0 + // SRC2 => Y3 Y2 Y1 Y0 + // + // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0 + // + int QuarterSize = NumElems/4; + int HalfSize = QuarterSize*2; + for (int i = 0; i < QuarterSize; ++i) + if (!isUndefOrInRange(Mask[i], 0, HalfSize)) + return false; + for (int i = QuarterSize; i < QuarterSize*2; ++i) + if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize)) + return false; + for (int i = QuarterSize*2; i < QuarterSize*3; ++i) + if (!isUndefOrInRange(Mask[i], HalfSize, NumElems)) + return false; + for (int i = QuarterSize*3; i < NumElems; ++i) + if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2)) + return false; + + return true; +} + +/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VSHUFPDY instruction. +static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + + assert(NumElems == 4 && VT.getSizeInBits() == 256 && + "Only supports v4i64 and v4f64 types"); + + int HalfSize = NumElems/2; + unsigned Mask = 0; + for (int i = 0; i != NumElems ; ++i) { + if (SVOp->getMaskElt(i) < 0) + continue; + int Elt = SVOp->getMaskElt(i) % HalfSize; + Mask |= Elt << i; + } + + return Mask; } /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to SHUFP*. +/// specifies a shuffle of elements that is suitable for input to 128-bit +/// SHUFPS and SHUFPD. static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { int NumElems = VT.getVectorNumElements(); + + if (VT.getSizeInBits() != 128) + return false; + if (NumElems != 2 && NumElems != 4) return false; @@ -3204,7 +3428,13 @@ static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) + EVT VT = N->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + + if (VT.getSizeInBits() != 128) + return false; + + if (NumElems != 4) return false; // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 @@ -3218,15 +3448,19 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. 
vector_shuffle v, undef, /// <2, 3, 2, 3> bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); + EVT VT = N->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + + if (VT.getSizeInBits() != 128) + return false; if (NumElems != 4) return false; return isUndefOrEqual(N->getMaskElt(0), 2) && - isUndefOrEqual(N->getMaskElt(1), 3) && - isUndefOrEqual(N->getMaskElt(2), 2) && - isUndefOrEqual(N->getMaskElt(3), 3); + isUndefOrEqual(N->getMaskElt(1), 3) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); } /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand @@ -3273,20 +3507,22 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for unpckh"); + + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) return false; - // Handle vector lengths > 128 bits. Define a "section" as a set of - // 128 bits. AVX defines UNPCK* to operate independently on 128-bit - // sections. - unsigned NumSections = VT.getSizeInBits() / 128; - if (NumSections == 0 ) NumSections = 1; // Handle MMX - unsigned NumSectionElts = NumElts / NumSections; + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts/NumLanes; unsigned Start = 0; - unsigned End = NumSectionElts; - for (unsigned s = 0; s < NumSections; ++s) { - for (unsigned i = Start, j = s * NumSectionElts; + unsigned End = NumLaneElts; + for (unsigned s = 0; s < NumLanes; ++s) { + for (unsigned i = Start, j = s * NumLaneElts; i != End; i += 2, ++j) { int BitI = Mask[i]; @@ -3302,8 +3538,8 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, } } // Process the next 128 bits. - Start += NumSectionElts; - End += NumSectionElts; + Start += NumLaneElts; + End += NumLaneElts; } return true; @@ -3320,21 +3556,38 @@ bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for unpckh"); + + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) return false; - for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; - if (!isUndefOrEqual(BitI, j + NumElts/2)) - return false; - if (V2IsSplat) { - if (isUndefOrEqual(BitI1, NumElts)) - return false; - } else { - if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts)) + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. 
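The lane-local pattern the comment above describes looks like this for v8i32; a quick check that the canonical unpack-low mask satisfies the loop's BitI/BitI1 conditions (illustrative):

static bool isV8I32UnpcklExample() {
  const int Mask[8] = { 0, 8, 1, 9, 4, 12, 5, 13 }; // two 128-bit lanes
  for (unsigned l = 0; l != 2; ++l)
    for (unsigned i = l*4, j = l*4; i != l*4 + 4; i += 2, ++j)
      if (Mask[i] != (int)j || Mask[i+1] != (int)(j + 8)) // j + NumElts
        return false;
  return true; // <0,8,1,9> in lane 0, <4,12,5,13> in lane 1
}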
+  unsigned NumLanes = VT.getSizeInBits()/128;
+  unsigned NumLaneElts = NumElts/NumLanes;
+
+  unsigned Start = 0;
+  unsigned End = NumLaneElts;
+  for (unsigned l = 0; l != NumLanes; ++l) {
+    for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2;
+         i != End; i += 2, ++j) {
+      int BitI  = Mask[i];
+      int BitI1 = Mask[i+1];
+      if (!isUndefOrEqual(BitI, j))
+        return false;
+      if (V2IsSplat) {
+        if (isUndefOrEqual(BitI1, NumElts))
+          return false;
+      } else {
+        if (!isUndefOrEqual(BitI1, j+NumElts))
+          return false;
+      }
+    }
+    // Process the next 128 bits.
+    Start += NumLaneElts;
+    End += NumLaneElts;
   }
   return true;
 }
@@ -3353,16 +3606,21 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
     return false;
 
-  // Handle vector lengths > 128 bits.  Define a "section" as a set of
-  // 128 bits.  AVX defines UNPCK* to operate independently on 128-bit
-  // sections.
-  unsigned NumSections = VT.getSizeInBits() / 128;
-  if (NumSections == 0 ) NumSections = 1;  // Handle MMX
-  unsigned NumSectionElts = NumElems / NumSections;
+  // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern.
+  // FIXME: Need a better way to get rid of this, there's no latency difference
+  // between UNPCKLPD and MOVDDUP; the latter should always be checked first
+  // and the former later. We should also remove the "_undef" special mask.
+  if (NumElems == 4 && VT.getSizeInBits() == 256)
+    return false;
+
+  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+  // independently on 128-bit lanes.
+  unsigned NumLanes = VT.getSizeInBits() / 128;
+  unsigned NumLaneElts = NumElems / NumLanes;
 
-  for (unsigned s = 0; s < NumSections; ++s) {
-    for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
-         i != NumSectionElts * (s + 1);
+  for (unsigned s = 0; s < NumLanes; ++s) {
+    for (unsigned i = s * NumLaneElts, j = s * NumLaneElts;
+         i != NumLaneElts * (s + 1);
          i += 2, ++j) {
       int BitI  = Mask[i];
       int BitI1 = Mask[i+1];
@@ -3433,6 +3691,189 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
   return ::isMOVLMask(M, N->getValueType(0));
 }
 
+/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// as permutations between 128-bit chunks or halves. As an example, in the
+/// shuffle below:
+///   vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
+/// the first half comes from the second half of V1 and the second half from
+/// the second half of V2.
+static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+                             const X86Subtarget *Subtarget) {
+  if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+    return false;
+
+  // The shuffle result is divided into half A and half B. In total the two
+  // sources have 4 halves, namely: C, D, E, F. The final values of A and
+  // B must come from C, D, E or F.
+  int HalfSize = VT.getVectorNumElements()/2;
+  bool MatchA = false, MatchB = false;
+
+  // Check if A comes from one of C, D, E, F.
+  for (int Half = 0; Half < 4; ++Half) {
+    if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
+      MatchA = true;
+      break;
+    }
+  }
+
+  // Check if B comes from one of C, D, E, F.
+  for (int Half = 0; Half < 4; ++Half) {
+    if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
+      MatchB = true;
+      break;
+    }
+  }
+
+  return MatchA && MatchB;
+}
+
+/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
+static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+  EVT VT = SVOp->getValueType(0);
+
+  int HalfSize = VT.getVectorNumElements()/2;
+
+  int FstHalf = 0, SndHalf = 0;
+  for (int i = 0; i < HalfSize; ++i) {
+    if (SVOp->getMaskElt(i) > 0) {
+      FstHalf = SVOp->getMaskElt(i)/HalfSize;
+      break;
+    }
+  }
+  for (int i = HalfSize; i < HalfSize*2; ++i) {
+    if (SVOp->getMaskElt(i) > 0) {
+      SndHalf = SVOp->getMaskElt(i)/HalfSize;
+      break;
+    }
+  }
+
+  return (FstHalf | (SndHalf << 4));
+}
+
+/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
+/// Note that VPERMIL mask matching differs depending on whether the
+/// underlying type is 32- or 64-bit. In VPERMILPS the high half of the mask
+/// should point to the same elements as the low half, but in the higher half
+/// of the source. In VPERMILPD the two lanes can be shuffled independently
+/// of each other, with the same restriction that lanes can't be crossed.
+static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
+                            const X86Subtarget *Subtarget) {
+  int NumElts = VT.getVectorNumElements();
+  int NumLanes = VT.getSizeInBits()/128;
+
+  if (!Subtarget->hasAVX())
+    return false;
+
+  // Only match 256-bit with 64-bit types.
+  if (VT.getSizeInBits() != 256 || NumElts != 4)
+    return false;
+
+  // The mask on the high lane is independent of the low. Both can match
+  // any element inside its own lane, but can't cross.
+  int LaneSize = NumElts/NumLanes;
+  for (int l = 0; l < NumLanes; ++l)
+    for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
+      int LaneStart = l*LaneSize;
+      if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
+        return false;
+    }
+
+  return true;
+}
+
+/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
+/// Note that VPERMIL mask matching differs depending on whether the
+/// underlying type is 32- or 64-bit. In VPERMILPS the high half of the mask
+/// should point to the same elements as the low half, but in the higher half
+/// of the source. In VPERMILPD the two lanes can be shuffled independently
+/// of each other, with the same restriction that lanes can't be crossed.
+static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
+                            const X86Subtarget *Subtarget) {
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NumLanes = VT.getSizeInBits()/128;
+
+  if (!Subtarget->hasAVX())
+    return false;
+
+  // Only match 256-bit with 32-bit types.
+  if (VT.getSizeInBits() != 256 || NumElts != 8)
+    return false;
+
+  // The mask on the high lane should be the same as the low. Actually,
+  // they can differ if one of the corresponding indices in a lane is undef
+  // and the other stays in range.
+  int LaneSize = NumElts/NumLanes;
+  for (int i = 0; i < LaneSize; ++i) {
+    int HighElt = i+LaneSize;
+    bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
+    bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
+
+    if (!HighValid || !LowValid)
+      return false;
+    if (Mask[i] < 0 || Mask[HighElt] < 0)
+      continue;
+    if (Mask[HighElt]-Mask[i] != LaneSize)
+      return false;
+  }
+
+  return true;
+}
+
+/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERMILPS* instructions.
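Running getShuffleVPERM2F128Immediate on the mask quoted in the isVPERM2F128Mask comment gives a concrete immediate; the low bits pick the source half for the destination's low 128 bits and bits 4-5 pick it for the high 128 bits:

// Mask <4,5,6,7,12,13,14,15> on a 256-bit type (HalfSize = 4):
//   low half starts at 4   -> FstHalf = 4/4  = 1 (upper half of V1)
//   high half starts at 12 -> SndHalf = 12/4 = 3 (upper half of V2)
static unsigned vperm2f128ExampleImm() {
  const int Mask[8] = { 4, 5, 6, 7, 12, 13, 14, 15 };
  unsigned FstHalf = Mask[0] / 4;
  unsigned SndHalf = Mask[4] / 4;
  return FstHalf | (SndHalf << 4); // 0x31
}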
+static unsigned getShuffleVPERMILPSImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + EVT VT = SVOp->getValueType(0); + + int NumElts = VT.getVectorNumElements(); + int NumLanes = VT.getSizeInBits()/128; + int LaneSize = NumElts/NumLanes; + + // Although the mask is equal for both lanes do it twice to get the cases + // where a mask will match because the same mask element is undef on the + // first half but valid on the second. This would get pathological cases + // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid. + unsigned Mask = 0; + for (int l = 0; l < NumLanes; ++l) { + for (int i = 0; i < LaneSize; ++i) { + int MaskElt = SVOp->getMaskElt(i+(l*LaneSize)); + if (MaskElt < 0) + continue; + if (MaskElt >= LaneSize) + MaskElt -= LaneSize; + Mask |= MaskElt << (i*2); + } + } + + return Mask; +} + +/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VPERMILPD* instructions. +static unsigned getShuffleVPERMILPDImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + EVT VT = SVOp->getValueType(0); + + int NumElts = VT.getVectorNumElements(); + int NumLanes = VT.getSizeInBits()/128; + + unsigned Mask = 0; + int LaneSize = NumElts/NumLanes; + for (int l = 0; l < NumLanes; ++l) + for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) { + int MaskElt = SVOp->getMaskElt(i); + if (MaskElt < 0) + continue; + Mask |= (MaskElt-l*LaneSize) << i; + } + + return Mask; +} + /// isCommutedMOVL - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. @@ -3463,58 +3904,92 @@ static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. -bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) +/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7> +bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N, + const X86Subtarget *Subtarget) { + if (!Subtarget->hasSSE3() && !Subtarget->hasAVX()) return false; - // Expect 1, 1, 3, 3 - for (unsigned i = 0; i < 2; ++i) { - int Elt = N->getMaskElt(i); - if (Elt >= 0 && Elt != 1) - return false; - } + // The second vector must be undef + if (N->getOperand(1).getOpcode() != ISD::UNDEF) + return false; - bool HasHi = false; - for (unsigned i = 2; i < 4; ++i) { - int Elt = N->getMaskElt(i); - if (Elt >= 0 && Elt != 3) + EVT VT = N->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + + if ((VT.getSizeInBits() == 128 && NumElems != 4) || + (VT.getSizeInBits() == 256 && NumElems != 8)) + return false; + + // "i+1" is the value the indexed mask element must have + for (unsigned i = 0; i < NumElems; i += 2) + if (!isUndefOrEqual(N->getMaskElt(i), i+1) || + !isUndefOrEqual(N->getMaskElt(i+1), i+1)) return false; - if (Elt == 3) - HasHi = true; - } - // Don't use movshdup if it can be done with a shufps. - // FIXME: verify that matching u, u, 3, 3 is what we want. - return HasHi; + + return true; } /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
-bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) { - if (N->getValueType(0).getVectorNumElements() != 4) +/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6> +bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N, + const X86Subtarget *Subtarget) { + if (!Subtarget->hasSSE3() && !Subtarget->hasAVX()) + return false; + + // The second vector must be undef + if (N->getOperand(1).getOpcode() != ISD::UNDEF) + return false; + + EVT VT = N->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + + if ((VT.getSizeInBits() == 128 && NumElems != 4) || + (VT.getSizeInBits() == 256 && NumElems != 8)) return false; - // Expect 0, 0, 2, 2 - for (unsigned i = 0; i < 2; ++i) - if (N->getMaskElt(i) > 0) + // "i" is the value the indexed mask element must have + for (unsigned i = 0; i < NumElems; i += 2) + if (!isUndefOrEqual(N->getMaskElt(i), i) || + !isUndefOrEqual(N->getMaskElt(i+1), i)) return false; - bool HasHi = false; - for (unsigned i = 2; i < 4; ++i) { - int Elt = N->getMaskElt(i); - if (Elt >= 0 && Elt != 2) + return true; +} + +/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to 256-bit +/// version of MOVDDUP. +static bool isMOVDDUPYMask(ShuffleVectorSDNode *N, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + int NumElts = VT.getVectorNumElements(); + bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF; + + if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 || + !V2IsUndef || NumElts != 4) + return false; + + for (int i = 0; i != NumElts/2; ++i) + if (!isUndefOrEqual(N->getMaskElt(i), 0)) return false; - if (Elt == 2) - HasHi = true; - } - // Don't use movsldup if it can be done with a shufps. - return HasHi; + for (int i = NumElts/2; i != NumElts; ++i) + if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2)) + return false; + return true; } /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to MOVDDUP. +/// specifies a shuffle of elements that is suitable for input to 128-bit +/// version of MOVDDUP. bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { - int e = N->getValueType(0).getVectorNumElements() / 2; + EVT VT = N->getValueType(0); + + if (VT.getSizeInBits() != 128) + return false; + int e = VT.getVectorNumElements() / 2; for (int i = 0; i < e; ++i) if (!isUndefOrEqual(N->getMaskElt(i), i)) return false; @@ -3627,6 +4102,7 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { if (Val >= 0) break; } + assert(Val - i > 0 && "PALIGNR imm should be positive"); return (Val - i) * EltSize; } @@ -3644,7 +4120,6 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) { EVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); - return Index / NumElemsPerChunk; } @@ -3662,7 +4137,6 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { EVT ElVT = VecVT.getVectorElementType(); unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits(); - return Index / NumElemsPerChunk; } @@ -3716,7 +4190,10 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). 
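For reference, the masks the two MOVDDUP predicates accept differ only in width; a quick check of the 256-bit case against isMOVDDUPYMask's two loops (illustrative):

static bool isV4F64MovddupyExample() {
  const int Mask[4] = { 0, 0, 2, 2 };  // v4f64, V2 undef, AVX only
  for (int i = 0; i != 2; ++i)         // low lane broadcasts element 0
    if (Mask[i] != 0) return false;
  for (int i = 2; i != 4; ++i)         // high lane broadcasts NumElts/2
    if (Mask[i] != 2) return false;
  return true;
}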
static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) { - if (Op->getValueType(0).getVectorNumElements() != 4) + EVT VT = Op->getValueType(0); + if (VT.getSizeInBits() != 128) + return false; + if (VT.getVectorNumElements() != 4) return false; for (unsigned i = 0, e = 2; i != e; ++i) if (!isUndefOrEqual(Op->getMaskElt(i), i+2)) @@ -3748,6 +4225,10 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { /// MOVLP, it must be either a vector load or a scalar load to vector. static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, ShuffleVectorSDNode *Op) { + EVT VT = Op->getValueType(0); + if (VT.getSizeInBits() != 128) + return false; + if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) return false; // Is V2 is a vector load, don't do this transformation. We will try to use @@ -3755,7 +4236,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, if (ISD::isNON_EXTLoad(V2)) return false; - unsigned NumElems = Op->getValueType(0).getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4) return false; @@ -3811,7 +4292,7 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { /// getZeroVector - Returns a vector of specified type with all zero elements. /// -static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, +static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); @@ -3819,7 +4300,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, // to their dest type. This ensures they get CSE'd. SDValue Vec; if (VT.getSizeInBits() == 128) { // SSE - if (HasSSE2) { // SSE2 + if (HasXMMInt) { // SSE2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } else { // SSE1 @@ -3838,21 +4319,25 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, } /// getOnesVector - Returns a vector of specified type with all bits set. -/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to -/// their original type, ensuring they get CSE'd. +/// Always build ones vectors as <4 x i32>. For 256-bit types, use two +/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their +/// original type, ensuring they get CSE'd. static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); assert((VT.is128BitVector() || VT.is256BitVector()) && "Expected a 128-bit or 256-bit vector type"); SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Cst, Cst, Cst, Cst); - SDValue Vec; if (VT.is256BitVector()) { - SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); - } else - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32), + Vec, DAG.getConstant(0, MVT::i32), DAG, dl); + Vec = Insert128BitVector(InsV, Vec, + DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl); + } + return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } @@ -3902,7 +4387,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. +/// getUnpackh - Returns a vector_shuffle node for an unpackh operation. 
 static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                           SDValue V2) {
   unsigned NumElems = VT.getVectorNumElements();
@@ -3915,31 +4400,95 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
   return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
 }

-/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
-  EVT PVT = MVT::v4f32;
-  EVT VT = SV->getValueType(0);
-  DebugLoc dl = SV->getDebugLoc();
-  SDValue V1 = SV->getOperand(0);
+// PromoteSplati8i16 - All i16 and i8 vector types can't be used directly by
+// a generic shuffle instruction because the target has no such instructions.
+// Generate shuffles which repeat i16 and i8 several times until they can be
+// represented by v4f32 and then be manipulated by target supported shuffles.
+static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
+  EVT VT = V.getValueType();
   int NumElems = VT.getVectorNumElements();
-  int EltNo = SV->getSplatIndex();
+  DebugLoc dl = V.getDebugLoc();

-  // unpack elements to the correct location
   while (NumElems > 4) {
     if (EltNo < NumElems/2) {
-      V1 = getUnpackl(DAG, dl, VT, V1, V1);
+      V = getUnpackl(DAG, dl, VT, V, V);
     } else {
-      V1 = getUnpackh(DAG, dl, VT, V1, V1);
+      V = getUnpackh(DAG, dl, VT, V, V);
       EltNo -= NumElems/2;
     }
     NumElems >>= 1;
   }
+  return V;
+}
+
+/// getLegalSplat - Generate a legal splat with supported x86 shuffles
+static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
+  EVT VT = V.getValueType();
+  DebugLoc dl = V.getDebugLoc();
+  assert((VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
+         && "Vector size not supported");
+
+  if (VT.getSizeInBits() == 128) {
+    V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
+    int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+    V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
+                             &SplatMask[0]);
+  } else {
+    // To use VPERMILPS to splat scalars, the second half of indices must
+    // refer to the higher part, which is a duplication of the lower one,
+    // because VPERMILPS can only handle in-lane permutations.
+    int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
+                         EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
+
+    V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
+    V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
+                             &SplatMask[0]);
+  }
+
+  return DAG.getNode(ISD::BITCAST, dl, VT, V);
+}
+
+/// PromoteSplat - Splat is promoted to target supported vector shuffles.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
+  EVT SrcVT = SV->getValueType(0);
+  SDValue V1 = SV->getOperand(0);
+  DebugLoc dl = SV->getDebugLoc();
+
+  int EltNo = SV->getSplatIndex();
+  int NumElems = SrcVT.getVectorNumElements();
+  unsigned Size = SrcVT.getSizeInBits();
+
+  assert(((Size == 128 && NumElems > 4) || Size == 256) &&
+         "Unknown how to promote splat for type");
+
+  // Extract the 128-bit part containing the splat element and update
+  // the splat element index when it refers to the higher register.
+  if (Size == 256) {
+    unsigned Idx = (EltNo > NumElems/2) ? NumElems/2 : 0;
+    V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl);
+    if (Idx > 0)
+      EltNo -= NumElems/2;
+  }
+
+  // All i16 and i8 vector types can't be used directly by a generic shuffle
+  // instruction because the target has no such instruction. Generate shuffles
+  // which repeat i16 and i8 several times until they fit in i32, and then can
+  // be manipulated by target supported shuffles.
+  EVT EltVT = SrcVT.getVectorElementType();
+  if (EltVT == MVT::i8 || EltVT == MVT::i16)
+    V1 = PromoteSplati8i16(V1, DAG, EltNo);
+
+  // Recreate the 256-bit vector and place the same 128-bit vector
+  // into the low and high part. This is necessary because we want
+  // to use VPERM* to shuffle the vectors.
+  if (Size == 256) {
+    SDValue InsV = Insert128BitVector(DAG.getUNDEF(SrcVT), V1,
+                         DAG.getConstant(0, MVT::i32), DAG, dl);
+    V1 = Insert128BitVector(InsV, V1,
+               DAG.getConstant(NumElems/2, MVT::i32), DAG, dl);
+  }

-  // Perform the splat.
-  int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
-  V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1);
-  V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
-  return DAG.getNode(ISD::BITCAST, dl, VT, V1);
+  return getLegalSplat(DAG, V1, EltNo);
 }

 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
@@ -3947,11 +4496,11 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
 /// element of V2 is swizzled into the zero/undef vector, landing at element
 /// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
 static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
-                                           bool isZero, bool HasSSE2,
-                                           SelectionDAG &DAG) {
+                                           bool isZero, bool HasXMMInt,
+                                           SelectionDAG &DAG) {
   EVT VT = V2.getValueType();
   SDValue V1 = isZero
-    ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+    ? getZeroVector(VT, HasXMMInt, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
   unsigned NumElems = VT.getVectorNumElements();
   SmallVector<int, 16> MaskVec;
   for (unsigned i = 0; i != NumElems; ++i)
@@ -4005,6 +4554,8 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
     break;
   case X86ISD::UNPCKHPS:
   case X86ISD::UNPCKHPD:
+  case X86ISD::VUNPCKHPSY:
+  case X86ISD::VUNPCKHPDY:
     DecodeUNPCKHPMask(NumElems, ShuffleMask);
     break;
   case X86ISD::PUNPCKLBW:
@@ -4015,8 +4566,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
     break;
   case X86ISD::UNPCKLPS:
   case X86ISD::UNPCKLPD:
-  case X86ISD::VUNPCKLPS:
-  case X86ISD::VUNPCKLPD:
   case X86ISD::VUNPCKLPSY:
   case X86ISD::VUNPCKLPDY:
     DecodeUNPCKLPMask(VT, ShuffleMask);
@@ -4052,8 +4601,41 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
       return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
                                  Depth+1);
   }
+  case X86ISD::VPERMILPS:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                        ShuffleMask);
+    break;
+  case X86ISD::VPERMILPSY:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                        ShuffleMask);
+    break;
+  case X86ISD::VPERMILPD:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                        ShuffleMask);
+    break;
+  case X86ISD::VPERMILPDY:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                        ShuffleMask);
+    break;
+  case X86ISD::VPERM2F128:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                         ShuffleMask);
+    break;
+  case X86ISD::MOVDDUP:
+  case X86ISD::MOVLHPD:
+  case X86ISD::MOVLPD:
+  case X86ISD::MOVLPS:
+  case X86ISD::MOVSHDUP:
+  case X86ISD::MOVSLDUP:
+  case X86ISD::PALIGN:
+    return SDValue(); // Not yet implemented.
   default:
-    assert("not implemented for target shuffle node");
+    assert(0 && "unknown target shuffle node");
     return SDValue();
   }

@@ -4205,6 +4787,11 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
 /// logical left or right shift of a vector.
 static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
                           bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+  // Although the logic below supports any bitwidth size, there are no
+  // shift instructions which handle more than 128-bit vectors.
+  if (SVOp->getValueType(0).getSizeInBits() > 128)
+    return false;
+
   if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
       isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
     return true;
@@ -4295,6 +4882,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
                          unsigned NumBits, SelectionDAG &DAG,
                          const TargetLowering &TLI, DebugLoc dl) {
+  assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
   EVT ShVT = MVT::v2i64;
   unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
   SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
@@ -4333,42 +4921,52 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
     return SDValue();
   }

+  // FIXME: 256-bit vector instructions don't require a strict alignment,
+  // improve this code to support it better.
+  unsigned RequiredAlign = VT.getSizeInBits()/8;
   SDValue Chain = LD->getChain();
-  // Make sure the stack object alignment is at least 16.
+  // Make sure the stack object alignment is at least 16 or 32.
   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
-  if (DAG.InferPtrAlignment(Ptr) < 16) {
+  if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
     if (MFI->isFixedObjectIndex(FI)) {
       // Can't change the alignment. FIXME: It's possible to compute
       // the exact stack offset and reference FI + adjust offset instead.
       // If someone *really* cares about this. That's the way to implement it.
       return SDValue();
     } else {
-      MFI->setObjectAlignment(FI, 16);
+      MFI->setObjectAlignment(FI, RequiredAlign);
     }
   }

-  // (Offset % 16) must be multiple of 4. Then address is then
+  // (Offset % 16 or 32) must be a multiple of 4. The address is then
   // Ptr + (Offset & ~15).
   if (Offset < 0)
     return SDValue();
-  if ((Offset % 16) & 3)
+  if ((Offset % RequiredAlign) & 3)
     return SDValue();
-  int64_t StartOffset = Offset & ~15;
+  int64_t StartOffset = Offset & ~(RequiredAlign-1);
   if (StartOffset)
     Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
                       Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));

   int EltNo = (Offset - StartOffset) >> 2;
-  int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
-  EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
-  SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+  int NumElems = VT.getVectorNumElements();
+
+  EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32;
+  EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
+  SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
                            LD->getPointerInfo().getWithOffset(StartOffset),
                            false, false, 0);
-  // Canonicalize it to a v4i32 shuffle.
-  V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
-  return DAG.getNode(ISD::BITCAST, dl, VT,
-                     DAG.getVectorShuffle(MVT::v4i32, dl, V1,
-                                          DAG.getUNDEF(MVT::v4i32),&Mask[0]));
+
+  // Canonicalize it to a v4i32 or v8i32 shuffle.
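+  // For example, for a 256-bit vector (RequiredAlign == 32) loaded at
+  // Offset == 20: StartOffset == 0, EltNo == 5, and the mask built below is
+  // <5,5,5,5,5,5,5,5> on the canonical v8i32 type.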
+ SmallVector<int, 8> Mask; + for (int i = 0; i < NumElems; ++i) + Mask.push_back(EltNo); + + V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1); + return DAG.getNode(ISD::BITCAST, dl, NVT, + DAG.getVectorShuffle(CanonVT, dl, V1, + DAG.getUNDEF(CanonVT),&Mask[0])); } return SDValue(); @@ -4428,12 +5026,16 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, LDBase->getPointerInfo(), LDBase->isVolatile(), LDBase->isNonTemporal(), LDBase->getAlignment()); - } else if (NumElems == 4 && LastLoadedElt == 1) { + } else if (NumElems == 4 && LastLoadedElt == 1 && + DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; - SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, - Ops, 2, MVT::i32, - LDBase->getMemOperand()); + SDValue ResNode = + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64, + LDBase->getPointerInfo(), + LDBase->getAlignment(), + false/*isVolatile*/, true/*ReadMem*/, + false/*WriteMem*/); return DAG.getNode(ISD::BITCAST, DL, VT, ResNode); } return SDValue(); @@ -4445,47 +5047,26 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT ExtVT = VT.getVectorElementType(); - unsigned NumElems = Op.getNumOperands(); - // For AVX-length vectors, build the individual 128-bit pieces and - // use shuffles to put them in place. - if (VT.getSizeInBits() > 256 && - Subtarget->hasAVX() && - !ISD::isBuildVectorAllZeros(Op.getNode())) { - SmallVector<SDValue, 8> V; - V.resize(NumElems); - for (unsigned i = 0; i < NumElems; ++i) { - V[i] = Op.getOperand(i); - } - - EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); - - // Build the lower subvector. - SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2); - // Build the upper subvector. - SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2], - NumElems/2); + // Vectors containing all zeros can be matched by pxor and xorps later + if (ISD::isBuildVectorAllZeros(Op.getNode())) { + // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd + // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. + if (Op.getValueType() == MVT::v4i32 || + Op.getValueType() == MVT::v8i32) + return Op; - return ConcatVectors(Lower, Upper, DAG); + return getZeroVector(Op.getValueType(), Subtarget->hasXMMInt(), DAG, dl); } - // All zero's: - // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX) - // All one's: - // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX) - if (ISD::isBuildVectorAllZeros(Op.getNode()) || - ISD::isBuildVectorAllOnes(Op.getNode())) { - // Canonicalize this to <4 x i32> or <8 x 32> (SSE) to - // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are - // eliminated on x86-32 hosts. - if (Op.getValueType() == MVT::v4i32 || - Op.getValueType() == MVT::v8i32) + // Vectors containing all ones can be matched by pcmpeqd on 128-bit width + // vectors or broken into v4i32 operations on 256-bit vectors. 
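+  // (pcmpeqd of a register with itself sets every bit, so the v4i32 all-ones
+  // vector is materialized without a constant-pool load.)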
+ if (ISD::isBuildVectorAllOnes(Op.getNode())) { + if (Op.getValueType() == MVT::v4i32) return Op; - if (ISD::isBuildVectorAllOnes(Op.getNode())) - return getOnesVector(Op.getValueType(), DAG, dl); - return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl); + return getOnesVector(Op.getValueType(), DAG, dl); } unsigned EVTBits = ExtVT.getSizeInBits(); @@ -4538,7 +5119,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, - Subtarget->hasSSE2(), DAG); + Subtarget->hasXMMInt(), DAG); // Now we have our 32-bit value zero extended in the low element of // a vector. If Idx != 0, swizzle it into place. @@ -4566,7 +5147,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { (ExtVT == MVT::i64 && Subtarget->is64Bit())) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. - return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), + return getShuffleVectorZeroOrUndef(Item, 0, true,Subtarget->hasXMMInt(), DAG); } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); @@ -4574,7 +5155,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT MiddleVT = MVT::v4i32; Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, - Subtarget->hasSSE2(), DAG); + Subtarget->hasXMMInt(), DAG); return DAG.getNode(ISD::BITCAST, dl, VT, Item); } } @@ -4603,7 +5184,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Turn it into a shuffle of zero and zero-extended scalar to vector. Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, - Subtarget->hasSSE2(), DAG); + Subtarget->hasXMMInt(), DAG); SmallVector<int, 8> MaskVec; for (unsigned i = 0; i < NumElems; i++) MaskVec.push_back(i == Idx ? 0 : 1); @@ -4631,6 +5212,27 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (IsAllConstants) return SDValue(); + // For AVX-length vectors, build the individual 128-bit pieces and use + // shuffles to put them in place. + if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) { + SmallVector<SDValue, 32> V; + for (unsigned i = 0; i < NumElems; ++i) + V.push_back(Op.getOperand(i)); + + EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); + + // Build both the lower and upper subvector. + SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2); + SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2], + NumElems/2); + + // Recreate the wider vector with the lower and upper part. + SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Lower, + DAG.getConstant(0, MVT::i32), DAG, dl); + return Insert128BitVector(Vec, Upper, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + } + // Let legalizer expand 2-wide build_vectors. 
if (EVTBits == 64) { if (NumNonZero == 1) { @@ -4639,7 +5241,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(Idx)); return getShuffleVectorZeroOrUndef(V2, Idx, true, - Subtarget->hasSSE2(), DAG); + Subtarget->hasXMMInt(), DAG); } return SDValue(); } @@ -4664,7 +5266,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { for (unsigned i = 0; i < 4; ++i) { bool isZero = !(NonZeros & (1 << i)); if (isZero) - V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); + V[i] = getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl); else V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); } @@ -4708,7 +5310,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return LD; // For SSE 4.1, use insertps to put the high elements into the low element. - if (getSubtarget()->hasSSE41()) { + if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) { SDValue Result; if (Op.getOperand(0).getOpcode() != ISD::UNDEF) Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); @@ -4758,13 +5360,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue -X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { - // We support concatenate two MMX registers and place them in a MMX - // register. This is better than doing a stack convert. +// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place +// them in a MMX register. This is better than doing a stack convert. +static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT ResVT = Op.getValueType(); - assert(Op.getNumOperands() == 2); + assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || ResVT == MVT::v8i16 || ResVT == MVT::v16i8); int Mask[2]; @@ -4785,6 +5386,42 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); } +// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction +// to create 256-bit vectors from two other 128-bit ones. +static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + EVT ResVT = Op.getValueType(); + + assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide"); + + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + unsigned NumElems = ResVT.getVectorNumElements(); + + SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, ResVT), V1, + DAG.getConstant(0, MVT::i32), DAG, dl); + return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); +} + +SDValue +X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { + EVT ResVT = Op.getValueType(); + + assert(Op.getNumOperands() == 2); + assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) && + "Unsupported CONCAT_VECTORS for value type"); + + // We support concatenate two MMX registers and place them in a MMX register. + // This is better than doing a stack convert. + if (ResVT.is128BitVector()) + return LowerMMXCONCAT_VECTORS(Op, DAG); + + // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors + // from two other 128-bit ones. + return LowerAVXCONCAT_VECTORS(Op, DAG); +} + // v8i16 shuffles - Prefer shuffles in the following order: // 1. [all] pshuflw, pshufhw, optional move // 2. 
[ssse3] 1 x pshufb
@@ -4844,7 +5481,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
   // quads, disable the next transformation since it does not help SSSE3.
   bool V1Used = InputQuads[0] || InputQuads[1];
   bool V2Used = InputQuads[2] || InputQuads[3];
-  if (Subtarget->hasSSSE3()) {
+  if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
     if (InputQuads.count() == 2 && V1Used && V2Used) {
       BestLoQuad = InputQuads.find_first();
       BestHiQuad = InputQuads.find_next(BestLoQuad);
@@ -4917,7 +5554,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
   // If we have SSSE3, and all words of the result are from 1 input vector,
   // case 2 is generated, otherwise case 3 is generated.  If no SSSE3
   // is present, fall back to case 4.
-  if (Subtarget->hasSSSE3()) {
+  if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
     SmallVector<SDValue,16> pshufbMask;

     // If we have elements from both input vectors, set the high bit of the
@@ -4985,7 +5622,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
     NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
                                 &MaskV[0]);

-    if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+    if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
+        (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
       NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
                                NewV.getOperand(0),
                                X86::getShufflePSHUFLWImmediate(NewV.getNode()),
@@ -5013,7 +5651,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
     NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
                                 &MaskV[0]);

-    if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+    if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
+        (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
       NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
                                NewV.getOperand(0),
                                X86::getShufflePSHUFHWImmediate(NewV.getNode()),
@@ -5079,7 +5718,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
   }

   // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
-  if (TLI.getSubtarget()->hasSSSE3()) {
+  if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) {
     SmallVector<SDValue,16> pshufbMask;

     // If all result elements are from one input vector, then only translate
@@ -5276,15 +5915,109 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
                                              OpVT, SrcOp)));
 }

-/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
-/// shuffles.
+/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector
+/// shuffle node refers to only one lane in the sources.
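+/// For example, for v8f32 the mask <0, 1, 2, 3, 12, 13, 14, 15> qualifies:
+/// the low half reads only the low lane of V1 and the high half reads only
+/// the high lane of V2.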
+static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) {
+  EVT VT = SVOp->getValueType(0);
+  int NumElems = VT.getVectorNumElements();
+  int HalfSize = NumElems/2;
+  SmallVector<int, 16> M;
+  SVOp->getMask(M);
+  bool MatchA = false, MatchB = false;
+
+  for (int l = 0; l < NumElems*2; l += HalfSize) {
+    if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) {
+      MatchA = true;
+      break;
+    }
+  }
+
+  for (int l = 0; l < NumElems*2; l += HalfSize) {
+    if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) {
+      MatchB = true;
+      break;
+    }
+  }
+
+  return MatchA && MatchB;
+}
+
+/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vector shuffles
+/// which could not be matched by any known target specific shuffle
+static SDValue
+LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+  if (areShuffleHalvesWithinDisjointLanes(SVOp)) {
+    // If each half of a vector shuffle node refers to only one lane in the
+    // source vectors, extract each used 128-bit lane and shuffle them using
+    // 128-bit shuffles. Then, concatenate the results. Otherwise leave
+    // the work to the legalizer.
+    DebugLoc dl = SVOp->getDebugLoc();
+    EVT VT = SVOp->getValueType(0);
+    int NumElems = VT.getVectorNumElements();
+    int HalfSize = NumElems/2;

+    // Extract the reference for each half
+    int FstVecExtractIdx = 0, SndVecExtractIdx = 0;
+    int FstVecOpNum = 0, SndVecOpNum = 0;
+    for (int i = 0; i < HalfSize; ++i) {
+      int Elt = SVOp->getMaskElt(i);
+      if (SVOp->getMaskElt(i) < 0)
+        continue;
+      FstVecOpNum = Elt/NumElems;
+      FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
+      break;
+    }
+    for (int i = HalfSize; i < NumElems; ++i) {
+      int Elt = SVOp->getMaskElt(i);
+      if (SVOp->getMaskElt(i) < 0)
+        continue;
+      SndVecOpNum = Elt/NumElems;
+      SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
+      break;
+    }
+
+    // Extract the subvectors
+    SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum),
+                      DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl);
+    SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum),
+                      DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl);
+
+    // Generate 128-bit shuffles
+    SmallVector<int, 16> MaskV1, MaskV2;
+    for (int i = 0; i < HalfSize; ++i) {
+      int Elt = SVOp->getMaskElt(i);
+      MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+    }
+    for (int i = HalfSize; i < NumElems; ++i) {
+      int Elt = SVOp->getMaskElt(i);
+      MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+    }
+
+    EVT NVT = V1.getValueType();
+    V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]);
+    V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]);
+
+    // Concatenate the result back
+    SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1,
+                       DAG.getConstant(0, MVT::i32), DAG, dl);
+    return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
+                              DAG, dl);
+  }
+
+  return SDValue();
+}
+
+/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
+/// 4 elements, and match them with several different shuffle types.
 static SDValue
-LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
   DebugLoc dl = SVOp->getDebugLoc();
   EVT VT = SVOp->getValueType(0);

+  assert(VT.getSizeInBits() == 128 && "Unsupported vector size");
+
   SmallVector<std::pair<int, int>, 8> Locs;
   Locs.resize(4);
   SmallVector<int, 8> Mask1(4U, -1);
@@ -5542,18 +6275,21 @@ SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {

 static
 SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
-                        bool HasSSE2) {
+                        bool HasXMMInt) {
   SDValue V1 = Op.getOperand(0);
   SDValue V2 = Op.getOperand(1);
   EVT VT = Op.getValueType();

   assert(VT != MVT::v2i64 && "unsupported shuffle type");

-  if (HasSSE2 && VT == MVT::v2f64)
+  if (HasXMMInt && VT == MVT::v2f64)
     return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);

-  // v4f32 or v4i32
-  return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+  // v4f32 or v4i32: canonicalized to v4f32 (which is legal for SSE1)
+  return DAG.getNode(ISD::BITCAST, dl, VT,
+             getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32,
+                          DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1),
+                          DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V2), DAG));
 }

 static
@@ -5572,8 +6308,24 @@ SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
   return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
 }

+static inline unsigned getSHUFPOpcode(EVT VT) {
+  switch(VT.getSimpleVT().SimpleTy) {
+  case MVT::v8i32: // Use fp unit for int unpack.
+  case MVT::v8f32:
+  case MVT::v4i32: // Use fp unit for int unpack.
+  case MVT::v4f32: return X86ISD::SHUFPS;
+  case MVT::v4i64: // Use fp unit for int unpack.
+  case MVT::v4f64:
+  case MVT::v2i64: // Use fp unit for int unpack.
+  case MVT::v2f64: return X86ISD::SHUFPD;
+  default:
+    llvm_unreachable("Unknown type for shufp*");
+  }
+  return 0;
+}
+
 static
-SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
   SDValue V1 = Op.getOperand(0);
   SDValue V2 = Op.getOperand(1);
   EVT VT = Op.getValueType();
@@ -5602,7 +6354,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
       CanFoldLoad = false;

   if (CanFoldLoad) {
-    if (HasSSE2 && NumElems == 2)
+    if (HasXMMInt && NumElems == 2)
       return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);

     if (NumElems == 4)
@@ -5616,28 +6368,30 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
   // this is horrible, but will stay like this until we move all shuffle
   // matching to x86 specific nodes. Note that for the 1st condition all
   // types are matched with movsd.
-  if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
-    return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
-  else if (HasSSE2)
+  if (HasXMMInt) {
+    // FIXME: isMOVLMask should be checked and matched before getMOVLP,
+    // so as to remove this logic from here, as much as possible
+    if (NumElems == 2 || !X86::isMOVLMask(SVOp))
+      return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
     return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
-
+  }
   assert(VT != MVT::v4i32 && "unsupported shuffle type");

   // Invert the operand order and use SHUFPS to match it.
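   // (The SHUFPS immediate selects the two low result elements from the first
   // operand via bits [3:0] and the two high result elements from the second
   // operand via bits [7:4], hence the operand swap to keep the mask
   // encodable.)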
- return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1, + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V2, V1, X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) { +static inline unsigned getUNPCKLOpcode(EVT VT) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; - case MVT::v4f32: - return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS; - case MVT::v2f64: - return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; + case MVT::v4f32: return X86ISD::UNPCKLPS; + case MVT::v2f64: return X86ISD::UNPCKLPD; + case MVT::v8i32: // Use fp unit for int unpack. case MVT::v8f32: return X86ISD::VUNPCKLPSY; + case MVT::v4i64: // Use fp unit for int unpack. case MVT::v4f64: return X86ISD::VUNPCKLPDY; case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; @@ -5653,6 +6407,10 @@ static inline unsigned getUNPCKHOpcode(EVT VT) { case MVT::v2i64: return X86ISD::PUNPCKHQDQ; case MVT::v4f32: return X86ISD::UNPCKHPS; case MVT::v2f64: return X86ISD::UNPCKHPD; + case MVT::v8i32: // Use fp unit for int unpack. + case MVT::v8f32: return X86ISD::VUNPCKHPSY; + case MVT::v4i64: // Use fp unit for int unpack. + case MVT::v4f64: return X86ISD::VUNPCKHPDY; case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; default: @@ -5661,6 +6419,68 @@ static inline unsigned getUNPCKHOpcode(EVT VT) { return 0; } +static inline unsigned getVPERMILOpcode(EVT VT) { + switch(VT.getSimpleVT().SimpleTy) { + case MVT::v4i32: + case MVT::v4f32: return X86ISD::VPERMILPS; + case MVT::v2i64: + case MVT::v2f64: return X86ISD::VPERMILPD; + case MVT::v8i32: + case MVT::v8f32: return X86ISD::VPERMILPSY; + case MVT::v4i64: + case MVT::v4f64: return X86ISD::VPERMILPDY; + default: + llvm_unreachable("Unknown type for vpermil"); + } + return 0; +} + +/// isVectorBroadcast - Check if the node chain is suitable to be xformed to +/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming +/// from a 32 or 64 bit scalar. Update Op to the desired load to be folded. +static bool isVectorBroadcast(SDValue &Op) { + EVT VT = Op.getValueType(); + bool Is256 = VT.getSizeInBits() == 256; + + assert((VT.getSizeInBits() == 128 || Is256) && + "Unsupported type for vbroadcast node"); + + SDValue V = Op; + if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + + if (Is256 && !(V.hasOneUse() && + V.getOpcode() == ISD::INSERT_SUBVECTOR && + V.getOperand(0).getOpcode() == ISD::UNDEF)) + return false; + + if (Is256) + V = V.getOperand(1); + + if (!V.hasOneUse()) + return false; + + // Check the source scalar_to_vector type. 256-bit broadcasts are + // supported for 32/64-bit sizes, while 128-bit ones are only supported + // for 32-bit scalars. 
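+  // (This mirrors the AVX encodings: vbroadcastss has both xmm and ymm forms,
+  // while vbroadcastsd exists only as a ymm form.)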
+  if (V.getOpcode() != ISD::SCALAR_TO_VECTOR)
+    return false;
+
+  unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits();
+  if (ScalarSize != 32 && ScalarSize != 64)
+    return false;
+  if (!Is256 && ScalarSize == 64)
+    return false;
+
+  V = V.getOperand(0);
+  if (!MayFoldLoad(V))
+    return false;
+
+  // Return the load node
+  Op = V;
+  return true;
+}
+
 static
 SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
                                const TargetLowering &TLI,
@@ -5672,23 +6492,29 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
   SDValue V2 = Op.getOperand(1);

   if (isZeroShuffle(SVOp))
-    return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+    return getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);

   // Handle splat operations
   if (SVOp->isSplat()) {
-    // Special case, this is the only place now where it's
-    // allowed to return a vector_shuffle operation without
-    // using a target specific node, because *hopefully* it
-    // will be optimized away by the dag combiner.
-    if (VT.getVectorNumElements() <= 4 &&
-        CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
+    unsigned NumElem = VT.getVectorNumElements();
+    int Size = VT.getSizeInBits();
+    // Special case, this is the only place now where it's allowed to return
+    // a vector_shuffle operation without using a target specific node, because
+    // *hopefully* it will be optimized away by the dag combiner. FIXME: should
+    // this be moved to DAGCombine instead?
+    if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
       return Op;

-    // Handle splats by matching through known masks
-    if (VT.getVectorNumElements() <= 4)
+    // Use vbroadcast whenever the splat comes from a foldable load
+    if (Subtarget->hasAVX() && isVectorBroadcast(V1))
+      return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1);
+
+    // Handle splats by matching through known shuffle masks
+    if ((Size == 128 && NumElem <= 4) ||
+        (Size == 256 && NumElem < 8))
       return SDValue();

-    // Canonicalize all of the remaining to v4f32.
+    // All remaining splats are promoted to target supported vector shuffles.
     return PromoteSplat(SVOp, DAG);
   }

@@ -5698,7 +6524,8 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
     SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
     if (NewOp.getNode())
       return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
-  } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+  } else if ((VT == MVT::v4i32 ||
+             (VT == MVT::v4f32 && Subtarget->hasXMMInt()))) {
     // FIXME: Figure out a cleaner way to do this.
     // Try to make use of movq to zero out the top part.
     if (ISD::isBuildVectorAllZeros(V2.getNode())) {
@@ -5731,9 +6558,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
   bool V1IsSplat = false;
   bool V2IsSplat = false;
-  bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
-  bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
-  bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+  bool HasXMMInt = Subtarget->hasXMMInt();
   MachineFunction &MF = DAG.getMachineFunction();
   bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
@@ -5765,21 +6590,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
   // unpckh_undef). Only use pshufd if speed is more important than size.
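   // (unpcklps/unpckhps take no immediate byte and no 0x66 prefix, so they
   // encode shorter than the equivalent pshufd.)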
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) - if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) - if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); - if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef && - RelaxedMayFoldVectorLoad(V1)) + if (X86::isMOVDDUPMask(SVOp) && + (Subtarget->hasSSE3() || Subtarget->hasAVX()) && + V2IsUndef && RelaxedMayFoldVectorLoad(V1)) return getMOVDDup(Op, dl, V1, DAG); if (X86::isMOVHLPS_v_undef_Mask(SVOp)) return getMOVHighToLow(Op, dl, DAG); // Use to match splats - if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef && + if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); @@ -5792,24 +6616,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp); - if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32)) + if (HasXMMInt && (VT == MVT::v4f32 || VT == MVT::v4i32)) return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG); - if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) - return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1, - TargetMask, DAG); - - if (VT == MVT::v4f32) - return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1, - TargetMask, DAG); + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V1, + TargetMask, DAG); } // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = getSubtarget()->hasSSE2() && - isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); + bool isShift = getSubtarget()->hasXMMInt() && + isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -5824,7 +6643,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); if (!X86::isMOVLPMask(SVOp)) { - if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) + if (HasXMMInt && (VT == MVT::v2i64 || VT == MVT::v2f64)) return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); if (VT == MVT::v4i32 || VT == MVT::v4f32) @@ -5834,19 +6653,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // FIXME: fold these into legal mask. 
if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) - return getMOVLowToHigh(Op, dl, DAG, HasSSE2); + return getMOVLowToHigh(Op, dl, DAG, HasXMMInt); if (X86::isMOVHLPSMask(SVOp)) return getMOVHighToLow(Op, dl, DAG); - if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + if (X86::isMOVSHDUPMask(SVOp, Subtarget)) return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); - if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + if (X86::isMOVSLDUPMask(SVOp, Subtarget)) return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); if (X86::isMOVLPMask(SVOp)) - return getMOVLP(Op, dl, DAG, HasSSE2); + return getMOVLP(Op, dl, DAG, HasXMMInt); if (ShouldXformToMOVHLPS(SVOp) || ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) @@ -5887,8 +6706,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } if (X86::isUNPCKLMask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), - dl, VT, V1, V2, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); if (X86::isUNPCKHMask(SVOp)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); @@ -5915,8 +6733,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); if (X86::isUNPCKLMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), - dl, VT, V2, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); if (X86::isUNPCKHMask(NewSVOp)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); @@ -5932,18 +6749,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SmallVector<int, 16> M; SVOp->getMask(M); - if (isPALIGNRMask(M, VT, HasSSSE3)) + if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX())) return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, X86::getShufflePALIGNRImmediate(SVOp), DAG); if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { - if (VT == MVT::v2f64) { - X86ISD::NodeType Opcode = - getSubtarget()->hasAVX() ? 
X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; - return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG); - } + if (VT == MVT::v2f64) + return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); if (VT == MVT::v2i64) return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); } @@ -5958,23 +6772,54 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { X86::getShufflePSHUFLWImmediate(SVOp), DAG); - if (isSHUFPMask(M, VT)) { - unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp); - if (VT == MVT::v4f32 || VT == MVT::v4i32) - return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2, - TargetMask, DAG); - if (VT == MVT::v2f64 || VT == MVT::v2i64) - return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2, - TargetMask, DAG); - } + if (isSHUFPMask(M, VT)) + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, + X86::getShuffleSHUFImmediate(SVOp), DAG); if (X86::isUNPCKL_v_undef_Mask(SVOp)) - if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), - dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); if (X86::isUNPCKH_v_undef_Mask(SVOp)) - if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + + //===--------------------------------------------------------------------===// + // Generate target specific nodes for 128 or 256-bit shuffles only + // supported in the AVX instruction set. + // + + // Handle VMOVDDUPY permutations + if (isMOVDDUPYMask(SVOp, Subtarget)) + return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG); + + // Handle VPERMILPS* permutations + if (isVPERMILPSMask(M, VT, Subtarget)) + return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, + getShuffleVPERMILPSImmediate(SVOp), DAG); + + // Handle VPERMILPD* permutations + if (isVPERMILPDMask(M, VT, Subtarget)) + return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, + getShuffleVPERMILPDImmediate(SVOp), DAG); + + // Handle VPERM2F128 permutations + if (isVPERM2F128Mask(M, VT, Subtarget)) + return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2, + getShuffleVPERM2F128Immediate(SVOp), DAG); + + // Handle VSHUFPSY permutations + if (isVSHUFPSYMask(M, VT, Subtarget)) + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, + getShuffleVSHUFPSYImmediate(SVOp), DAG); + + // Handle VSHUFPDY permutations + if (isVSHUFPDYMask(M, VT, Subtarget)) + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, + getShuffleVSHUFPDYImmediate(SVOp), DAG); + + //===--------------------------------------------------------------------===// + // Since no target specific shuffle was selected for this generic one, + // lower it into other known shuffles. FIXME: this isn't true yet, but + // this is the plan. + // // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { @@ -5989,9 +6834,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return NewOp; } - // Handle all 4 wide cases with a number of shuffles. - if (NumElems == 4) - return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); + // Handle all 128-bit wide vectors with 4 elements, and match them with + // several different shuffle types. 
+ if (NumElems == 4 && VT.getSizeInBits() == 128) + return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG); + + // Handle general 256-bit shuffles + if (VT.is256BitVector()) + return LowerVECTOR_SHUFFLE_256(SVOp, DAG); return SDValue(); } @@ -6001,6 +6851,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); + + if (Op.getOperand(0).getValueType().getSizeInBits() != 128) + return SDValue(); + if (VT.getSizeInBits() == 8) { SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1)); @@ -6060,36 +6914,26 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue Vec = Op.getOperand(0); EVT VecVT = Vec.getValueType(); - // If this is a 256-bit vector result, first extract the 128-bit - // vector and then extract from the 128-bit vector. - if (VecVT.getSizeInBits() > 128) { + // If this is a 256-bit vector result, first extract the 128-bit vector and + // then extract the element from the 128-bit vector. + if (VecVT.getSizeInBits() == 256) { DebugLoc dl = Op.getNode()->getDebugLoc(); unsigned NumElems = VecVT.getVectorNumElements(); SDValue Idx = Op.getOperand(1); - - if (!isa<ConstantSDNode>(Idx)) - return SDValue(); - - unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128); unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); // Get the 128-bit vector. - bool Upper = IdxVal >= ExtractNumElems; - Vec = Extract128BitVector(Vec, Idx, DAG, dl); - - // Extract from it. - SDValue ScaledIdx = Idx; - if (Upper) - ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx, - DAG.getConstant(ExtractNumElems, - Idx.getValueType())); + bool Upper = IdxVal >= NumElems/2; + Vec = Extract128BitVector(Vec, + DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32), DAG, dl); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, - ScaledIdx); + Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : Idx); } assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); - if (Subtarget->hasSSE41()) { + if (Subtarget->hasSSE41() || Subtarget->hasAVX()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); if (Res.getNode()) return Res; @@ -6120,7 +6964,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, return Op; // SHUFPS the element to the lowest double word, then movss. - int Mask[4] = { Idx, -1, -1, -1 }; + int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 }; EVT VVT = Op.getOperand(0).getValueType(); SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); @@ -6159,6 +7003,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); + if (VT.getSizeInBits() == 256) + return SDValue(); + if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && isa<ConstantSDNode>(N2)) { unsigned Opc; @@ -6206,35 +7053,28 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - // If this is a 256-bit vector result, first insert into a 128-bit - // vector and then insert into the 256-bit vector. - if (VT.getSizeInBits() > 128) { + // If this is a 256-bit vector result, first extract the 128-bit vector, + // insert the element into the extracted half and then place it back. + if (VT.getSizeInBits() == 256) { if (!isa<ConstantSDNode>(N2)) return SDValue(); - // Get the 128-bit vector. + // Get the desired 128-bit vector half. 
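+  // For example, inserting element 5 of a v8i32: the upper half is extracted
+  // at index 4, the scalar is inserted into it at index 5-4 == 1, and the
+  // half is written back at index 4.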
unsigned NumElems = VT.getVectorNumElements(); unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue(); - bool Upper = IdxVal >= NumElems / 2; - - SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl); + bool Upper = IdxVal >= NumElems/2; + SDValue Ins128Idx = DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32); + SDValue V = Extract128BitVector(N0, Ins128Idx, DAG, dl); - // Insert into it. - SDValue ScaledN2 = N2; - if (Upper) - ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2, - DAG.getConstant(NumElems / - (VT.getSizeInBits() / 128), - N2.getValueType())); - Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0, - N1, ScaledN2); + // Insert the element into the desired half. + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, + N1, Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : N2); - // Insert the 128-bit vector - // FIXME: Why UNDEF? - return Insert128BitVector(N0, Op, N2, DAG, dl); + // Insert the changed part back to the 256-bit vector + return Insert128BitVector(N0, V, Ins128Idx, DAG, dl); } - if (Subtarget->hasSSE41()) + if (Subtarget->hasSSE41() || Subtarget->hasAVX()) return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); if (EltVT == MVT::i8) @@ -6405,12 +7245,17 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { CodeModel::Model M = getTargetMachine().getCodeModel(); if (Subtarget->isPICStyleRIPRel() && - (M == CodeModel::Small || M == CodeModel::Kernel)) + (M == CodeModel::Small || M == CodeModel::Kernel)) { + if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF()) + OpFlag = X86II::MO_GOTPCREL; WrapperKind = X86ISD::WrapperRIP; - else if (Subtarget->isPICStyleGOT()) - OpFlag = X86II::MO_GOTOFF; - else if (Subtarget->isPICStyleStubPIC()) - OpFlag = X86II::MO_PIC_BASE_OFFSET; + } else if (Subtarget->isPICStyleGOT()) { + OpFlag = X86II::MO_GOT; + } else if (Subtarget->isPICStyleStubPIC()) { + OpFlag = X86II::MO_DARWIN_NONLAZY_PIC_BASE; + } else if (Subtarget->isPICStyleStubNoDynamic()) { + OpFlag = X86II::MO_DARWIN_NONLAZY; + } SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag); @@ -6427,6 +7272,12 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { Result); } + // For symbols that require a load from a stub to get the address, emit the + // load. + if (isGlobalStubReference(OpFlag)) + Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, 0); + return Result; } @@ -6676,7 +7527,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // And our return value (tls address) is in the standard call return value // location. unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; - return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); + return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(), + Chain.getValue(1)); } assert(false && @@ -6922,9 +7774,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, // Load the 32-bit value into an XMM register. SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, - DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Op.getOperand(0), - DAG.getIntPtrConstant(0))); + Op.getOperand(0)); + + // Zero out the upper parts of the register. 
+  Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasXMMInt(),
+                                     DAG);

   Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
                      DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
@@ -7513,6 +8367,9 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
 }

 SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+  if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG);
+
   assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
   SDValue Op0 = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1);
@@ -7563,6 +8420,39 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
                      DAG.getConstant(X86CC, MVT::i8), EFLAGS);
 }

+// Lower256IntVSETCC - Break a 256-bit integer VSETCC into two new 128-bit
+// ones, and then concatenate the result back.
+static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+
+  assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC &&
+         "Unsupported value type for operation");
+
+  int NumElems = VT.getVectorNumElements();
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue CC = Op.getOperand(2);
+  SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+  SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+  // Extract the LHS vectors
+  SDValue LHS = Op.getOperand(0);
+  SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+  SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+  // Extract the RHS vectors
+  SDValue RHS = Op.getOperand(1);
+  SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl);
+  SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl);
+
+  // Issue the operation on the smaller types and concatenate the result back
+  MVT EltVT = VT.getVectorElementType().getSimpleVT();
+  EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+                     DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
+                     DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
+}
+
+
 SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
   SDValue Cond;
   SDValue Op0 = Op.getOperand(0);
@@ -7575,11 +8465,21 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {

   if (isFP) {
     unsigned SSECC = 8;
-    EVT VT0 = Op0.getValueType();
-    assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
-    unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+    EVT EltVT = Op0.getValueType().getVectorElementType();
+    assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+
+    unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
     bool Swap = false;

+    // SSE Condition code mapping:
+    //  0 - EQ
+    //  1 - LT
+    //  2 - LE
+    //  3 - UNORD
+    //  4 - NEQ
+    //  5 - NLT
+    //  6 - NLE
+    //  7 - ORD
     switch (SetCCOpcode) {
     default: break;
     case ISD::SETOEQ:
@@ -7624,6 +8524,10 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
   }

+  // Break 256-bit integer vector compare into smaller ones.
+  if (!isFP && VT.getSizeInBits() == 256)
+    return Lower256IntVSETCC(Op, DAG);
+
   // We are handling one of the integer comparisons here.  Since SSE only has
   // GT and EQ comparisons for integer, swapping operands and multiple
   // operations may be required for some comparisons.
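The halving in Lower256IntVSETCC above is the general pre-AVX2 recipe for wide integer operations: run the operation on extracted 128-bit halves, then concatenate. A minimal scalar sketch of those semantics in plain C++ (illustrative names, not the LLVM API):

#include <array>
#include <cstdint>

using V4 = std::array<int32_t, 4>;
using V8 = std::array<int32_t, 8>;

// Models PCMPGTD: each lane becomes all-ones (-1) or zero.
static V4 cmpgt128(const V4 &a, const V4 &b) {
  V4 r{};
  for (int i = 0; i < 4; ++i)
    r[i] = a[i] > b[i] ? -1 : 0;
  return r;
}

// Models Lower256IntVSETCC: split both operands into halves, compare the
// halves independently, and concatenate the two 128-bit results.
static V8 cmpgt256(const V8 &a, const V8 &b) {
  V4 lo = cmpgt128({a[0], a[1], a[2], a[3]}, {b[0], b[1], b[2], b[3]});
  V4 hi = cmpgt128({a[4], a[5], a[6], a[7]}, {b[4], b[5], b[6], b[7]});
  return {lo[0], lo[1], lo[2], lo[3], hi[0], hi[1], hi[2], hi[3]};
}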
@@ -7654,6 +8558,13 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (Swap) std::swap(Op0, Op1); + // Check that the operation in question is available (most are plain SSE2, + // but PCMPGTQ and PCMPEQQ have different requirements). + if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX()) + return SDValue(); + if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX()) + return SDValue(); + // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. if (FlipSigns) { @@ -8014,9 +8925,11 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { - assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) && - "This should be used only on Windows targets"); - assert(!Subtarget->isTargetEnvMacho()); + assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() || + EnableSegmentedStacks) && + "This should be used only on Windows targets or when segmented stacks " + "are being used"); + assert(!Subtarget->isTargetEnvMacho() && "Not implemented"); DebugLoc dl = Op.getDebugLoc(); // Get the inputs. @@ -8024,23 +8937,49 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Size = Op.getOperand(1); // FIXME: Ensure alignment here - SDValue Flag; + bool Is64Bit = Subtarget->is64Bit(); + EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32; - EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; - unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX); + if (EnableSegmentedStacks) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); - Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); - Flag = Chain.getValue(1); + if (Is64Bit) { + // The 64-bit implementation of segmented stacks needs to clobber both r10 + // and r11. This makes it impossible to use it along with nested parameters. + const Function *F = MF.getFunction(); + + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; I++) + if (I->hasNestAttr()) + report_fatal_error("Cannot use segmented stacks with functions that " + "have nested arguments."); + } + + const TargetRegisterClass *AddrRegClass = + getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32); + unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); + Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); + SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, + DAG.getRegister(Vreg, SPTy)); + SDValue Ops1[2] = { Value, Chain }; + return DAG.getMergeValues(Ops1, 2, dl); + } else { + SDValue Flag; + unsigned Reg = (Subtarget->is64Bit() ?
X86::RAX : X86::EAX); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); + Flag = Chain.getValue(1); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); - Flag = Chain.getValue(1); + Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); + Flag = Chain.getValue(1); - Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); + Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); - SDValue Ops1[2] = { Chain.getValue(0), Chain }; - return DAG.getMergeValues(Ops1, 2, dl); + SDValue Ops1[2] = { Chain.getValue(0), Chain }; + return DAG.getMergeValues(Ops1, 2, dl); + } } SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { @@ -8118,7 +9057,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT ArgVT = Op.getNode()->getValueType(0); - const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy); uint8_t ArgMode; @@ -8292,6 +9231,19 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + // Arithmetic intrinsics. + case Intrinsic::x86_sse3_hadd_ps: + case Intrinsic::x86_sse3_hadd_pd: + case Intrinsic::x86_avx_hadd_ps_256: + case Intrinsic::x86_avx_hadd_pd_256: + return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse3_hsub_ps: + case Intrinsic::x86_sse3_hsub_pd: + case Intrinsic::x86_avx_hsub_ps_256: + case Intrinsic::x86_avx_hsub_pd_256: + return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. @@ -8535,8 +9487,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); } -SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { SDValue Root = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -8552,8 +9509,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. 
- const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10); - const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11); + const unsigned char N86R10 = X86_MC::getX86RegNum(X86::R10); + const unsigned char N86R11 = X86_MC::getX86RegNum(X86::R11); const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix @@ -8600,9 +9557,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, MachinePointerInfo(TrmpAddr, 22), false, false, 0); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6); } else { const Function *Func = cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); @@ -8619,7 +9574,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, NestReg = X86::ECX; // Check that ECX wasn't needed by an 'inreg' parameter. - const FunctionType *FTy = Func->getFunctionType(); + FunctionType *FTy = Func->getFunctionType(); const AttrListPtr &Attrs = Func->getAttributes(); if (!Attrs.isEmpty() && !Func->isVarArg()) { @@ -8657,7 +9612,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, // This is storing the opcode for MOV32ri. const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte. - const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); + const unsigned char N86Reg = X86_MC::getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), Trmp, MachinePointerInfo(TrmpAddr), @@ -8682,9 +9637,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, MachinePointerInfo(TrmpAddr, 6), false, false, 1); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4); } } @@ -8822,8 +9775,58 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { return Op; } -SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const { +// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit +// ones, and then concatenate the result back. 
+static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + + assert(VT.getSizeInBits() == 256 && VT.isInteger() && + "Unsupported value type for operation"); + + int NumElems = VT.getVectorNumElements(); + DebugLoc dl = Op.getDebugLoc(); + SDValue Idx0 = DAG.getConstant(0, MVT::i32); + SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32); + + // Extract the LHS vectors + SDValue LHS = Op.getOperand(0); + SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl); + SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl); + + // Extract the RHS vectors + SDValue RHS = Op.getOperand(1); + SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl); + SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl); + + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1), + DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2)); +} + +SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType().getSizeInBits() == 256 && + Op.getValueType().isInteger() && + "Only handle AVX 256-bit vector integer operation"); + return Lower256IntArith(Op, DAG); +} + +SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType().getSizeInBits() == 256 && + Op.getValueType().isInteger() && + "Only handle AVX 256-bit vector integer operation"); + return Lower256IntArith(Op, DAG); +} + +SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); + + // Decompose 256-bit ops into smaller 128-bit ops. + if (VT.getSizeInBits() == 256) + return Lower256IntArith(Op, DAG); + assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply"); DebugLoc dl = Op.getDebugLoc(); @@ -8872,11 +9875,51 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - LLVMContext *Context = DAG.getContext(); - // Must have SSE2. - if (!Subtarget->hasSSE2()) return SDValue(); + if (!Subtarget->hasXMMInt()) + return SDValue(); + + // Decompose 256-bit shifts into smaller 128-bit shifts. 
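// The same split applies to shifts, with one wrinkle visible in the code
// below: the per-lane shift-amount operand must be halved too. A scalar
// model of the constant-amount case (hypothetical helper, amounts assumed
// to be less than 32):

#include <array>
#include <cstdint>

using V8 = std::array<uint32_t, 8>;
using V4 = std::array<uint32_t, 4>;

static V8 shl_v8i32(const V8 &v, const V8 &amt) {
  V4 lo, hi;
  for (int i = 0; i < 4; ++i) {          // shift each half separately
    lo[i] = v[i] << amt[i];
    hi[i] = v[i + 4] << amt[i + 4];
  }
  V8 r;                                  // concatenate the result back
  for (int i = 0; i < 4; ++i) { r[i] = lo[i]; r[i + 4] = hi[i]; }
  return r;
}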
+ if (VT.getSizeInBits() == 256) { + int NumElems = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + + // Extract the two vectors + SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl); + SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + + // Recreate the shift amount vectors + SDValue Amt1, Amt2; + if (Amt.getOpcode() == ISD::BUILD_VECTOR) { + // Constant shift amount + SmallVector<SDValue, 4> Amt1Csts; + SmallVector<SDValue, 4> Amt2Csts; + for (int i = 0; i < NumElems/2; ++i) + Amt1Csts.push_back(Amt->getOperand(i)); + for (int i = NumElems/2; i < NumElems; ++i) + Amt2Csts.push_back(Amt->getOperand(i)); + + Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, + &Amt1Csts[0], NumElems/2); + Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, + &Amt2Csts[0], NumElems/2); + } else { + // Variable shift amount + Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl); + Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + } + + // Issue new vector shifts for the smaller types + V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1); + V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2); + + // Concatenate the result back + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2); + } // Optimize shl/srl/sra with constant shift amount. if (isSplatVector(Amt.getNode())) { @@ -8927,9 +9970,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } // Lower SHL with variable shift amount. - // Cannot lower SHL without SSE2 or later. - if (!Subtarget->hasSSE2()) return SDValue(); - if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32), @@ -8971,7 +10011,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(4, MVT::i32)); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); + R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); @@ -8986,13 +10026,13 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(2, MVT::i32)); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); + R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); // return pblendv(r, r+r, a); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, - R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op); + R = DAG.getNode(ISD::VSELECT, dl, VT, Op, + R, DAG.getNode(ISD::ADD, dl, VT, R, R)); return R; } return SDValue(); @@ -9057,8 +10097,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(X86::COND_O, MVT::i32), SDValue(Sum.getNode(), 2)); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); - return Sum; + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); } } @@ -9071,8 +10110,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); - return Sum; + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); } SDValue 
X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{ @@ -9080,8 +10118,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) SDNode* Node = Op.getNode(); EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); - - if (Subtarget->hasSSE2() && VT.isVector()) { + if (Subtarget->hasXMMInt() && VT.isVector()) { unsigned BitsDiff = VT.getScalarType().getSizeInBits() - ExtraVT.getScalarType().getSizeInBits(); SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32); @@ -9091,11 +10128,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) switch (VT.getSimpleVT().SimpleTy) { default: return SDValue(); - case MVT::v2i64: { - SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q; - SRAIntrinsicsID = 0; - break; - } case MVT::v4i32: { SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d; SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d; @@ -9115,12 +10147,9 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) // In case of 1 bit sext, no need to shr if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1; - if (SRAIntrinsicsID) { - Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(SRAIntrinsicsID, MVT::i32), - Tmp1, ShAmt); - } - return Tmp1; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(SRAIntrinsicsID, MVT::i32), + Tmp1, ShAmt); } return SDValue(); @@ -9132,7 +10161,7 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. // There isn't any reason to disable it if the target processor supports it. - if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { + if (!Subtarget->hasXMMInt() && !Subtarget->is64Bit()) { SDValue Chain = Op.getOperand(0); SDValue Zero = DAG.getConstant(0, MVT::i32); SDValue Ops[] = { @@ -9172,6 +10201,45 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); } +SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); + SynchronizationScope FenceScope = static_cast<SynchronizationScope>( + cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + + // The only fence that needs an instruction is a sequentially-consistent + // cross-thread fence. + if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) { + // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for + // no-sse2). There isn't any reason to disable it if the target processor + // supports it. + if (Subtarget->hasXMMInt() || Subtarget->is64Bit()) + return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); + + SDValue Chain = Op.getOperand(0); + SDValue Zero = DAG.getConstant(0, MVT::i32); + SDValue Ops[] = { + DAG.getRegister(X86::ESP, MVT::i32), // Base + DAG.getTargetConstant(1, MVT::i8), // Scale + DAG.getRegister(0, MVT::i32), // Index + DAG.getTargetConstant(0, MVT::i32), // Disp + DAG.getRegister(0, MVT::i32), // Segment. + Zero, + Chain + }; + SDNode *Res = + DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, + array_lengthof(Ops)); + return SDValue(Res, 0); + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); +} + + SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { EVT T = Op.getValueType(); DebugLoc DL = Op.getDebugLoc(); @@ -9227,7 +10295,7 @@ SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); EVT DstVT = Op.getValueType(); - assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && + assert(Subtarget->is64Bit() && !Subtarget->hasXMMInt() && Subtarget->hasMMX() && "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || (DstVT.isVector() && DstVT.getSizeInBits()==64)) && @@ -9255,7 +10323,34 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { Node->getOperand(0), Node->getOperand(1), negOp, cast<AtomicSDNode>(Node)->getSrcValue(), - cast<AtomicSDNode>(Node)->getAlignment()); + cast<AtomicSDNode>(Node)->getAlignment(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); +} + +static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT(); + + // Convert seq_cst store -> xchg + // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b) + // FIXME: On 32-bit, store -> fist or movq would be more efficient + // (The only way to get a 16-byte store is cmpxchg16b) + // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment. + if (cast<AtomicSDNode>(Node)->getOrdering() == SequentiallyConsistent || + !DAG.getTargetLoweringInfo().isTypeLegal(VT)) { + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + cast<AtomicSDNode>(Node)->getMemoryVT(), + Node->getOperand(0), + Node->getOperand(1), Node->getOperand(2), + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + return Swap.getValue(1); + } + // Other atomic stores have a simple pattern. 
+ return Op; } static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { @@ -9291,8 +10386,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("Should not custom lower this!"); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG); + case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op,DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); + case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); @@ -9318,7 +10415,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::VSETCC: return LowerVSETCC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); @@ -9332,11 +10428,12 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: return LowerCTLZ(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); - case ISD::MUL: return LowerMUL_V2I64(Op, DAG); + case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SRA: case ISD::SRL: case ISD::SHL: return LowerShift(Op, DAG); @@ -9352,15 +10449,38 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::ADD: return LowerADD(Op, DAG); + case ISD::SUB: return LowerSUB(Op, DAG); } } +static void ReplaceATOMIC_LOAD(SDNode *Node, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { + DebugLoc dl = Node->getDebugLoc(); + EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT(); + + // Convert wide load -> cmpxchg8b/cmpxchg16b + // FIXME: On 32-bit, load -> fild or movq would be more efficient + // (The only way to get a 16-byte load is cmpxchg16b) + // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment. 
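// Both this conversion and the seq_cst-store-to-xchg one above have simple
// scalar analogues. A sketch using GCC/Clang builtins (the exact
// instructions are up to the compiler, but on x86 these typically lower to
// xchg and lock cmpxchg8b respectively):

#include <cstdint>

static inline void store_seq_cst(long *p, long v) {
  // A plain MOV is not sequentially consistent; XCHG's implicit lock is.
  __atomic_store_n(p, v, __ATOMIC_SEQ_CST);
}

static inline int64_t load_i64_atomic(int64_t *p) {
  // A compare-and-swap of zero for zero always returns the current memory
  // value atomically, whether or not the swap "succeeds" -- the same idea
  // as the ATOMIC_CMP_SWAP node emitted below.
  return __sync_val_compare_and_swap(p, int64_t(0), int64_t(0));
}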
+ SDValue Zero = DAG.getConstant(0, VT); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, + Node->getOperand(0), + Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + Results.push_back(Swap.getValue(0)); + Results.push_back(Swap.getValue(1)); +} + void X86TargetLowering:: ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG, unsigned NewOp) const { - EVT T = Node->getValueType(0); DebugLoc dl = Node->getDebugLoc(); - assert (T == MVT::i64 && "Only know how to expand i64 atomics"); + assert (Node->getValueType(0) == MVT::i64 && + "Only know how to expand i64 atomics"); SDValue Chain = Node->getOperand(0); SDValue In1 = Node->getOperand(1); @@ -9423,37 +10543,48 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); - assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); + assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair"); + bool Regs64bit = T == MVT::i128; + EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32; SDValue cpInL, cpInH; - cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2), - DAG.getConstant(0, MVT::i32)); - cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2), - DAG.getConstant(1, MVT::i32)); - cpInL = DAG.getCopyToReg(N->getOperand(0), dl, X86::EAX, cpInL, SDValue()); - cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, X86::EDX, cpInH, - cpInL.getValue(1)); + cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), + DAG.getConstant(0, HalfT)); + cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), + DAG.getConstant(1, HalfT)); + cpInL = DAG.getCopyToReg(N->getOperand(0), dl, + Regs64bit ? X86::RAX : X86::EAX, + cpInL, SDValue()); + cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, + Regs64bit ? X86::RDX : X86::EDX, + cpInH, cpInL.getValue(1)); SDValue swapInL, swapInH; - swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3), - DAG.getConstant(0, MVT::i32)); - swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3), - DAG.getConstant(1, MVT::i32)); - swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, X86::EBX, swapInL, - cpInH.getValue(1)); - swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, X86::ECX, swapInH, - swapInL.getValue(1)); + swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), + DAG.getConstant(0, HalfT)); + swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), + DAG.getConstant(1, HalfT)); + swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, + Regs64bit ? X86::RBX : X86::EBX, + swapInL, cpInH.getValue(1)); + swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, + Regs64bit ? X86::RCX : X86::ECX, + swapInH, swapInL.getValue(1)); SDValue Ops[] = { swapInH.getValue(0), N->getOperand(1), swapInH.getValue(1) }; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); - SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, + unsigned Opcode = Regs64bit ? 
X86ISD::LCMPXCHG16_DAG : + X86ISD::LCMPXCHG8_DAG; + SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, 3, T, MMO); - SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX, - MVT::i32, Result.getValue(1)); - SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX, - MVT::i32, cpOutL.getValue(2)); + SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, + Regs64bit ? X86::RAX : X86::EAX, + HalfT, Result.getValue(1)); + SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, + Regs64bit ? X86::RDX : X86::EDX, + HalfT, cpOutL.getValue(2)); SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF, 2)); Results.push_back(cpOutH.getValue(1)); return; } @@ -9478,6 +10609,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_SWAP: ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG); return; + case ISD::ATOMIC_LOAD: + ReplaceATOMIC_LOAD(N, Results, DAG); } } @@ -9527,11 +10660,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; case X86ISD::PSIGND: return "X86ISD::PSIGND"; - case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; + case X86ISD::FHADD: return "X86ISD::FHADD"; + case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; @@ -9570,6 +10704,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::OR: return "X86ISD::OR"; case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; + case X86ISD::ANDN: return "X86ISD::ANDN"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; @@ -9596,9 +10731,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; - case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS"; - case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD"; - case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; @@ -9610,16 +10742,24 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; + case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; + case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS"; + case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY"; + case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD"; + case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY"; + case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; + case X86ISD::MEMBARRIER: return 
"X86ISD::MEMBARRIER"; + case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; } } // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const { + Type *Ty) const { // X86 supports extremely general addressing modes. CodeModel::Model M = getTargetMachine().getCodeModel(); Reloc::Model R = getTargetMachine().getRelocationModel(); @@ -9671,7 +10811,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, } -bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { +bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); @@ -9691,7 +10831,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return true; } -bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { +bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit(); } @@ -9715,7 +10855,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const { // Very little shuffling can be done for 64-bit vectors right now. if (VT.getSizeInBits() == 64) - return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()); + return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()); // FIXME: pshufb, blends, shifts. return (VT.getVectorNumElements() == 2 || @@ -9725,7 +10865,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isPSHUFDMask(M, VT) || isPSHUFHWMask(M, VT) || isPSHUFLWMask(M, VT) || - isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) || + isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) || isUNPCKLMask(M, VT) || isUNPCKHMask(M, VT) || isUNPCKL_v_undef_Mask(M, VT) || @@ -10158,7 +11298,9 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } - BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) + BuildMI(*BB, MI, dl, + TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr), + MI->getOperand(0).getReg()) .addReg(X86::XMM0); MI->eraseFromParent(); @@ -10513,6 +11655,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MBB->addSuccessor(EndMBB); } + unsigned MOVOpc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; // In the XMM save block, save all the XMM argument registers. for (int i = 3, e = MI->getNumOperands(); i != e; ++i) { int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; @@ -10521,7 +11664,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset), MachineMemOperand::MOStore, /*Size=*/16, /*Align=*/16); - BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr)) + BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc)) .addFrameIndex(RegSaveFrameIndex) .addImm(/*Scale=*/1) .addReg(/*IndexReg=*/0) @@ -10565,17 +11708,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. 
- const MachineFunction *MF = BB->getParent(); - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); - BitVector ReservedRegs = TRI->getReservedRegs(*MF); - - for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI->getOperand(I); - if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue; - unsigned Reg = MO.getReg(); - if (Reg != X86::EFLAGS) continue; - copy0MBB->addLiveIn(Reg); - sinkMBB->addLiveIn(Reg); + if (!MI->killsRegister(X86::EFLAGS)) { + copy0MBB->addLiveIn(X86::EFLAGS); + sinkMBB->addLiveIn(X86::EFLAGS); } // Transfer the remainder of BB and its successor edges to sinkMBB. @@ -10611,6 +11746,119 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, } MachineBasicBlock * +X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, + bool Is64Bit) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *MF = BB->getParent(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + + assert(EnableSegmentedStacks); + + unsigned TlsReg = Is64Bit ? X86::FS : X86::GS; + unsigned TlsOffset = Is64Bit ? 0x70 : 0x30; + + // BB: + // ... [Till the alloca] + // If stacklet is not large enough, jump to mallocMBB + // + // bumpMBB: + // Allocate by subtracting from RSP + // Jump to continueMBB + // + // mallocMBB: + // Allocate by call to runtime + // + // continueMBB: + // ... + // [rest of original BB] + // + + MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB); + + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterClass *AddrRegClass = + getRegClassFor(Is64Bit ? MVT::i64:MVT::i32); + + unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass), + bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass), + tmpSPVReg = MRI.createVirtualRegister(AddrRegClass), + sizeVReg = MI->getOperand(1).getReg(), + physSPReg = Is64Bit ? X86::RSP : X86::ESP; + + MachineFunction::iterator MBBIter = BB; + ++MBBIter; + + MF->insert(MBBIter, bumpMBB); + MF->insert(MBBIter, mallocMBB); + MF->insert(MBBIter, continueMBB); + + continueMBB->splice(continueMBB->begin(), BB, llvm::next + (MachineBasicBlock::iterator(MI)), BB->end()); + continueMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Add code to the main basic block to check if the stack limit has been hit, + // and if so, jump to mallocMBB otherwise to bumpMBB. + BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg); + BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), tmpSPVReg) + .addReg(tmpSPVReg).addReg(sizeVReg); + BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr)) + .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg) + .addReg(tmpSPVReg); + BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB); + + // bumpMBB simply decreases the stack pointer, since we know the current + // stacklet has enough space. + BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg) + .addReg(tmpSPVReg); + BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg) + .addReg(tmpSPVReg); + BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB); + + // Calls into a routine in libgcc to allocate more space from the heap. 
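// In C-level terms, the BB/bumpMBB/mallocMBB/continueMBB diamond built by
// this function behaves like the following simplified sketch ('sp' and
// 'stack_limit' stand in for the stack pointer and the per-thread limit
// read through %fs/%gs; the 32-bit argument-passing dance is elided):

#include <cstddef>

extern "C" void *__morestack_allocate_stack_space(size_t); // libgcc routine

static char *seg_alloca(char *&sp, char *stack_limit, size_t size) {
  char *tmp = sp - size;
  if (tmp < stack_limit)                 // the CMP + JG to mallocMBB
    return static_cast<char *>(__morestack_allocate_stack_space(size));
  sp = tmp;                              // bumpMBB: just bump the SP
  return tmp;                            // merged by the PHI in continueMBB
}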
+ if (Is64Bit) { + BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) + .addReg(sizeVReg); + BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) + .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI); + } else { + BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg) + .addImm(12); + BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg); + BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32)) + .addExternalSymbol("__morestack_allocate_stack_space"); + } + + if (!Is64Bit) + BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg) + .addImm(16); + + BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg) + .addReg(Is64Bit ? X86::RAX : X86::EAX); + BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB); + + // Set up the CFG correctly. + BB->addSuccessor(bumpMBB); + BB->addSuccessor(mallocMBB); + mallocMBB->addSuccessor(continueMBB); + bumpMBB->addSuccessor(continueMBB); + + // Take care of the PHI nodes. + BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI), + MI->getOperand(0).getReg()) + .addReg(mallocPtrVReg).addMBB(mallocMBB) + .addReg(bumpSPPtrVReg).addMBB(bumpMBB); + + // Delete the original pseudo instruction. + MI->eraseFromParent(); + + // And we're done. + return continueMBB; +} + +MachineBasicBlock * X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -10718,11 +11966,11 @@ MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { - default: assert(false && "Unexpected instr type to insert"); + default: assert(0 && "Unexpected instr type to insert"); case X86::TAILJMPd64: case X86::TAILJMPr64: case X86::TAILJMPm64: - assert(!"TAILJMP64 would not be touched here."); + assert(0 && "TAILJMP64 would not be touched here."); case X86::TCRETURNdi64: case X86::TCRETURNri64: case X86::TCRETURNmi64: @@ -10745,6 +11993,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return BB; case X86::WIN_ALLOCA: return EmitLoweredWinAlloca(MI, BB); + case X86::SEG_ALLOCA_32: + return EmitLoweredSegAlloca(MI, BB, false); + case X86::SEG_ALLOCA_64: + return EmitLoweredSegAlloca(MI, BB, true); case X86::TLSCall_32: case X86::TLSCall_64: return EmitLoweredTLSCall(MI, BB); @@ -10754,6 +12006,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::CMOV_V4F32: case X86::CMOV_V2F64: case X86::CMOV_V2I64: + case X86::CMOV_V8F32: + case X86::CMOV_V4F64: + case X86::CMOV_V4I64: case X86::CMOV_GR16: case X86::CMOV_GR32: case X86::CMOV_RFP32: @@ -11074,6 +12329,33 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(), Mask.getBitWidth() - 1); break; + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned NumLoBits = 0; + switch (IntId) { + default: break; + case Intrinsic::x86_sse_movmsk_ps: + case Intrinsic::x86_avx_movmsk_ps_256: + case Intrinsic::x86_sse2_movmsk_pd: + case Intrinsic::x86_avx_movmsk_pd_256: + case Intrinsic::x86_mmx_pmovmskb: + case Intrinsic::x86_sse2_pmovmskb_128: { + // High bits of movmskp{s|d}, pmovmskb are known zero. 
+ switch (IntId) { + case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break; + case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break; + case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break; + case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break; + case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break; + case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break; + } + KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(), + Mask.getBitWidth() - NumLoBits); + break; + } + } + break; + } } } @@ -11102,23 +12384,132 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, return TargetLowering::isGAPlusOffset(N, GA, Offset); } -/// PerformShuffleCombine - Combine a vector_shuffle that is equal to -/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load -/// if the load addresses are consecutive, non-overlapping, and in the right -/// order. +/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the +/// same as extracting the high 128-bit part of 256-bit vector and then +/// inserting the result into the low part of a new 256-bit vector +static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) { + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + + // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> + for (int i = 0, j = NumElems/2; i < NumElems/2; ++i, ++j) + if (!isUndefOrEqual(SVOp->getMaskElt(i), j) || + SVOp->getMaskElt(j) >= 0) + return false; + + return true; +} + +/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the +/// same as extracting the low 128-bit part of 256-bit vector and then +/// inserting the result into the high part of a new 256-bit vector +static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) { + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + + // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1> + for (int i = NumElems/2, j = 0; i < NumElems; ++i, ++j) + if (!isUndefOrEqual(SVOp->getMaskElt(i), j) || + SVOp->getMaskElt(j) >= 0) + return false; + + return true; +} + +/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors. +static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + DebugLoc dl = N->getDebugLoc(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + EVT VT = SVOp->getValueType(0); + int NumElems = VT.getVectorNumElements(); + + if (V1.getOpcode() == ISD::CONCAT_VECTORS && + V2.getOpcode() == ISD::CONCAT_VECTORS) { + // + // 0,0,0,... + // | + // V UNDEF BUILD_VECTOR UNDEF + // \ / \ / + // CONCAT_VECTOR CONCAT_VECTOR + // \ / + // \ / + // RESULT: V + zero extended + // + if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR || + V2.getOperand(1).getOpcode() != ISD::UNDEF || + V1.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode())) + return SDValue(); + + // To match the shuffle mask, the first half of the mask should + // be exactly the first vector, and all the rest a splat with the + // first element of the second one. + for (int i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(SVOp->getMaskElt(i), i) || + !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems)) + return SDValue(); + + // Emit a zeroed vector and insert the desired subvector on its + // first half. 
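// "Zeroed vector + insert on the first half" has a direct AVX-intrinsic
// counterpart (illustrative only): a 256-bit value whose low 128 bits are
// v and whose high 128 bits are zero -- the canonical form this combine
// rewrites the shuffle into.

#include <immintrin.h>

static inline __m256 zext_low_128(__m128 v) {
  return _mm256_insertf128_ps(_mm256_setzero_ps(), v, 0);
}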
+ SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl); + SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), + DAG.getConstant(0, MVT::i32), DAG, dl); + return DCI.CombineTo(N, InsV); + } + + //===--------------------------------------------------------------------===// + // Combine some shuffles into subvector extracts and inserts: + // + + // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> + if (isShuffleHigh128VectorInsertLow(SVOp)) { + SDValue V = Extract128BitVector(V1, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), + V, DAG.getConstant(0, MVT::i32), DAG, dl); + return DCI.CombineTo(N, InsV); + } + + // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1> + if (isShuffleLow128VectorInsertHigh(SVOp)) { + SDValue V = Extract128BitVector(V1, DAG.getConstant(0, MVT::i32), DAG, dl); + SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), + V, DAG.getConstant(NumElems/2, MVT::i32), DAG, dl); + return DCI.CombineTo(N, InsV); + } + + return SDValue(); +} + +/// PerformShuffleCombine - Performs several different shuffle combines. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); - if (VT.getSizeInBits() != 128) - return SDValue(); - // Don't create instructions with illegal types after legalize types has run. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType())) return SDValue(); + // Combine 256-bit vector shuffles. This is only profitable when in AVX mode + if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 && + N->getOpcode() == ISD::VECTOR_SHUFFLE) + return PerformShuffleCombine256(N, DAG, DCI); + + // Only handle 128 wide vector from here on. + if (VT.getSizeInBits() != 128) + return SDValue(); + + // Combine a vector_shuffle that is equal to build_vector load1, load2, load3, + // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are + // consecutive, non-overlapping, and in the right order. SmallVector<SDValue, 16> Elts; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); @@ -11209,7 +12600,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. +/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT +/// nodes. static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { DebugLoc DL = N->getDebugLoc(); @@ -11217,14 +12609,16 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Get the LHS/RHS of the select. SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); + EVT VT = LHS.getValueType(); // If we have SSE[12] support, try to form min/max nodes. SSE min/max // instructions match the semantics of the common C idiom x<y?x:y but not // x<=y?x:y, because of how they handle negative zero (which can be // ignored in unsafe-math mode). 
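// Concretely: MINSS(a, b) computes (a < b) ? a : b and returns the second
// operand when the inputs compare equal -- and +0.0 == -0.0. So
// "x < y ? x : y" is exactly MINSS, while "x <= y ? x : y" is not, as this
// small demo shows:

#include <xmmintrin.h>
#include <cmath>
#include <cstdio>

int main() {
  float sel = (0.0f <= -0.0f) ? 0.0f : -0.0f;               // +0.0
  float mn  = _mm_cvtss_f32(_mm_min_ss(_mm_set_ss(0.0f),
                                       _mm_set_ss(-0.0f))); // -0.0
  std::printf("select: %d  minss: %d\n",
              std::signbit(sel), std::signbit(mn));         // prints 0, 1
  return 0;
}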
- if (Subtarget->hasSSE2() && - (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && - Cond.getOpcode() == ISD::SETCC) { + if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && + VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) && + (Subtarget->hasXMMInt() || + (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) { ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); unsigned Opcode = 0; @@ -11267,7 +12661,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Converting this to a max would handle comparisons between positive // and negative zero incorrectly. if (!UnsafeFPMath && - !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(LHS)) + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) break; Opcode = X86ISD::FMAX; break; @@ -11680,7 +13074,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, // all elements are shifted by the same amount. We can't do this in legalize // because a constant vector is typically transformed to a constant pool // so we have no knowledge of the shift amount. - if (!Subtarget->hasSSE2()) + if (!Subtarget->hasXMMInt()) return SDValue(); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) @@ -11796,7 +13190,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but // we're requiring SSE2 for both. - if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { + if (Subtarget->hasXMMInt() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CMP0 = N0->getOperand(1); @@ -11864,6 +13258,36 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// CanFoldXORWithAllOnes - Test whether the XOR operand is an AllOnes vector +/// so it can be folded inside ANDNP. +static bool CanFoldXORWithAllOnes(const SDNode *N) { + EVT VT = N->getValueType(0); + + // Match direct AllOnes for 128- and 256-bit vectors + if (ISD::isBuildVectorAllOnes(N)) + return true; + + // Look through a bit convert.
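// The fold this predicate enables, in scalar form: an AND whose operand is
// an XOR against all-ones is a single and-not, i.e.
//   (x ^ ~0) & y  ==  ~x & y
// which is one PANDN/ANDNP per vector lane (or a BMI1 ANDN in the scalar
// case handled further down). Minimal model:

#include <cstdint>

static inline uint64_t andn_lane(uint64_t x, uint64_t y) {
  return ~x & y;
}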
+ if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0).getNode(); + + // Sometimes the operand may come from an insert_subvector building a 256-bit + // allones vector + if (VT.getSizeInBits() == 256 && + N->getOpcode() == ISD::INSERT_SUBVECTOR) { + SDValue V1 = N->getOperand(0); + SDValue V2 = N->getOperand(1); + + if (V1.getOpcode() == ISD::INSERT_SUBVECTOR && + V1.getOperand(0).getOpcode() == ISD::UNDEF && + ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) && + ISD::isBuildVectorAllOnes(V2.getNode())) + return true; + } + + return false; +} + static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -11874,11 +13298,28 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; + EVT VT = N->getValueType(0); + + // Create ANDN instructions + if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + // Check LHS for not + if (N0.getOpcode() == ISD::XOR && isAllOnes(N0.getOperand(1))) + return DAG.getNode(X86ISD::ANDN, DL, VT, N0.getOperand(0), N1); + // Check RHS for not + if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1))) + return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0); + + return SDValue(); + } + // Want to form ANDNP nodes: // 1) In the hopes of then easily combining them with OR and AND nodes // to form PBLEND/PSIGN. // 2) To match ANDN packed intrinsics - EVT VT = N->getValueType(0); if (VT != MVT::v2i64 && VT != MVT::v4i64) return SDValue(); @@ -11888,12 +13329,14 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, // Check LHS for vnot if (N0.getOpcode() == ISD::XOR && - ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) + //ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) + CanFoldXORWithAllOnes(N0.getOperand(1).getNode())) return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1); // Check RHS for vnot if (N1.getOpcode() == ISD::XOR && - ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) + //ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) + CanFoldXORWithAllOnes(N1.getOperand(1).getNode())) return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0); return SDValue(); @@ -11917,7 +13360,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, SDValue N1 = N->getOperand(1); // look for psign/blend - if (Subtarget->hasSSSE3()) { + if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { if (VT == MVT::v2i64) { // Canonicalize pandn to RHS if (N0.getOpcode() == X86ISD::ANDNP) @@ -11990,13 +13433,13 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, } } // PBLENDVB only available on SSE 4.1 - if (!Subtarget->hasSSE41()) + if (!(Subtarget->hasSSE41() || Subtarget->hasAVX())) return SDValue(); X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X); Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y); Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask); - Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask); + Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y); return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask); } } @@ -12057,24 +13500,211 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
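// A scalar model of the combine defined next: a <4 x i8> extending load
// becomes one 32-bit scalar load plus a shuffle that spreads the four
// bytes across the four i32 lanes (hypothetical helper; the byte math
// matches x86 because it is little-endian):

#include <cstdint>
#include <cstring>

static void extload_v4i8_to_v4i32(const uint8_t *mem, uint32_t out[4]) {
  uint32_t word;
  std::memcpy(&word, mem, 4);            // the single scalar load
  for (int i = 0; i < 4; ++i)            // the redistribution shuffle
    out[i] = (word >> (8 * i)) & 0xFFu;
}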
+static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + LoadSDNode *Ld = cast<LoadSDNode>(N); + EVT RegVT = Ld->getValueType(0); + EVT MemVT = Ld->getMemoryVT(); + DebugLoc dl = Ld->getDebugLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + ISD::LoadExtType Ext = Ld->getExtensionType(); + + // If this is a vector EXT Load then attempt to optimize it using a + // shuffle. We need SSE4 for the shuffles. + // TODO: It is possible to support ZExt by zeroing the undef values + // during the shuffle phase or after the shuffle. + if (RegVT.isVector() && Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) { + assert(MemVT != RegVT && "Cannot extend to the same type"); + assert(MemVT.isVector() && "Must load a vector from memory"); + + unsigned NumElems = RegVT.getVectorNumElements(); + unsigned RegSz = RegVT.getSizeInBits(); + unsigned MemSz = MemVT.getSizeInBits(); + assert(RegSz > MemSz && "Register size must be greater than the mem size"); + // All sizes must be a power of two + if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue(); + + // Attempt to load the original value using a single load op. + // Find a scalar type which is equal to the loaded word size. + MVT SclrLoadTy = MVT::i8; + for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; + tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { + MVT Tp = (MVT::SimpleValueType)tp; + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() == MemSz) { + SclrLoadTy = Tp; + break; + } + } + + // Proceed if a load word is found. + if (SclrLoadTy.getSizeInBits() != MemSz) return SDValue(); + + EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy, + RegSz/SclrLoadTy.getSizeInBits()); + + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + RegSz/MemVT.getScalarType().getSizeInBits()); + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + // Perform a single load. + SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), + Ld->getBasePtr(), + Ld->getPointerInfo(), Ld->isVolatile(), + Ld->isNonTemporal(), Ld->getAlignment()); + + // Insert the word loaded into a vector. + SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + LoadUnitVecVT, ScalarLoad); + + // Bitcast the loaded value to a vector of the original element type, in + // the size of the target vector type. + SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, ScalarInVector); + unsigned SizeRatio = RegSz/MemSz; + + // Redistribute the loaded elements into the different locations. + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; i++) ShuffleVec[i*SizeRatio] = i; + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, + DAG.getUNDEF(SlicedVec.getValueType()), + ShuffleVec.data()); + + // Bitcast to the requested type. + Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff); + // Replace the original load with the new sequence + // and return the new chain. + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shuff); + return SDValue(ScalarLoad.getNode(), 1); + } + + return SDValue(); +} + /// PerformSTORECombine - Do target-specific dag combines on STORE nodes. 
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { + StoreSDNode *St = cast<StoreSDNode>(N); + EVT VT = St->getValue().getValueType(); + EVT StVT = St->getMemoryVT(); + DebugLoc dl = St->getDebugLoc(); + SDValue StoredVal = St->getOperand(1); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // If we are saving a concatenation of two XMM registers, perform two stores. + // This is better on Sandy Bridge because one 256-bit mem op is done via two + // 128-bit ones. If in the future the cost becomes only one memory access, the + // first version would be better. + if (VT.getSizeInBits() == 256 && + StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS && + StoredVal.getNumOperands() == 2) { + + SDValue Value0 = StoredVal.getOperand(0); + SDValue Value1 = StoredVal.getOperand(1); + + SDValue Stride = DAG.getConstant(16, TLI.getPointerTy()); + SDValue Ptr0 = St->getBasePtr(); + SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride); + + SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); + } + + // Optimize trunc store (of multiple scalars) to shuffle and store. + // First, pack all of the elements in one place. Next, store to memory + // in fewer chunks. + if (St->isTruncatingStore() && VT.isVector()) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromSz = VT.getVectorElementType().getSizeInBits(); + unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + if (0 != (NumElems * FromSz) % ToSz) return SDValue(); + + unsigned SizeRatio = FromSz / ToSz; + + assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), + StVT.getScalarType(), NumElems*SizeRatio); + + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue()); + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; i++ ) ShuffleVec[i] = i * SizeRatio; + + // Can't shuffle using an illegal type + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec, + DAG.getUNDEF(WideVec.getValueType()), + ShuffleVec.data()); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem.
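// End to end, this trunc-store path is the mirror image of the extending-
// load combine above; a scalar model (hypothetical helper, little-endian):

#include <cstdint>
#include <cstring>

static void truncstore_v4i32_to_v4i8(const uint32_t v[4], uint8_t *mem) {
  uint32_t word = 0;
  for (int i = 0; i < 4; ++i)            // the packing shuffle
    word |= (v[i] & 0xFFu) << (8 * i);
  std::memcpy(mem, &word, 4);            // one wide store instead of four
}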
+ + // Find the largest store unit + MVT StoreType = MVT::i8; + for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; + tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { + MVT Tp = (MVT::SimpleValueType)tp; + if (TLI.isTypeLegal(Tp) && StoreType.getSizeInBits() < NumElems * ToSz) + StoreType = Tp; + } + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), + StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff); + SmallVector<SDValue, 8> Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, + TLI.getPointerTy()); + SDValue Ptr = St->getBasePtr(); + + // Perform one or more big stores into memory. + for (unsigned i = 0; i < (ToSz*NumElems)/StoreType.getSizeInBits() ; i++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + StoreType, ShuffWide, + DAG.getIntPtrConstant(i)); + SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + Chains.push_back(Ch); + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], + Chains.size()); + } + + // Turn load->store of MMX types into GPR load/stores. This avoids clobbering // the FP state in cases where an emms may be missing. // A preferable solution to the general problem is to figure out the right // places to insert EMMS. This qualifies as a quick hack. // Similarly, turn load->store of i64 into double load/stores in 32-bit mode. - StoreSDNode *St = cast<StoreSDNode>(N); - EVT VT = St->getValue().getValueType(); if (VT.getSizeInBits() != 64) return SDValue(); const Function *F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps - && Subtarget->hasSSE2(); + && Subtarget->hasXMMInt(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && isa<LoadSDNode>(St->getValue()) && @@ -12172,6 +13802,150 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// isHorizontalBinOp - Return 'true' if this vector operation is "horizontal" +/// and return the operands for the horizontal operation in LHS and RHS. A +/// horizontal operation performs the binary operation on successive elements +/// of its first operand, then on successive elements of its second operand, +/// returning the resulting values in a vector. For example, if +/// A = < float a0, float a1, float a2, float a3 > +/// and +/// B = < float b0, float b1, float b2, float b3 > +/// then the result of doing a horizontal operation on A and B is +/// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >. +/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form +/// A horizontal-op B, for some already available A and B, and if so then LHS is +/// set to A, RHS to B, and the routine returns 'true'. +/// Note that the binary operation should have the property that if one of the +/// operands is UNDEF then the result is UNDEF. 
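The truncating-store path shown above is the mirror image of the load combine: the mask ShuffleVec[i] = i * SizeRatio pulls each element's truncated low part down to lane i, so all live data ends up at the bottom of the register and can be written with one or a few wide stores. A standalone sketch for v4i32 -> v4i8, assuming a little-endian target; plain C++, not LLVM code:

    // Illustration only: pack four truncated i32 elements with the mask
    // ShuffleVec[i] = i * SizeRatio from PerformSTORECombine above.
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      const unsigned NumElems = 4, FromSz = 32, ToSz = 8;
      const unsigned SizeRatio = FromSz / ToSz;    // 4

      uint32_t Val[4] = {0x11, 0x22, 0x33, 0x44};  // value being stored
      uint8_t Wide[16];                            // WideVecVT = 16 x i8
      std::memcpy(Wide, Val, sizeof(Val));         // the BITCAST

      // ShuffleVec[i] = i * SizeRatio: each element's low byte (the
      // truncated value on little-endian) moves down to lane i.
      uint8_t Packed[16] = {};
      for (unsigned i = 0; i < NumElems; ++i)
        Packed[i] = Wide[i * SizeRatio];

      // All live data now sits in the bottom 32 bits, so a single i32
      // store replaces four separate i8 stores.
      uint8_t MemOut[4];
      std::memcpy(MemOut, Packed, sizeof(MemOut));
      for (unsigned i = 0; i < 4; ++i)
        assert(MemOut[i] == (uint8_t)Val[i]);
      return 0;
    }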
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { + // Look for the following pattern: if + // A = < float a0, float a1, float a2, float a3 > + // B = < float b0, float b1, float b2, float b3 > + // and + // LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6> + // RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7> + // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 > + // which is A horizontal-op B. + + // At least one of the operands should be a vector shuffle. + if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE && + RHS.getOpcode() != ISD::VECTOR_SHUFFLE) + return false; + + EVT VT = LHS.getValueType(); + unsigned N = VT.getVectorNumElements(); + + // View LHS in the form + // LHS = VECTOR_SHUFFLE A, B, LMask + // If LHS is not a shuffle then pretend it is the shuffle + // LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1> + // NOTE: in what follows a default initialized SDValue represents an UNDEF of + // type VT. + SDValue A, B; + SmallVector<int, 8> LMask(N); + if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) { + if (LHS.getOperand(0).getOpcode() != ISD::UNDEF) + A = LHS.getOperand(0); + if (LHS.getOperand(1).getOpcode() != ISD::UNDEF) + B = LHS.getOperand(1); + cast<ShuffleVectorSDNode>(LHS.getNode())->getMask(LMask); + } else { + if (LHS.getOpcode() != ISD::UNDEF) + A = LHS; + for (unsigned i = 0; i != N; ++i) + LMask[i] = i; + } + + // Likewise, view RHS in the form + // RHS = VECTOR_SHUFFLE C, D, RMask + SDValue C, D; + SmallVector<int, 8> RMask(N); + if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) { + if (RHS.getOperand(0).getOpcode() != ISD::UNDEF) + C = RHS.getOperand(0); + if (RHS.getOperand(1).getOpcode() != ISD::UNDEF) + D = RHS.getOperand(1); + cast<ShuffleVectorSDNode>(RHS.getNode())->getMask(RMask); + } else { + if (RHS.getOpcode() != ISD::UNDEF) + C = RHS; + for (unsigned i = 0; i != N; ++i) + RMask[i] = i; + } + + // Check that the shuffles are both shuffling the same vectors. + if (!(A == C && B == D) && !(A == D && B == C)) + return false; + + // If everything is UNDEF then bail out: it would be better to fold to UNDEF. + if (!A.getNode() && !B.getNode()) + return false; + + // If A and B occur in reverse order in RHS, then "swap" them (which means + // rewriting the mask). + if (A != C) + for (unsigned i = 0; i != N; ++i) { + unsigned Idx = RMask[i]; + if (Idx < N) + RMask[i] += N; + else if (Idx < 2*N) + RMask[i] -= N; + } + + // At this point LHS and RHS are equivalent to + // LHS = VECTOR_SHUFFLE A, B, LMask + // RHS = VECTOR_SHUFFLE A, B, RMask + // Check that the masks correspond to performing a horizontal operation. + for (unsigned i = 0; i != N; ++i) { + unsigned LIdx = LMask[i], RIdx = RMask[i]; + + // Ignore any UNDEF components. + if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N)) + || (!B.getNode() && (LIdx >= N || RIdx >= N))) + continue; + + // Check that successive elements are being operated on. If not, this is + // not a horizontal operation. + if (!(LIdx == 2*i && RIdx == 2*i + 1) && + !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i)) + return false; + } + + LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it. + RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it. + return true; +} + +/// PerformFADDCombine - Do target-specific dag combines on floating point adds. 
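The heart of isHorizontalBinOp above is the per-lane mask test: lane i of LHS must read element 2*i and lane i of RHS element 2*i+1, or the other way around when the operation is commutative. A standalone check of just that test, in plain C++; the UNDEF-lane handling from the real routine is deliberately omitted to keep the sketch short:

    // Illustration only: the successive-elements check from
    // isHorizontalBinOp above, without the UNDEF special cases.
    #include <cassert>

    static bool isHorizontalMaskPair(const int *LMask, const int *RMask,
                                     unsigned N, bool isCommutative) {
      for (unsigned i = 0; i != N; ++i) {
        int LIdx = LMask[i], RIdx = RMask[i];
        if (!(LIdx == int(2 * i) && RIdx == int(2 * i + 1)) &&
            !(isCommutative && LIdx == int(2 * i + 1) && RIdx == int(2 * i)))
          return false;
      }
      return true;
    }

    int main() {
      // LHS = shuffle A,B,<0,2,4,6>, RHS = shuffle A,B,<1,3,5,7> gives
      // LHS op RHS = <a0 op a1, a2 op a3, b0 op b1, b2 op b3>.
      int L[4] = {0, 2, 4, 6}, R[4] = {1, 3, 5, 7};
      assert(isHorizontalMaskPair(L, R, 4, /*isCommutative=*/false));

      int Bad[4] = {0, 2, 5, 6};   // lane 2 reads element 5, not 4
      assert(!isHorizontalMaskPair(Bad, R, 4, false));
      return 0;
    }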
+static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // Try to synthesize horizontal adds from adds of shuffles. + if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) && + (VT == MVT::v4f32 || VT == MVT::v2f64) && + isHorizontalBinOp(LHS, RHS, true)) + return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS); + return SDValue(); +} + +/// PerformFSUBCombine - Do target-specific dag combines on floating point subs. +static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // Try to synthesize horizontal subs from subs of shuffles. + if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) && + (VT == MVT::v4f32 || VT == MVT::v2f64) && + isHorizontalBinOp(LHS, RHS, false)) + return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS); + return SDValue(); +} + /// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and /// X86ISD::FXOR nodes. static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) { @@ -12326,7 +14100,7 @@ static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG, // (add Y, (setne X, 0)) -> sbb -1, Y // (sub (sete X, 0), Y) -> sbb 0, Y // (sub (setne X, 0), Y) -> adc -1, Y -static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) { +static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) { DebugLoc DL = N->getDebugLoc(); // Look through ZExts. @@ -12362,6 +14136,31 @@ static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) { DAG.getConstant(0, OtherVal.getValueType()), NewCmp); } +static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // X86 can't encode an immediate LHS of a sub. See if we can push the + // negation into a preceding instruction. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) { + // If the RHS of the sub is a XOR with one use and a constant, invert the + // immediate. Then add one to the LHS of the sub so we can turn + // X-Y -> X+~Y+1, saving one register. 
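For reference, a scalar model of the X86ISD::FHADD node that PerformFADDCombine above now emits, matching SSE3 haddps on v4f32; FHSUB is identical with '-' in place of '+'. This is a plain C++ sketch of the semantics, not LLVM code:

    // Illustration only: haddps-style horizontal add on two 4-float vectors.
    #include <cassert>

    static void fhadd_v4f32(const float A[4], const float B[4], float R[4]) {
      R[0] = A[0] + A[1];
      R[1] = A[2] + A[3];
      R[2] = B[0] + B[1];
      R[3] = B[2] + B[3];
    }

    int main() {
      float A[4] = {1, 2, 3, 4}, B[4] = {10, 20, 30, 40}, R[4];
      fhadd_v4f32(A, B, R);
      assert(R[0] == 3 && R[1] == 7 && R[2] == 30 && R[3] == 70);
      return 0;
    }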
+ if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR && + isa<ConstantSDNode>(Op1.getOperand(1))) { + APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue(); + EVT VT = Op0.getValueType(); + SDValue NewXor = DAG.getNode(ISD::XOR, Op1.getDebugLoc(), VT, + Op1.getOperand(0), + DAG.getConstant(~XorC, VT)); + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, NewXor, + DAG.getConstant(C->getAPIntValue()+1, VT)); + } + } + + return OptimizeConditionalInDecrement(N, DAG); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -12369,10 +14168,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::EXTRACT_VECTOR_ELT: return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); + case ISD::VSELECT: case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); - case ISD::ADD: - case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG); + case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG); + case ISD::SUB: return PerformSubCombine(N, DAG); case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: @@ -12380,8 +14180,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); + case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this); + case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget); + case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); case X86ISD::FAND: return PerformFANDCombine(N, DAG); @@ -12398,14 +14201,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKHQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: + case X86ISD::VUNPCKHPSY: + case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPS: - case X86ISD::VUNPCKLPD: case X86ISD::VUNPCKLPSY: case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: @@ -12415,7 +14218,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PSHUFLW: case X86ISD::MOVSS: case X86ISD::MOVSD: - case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI); + case X86ISD::VPERMILPS: + case X86ISD::VPERMILPSY: + case X86ISD::VPERMILPD: + case X86ISD::VPERMILPDY: + case X86ISD::VPERM2F128: + case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); } return SDValue(); @@ -12551,7 +14359,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[1] == "${0:q}")) { // No need to check constraints, nothing other than the equivalent of // "=r,0" would be valid here. 
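The PerformSubCombine rewrite above rests on a two's-complement identity: for constants C and XorC, C - (X ^ XorC) equals (X ^ ~XorC) + (C + 1), because negating a value is the same as complementing it and adding one. A quick standalone check in plain C++; unsigned wraparound gives exactly two's-complement behavior:

    // Illustration only: the X-Y -> X+~Y+1 identity behind PerformSubCombine.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 5, XorC = 0x00ff00ff;
      for (uint32_t X : {0u, 1u, 0xdeadbeefu, 0xffffffffu}) {
        uint32_t Y = X ^ XorC;                  // the one-use XOR operand
        uint32_t Orig = C - Y;                  // sub with an immediate LHS
        uint32_t Comb = (X ^ ~XorC) + (C + 1);  // inverted xor + add
        assert(Orig == Comb);
      }
      return 0;
    }

The payoff is encoding-related: x86 cannot encode an immediate as the minuend of sub, so the xor/add form avoids materializing C in a register.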
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty || Ty->getBitWidth() % 16 != 0) return false; return IntrinsicLowering::LowerToByteSwap(CI); @@ -12572,7 +14380,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[1] == "~{dirflag}" && AsmPieces[2] == "~{flags}" && AsmPieces[3] == "~{fpsr}") { - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty || Ty->getBitWidth() % 16 != 0) return false; return IntrinsicLowering::LowerToByteSwap(CI); @@ -12603,7 +14411,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[1] == "~{dirflag}" && AsmPieces[2] == "~{flags}" && AsmPieces[3] == "~{fpsr}") { - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty || Ty->getBitWidth() % 16 != 0) return false; return IntrinsicLowering::LowerToByteSwap(CI); @@ -12629,7 +14437,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { SplitString(AsmPieces[2], Words, " \t,"); if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" && Words[2] == "%edx") { - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty || Ty->getBitWidth() % 16 != 0) return false; return IntrinsicLowering::LowerToByteSwap(CI); @@ -12700,7 +14508,7 @@ TargetLowering::ConstraintWeight // but allow it at the lowest weight. if (CallOperandVal == NULL) return CW_Default; - const Type *type = CallOperandVal->getType(); + Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index b603678..342a5e6 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -175,8 +175,14 @@ namespace llvm { /// PSIGNB/W/D - Copy integer sign. PSIGNB, PSIGNW, PSIGND, - /// PBLENDVB - Variable blend - PBLENDVB, + /// BLEND family of opcodes + BLENDV, + + /// FHADD - Floating point horizontal add. + FHADD, + + /// FHSUB - Floating point horizontal sub. + FHSUB, /// FMAX, FMIN - Floating point max and min. /// @@ -222,6 +228,8 @@ namespace llvm { ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, + ANDN, // ANDN - Bitwise AND NOT with FLAGS results. + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. @@ -257,12 +265,12 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, - VUNPCKLPS, - VUNPCKLPD, VUNPCKLPSY, VUNPCKLPDY, UNPCKHPS, UNPCKHPD, + VUNPCKHPSY, + VUNPCKHPDY, PUNPCKLBW, PUNPCKLWD, PUNPCKLDQ, @@ -271,6 +279,12 @@ namespace llvm { PUNPCKHWD, PUNPCKHDQ, PUNPCKHQDQ, + VPERMILPS, + VPERMILPSY, + VPERMILPD, + VPERMILPDY, + VPERM2F128, + VBROADCAST, // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded @@ -280,6 +294,11 @@ namespace llvm { // WIN_ALLOCA - Windows's _chkstk call to do stack probing. WIN_ALLOCA, + // SEG_ALLOCA - For allocating variable amounts of stack space when using + // segmented stacks. Check if the current stacklet has enough space, and + // falls back to heap allocation if not. + SEG_ALLOCA, + // Memory barrier MEMBARRIER, MFENCE, @@ -297,9 +316,10 @@ namespace llvm { ATOMNAND64_DAG, ATOMSWAP64_DAG, - // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap. 
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap. LCMPXCHG_DAG, LCMPXCHG8_DAG, + LCMPXCHG16_DAG, // VZEXT_LOAD - Load, scalar_to_vector, and zero extend. VZEXT_LOAD, @@ -407,20 +427,16 @@ namespace llvm { /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. - bool isMOVSHDUPMask(ShuffleVectorSDNode *N); + bool isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget); /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. - bool isMOVSLDUPMask(ShuffleVectorSDNode *N); + bool isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget); /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVDDUP. bool isMOVDDUPMask(ShuffleVectorSDNode *N); - /// isPALIGNRMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to PALIGNR. - bool isPALIGNRMask(ShuffleVectorSDNode *N); - /// isVEXTRACTF128Index - Return true if the specified /// EXTRACT_SUBVECTOR operand specifies a vector extract that is /// suitable for input to VEXTRACTF128. @@ -505,7 +521,7 @@ namespace llvm { /// function arguments in the caller parameter area. For X86, aggregates /// that contains are placed at 16-byte boundaries while the rest are at /// 4-byte boundaries. - virtual unsigned getByValTypeAlignment(const Type *Ty) const; + virtual unsigned getByValTypeAlignment(Type *Ty) const; /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove @@ -564,8 +580,8 @@ namespace llvm { /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; - /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + /// getSetCCResultType - Return the value type to use for ISD::SETCC. + virtual EVT getSetCCResultType(EVT VT) const; /// computeMaskedBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the @@ -617,12 +633,12 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; + virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; /// isTruncateFree - Return true if it's free to truncate a value of /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in /// register EAX to i16 by referencing its sub-register AX. - virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; virtual bool isTruncateFree(EVT VT1, EVT VT2) const; /// isZExtFree - Return true if any actual instruction that defines a @@ -633,7 +649,7 @@ namespace llvm { /// does not necessarily apply to truncate instructions. e.g. on x86-64, /// all instructions that define 32-bit values implicit zero-extend the /// result out to 64 bits. 
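The isZExtFree hook documented above models a well-known x86-64 property: every instruction that defines a 32-bit register also clears bits 63..32, so a later i32-to-i64 zero extension needs no code. A small illustration of the value-level fact in plain C++; the claim about emitted instructions is about x86-64 codegen, not C semantics:

    // Illustration only: the value relationship a "free" zext preserves.
    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Big = 0xffffffff12345678ull;
      uint32_t Low = (uint32_t)Big;  // e.g. the result of a 32-bit op
      uint64_t Z = Low;              // on x86-64 this zext is a no-op:
                                     // the defining 32-bit instruction
                                     // already cleared the upper half
      assert(Z == 0x12345678ull);
      return 0;
    }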
- virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const; + virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; virtual bool isZExtFree(EVT VT1, EVT VT2) const; /// isNarrowingProfitable - Return true if it's profitable to narrow @@ -813,11 +829,14 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; @@ -825,6 +844,7 @@ namespace llvm { SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; // Utility functions to help LowerVECTOR_SHUFFLE @@ -931,6 +951,10 @@ namespace llvm { MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI, + MachineBasicBlock *BB, + bool Is64Bit) const; + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const; diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td index 9f7a4b0..74b647a 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -650,6 +650,15 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> let isCodeGenOnly = 1; } +// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding). +class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> + : ITy<opcode, MRMSrcReg, typeinfo, (outs), + (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", []> { + // The disassembler should know about this, but not the asmparser. + let isCodeGenOnly = 1; +} + // BinOpRM - Instructions like "add reg, reg, [mem]". class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> @@ -857,11 +866,10 @@ class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo, // BinOpAI - Instructions like "add %eax, %eax, imm". 
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - Register areg> + Register areg, string operands> : ITy<opcode, RawFrm, typeinfo, (outs), (ins typeinfo.ImmOperand:$src), - mnemonic, !strconcat("{$src, %", areg.AsmName, "|%", - areg.AsmName, ", $src}"), []> { + mnemonic, operands, []> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg]; @@ -926,10 +934,14 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>; def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>; - def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>; - def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>; - def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>; - def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>; + def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|AL, $src}">; + def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|AX, $src}">; + def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, + "{$src, %rax|RAX, $src}">; } } @@ -993,10 +1005,14 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>; def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>; - def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>; - def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>; - def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>; - def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>; + def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|AL, $src}">; + def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|AX, $src}">; + def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, + "{$src, %rax|RAX, $src}">; } } @@ -1017,10 +1033,10 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; } // isCommutable - def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; - def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; - def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; - def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; + def #NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; + def #NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>; + def #NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>; + def #NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>; @@ -1056,10 +1072,14 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>; def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>; - def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>; - def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>; - def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>; - def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>; + def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|AL, $src}">; + def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|AX, $src}">; + def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def #NAME#64i32 : 
BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, + "{$src, %rax|RAX, $src}">; } } @@ -1117,9 +1137,37 @@ let Defs = [EFLAGS] in { def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>; def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>; - def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL>; - def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX>; - def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX>; - def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX>; -} + def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL, + "{$src, %al|AL, $src}">; + def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX, + "{$src, %ax|AX, $src}">; + def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX, + "{$src, %rax|RAX, $src}">; + + // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the + // register class is constrained to GR8_NOREX. + let isPseudo = 1 in + def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), + "", []>; +} +//===----------------------------------------------------------------------===// +// ANDN Instruction +// +multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, + PatFrag ld_frag> { + def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))]>; + def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), + !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, EFLAGS, + (X86andn_flag RC:$src1, (ld_frag addr:$src2)))]>; +} + +let Predicates = [HasBMI], Defs = [EFLAGS] in { + defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8, VEX_4V; + defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W; +} diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td index adcc747..da28690 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td @@ -106,6 +106,26 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in def WIN_ALLOCA : I<0, Pseudo, (outs), (ins), "# dynamic stack allocation", [(X86WinAlloca)]>; + +// When using segmented stacks these are lowered into instructions which first +// check if the current stacklet has enough free memory. If it does, memory is +// allocated by bumping the stack pointer. Otherwise memory is allocated from +// the heap. + +let Defs = [EAX, ESP, EFLAGS], Uses = [ESP, EAX] in +def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size), + "# variable sized alloca for segmented stacks", + [(set GR32:$dst, + (X86SegAlloca GR32:$size))]>, + Requires<[In32BitMode]>; + +let Defs = [RAX, RSP, EFLAGS], Uses = [RSP, RAX] in +def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), + "# variable sized alloca for segmented stacks", + [(set GR64:$dst, + (X86SegAlloca GR64:$size))]>, + Requires<[In64BitMode]>; + } @@ -329,18 +349,11 @@ def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), //===----------------------------------------------------------------------===// // Conditional Move Pseudo Instructions -let Constraints = "$src1 = $dst" in { - -// Conditional moves -let Uses = [EFLAGS] in { - // X86 doesn't have 8-bit conditional moves. Use a customInserter to // emit control flow. 
An alternative to this is to mark i8 SELECT as Promote, // however that requires promoting the operands, and can induce additional -// i8 register pressure. Note that CMOV_GR8 is conservatively considered to -// clobber EFLAGS, because if one of the operands is zero, the expansion -// could involve an xor. -let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in { +// i8 register pressure. +let usesCustomInserter = 1, Uses = [EFLAGS] in { def CMOV_GR8 : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond), "#CMOV_GR8 PSEUDO!", @@ -380,10 +393,7 @@ def CMOV_RFP80 : I<0, Pseudo, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond, EFLAGS))]>; } // Predicates = [NoCMov] -} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] -} // Uses = [EFLAGS] - -} // Constraints = "$src1 = $dst" in +} // UsesCustomInserter = 1, Uses = [EFLAGS] //===----------------------------------------------------------------------===// @@ -532,7 +542,7 @@ def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), let hasSideEffects = 1 in def Int_MemBarrier : I<0, Pseudo, (outs), (ins), "#MEMBARRIER", - [(X86MemBarrier)]>, Requires<[HasSSE2]>; + [(X86MemBarrier)]>; // TODO: Get this to fold the constant into the instruction. let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in @@ -630,8 +640,8 @@ def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">; defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">; defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">; -defm LOCK_AND : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM4m, "and">; -defm LOCK_XOR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM6m, "xor">; +defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">; +defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">; // Optimized codegen when the non-memory output is not used. let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { @@ -665,12 +675,20 @@ def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), // Atomic compare and swap. 
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], - isCodeGenOnly = 1 in { + isCodeGenOnly = 1 in def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), "lock\n\t" "cmpxchg8b\t$ptr", [(X86cas8 addr:$ptr)]>, TB, LOCK; -} + +let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], + isCodeGenOnly = 1 in +def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr), + "lock\n\t" + "cmpxchg16b\t$ptr", + [(X86cas16 addr:$ptr)]>, TB, LOCK, + Requires<[HasCmpxchg16b]>; + let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in { def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), "lock\n\t" @@ -695,7 +713,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in { def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), "lock\n\t" - "cmpxchgq\t$swap,$ptr", + "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}", [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; } @@ -718,11 +736,37 @@ def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), TB, LOCK; def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr), "lock\n\t" - "xadd\t$val, $ptr", + "xadd{q}\t{$val, $ptr|$ptr, $val}", [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, TB, LOCK; } +def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src), + "#ACQUIRE_MOV PSEUDO!", + [(set GR8:$dst, (atomic_load_8 addr:$src))]>; +def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src), + "#ACQUIRE_MOV PSEUDO!", + [(set GR16:$dst, (atomic_load_16 addr:$src))]>; +def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src), + "#ACQUIRE_MOV PSEUDO!", + [(set GR32:$dst, (atomic_load_32 addr:$src))]>; +def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src), + "#ACQUIRE_MOV PSEUDO!", + [(set GR64:$dst, (atomic_load_64 addr:$src))]>; + +def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src), + "#RELEASE_MOV PSEUDO!", + [(atomic_store_8 addr:$dst, GR8 :$src)]>; +def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src), + "#RELEASE_MOV PSEUDO!", + [(atomic_store_16 addr:$dst, GR16:$src)]>; +def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src), + "#RELEASE_MOV PSEUDO!", + [(atomic_store_32 addr:$dst, GR32:$src)]>; +def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src), + "#RELEASE_MOV PSEUDO!", + [(atomic_store_64 addr:$dst, GR64:$src)]>; + //===----------------------------------------------------------------------===// // Conditional Move Pseudo Instructions. 
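The atomic pseudos added above are what ordinary C++11 atomics select on x86-64: LCMPXCHG16B backs 16-byte compare-and-swap, and the ACQUIRE_MOV*/RELEASE_MOV* pseudos back plain atomic loads and stores, which need no fence on x86's strong memory model. A standalone sketch; whether the 16-byte case is actually lock-free depends on -mcx16 and the runtime (libatomic may be needed), so treat it as illustrative:

    // Illustration only: C++11 atomics that exercise these lowerings.
    #include <atomic>
    #include <cassert>
    #include <cstdint>

    struct alignas(16) Pair { uint64_t lo, hi; };  // 16-byte payload

    int main() {
      std::atomic<Pair> P{{1, 2}};
      Pair Expected{1, 2}, Desired{3, 4};
      // With cmpxchg16b available this can lower to lock cmpxchg16b.
      bool OK = P.compare_exchange_strong(Expected, Desired);
      assert(OK && P.load().lo == 3 && P.load().hi == 4);

      std::atomic<uint32_t> Flag{0};
      Flag.store(1, std::memory_order_release);           // a plain mov store
      uint32_t V = Flag.load(std::memory_order_acquire);  // a plain mov load
      assert(V == 1);
      return 0;
    }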
//===----------------------------------------------------------------------===// @@ -759,6 +803,24 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in { [(set VR128:$dst, (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)))]>; + def CMOV_V8F32 : I<0, Pseudo, + (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond), + "#CMOV_V8F32 PSEUDO!", + [(set VR256:$dst, + (v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond, + EFLAGS)))]>; + def CMOV_V4F64 : I<0, Pseudo, + (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond), + "#CMOV_V4F64 PSEUDO!", + [(set VR256:$dst, + (v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond, + EFLAGS)))]>; + def CMOV_V4I64 : I<0, Pseudo, + (outs VR256:$dst), (ins VR256:$t, VR256:$f, i8imm:$cond), + "#CMOV_V4I64 PSEUDO!", + [(set VR256:$dst, + (v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond, + EFLAGS)))]>; } diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td index 2e1d523..e62e6b7 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td +++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td @@ -76,12 +76,12 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), // except that they use GR32_NOREX for the output operand register class // instead of GR32. This allows them to operate on h registers on x86-64. def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, - (outs GR32_NOREX:$dst), (ins GR8:$src), + (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", []>, TB; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, - (outs GR32_NOREX:$dst), (ins i8mem:$src), + (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", []>, TB; diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td index 6d89bcc..0a1590b 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td @@ -113,6 +113,7 @@ class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } class VEX_L { bit hasVEX_L = 1; } +class VEX_LIG { bit ignoresVEX_L = 1; } class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, @@ -150,6 +151,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register // to be encoded in a immediate field? bit hasVEX_L = 0; // Does this inst use large (256-bit) registers? + bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding? // TSFlags layout should be kept in sync with X86InstrInfo.h. 
@@ -169,7 +171,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{35} = hasVEX_4VPrefix; let TSFlags{36} = hasVEX_i8ImmReg; let TSFlags{37} = hasVEX_L; - let TSFlags{38} = has3DNow0F0FOpcode; + let TSFlags{38} = ignoresVEX_L; + let TSFlags{39} = has3DNow0F0FOpcode; } class PseudoI<dag oops, dag iops, list<dag> pattern> @@ -501,6 +504,9 @@ class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm, class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : PDI<o, F, outs, ins, asm, pattern>, REX_W; +class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : VPDI<o, F, outs, ins, asm, pattern>, VEX_W; // MMX Instruction templates // diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index b00109c..af919fb 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -39,6 +39,8 @@ def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>; +def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; +def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>; @@ -49,18 +51,15 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB", def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86psignb : SDNode<"X86ISD::PSIGNB", +def X86psignb : SDNode<"X86ISD::PSIGNB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86psignw : SDNode<"X86ISD::PSIGNW", +def X86psignw : SDNode<"X86ISD::PSIGNW", SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86psignd : SDNode<"X86ISD::PSIGND", +def X86psignd : SDNode<"X86ISD::PSIGND", SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86pblendv : SDNode<"X86ISD::PBLENDVB", - SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; def X86pextrw : SDNode<"X86ISD::PEXTRW", @@ -109,6 +108,8 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; +def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; + def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; @@ -133,12 +134,15 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; -def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; -def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; +def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>; def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>; -def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; -def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; + +def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", 
SDTShuff2Op>; +def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; +def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>; +def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>; def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; @@ -150,6 +154,15 @@ def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; +def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>; +def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>; +def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>; +def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>; + +def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>; + +def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// @@ -193,17 +206,28 @@ def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; -// Like 'store', but always requires vector alignment. +// Like 'store', but always requires 128-bit vector alignment. def alignedstore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return cast<StoreSDNode>(N)->getAlignment() >= 16; }]>; -// Like 'load', but always requires vector alignment. +// Like 'store', but always requires 256-bit vector alignment. +def alignedstore256 : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 32; +}]>; + +// Like 'load', but always requires 128-bit vector alignment. def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 16; }]>; +// Like 'load', but always requires 256-bit vector alignment. +def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 32; +}]>; + def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; def alignedloadfsf64 : PatFrag<(ops node:$ptr), @@ -221,13 +245,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr), // 256-bit aligned load pattern fragments def alignedloadv8f32 : PatFrag<(ops node:$ptr), - (v8f32 (alignedload node:$ptr))>; + (v8f32 (alignedload256 node:$ptr))>; def alignedloadv4f64 : PatFrag<(ops node:$ptr), - (v4f64 (alignedload node:$ptr))>; + (v4f64 (alignedload256 node:$ptr))>; def alignedloadv8i32 : PatFrag<(ops node:$ptr), - (v8i32 (alignedload node:$ptr))>; + (v8i32 (alignedload256 node:$ptr))>; def alignedloadv4i64 : PatFrag<(ops node:$ptr), - (v4i64 (alignedload node:$ptr))>; + (v4i64 (alignedload256 node:$ptr))>; // Like 'load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to @@ -356,7 +380,7 @@ def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{ return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N)); }]>; -// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to +// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to // VINSERTF128 imm. 
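The alignedload/alignedstore split above distinguishes the alignment an SSE op needs (16 bytes) from what an aligned AVX 256-bit op needs (32 bytes), via the getAlignment() >= 16 and >= 32 checks in the PatFrags. The same predicate logic on raw pointers, as a plain C++ sketch:

    // Illustration only: the 128-bit vs 256-bit alignment predicates.
    #include <cassert>
    #include <cstdint>

    static bool isAlignedForV128(const void *P) {
      return reinterpret_cast<uintptr_t>(P) % 16 == 0;
    }
    static bool isAlignedForV256(const void *P) {
      return reinterpret_cast<uintptr_t>(P) % 32 == 0;
    }

    int main() {
      alignas(32) unsigned char Buf[64];
      assert(isAlignedForV128(Buf) && isAlignedForV256(Buf));
      assert(isAlignedForV128(Buf + 16));   // 16-byte aligned...
      assert(!isAlignedForV256(Buf + 16));  // ...but not enough for 256-bit
      return 0;
    }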
def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{ return getI8Imm(X86::getInsertVINSERTF128Immediate(N)); @@ -398,16 +422,6 @@ def movl : PatFrag<(ops node:$lhs, node:$rhs), return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N)); }]>; -def movshdup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movsldup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - def unpckl : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); @@ -418,16 +432,6 @@ def unpckh : PatFrag<(ops node:$lhs, node:$rhs), return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); }]>; -def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - def pshufd : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); @@ -448,11 +452,6 @@ def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); }], SHUFFLE_get_pshuflw_imm>; -def palign : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_palign_imm>; - def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index), (extract_subvector node:$bigvec, node:$index), [{ @@ -465,3 +464,4 @@ def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec, node:$index), [{ return X86::isVINSERTF128Index(N); }], INSERT_get_vinsertf128_imm>; + diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index 55b5835..3a02de0 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -53,6 +53,36 @@ ReMatPICStubLoad("remat-pic-stub-load", cl::desc("Re-materialize load from stub in PIC mode"), cl::init(false), cl::Hidden); +enum { + // Select which memory operand is being unfolded. + // (stored in bits 0 - 7) + TB_INDEX_0 = 0, + TB_INDEX_1 = 1, + TB_INDEX_2 = 2, + TB_INDEX_MASK = 0xff, + + // Minimum alignment required for load/store. + // Used for RegOp->MemOp conversion. + // (stored in bits 8 - 15) + TB_ALIGN_SHIFT = 8, + TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, + TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, + TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, + TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT, + + // Do not insert the reverse map (MemOp -> RegOp) into the table. + // This may be needed because there is a many -> one mapping. + TB_NO_REVERSE = 1 << 16, + + // Do not insert the forward map (RegOp -> MemOp) into the table. + // This is needed for Native Client, which prohibits branch + // instructions from using a memory operand. + TB_NO_FORWARD = 1 << 17, + + TB_FOLDED_LOAD = 1 << 18, + TB_FOLDED_STORE = 1 << 19 +}; + X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit() ? X86::ADJCALLSTACKDOWN64 @@ -61,655 +91,829 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) ? 
X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)), TM(tm), RI(tm, *this) { - enum { - TB_NOT_REVERSABLE = 1U << 31, - TB_FLAGS = TB_NOT_REVERSABLE - }; - static const unsigned OpTbl2Addr[][2] = { - { X86::ADC32ri, X86::ADC32mi }, - { X86::ADC32ri8, X86::ADC32mi8 }, - { X86::ADC32rr, X86::ADC32mr }, - { X86::ADC64ri32, X86::ADC64mi32 }, - { X86::ADC64ri8, X86::ADC64mi8 }, - { X86::ADC64rr, X86::ADC64mr }, - { X86::ADD16ri, X86::ADD16mi }, - { X86::ADD16ri8, X86::ADD16mi8 }, - { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE }, - { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE }, - { X86::ADD16rr, X86::ADD16mr }, - { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE }, - { X86::ADD32ri, X86::ADD32mi }, - { X86::ADD32ri8, X86::ADD32mi8 }, - { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE }, - { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE }, - { X86::ADD32rr, X86::ADD32mr }, - { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE }, - { X86::ADD64ri32, X86::ADD64mi32 }, - { X86::ADD64ri8, X86::ADD64mi8 }, - { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE }, - { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE }, - { X86::ADD64rr, X86::ADD64mr }, - { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE }, - { X86::ADD8ri, X86::ADD8mi }, - { X86::ADD8rr, X86::ADD8mr }, - { X86::AND16ri, X86::AND16mi }, - { X86::AND16ri8, X86::AND16mi8 }, - { X86::AND16rr, X86::AND16mr }, - { X86::AND32ri, X86::AND32mi }, - { X86::AND32ri8, X86::AND32mi8 }, - { X86::AND32rr, X86::AND32mr }, - { X86::AND64ri32, X86::AND64mi32 }, - { X86::AND64ri8, X86::AND64mi8 }, - { X86::AND64rr, X86::AND64mr }, - { X86::AND8ri, X86::AND8mi }, - { X86::AND8rr, X86::AND8mr }, - { X86::DEC16r, X86::DEC16m }, - { X86::DEC32r, X86::DEC32m }, - { X86::DEC64_16r, X86::DEC64_16m }, - { X86::DEC64_32r, X86::DEC64_32m }, - { X86::DEC64r, X86::DEC64m }, - { X86::DEC8r, X86::DEC8m }, - { X86::INC16r, X86::INC16m }, - { X86::INC32r, X86::INC32m }, - { X86::INC64_16r, X86::INC64_16m }, - { X86::INC64_32r, X86::INC64_32m }, - { X86::INC64r, X86::INC64m }, - { X86::INC8r, X86::INC8m }, - { X86::NEG16r, X86::NEG16m }, - { X86::NEG32r, X86::NEG32m }, - { X86::NEG64r, X86::NEG64m }, - { X86::NEG8r, X86::NEG8m }, - { X86::NOT16r, X86::NOT16m }, - { X86::NOT32r, X86::NOT32m }, - { X86::NOT64r, X86::NOT64m }, - { X86::NOT8r, X86::NOT8m }, - { X86::OR16ri, X86::OR16mi }, - { X86::OR16ri8, X86::OR16mi8 }, - { X86::OR16rr, X86::OR16mr }, - { X86::OR32ri, X86::OR32mi }, - { X86::OR32ri8, X86::OR32mi8 }, - { X86::OR32rr, X86::OR32mr }, - { X86::OR64ri32, X86::OR64mi32 }, - { X86::OR64ri8, X86::OR64mi8 }, - { X86::OR64rr, X86::OR64mr }, - { X86::OR8ri, X86::OR8mi }, - { X86::OR8rr, X86::OR8mr }, - { X86::ROL16r1, X86::ROL16m1 }, - { X86::ROL16rCL, X86::ROL16mCL }, - { X86::ROL16ri, X86::ROL16mi }, - { X86::ROL32r1, X86::ROL32m1 }, - { X86::ROL32rCL, X86::ROL32mCL }, - { X86::ROL32ri, X86::ROL32mi }, - { X86::ROL64r1, X86::ROL64m1 }, - { X86::ROL64rCL, X86::ROL64mCL }, - { X86::ROL64ri, X86::ROL64mi }, - { X86::ROL8r1, X86::ROL8m1 }, - { X86::ROL8rCL, X86::ROL8mCL }, - { X86::ROL8ri, X86::ROL8mi }, - { X86::ROR16r1, X86::ROR16m1 }, - { X86::ROR16rCL, X86::ROR16mCL }, - { X86::ROR16ri, X86::ROR16mi }, - { X86::ROR32r1, X86::ROR32m1 }, - { X86::ROR32rCL, X86::ROR32mCL }, - { X86::ROR32ri, X86::ROR32mi }, - { X86::ROR64r1, X86::ROR64m1 }, - { X86::ROR64rCL, X86::ROR64mCL }, - { X86::ROR64ri, X86::ROR64mi }, - { X86::ROR8r1, X86::ROR8m1 }, - { X86::ROR8rCL, X86::ROR8mCL }, - { X86::ROR8ri, X86::ROR8mi }, - { X86::SAR16r1, X86::SAR16m1 
}, - { X86::SAR16rCL, X86::SAR16mCL }, - { X86::SAR16ri, X86::SAR16mi }, - { X86::SAR32r1, X86::SAR32m1 }, - { X86::SAR32rCL, X86::SAR32mCL }, - { X86::SAR32ri, X86::SAR32mi }, - { X86::SAR64r1, X86::SAR64m1 }, - { X86::SAR64rCL, X86::SAR64mCL }, - { X86::SAR64ri, X86::SAR64mi }, - { X86::SAR8r1, X86::SAR8m1 }, - { X86::SAR8rCL, X86::SAR8mCL }, - { X86::SAR8ri, X86::SAR8mi }, - { X86::SBB32ri, X86::SBB32mi }, - { X86::SBB32ri8, X86::SBB32mi8 }, - { X86::SBB32rr, X86::SBB32mr }, - { X86::SBB64ri32, X86::SBB64mi32 }, - { X86::SBB64ri8, X86::SBB64mi8 }, - { X86::SBB64rr, X86::SBB64mr }, - { X86::SHL16rCL, X86::SHL16mCL }, - { X86::SHL16ri, X86::SHL16mi }, - { X86::SHL32rCL, X86::SHL32mCL }, - { X86::SHL32ri, X86::SHL32mi }, - { X86::SHL64rCL, X86::SHL64mCL }, - { X86::SHL64ri, X86::SHL64mi }, - { X86::SHL8rCL, X86::SHL8mCL }, - { X86::SHL8ri, X86::SHL8mi }, - { X86::SHLD16rrCL, X86::SHLD16mrCL }, - { X86::SHLD16rri8, X86::SHLD16mri8 }, - { X86::SHLD32rrCL, X86::SHLD32mrCL }, - { X86::SHLD32rri8, X86::SHLD32mri8 }, - { X86::SHLD64rrCL, X86::SHLD64mrCL }, - { X86::SHLD64rri8, X86::SHLD64mri8 }, - { X86::SHR16r1, X86::SHR16m1 }, - { X86::SHR16rCL, X86::SHR16mCL }, - { X86::SHR16ri, X86::SHR16mi }, - { X86::SHR32r1, X86::SHR32m1 }, - { X86::SHR32rCL, X86::SHR32mCL }, - { X86::SHR32ri, X86::SHR32mi }, - { X86::SHR64r1, X86::SHR64m1 }, - { X86::SHR64rCL, X86::SHR64mCL }, - { X86::SHR64ri, X86::SHR64mi }, - { X86::SHR8r1, X86::SHR8m1 }, - { X86::SHR8rCL, X86::SHR8mCL }, - { X86::SHR8ri, X86::SHR8mi }, - { X86::SHRD16rrCL, X86::SHRD16mrCL }, - { X86::SHRD16rri8, X86::SHRD16mri8 }, - { X86::SHRD32rrCL, X86::SHRD32mrCL }, - { X86::SHRD32rri8, X86::SHRD32mri8 }, - { X86::SHRD64rrCL, X86::SHRD64mrCL }, - { X86::SHRD64rri8, X86::SHRD64mri8 }, - { X86::SUB16ri, X86::SUB16mi }, - { X86::SUB16ri8, X86::SUB16mi8 }, - { X86::SUB16rr, X86::SUB16mr }, - { X86::SUB32ri, X86::SUB32mi }, - { X86::SUB32ri8, X86::SUB32mi8 }, - { X86::SUB32rr, X86::SUB32mr }, - { X86::SUB64ri32, X86::SUB64mi32 }, - { X86::SUB64ri8, X86::SUB64mi8 }, - { X86::SUB64rr, X86::SUB64mr }, - { X86::SUB8ri, X86::SUB8mi }, - { X86::SUB8rr, X86::SUB8mr }, - { X86::XOR16ri, X86::XOR16mi }, - { X86::XOR16ri8, X86::XOR16mi8 }, - { X86::XOR16rr, X86::XOR16mr }, - { X86::XOR32ri, X86::XOR32mi }, - { X86::XOR32ri8, X86::XOR32mi8 }, - { X86::XOR32rr, X86::XOR32mr }, - { X86::XOR64ri32, X86::XOR64mi32 }, - { X86::XOR64ri8, X86::XOR64mi8 }, - { X86::XOR64rr, X86::XOR64mr }, - { X86::XOR8ri, X86::XOR8mi }, - { X86::XOR8rr, X86::XOR8mr } + static const unsigned OpTbl2Addr[][3] = { + { X86::ADC32ri, X86::ADC32mi, 0 }, + { X86::ADC32ri8, X86::ADC32mi8, 0 }, + { X86::ADC32rr, X86::ADC32mr, 0 }, + { X86::ADC64ri32, X86::ADC64mi32, 0 }, + { X86::ADC64ri8, X86::ADC64mi8, 0 }, + { X86::ADC64rr, X86::ADC64mr, 0 }, + { X86::ADD16ri, X86::ADD16mi, 0 }, + { X86::ADD16ri8, X86::ADD16mi8, 0 }, + { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE }, + { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE }, + { X86::ADD16rr, X86::ADD16mr, 0 }, + { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE }, + { X86::ADD32ri, X86::ADD32mi, 0 }, + { X86::ADD32ri8, X86::ADD32mi8, 0 }, + { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE }, + { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE }, + { X86::ADD32rr, X86::ADD32mr, 0 }, + { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE }, + { X86::ADD64ri32, X86::ADD64mi32, 0 }, + { X86::ADD64ri8, X86::ADD64mi8, 0 }, + { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE }, + { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE }, + { X86::ADD64rr, X86::ADD64mr, 0 
}, + { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE }, + { X86::ADD8ri, X86::ADD8mi, 0 }, + { X86::ADD8rr, X86::ADD8mr, 0 }, + { X86::AND16ri, X86::AND16mi, 0 }, + { X86::AND16ri8, X86::AND16mi8, 0 }, + { X86::AND16rr, X86::AND16mr, 0 }, + { X86::AND32ri, X86::AND32mi, 0 }, + { X86::AND32ri8, X86::AND32mi8, 0 }, + { X86::AND32rr, X86::AND32mr, 0 }, + { X86::AND64ri32, X86::AND64mi32, 0 }, + { X86::AND64ri8, X86::AND64mi8, 0 }, + { X86::AND64rr, X86::AND64mr, 0 }, + { X86::AND8ri, X86::AND8mi, 0 }, + { X86::AND8rr, X86::AND8mr, 0 }, + { X86::DEC16r, X86::DEC16m, 0 }, + { X86::DEC32r, X86::DEC32m, 0 }, + { X86::DEC64_16r, X86::DEC64_16m, 0 }, + { X86::DEC64_32r, X86::DEC64_32m, 0 }, + { X86::DEC64r, X86::DEC64m, 0 }, + { X86::DEC8r, X86::DEC8m, 0 }, + { X86::INC16r, X86::INC16m, 0 }, + { X86::INC32r, X86::INC32m, 0 }, + { X86::INC64_16r, X86::INC64_16m, 0 }, + { X86::INC64_32r, X86::INC64_32m, 0 }, + { X86::INC64r, X86::INC64m, 0 }, + { X86::INC8r, X86::INC8m, 0 }, + { X86::NEG16r, X86::NEG16m, 0 }, + { X86::NEG32r, X86::NEG32m, 0 }, + { X86::NEG64r, X86::NEG64m, 0 }, + { X86::NEG8r, X86::NEG8m, 0 }, + { X86::NOT16r, X86::NOT16m, 0 }, + { X86::NOT32r, X86::NOT32m, 0 }, + { X86::NOT64r, X86::NOT64m, 0 }, + { X86::NOT8r, X86::NOT8m, 0 }, + { X86::OR16ri, X86::OR16mi, 0 }, + { X86::OR16ri8, X86::OR16mi8, 0 }, + { X86::OR16rr, X86::OR16mr, 0 }, + { X86::OR32ri, X86::OR32mi, 0 }, + { X86::OR32ri8, X86::OR32mi8, 0 }, + { X86::OR32rr, X86::OR32mr, 0 }, + { X86::OR64ri32, X86::OR64mi32, 0 }, + { X86::OR64ri8, X86::OR64mi8, 0 }, + { X86::OR64rr, X86::OR64mr, 0 }, + { X86::OR8ri, X86::OR8mi, 0 }, + { X86::OR8rr, X86::OR8mr, 0 }, + { X86::ROL16r1, X86::ROL16m1, 0 }, + { X86::ROL16rCL, X86::ROL16mCL, 0 }, + { X86::ROL16ri, X86::ROL16mi, 0 }, + { X86::ROL32r1, X86::ROL32m1, 0 }, + { X86::ROL32rCL, X86::ROL32mCL, 0 }, + { X86::ROL32ri, X86::ROL32mi, 0 }, + { X86::ROL64r1, X86::ROL64m1, 0 }, + { X86::ROL64rCL, X86::ROL64mCL, 0 }, + { X86::ROL64ri, X86::ROL64mi, 0 }, + { X86::ROL8r1, X86::ROL8m1, 0 }, + { X86::ROL8rCL, X86::ROL8mCL, 0 }, + { X86::ROL8ri, X86::ROL8mi, 0 }, + { X86::ROR16r1, X86::ROR16m1, 0 }, + { X86::ROR16rCL, X86::ROR16mCL, 0 }, + { X86::ROR16ri, X86::ROR16mi, 0 }, + { X86::ROR32r1, X86::ROR32m1, 0 }, + { X86::ROR32rCL, X86::ROR32mCL, 0 }, + { X86::ROR32ri, X86::ROR32mi, 0 }, + { X86::ROR64r1, X86::ROR64m1, 0 }, + { X86::ROR64rCL, X86::ROR64mCL, 0 }, + { X86::ROR64ri, X86::ROR64mi, 0 }, + { X86::ROR8r1, X86::ROR8m1, 0 }, + { X86::ROR8rCL, X86::ROR8mCL, 0 }, + { X86::ROR8ri, X86::ROR8mi, 0 }, + { X86::SAR16r1, X86::SAR16m1, 0 }, + { X86::SAR16rCL, X86::SAR16mCL, 0 }, + { X86::SAR16ri, X86::SAR16mi, 0 }, + { X86::SAR32r1, X86::SAR32m1, 0 }, + { X86::SAR32rCL, X86::SAR32mCL, 0 }, + { X86::SAR32ri, X86::SAR32mi, 0 }, + { X86::SAR64r1, X86::SAR64m1, 0 }, + { X86::SAR64rCL, X86::SAR64mCL, 0 }, + { X86::SAR64ri, X86::SAR64mi, 0 }, + { X86::SAR8r1, X86::SAR8m1, 0 }, + { X86::SAR8rCL, X86::SAR8mCL, 0 }, + { X86::SAR8ri, X86::SAR8mi, 0 }, + { X86::SBB32ri, X86::SBB32mi, 0 }, + { X86::SBB32ri8, X86::SBB32mi8, 0 }, + { X86::SBB32rr, X86::SBB32mr, 0 }, + { X86::SBB64ri32, X86::SBB64mi32, 0 }, + { X86::SBB64ri8, X86::SBB64mi8, 0 }, + { X86::SBB64rr, X86::SBB64mr, 0 }, + { X86::SHL16rCL, X86::SHL16mCL, 0 }, + { X86::SHL16ri, X86::SHL16mi, 0 }, + { X86::SHL32rCL, X86::SHL32mCL, 0 }, + { X86::SHL32ri, X86::SHL32mi, 0 }, + { X86::SHL64rCL, X86::SHL64mCL, 0 }, + { X86::SHL64ri, X86::SHL64mi, 0 }, + { X86::SHL8rCL, X86::SHL8mCL, 0 }, + { X86::SHL8ri, X86::SHL8mi, 0 }, + { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 
}, + { X86::SHLD16rri8, X86::SHLD16mri8, 0 }, + { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 }, + { X86::SHLD32rri8, X86::SHLD32mri8, 0 }, + { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 }, + { X86::SHLD64rri8, X86::SHLD64mri8, 0 }, + { X86::SHR16r1, X86::SHR16m1, 0 }, + { X86::SHR16rCL, X86::SHR16mCL, 0 }, + { X86::SHR16ri, X86::SHR16mi, 0 }, + { X86::SHR32r1, X86::SHR32m1, 0 }, + { X86::SHR32rCL, X86::SHR32mCL, 0 }, + { X86::SHR32ri, X86::SHR32mi, 0 }, + { X86::SHR64r1, X86::SHR64m1, 0 }, + { X86::SHR64rCL, X86::SHR64mCL, 0 }, + { X86::SHR64ri, X86::SHR64mi, 0 }, + { X86::SHR8r1, X86::SHR8m1, 0 }, + { X86::SHR8rCL, X86::SHR8mCL, 0 }, + { X86::SHR8ri, X86::SHR8mi, 0 }, + { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 }, + { X86::SHRD16rri8, X86::SHRD16mri8, 0 }, + { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 }, + { X86::SHRD32rri8, X86::SHRD32mri8, 0 }, + { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 }, + { X86::SHRD64rri8, X86::SHRD64mri8, 0 }, + { X86::SUB16ri, X86::SUB16mi, 0 }, + { X86::SUB16ri8, X86::SUB16mi8, 0 }, + { X86::SUB16rr, X86::SUB16mr, 0 }, + { X86::SUB32ri, X86::SUB32mi, 0 }, + { X86::SUB32ri8, X86::SUB32mi8, 0 }, + { X86::SUB32rr, X86::SUB32mr, 0 }, + { X86::SUB64ri32, X86::SUB64mi32, 0 }, + { X86::SUB64ri8, X86::SUB64mi8, 0 }, + { X86::SUB64rr, X86::SUB64mr, 0 }, + { X86::SUB8ri, X86::SUB8mi, 0 }, + { X86::SUB8rr, X86::SUB8mr, 0 }, + { X86::XOR16ri, X86::XOR16mi, 0 }, + { X86::XOR16ri8, X86::XOR16mi8, 0 }, + { X86::XOR16rr, X86::XOR16mr, 0 }, + { X86::XOR32ri, X86::XOR32mi, 0 }, + { X86::XOR32ri8, X86::XOR32mi8, 0 }, + { X86::XOR32rr, X86::XOR32mr, 0 }, + { X86::XOR64ri32, X86::XOR64mi32, 0 }, + { X86::XOR64ri8, X86::XOR64mi8, 0 }, + { X86::XOR64rr, X86::XOR64mr, 0 }, + { X86::XOR8ri, X86::XOR8mi, 0 }, + { X86::XOR8rr, X86::XOR8mr, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { unsigned RegOp = OpTbl2Addr[i][0]; - unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS; - assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?"); - RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U); - - // If this is not a reversible operation (because there is a many->one) - // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. - if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE) - continue; - - // Index 0, folded load and store, no alignment requirement. - unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); - - assert(!MemOp2RegOpTable.count(MemOp) && - "Duplicated entries in unfolding maps?"); - MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); - } - - // If the third value is 1, then it's folding either a load or a store. 
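The comment above documents the old table encoding, in which an extra column and TB_NOT_REVERSABLE bits OR'ed into the memory opcode carried the fold index, the load/store direction, and reversibility. The rewritten loops in this hunk collapse all of that into a single flags word handed to AddTableEntry. The following is a minimal sketch of how such a word can be laid out; the TB_* names are the ones this change introduces, but the concrete bit positions are illustrative assumptions rather than the exact values in X86InstrInfo.h.

enum {
  // Operand index the memory reference folds into (assumed low bits).
  TB_INDEX_0      = 0,
  TB_INDEX_1      = 1,
  TB_INDEX_2      = 2,
  TB_INDEX_MASK   = 0xf,

  // Direction of the fold: the memory form performs a load, a store, or both.
  TB_FOLDED_LOAD  = 1 << 4,
  TB_FOLDED_STORE = 1 << 5,

  // Suppress one direction of the reg<->mem mapping; TB_NO_REVERSE covers
  // many-to-one entries that must not be inserted into the unfolding map.
  TB_NO_FORWARD   = 1 << 6,
  TB_NO_REVERSE   = 1 << 7,

  // Minimum alignment of the memory operand, stored as a byte count in its
  // own bit field so it can be masked back out.
  TB_ALIGN_SHIFT  = 8,
  TB_ALIGN_NONE   = 0 << TB_ALIGN_SHIFT,
  TB_ALIGN_16     = 16 << TB_ALIGN_SHIFT,
  TB_ALIGN_32     = 32 << TB_ALIGN_SHIFT,
  TB_ALIGN_MASK   = 0xff << TB_ALIGN_SHIFT
};

Under such a layout, decoding the alignment requirement mirrors what foldMemoryOperandImpl does later in this diff:

unsigned MinAlign = (Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;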
- static const unsigned OpTbl0[][4] = { - { X86::BT16ri8, X86::BT16mi8, 1, 0 }, - { X86::BT32ri8, X86::BT32mi8, 1, 0 }, - { X86::BT64ri8, X86::BT64mi8, 1, 0 }, - { X86::CALL32r, X86::CALL32m, 1, 0 }, - { X86::CALL64r, X86::CALL64m, 1, 0 }, - { X86::WINCALL64r, X86::WINCALL64m, 1, 0 }, - { X86::CMP16ri, X86::CMP16mi, 1, 0 }, - { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, - { X86::CMP16rr, X86::CMP16mr, 1, 0 }, - { X86::CMP32ri, X86::CMP32mi, 1, 0 }, - { X86::CMP32ri8, X86::CMP32mi8, 1, 0 }, - { X86::CMP32rr, X86::CMP32mr, 1, 0 }, - { X86::CMP64ri32, X86::CMP64mi32, 1, 0 }, - { X86::CMP64ri8, X86::CMP64mi8, 1, 0 }, - { X86::CMP64rr, X86::CMP64mr, 1, 0 }, - { X86::CMP8ri, X86::CMP8mi, 1, 0 }, - { X86::CMP8rr, X86::CMP8mr, 1, 0 }, - { X86::DIV16r, X86::DIV16m, 1, 0 }, - { X86::DIV32r, X86::DIV32m, 1, 0 }, - { X86::DIV64r, X86::DIV64m, 1, 0 }, - { X86::DIV8r, X86::DIV8m, 1, 0 }, - { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, - { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 }, - { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 }, - { X86::IDIV16r, X86::IDIV16m, 1, 0 }, - { X86::IDIV32r, X86::IDIV32m, 1, 0 }, - { X86::IDIV64r, X86::IDIV64m, 1, 0 }, - { X86::IDIV8r, X86::IDIV8m, 1, 0 }, - { X86::IMUL16r, X86::IMUL16m, 1, 0 }, - { X86::IMUL32r, X86::IMUL32m, 1, 0 }, - { X86::IMUL64r, X86::IMUL64m, 1, 0 }, - { X86::IMUL8r, X86::IMUL8m, 1, 0 }, - { X86::JMP32r, X86::JMP32m, 1, 0 }, - { X86::JMP64r, X86::JMP64m, 1, 0 }, - { X86::MOV16ri, X86::MOV16mi, 0, 0 }, - { X86::MOV16rr, X86::MOV16mr, 0, 0 }, - { X86::MOV32ri, X86::MOV32mi, 0, 0 }, - { X86::MOV32rr, X86::MOV32mr, 0, 0 }, - { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, - { X86::MOV64rr, X86::MOV64mr, 0, 0 }, - { X86::MOV8ri, X86::MOV8mi, 0, 0 }, - { X86::MOV8rr, X86::MOV8mr, 0, 0 }, - { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 }, - { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, - { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, - { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, - { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 }, - { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 }, - { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 }, - { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, - { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, - { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, - { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, - { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, - { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, - { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 }, - { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 }, - { X86::MUL16r, X86::MUL16m, 1, 0 }, - { X86::MUL32r, X86::MUL32m, 1, 0 }, - { X86::MUL64r, X86::MUL64m, 1, 0 }, - { X86::MUL8r, X86::MUL8m, 1, 0 }, - { X86::SETAEr, X86::SETAEm, 0, 0 }, - { X86::SETAr, X86::SETAm, 0, 0 }, - { X86::SETBEr, X86::SETBEm, 0, 0 }, - { X86::SETBr, X86::SETBm, 0, 0 }, - { X86::SETEr, X86::SETEm, 0, 0 }, - { X86::SETGEr, X86::SETGEm, 0, 0 }, - { X86::SETGr, X86::SETGm, 0, 0 }, - { X86::SETLEr, X86::SETLEm, 0, 0 }, - { X86::SETLr, X86::SETLm, 0, 0 }, - { X86::SETNEr, X86::SETNEm, 0, 0 }, - { X86::SETNOr, X86::SETNOm, 0, 0 }, - { X86::SETNPr, X86::SETNPm, 0, 0 }, - { X86::SETNSr, X86::SETNSm, 0, 0 }, - { X86::SETOr, X86::SETOm, 0, 0 }, - { X86::SETPr, X86::SETPm, 0, 0 }, - { X86::SETSr, X86::SETSm, 0, 0 }, - { X86::TAILJMPr, X86::TAILJMPm, 1, 0 }, - { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 }, - { X86::TEST16ri, X86::TEST16mi, 1, 0 }, - { X86::TEST32ri, X86::TEST32mi, 1, 0 }, - { X86::TEST64ri32, X86::TEST64mi32, 1, 0 }, - { X86::TEST8ri, X86::TEST8mi, 1, 0 } + unsigned MemOp = OpTbl2Addr[i][1]; + unsigned Flags = OpTbl2Addr[i][2]; + 
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable, + RegOp, MemOp, + // Index 0, folded load and store, no alignment requirement. + Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); + } + + static const unsigned OpTbl0[][3] = { + { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, + { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, + { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, + { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, + { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, + { X86::WINCALL64r, X86::WINCALL64m, TB_FOLDED_LOAD }, + { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, + { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD }, + { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD }, + { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD }, + { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD }, + { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD }, + { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD }, + { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD }, + { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD }, + { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD }, + { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD }, + { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD }, + { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD }, + { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD }, + { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD }, + { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD }, + { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD }, + { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD }, + { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD }, + { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD }, + { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD }, + { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD }, + { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD }, + { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD }, + { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, + { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, + { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, + { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, + { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE }, + { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE }, + { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE }, + { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE }, + { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE }, + { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE }, + { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE }, + { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE }, + { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE }, + { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE }, + { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE }, + { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE }, + { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD }, + { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD }, + { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD }, + { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD }, + { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, + { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, + { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, + { X86::SETBr, X86::SETBm, TB_FOLDED_STORE }, + { X86::SETEr, X86::SETEm, TB_FOLDED_STORE }, + { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE }, + { X86::SETGr, X86::SETGm, TB_FOLDED_STORE }, + { X86::SETLEr, 
X86::SETLEm, TB_FOLDED_STORE }, + { X86::SETLr, X86::SETLm, TB_FOLDED_STORE }, + { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE }, + { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE }, + { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE }, + { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE }, + { X86::SETOr, X86::SETOm, TB_FOLDED_STORE }, + { X86::SETPr, X86::SETPm, TB_FOLDED_STORE }, + { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, + { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, + { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, + { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, + { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, + { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, + { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD }, + // AVX 128-bit versions of foldable instructions + { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE }, + { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE }, + { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE }, + { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE }, + { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE }, + { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE }, + // AVX 256-bit foldable instructions + { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE }, + { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE } }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { unsigned RegOp = OpTbl0[i][0]; - unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS; - unsigned FoldedLoad = OpTbl0[i][2]; - unsigned Align = OpTbl0[i][3]; - assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?"); - RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align); - - // If this is not a reversible operation (because there is a many->one) - // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. - if (OpTbl0[i][1] & TB_NOT_REVERSABLE) - continue; - - // Index 0, folded load or store. 
- unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5); - assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?"); - MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); + unsigned MemOp = OpTbl0[i][1]; + unsigned Flags = OpTbl0[i][2]; + AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable, + RegOp, MemOp, TB_INDEX_0 | Flags); } static const unsigned OpTbl1[][3] = { - { X86::CMP16rr, X86::CMP16rm, 0 }, - { X86::CMP32rr, X86::CMP32rm, 0 }, - { X86::CMP64rr, X86::CMP64rm, 0 }, - { X86::CMP8rr, X86::CMP8rm, 0 }, - { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, - { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, - { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, - { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, - { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, - { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, - { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, - { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, - { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, - { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, - { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 }, - { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 }, - { X86::IMUL16rri, X86::IMUL16rmi, 0 }, - { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, - { X86::IMUL32rri, X86::IMUL32rmi, 0 }, - { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, - { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, - { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, - { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, - { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, - { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, - { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 }, - { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 }, - { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, - { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, - { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, - { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 }, - { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, - { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, - { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, - { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, - { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, - { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, - { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, - { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 }, - { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 }, - { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, - { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, - { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, - { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, - { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, - { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, - { X86::MOV16rr, X86::MOV16rm, 0 }, - { X86::MOV32rr, X86::MOV32rm, 0 }, - { X86::MOV64rr, X86::MOV64rm, 0 }, - { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, - { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, - { X86::MOV8rr, X86::MOV8rm, 0 }, - { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, - { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, - { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 }, - { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 }, - { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, - { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, - { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, - { X86::MOVDQArr, X86::MOVDQArm, 16 }, - { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 }, - { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, - { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, - { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, - { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, - { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, - { X86::MOVSX64rr16, 
X86::MOVSX64rm16, 0 }, - { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, - { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, - { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, - { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, - { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, - { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, - { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, - { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, - { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, - { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, - { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, - { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, - { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, - { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, - { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, - { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, - { X86::PSHUFDri, X86::PSHUFDmi, 16 }, - { X86::PSHUFHWri, X86::PSHUFHWmi, 16 }, - { X86::PSHUFLWri, X86::PSHUFLWmi, 16 }, - { X86::RCPPSr, X86::RCPPSm, 16 }, - { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 }, - { X86::RSQRTPSr, X86::RSQRTPSm, 16 }, - { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 }, - { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, - { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, - { X86::SQRTPDr, X86::SQRTPDm, 16 }, - { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 }, - { X86::SQRTPSr, X86::SQRTPSm, 16 }, - { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 }, - { X86::SQRTSDr, X86::SQRTSDm, 0 }, - { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, - { X86::SQRTSSr, X86::SQRTSSm, 0 }, - { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, - { X86::TEST16rr, X86::TEST16rm, 0 }, - { X86::TEST32rr, X86::TEST32rm, 0 }, - { X86::TEST64rr, X86::TEST64rm, 0 }, - { X86::TEST8rr, X86::TEST8rm, 0 }, + { X86::CMP16rr, X86::CMP16rm, 0 }, + { X86::CMP32rr, X86::CMP32rm, 0 }, + { X86::CMP64rr, X86::CMP64rm, 0 }, + { X86::CMP8rr, X86::CMP8rm, 0 }, + { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, + { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, + { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, + { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, + { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, + { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, + { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, + { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, + { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, + { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, + { X86::FsMOVAPDrr, X86::MOVSDrm, TB_NO_REVERSE }, + { X86::FsMOVAPSrr, X86::MOVSSrm, TB_NO_REVERSE }, + { X86::IMUL16rri, X86::IMUL16rmi, 0 }, + { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, + { X86::IMUL32rri, X86::IMUL32rmi, 0 }, + { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, + { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, + { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, + { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, + { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, + { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 }, + { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 }, + { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 }, + { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 }, + { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 }, + { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, + { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 }, + { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, + { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, + { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, + { 
X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, + { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, + { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, + { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, + { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, + { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, + { X86::MOV16rr, X86::MOV16rm, 0 }, + { X86::MOV32rr, X86::MOV32rm, 0 }, + { X86::MOV64rr, X86::MOV64rm, 0 }, + { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, + { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, + { X86::MOV8rr, X86::MOV8rm, 0 }, + { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 }, + { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 }, + { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, + { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, + { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, + { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 }, + { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 }, + { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 }, + { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, + { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, + { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, + { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, + { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, + { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, + { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 }, + { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, + { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, + { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, + { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 }, + { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, + { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, + { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, + { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, + { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, + { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, + { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, + { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 }, + { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 }, + { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 }, + { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 }, + { X86::RCPPSr_Int, X86::RCPPSm_Int, TB_ALIGN_16 }, + { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 }, + { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, TB_ALIGN_16 }, + { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, + { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, + { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 }, + { X86::SQRTPDr_Int, X86::SQRTPDm_Int, TB_ALIGN_16 }, + { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 }, + { X86::SQRTPSr_Int, X86::SQRTPSm_Int, TB_ALIGN_16 }, + { X86::SQRTSDr, X86::SQRTSDm, 0 }, + { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, + { X86::SQRTSSr, X86::SQRTSSm, 0 }, + { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, + { X86::TEST16rr, X86::TEST16rm, 0 }, + { X86::TEST32rr, X86::TEST32rm, 0 }, + { X86::TEST64rr, X86::TEST64rm, 0 }, + { X86::TEST8rr, X86::TEST8rm, 0 }, // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 - { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, - { X86::UCOMISSrr, X86::UCOMISSrm, 0 } + { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, + { X86::UCOMISSrr, X86::UCOMISSrm, 0 }, + // AVX 128-bit versions of foldable instructions + { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 }, + { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 }, + { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 }, + { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 }, + { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 }, + { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 }, + { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 }, + { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 }, + { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 }, + { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 }, + { 
X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE }, + { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE }, + { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 }, + { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 }, + { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 }, + { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 }, + { X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 }, + { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 }, + { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 }, + { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 }, + { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 }, + { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 }, + { X86::VMOVUPDrr, X86::VMOVUPDrm, TB_ALIGN_16 }, + { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, + { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 }, + { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, + { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 }, + { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 }, + { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 }, + { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 }, + { X86::VRCPPSr, X86::VRCPPSm, TB_ALIGN_16 }, + { X86::VRCPPSr_Int, X86::VRCPPSm_Int, TB_ALIGN_16 }, + { X86::VRSQRTPSr, X86::VRSQRTPSm, TB_ALIGN_16 }, + { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, TB_ALIGN_16 }, + { X86::VSQRTPDr, X86::VSQRTPDm, TB_ALIGN_16 }, + { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, TB_ALIGN_16 }, + { X86::VSQRTPSr, X86::VSQRTPSm, TB_ALIGN_16 }, + { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 }, + { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 }, + { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, + // AVX 256-bit foldable instructions + { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, + { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { unsigned RegOp = OpTbl1[i][0]; - unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS; - unsigned Align = OpTbl1[i][2]; - assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries"); - RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align); - - // If this is not a reversible operation (because there is a many->one) - // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. 
- if (OpTbl1[i][1] & TB_NOT_REVERSABLE) - continue; - - // Index 1, folded load - unsigned AuxInfo = 1 | (1 << 4); - assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries"); - MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); + unsigned MemOp = OpTbl1[i][1]; + unsigned Flags = OpTbl1[i][2]; + AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable, + RegOp, MemOp, + // Index 1, folded load + Flags | TB_INDEX_1 | TB_FOLDED_LOAD); } static const unsigned OpTbl2[][3] = { - { X86::ADC32rr, X86::ADC32rm, 0 }, - { X86::ADC64rr, X86::ADC64rm, 0 }, - { X86::ADD16rr, X86::ADD16rm, 0 }, - { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 }, - { X86::ADD32rr, X86::ADD32rm, 0 }, - { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 }, - { X86::ADD64rr, X86::ADD64rm, 0 }, - { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 }, - { X86::ADD8rr, X86::ADD8rm, 0 }, - { X86::ADDPDrr, X86::ADDPDrm, 16 }, - { X86::ADDPSrr, X86::ADDPSrm, 16 }, - { X86::ADDSDrr, X86::ADDSDrm, 0 }, - { X86::ADDSSrr, X86::ADDSSrm, 0 }, - { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 }, - { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 }, - { X86::AND16rr, X86::AND16rm, 0 }, - { X86::AND32rr, X86::AND32rm, 0 }, - { X86::AND64rr, X86::AND64rm, 0 }, - { X86::AND8rr, X86::AND8rm, 0 }, - { X86::ANDNPDrr, X86::ANDNPDrm, 16 }, - { X86::ANDNPSrr, X86::ANDNPSrm, 16 }, - { X86::ANDPDrr, X86::ANDPDrm, 16 }, - { X86::ANDPSrr, X86::ANDPSrm, 16 }, - { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, - { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, - { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, - { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, - { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, - { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, - { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, - { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, - { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, - { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, - { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, - { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, - { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, - { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, - { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, - { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, - { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, - { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, - { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, - { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, - { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, - { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, - { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, - { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, - { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, - { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, - { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, - { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, - { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, - { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, - { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, - { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, - { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, - { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, - { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, - { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, - { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, - { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, - { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, - { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, - { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, - { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, - { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, - { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, - { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, - { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, - { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, - { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, - { X86::CMPPDrri, X86::CMPPDrmi, 16 }, - { X86::CMPPSrri, X86::CMPPSrmi, 16 }, - { X86::CMPSDrr, X86::CMPSDrm, 0 }, 
- { X86::CMPSSrr, X86::CMPSSrm, 0 }, - { X86::DIVPDrr, X86::DIVPDrm, 16 }, - { X86::DIVPSrr, X86::DIVPSrm, 16 }, - { X86::DIVSDrr, X86::DIVSDrm, 0 }, - { X86::DIVSSrr, X86::DIVSSrm, 0 }, - { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 }, - { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 }, - { X86::FsANDPDrr, X86::FsANDPDrm, 16 }, - { X86::FsANDPSrr, X86::FsANDPSrm, 16 }, - { X86::FsORPDrr, X86::FsORPDrm, 16 }, - { X86::FsORPSrr, X86::FsORPSrm, 16 }, - { X86::FsXORPDrr, X86::FsXORPDrm, 16 }, - { X86::FsXORPSrr, X86::FsXORPSrm, 16 }, - { X86::HADDPDrr, X86::HADDPDrm, 16 }, - { X86::HADDPSrr, X86::HADDPSrm, 16 }, - { X86::HSUBPDrr, X86::HSUBPDrm, 16 }, - { X86::HSUBPSrr, X86::HSUBPSrm, 16 }, - { X86::IMUL16rr, X86::IMUL16rm, 0 }, - { X86::IMUL32rr, X86::IMUL32rm, 0 }, - { X86::IMUL64rr, X86::IMUL64rm, 0 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, - { X86::MAXPDrr, X86::MAXPDrm, 16 }, - { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, - { X86::MAXPSrr, X86::MAXPSrm, 16 }, - { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 }, - { X86::MAXSDrr, X86::MAXSDrm, 0 }, - { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, - { X86::MAXSSrr, X86::MAXSSrm, 0 }, - { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, - { X86::MINPDrr, X86::MINPDrm, 16 }, - { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 }, - { X86::MINPSrr, X86::MINPSrm, 16 }, - { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 }, - { X86::MINSDrr, X86::MINSDrm, 0 }, - { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, - { X86::MINSSrr, X86::MINSSrm, 0 }, - { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, - { X86::MULPDrr, X86::MULPDrm, 16 }, - { X86::MULPSrr, X86::MULPSrm, 16 }, - { X86::MULSDrr, X86::MULSDrm, 0 }, - { X86::MULSSrr, X86::MULSSrm, 0 }, - { X86::OR16rr, X86::OR16rm, 0 }, - { X86::OR32rr, X86::OR32rm, 0 }, - { X86::OR64rr, X86::OR64rm, 0 }, - { X86::OR8rr, X86::OR8rm, 0 }, - { X86::ORPDrr, X86::ORPDrm, 16 }, - { X86::ORPSrr, X86::ORPSrm, 16 }, - { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 }, - { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 }, - { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 }, - { X86::PADDBrr, X86::PADDBrm, 16 }, - { X86::PADDDrr, X86::PADDDrm, 16 }, - { X86::PADDQrr, X86::PADDQrm, 16 }, - { X86::PADDSBrr, X86::PADDSBrm, 16 }, - { X86::PADDSWrr, X86::PADDSWrm, 16 }, - { X86::PADDWrr, X86::PADDWrm, 16 }, - { X86::PANDNrr, X86::PANDNrm, 16 }, - { X86::PANDrr, X86::PANDrm, 16 }, - { X86::PAVGBrr, X86::PAVGBrm, 16 }, - { X86::PAVGWrr, X86::PAVGWrm, 16 }, - { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 }, - { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 }, - { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 }, - { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 }, - { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 }, - { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 }, - { X86::PINSRWrri, X86::PINSRWrmi, 16 }, - { X86::PMADDWDrr, X86::PMADDWDrm, 16 }, - { X86::PMAXSWrr, X86::PMAXSWrm, 16 }, - { X86::PMAXUBrr, X86::PMAXUBrm, 16 }, - { X86::PMINSWrr, X86::PMINSWrm, 16 }, - { X86::PMINUBrr, X86::PMINUBrm, 16 }, - { X86::PMULDQrr, X86::PMULDQrm, 16 }, - { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, - { X86::PMULHWrr, X86::PMULHWrm, 16 }, - { X86::PMULLDrr, X86::PMULLDrm, 16 }, - { X86::PMULLWrr, X86::PMULLWrm, 16 }, - { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, - { X86::PORrr, X86::PORrm, 16 }, - { X86::PSADBWrr, X86::PSADBWrm, 16 }, - { X86::PSLLDrr, X86::PSLLDrm, 16 }, - { X86::PSLLQrr, X86::PSLLQrm, 16 }, - { X86::PSLLWrr, X86::PSLLWrm, 16 }, - { X86::PSRADrr, X86::PSRADrm, 16 }, - { X86::PSRAWrr, X86::PSRAWrm, 16 }, - { X86::PSRLDrr, X86::PSRLDrm, 16 }, - { X86::PSRLQrr, X86::PSRLQrm, 16 }, - { X86::PSRLWrr, X86::PSRLWrm, 16 }, - { 
X86::PSUBBrr, X86::PSUBBrm, 16 }, - { X86::PSUBDrr, X86::PSUBDrm, 16 }, - { X86::PSUBSBrr, X86::PSUBSBrm, 16 }, - { X86::PSUBSWrr, X86::PSUBSWrm, 16 }, - { X86::PSUBWrr, X86::PSUBWrm, 16 }, - { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 }, - { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 }, - { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 }, - { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 }, - { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 }, - { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 }, - { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 }, - { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 }, - { X86::PXORrr, X86::PXORrm, 16 }, - { X86::SBB32rr, X86::SBB32rm, 0 }, - { X86::SBB64rr, X86::SBB64rm, 0 }, - { X86::SHUFPDrri, X86::SHUFPDrmi, 16 }, - { X86::SHUFPSrri, X86::SHUFPSrmi, 16 }, - { X86::SUB16rr, X86::SUB16rm, 0 }, - { X86::SUB32rr, X86::SUB32rm, 0 }, - { X86::SUB64rr, X86::SUB64rm, 0 }, - { X86::SUB8rr, X86::SUB8rm, 0 }, - { X86::SUBPDrr, X86::SUBPDrm, 16 }, - { X86::SUBPSrr, X86::SUBPSrm, 16 }, - { X86::SUBSDrr, X86::SUBSDrm, 0 }, - { X86::SUBSSrr, X86::SUBSSrm, 0 }, + { X86::ADC32rr, X86::ADC32rm, 0 }, + { X86::ADC64rr, X86::ADC64rm, 0 }, + { X86::ADD16rr, X86::ADD16rm, 0 }, + { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE }, + { X86::ADD32rr, X86::ADD32rm, 0 }, + { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE }, + { X86::ADD64rr, X86::ADD64rm, 0 }, + { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE }, + { X86::ADD8rr, X86::ADD8rm, 0 }, + { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 }, + { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 }, + { X86::ADDSDrr, X86::ADDSDrm, 0 }, + { X86::ADDSSrr, X86::ADDSSrm, 0 }, + { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 }, + { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 }, + { X86::AND16rr, X86::AND16rm, 0 }, + { X86::AND32rr, X86::AND32rm, 0 }, + { X86::AND64rr, X86::AND64rm, 0 }, + { X86::AND8rr, X86::AND8rm, 0 }, + { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 }, + { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 }, + { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 }, + { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 }, + { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, + { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, + { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, + { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, + { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, + { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, + { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, + { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, + { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, + { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, + { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, + { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, + { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, + { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, + { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, + { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, + { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, + { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, + { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, + { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, + { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, + { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, + { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, + { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, + { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, + { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, + { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, + { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, + { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, + { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, + { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, + { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, + { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, + { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, + { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, + { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 
}, + { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, + { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, + { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, + { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, + { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, + { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, + { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, + { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, + { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, + { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, + { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, + { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, + { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 }, + { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, + { X86::CMPSDrr, X86::CMPSDrm, 0 }, + { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 }, + { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 }, + { X86::DIVSDrr, X86::DIVSDrm, 0 }, + { X86::DIVSSrr, X86::DIVSSrm, 0 }, + { X86::FsANDNPDrr, X86::FsANDNPDrm, TB_ALIGN_16 }, + { X86::FsANDNPSrr, X86::FsANDNPSrm, TB_ALIGN_16 }, + { X86::FsANDPDrr, X86::FsANDPDrm, TB_ALIGN_16 }, + { X86::FsANDPSrr, X86::FsANDPSrm, TB_ALIGN_16 }, + { X86::FsORPDrr, X86::FsORPDrm, TB_ALIGN_16 }, + { X86::FsORPSrr, X86::FsORPSrm, TB_ALIGN_16 }, + { X86::FsXORPDrr, X86::FsXORPDrm, TB_ALIGN_16 }, + { X86::FsXORPSrr, X86::FsXORPSrm, TB_ALIGN_16 }, + { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 }, + { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 }, + { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 }, + { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 }, + { X86::IMUL16rr, X86::IMUL16rm, 0 }, + { X86::IMUL32rr, X86::IMUL32rm, 0 }, + { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, + { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, + { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 }, + { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, + { X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 }, + { X86::MAXSDrr, X86::MAXSDrm, 0 }, + { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, + { X86::MAXSSrr, X86::MAXSSrm, 0 }, + { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, + { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 }, + { X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 }, + { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 }, + { X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 }, + { X86::MINSDrr, X86::MINSDrm, 0 }, + { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, + { X86::MINSSrr, X86::MINSSrm, 0 }, + { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, + { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 }, + { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 }, + { X86::MULSDrr, X86::MULSDrm, 0 }, + { X86::MULSSrr, X86::MULSSrm, 0 }, + { X86::OR16rr, X86::OR16rm, 0 }, + { X86::OR32rr, X86::OR32rm, 0 }, + { X86::OR64rr, X86::OR64rm, 0 }, + { X86::OR8rr, X86::OR8rm, 0 }, + { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 }, + { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 }, + { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 }, + { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 }, + { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 }, + { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 }, + { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 }, + { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 }, + { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 }, + { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 }, + { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 }, + { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 }, + { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 }, + { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 }, + { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 }, + { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 }, + { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 }, + { X86::PCMPEQWrr, X86::PCMPEQWrm, 
TB_ALIGN_16 }, + { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 }, + { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, + { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, + { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 }, + { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, + { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 }, + { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, + { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, + { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, + { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, + { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, + { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 }, + { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 }, + { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 }, + { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, + { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, + { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, + { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, + { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, + { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, + { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 }, + { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 }, + { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 }, + { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 }, + { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 }, + { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 }, + { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 }, + { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 }, + { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 }, + { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 }, + { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 }, + { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 }, + { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 }, + { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 }, + { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 }, + { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 }, + { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 }, + { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 }, + { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 }, + { X86::SBB32rr, X86::SBB32rm, 0 }, + { X86::SBB64rr, X86::SBB64rm, 0 }, + { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 }, + { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 }, + { X86::SUB16rr, X86::SUB16rm, 0 }, + { X86::SUB32rr, X86::SUB32rm, 0 }, + { X86::SUB64rr, X86::SUB64rm, 0 }, + { X86::SUB8rr, X86::SUB8rm, 0 }, + { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 }, + { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 }, + { X86::SUBSDrr, X86::SUBSDrm, 0 }, + { X86::SUBSSrr, X86::SUBSSrm, 0 }, // FIXME: TEST*rr -> swapped operand of TEST*mr. 
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 }, - { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 }, - { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 }, - { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 }, - { X86::XOR16rr, X86::XOR16rm, 0 }, - { X86::XOR32rr, X86::XOR32rm, 0 }, - { X86::XOR64rr, X86::XOR64rm, 0 }, - { X86::XOR8rr, X86::XOR8rm, 0 }, - { X86::XORPDrr, X86::XORPDrm, 16 }, - { X86::XORPSrr, X86::XORPSrm, 16 } + { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 }, + { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 }, + { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 }, + { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 }, + { X86::XOR16rr, X86::XOR16rm, 0 }, + { X86::XOR32rr, X86::XOR32rm, 0 }, + { X86::XOR64rr, X86::XOR64rm, 0 }, + { X86::XOR8rr, X86::XOR8rm, 0 }, + { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 }, + { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 }, + // AVX 128-bit versions of foldable instructions + { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 }, + { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 }, + { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 }, + { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 }, + { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 }, + { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 }, + { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 }, + { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, + { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, + { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, + { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, + { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 }, + { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, + { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 }, + { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, + { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 }, + { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, + { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 }, + { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, + { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 }, + { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 }, + { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 }, + { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 }, + { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 }, + { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, + { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, + { X86::VSQRTSSr, X86::VSQRTSSm, 0 }, + { X86::VADDPDrr, X86::VADDPDrm, TB_ALIGN_16 }, + { X86::VADDPSrr, X86::VADDPSrm, TB_ALIGN_16 }, + { X86::VADDSDrr, X86::VADDSDrm, 0 }, + { X86::VADDSSrr, X86::VADDSSrm, 0 }, + { X86::VADDSUBPDrr, X86::VADDSUBPDrm, TB_ALIGN_16 }, + { X86::VADDSUBPSrr, X86::VADDSUBPSrm, TB_ALIGN_16 }, + { X86::VANDNPDrr, X86::VANDNPDrm, TB_ALIGN_16 }, + { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 }, + { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 }, + { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 }, + { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 }, + { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 }, + { X86::VCMPSDrr, X86::VCMPSDrm, 0 }, + { X86::VCMPSSrr, X86::VCMPSSrm, 0 }, + { X86::VDIVPDrr, X86::VDIVPDrm, TB_ALIGN_16 }, + { X86::VDIVPSrr, X86::VDIVPSrm, TB_ALIGN_16 }, + { X86::VDIVSDrr, X86::VDIVSDrm, 0 }, + { X86::VDIVSSrr, X86::VDIVSSrm, 0 }, + { X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 }, + { X86::VFsANDNPSrr, X86::VFsANDNPSrm, TB_ALIGN_16 }, + { X86::VFsANDPDrr, X86::VFsANDPDrm, TB_ALIGN_16 }, + { X86::VFsANDPSrr, X86::VFsANDPSrm, TB_ALIGN_16 }, + { X86::VFsORPDrr, X86::VFsORPDrm, TB_ALIGN_16 }, + { X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 }, + { X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 }, + { X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 }, + { X86::VHADDPDrr, X86::VHADDPDrm, 
TB_ALIGN_16 }, + { X86::VHADDPSrr, X86::VHADDPSrm, TB_ALIGN_16 }, + { X86::VHSUBPDrr, X86::VHSUBPDrm, TB_ALIGN_16 }, + { X86::VHSUBPSrr, X86::VHSUBPSrm, TB_ALIGN_16 }, + { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 }, + { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 }, + { X86::VMAXPDrr, X86::VMAXPDrm, TB_ALIGN_16 }, + { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, TB_ALIGN_16 }, + { X86::VMAXPSrr, X86::VMAXPSrm, TB_ALIGN_16 }, + { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, TB_ALIGN_16 }, + { X86::VMAXSDrr, X86::VMAXSDrm, 0 }, + { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 }, + { X86::VMAXSSrr, X86::VMAXSSrm, 0 }, + { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 }, + { X86::VMINPDrr, X86::VMINPDrm, TB_ALIGN_16 }, + { X86::VMINPDrr_Int, X86::VMINPDrm_Int, TB_ALIGN_16 }, + { X86::VMINPSrr, X86::VMINPSrm, TB_ALIGN_16 }, + { X86::VMINPSrr_Int, X86::VMINPSrm_Int, TB_ALIGN_16 }, + { X86::VMINSDrr, X86::VMINSDrm, 0 }, + { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 }, + { X86::VMINSSrr, X86::VMINSSrm, 0 }, + { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 }, + { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 }, + { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 }, + { X86::VMULSDrr, X86::VMULSDrm, 0 }, + { X86::VMULSSrr, X86::VMULSSrm, 0 }, + { X86::VORPDrr, X86::VORPDrm, TB_ALIGN_16 }, + { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 }, + { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 }, + { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 }, + { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 }, + { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 }, + { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 }, + { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 }, + { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 }, + { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 }, + { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 }, + { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 }, + { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 }, + { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 }, + { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 }, + { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 }, + { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 }, + { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 }, + { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 }, + { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 }, + { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 }, + { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 }, + { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 }, + { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 }, + { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 }, + { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 }, + { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 }, + { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 }, + { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 }, + { X86::VPMULLWrr, X86::VPMULLWrm, TB_ALIGN_16 }, + { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 }, + { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 }, + { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 }, + { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 }, + { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 }, + { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 }, + { X86::VPSRADrr, X86::VPSRADrm, TB_ALIGN_16 }, + { X86::VPSRAWrr, X86::VPSRAWrm, TB_ALIGN_16 }, + { X86::VPSRLDrr, X86::VPSRLDrm, TB_ALIGN_16 }, + { X86::VPSRLQrr, X86::VPSRLQrm, TB_ALIGN_16 }, + { X86::VPSRLWrr, X86::VPSRLWrm, TB_ALIGN_16 }, + { X86::VPSUBBrr, X86::VPSUBBrm, TB_ALIGN_16 }, + { X86::VPSUBDrr, X86::VPSUBDrm, TB_ALIGN_16 }, + { X86::VPSUBSBrr, X86::VPSUBSBrm, TB_ALIGN_16 }, + { X86::VPSUBSWrr, X86::VPSUBSWrm, TB_ALIGN_16 }, + { X86::VPSUBWrr, X86::VPSUBWrm, TB_ALIGN_16 }, + { 
X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, TB_ALIGN_16 }, + { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, TB_ALIGN_16 }, + { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, TB_ALIGN_16 }, + { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, TB_ALIGN_16 }, + { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, TB_ALIGN_16 }, + { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, TB_ALIGN_16 }, + { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, TB_ALIGN_16 }, + { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, TB_ALIGN_16 }, + { X86::VPXORrr, X86::VPXORrm, TB_ALIGN_16 }, + { X86::VSHUFPDrri, X86::VSHUFPDrmi, TB_ALIGN_16 }, + { X86::VSHUFPSrri, X86::VSHUFPSrmi, TB_ALIGN_16 }, + { X86::VSUBPDrr, X86::VSUBPDrm, TB_ALIGN_16 }, + { X86::VSUBPSrr, X86::VSUBPSrm, TB_ALIGN_16 }, + { X86::VSUBSDrr, X86::VSUBSDrm, 0 }, + { X86::VSUBSSrr, X86::VSUBSSrm, 0 }, + { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, TB_ALIGN_16 }, + { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, TB_ALIGN_16 }, + { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 }, + { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 }, + { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 }, + { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 } + // FIXME: add AVX 256-bit foldable instructions }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { unsigned RegOp = OpTbl2[i][0]; - unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS; - unsigned Align = OpTbl2[i][2]; - - assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!"); - RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align); - - // If this is not a reversible operation (because there is a many->one) - // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. - if (OpTbl2[i][1] & TB_NOT_REVERSABLE) - continue; + unsigned MemOp = OpTbl2[i][1]; + unsigned Flags = OpTbl2[i][2]; + AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable, + RegOp, MemOp, + // Index 2, folded load + Flags | TB_INDEX_2 | TB_FOLDED_LOAD); + } +} - // Index 2, folded load - unsigned AuxInfo = 2 | (1 << 4); - assert(!MemOp2RegOpTable.count(MemOp) && +void +X86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable, + MemOp2RegOpTableType &M2RTable, + unsigned RegOp, unsigned MemOp, unsigned Flags) { + if ((Flags & TB_NO_FORWARD) == 0) { + assert(!R2MTable.count(RegOp) && "Duplicate entry!"); + R2MTable[RegOp] = std::make_pair(MemOp, Flags); + } + if ((Flags & TB_NO_REVERSE) == 0) { + assert(!M2RTable.count(MemOp) && "Duplicated entries in unfolding maps?"); - MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); - } + M2RTable[MemOp] = std::make_pair(RegOp, Flags); + } } bool @@ -796,6 +1000,11 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MOVAPSrm: case X86::MOVAPDrm: case X86::MOVDQArm: + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::VMOVAPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: case X86::VMOVAPSYrm: case X86::VMOVAPDYrm: case X86::VMOVDQAYrm: @@ -820,6 +1029,11 @@ static bool isFrameStoreOpcode(int Opcode) { case X86::MOVAPSmr: case X86::MOVAPDmr: case X86::MOVDQAmr: + case X86::VMOVSSmr: + case X86::VMOVSDmr: + case X86::VMOVAPSmr: + case X86::VMOVAPDmr: + case X86::VMOVDQAmr: case X86::VMOVAPSYmr: case X86::VMOVAPDYmr: case X86::VMOVDQAYmr: @@ -852,24 +1066,6 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, return 0; } -bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const { - for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), - oe = MI->memoperands_end(); - o != oe; - ++o) { - if ((*o)->isLoad() && (*o)->getValue()) - if (const FixedStackPseudoSourceValue 
*Value =
-          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
-        FrameIndex = Value->getFrameIndex();
-        MMO = *o;
-        return true;
-      }
-  }
-  return false;
-}
-
 unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                           int &FrameIndex) const {
   if (isFrameStoreOpcode(MI->getOpcode()))
@@ -892,24 +1088,6 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
   return 0;
 }
 
-bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
-                                       const MachineMemOperand *&MMO,
-                                       int &FrameIndex) const {
-  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
-       oe = MI->memoperands_end();
-       o != oe;
-       ++o) {
-    if ((*o)->isStore() && (*o)->getValue())
-      if (const FixedStackPseudoSourceValue *Value =
-          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
-        FrameIndex = Value->getFrameIndex();
-        MMO = *o;
-        return true;
-      }
-  }
-  return false;
-}
-
 /// regIsPICBase - Return true if register is PIC base (i.e. defined by
 /// X86::MOVPC32r).
 static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
@@ -941,12 +1119,20 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
   case X86::MOVUPSrm:
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
+  case X86::VMOVSSrm:
+  case X86::VMOVSDrm:
+  case X86::VMOVAPSrm:
+  case X86::VMOVUPSrm:
+  case X86::VMOVAPDrm:
+  case X86::VMOVDQArm:
   case X86::VMOVAPSYrm:
   case X86::VMOVUPSYrm:
   case X86::VMOVAPDYrm:
   case X86::VMOVDQAYrm:
   case X86::MMX_MOVD64rm:
   case X86::MMX_MOVQ64rm:
+  case X86::FsVMOVAPSrm:
+  case X86::FsVMOVAPDrm:
   case X86::FsMOVAPSrm:
   case X86::FsMOVAPDrm: {
     // Loads from constant pools are trivially rematerializable.
@@ -1009,15 +1195,11 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I) {
   MachineBasicBlock::iterator E = MBB.end();
 
-  // It's always safe to clobber EFLAGS at the end of a block.
-  if (I == E)
-    return true;
-
   // For compile time consideration, if we are not able to determine the
   // safety after visiting 4 instructions in each direction, we will assume
   // it's not safe.
   MachineBasicBlock::iterator Iter = I;
-  for (unsigned i = 0; i < 4; ++i) {
+  for (unsigned i = 0; Iter != E && i < 4; ++i) {
     bool SeenDef = false;
     for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
       MachineOperand &MO = Iter->getOperand(j);
@@ -1037,10 +1219,16 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
     // Skip over DBG_VALUE.
     while (Iter != E && Iter->isDebugValue())
       ++Iter;
+  }
 
-  // If we make it to the end of the block, it's safe to clobber EFLAGS.
-  if (Iter == E)
-    return true;
+  // It is safe to clobber EFLAGS at the end of a block if no successor has it
+  // live in.
+  if (Iter == E) {
+    for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+           SE = MBB.succ_end(); SI != SE; ++SI)
+      if ((*SI)->isLiveIn(X86::EFLAGS))
+        return false;
+    return true;
   }
 
   MachineBasicBlock::iterator B = MBB.begin();
@@ -1946,7 +2134,8 @@ static bool isHReg(unsigned Reg) {
 }
 
 // Try and copy between VR128/VR64 and GR64 registers.
-static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
+                                        bool HasAVX) {
   // SrcReg(VR128) -> DestReg(GR64)
   // SrcReg(VR64)  -> DestReg(GR64)
   // SrcReg(GR64)  -> DestReg(VR128)
@@ -1955,7 +2144,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
   if (X86::GR64RegClass.contains(DestReg)) {
     if (X86::VR128RegClass.contains(SrcReg)) {
       // Copy from a VR128 register to a GR64 register.
- return X86::MOVPQIto64rr; + return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr; } else if (X86::VR64RegClass.contains(SrcReg)) { // Copy from a VR64 register to a GR64 register. return X86::MOVSDto64rr; @@ -1963,12 +2152,23 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) { } else if (X86::GR64RegClass.contains(SrcReg)) { // Copy from a GR64 register to a VR128 register. if (X86::VR128RegClass.contains(DestReg)) - return X86::MOV64toPQIrr; + return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr; // Copy from a GR64 register to a VR64 register. else if (X86::VR64RegClass.contains(DestReg)) return X86::MOV64toSDrr; } + // SrcReg(FR32) -> DestReg(GR32) + // SrcReg(GR32) -> DestReg(FR32) + + if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg)) + // Copy from a FR32 register to a GR32 register. + return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr; + + if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) + // Copy from a GR32 register to a FR32 register. + return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; + return 0; } @@ -1977,6 +2177,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { // First deal with the normal symmetric copies. + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); unsigned Opc = 0; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; @@ -1988,18 +2189,21 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. if ((isHReg(DestReg) || isHReg(SrcReg)) && - TM.getSubtarget<X86Subtarget>().is64Bit()) + TM.getSubtarget<X86Subtarget>().is64Bit()) { Opc = X86::MOV8rr_NOREX; - else + // Both operands must be encodable without an REX prefix. + assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) && + "8-bit H register can not be copied outside GR8_NOREX"); + } else Opc = X86::MOV8rr; } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) - Opc = X86::MOVAPSrr; + Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr; else if (X86::VR256RegClass.contains(DestReg, SrcReg)) Opc = X86::VMOVAPSYrr; else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; else - Opc = CopyToFromAsymmetricReg(DestReg, SrcReg); + Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX); if (Opc) { BuildMI(MBB, MI, DL, get(Opc), DestReg) @@ -2043,6 +2247,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, bool isStackAligned, const TargetMachine &TM, bool load) { + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); switch (RC->getSize()) { default: llvm_unreachable("Unknown spill size"); @@ -2061,7 +2266,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, if (X86::GR32RegClass.hasSubClassEq(RC)) return load ? X86::MOV32rm : X86::MOV32mr; if (X86::FR32RegClass.hasSubClassEq(RC)) - return load ? X86::MOVSSrm : X86::MOVSSmr; + return load ? + (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) : + (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr); if (X86::RFP32RegClass.hasSubClassEq(RC)) return load ? X86::LD_Fp32m : X86::ST_Fp32m; llvm_unreachable("Unknown 4-byte regclass"); @@ -2069,7 +2276,9 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, if (X86::GR64RegClass.hasSubClassEq(RC)) return load ? X86::MOV64rm : X86::MOV64mr; if (X86::FR64RegClass.hasSubClassEq(RC)) - return load ? X86::MOVSDrm : X86::MOVSDmr; + return load ? + (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) : + (HasAVX ? 
X86::VMOVSDmr : X86::MOVSDmr); if (X86::VR64RegClass.hasSubClassEq(RC)) return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; if (X86::RFP64RegClass.hasSubClassEq(RC)) @@ -2078,13 +2287,18 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, case 10: assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); return load ? X86::LD_Fp80m : X86::ST_FpP80m; - case 16: + case 16: { assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass"); // If stack is realigned we can use aligned stores. if (isStackAligned) - return load ? X86::MOVAPSrm : X86::MOVAPSmr; + return load ? + (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) : + (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr); else - return load ? X86::MOVUPSrm : X86::MOVUPSmr; + return load ? + (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) : + (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); + } case 32: assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); // If stack is realigned we can use aligned stores. @@ -2118,7 +2332,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && "Stack slot too small for store"); - bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || + unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); @@ -2133,7 +2348,9 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; + unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + bool isAligned = MMOBegin != MMOEnd && + (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); @@ -2151,7 +2368,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || + unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); @@ -2164,7 +2382,9 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; + unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + bool isAligned = MMOBegin != MMOEnd && + (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); @@ -2174,6 +2394,40 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, NewMIs.push_back(MIB); } +/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr +/// instruction with two undef reads of the register being defined. 
This is +/// used for mapping: +/// %xmm4 = V_SET0 +/// to: +/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef> +/// +static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) { + assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); + unsigned Reg = MI->getOperand(0).getReg(); + MI->setDesc(Desc); + + // MachineInstr::addOperand() will insert explicit operands before any + // implicit operands. + MachineInstrBuilder(MI).addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + // But we don't trust that. + assert(MI->getOperand(1).getReg() == Reg && + MI->getOperand(2).getReg() == Reg && "Misplaced operand"); + return true; +} + +bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); + switch (MI->getOpcode()) { + case X86::V_SET0: + return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr)); + case X86::TEST8ri_NOREX: + MI->setDesc(get(X86::TEST8ri)); + return true; + } + return false; +} + MachineInstr* X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -2305,7 +2559,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr->find(MI->getOpcode()); if (I != OpcodeTablePtr->end()) { unsigned Opcode = I->second.first; - unsigned MinAlign = I->second.second; + unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; if (Align < MinAlign) return NULL; bool NarrowToMOV32rm = false; @@ -2352,6 +2606,51 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NULL; } +/// hasPartialRegUpdate - Return true for all instructions that only update +/// the first 32 or 64-bits of the destination register and leave the rest +/// unmodified. This can be used to avoid folding loads if the instructions +/// only update part of the destination register, and the non-updated part is +/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these +/// instructions breaks the partial register dependency and it can improve +/// performance. e.g.: +/// +/// movss (%rdi), %xmm0 +/// cvtss2sd %xmm0, %xmm0 +/// +/// Instead of +/// cvtss2sd (%rdi), %xmm0 +/// +/// FIXME: This should be turned into a TSFlags. 
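The expandPostRAPseudo hook added above rewrites V_SET0 into a self-XOR (PXORrr, or VPXORrr under AVX) whose two source operands are flagged undef, so no false dependency on the register's previous value is recorded. A rough illustration of the pattern, using hypothetical mini-MI types rather than the real MachineInstr API:

#include <cassert>
#include <cstdio>
#include <vector>

// Hypothetical mini-MI: an opcode plus register operands, each optionally
// marked undef (the register number is encoded, but its old value is unread).
struct Operand { unsigned Reg; bool IsUndef; };
struct MI {
  const char *Opcode;
  std::vector<Operand> Ops; // Ops[0] is the def
};

// Mirror of the Expand2AddrUndef pattern: turn "%r = ZERO_PSEUDO" into
// "%r = XOR %r<undef>, %r<undef>". The undef marks tell later passes that
// this is a dependency-breaking zeroing idiom, not a real read of %r.
void expandZeroPseudo(MI &I, const char *XorOpcode) {
  assert(I.Ops.size() == 1 && "pseudo has a single def");
  unsigned Reg = I.Ops[0].Reg;
  I.Opcode = XorOpcode;
  I.Ops.push_back({Reg, /*IsUndef=*/true});
  I.Ops.push_back({Reg, /*IsUndef=*/true});
}

int main() {
  MI Zero{"V_SET0", {{/*Reg=*/4, false}}};
  expandZeroPseudo(Zero, "PXORrr");
  std::printf("%s with %u operands\n", Zero.Opcode, (unsigned)Zero.Ops.size());
}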
+/// +static bool hasPartialRegUpdate(unsigned Opcode) { + switch (Opcode) { + case X86::CVTSD2SSrr: + case X86::Int_CVTSD2SSrr: + case X86::CVTSS2SDrr: + case X86::Int_CVTSS2SDrr: + case X86::RCPSSr: + case X86::RCPSSr_Int: + case X86::ROUNDSDr: + case X86::ROUNDSSr: + case X86::RSQRTSSr: + case X86::RSQRTSSr_Int: + case X86::SQRTSSr: + case X86::SQRTSSr_Int: + // AVX encoded versions + case X86::VCVTSD2SSrr: + case X86::Int_VCVTSD2SSrr: + case X86::VCVTSS2SDrr: + case X86::Int_VCVTSS2SDrr: + case X86::VRCPSSr: + case X86::VROUNDSDr: + case X86::VROUNDSSr: + case X86::VRSQRTSSr: + case X86::VSQRTSSr: + return true; + } + + return false; +} MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, @@ -2360,22 +2659,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Check switch flag if (NoFusing) return NULL; - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - switch (MI->getOpcode()) { - case X86::CVTSD2SSrr: - case X86::Int_CVTSD2SSrr: - case X86::CVTSS2SDrr: - case X86::Int_CVTSS2SDrr: - case X86::RCPSSr: - case X86::RCPSSr_Int: - case X86::ROUNDSDr: - case X86::ROUNDSSr: - case X86::RSQRTSSr: - case X86::RSQRTSSr_Int: - case X86::SQRTSSr: - case X86::SQRTSSr_Int: - return 0; - } + // Unless optimizing for size, don't fold to avoid partial + // register update stalls + if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + hasPartialRegUpdate(MI->getOpcode())) + return 0; const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); @@ -2412,22 +2700,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Check switch flag if (NoFusing) return NULL; - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - switch (MI->getOpcode()) { - case X86::CVTSD2SSrr: - case X86::Int_CVTSD2SSrr: - case X86::CVTSS2SDrr: - case X86::Int_CVTSS2SDrr: - case X86::RCPSSr: - case X86::RCPSSr_Int: - case X86::ROUNDSDr: - case X86::ROUNDSSr: - case X86::RSQRTSSr: - case X86::RSQRTSSr_Int: - case X86::SQRTSSr: - case X86::SQRTSSr_Int: - return 0; - } + // Unless optimizing for size, don't fold to avoid partial + // register update stalls + if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + hasPartialRegUpdate(MI->getOpcode())) + return 0; // Determine the alignment of the load. unsigned Alignment = 0; @@ -2439,13 +2716,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::AVX_SET0PDY: Alignment = 32; break; - case X86::V_SET0PS: - case X86::V_SET0PD: - case X86::V_SET0PI: + case X86::V_SET0: case X86::V_SETALLONES: - case X86::AVX_SET0PS: - case X86::AVX_SET0PD: - case X86::AVX_SET0PI: + case X86::AVX_SETALLONES: Alignment = 16; break; case X86::FsFLD0SD: @@ -2481,18 +2754,16 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, SmallVector<MachineOperand,X86::AddrNumOperands> MOs; switch (LoadMI->getOpcode()) { - case X86::V_SET0PS: - case X86::V_SET0PD: - case X86::V_SET0PI: + case X86::V_SET0: case X86::V_SETALLONES: - case X86::AVX_SET0PS: - case X86::AVX_SET0PD: - case X86::AVX_SET0PI: case X86::AVX_SET0PSY: case X86::AVX_SET0PDY: + case X86::AVX_SETALLONES: case X86::FsFLD0SD: - case X86::FsFLD0SS: { - // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure. + case X86::FsFLD0SS: + case X86::VFsFLD0SD: + case X86::VFsFLD0SS: { + // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. // Create a constant-pool entry and operands to load from it. 
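The two foldMemoryOperandImpl overloads below replace their duplicated opcode switches with a call to the hasPartialRegUpdate predicate just defined, declining to fold a load into a partial-update instruction unless the function is marked optimize-for-size. A compact sketch of that gate; the opcode set and OptForSize flag here are illustrative stand-ins:

#include <cstdint>
#include <unordered_set>

// Illustrative stand-in for the opcode switch above: scalar ops that write
// only the low lanes of their destination (cvtss2sd, sqrtss, ...).
static const std::unordered_set<uint32_t> PartialUpdateOps = {1, 2, 3};

// Folding the load into such an instruction keeps a dependency on the stale
// upper lanes of the destination; a separate scalar load (movss) breaks it.
// So the fold is only taken when trading speed for size.
bool shouldFoldLoad(uint32_t Opcode, bool OptForSize) {
  if (!OptForSize && PartialUpdateOps.count(Opcode))
    return false; // avoid a partial-register-update stall
  return true;
}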
// Medium and large mode can't fold loads this way. @@ -2515,7 +2786,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Create a constant-pool entry. MachineConstantPool &MCP = *MF.getConstantPool(); - const Type *Ty; + Type *Ty; unsigned Opc = LoadMI->getOpcode(); if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS) Ty = Type::getFloatTy(MF.getFunction()->getContext()); @@ -2525,9 +2796,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); - const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? - Constant::getAllOnesValue(Ty) : - Constant::getNullValue(Ty); + + bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES); + const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) : + Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); // Create operands to load from the constant pool entry. @@ -2615,9 +2887,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, if (I == MemOp2RegOpTable.end()) return false; unsigned Opc = I->second.first; - unsigned Index = I->second.second & 0xf; - bool FoldedLoad = I->second.second & (1 << 4); - bool FoldedStore = I->second.second & (1 << 5); + unsigned Index = I->second.second & TB_INDEX_MASK; + bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; + bool FoldedStore = I->second.second & TB_FOLDED_STORE; if (UnfoldLoad && !FoldedLoad) return false; UnfoldLoad &= FoldedLoad; @@ -2743,9 +3015,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (I == MemOp2RegOpTable.end()) return false; unsigned Opc = I->second.first; - unsigned Index = I->second.second & 0xf; - bool FoldedLoad = I->second.second & (1 << 4); - bool FoldedStore = I->second.second & (1 << 5); + unsigned Index = I->second.second & TB_INDEX_MASK; + bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; + bool FoldedStore = I->second.second & TB_FOLDED_STORE; const MCInstrDesc &MCID = get(Opc); const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); unsigned NumDefs = MCID.NumDefs; @@ -2780,7 +3052,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) // Do not introduce a slow unaligned load. return false; - bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; + unsigned Alignment = RC->getSize() == 32 ? 32 : 16; + bool isAligned = (*MMOs.first) && + (*MMOs.first)->getAlignment() >= Alignment; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); @@ -2822,7 +3096,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) // Do not introduce a slow unaligned store. return false; - bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; + unsigned Alignment = RC->getSize() == 32 ? 
32 : 16; + bool isAligned = (*MMOs.first) && + (*MMOs.first)->getAlignment() >= Alignment; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), dl, MVT::Other, @@ -2843,14 +3119,14 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, MemOp2RegOpTable.find(Opc); if (I == MemOp2RegOpTable.end()) return 0; - bool FoldedLoad = I->second.second & (1 << 4); - bool FoldedStore = I->second.second & (1 << 5); + bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; + bool FoldedStore = I->second.second & TB_FOLDED_STORE; if (UnfoldLoad && !FoldedLoad) return 0; if (UnfoldStore && !FoldedStore) return 0; if (LoadRegIndex) - *LoadRegIndex = I->second.second & 0xf; + *LoadRegIndex = I->second.second & TB_INDEX_MASK; return I->second.first; } @@ -2881,6 +3157,16 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: + // AVX load instructions + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::FsVMOVAPSrm: + case X86::FsVMOVAPDrm: + case X86::VMOVAPSrm: + case X86::VMOVUPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: + case X86::VMOVDQUrm: case X86::VMOVAPSYrm: case X86::VMOVUPSYrm: case X86::VMOVAPDYrm: @@ -2908,6 +3194,16 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: + // AVX load instructions + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::FsVMOVAPSrm: + case X86::FsVMOVAPDrm: + case X86::VMOVAPSrm: + case X86::VMOVUPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: + case X86::VMOVDQUrm: case X86::VMOVAPSYrm: case X86::VMOVUPSYrm: case X86::VMOVAPDYrm: @@ -3007,31 +3303,6 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); } - -/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher) -/// register? e.g. r8, xmm8, xmm13, etc. -bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { - switch (RegNo) { - default: break; - case X86::R8: case X86::R9: case X86::R10: case X86::R11: - case X86::R12: case X86::R13: case X86::R14: case X86::R15: - case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: - case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: - case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: - case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: - case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: - case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: - case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: - case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: - case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: - case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: - case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11: - case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15: - return true; - } - return false; -} - /// getGlobalBaseReg - Return a virtual register initialized with the /// the global base register value. Output instructions required to /// initialize the register in the function entry block, if necessary. 
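Several hunks above replace the magic constants 0xf, (1 << 4), and (1 << 5) with the named masks TB_INDEX_MASK, TB_FOLDED_LOAD, and TB_FOLDED_STORE, and extract the minimum alignment through TB_ALIGN_MASK/TB_ALIGN_SHIFT. A sketch of how such a packed fold-table entry can be encoded and decoded; the bit layout below is illustrative only, not necessarily the one the patch defines:

#include <cstdint>

// Illustrative layout: low bits hold the operand index, two flag bits record
// whether a load/store was folded, and a shifted field holds the alignment.
enum : uint32_t {
  TB_INDEX_MASK   = 0xf,
  TB_FOLDED_LOAD  = 1u << 4,
  TB_FOLDED_STORE = 1u << 5,
  TB_ALIGN_SHIFT  = 6,
  TB_ALIGN_MASK   = 0xffu << TB_ALIGN_SHIFT,
};

uint32_t makeEntry(uint32_t Index, bool Load, bool Store, uint32_t Align) {
  return (Index & TB_INDEX_MASK) |
         (Load  ? TB_FOLDED_LOAD  : 0) |
         (Store ? TB_FOLDED_STORE : 0) |
         ((Align << TB_ALIGN_SHIFT) & TB_ALIGN_MASK);
}

uint32_t entryIndex(uint32_t E) { return E & TB_INDEX_MASK; }
bool     entryLoad(uint32_t E)  { return (E & TB_FOLDED_LOAD) != 0; }
bool     entryStore(uint32_t E) { return (E & TB_FOLDED_STORE) != 0; }
uint32_t entryAlign(uint32_t E) { return (E & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; }

Packing index, fold direction, and alignment into one word keeps each DenseMap entry a single (opcode, flags) pair, which is why the lookup sites above only need a mask and shift rather than a second table.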
@@ -3072,7 +3343,6 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr }, { X86::ORPSrm, X86::ORPDrm, X86::PORrm }, { X86::ORPSrr, X86::ORPDrr, X86::PORrr }, - { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI }, { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, // AVX 128-bit support @@ -3088,7 +3358,6 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, - { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI }, { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, // AVX 256-bit support @@ -3111,13 +3380,13 @@ static const unsigned *lookup(unsigned opcode, unsigned domain) { } std::pair<uint16_t, uint16_t> -X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const { +X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const { uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; return std::make_pair(domain, domain && lookup(MI->getOpcode(), domain) ? 0xe : 0); } -void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const { +void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { assert(Domain>0 && Domain<4 && "Invalid execution domain"); uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; assert(dom && "Not an SSE instruction"); @@ -3158,6 +3427,29 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::SQRTSSm_Int: case X86::SQRTSSr: case X86::SQRTSSr_Int: + // AVX instructions with high latency + case X86::VDIVSDrm: + case X86::VDIVSDrm_Int: + case X86::VDIVSDrr: + case X86::VDIVSDrr_Int: + case X86::VDIVSSrm: + case X86::VDIVSSrm_Int: + case X86::VDIVSSrr: + case X86::VDIVSSrr_Int: + case X86::VSQRTPDm: + case X86::VSQRTPDm_Int: + case X86::VSQRTPDr: + case X86::VSQRTPDr_Int: + case X86::VSQRTPSm: + case X86::VSQRTPSm_Int: + case X86::VSQRTPSr: + case X86::VSQRTPSr_Int: + case X86::VSQRTSDm: + case X86::VSQRTSDm_Int: + case X86::VSQRTSDr: + case X86::VSQRTSSm: + case X86::VSQRTSSm_Int: + case X86::VSQRTSSr: return true; } } diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h index 5f2eba3..97009db 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h @@ -27,24 +27,6 @@ namespace llvm { class X86TargetMachine; namespace X86 { - // Enums for memory operand decoding. Each memory operand is represented with - // a 5 operand sequence in the form: - // [BaseReg, ScaleAmt, IndexReg, Disp, Segment] - // These enums help decode this. - enum { - AddrBaseReg = 0, - AddrScaleAmt = 1, - AddrIndexReg = 2, - AddrDisp = 3, - - /// AddrSegmentReg - The operand # of the segment in the memory operand. - AddrSegmentReg = 4, - - /// AddrNumOperands - Total number of operands in a memory reference. - AddrNumOperands = 5 - }; - - // X86 specific condition code. These correspond to X86_*_COND in // X86InstrInfo.td. They must be kept in synch. enum CondCode { @@ -82,133 +64,8 @@ namespace X86 { /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. CondCode GetOppositeBranchCondition(X86::CondCode CC); +} // end namespace X86; -} - -/// X86II - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace X86II { - /// Target Operand Flag enum. 
- enum TOF { - //===------------------------------------------------------------------===// - // X86 Specific MachineOperand flags. - - MO_NO_FLAG, - - /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a - /// relocation of: - /// SYMBOL_LABEL + [. - PICBASELABEL] - MO_GOT_ABSOLUTE_ADDRESS, - - /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the - /// immediate should get the value of the symbol minus the PIC base label: - /// SYMBOL_LABEL - PICBASELABEL - MO_PIC_BASE_OFFSET, - - /// MO_GOT - On a symbol operand this indicates that the immediate is the - /// offset to the GOT entry for the symbol name from the base of the GOT. - /// - /// See the X86-64 ELF ABI supplement for more details. - /// SYMBOL_LABEL @GOT - MO_GOT, - - /// MO_GOTOFF - On a symbol operand this indicates that the immediate is - /// the offset to the location of the symbol name from the base of the GOT. - /// - /// See the X86-64 ELF ABI supplement for more details. - /// SYMBOL_LABEL @GOTOFF - MO_GOTOFF, - - /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is - /// offset to the GOT entry for the symbol name from the current code - /// location. - /// - /// See the X86-64 ELF ABI supplement for more details. - /// SYMBOL_LABEL @GOTPCREL - MO_GOTPCREL, - - /// MO_PLT - On a symbol operand this indicates that the immediate is - /// offset to the PLT entry of symbol name from the current code location. - /// - /// See the X86-64 ELF ABI supplement for more details. - /// SYMBOL_LABEL @PLT - MO_PLT, - - /// MO_TLSGD - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// See 'ELF Handling for Thread-Local Storage' for more details. - /// SYMBOL_LABEL @TLSGD - MO_TLSGD, - - /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// See 'ELF Handling for Thread-Local Storage' for more details. - /// SYMBOL_LABEL @GOTTPOFF - MO_GOTTPOFF, - - /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// See 'ELF Handling for Thread-Local Storage' for more details. - /// SYMBOL_LABEL @INDNTPOFF - MO_INDNTPOFF, - - /// MO_TPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// See 'ELF Handling for Thread-Local Storage' for more details. - /// SYMBOL_LABEL @TPOFF - MO_TPOFF, - - /// MO_NTPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// See 'ELF Handling for Thread-Local Storage' for more details. - /// SYMBOL_LABEL @NTPOFF - MO_NTPOFF, - - /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the - /// reference is actually to the "__imp_FOO" symbol. This is used for - /// dllimport linkage on windows. - MO_DLLIMPORT, - - /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the - /// reference is actually to the "FOO$stub" symbol. This is used for calls - /// and jumps to external functions on Tiger and earlier. - MO_DARWIN_STUB, - - /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the - /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a - /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub. - MO_DARWIN_NONLAZY, - - /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates - /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is - /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub. 
- MO_DARWIN_NONLAZY_PIC_BASE, - - /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this - /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", - /// which is a PIC-base-relative reference to a hidden dyld lazy pointer - /// stub. - MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, - - /// MO_TLVP - On a symbol operand this indicates that the immediate is - /// some TLS offset. - /// - /// This is the TLS offset for the Darwin TLS mechanism. - MO_TLVP, - - /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate - /// is some TLS offset from the picbase. - /// - /// This is the 32-bit TLS offset for Darwin TLS in PIC mode. - MO_TLVP_PIC_BASE - }; -} /// isGlobalStubReference - Return true if the specified TargetFlag operand is /// a reference to a stub for a global, not the global itself. @@ -243,353 +100,6 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) { } } -/// X86II - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace X86II { - enum { - //===------------------------------------------------------------------===// - // Instruction encodings. These are the standard/most common forms for X86 - // instructions. - // - - // PseudoFrm - This represents an instruction that is a pseudo instruction - // or one that has not been implemented yet. It is illegal to code generate - // it, but tolerated for intermediate implementation stages. - Pseudo = 0, - - /// Raw - This form is for instructions that don't have any operands, so - /// they are just a fixed opcode value, like 'leave'. - RawFrm = 1, - - /// AddRegFrm - This form is used for instructions like 'push r32' that have - /// their one register operand added to their opcode. - AddRegFrm = 2, - - /// MRMDestReg - This form is used for instructions that use the Mod/RM byte - /// to specify a destination, which in this case is a register. - /// - MRMDestReg = 3, - - /// MRMDestMem - This form is used for instructions that use the Mod/RM byte - /// to specify a destination, which in this case is memory. - /// - MRMDestMem = 4, - - /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte - /// to specify a source, which in this case is a register. - /// - MRMSrcReg = 5, - - /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte - /// to specify a source, which in this case is memory. - /// - MRMSrcMem = 6, - - /// MRM[0-7][rm] - These forms are used to represent instructions that use - /// a Mod/RM byte, and use the middle field to hold extended opcode - /// information. In the intel manual these are represented as /0, /1, ... - /// - - // First, instructions that operate on a register r/m operand... - MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3 - MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7 - - // Next, instructions that operate on a memory r/m operand... - MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3 - MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7 - - // MRMInitReg - This form is used for instructions whose source and - // destinations are the same register. - MRMInitReg = 32, - - //// MRM_C1 - A mod/rm byte of exactly 0xC1. 
- MRM_C1 = 33, - MRM_C2 = 34, - MRM_C3 = 35, - MRM_C4 = 36, - MRM_C8 = 37, - MRM_C9 = 38, - MRM_E8 = 39, - MRM_F0 = 40, - MRM_F8 = 41, - MRM_F9 = 42, - MRM_D0 = 45, - MRM_D1 = 46, - - /// RawFrmImm8 - This is used for the ENTER instruction, which has two - /// immediates, the first of which is a 16-bit immediate (specified by - /// the imm encoding) and the second is a 8-bit fixed value. - RawFrmImm8 = 43, - - /// RawFrmImm16 - This is used for CALL FAR instructions, which have two - /// immediates, the first of which is a 16 or 32-bit immediate (specified by - /// the imm encoding) and the second is a 16-bit fixed value. In the AMD - /// manual, this operand is described as pntr16:32 and pntr16:16 - RawFrmImm16 = 44, - - FormMask = 63, - - //===------------------------------------------------------------------===// - // Actual flags... - - // OpSize - Set if this instruction requires an operand size prefix (0x66), - // which most often indicates that the instruction operates on 16 bit data - // instead of 32 bit data. - OpSize = 1 << 6, - - // AsSize - Set if this instruction requires an operand size prefix (0x67), - // which most often indicates that the instruction address 16 bit address - // instead of 32 bit address (or 32 bit address in 64 bit mode). - AdSize = 1 << 7, - - //===------------------------------------------------------------------===// - // Op0Mask - There are several prefix bytes that are used to form two byte - // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is - // used to obtain the setting of this field. If no bits in this field is - // set, there is no prefix byte for obtaining a multibyte opcode. - // - Op0Shift = 8, - Op0Mask = 0x1F << Op0Shift, - - // TB - TwoByte - Set if this instruction has a two byte opcode, which - // starts with a 0x0F byte before the real opcode. - TB = 1 << Op0Shift, - - // REP - The 0xF3 prefix byte indicating repetition of the following - // instruction. - REP = 2 << Op0Shift, - - // D8-DF - These escape opcodes are used by the floating point unit. These - // values must remain sequential. - D8 = 3 << Op0Shift, D9 = 4 << Op0Shift, - DA = 5 << Op0Shift, DB = 6 << Op0Shift, - DC = 7 << Op0Shift, DD = 8 << Op0Shift, - DE = 9 << Op0Shift, DF = 10 << Op0Shift, - - // XS, XD - These prefix codes are for single and double precision scalar - // floating point operations performed in the SSE registers. - XD = 11 << Op0Shift, XS = 12 << Op0Shift, - - // T8, TA, A6, A7 - Prefix after the 0x0F prefix. - T8 = 13 << Op0Shift, TA = 14 << Op0Shift, - A6 = 15 << Op0Shift, A7 = 16 << Op0Shift, - - // TF - Prefix before and after 0x0F - TF = 17 << Op0Shift, - - //===------------------------------------------------------------------===// - // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. - // They are used to specify GPRs and SSE registers, 64-bit operand size, - // etc. We only cares about REX.W and REX.R bits and only the former is - // statically determined. - // - REXShift = Op0Shift + 5, - REX_W = 1 << REXShift, - - //===------------------------------------------------------------------===// - // This three-bit field describes the size of an immediate operand. Zero is - // unused so that we can tell if we forgot to set a value. 
- ImmShift = REXShift + 1, - ImmMask = 7 << ImmShift, - Imm8 = 1 << ImmShift, - Imm8PCRel = 2 << ImmShift, - Imm16 = 3 << ImmShift, - Imm16PCRel = 4 << ImmShift, - Imm32 = 5 << ImmShift, - Imm32PCRel = 6 << ImmShift, - Imm64 = 7 << ImmShift, - - //===------------------------------------------------------------------===// - // FP Instruction Classification... Zero is non-fp instruction. - - // FPTypeMask - Mask for all of the FP types... - FPTypeShift = ImmShift + 3, - FPTypeMask = 7 << FPTypeShift, - - // NotFP - The default, set for instructions that do not use FP registers. - NotFP = 0 << FPTypeShift, - - // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0 - ZeroArgFP = 1 << FPTypeShift, - - // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst - OneArgFP = 2 << FPTypeShift, - - // OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a - // result back to ST(0). For example, fcos, fsqrt, etc. - // - OneArgFPRW = 3 << FPTypeShift, - - // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an - // explicit argument, storing the result to either ST(0) or the implicit - // argument. For example: fadd, fsub, fmul, etc... - TwoArgFP = 4 << FPTypeShift, - - // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an - // explicit argument, but have no destination. Example: fucom, fucomi, ... - CompareFP = 5 << FPTypeShift, - - // CondMovFP - "2 operand" floating point conditional move instructions. - CondMovFP = 6 << FPTypeShift, - - // SpecialFP - Special instruction forms. Dispatch by opcode explicitly. - SpecialFP = 7 << FPTypeShift, - - // Lock prefix - LOCKShift = FPTypeShift + 3, - LOCK = 1 << LOCKShift, - - // Segment override prefixes. Currently we just need ability to address - // stuff in gs and fs segments. - SegOvrShift = LOCKShift + 1, - SegOvrMask = 3 << SegOvrShift, - FS = 1 << SegOvrShift, - GS = 2 << SegOvrShift, - - // Execution domain for SSE instructions in bits 23, 24. - // 0 in bits 23-24 means normal, non-SSE instruction. - SSEDomainShift = SegOvrShift + 2, - - OpcodeShift = SSEDomainShift + 2, - - //===------------------------------------------------------------------===// - /// VEX - The opcode prefix used by AVX instructions - VEXShift = OpcodeShift + 8, - VEX = 1U << 0, - - /// VEX_W - Has a opcode specific functionality, but is used in the same - /// way as REX_W is for regular SSE instructions. - VEX_W = 1U << 1, - - /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2 - /// address instructions in SSE are represented as 3 address ones in AVX - /// and the additional register is encoded in VEX_VVVV prefix. - VEX_4V = 1U << 2, - - /// VEX_I8IMM - Specifies that the last register used in a AVX instruction, - /// must be encoded in the i8 immediate field. This usually happens in - /// instructions with 4 operands. - VEX_I8IMM = 1U << 3, - - /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current - /// instruction uses 256-bit wide registers. This is usually auto detected - /// if a VR256 register is used, but some AVX instructions also have this - /// field marked when using a f256 memory references. - VEX_L = 1U << 4, - - /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the - /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents - /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction - /// storing a classifier in the imm8 field. 
To simplify our implementation, - /// we handle this by storeing the classifier in the opcode field and using - /// this flag to indicate that the encoder should do the wacky 3DNow! thing. - Has3DNow0F0FOpcode = 1U << 5 - }; - - // getBaseOpcodeFor - This function returns the "base" X86 opcode for the - // specified machine instruction. - // - static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { - return TSFlags >> X86II::OpcodeShift; - } - - static inline bool hasImm(uint64_t TSFlags) { - return (TSFlags & X86II::ImmMask) != 0; - } - - /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field - /// of the specified instruction. - static inline unsigned getSizeOfImm(uint64_t TSFlags) { - switch (TSFlags & X86II::ImmMask) { - default: assert(0 && "Unknown immediate size"); - case X86II::Imm8: - case X86II::Imm8PCRel: return 1; - case X86II::Imm16: - case X86II::Imm16PCRel: return 2; - case X86II::Imm32: - case X86II::Imm32PCRel: return 4; - case X86II::Imm64: return 8; - } - } - - /// isImmPCRel - Return true if the immediate of the specified instruction's - /// TSFlags indicates that it is pc relative. - static inline unsigned isImmPCRel(uint64_t TSFlags) { - switch (TSFlags & X86II::ImmMask) { - default: assert(0 && "Unknown immediate size"); - case X86II::Imm8PCRel: - case X86II::Imm16PCRel: - case X86II::Imm32PCRel: - return true; - case X86II::Imm8: - case X86II::Imm16: - case X86II::Imm32: - case X86II::Imm64: - return false; - } - } - - /// getMemoryOperandNo - The function returns the MCInst operand # for the - /// first field of the memory operand. If the instruction doesn't have a - /// memory operand, this returns -1. - /// - /// Note that this ignores tied operands. If there is a tied register which - /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only - /// counted as one operand. - /// - static inline int getMemoryOperandNo(uint64_t TSFlags) { - switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form"); - default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!"); - case X86II::Pseudo: - case X86II::RawFrm: - case X86II::AddRegFrm: - case X86II::MRMDestReg: - case X86II::MRMSrcReg: - case X86II::RawFrmImm8: - case X86II::RawFrmImm16: - return -1; - case X86II::MRMDestMem: - return 0; - case X86II::MRMSrcMem: { - bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; - unsigned FirstMemOp = 1; - if (HasVEX_4V) - ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). - - // FIXME: Maybe lea should have its own form? This is a horrible hack. 
- //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - // Opcode == X86::LEA16r || Opcode == X86::LEA32r) - return FirstMemOp; - } - case X86II::MRM0r: case X86II::MRM1r: - case X86II::MRM2r: case X86II::MRM3r: - case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: - return -1; - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: - return 0; - case X86II::MRM_C1: - case X86II::MRM_C2: - case X86II::MRM_C3: - case X86II::MRM_C4: - case X86II::MRM_C8: - case X86II::MRM_C9: - case X86II::MRM_E8: - case X86II::MRM_F0: - case X86II::MRM_F8: - case X86II::MRM_F9: - case X86II::MRM_D0: - case X86II::MRM_D1: - return -1; - } - } -} - inline static bool isScale(const MachineOperand &MO) { return MO.isImm() && (MO.getImm() == 1 || MO.getImm() == 2 || @@ -621,14 +131,22 @@ class X86InstrInfo : public X86GenInstrInfo { /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1, /// RegOp2MemOpTable2 - Load / store folding opcode maps. /// - DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr; - DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0; - DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1; - DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2; + typedef DenseMap<unsigned, + std::pair<unsigned, unsigned> > RegOp2MemOpTableType; + RegOp2MemOpTableType RegOp2MemOpTable2Addr; + RegOp2MemOpTableType RegOp2MemOpTable0; + RegOp2MemOpTableType RegOp2MemOpTable1; + RegOp2MemOpTableType RegOp2MemOpTable2; /// MemOp2RegOpTable - Load / store unfolding opcode map. /// - DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable; + typedef DenseMap<unsigned, + std::pair<unsigned, unsigned> > MemOp2RegOpTableType; + MemOp2RegOpTableType MemOp2RegOpTable; + + void AddTableEntry(RegOp2MemOpTableType &R2MTable, + MemOp2RegOpTableType &M2RTable, + unsigned RegOp, unsigned MemOp, unsigned Flags); public: explicit X86InstrInfo(X86TargetMachine &tm); @@ -656,17 +174,6 @@ public: unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const; - /// hasLoadFromStackSlot - If the specified machine instruction has - /// a load from a stack slot, return true along with the FrameIndex - /// of the loaded stack slot and the machine mem operand containing - /// the reference. If not, return false. Unlike - /// isLoadFromStackSlot, this returns true for any instructions that - /// loads from the stack. This is a hint only and may not catch all - /// cases. - bool hasLoadFromStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const; - unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination /// stack locations as well. This uses a heuristic so it isn't @@ -674,16 +181,6 @@ public: unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const; - /// hasStoreToStackSlot - If the specified machine instruction has a - /// store to a stack slot, return true along with the FrameIndex of - /// the loaded stack slot and the machine mem operand containing the - /// reference. If not, return false. Unlike isStoreToStackSlot, - /// this returns true for any instructions that loads from the - /// stack. This is a hint only and may not catch all cases. 
- bool hasStoreToStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const; - bool isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -750,6 +247,9 @@ public: MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const; + + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -829,32 +329,21 @@ public: /// instruction that defines the specified register class. bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; - static bool isX86_64NonExtLowByteReg(unsigned reg) { - return (reg == X86::SPL || reg == X86::BPL || - reg == X86::SIL || reg == X86::DIL); - } - static bool isX86_64ExtendedReg(const MachineOperand &MO) { if (!MO.isReg()) return false; - return isX86_64ExtendedReg(MO.getReg()); + return X86II::isX86_64ExtendedReg(MO.getReg()); } - /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or - /// higher) register? e.g. r8, xmm8, xmm13, etc. - static bool isX86_64ExtendedReg(unsigned RegNo); - /// getGlobalBaseReg - Return a virtual register initialized with the /// the global base register value. Output instructions required to /// initialize the register in the function entry block, if necessary. /// unsigned getGlobalBaseReg(MachineFunction *MF) const; - /// GetSSEDomain - Return the SSE execution domain of MI as the first element, - /// and a bitmask of possible arguments to SetSSEDomain ase the second. - std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const; + std::pair<uint16_t, uint16_t> + getExecutionDomain(const MachineInstr *MI) const; - /// SetSSEDomain - Set the SSEDomain of MI. 
- void SetSSEDomain(MachineInstr *MI, unsigned Domain) const; + void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index 7eb07b0..d54bf27 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -65,7 +65,7 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2, def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; -def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>; @@ -97,6 +97,8 @@ def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; + def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; @@ -133,9 +135,13 @@ def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>; def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8, +def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; +def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; + def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -218,12 +224,16 @@ def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, [SDNPCommutative]>; +def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; +def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, + [SDNPHasChain]>; + def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; @@ -331,6 +341,11 @@ class ImmSExtAsmOperandClass : AsmOperandClass { let RenderMethod = "addImmOperands"; } +class ImmZExtAsmOperandClass : AsmOperandClass { + let SuperClasses = [ImmAsmOperand]; + let RenderMethod = "addImmOperands"; +} + // Sign-extended immediate classes. We don't need to define the full lattice // here because there is no instruction with an ambiguity between ImmSExti64i32 // and ImmSExti32i8. @@ -358,6 +373,12 @@ def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti32i8"; } +// [0, 0x000000FF] +def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass { + let Name = "ImmZExtu32u8"; +} + + // [0, 0x0000007F] | // [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { @@ -377,6 +398,11 @@ def i32i8imm : Operand<i32> { let ParserMatchClass = ImmSExti32i8AsmOperand; let OperandType = "OPERAND_IMMEDIATE"; } +// 32-bits but only 8 bits are significant, and those 8 bits are unsigned. 
+def u32u8imm : Operand<i32> { + let ParserMatchClass = ImmZExtu32u8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; +} // 64-bits but only 32 bits are significant. def i64i32imm : Operand<i64> { @@ -389,11 +415,13 @@ def i64i32imm : Operand<i64> { def i64i32imm_pcrel : Operand<i64> { let PrintMethod = "print_pcrel_imm"; let ParserMatchClass = X86AbsMemAsmOperand; + let OperandType = "OPERAND_PCREL"; } // 64-bits but only 8 bits are significant. def i64i8imm : Operand<i64> { let ParserMatchClass = ImmSExti64i8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } def lea64_32mem : Operand<i32> { @@ -442,18 +470,33 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">; +def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">; def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; +def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; +def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; +def HasF16C : Predicate<"Subtarget->hasF16C()">; +def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; +def HasBMI : Predicate<"Subtarget->hasBMI()">; def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; +def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<"!Mode64Bit">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate<"Mode64Bit">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; +def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">, + AssemblerPredicate<"ModeNaCl">; +def IsNaCl32 : Predicate<"Subtarget->isTargetNaCl32()">, + AssemblerPredicate<"ModeNaCl,!Mode64Bit">; +def IsNaCl64 : Predicate<"Subtarget->isTargetNaCl64()">, + AssemblerPredicate<"ModeNaCl,Mode64Bit">; +def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">, + AssemblerPredicate<"!ModeNaCl">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">; def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&" @@ -766,30 +809,30 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { -def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>; -def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize; -def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>; +def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", []>; +def MOVSW : I<0xA5, RawFrm, (outs), (ins), "movsw", []>, OpSize; +def MOVSD : I<0xA5, RawFrm, (outs), (ins), "movs{l|d}", []>; def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>; } // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in -def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>; +def STOSB : I<0xAA, RawFrm, (outs), (ins), "stosb", []>; let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in -def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize; +def STOSW : I<0xAB, RawFrm, (outs), (ins), "stosw", []>, OpSize; let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in -def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>; +def STOSD : I<0xAB, RawFrm, (outs), 
(ins), "stos{l|d}", []>; let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>; -def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>; -def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize; -def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>; +def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scasb", []>; +def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scasw", []>, OpSize; +def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l|d}", []>; def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>; -def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>; -def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize; -def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>; +def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", []>; +def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", []>, OpSize; +def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", []>; def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; @@ -841,22 +884,22 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), - "mov{b}\t{$src, %al|%al, $src}", []>, + "mov{b}\t{$src, %al|AL, $src}", []>, Requires<[In32BitMode]>; def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), - "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize, + "mov{w}\t{$src, %ax|AL, $src}", []>, OpSize, Requires<[In32BitMode]>; def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), - "mov{l}\t{$src, %eax|%eax, $src}", []>, + "mov{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In32BitMode]>; def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), - "mov{b}\t{%al, $dst|$dst, %al}", []>, + "mov{b}\t{%al, $dst|$dst, AL}", []>, Requires<[In32BitMode]>; def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), - "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize, + "mov{w}\t{%ax, $dst|$dst, AL}", []>, OpSize, Requires<[In32BitMode]>; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), - "mov{l}\t{%eax, $dst|$dst, %eax}", []>, + "mov{l}\t{%eax, $dst|$dst, EAX}", []>, Requires<[In32BitMode]>; // FIXME: These definitions are utterly broken @@ -865,13 +908,13 @@ def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), // in question. 
/* def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src), - "mov{q}\t{$src, %rax|%rax, $src}", []>; + "mov{q}\t{$src, %rax|RAX, $src}", []>; def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src), - "mov{q}\t{$src, %rax|%rax, $src}", []>; + "mov{q}\t{$src, %rax|RAX, $src}", []>; def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins), - "mov{q}\t{%rax, $dst|$dst, %rax}", []>; + "mov{q}\t{%rax, $dst|$dst, RAX}", []>; def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), - "mov{q}\t{%rax, $dst|$dst, %rax}", []>; + "mov{q}\t{%rax, $dst|$dst, RAX}", []>; */ @@ -926,7 +969,7 @@ let mayStore = 1 in def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; -let mayLoad = 1, +let mayLoad = 1, neverHasSideEffects = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), @@ -1117,11 +1160,15 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), } def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src), - "xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + "xchg{w}\t{$src, %ax|AX, $src}", []>, OpSize; def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src), - "xchg{l}\t{$src, %eax|%eax, $src}", []>; + "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In32BitMode]>; +// Uses GR32_NOAX in 64-bit mode to prevent encoding using the 0x90 NOP encoding. +// xchg %eax, %eax needs to clear upper 32-bits of RAX so is not a NOP. +def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src), + "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In64BitMode]>; def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), - "xchg{q}\t{$src, %rax|%rax, $src}", []>; + "xchg{q}\t{$src, %rax|RAX, $src}", []>; @@ -1172,7 +1219,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), - "cmpxchg16b\t$dst", []>, TB; + "cmpxchg16b\t$dst", []>, TB, Requires<[HasCmpxchg16b]>; @@ -1261,6 +1308,104 @@ def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst), "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>; //===----------------------------------------------------------------------===// +// MOVBE Instructions +// +let Predicates = [HasMOVBE] in { + def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "movbe{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>, OpSize, T8; + def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "movbe{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>, T8; + def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "movbe{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>, T8; + def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), + "movbe{w}\t{$src, $dst|$dst, $src}", + [(store (bswap GR16:$src), addr:$dst)]>, OpSize, T8; + def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movbe{l}\t{$src, $dst|$dst, $src}", + [(store (bswap GR32:$src), addr:$dst)]>, T8; + def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "movbe{q}\t{$src, $dst|$dst, $src}", + [(store (bswap GR64:$src), addr:$dst)]>, T8; +} + +//===----------------------------------------------------------------------===// +// RDRAND Instruction +// +let Predicates 
= [HasRDRAND], Defs = [EFLAGS] in { + def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins), + "rdrand{w}\t$dst", []>, OpSize, TB; + def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins), + "rdrand{l}\t$dst", []>, TB; + def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins), + "rdrand{q}\t$dst", []>, TB; +} + +//===----------------------------------------------------------------------===// +// LZCNT Instruction +// +let Predicates = [HasLZCNT], Defs = [EFLAGS] in { + def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "lzcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>, XS, + OpSize; + def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "lzcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctlz (loadi16 addr:$src))), + (implicit EFLAGS)]>, XS, OpSize; + + def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "lzcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>, XS; + def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "lzcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctlz (loadi32 addr:$src))), + (implicit EFLAGS)]>, XS; + + def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "lzcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>, + XS; + def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "lzcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctlz (loadi64 addr:$src))), + (implicit EFLAGS)]>, XS; +} + +//===----------------------------------------------------------------------===// +// TZCNT Instruction +// +let Predicates = [HasBMI], Defs = [EFLAGS] in { + def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "tzcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>, XS, + OpSize; + def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "tzcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (cttz (loadi16 addr:$src))), + (implicit EFLAGS)]>, XS, OpSize; + + def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "tzcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>, XS; + def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "tzcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (cttz (loadi32 addr:$src))), + (implicit EFLAGS)]>, XS; + + def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "tzcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>, + XS; + def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "tzcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (cttz (loadi64 addr:$src))), + (implicit EFLAGS)]>, XS; +} + +//===----------------------------------------------------------------------===// // Subsystems. //===----------------------------------------------------------------------===// @@ -1646,3 +1791,9 @@ def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>; def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>; def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>; def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>; + +// xchg: We accept "xchgX <reg>, %eax" and "xchgX %eax, <reg>" as synonyms. 
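The LZCNT/TZCNT patterns above map LLVM's ctlz/cttz nodes directly onto the new instructions, with EFLAGS marked as an implicit def. For reference, their arithmetic in portable C++, as a sketch: lzcnt/tzcnt define the result for a zero input as the operand width, which is why they can implement ctlz/cttz without the zero check that bsr/bsf (whose destination is undefined for zero) would require.

#include <cstdint>
#include <cstdio>

// Count of leading zero bits; returns 32 for a zero input, matching lzcnt.
unsigned lzcnt32(uint32_t X) {
  unsigned N = 0;
  for (uint32_t Mask = 0x80000000u; Mask && !(X & Mask); Mask >>= 1) ++N;
  return N;
}

// Count of trailing zero bits; returns 32 for a zero input, matching tzcnt.
unsigned tzcnt32(uint32_t X) {
  if (!X) return 32;
  unsigned N = 0;
  while (!(X & 1)) { X >>= 1; ++N; }
  return N;
}

int main() { std::printf("%u %u\n", lzcnt32(0x00F0u), tzcnt32(0x00F0u)); }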
+def : InstAlias<"xchgw %ax, $src", (XCHG16ar GR16:$src)>; +def : InstAlias<"xchgl %eax, $src", (XCHG32ar GR32:$src)>, Requires<[In32BitMode]>; +def : InstAlias<"xchgl %eax, $src", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>; +def : InstAlias<"xchgq %rax, $src", (XCHG64ar GR64:$src)>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index fe11d77..d3ced23 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -116,7 +116,217 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, } //===----------------------------------------------------------------------===// -// SSE 1 & 2 - Move Instructions +// Non-instruction patterns +//===----------------------------------------------------------------------===// + +// A vector extract of the first f32/f64 position is a subregister copy +def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + +// A 128-bit subvector extract from the first 256-bit vector position +// is a subregister copy that needs no instruction. +def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))), + (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>; +def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))), + (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>; + +def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))), + (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>; +def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))), + (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>; + +def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))), + (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>; +def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))), + (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>; + +// A 128-bit subvector insert to the first 256-bit vector position +// is a subregister copy that needs no instruction. +def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)), + (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)), + (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)), + (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)), + (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; + +// Implicitly promote a 32-bit scalar to a vector. +def : Pat<(v4f32 (scalar_to_vector FR32:$src)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; +def : Pat<(v8f32 (scalar_to_vector FR32:$src)), + (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; +// Implicitly promote a 64-bit scalar to a vector. +def : Pat<(v2f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; +def : Pat<(v4f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; + +// Bitcasts between 128-bit vector types. 
Return the original type since +// no instruction is needed for the conversion +let Predicates = [HasXMMInt] in { + def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; +} + +// Bitcasts between 256-bit vector types. 
Return the original type since +// no instruction is needed for the conversion +let Predicates = [HasAVX] in { + def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>; + def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>; +} + +// Alias instructions that map fld0 to pxor for sse. +// FIXME: Set encoding to pseudo! +let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, + canFoldAsLoad = 1 in { + def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, + Requires<[HasSSE1]>, TB, OpSize; + def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, + Requires<[HasSSE2]>, TB, OpSize; + def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, + Requires<[HasAVX]>, TB, OpSize, VEX_4V; + def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, + Requires<[HasAVX]>, TB, OpSize, VEX_4V; +} + +//===----------------------------------------------------------------------===// +// AVX & SSE - Zero/One Vectors +//===----------------------------------------------------------------------===// + +// Alias instruction that maps zero vector to pxor / xorp* for sse. +// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then +// swizzled by ExecutionDepsFix to pxor. 
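Put differently, V_SET0 is the classic self-XOR zeroing idiom. A hedged C++ illustration with SSE intrinsics; the intrinsic-to-instruction mappings noted in the comments are typical codegen, not something this file guarantees:

#include <immintrin.h>

// Both of these usually lower to a single register-clearing XOR, which
// modern cores recognize as having no input dependency. Which encoding is
// chosen (xorps vs. pxor) is exactly the execution-domain question the
// surrounding comment describes.
__m128  zero_ps()  { return _mm_setzero_ps(); }    // typically: xorps
__m128i zero_epi() { return _mm_setzero_si128(); } // typically: pxor

The flags on the pseudo-instruction, described next, tell the rest of the backend how cheap this idiom is.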
+// We set canFoldAsLoad because this can be converted to a constant-pool +// load of an all-zeros value if folding it would be beneficial. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isPseudo = 1, neverHasSideEffects = 1 in { +def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>; +} + +def : Pat<(v4f32 immAllZerosV), (V_SET0)>; +def : Pat<(v2f64 immAllZerosV), (V_SET0)>; +def : Pat<(v4i32 immAllZerosV), (V_SET0)>; +def : Pat<(v2i64 immAllZerosV), (V_SET0)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0)>; +def : Pat<(v16i8 immAllZerosV), (V_SET0)>; + + +// The same as done above but for AVX. The 256-bit ISA does not support PI, +// and doesn't need it because on Sandy Bridge the register is set to zero +// at the rename stage without using any execution unit, so SET0PSY +// and SET0PDY can be used for vector int instructions without penalty. +// FIXME: Change encoding to pseudo! This is blocked right now by the x86 +// JIT implementation; it does not expand the instructions below like +// X86MCInstLower does. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1, Predicates = [HasAVX] in { +def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V; +def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; +} + + +// AVX has no support for 256-bit integer instructions, but since the 128-bit +// VPXOR instruction writes zero to its upper part, it's safe to build zeros. +def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>; +def : Pat<(bc_v8i32 (v8f32 immAllZerosV)), + (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>; + +def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; +def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), + (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; + +// We set canFoldAsLoad because this can be converted to a constant-pool +// load of an all-ones value if folding it would be beneficial. +// FIXME: Change encoding to pseudo! This is blocked right now by the x86 +// JIT implementation; it does not expand the instructions below like +// X86MCInstLower does. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1, ExeDomain = SSEPackedInt in + def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>; +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in + def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V; + + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move FP Scalar Instructions +// +// Move Instructions. Register-to-register movss/movsd is not used for FR32/64 +// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr +// is used instead. Register-to-register movss/movsd is not modeled as an +// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable +// in terms of a copy, and, as just mentioned, we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===// class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> : @@ -130,28 +340,57 @@ class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))]>; -// Move Instructions. Register-to-register movss/movsd is not used for FR32/64 -// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr -// is used instead. Register-to-register movss/movsd is not modeled as an -// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable -// in terms of a copy, and just mentioned, we don't use movss/movsd for copies. +// AVX def VMOVSSrr : sse12_move_rr<FR32, v4f32, - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V; + "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V, + VEX_LIG; def VMOVSDrr : sse12_move_rr<FR64, v2f64, - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V; + "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V, + VEX_LIG; -let canFoldAsLoad = 1, isReMaterializable = 1 in { - def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX; +// For the disassembler +let isCodeGenOnly = 1 in { + def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, FR32:$src2), + "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + XS, VEX_4V, VEX_LIG; + def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, FR64:$src2), + "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + XD, VEX_4V, VEX_LIG; +} +let canFoldAsLoad = 1, isReMaterializable = 1 in { + def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX, + VEX_LIG; let AddedComplexity = 20 in - def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX; + def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX, + VEX_LIG; } +def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), + "movss\t{$src, $dst|$dst, $src}", + [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG; +def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG; + +// SSE1 & 2 let Constraints = "$src1 = $dst" in { def MOVSSrr : sse12_move_rr<FR32, v4f32, "movss\t{$src2, $dst|$dst, $src2}">, XS; def MOVSDrr : sse12_move_rr<FR64, v2f64, "movsd\t{$src2, $dst|$dst, $src2}">, XD; + + // For the disassembler + let isCodeGenOnly = 1 in { + def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, FR32:$src2), + "movss\t{$src2, $dst|$dst, $src2}", []>, XS; + def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, FR64:$src2), + "movsd\t{$src2, $dst|$dst, $src2}", []>, XD; + } } let canFoldAsLoad = 1, isReMaterializable = 1 in { @@ -161,54 +400,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD; } -let AddedComplexity = 15 in { -// Extract the low 32-bit value from one vector and insert it into another. -def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; -// Extract the low 64-bit value from one vector and insert it into another. -def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; -} - -// Implicitly promote a 32-bit scalar to a vector. 
-def : Pat<(v4f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; -// Implicitly promote a 64-bit scalar to a vector. -def : Pat<(v2f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; -// Implicitly promote a 32-bit scalar to a vector. -def : Pat<(v8f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; -// Implicitly promote a 64-bit scalar to a vector. -def : Pat<(v4f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; - -let AddedComplexity = 20 in { -// MOVSSrm zeros the high parts of the register; represent this -// with SUBREG_TO_REG. -def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; -def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; -def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; -// MOVSDrm zeros the high parts of the register; represent this -// with SUBREG_TO_REG. -def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -} - -// Store scalar value to memory. def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>; @@ -216,24 +407,257 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>; -def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)]>, XS, VEX; -def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)]>, XD, VEX; +// Patterns +let Predicates = [HasSSE1] in { + let AddedComplexity = 15 in { + // Extract the low 32-bit value from one vector and insert it into another. + def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), + (MOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + + // Move scalar to XMM zero-extended, zeroing a VR128 then do a + // MOVSS to the lower bits. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), + (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; + def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), + (MOVSSrr (v4f32 (V_SET0)), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), + (MOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + } -// Extract and store. 
-def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - addr:$dst), - (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; -def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - addr:$dst), - (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + let AddedComplexity = 20 in { + // MOVSSrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + } + + // Extract and store. + def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSSmr addr:$dst, + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + + // Shuffle with MOVSS + def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), + (MOVSSrr VR128:$src1, FR32:$src2)>; + def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), + (MOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; +} + +let Predicates = [HasSSE2] in { + let AddedComplexity = 15 in { + // Extract the low 64-bit value from one vector and insert it into another. + def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + + // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd + def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; + def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; + + // Move scalar to XMM zero-extended, zeroing a VR128 then do a + // MOVSD to the lower bits. + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), + (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; + } + + let AddedComplexity = 20 in { + // MOVSDrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + } + + // Extract and store. 
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + + // Shuffle with MOVSD + def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), + (MOVSDrr VR128:$src1, FR64:$src2)>; + def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; + + // FIXME: Instead of an X86Movlps there should be an X86Movsd here; the problem + // is during lowering, where it's not possible to recognize the fold because + // it has two uses through a bitcast. One use disappears at isel time and the + // fold opportunity reappears. + def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; +} + +let Predicates = [HasAVX] in { + let AddedComplexity = 15 in { + // Extract the low 32-bit value from one vector and insert it into another. + def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), + (VMOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), + (VMOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + + // Extract the low 64-bit value from one vector and insert it into another. + def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (VMOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), + (VMOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + + // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd + def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; + def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; + + // Move scalar to XMM zero-extended, zeroing a VR128 and then doing a + // MOVS{S,D} to the lower bits. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), + (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>; + def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), + (VMOVSSrr (v4f32 (V_SET0)), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), + (VMOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), + (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; + } + + let AddedComplexity = 20 in { + // MOVSSrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG.
The AVX versions also write: DST[255:128] <- 0 + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + + // MOVSDrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 + def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + + // Represent the same patterns above but in the form they appear for + // 256-bit types + def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, + (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, + (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>; + } + def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, + (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))), + (SUBREG_TO_REG (i32 0), + (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)), + sub_xmm)>; + def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, + (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))), + (SUBREG_TO_REG (i64 0), + (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), + sub_xmm)>; + + // Extract and store. 
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + addr:$dst), + (VMOVSSmr addr:$dst, + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (VMOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + + // Shuffle with VMOVSS + def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), + (VMOVSSrr VR128:$src1, FR32:$src2)>; + def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), + (VMOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), + (VMOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + + // Shuffle with VMOVSD + def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), + (VMOVSDrr VR128:$src1, FR64:$src2)>; + def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), + (VMOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), + (VMOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), + sub_sd))>; + def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), + sub_sd))>; + + // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem + // is during lowering, where it's not possible to recognize the fold cause + // it has two uses through a bitcast. One use disappears at isel time and the + // fold opportunity reappears. + def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), + sub_sd))>; + def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), + sub_sd))>; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions +//===----------------------------------------------------------------------===// -// Move Aligned/Unaligned floating point values multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d, @@ -248,22 +672,22 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, - "movaps", SSEPackedSingle>, VEX; + "movaps", SSEPackedSingle>, TB, VEX; defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, - "movapd", SSEPackedDouble>, OpSize, VEX; + "movapd", SSEPackedDouble>, TB, OpSize, VEX; defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, - "movups", SSEPackedSingle>, VEX; + "movups", SSEPackedSingle>, TB, VEX; defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, - "movupd", SSEPackedDouble, 0>, OpSize, VEX; + "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX; defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, - "movaps", SSEPackedSingle>, VEX; + "movaps", SSEPackedSingle>, TB, VEX; defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, - "movapd", SSEPackedDouble>, OpSize, VEX; + "movapd", SSEPackedDouble>, TB, OpSize, VEX; defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, - "movups", SSEPackedSingle>, VEX; + "movups", SSEPackedSingle>, TB, VEX; defm VMOVUPDY : sse12_mov_packed<0x10, VR256, 
f256mem, loadv4f64, - "movupd", SSEPackedDouble, 0>, OpSize, VEX; + "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX; defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle>, TB; defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, @@ -287,10 +711,10 @@ def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v2f64 VR128:$src), addr:$dst)]>, VEX; def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movaps\t{$src, $dst|$dst, $src}", - [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX; + [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX; def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movapd\t{$src, $dst|$dst, $src}", - [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX; + [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX; def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movups\t{$src, $dst|$dst, $src}", [(store (v8f32 VR256:$src), addr:$dst)]>, VEX; @@ -298,6 +722,34 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)]>, VEX; +// For disassembler +let isCodeGenOnly = 1 in { + def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src), + "movaps\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src), + "movapd\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src), + "movups\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), + (ins VR256:$src), + "movaps\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), + (ins VR256:$src), + "movapd\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), + (ins VR256:$src), + "movups\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), + (ins VR256:$src), + "movupd\t{$src, $dst|$dst, $src}", []>, VEX; +} + def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), (VMOVUPSYmr addr:$dst, VR256:$src)>; @@ -319,24 +771,155 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)]>; -// Intrinsic forms of MOVUPS/D load and store -def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movups\t{$src, $dst|$dst, $src}", - [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX; -def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX; - -def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movups\t{$src, $dst|$dst, $src}", - [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>; -def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>; - -// Move Low/High packed floating point values +// For disassembler +let isCodeGenOnly = 1 in { + def 
MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movaps\t{$src, $dst|$dst, $src}", []>; + def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movapd\t{$src, $dst|$dst, $src}", []>; + def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movups\t{$src, $dst|$dst, $src}", []>; + def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", []>; +} + +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), + (VMOVUPDmr addr:$dst, VR128:$src)>; +} + +let Predicates = [HasSSE1] in + def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src), + (MOVUPSmr addr:$dst, VR128:$src)>; +let Predicates = [HasSSE2] in + def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), + (MOVUPDmr addr:$dst, VR128:$src)>; + +// Use movaps / movups for SSE integer load / store (one byte shorter). +// The instructions selected below are then converted to MOVDQA/MOVDQU +// during the SSE domain pass. +let Predicates = [HasSSE1] in { + def : Pat<(alignedloadv4i32 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv4i32 addr:$src), + (MOVUPSrm addr:$src)>; + def : Pat<(alignedloadv2i64 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv2i64 addr:$src), + (MOVUPSrm addr:$src)>; + + def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +} + +// Use vmovaps/vmovups for AVX integer load/store. 
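The "one byte shorter" claim above comes from the operand-size prefix on the SSE2 integer moves. A small C++ sketch over the raw opcode bytes (encodings from the Intel manual; ModRM and any REX prefix omitted), just to make the count concrete:

#include <cstdint>
#include <cstdio>

// movaps xmm, xmm/m128 encodes as 0F 28 /r; movdqa xmm, xmm/m128 encodes
// as 66 0F 6F /r. The extra 0x66 prefix is the byte saved by selecting
// the FP move for integer vectors.
static const uint8_t MOVAPS[] = {0x0F, 0x28};
static const uint8_t MOVDQA[] = {0x66, 0x0F, 0x6F};

int main() {
  printf("movaps: %zu bytes, movdqa: %zu bytes\n",
         sizeof(MOVAPS), sizeof(MOVDQA));
}

The HasAVX block below applies the same substitution to the VEX-encoded loads and stores.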
+let Predicates = [HasAVX] in { + // 128-bit load/store + def : Pat<(alignedloadv4i32 addr:$src), + (VMOVAPSrm addr:$src)>; + def : Pat<(loadv4i32 addr:$src), + (VMOVUPSrm addr:$src)>; + def : Pat<(alignedloadv2i64 addr:$src), + (VMOVAPSrm addr:$src)>; + def : Pat<(loadv2i64 addr:$src), + (VMOVUPSrm addr:$src)>; + + def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + + // 256-bit load/store + def : Pat<(alignedloadv4i64 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv4i64 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedloadv8i32 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv8i32 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v4i64 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v8i32 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v16i16 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v32i8 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; +} + +// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper +// bits are disregarded. FIXME: Set encoding to pseudo! +let neverHasSideEffects = 1 in { +def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + "movaps\t{$src, $dst|$dst, $src}", []>; +def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), + "movapd\t{$src, $dst|$dst, $src}", []>; +def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + "movaps\t{$src, $dst|$dst, $src}", []>, VEX; +def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), + "movapd\t{$src, $dst|$dst, $src}", []>, VEX; +} + +// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper +// bits are disregarded. FIXME: Set encoding to pseudo! 
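Both alias families here, the reg-to-reg copies above and the f128mem loads below, rely on the upper bits never being observed. The reason movaps beats movss/movsd for the copies can be sketched in C++ with SSE intrinsics (copy_scalar and merge_scalar are illustrative names, not LLVM code):

#include <immintrin.h>

// movaps-style copy: the destination's previous contents are irrelevant,
// so there is no dependency on them; upper lanes carry over harmlessly.
__m128 copy_scalar(__m128 src) { return src; }

// movss-style merge: the result keeps dst's upper three lanes, so the CPU
// must wait for dst -- the partial-register update the comment warns about.
__m128 merge_scalar(__m128 dst, __m128 src) { return _mm_move_ss(dst, src); }

The load aliases that follow extend the same trick to memory sources.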
+let canFoldAsLoad = 1, isReMaterializable = 1 in { +def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; +def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; +let isCodeGenOnly = 1 in { + def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX; + def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX; +} +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move Low packed FP Instructions +//===----------------------------------------------------------------------===// + multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, PatFrag mov_frag, string base_opc, string asm_opr> { @@ -359,14 +942,10 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, let AddedComplexity = 20 in { defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; - defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", "\t{$src2, $dst|$dst, $src2}">; - defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", - "\t{$src2, $dst|$dst, $src2}">; } def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -386,6 +965,147 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)]>; +let Predicates = [HasAVX] in { + let AddedComplexity = 20 in { + // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS + def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), + (VMOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), + (VMOVLPSrm VR128:$src1, addr:$src2)>; + // vector_shuffle v1, (load v2) <2, 1> using MOVLPS + def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; + } + + // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS + def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (VMOVLPSmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), + VR128:$src2)), addr:$src1), + (VMOVLPSmr addr:$src1, VR128:$src2)>; + + // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS + def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (VMOVLPDmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (VMOVLPDmr addr:$src1, VR128:$src2)>; + + // Shuffle with VMOVLPS + def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), + (VMOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), + (VMOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (VMOVLPSrm 
VR128:$src1, addr:$src2)>; + + // Shuffle with VMOVLPD + def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Movlpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (VMOVLPDrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), + addr:$src1), + (VMOVLPSmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v4i32 (X86Movlps + (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), + (VMOVLPSmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)), + addr:$src1), + (VMOVLPDmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)), + addr:$src1), + (VMOVLPDmr addr:$src1, VR128:$src2)>; +} + +let Predicates = [HasSSE1] in { + let AddedComplexity = 20 in { + // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS + def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + } + + // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS + def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), + VR128:$src2)), addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; + + // Shuffle with MOVLPS + def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), + addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v4i32 (X86Movlps + (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), + addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; +} + +let Predicates = [HasSSE2] in { + let AddedComplexity = 20 in { + // vector_shuffle v1, (load v2) <2, 1> using MOVLPS + def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + } + + // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS + def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; + + // Shuffle with MOVLPD + def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Movlpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)), + addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; + def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)), + addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; +} + 
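All of the MOVLP patterns above lean on one semantic fact, shown here as a hedged C++ sketch using the standard SSE intrinsics (load_low and store_low are illustrative names): the load form replaces only the low 64 bits of the destination and preserves the high 64 bits, and the store form writes only the low 64 bits, which is precisely the <4, 5, 2, 3> shuffle being matched.

#include <immintrin.h>

// movlps (mem), xmm: low 64 bits come from memory, high 64 bits are kept.
__m128 load_low(__m128 keep_high, const __m64 *p) {
  return _mm_loadl_pi(keep_high, p);
}

// movlps xmm, (mem): only the low 64 bits reach memory.
void store_low(__m64 *p, __m128 v) { _mm_storel_pi(p, v); }

The high-half counterparts are handled by the section that follows.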
+//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move Hi packed FP Instructions +//===----------------------------------------------------------------------===// + +let AddedComplexity = 20 in { + defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; +} +let Constraints = "$src1 = $dst", AddedComplexity = 20 in { + defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", + "\t{$src2, $dst|$dst, $src2}">; +} + // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -411,6 +1131,80 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), (v2f64 (unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst)]>; +let Predicates = [HasAVX] in { + // VMOVHPS patterns + def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (VMOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (VMOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, + (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (VMOVHPSrm VR128:$src1, addr:$src2)>; + + // FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here; the problem + // is during lowering, where it's not possible to recognize the load fold because + // it has two uses through a bitcast. One use disappears at isel time and the + // fold opportunity reappears. + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (VMOVHPDrm VR128:$src1, addr:$src2)>; + + // FIXME: This should be matched by an X86Movhpd instead. Same as above + def : Pat<(v2f64 (X86Movlhpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (VMOVHPDrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (VMOVHPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (VMOVHPDmr addr:$dst, VR128:$src)>; +} + +let Predicates = [HasSSE1] in { + // MOVHPS patterns + def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (MOVHPSmr addr:$dst, VR128:$src)>; +} + +let Predicates = [HasSSE2] in { + // FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here; the problem + // is during lowering, where it's not possible to recognize the load fold because + // it has two uses through a bitcast. One use disappears at isel time and the + // fold opportunity reappears. + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVHPDrm VR128:$src1, addr:$src2)>; + + // FIXME: This should be matched by an X86Movhpd instead.
Same as above + def : Pat<(v2f64 (X86Movlhpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVHPDrm VR128:$src1, addr:$src2)>; + + // Store patterns + def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), + (MOVHPDmr addr:$dst, VR128:$src)>; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions +//===----------------------------------------------------------------------===// + let AddedComplexity = 20 in { def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -438,13 +1232,80 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; } -def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), - (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; -let AddedComplexity = 20 in { - def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; - def : Pat<(v2i64 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; +let Predicates = [HasAVX] in { + // MOVLHPS patterns + let AddedComplexity = 20 in { + def : Pat<(v4f32 (movddup VR128:$src, (undef))), + (VMOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; + def : Pat<(v2i64 (movddup VR128:$src, (undef))), + (VMOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; + + // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS + def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), + (VMOVLHPSrr VR128:$src1, VR128:$src2)>; + } + def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), + (VMOVLHPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), + (VMOVLHPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), + (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; + + // MOVHLPS patterns + let AddedComplexity = 20 in { + // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS + def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), + (VMOVHLPSrr VR128:$src1, VR128:$src2)>; + + // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS + def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), + (VMOVHLPSrr VR128:$src1, VR128:$src1)>; + def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), + (VMOVHLPSrr VR128:$src1, VR128:$src1)>; + } + + def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), + (VMOVHLPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), + (VMOVHLPSrr VR128:$src1, VR128:$src2)>; +} + +let Predicates = [HasSSE1] in { + // MOVLHPS patterns + let AddedComplexity = 20 in { + def : Pat<(v4f32 (movddup VR128:$src, (undef))), + (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; + def : Pat<(v2i64 (movddup VR128:$src, (undef))), + (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; + + // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS + def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; + } + def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; + + // MOVHLPS patterns + let AddedComplexity = 20 in { + // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS + def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), + 
(MOVHLPSrr VR128:$src1, VR128:$src2)>; + + // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS + def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), + (MOVHLPSrr VR128:$src1, VR128:$src1)>; + def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), + (MOVHLPSrr VR128:$src1, VR128:$src1)>; + } + + def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), + (MOVHLPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), + (MOVHLPSrr VR128:$src1, VR128:$src2)>; } //===----------------------------------------------------------------------===// @@ -462,10 +1323,9 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { - def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - []>; - def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - []>; + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>; + let mayLoad = 1 in + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>; } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -481,36 +1341,39 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; } defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, - "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX; + "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX, + VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX, - VEX_W; + VEX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, - "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; + "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, + VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, - VEX, VEX_W; + VEX, VEX_W, VEX_LIG; // The assembler can recognize rr 64-bit instructions by seeing a rxx // register, but the same isn't true when only using memory operands, // provide other assembly "l" and "q" forms to address this explicitly // where appropriate to do so. 
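The ambiguity those explicit forms resolve is easy to demonstrate: with only a memory operand, nothing in the mnemonic says whether the integer source is 32 or 64 bits wide, and the two readings can convert to different values. A minimal C++ sketch (the value 0x100000000 is an arbitrary example; memcpy avoids aliasing issues; little-endian layout assumed, as on x86):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t raw = 0x100000000ULL;     // 2^32; its low 32 bits are all zero
  int32_t lo;
  std::memcpy(&lo, &raw, sizeof lo); // the 4 bytes a 32-bit load would see
  float as_l = (float)lo;            // cvtsi2ss{l} view: 0.0f
  float as_q = (float)(int64_t)raw;  // cvtsi2ss{q} view: 4294967296.0f
  printf("%g vs %g\n", as_l, as_q);
}

Hence the explicit {l}/{q} suffixes on the memory-operand variants defined below.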
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS, - VEX_4V; + VEX_4V, VEX_LIG; defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, - VEX_4V, VEX_W; + VEX_4V, VEX_W, VEX_LIG; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD, - VEX_4V; + VEX_4V, VEX_LIG; defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, - VEX_4V; + VEX_4V, VEX_LIG; defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, - VEX_4V, VEX_W; + VEX_4V, VEX_W, VEX_LIG; let Predicates = [HasAVX] in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), @@ -579,11 +1442,6 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>; } -defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - f32mem, load, "cvtss2si">, XS, VEX; -defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">, - XS, VEX, VEX_W; defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, f128mem, load, "cvtsd2si">, XD, VEX; defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, @@ -594,14 +1452,12 @@ defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, // Get rid of this hack or rename the intrinsics, there are several // intructions that only match with the intrinsic form, why create duplicates // to let them be recognized by the assembler? -defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; +defm VCVTSD2SI : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, + "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W; -defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - f32mem, load, "cvtss2si">, XS; -defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, - f32mem, load, "cvtss2si{q}">, XS, REX_W; + "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W, + VEX_LIG; + defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, f128mem, load, "cvtsd2si{l}">, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, @@ -660,10 +1516,11 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, let Pattern = []<dag> in { defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, - "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX; + "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, + VEX, VEX_LIG; defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX, - VEX_W; + VEX_W, VEX_LIG; defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle>, TB, VEX; @@ -671,6 +1528,7 @@ defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle>, TB, VEX; } + let Pattern = []<dag> in { defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS; @@ -681,19 +1539,43 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */ } +let Predicates = [HasSSE1] in { + def : Pat<(int_x86_sse_cvtss2si VR128:$src), + (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), + 
(CVTSS2SIrm addr:$src)>; + def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), + (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), + (CVTSS2SI64rm addr:$src)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse_cvtss2si VR128:$src), + (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), + (VCVTSS2SIrm addr:$src)>; + def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), + (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), + (VCVTSS2SI64rm addr:$src)>; +} + /// SSE 2 Only // Convert scalar double to scalar single def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - VEX_4V; + VEX_4V, VEX_LIG; +let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V; + []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; + def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, - Requires<[HasAVX]>; + Requires<[HasAVX]>; def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", @@ -715,13 +1597,25 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XS, Requires<[HasAVX]>, VEX_4V; + []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG; +let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>; -def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>, - Requires<[HasAVX]>; + []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; + +let Predicates = [HasAVX] in { + def : Pat<(f64 (fextend FR32:$src)), + (VCVTSS2SDrr FR32:$src, FR32:$src)>; + def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>; +} + +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>, + Requires<[HasAVX, OptForSpeed]>; def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", @@ -732,6 +1626,16 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), [(set FR64:$dst, (extloadf32 addr:$src))]>, XS, Requires<[HasSSE2, OptForSize]>; +// extload f32 -> f64. This matches load+fextend because we have a hack in +// the isel (PreprocessForFPConvert) that can introduce loads after dag +// combine. +// Since these loads aren't folded into the fextend, we have to match it +// explicitly here. 
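// Editor's aside (illustrative, not part of this patch): concretely, for IR
// of the shape "%d = fpext float %f to double" where %f has just been loaded,
// the two patterns directly below choose between a folded and a split
// sequence, roughly as follows (AT&T syntax; the registers and address are
// arbitrary examples, not output of any particular compilation):
//
//   cvtss2sd (%rax), %xmm0    # fextend(loadf32): load folded into CVTSS2SDrm
//
//   movss    (%rax), %xmm1    # extloadf32 under OptForSpeed: explicit load,
//   cvtss2sd %xmm1, %xmm0     # then the register-register CVTSS2SDrr
//
// i.e. the OptForSpeed pattern deliberately keeps the load separate instead
// of folding it into the conversion.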
+def : Pat<(fextend (loadf32 addr:$src)), + (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>; +def : Pat<(extloadf32 addr:$src), + (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>; + def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -759,10 +1663,6 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, Requires<[HasSSE2]>; } -def : Pat<(extloadf32 addr:$src), - (CVTSS2SDrr (MOVSSrm addr:$src))>, - Requires<[HasSSE2, OptForSpeed]>; - // Convert doubleword to packed single/double fp // SSE2 instructions without OpSize prefix def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -862,10 +1762,12 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // SSE2 packed instructions with XS prefix def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +let mayLoad = 1 in def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +let mayLoad = 1 in def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -877,7 +1779,6 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memop addr:$src)))]>; - def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -889,16 +1790,33 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (memop addr:$src)))]>, XS, VEX, Requires<[HasAVX]>; -def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>, - VEX; -def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))]>, VEX; +let Predicates = [HasSSE2] in { + def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), + (Int_CVTDQ2PSrr VR128:$src)>; + def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), + (CVTTPS2DQrr VR128:$src)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), + (Int_VCVTDQ2PSrr VR128:$src)>; + def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), + (VCVTTPS2DQrr VR128:$src)>; + def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))), + (VCVTDQ2PSYrr VR256:$src)>; + def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))), + (VCVTTPS2DQYrr VR256:$src)>; +} + +def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX; +let isCodeGenOnly = 1 in +def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memop addr:$src)))]>, VEX; def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; @@ -910,8 +1828,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs 
VR128:$dst),(ins f128mem:$src), // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. -def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; @@ -931,13 +1847,13 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), let Predicates = [HasAVX] in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; } def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; @@ -947,12 +1863,12 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, - VEX, Requires<[HasAVX]>; + TB, VEX, Requires<[HasAVX]>; def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd (load addr:$src)))]>, - VEX, Requires<[HasAVX]>; + TB, VEX, Requires<[HasAVX]>; def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, @@ -1038,75 +1954,61 @@ def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src), def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)), (VCVTTPS2DQYrm addr:$src)>; +// Match fround and fextend for 128/256-bit conversions +def : Pat<(v4f32 (fround (v4f64 VR256:$src))), + (VCVTPD2PSYrr VR256:$src)>; +def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), + (VCVTPD2PSYrm addr:$src)>; + +def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), + (VCVTPS2PDYrr VR128:$src)>; +def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), + (VCVTPS2PDYrm addr:$src)>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, + SDNode OpNode, ValueType VT, PatFrag ld_frag, string asm, string asm_alt> { - let isAsmParserOnly = 1 in { - def rr : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), - asm, []>; - let mayLoad = 1 in - def rm : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc), - asm, []>; - } + def rr : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), 
asm, + [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>; + def rm : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm, + [(set RC:$dst, (OpNode (VT RC:$src1), + (ld_frag addr:$src2), imm:$cc))]>; // Accept explicit immediate argument form instead of comparison code. - def rr_alt : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), - asm_alt, []>; - let mayLoad = 1 in - def rm_alt : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2), - asm_alt, []>; + let neverHasSideEffects = 1 in { + def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, []>; + let mayLoad = 1 in + def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, []>; + } } -let neverHasSideEffects = 1 in { - defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, - "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">, - XS, VEX_4V; - defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, - "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">, - XD, VEX_4V; -} +defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32, + "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + XS, VEX_4V, VEX_LIG; +defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64, + "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + XD, VEX_4V, VEX_LIG; let Constraints = "$src1 = $dst" in { -def CMPSSrr : SIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc), + defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32, "cmp${cc}ss\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS; -def CMPSSrm : SIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc), - "cmp${cc}ss\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS; -def CMPSDrr : SIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc), - "cmp${cc}sd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD; -def CMPSDrm : SIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc), + "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}">, + XS; + defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64, "cmp${cc}sd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD; -} -let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { -def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS; -def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS; -def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD; -def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD; + "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}">, 
+ XD; } multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop, @@ -1151,25 +2053,28 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", SSEPackedSingle>, VEX; + "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", SSEPackedDouble>, OpSize, VEX; + "ucomisd", SSEPackedDouble>, TB, OpSize, VEX, + VEX_LIG; let Pattern = []<dag> in { defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, - "comiss", SSEPackedSingle>, VEX; + "comiss", SSEPackedSingle>, TB, VEX, + VEX_LIG; defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, - "comisd", SSEPackedDouble>, OpSize, VEX; + "comisd", SSEPackedDouble>, TB, OpSize, VEX, + VEX_LIG; } defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss", SSEPackedSingle>, VEX; + load, "ucomiss", SSEPackedSingle>, TB, VEX; defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd", SSEPackedDouble>, OpSize, VEX; + load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX; defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, - load, "comiss", SSEPackedSingle>, VEX; + load, "comiss", SSEPackedSingle>, TB, VEX; defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, - load, "comisd", SSEPackedDouble>, OpSize, VEX; + load, "comisd", SSEPackedDouble>, TB, OpSize, VEX; defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss", SSEPackedSingle>, TB; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, @@ -1199,57 +2104,82 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, Domain d> { let isAsmParserOnly = 1 in { def rri : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; + (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], d>; def rmi : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; + (outs RC:$dst), (ins RC:$src1, f128mem:$src2, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], d>; } // Accept explicit immediate argument form instead of comparison code. 
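// Editor's aside (illustrative, not part of this patch): this pairing means
// the assembler accepts either spelling of a packed compare. With the
// non-VEX syntax strings, for instance, the following two lines assemble to
// the same encoding (operands are arbitrary examples; immediate 0 is the
// "eq" condition code):
//
//   cmpeqps %xmm1, %xmm0
//   cmpps   $0, %xmm1, %xmm0
//
// Only the $cc-templated rri/rmi forms above carry selection patterns; the
// *_alt forms that follow exist so the explicit-immediate spelling is
// accepted as well.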
def rri_alt : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [], d>; def rmi_alt : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, f128mem:$src2, i8imm:$cc), asm_alt, [], d>; } defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, - "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedSingle>, VEX_4V; + "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", + SSEPackedSingle>, TB, VEX_4V; defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, - "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", + SSEPackedDouble>, TB, OpSize, VEX_4V; defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256, - "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedSingle>, VEX_4V; + "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", + SSEPackedSingle>, TB, VEX_4V; defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256, - "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", + SSEPackedDouble>, TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, - "cmp${cc}ps\t{$src, $dst|$dst, $src}", - "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", + "cmp${cc}ps\t{$src2, $dst|$dst, $src2}", + "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSEPackedSingle>, TB; defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, - "cmp${cc}pd\t{$src, $dst|$dst, $src}", - "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", + "cmp${cc}pd\t{$src2, $dst|$dst, $src2}", + "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSEPackedDouble>, TB, OpSize; } +let Predicates = [HasSSE1] in { def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; +} + +let Predicates = [HasSSE2] in { def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; +} + +let Predicates = [HasAVX] in { +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), + (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), + (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), + (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), + (VCMPPDrmi 
VR128:$src1, addr:$src2, imm:$cc)>; + +def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)), + (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>; +def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)), + (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>; +def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)), + (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>; +def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)), + (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; +} //===----------------------------------------------------------------------===// // SSE 1 & 2 - Shuffle Instructions @@ -1293,6 +2223,132 @@ let Constraints = "$src1 = $dst" in { memopv2f64, SSEPackedDouble>, TB, OpSize; } +let Predicates = [HasSSE1] in { + def : Pat<(v4f32 (X86Shufps VR128:$src1, + (memopv4f32 addr:$src2), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufps VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but + // fall back to this for SSE1) + def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), + (SHUFPSrri VR128:$src2, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special unary SHUFPSrri case. + def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPSrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; +} + +let Predicates = [HasSSE2] in { + // Special binary v4i32 shuffle cases with SHUFPS. + def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), + (SHUFPSrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + def : Pat<(v4i32 (shufp:$src3 VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), + (SHUFPSrmi VR128:$src1, addr:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special unary SHUFPDrri cases. + def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), + (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special binary v2i64 shuffle cases using SHUFPDrri. 
+ def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), + (SHUFPDrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Generic SHUFPD patterns + def : Pat<(v2f64 (X86Shufps VR128:$src1, + (memopv2f64 addr:$src2), (i8 imm:$imm))), + (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(v4f32 (X86Shufps VR128:$src1, + (memopv4f32 addr:$src2), (i8 imm:$imm))), + (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufps VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), + (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but + // fall back to this for SSE1) + def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), + (VSHUFPSrri VR128:$src2, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special unary SHUFPSrri case. + def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), + (VSHUFPSrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special binary v4i32 shuffle cases with SHUFPS. + def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), + (VSHUFPSrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + def : Pat<(v4i32 (shufp:$src3 VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), + (VSHUFPSrmi VR128:$src1, addr:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special unary SHUFPDrri cases. + def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), + (VSHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), + (VSHUFPDrri VR128:$src1, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + // Special binary v2i64 shuffle cases using SHUFPDrri. 
+ def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), + (VSHUFPDrri VR128:$src1, VR128:$src2, + (SHUFFLE_get_shuf_imm VR128:$src3))>; + + def : Pat<(v2f64 (X86Shufps VR128:$src1, + (memopv2f64 addr:$src2), (i8 imm:$imm))), + (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + + // 256-bit patterns + def : Pat<(v8i32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>; + def : Pat<(v8i32 (X86Shufps VR256:$src1, + (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>; + + def : Pat<(v8f32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>; + def : Pat<(v8f32 (X86Shufps VR256:$src1, + (memopv8f32 addr:$src2), (i8 imm:$imm))), + (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>; + + def : Pat<(v4i64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>; + def : Pat<(v4i64 (X86Shufpd VR256:$src1, + (memopv4i64 addr:$src2), (i8 imm:$imm))), + (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; + + def : Pat<(v4f64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>; + def : Pat<(v4f64 (X86Shufpd VR256:$src1, + (memopv4f64 addr:$src2), (i8 imm:$imm))), + (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Unpack Instructions //===----------------------------------------------------------------------===// @@ -1316,29 +2372,29 @@ multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt, let AddedComplexity = 10 in { defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; + SSEPackedSingle>, TB, VEX_4V; defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V; defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; + SSEPackedSingle>, TB, VEX_4V; defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V; defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32, VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; + SSEPackedSingle>, TB, VEX_4V; defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64, VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V; defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32, VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; + SSEPackedSingle>, TB, VEX_4V; defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, 
$dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, @@ -1356,6 +2412,103 @@ let AddedComplexity = 10 in { } // Constraints = "$src1 = $dst" } // AddedComplexity +let Predicates = [HasSSE1] in { + def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + (UNPCKLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + (UNPCKLPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + (UNPCKHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + (UNPCKHPSrr VR128:$src1, VR128:$src2)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + (UNPCKLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + (UNPCKLPDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + (UNPCKHPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + (UNPCKHPDrr VR128:$src1, VR128:$src2)>; + + // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the + // problem is during lowering, where it's not possible to recognize the load + // fold cause it has two uses through a bitcast. One use disappears at isel + // time and the fold opportunity reappears. + def : Pat<(v2f64 (X86Movddup VR128:$src)), + (UNPCKLPDrr VR128:$src, VR128:$src)>; + + let AddedComplexity = 10 in + def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), + (UNPCKLPDrr VR128:$src, VR128:$src)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + (VUNPCKLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + (VUNPCKLPSrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + (VUNPCKHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; + + def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + (VUNPCKLPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + (VUNPCKLPDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + (VUNPCKHPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; + + def : 
Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; + + // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the + // problem is during lowering, where it's not possible to recognize the load + // fold cause it has two uses through a bitcast. One use disappears at isel + // time and the fold opportunity reappears. + def : Pat<(v2f64 (X86Movddup VR128:$src)), + (VUNPCKLPDrr VR128:$src, VR128:$src)>; + let AddedComplexity = 10 in + def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), + (VUNPCKLPDrr VR128:$src, VR128:$src)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Extract Floating-Point Sign mask //===----------------------------------------------------------------------===// @@ -1370,91 +2523,60 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W; } -// Mask creation -defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, - "movmskps", SSEPackedSingle>, VEX; -defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, - "movmskpd", SSEPackedDouble>, OpSize, - VEX; -defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, - "movmskps", SSEPackedSingle>, VEX; -defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256, - "movmskpd", SSEPackedDouble>, OpSize, - VEX; defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", SSEPackedSingle>, TB; defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", SSEPackedDouble>, TB, OpSize; -// X86fgetsign -def MOVMSKPDrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), - "movmskpd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize; -def MOVMSKPDrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src), - "movmskpd\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize; -def MOVMSKPSrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), - "movmskps\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB; -def MOVMSKPSrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src), - "movmskps\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB; - -// Assembler Only -def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; -def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, - VEX; -def VMOVMSKPSYr64r : PI<0x50, 
MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; -def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, - VEX; - -//===----------------------------------------------------------------------===// -// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions -//===----------------------------------------------------------------------===// - -// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have -// names that start with 'Fs'. - -// Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, - canFoldAsLoad = 1 in { - // FIXME: Set encoding to pseudo! -def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; -def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasSSE2]>, TB, OpSize; -def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasAVX]>, TB, OpSize, VEX_4V; -def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasAVX]>, TB, OpSize, VEX_4V; -} - -// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper -// bits are disregarded. -let neverHasSideEffects = 1 in { -def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; -def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; -} +def : Pat<(i32 (X86fgetsign FR32:$src)), + (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>, Requires<[HasSSE1]>; +def : Pat<(i64 (X86fgetsign FR32:$src)), + (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>, Requires<[HasSSE1]>; +def : Pat<(i32 (X86fgetsign FR64:$src)), + (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>, Requires<[HasSSE2]>; +def : Pat<(i64 (X86fgetsign FR64:$src)), + (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>, Requires<[HasSSE2]>; -// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper -// bits are disregarded. 
-let canFoldAsLoad = 1, isReMaterializable = 1 in { -def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), - "movaps\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; -def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; +let Predicates = [HasAVX] in { + defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, + "movmskps", SSEPackedSingle>, TB, VEX; + defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, + "movmskpd", SSEPackedDouble>, TB, + OpSize, VEX; + defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, + "movmskps", SSEPackedSingle>, TB, VEX; + defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256, + "movmskpd", SSEPackedDouble>, TB, + OpSize, VEX; + + def : Pat<(i32 (X86fgetsign FR32:$src)), + (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>; + def : Pat<(i64 (X86fgetsign FR32:$src)), + (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>; + def : Pat<(i32 (X86fgetsign FR64:$src)), + (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>; + def : Pat<(i64 (X86fgetsign FR64:$src)), + (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>; + + // Assembler Only + def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX; + def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB, + OpSize, VEX; + def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX; + def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB, + OpSize, VEX; } //===----------------------------------------------------------------------===// @@ -1466,10 +2588,10 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, SDNode OpNode> { defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, - FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V; + FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, TB, VEX_4V; defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, - FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V; + FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32, @@ -1494,21 +2616,22 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in /// multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, SDNode OpNode> { - let Pattern = []<dag> in { - defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f128mem, - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))], 0>, VEX_4V; - - defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, - !strconcat(OpcodeStr, "pd"), f128mem, - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 VR128:$src2))))], - [(set VR128:$dst, 
(OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))], 0>, - OpSize, VEX_4V; - } + // In AVX no need to add a pattern for 128-bit logical rr ps, because they + // are all promoted to v2i64, and the patterns are covered by the int + // version. This is needed in SSE only, because v2i64 isn't supported on + // SSE1, but only on SSE2. + defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, + !strconcat(OpcodeStr, "ps"), f128mem, [], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V; + + defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, + !strconcat(OpcodeStr, "pd"), f128mem, + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, + TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, @@ -1533,7 +2656,7 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "ps"), f256mem, [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), - (memopv4i64 addr:$src2)))], 0>, VEX_4V; + (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V; defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f256mem, @@ -1541,7 +2664,7 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, (bc_v4i64 (v4f64 VR256:$src2))))], [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), (memopv4i64 addr:$src2)))], 0>, - OpSize, VEX_4V; + TB, OpSize, VEX_4V; } // AVX 256-bit packed logical ops forms @@ -1632,32 +2755,32 @@ multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> { // Binary Arithmetic instructions defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, - basic_sse12_fp_binop_s_int<0x58, "add", 0>, - basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG; +defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, - basic_sse12_fp_binop_s_int<0x59, "mul", 0>, - basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG; +defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; let isCommutable = 0 in { defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, - basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, - basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG; + defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, - basic_sse12_fp_binop_s_int<0x5E, "div", 0>, - basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG; + defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V; defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, - basic_sse12_fp_binop_s_int<0x5F, "max", 0>, - basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG; + defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, 
basic_sse12_fp_binop_p_int<0x5F, "max", 0>, basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V; defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, - basic_sse12_fp_binop_s_int<0x5D, "min", 0>, - basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG; + defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, basic_sse12_fp_binop_p_int<0x5D, "min", 0>, basic_sse12_fp_binop_p_y_int<0x5D, "min">, basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V; @@ -1720,23 +2843,18 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, } /// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. -multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr, - SDNode OpNode, Intrinsic F32Int> { +multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2), - !strconcat(OpcodeStr, - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, XS, Requires<[HasAVX, OptForSize]>; - def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + let mayLoad = 1 in + def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat(OpcodeStr, - "ss\t{$src, $dst, $dst|$dst, $dst, $src}"), - [(set VR128:$dst, (F32Int VR128:$src))]>; - def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins ssmem:$src1, VR128:$src2), !strconcat(OpcodeStr, - "ss\t{$src, $dst, $dst|$dst, $dst, $src}"), - [(set VR128:$dst, (F32Int sse_load_f32:$src))]>; + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; } /// sse1_fp_unop_p - SSE1 unops in packed form. @@ -1801,21 +2919,17 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, } /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. -multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr, - SDNode OpNode, Intrinsic F64Int> { +multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f64mem:$src2), + def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, sdmem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"), - [(set VR128:$dst, (F64Int VR128:$src))]>; - def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), - !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"), - [(set VR128:$dst, (F64Int sse_load_f64:$src))]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -1863,9 +2977,8 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX] in { // Square root. 
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>, - sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>, - VEX_4V; + defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">, + sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG; defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>, sse2_fp_unop_p<0x51, "vsqrt", fsqrt>, @@ -1879,21 +2992,76 @@ let Predicates = [HasAVX] in { // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. - defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt, - int_x86_sse_rsqrt_ss>, VEX_4V; + defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG; defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>, sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>, sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX; - defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>, - VEX_4V; + defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG; defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>, sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>, sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX; } +def : Pat<(f32 (fsqrt FR32:$src)), + (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; +def : Pat<(f32 (fsqrt (load addr:$src))), + (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; +def : Pat<(f64 (fsqrt FR64:$src)), + (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>; +def : Pat<(f64 (fsqrt (load addr:$src))), + (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + +def : Pat<(f32 (X86frsqrt FR32:$src)), + (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; +def : Pat<(f32 (X86frsqrt (load addr:$src))), + (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + +def : Pat<(f32 (X86frcp FR32:$src)), + (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; +def : Pat<(f32 (X86frcp (load addr:$src))), + (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse_sqrt_ss VR128:$src), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), + (VSQRTSSr (f32 (IMPLICIT_DEF)), + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), + sub_ss)>; + def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src), + (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; + + def : Pat<(int_x86_sse2_sqrt_sd VR128:$src), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), + (VSQRTSDr (f64 (IMPLICIT_DEF)), + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)), + sub_sd)>; + def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src), + (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; + + def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), + (VRSQRTSSr (f32 (IMPLICIT_DEF)), + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), + sub_ss)>; + def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src), + (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; + + def : Pat<(int_x86_sse_rcp_ss VR128:$src), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), + (VRCPSSr (f32 (IMPLICIT_DEF)), + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), + sub_ss)>; + def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src), + (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; +} + // Square root. 
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, sse1_fp_unop_p<0x51, "sqrt", fsqrt>, @@ -1992,7 +3160,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>; + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), @@ -2006,7 +3174,7 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), } //===----------------------------------------------------------------------===// -// SSE 1 & 2 - Misc Instructions (No AVX form) +// SSE 1 & 2 - Prefetch and memory fence //===----------------------------------------------------------------------===// // Prefetch intrinsic. @@ -2019,66 +3187,26 @@ def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>; -// Load, store, and memory fence -def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, - TB, Requires<[HasSSE1]>; -def : Pat<(X86SFence), (SFENCE)>; - -// Alias instructions that map zero vector to pxor / xorp* for sse. -// We set canFoldAsLoad because this can be converted to a constant-pool -// load of an all-zeros value if folding it would be beneficial. -// FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementation, it does not expand the instructions below like -// X86MCInstLower does. -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1 in { -def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4f32 immAllZerosV))]>; -def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v2f64 immAllZerosV))]>; -let ExeDomain = SSEPackedInt in -def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllZerosV))]>; -} - -// The same as done above but for AVX. The 128-bit versions are the -// same, but re-encoded. The 256-bit does not support PI version, and -// doesn't need it because on sandy bridge the register is set to zero -// at the rename stage without using any execution unit, so SET0PSY -// and SET0PDY can be used for vector int instructions without penalty -// FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementation, it does not expand the instructions below like -// X86MCInstLower does.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, Predicates = [HasAVX] in { -def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V; -def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V; -def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V; -def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; -let ExeDomain = SSEPackedInt in -def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllZerosV))]>; -} +// Flush cache +def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), + "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, + TB, Requires<[HasSSE2]>; -def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; -def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; -def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; +// Pause. This "instruction" is encoded as "rep; nop", so even though it +// was introduced with SSE2, it's backward compatible. +def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; -def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +// Load, store, and memory fence +def SFENCE : I<0xAE, MRM_F8, (outs), (ins), + "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>; +def LFENCE : I<0xAE, MRM_E8, (outs), (ins), + "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; +def MFENCE : I<0xAE, MRM_F0, (outs), (ins), + "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; -// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while -// in the non-AVX version bits 127:64 aren't touched. Find a better way to -// represent this instead of always zeroing SRC1. One possible solution is -// to represent the instruction w/ something similar as the "$src1 = $dst" -// constraint but without the tied operands. 
-def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>, - Requires<[HasAVX, OptForSpeed]>; +def : Pat<(X86SFence), (SFENCE)>; +def : Pat<(X86LFence), (LFENCE)>; +def : Pat<(X86MFence), (MFENCE)>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Load/Store XCSR register @@ -2106,10 +3234,22 @@ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; } -def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; -def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; +def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, VEX; + +// For Disassembler +let isCodeGenOnly = 1 in { +def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, VEX; +} let canFoldAsLoad = 1, mayLoad = 1 in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -2147,6 +3287,16 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", []>, XS, Requires<[HasSSE2]>; +// For Disassembler +let isCodeGenOnly = 1 in { +def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>; + +def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", + []>, XS, Requires<[HasSSE2]>; +} + let canFoldAsLoad = 1, mayLoad = 1 in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", @@ -2180,9 +3330,11 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), } // ExeDomain = SSEPackedInt -def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>; -def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src), - (VMOVDQUYmr addr:$dst, VR256:$src)>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>; + def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src), + (VMOVDQUYmr addr:$dst, VR256:$src)>; +} //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Arithmetic Instructions @@ -2415,15 +3567,14 @@ let ExeDomain = SSEPackedInt in { def VPANDNrr : PDI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - VR128:$src2)))]>, VEX_4V; + [(set VR128:$dst, + (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V; def VPANDNrm : PDI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - 
[(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - (memopv2i64 addr:$src2))))]>, - VEX_4V; + [(set VR128:$dst, (X86andnp VR128:$src1, + (memopv2i64 addr:$src2)))]>, VEX_4V; } } @@ -2527,6 +3678,32 @@ let Predicates = [HasAVX] in { 0>, VEX_4V; defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0, 0>, VEX_4V; + + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), + (VPCMPEQBrr VR128:$src1, VR128:$src2)>; + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), + (VPCMPEQBrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), + (VPCMPEQWrr VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), + (VPCMPEQWrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), + (VPCMPEQDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), + (VPCMPEQDrm VR128:$src1, addr:$src2)>; + + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), + (VPCMPGTBrr VR128:$src1, VR128:$src2)>; + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), + (VPCMPGTBrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), + (VPCMPGTWrr VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), + (VPCMPGTWrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), + (VPCMPGTDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), + (VPCMPGTDrm VR128:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { @@ -2538,31 +3715,33 @@ let Constraints = "$src1 = $dst" in { defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>; } // Constraints = "$src1 = $dst" -def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), - (PCMPEQBrr VR128:$src1, VR128:$src2)>; -def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), - (PCMPEQBrm VR128:$src1, addr:$src2)>; -def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), - (PCMPEQWrr VR128:$src1, VR128:$src2)>; -def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), - (PCMPEQWrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), - (PCMPEQDrr VR128:$src1, VR128:$src2)>; -def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), - (PCMPEQDrm VR128:$src1, addr:$src2)>; - -def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), - (PCMPGTBrr VR128:$src1, VR128:$src2)>; -def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), - (PCMPGTBrm VR128:$src1, addr:$src2)>; -def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), - (PCMPGTWrr VR128:$src1, VR128:$src2)>; -def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), - (PCMPGTWrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), - (PCMPGTDrr VR128:$src1, VR128:$src2)>; -def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), - (PCMPGTDrm VR128:$src1, addr:$src2)>; +let Predicates = [HasSSE2] in { + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), + (PCMPEQBrr VR128:$src1, VR128:$src2)>; + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), + (PCMPEQBrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), + (PCMPEQWrr VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), + (PCMPEQWrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), + (PCMPEQDrr VR128:$src1, 
VR128:$src2)>; + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), + (PCMPEQDrm VR128:$src1, addr:$src2)>; + + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), + (PCMPGTBrr VR128:$src1, VR128:$src2)>; + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), + (PCMPGTBrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), + (PCMPGTWrr VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), + (PCMPGTWrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), + (PCMPGTDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), + (PCMPGTDrm VR128:$src1, addr:$src2)>; +} //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Pack Instructions @@ -2608,7 +3787,7 @@ def mi : Ii8<0x70, MRMSrcMem, let Predicates = [HasAVX] in { let AddedComplexity = 5 in - defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize, + defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize, VEX; // SSE2 with ImmT == Imm8 and XS prefix. @@ -2618,6 +3797,34 @@ let Predicates = [HasAVX] in { // SSE2 with ImmT == Imm8 and XD prefix. defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD, VEX; + + let AddedComplexity = 5 in + def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), + (VPSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; + // Unary v4f32 shuffle with VPSHUF* in order to fold a load. + def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), + (VPSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; + + def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), + (i8 imm:$imm))), + (VPSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), + (i8 imm:$imm))), + (VPSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>; + def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>; + def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), + (VPSHUFHWri VR128:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), + (i8 imm:$imm))), + (VPSHUFHWmi addr:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), + (VPSHUFLWri VR128:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), + (i8 imm:$imm))), + (VPSHUFLWmi addr:$src, imm:$imm)>; } let Predicates = [HasSSE2] in { @@ -2629,6 +3836,34 @@ let Predicates = [HasSSE2] in { // SSE2 with ImmT == Imm8 and XD prefix. defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD; + + let AddedComplexity = 5 in + def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), + (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; + // Unary v4f32 shuffle with PSHUF* in order to fold a load. 
+ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), + (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; + + def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), + (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), + (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; + def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; + def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), + (PSHUFHWri VR128:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), + (i8 imm:$imm))), + (PSHUFHWmi addr:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), + (PSHUFLWri VR128:$src, imm:$imm)>; + def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), + (i8 imm:$imm))), + (PSHUFLWmi addr:$src, imm:$imm)>; } //===---------------------------------------------------------------------===// @@ -2637,71 +3872,69 @@ let Predicates = [HasSSE2] in { let ExeDomain = SSEPackedInt in { multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, - PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> { + SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> { def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !if(Is2Addr, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>; + [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (unp_frag VR128:$src1, + [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 addr:$src2))))]>; } let Predicates = [HasAVX] in { - defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8, - 0>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16, - 0>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32, - 0>, VEX_4V; + defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw, + bc_v16i8, 0>, VEX_4V; + defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd, + bc_v8i16, 0>, VEX_4V; + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, + bc_v4i32, 0>, VEX_4V; /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen /// knew to collapse (bitconvert VT to VT) into its operand. 
def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V; + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, + VR128:$src2)))]>, VEX_4V; def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; - - defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8, - 0>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16, - 0>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32, - 0>, VEX_4V; + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, + (memopv2i64 addr:$src2))))]>, VEX_4V; + + defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, + bc_v16i8, 0>, VEX_4V; + defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd, + bc_v8i16, 0>, VEX_4V; + defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, + bc_v4i32, 0>, VEX_4V; /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen /// knew to collapse (bitconvert VT to VT) into its operand. def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V; + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, + VR128:$src2)))]>, VEX_4V; def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, + (memopv2i64 addr:$src2))))]>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>; - defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>; - defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>; + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>; + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>; + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>; /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen /// knew to collapse (bitconvert VT to VT) into its operand. 
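// Illustrative sketch, not part of the patch itself: with sse2_unpack now
// parameterized over a target-specific SDNode (X86Punpckhbw et al.) instead
// of a generic PatFrag, an instantiation such as
//   defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>;
// expands, with Is2Addr left at its default of 1 so the !if selects the
// one-operand asm string, to roughly these two hand-written records:
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "punpckhbw\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                            (v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)))]>;
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, i128mem:$src2),
                      "punpckhbw\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                            (X86Punpckhbw VR128:$src1,
                                          (bc_v16i8 (memopv2i64 addr:$src2))))]>;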
@@ -2709,17 +3942,17 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>; + (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>; def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckl VR128:$src1, + (v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))))]>; - defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>; - defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>; - defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>; + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>; + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>; + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>; /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen /// knew to collapse (bitconvert VT to VT) into its operand. @@ -2727,17 +3960,24 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>; + (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>; def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2i64 (unpckh VR128:$src1, + (v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))))]>; } - } // ExeDomain = SSEPackedInt +// Splat v2f64 / v2i64 +let AddedComplexity = 10 in { + def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), + (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), + (VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>; +} + //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Extract and Insert //===---------------------------------------------------------------------===// @@ -2769,7 +4009,7 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, OpSize, VEX; + imm:$src2))]>, TB, OpSize, VEX; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -2778,11 +4018,11 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, // Insert let Predicates = [HasAVX] in { - defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V; + defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V; def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, OpSize, VEX_4V; + []>, TB, OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in @@ -2839,7 +4079,9 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), // SSE2 - Move Doubleword //===---------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // Move Int Doubleword to Packed Double Int +// def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2849,6 +4091,14 @@ 
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, VEX; +def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (scalar_to_vector GR64:$src)))]>, VEX; +def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX; + def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2865,8 +4115,9 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))]>; - +//===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar +// def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX; @@ -2883,7 +4134,9 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>; +//===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int +// def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2902,22 +4155,48 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)]>; -def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), +//===---------------------------------------------------------------------===// +// Move Packed Doubleword Int first element to Doubleword Int +// +def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), + (iPTR 0)))]>, + TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>; + +def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), + (iPTR 0)))]>; + +//===---------------------------------------------------------------------===// +// Bitcast FR64 <-> GR64 +// +let Predicates = [HasAVX] in +def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, + VEX; +def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), - (iPTR 0)))]>; + [(set GR64:$dst, (bitconvert FR64:$src))]>; +def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; + def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; +def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (bitconvert FR64:$src))]>; +def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, 
FR64:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; -def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bitconvert FR64:$src))]>; -def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), - "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; - +//===---------------------------------------------------------------------===// // Move Scalar Single to Double Int +// def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX; @@ -2931,7 +4210,9 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; -// movd / movq to XMM register zero-extends +//===---------------------------------------------------------------------===// +// Patterns and instructions to describe movd/movq to XMM register zero-extends +// let AddedComplexity = 15 in { def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", @@ -2967,15 +4248,36 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>; +} -def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), +let Predicates = [HasSSE2], AddedComplexity = 20 in { + def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), (MOVZDI2PDIrm addr:$src)>; -def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; -def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; } +let Predicates = [HasAVX] in { + // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. + let AddedComplexity = 20 in { + def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), + (VMOVZDI2PDIrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), + (VMOVZDI2PDIrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), + (VMOVZDI2PDIrm addr:$src)>; + } + // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>; + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))), + (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; +} + // These are the correct encodings of the instructions so that we know how to // read correct assembly, even though we continue to emit the wrong ones for // compatibility with Darwin's buggy assembler. 
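// Illustrative restatement, not new patch content: the 256-bit
// scalar_to_vector + zero-extend patterns above can reuse the 128-bit
// instructions because AVX-encoded 128-bit movd/movq already clear bits
// 255:128 of the ymm destination. One of those Pats again, annotated:
def : Pat<(v8i32 (X86vzmovl (insert_subvector
                   undef,                                // upper half left undef
                   (v4i32 (scalar_to_vector GR32:$src)), // scalar into element 0
                   (i32 0)))),                           // inserted at lane 0
          (SUBREG_TO_REG (i32 0),
                         (VMOVZDI2PDIrr GR32:$src), // zero-extending movd, xmm result
                         sub_xmm)>;                 // relabeled as the containing ymm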
@@ -2996,7 +4298,9 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}", // SSE2 - Move Quadword //===---------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // Move Quadword Int to Packed Quadword Int +// def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3008,7 +4312,9 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix +//===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int +// def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), @@ -3018,10 +4324,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)]>; -def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; - +//===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. +// def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; @@ -3037,7 +4342,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (loadi64 addr:$src))))))]>, XS, VEX, Requires<[HasAVX]>; -let AddedComplexity = 20 in { +let AddedComplexity = 20 in def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3045,15 +4350,27 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (loadi64 addr:$src))))))]>, XS, Requires<[HasSSE2]>; -def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), +let Predicates = [HasSSE2], AddedComplexity = 20 in { + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZQI2PQIrm addr:$src)>; -def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), (MOVZQI2PQIrm addr:$src)>; -def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; +} + +let Predicates = [HasAVX], AddedComplexity = 20 in { + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), + (VMOVZQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), + (VMOVZQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzload addr:$src)), + (VMOVZQI2PQIrm addr:$src)>; } +//===---------------------------------------------------------------------===// // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. 
+// let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -3077,9 +4394,21 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))]>, XS, Requires<[HasSSE2]>; +} -def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), - (MOVZPQILo2PQIrm addr:$src)>; +let AddedComplexity = 20 in { + let Predicates = [HasSSE2] in { + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), + (MOVZPQILo2PQIrm addr:$src)>; + def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), + (MOVZPQILo2PQIrr VR128:$src)>; + } + let Predicates = [HasAVX] in { + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), + (VMOVZPQILo2PQIrm addr:$src)>; + def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), + (VMOVZPQILo2PQIrr VR128:$src)>; + } } // Instructions to match in the assembler @@ -3102,37 +4431,6 @@ def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", []>, XS; //===---------------------------------------------------------------------===// -// SSE2 - Misc Instructions -//===---------------------------------------------------------------------===// - -// Flush cache -def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), - "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, - TB, Requires<[HasSSE2]>; - -// Load, store, and memory fence -def LFENCE : I<0xAE, MRM_E8, (outs), (ins), - "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM_F0, (outs), (ins), - "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; -def : Pat<(X86LFence), (LFENCE)>; -def : Pat<(X86MFence), (MFENCE)>; - - -// Pause. This "instruction" is encoded as "rep; nop", so even though it -// was introduced with SSE2, it's backward compatible. -def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; - -// Alias instructions that map zero vector to pxor / xorp* for sse. -// We set canFoldAsLoad because this can be converted to a constant-pool -// load of an all-ones value if folding it would be beneficial. -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt in - // FIXME: Change encoding to pseudo. 
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>; - -//===---------------------------------------------------------------------===// // SSE3 - Conversion Instructions //===---------------------------------------------------------------------===// @@ -3164,6 +4462,11 @@ def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; +def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), + (VCVTPD2DQYrr VR256:$src)>; +def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), + (VCVTPD2DQYrm addr:$src)>; + // Convert Packed DW Integers to Packed Double FP let Predicates = [HasAVX] in { def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -3192,41 +4495,74 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), (VCVTPD2DQYrm addr:$src)>; +def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), + (VCVTDQ2PDYrr VR128:$src)>; +def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))), + (VCVTDQ2PDYrm addr:$src)>; + //===---------------------------------------------------------------------===// -// SSE3 - Move Instructions +// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// - -// Replicate Single FP -multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> { -def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), +multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, + ValueType vt, RegisterClass RC, PatFrag mem_frag, + X86MemOperand x86memop> { +def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (rep_frag - VR128:$src, (undef))))]>; -def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + [(set RC:$dst, (vt (OpNode RC:$src)))]>; +def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (rep_frag - (memopv4f32 addr:$src), (undef)))]>; + [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>; } -multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag, - string OpcodeStr> { -def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; -def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; +let Predicates = [HasAVX] in { + defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", + v4f32, VR128, memopv4f32, f128mem>, VEX; + defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", + v4f32, VR128, memopv4f32, f128mem>, VEX; + defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", + v8f32, VR256, memopv8f32, f256mem>, VEX; + defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", + v8f32, VR256, memopv8f32, f256mem>, VEX; +} +defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, + memopv4f32, f128mem>; +defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, + memopv4f32, f128mem>; + +let Predicates = [HasSSE3] in { + def : Pat<(v4i32 (X86Movshdup VR128:$src)), + (MOVSHDUPrr VR128:$src)>; + def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), + (MOVSHDUPrm addr:$src)>; + def : 
Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSLDUPrm addr:$src)>;
}

let Predicates = [HasAVX] in {
- // FIXME: Merge above classes when we have patterns for the ymm version
- defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
- defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
- defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX;
- defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX;
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (VMOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VMOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (VMOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VMOVSLDUPrm addr:$src)>;
+ def : Pat<(v8i32 (X86Movshdup VR256:$src)),
+ (VMOVSHDUPYrr VR256:$src)>;
+ def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VMOVSHDUPYrm addr:$src)>;
+ def : Pat<(v8i32 (X86Movsldup VR256:$src)),
+ (VMOVSLDUPYrr VR256:$src)>;
+ def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VMOVSLDUPYrm addr:$src)>;
}

-defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
-defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;

-// Replicate Double FP
+//===---------------------------------------------------------------------===//
+// SSE3 - Replicate Double FP - MOVDDUP
+//===---------------------------------------------------------------------===//
+
 multiclass sse3_replicate_dfp<string OpcodeStr> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -3238,23 +4574,90 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
 (undef))))]>;
}

+// FIXME: Merge with the above class when there are patterns for the ymm version
 multiclass sse3_replicate_dfp_y<string OpcodeStr> {
-def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>;
-def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>;
+let Predicates = [HasAVX] in {
+ def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+ }
+}
+
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
+defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+
+let Predicates = [HasSSE3] in {
+ def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (MOVDDUPrm addr:$src)>;
+ let AddedComplexity = 5 in {
+ def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
+ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>;
+ }
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup
(v2f64 (scalar_to_vector (loadf64 addr:$src)))), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (bc_v2f64 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (MOVDDUPrm addr:$src)>; } let Predicates = [HasAVX] in { - // FIXME: Merge above classes when we have patterns for the ymm version - defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; - defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; + def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), + (undef)), + (VMOVDDUPrm addr:$src)>; + let AddedComplexity = 5 in { + def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>; + def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), + (VMOVDDUPrm addr:$src)>; + def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>; + def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), + (VMOVDDUPrm addr:$src)>; + } + def : Pat<(X86Movddup (memopv2f64 addr:$src)), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + def : Pat<(X86Movddup (bc_v2f64 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + + // 256-bit version + def : Pat<(X86Movddup (memopv4f64 addr:$src)), + (VMOVDDUPYrm addr:$src)>; + def : Pat<(X86Movddup (memopv4i64 addr:$src)), + (VMOVDDUPYrm addr:$src)>; + def : Pat<(X86Movddup (v4f64 (scalar_to_vector (loadf64 addr:$src)))), + (VMOVDDUPYrm addr:$src)>; + def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))), + (VMOVDDUPYrm addr:$src)>; + def : Pat<(X86Movddup (v4f64 VR256:$src)), + (VMOVDDUPYrr VR256:$src)>; + def : Pat<(X86Movddup (v4i64 VR256:$src)), + (VMOVDDUPYrr VR256:$src)>; } -defm MOVDDUP : sse3_replicate_dfp<"movddup">; -// Move Unaligned Integer +//===---------------------------------------------------------------------===// +// SSE3 - Move Unaligned Integer +//===---------------------------------------------------------------------===// + let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", @@ -3267,38 +4670,6 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; -def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), - (undef)), - (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; - -// Several Move patterns -let AddedComplexity = 5 in { -def : Pat<(movddup (memopv2f64 addr:$src), (undef)), - (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), - (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -def : Pat<(movddup (memopv2i64 addr:$src), (undef)), - (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), - (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; -} - -// vector_shuffle v1, <undef> <1, 1, 3, 3> -let AddedComplexity = 15 in -def : Pat<(v4i32 (movshdup VR128:$src, (undef))), - (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; -let AddedComplexity = 20 in -def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), - (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; - 
-// vector_shuffle v1, <undef> <0, 0, 2, 2> -let AddedComplexity = 15 in - def : Pat<(v4i32 (movsldup VR128:$src, (undef))), - (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; -let AddedComplexity = 20 in - def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), - (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; - //===---------------------------------------------------------------------===// // SSE3 - Arithmetic //===---------------------------------------------------------------------===// @@ -3344,62 +4715,58 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3], // Horizontal ops multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, - X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> { + X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>; def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>; } multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, - X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> { + X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>; def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>; } let Predicates = [HasAVX] in { defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, - int_x86_sse3_hadd_ps, 0>, VEX_4V; + X86fhadd, 0>, VEX_4V; defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, - int_x86_sse3_hadd_pd, 0>, VEX_4V; + X86fhadd, 0>, VEX_4V; defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, - int_x86_sse3_hsub_ps, 0>, VEX_4V; + X86fhsub, 0>, VEX_4V; defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, - int_x86_sse3_hsub_pd, 0>, VEX_4V; + X86fhsub, 0>, VEX_4V; defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - int_x86_avx_hadd_ps_256, 0>, VEX_4V; + X86fhadd, 0>, VEX_4V; defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, - int_x86_avx_hadd_pd_256, 0>, VEX_4V; + X86fhadd, 0>, VEX_4V; defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - int_x86_avx_hsub_ps_256, 0>, VEX_4V; + X86fhsub, 0>, VEX_4V; defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, - int_x86_avx_hsub_pd_256, 0>, VEX_4V; + X86fhsub, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, - int_x86_sse3_hadd_ps>; - defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, - 
int_x86_sse3_hadd_pd>; - defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, - int_x86_sse3_hsub_ps>; - defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, - int_x86_sse3_hsub_pd>; + defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>; + defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>; + defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>; + defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>; } //===---------------------------------------------------------------------===// @@ -3466,7 +4833,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let Predicates = [HasAVX] in { +let ImmT = NoImm, Predicates = [HasAVX] in { let isCommutable = 0 in { defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16, int_x86_ssse3_phadd_w_128, 0>, VEX_4V; @@ -3525,17 +4892,33 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16, int_x86_ssse3_pmul_hr_sw_128>; } -def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; -def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; +let Predicates = [HasSSSE3] in { + def : Pat<(X86pshufb VR128:$src, VR128:$mask), + (PSHUFBrr128 VR128:$src, VR128:$mask)>; + def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), + (PSHUFBrm128 VR128:$src, addr:$mask)>; + + def : Pat<(X86psignb VR128:$src1, VR128:$src2), + (PSIGNBrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(X86psignw VR128:$src1, VR128:$src2), + (PSIGNWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(X86psignd VR128:$src1, VR128:$src2), + (PSIGNDrr128 VR128:$src1, VR128:$src2)>; +} + +let Predicates = [HasAVX] in { + def : Pat<(X86pshufb VR128:$src, VR128:$mask), + (VPSHUFBrr128 VR128:$src, VR128:$mask)>; + def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), + (VPSHUFBrm128 VR128:$src, addr:$mask)>; -def : Pat<(X86psignb VR128:$src1, VR128:$src2), - (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; -def : Pat<(X86psignw VR128:$src1, VR128:$src2), - (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; -def : Pat<(X86psignd VR128:$src1, VR128:$src2), - (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; + def : Pat<(X86psignb VR128:$src1, VR128:$src2), + (VPSIGNBrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(X86psignw VR128:$src1, VR128:$src2), + (VPSIGNWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(X86psignd VR128:$src1, VR128:$src2), + (VPSIGNDrr128 VR128:$src1, VR128:$src2)>; +} //===---------------------------------------------------------------------===// // SSSE3 - Packed Align Instruction Patterns @@ -3560,33 +4943,35 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { let Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; -let Constraints = "$src1 = $dst" in +let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in defm PALIGN : ssse3_palign<"palignr">; -let AddedComplexity = 5 in { -def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)), - (PALIGNR128rr VR128:$src2, VR128:$src1, - (SHUFFLE_get_palign_imm VR128:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v4f32 (palign:$src3 VR128:$src1, VR128:$src2)), - (PALIGNR128rr VR128:$src2, VR128:$src1, - (SHUFFLE_get_palign_imm VR128:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v8i16 (palign:$src3 VR128:$src1, VR128:$src2)), - (PALIGNR128rr VR128:$src2, 
VR128:$src1, - (SHUFFLE_get_palign_imm VR128:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), - (PALIGNR128rr VR128:$src2, VR128:$src1, - (SHUFFLE_get_palign_imm VR128:$src3))>, - Requires<[HasSSSE3]>; +let Predicates = [HasSSSE3] in { +def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +} + +let Predicates = [HasAVX] in { +def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } //===---------------------------------------------------------------------===// -// SSSE3 Misc Instructions +// SSSE3 - Thread synchronization //===---------------------------------------------------------------------===// -// Thread synchronization let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>; @@ -3609,338 +4994,6 @@ def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>, def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>, Requires<[In64BitMode]>; -//===---------------------------------------------------------------------===// -// Non-Instruction Patterns -//===---------------------------------------------------------------------===// - -// extload f32 -> f64. This matches load+fextend because we have a hack in -// the isel (PreprocessForFPConvert) that can introduce loads after dag -// combine. -// Since these loads aren't folded into the fextend, we have to match it -// explicitly here. -let Predicates = [HasSSE2] in - def : Pat<(fextend (loadf32 addr:$src)), - (CVTSS2SDrm addr:$src)>; - -// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while -// in the non-AVX version bits 127:64 aren't touched. Find a better way to -// represent this instead of always zeroing SRC1. One possible solution is -// to represent the instruction w/ something similar as the "$src1 = $dst" -// constraint but without the tied operands. 
-let Predicates = [HasAVX] in - def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), - addr:$src)>; - -// bit_convert -let Predicates = [HasXMMInt] in { - def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; - def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; - def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; - def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; - def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; - def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; - def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; - def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; - def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; - def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; - def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; - def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; - def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; - def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; - def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; - def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; - def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; - def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>; - def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; - def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; - def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; - def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; - def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>; - def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; - def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; - def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; - def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; - def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; - def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; - def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; -} - -let Predicates = [HasAVX] in { - def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; - def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; - def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>; - def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; - def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>; - def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>; - def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; - def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; - def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>; - def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>; -} - -// Move scalar to XMM zero-extended -// movd to XMM 
register zero-extends -let AddedComplexity = 15 in { -// Zeroing a VR128 then do a MOVS{S|D} to the lower bits. -def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>; -def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), - (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>; -def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0PS)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; -def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0PI)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; -} - -// Splat v2f64 / v2i64 -let AddedComplexity = 10 in { -def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), - (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(unpckh (v2f64 VR128:$src), (undef)), - (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), - (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(unpckh (v2i64 VR128:$src), (undef)), - (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; -} - -// Special unary SHUFPSrri case. -def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; -let AddedComplexity = 5 in -def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), - (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[HasSSE2]>; -// Special unary SHUFPDrri case. -def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; -// Special unary SHUFPDrri case. -def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; -// Unary v4f32 shuffle with PSHUF* in order to fold a load. -def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), - (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[HasSSE2]>; - -// Special binary v4i32 shuffle cases with SHUFPS. -def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), - (SHUFPSrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; -def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (SHUFPSrmi VR128:$src1, addr:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; -// Special binary v2i64 shuffle cases using SHUFPDrri. 
-def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), - (SHUFPDrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE2]>; - -// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -let AddedComplexity = 15 in { -def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[OptForSpeed, HasSSE2]>; -} -let AddedComplexity = 10 in { -def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), - (UNPCKLPSrr VR128:$src, VR128:$src)>; -def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLBWrr VR128:$src, VR128:$src)>; -def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLWDrr VR128:$src, VR128:$src)>; -def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLDQrr VR128:$src, VR128:$src)>; -} - -// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> -let AddedComplexity = 15 in { -def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[OptForSpeed, HasSSE2]>; -def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), - (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>, - Requires<[OptForSpeed, HasSSE2]>; -} -let AddedComplexity = 10 in { -def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), - (UNPCKHPSrr VR128:$src, VR128:$src)>; -def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHBWrr VR128:$src, VR128:$src)>; -def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHWDrr VR128:$src, VR128:$src)>; -def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHDQrr VR128:$src, VR128:$src)>; -} - -let AddedComplexity = 20 in { -// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS -def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr VR128:$src1, VR128:$src2)>; - -// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS -def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), - (MOVHLPSrr VR128:$src1, VR128:$src2)>; - -// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS -def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), - (MOVHLPSrr VR128:$src1, VR128:$src1)>; -def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), - (MOVHLPSrr VR128:$src1, VR128:$src1)>; -} - -let AddedComplexity = 20 in { -// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS -def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>; -def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>; -} - -// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS -def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>; -def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>; -def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), - addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>; -def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>; - -let AddedComplexity = 15 in { -// Setting the lowest element in the vector. 
-def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; -def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; - -// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd -def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>, - Requires<[HasSSE2]>; -def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>, - Requires<[HasSSE2]>; -} - -// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but -// fall back to this for SSE1) -def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), - (SHUFPSrri VR128:$src2, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - -// Set lowest element and zero upper elements. -def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), - (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; - -// vector -> vector casts -def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; - -// Use movaps / movups for SSE integer load / store (one byte shorter). -// The instructions selected below are then converted to MOVDQA/MOVDQU -// during the SSE domain pass. -let Predicates = [HasSSE1] in { - def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>; - def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>; - def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>; - def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>; - - def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -} - -// Use vmovaps/vmovups for AVX integer load/store. 
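The comment above gives the rationale: MOVAPS/MOVUPS encode one byte shorter than MOVDQA/MOVDQU, so integer vector loads and stores are selected in the floating-point domain and flipped back by the execution-domain fix-up pass only when that avoids a domain-crossing stall. From the programmer's side the choice is invisible; a sketch (assuming -msse2) of the accesses these patterns match:

    #include <immintrin.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
      alignas(16) std::int32_t buf[4] = {1, 2, 3, 4};
      // Aligned v4i32 load: matched by (alignedloadv4i32 ...) -> MOVAPSrm.
      __m128i v = _mm_load_si128(reinterpret_cast<const __m128i*>(buf));
      v = _mm_add_epi32(v, v);
      // Unaligned integer store: matched by (store ...) -> MOVUPSmr.
      _mm_storeu_si128(reinterpret_cast<__m128i*>(buf), v);
      std::printf("%d %d %d %d\n", buf[0], buf[1], buf[2], buf[3]); // 2 4 6 8
      return 0;
    }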
-let Predicates = [HasAVX] in { - // 128-bit load/store - def : Pat<(alignedloadv4i32 addr:$src), - (VMOVAPSrm addr:$src)>; - def : Pat<(loadv4i32 addr:$src), - (VMOVUPSrm addr:$src)>; - def : Pat<(alignedloadv2i64 addr:$src), - (VMOVAPSrm addr:$src)>; - def : Pat<(loadv2i64 addr:$src), - (VMOVUPSrm addr:$src)>; - - def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (VMOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (VMOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (VMOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (VMOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (VMOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (VMOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (VMOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (VMOVUPSmr addr:$dst, VR128:$src)>; - - // 256-bit load/store - def : Pat<(alignedloadv4i64 addr:$src), - (VMOVAPSYrm addr:$src)>; - def : Pat<(loadv4i64 addr:$src), - (VMOVUPSYrm addr:$src)>; - def : Pat<(alignedloadv8i32 addr:$src), - (VMOVAPSYrm addr:$src)>; - def : Pat<(loadv8i32 addr:$src), - (VMOVUPSYrm addr:$src)>; - def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst), - (VMOVAPSYmr addr:$dst, VR256:$src)>; - def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst), - (VMOVAPSYmr addr:$dst, VR256:$src)>; - def : Pat<(store (v4i64 VR256:$src), addr:$dst), - (VMOVUPSYmr addr:$dst, VR256:$src)>; - def : Pat<(store (v8i32 VR256:$src), addr:$dst), - (VMOVUPSYmr addr:$dst, VR256:$src)>; -} - //===----------------------------------------------------------------------===// // SSE4.1 - Packed Move with Sign/Zero Extend //===----------------------------------------------------------------------===// @@ -3979,36 +5032,71 @@ defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>; defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>; defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>; -// Common patterns involving scalar load. -def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), - (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), - (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>; +let Predicates = [HasSSE41] in { + // Common patterns involving scalar load. 
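These "scalar load" patterns fold a 64-bit memory operand straight into the extension instruction, so no separate MOVQ is emitted. A C++ view of the same fold, assuming a compiler invoked with -msse4.1 (illustration only):

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      signed char bytes[8] = {-1, 2, -3, 4, -5, 6, -7, 8};
      // The 64-bit load below folds into PMOVSXBW's memory operand, which
      // is what (int_x86_sse41_pmovsxbw (vzload_v2i64 ...)) expresses.
      __m128i q = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(bytes));
      __m128i w = _mm_cvtepi8_epi16(q);   // PMOVSXBW
      short out[8];
      _mm_storeu_si128(reinterpret_cast<__m128i*>(out), w);
      for (int i = 0; i < 8; ++i) std::printf("%d ", out[i]); // -1 2 -3 4 -5 6 -7 8
      std::printf("\n");
      return 0;
    }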
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), + (PMOVSXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), + (PMOVSXBWrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), + (PMOVSXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), + (PMOVSXWDrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), + (PMOVSXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), + (PMOVSXDQrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), - (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), - (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), + (PMOVZXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), + (PMOVZXBWrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), - (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), - (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), + (PMOVZXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), + (PMOVZXWDrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), - (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), - (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), + (PMOVZXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), + (PMOVZXDQrm addr:$src)>; +} + +let Predicates = [HasAVX] in { + // Common patterns involving scalar load. 
+ def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), + (VPMOVSXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), + (VPMOVSXBWrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), + (VPMOVSXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), + (VPMOVSXWDrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), - (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), - (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), + (VPMOVSXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), + (VPMOVSXDQrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), - (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), - (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), + (VPMOVZXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), + (VPMOVZXBWrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), + (VPMOVZXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), + (VPMOVZXWDrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), + (VPMOVZXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), + (VPMOVZXDQrm addr:$src)>; +} multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> { @@ -4039,17 +5127,31 @@ defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>; -// Common patterns involving scalar load -def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), - (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), - (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>; +let Predicates = [HasSSE41] in { + // Common patterns involving scalar load + def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), + (PMOVSXBDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), + (PMOVSXWQrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), + (PMOVZXBDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), + (PMOVZXWQrm addr:$src)>; +} -def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), - (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>; -def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), - (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>; +let Predicates = [HasAVX] in { + // Common patterns involving scalar load + def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), + (VPMOVSXBDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), + (VPMOVSXWQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), + (VPMOVZXBDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), + (VPMOVZXWQrm addr:$src)>; +} multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> { def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -4073,16 +5175,31 @@ defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, 
"pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; -// Common patterns involving scalar load -def : Pat<(int_x86_sse41_pmovsxbq - (bitconvert (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>; +let Predicates = [HasSSE41] in { + // Common patterns involving scalar load + def : Pat<(int_x86_sse41_pmovsxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVSXBQrm addr:$src)>; + + def : Pat<(int_x86_sse41_pmovzxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVZXBQrm addr:$src)>; +} + +let Predicates = [HasAVX] in { + // Common patterns involving scalar load + def : Pat<(int_x86_sse41_pmovsxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVSXBQrm addr:$src)>; -def : Pat<(int_x86_sse41_pmovzxbq - (bitconvert (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>; + def : Pat<(int_x86_sse41_pmovzxbq + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVZXBQrm addr:$src)>; +} //===----------------------------------------------------------------------===// // SSE4.1 - Extract Instructions @@ -4208,7 +5325,12 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))), addr:$dst), (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasSSE41]>; + Requires<[HasSSE41]>; +def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), + imm:$src2))), + addr:$dst), + (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, + Requires<[HasAVX]>; //===----------------------------------------------------------------------===// // SSE4.1 - Insert Instructions @@ -4297,7 +5419,7 @@ let Constraints = "$src1 = $dst" in // in the target vector. 
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + (ins VR128:$src1, VR128:$src2, u32u8imm:$src3), !if(Is2Addr, !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, @@ -4306,7 +5428,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize; def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3), + (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3), !if(Is2Addr, !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, @@ -4348,7 +5470,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, // Vector intrinsic operation, mem def PSm : Ii8<opcps, MRMSrcMem, - (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, @@ -4366,7 +5488,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, // Vector intrinsic operation, mem def PDm : SS4AIi8<opcpd, MRMSrcMem, - (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, @@ -4501,14 +5623,14 @@ let Predicates = [HasAVX] in { int_x86_avx_round_pd_256>, VEX; defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", int_x86_sse41_round_ss, - int_x86_sse41_round_sd, 0>, VEX_4V; + int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; // Instructions for the assembler defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">, VEX; defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">, VEX; - defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V; + defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, @@ -4578,26 +5700,34 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; // SSE4.1 - Misc Instructions //===----------------------------------------------------------------------===// -def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "popcnt{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS; -def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "popcnt{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS; - -def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "popcnt{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (ctpop GR32:$src))]>, XS; -def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "popcnt{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS; - -def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "popcnt{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (ctpop GR64:$src))]>, XS; -def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "popcnt{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS; +let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { + def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "popcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>, + 
OpSize, XS; + def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "popcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctpop (loadi16 addr:$src))), + (implicit EFLAGS)]>, OpSize, XS; + + def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "popcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>, + XS; + def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "popcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctpop (loadi32 addr:$src))), + (implicit EFLAGS)]>, XS; + + def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "popcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>, + XS; + def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "popcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctpop (loadi64 addr:$src))), + (implicit EFLAGS)]>, XS; +} @@ -4666,6 +5796,11 @@ let Predicates = [HasAVX] in { 0>, VEX_4V; defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, 0>, VEX_4V; + + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), + (VPCMPEQQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), + (VPCMPEQQrm VR128:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { @@ -4720,7 +5855,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, bit Is2Addr = 1> { let isCommutable = 1 in def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, i32i8imm:$src3), + (ins RC:$src1, RC:$src2, u32u8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), @@ -4729,7 +5864,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, OpSize; def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2, i32i8imm:$src3), + (ins RC:$src1, x86memop:$src2, u32u8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), @@ -4815,6 +5950,36 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, memopv32i8, int_x86_avx_blendv_ps_256>; +let Predicates = [HasAVX] in { + def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1), + (v16i8 VR128:$src2))), + (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>; + def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1), + (v4i32 VR128:$src2))), + (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>; + def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1), + (v4f32 VR128:$src2))), + (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>; + def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1), + (v2i64 VR128:$src2))), + (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>; + def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1), + (v2f64 VR128:$src2))), + (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>; + def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1), + (v8i32 VR256:$src2))), + (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1), + (v8f32 VR256:$src2))), + (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1), + (v4i64 VR256:$src2))), + (VBLENDVPDYrr VR256:$src2, 
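Stepping back to the POPCNT definitions above: they now carry Defs = [EFLAGS] and an (implicit EFLAGS) result, since the hardware instruction clears OF/SF/AF/CF/PF and sets ZF exactly when the source is zero, and they are gated on HasPOPCNT. A minimal use, assuming a POPCNT-capable CPU and a compiler given -mpopcnt (illustration, not patch code):

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      unsigned x = 0xF0F0u;
      // Lowers to the POPCNT32rr/rm definitions above.
      int bits = _mm_popcnt_u32(x);
      std::printf("popcount(%#x) = %d\n", x, bits);  // 8
      return 0;
    }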
VR256:$src1, VR256:$mask)>; + def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1), + (v4f64 VR256:$src2))), + (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; +} + /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> { @@ -4835,12 +6000,27 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { } } -defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; -defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; -defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; - -def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0), - (PBLENDVBrr0 VR128:$src1, VR128:$src2)>; +defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; +defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; +defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; + +let Predicates = [HasSSE41] in { + def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1), + (v16i8 VR128:$src2))), + (PBLENDVBrr0 VR128:$src2, VR128:$src1)>; + def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1), + (v4i32 VR128:$src2))), + (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; + def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1), + (v4f32 VR128:$src2))), + (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; + def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1), + (v2i64 VR128:$src2))), + (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; + def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), + (v2f64 VR128:$src2))), + (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; +} let Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -4876,9 +6056,16 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX] in { defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, 0>, VEX_4V; + + def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), + (VPCMPGTQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), + (VPCMPGTQrm VR128:$src1, addr:$src2)>; +} + let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; @@ -5158,22 +6345,43 @@ let Constraints = "$src1 = $dst" in { int_x86_aesni_aesdeclast>; } -def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), - (AESENCrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), - (AESENCrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), - (AESENCLASTrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), - (AESENCLASTrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), - (AESDECrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), - (AESDECrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), - (AESDECLASTrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), - (AESDECLASTrm VR128:$src1, addr:$src2)>; +let Predicates = [HasAES] in { + def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), + (AESENCrr 
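Note the operand swap in the vselect patterns above: BLENDV* takes an element from its second register operand where the mask's sign bit is set, so vselect's $src1 (the "true" value) lands in that slot and $src2 becomes the instruction's tied destination. The behaviour, assuming -msse4.1:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m128 a = _mm_setr_ps(1, 2, 3, 4);
      __m128 b = _mm_setr_ps(-1, -2, -3, -4);
      // Sign bit set in lanes 0 and 2 -> take b there, a elsewhere.
      __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(-1, 0, -1, 0));
      __m128 r = _mm_blendv_ps(a, b, mask);   // BLENDVPS (XMM0-tied form)
      float o[4];
      _mm_storeu_ps(o, r);
      std::printf("%g %g %g %g\n", o[0], o[1], o[2], o[3]);  // -1 2 -3 4
      return 0;
    }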
VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), + (AESENCrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), + (AESENCLASTrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), + (AESENCLASTrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), + (AESDECrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), + (AESDECrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), + (AESDECLASTrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), + (AESDECLASTrm VR128:$src1, addr:$src2)>; +} + +let Predicates = [HasAVX, HasAES], AddedComplexity = 20 in { + def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), + (VAESENCrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), + (VAESENCrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), + (VAESENCLASTrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), + (VAESENCLASTrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), + (VAESDECrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), + (VAESDECrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), + (VAESDECLASTrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), + (VAESDECLASTrm VR128:$src1, addr:$src2)>; +} // Perform the AES InvMixColumn Transformation let Predicates = [HasAVX, HasAES] in { @@ -5288,8 +6496,10 @@ defm : pclmul_alias<"lqlq", 0x00>; // AVX Instructions //===----------------------------------------------------------------------===// - -// Load from memory and broadcast to all elements of the destination operand +//===----------------------------------------------------------------------===// +// VBROADCAST - Load from memory and broadcast to all elements of the +// destination operand +// class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> : AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), @@ -5305,7 +6515,26 @@ def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, int_x86_avx_vbroadcastf128_pd_256>; -// Insert packed floating-point values +def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), + (VBROADCASTF128 addr:$src)>; + +def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSSY addr:$src)>; +def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), + (VBROADCASTSD addr:$src)>; +def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), + (VBROADCASTSSY addr:$src)>; +def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), + (VBROADCASTSD addr:$src)>; + +def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), + (VBROADCASTSS addr:$src)>; +def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSS addr:$src)>; + +//===----------------------------------------------------------------------===// +// VINSERTF128 - Insert packed floating-point values +// def VINSERTF128rr : AVXAIi8<0x18, 
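VBROADCASTSS/SD load one scalar and replicate it across every lane of the destination, which is why the patterns above map both the integer and floating-point X86VBroadcast forms onto the same instructions. A sketch of the 256-bit case, assuming -mavx:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      float s = 2.5f;
      // One 32-bit load, replicated to all eight lanes: VBROADCASTSSY.
      __m256 v = _mm256_broadcast_ss(&s);
      float o[8];
      _mm256_storeu_ps(o, v);
      for (int i = 0; i < 8; ++i) std::printf("%g ", o[i]);  // 2.5 x8
      std::printf("\n");
      return 0;
    }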
MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, i8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -5315,7 +6544,41 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; -// Extract packed floating-point values +def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; + +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; + +//===----------------------------------------------------------------------===// +// VEXTRACTF128 - Extract packed floating-point values +// def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), (ins VR256:$src1, i8imm:$src2), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -5325,7 +6588,41 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; -// Conditional SIMD Packed Loads and Stores +def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; + +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4f32 (VEXTRACTF128rr + (v8f32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2f64 (VEXTRACTF128rr + (v4f64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i32 (VEXTRACTF128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTF128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i16 (VEXTRACTF128rr + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i8 
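VINSERTF128/VEXTRACTF128 move one 128-bit half in or out of a YMM register; the vinsertf128_insert/vextractf128_extract fragments above let every 256-bit vector type reuse the same two instructions. The intrinsic-level view, assuming -mavx:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m256 v  = _mm256_set1_ps(1.0f);
      __m128 hi = _mm_setr_ps(5, 6, 7, 8);
      __m256 r  = _mm256_insertf128_ps(v, hi, 1);  // VINSERTF128: replace upper half
      __m128 e  = _mm256_extractf128_ps(r, 1);     // VEXTRACTF128: read it back
      float o[4];
      _mm_storeu_ps(o, e);
      std::printf("%g %g %g %g\n", o[0], o[1], o[2], o[3]);  // 5 6 7 8
      return 0;
    }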
(VEXTRACTF128rr + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + +//===----------------------------------------------------------------------===// +// VMASKMOV - Conditional SIMD Packed Loads and Stores +// multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, Intrinsic IntLd, Intrinsic IntLd256, Intrinsic IntSt, Intrinsic IntSt256, @@ -5363,7 +6660,9 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", int_x86_avx_maskstore_pd_256, memopv2f64, memopv4f64>; -// Permute Floating-Point Values +//===----------------------------------------------------------------------===// +// VPERMIL - Permute Single and Double Floating-Point Values +// multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop_f, X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag, @@ -5404,6 +6703,18 @@ defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, int_x86_avx_vpermilvar_pd_256, int_x86_avx_vpermil_pd_256>; +def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))), + (VPERMILPSYri VR256:$src1, imm:$imm)>; +def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))), + (VPERMILPDYri VR256:$src1, imm:$imm)>; +def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))), + (VPERMILPSYri VR256:$src1, imm:$imm)>; +def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))), + (VPERMILPDYri VR256:$src1, imm:$imm)>; + +//===----------------------------------------------------------------------===// +// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks +// def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -5413,65 +6724,6 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; -// Zero All YMM registers -def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", - [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>; - -// Zero Upper bits of YMM registers -def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", - [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>; - -def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; - -def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; - -def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(int_x86_avx_vextractf128_ps_256 
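VMASKMOVPS/PD, defined just above, load or store only the lanes whose mask sign bit is set; masked-off lanes read as zero and their memory is never touched, which makes the instruction usable at the ragged edge of an array. A sketch assuming -mavx (buffer and mask are illustrative):

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      float data[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      // Sign bit set in the even lanes only.
      __m256i mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
      __m256 v = _mm256_maskload_ps(data, mask);   // VMASKMOVPS, load form
      float o[8];
      _mm256_storeu_ps(o, v);
      for (int i = 0; i < 8; ++i) std::printf("%g ", o[i]);  // 1 0 3 0 5 0 7 0
      std::printf("\n");
      return 0;
    }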
VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; - -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4f32 (VEXTRACTF128rr - (v8f32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2f64 (VEXTRACTF128rr - (v4f64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4i32 (VEXTRACTF128rr - (v8i32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2i64 (VEXTRACTF128rr - (v4i64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; - -def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), - (VBROADCASTF128 addr:$src)>; - def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3), @@ -5489,377 +6741,59 @@ def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, (memopv8i32 addr:$src2), imm:$src3), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; + //===----------------------------------------------------------------------===// -// SSE Shuffle pattern fragments -//===----------------------------------------------------------------------===// +// VZERO - Zero YMM registers +// +let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in { + // Zero All YMM registers + def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", + [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>; -// This is part of a "work in progress" refactoring. The idea is that all -// vector shuffles are going to be translated into target specific nodes and -// directly matched by the patterns below (which can be changed along the way) -// The AVX version of some but not all of them are described here, and more -// should come in a near future. - -// Shuffle with PSHUFD instruction folding loads. The first two patterns match -// SSE2 loads, which are always promoted to v2i64. The last one should match -// the SSE1 case, where the only legal load is v4f32, but there is no PSHUFD -// in SSE2, how does it ever worked? Anyway, the pattern will remain here until -// we investigate further. 
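The retired comment above describes the refactoring toward target-specific shuffle nodes such as X86PShufd, whose immediate packs four 2-bit lane selectors (the SSE2-load patterns go through bc_v4i32 because those loads are always promoted to v2i64). What the immediate means, assuming -msse2:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m128i v = _mm_setr_epi32(10, 11, 12, 13);
      // PSHUFD imm 0x1B = _MM_SHUFFLE(0,1,2,3): reverse the four lanes.
      __m128i r = _mm_shuffle_epi32(v, 0x1B);
      int o[4];
      _mm_storeu_si128(reinterpret_cast<__m128i*>(o), r);
      std::printf("%d %d %d %d\n", o[0], o[1], o[2], o[3]);  // 13 12 11 10
      return 0;
    }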
-def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), - (i8 imm:$imm))), - (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), - (i8 imm:$imm))), - (PSHUFDmi addr:$src1, imm:$imm)>; -def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), - (i8 imm:$imm))), - (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked? - -// Shuffle with PSHUFD instruction. -def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (PSHUFDri VR128:$src1, imm:$imm)>; - -def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (PSHUFDri VR128:$src1, imm:$imm)>; - -// Shuffle with SHUFPD instruction. -def : Pat<(v2f64 (X86Shufps VR128:$src1, - (memopv2f64 addr:$src2), (i8 imm:$imm))), - (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Shufps VR128:$src1, - (memopv2f64 addr:$src2), (i8 imm:$imm))), - (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; - -def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; - -def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; - -// Shuffle with SHUFPS instruction. 
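SHUFPS draws its two low result lanes from the first source and its two high lanes from the second, which is why the X86Shufps patterns above can serve v4f32 and (bitcast) v4i32 alike. A worked immediate, assuming SSE:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m128 a = _mm_setr_ps(0, 1, 2, 3);
      __m128 b = _mm_setr_ps(4, 5, 6, 7);
      // _MM_SHUFFLE(3,2,1,0) == 0xE4: a[0], a[1], then b[2], b[3].
      __m128 r = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0));
      float o[4];
      _mm_storeu_ps(o, r);
      std::printf("%g %g %g %g\n", o[0], o[1], o[2], o[3]);  // 0 1 6 7
      return 0;
    }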
-def : Pat<(v4f32 (X86Shufps VR128:$src1, - (memopv4f32 addr:$src2), (i8 imm:$imm))), - (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Shufps VR128:$src1, - (memopv4f32 addr:$src2), (i8 imm:$imm))), - (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; - -def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; - -def : Pat<(v4i32 (X86Shufps VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), - (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4i32 (X86Shufps VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), - (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; - -def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; -def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; - -// Shuffle with MOVHLPS instruction -def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), - (MOVHLPSrr VR128:$src1, VR128:$src2)>; -def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), - (MOVHLPSrr VR128:$src1, VR128:$src2)>; - -// Shuffle with MOVDDUP instruction -def : Pat<(X86Movddup (memopv2f64 addr:$src)), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (memopv2f64 addr:$src)), - (MOVDDUPrm addr:$src)>; - -def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), - (MOVDDUPrm addr:$src)>; - -def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), - (MOVDDUPrm addr:$src)>; - -def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), - (MOVDDUPrm addr:$src)>; - -def : Pat<(X86Movddup (bc_v2f64 - (v2i64 (scalar_to_vector (loadi64 addr:$src))))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (bc_v2f64 - (v2i64 (scalar_to_vector (loadi64 addr:$src))))), - (MOVDDUPrm addr:$src)>; - - -// Shuffle with UNPCKLPS -def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), - (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), - (UNPCKLPSrm VR128:$src1, addr:$src2)>; - -def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), - (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), - (UNPCKLPSrr VR128:$src1, VR128:$src2)>; - -// Shuffle with UNPCKHPS -def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), - (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), - (UNPCKHPSrm VR128:$src1, addr:$src2)>; - -def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), - (VUNPCKHPSrr 
VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), - (UNPCKHPSrr VR128:$src1, VR128:$src2)>; - -// Shuffle with UNPCKLPD -def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKLPDrm VR128:$src1, addr:$src2)>; - -def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), - (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), - (UNPCKLPDrr VR128:$src1, VR128:$src2)>; - -// Shuffle with UNPCKHPD -def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKHPDrm VR128:$src1, addr:$src2)>; - -def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), - (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), - (UNPCKHPDrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKLBW -def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (PUNPCKLBWrm VR128:$src1, addr:$src2)>; -def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)), - (PUNPCKLBWrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKLWD -def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (PUNPCKLWDrm VR128:$src1, addr:$src2)>; -def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)), - (PUNPCKLWDrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKLDQ -def : Pat<(v4i32 (X86Punpckldq VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (PUNPCKLDQrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)), - (PUNPCKLDQrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKLQDQ -def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))), - (PUNPCKLQDQrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)), - (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKHBW -def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (PUNPCKHBWrm VR128:$src1, addr:$src2)>; -def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)), - (PUNPCKHBWrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKHWD -def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (PUNPCKHWDrm VR128:$src1, addr:$src2)>; -def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)), - (PUNPCKHWDrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKHDQ -def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (PUNPCKHDQrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)), - (PUNPCKHDQrr VR128:$src1, VR128:$src2)>; - -// Shuffle with PUNPCKHQDQ -def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))), - (PUNPCKHQDQrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)), - (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>; - -// Shuffle with MOVLHPS -def : Pat<(X86Movlhps VR128:$src1, - (bc_v4f32 (v2f64 
(scalar_to_vector (loadf64 addr:$src2))))), - (MOVHPSrm VR128:$src1, addr:$src2)>; -def : Pat<(X86Movlhps VR128:$src1, - (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), - (MOVHPSrm VR128:$src1, addr:$src2)>; -def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr VR128:$src1, VR128:$src2)>; -def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; - -// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem -// is during lowering, where it's not possible to recognize the load fold cause -// it has two uses through a bitcast. One use disappears at isel time and the -// fold opportunity reappears. -def : Pat<(v2f64 (X86Movddup VR128:$src)), - (UNPCKLPDrr VR128:$src, VR128:$src)>; - -// Shuffle with MOVLHPD -def : Pat<(v2f64 (X86Movlhpd VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))), - (MOVHPDrm VR128:$src1, addr:$src2)>; - -// FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem -// is during lowering, where it's not possible to recognize the load fold cause -// it has two uses through a bitcast. One use disappears at isel time and the -// fold opportunity reappears. -def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))), - (MOVHPDrm VR128:$src1, addr:$src2)>; - -// Shuffle with MOVSS -def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), - (MOVSSrr VR128:$src1, FR32:$src2)>; -def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; -def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; -// FIXME: Instead of a X86Movss there should be a X86Movlps here, the problem -// is during lowering, where it's not possible to recognize the load fold cause -// it has two uses through a bitcast. One use disappears at isel time and the -// fold opportunity reappears. 
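The X86Movss register-register patterns above replace only lane 0 of the first source, pulling the scalar out of the second source via EXTRACT_SUBREG. The equivalent intrinsic, assuming SSE:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m128 a = _mm_setr_ps(0, 1, 2, 3);
      __m128 b = _mm_setr_ps(9, 8, 7, 6);
      __m128 r = _mm_move_ss(a, b);   // MOVSSrr: {b0, a1, a2, a3}
      float o[4];
      _mm_storeu_ps(o, r);
      std::printf("%g %g %g %g\n", o[0], o[1], o[2], o[3]);  // 9 1 2 3
      return 0;
    }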
-def : Pat<(X86Movss VR128:$src1, - (bc_v4i32 (v2i64 (load addr:$src2)))), - (MOVLPSrm VR128:$src1, addr:$src2)>; - -// Shuffle with MOVSD -def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), - (MOVSDrr VR128:$src1, FR64:$src2)>; -def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; -def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; -def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; -def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; - -// Shuffle with MOVSHDUP -def : Pat<(v4i32 (X86Movshdup VR128:$src)), - (MOVSHDUPrr VR128:$src)>; -def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))), - (MOVSHDUPrm addr:$src)>; - -def : Pat<(v4f32 (X86Movshdup VR128:$src)), - (MOVSHDUPrr VR128:$src)>; -def : Pat<(X86Movshdup (memopv4f32 addr:$src)), - (MOVSHDUPrm addr:$src)>; - -// Shuffle with MOVSLDUP -def : Pat<(v4i32 (X86Movsldup VR128:$src)), - (MOVSLDUPrr VR128:$src)>; -def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))), - (MOVSLDUPrm addr:$src)>; - -def : Pat<(v4f32 (X86Movsldup VR128:$src)), - (MOVSLDUPrr VR128:$src)>; -def : Pat<(X86Movsldup (memopv4f32 addr:$src)), - (MOVSLDUPrm addr:$src)>; - -// Shuffle with PSHUFHW -def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), - (PSHUFHWri VR128:$src, imm:$imm)>; -def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), - (PSHUFHWmi addr:$src, imm:$imm)>; - -// Shuffle with PSHUFLW -def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), - (PSHUFLWri VR128:$src, imm:$imm)>; -def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), - (PSHUFLWmi addr:$src, imm:$imm)>; - -// Shuffle with PALIGN -def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + // Zero Upper bits of YMM registers + def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", + [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>; +} -// Shuffle with MOVLPS -def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>; -def : Pat<(X86Movlps VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), - (MOVLPSrm VR128:$src1, addr:$src2)>; -// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem -// is during lowering, where it's not possible to recognize the load fold cause -// it has two uses through a bitcast. One use disappears at isel time and the -// fold opportunity reappears. 
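Also worth noting from the X86PAlign patterns above: the sources are swapped when forming PALIGNR128rr, because PALIGNR shifts the concatenation second-operand:first-operand right by the immediate, counted in bytes. Assuming -mssse3:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m128i lo = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                                 8, 9, 10, 11, 12, 13, 14, 15);
      __m128i hi = _mm_setr_epi8(16, 17, 18, 19, 20, 21, 22, 23,
                                 24, 25, 26, 27, 28, 29, 30, 31);
      // PALIGNR: bytes 4..19 of the 32-byte concatenation hi:lo.
      __m128i r = _mm_alignr_epi8(hi, lo, 4);
      unsigned char o[16];
      _mm_storeu_si128(reinterpret_cast<__m128i*>(o), r);
      for (int i = 0; i < 16; ++i) std::printf("%d ", o[i]);  // 4 .. 19
      std::printf("\n");
      return 0;
    }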
-def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; - -def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; - -// Shuffle with MOVLPD -def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>; -def : Pat<(v2f64 (X86Movlpd VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))), - (MOVLPDrm VR128:$src1, addr:$src2)>; - -// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD -def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst), - (MOVHPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), - (MOVHPDmr addr:$dst, VR128:$src)>; - -def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>; -def : Pat<(store (v4i32 (X86Movlps - (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>; - -def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>; -def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>; +//===----------------------------------------------------------------------===// +// Half precision conversion instructions +// +let Predicates = [HasAVX, HasF16C] in { + def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; + def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; + def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; + def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst), + (ins VR256:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; + def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), + (ins VR256:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; +} diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td index 31de878..05a5b36 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td @@ -67,43 +67,43 @@ def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>, // let Defs = [AL], Uses = [DX] in def IN8rr : I<0xEC, RawFrm, (outs), (ins), - "in{b}\t{%dx, %al|%AL, %DX}", []>; + "in{b}\t{%dx, %al|AL, DX}", []>; let Defs = [AX], Uses = [DX] in def IN16rr : I<0xED, RawFrm, (outs), (ins), - "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize; + "in{w}\t{%dx, %ax|AX, DX}", []>, 
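The VCVTPH2PS/VCVTPS2PH definitions above carry empty pattern lists, so they are assembler/disassembler entries only, but the F16C operations they encode are reachable through intrinsics. A round-trip sketch, assuming hardware with F16C and a compiler given -mf16c:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m128 f = _mm_setr_ps(1.0f, 0.5f, -2.0f, 65504.0f);
      // VCVTPS2PH: four floats -> four halves (imm 0 = round to nearest even).
      __m128i h = _mm_cvtps_ph(f, 0);
      // VCVTPH2PS: widen the halves back to single precision.
      __m128 g = _mm_cvtph_ps(h);
      float o[4];
      _mm_storeu_ps(o, g);
      std::printf("%g %g %g %g\n", o[0], o[1], o[2], o[3]);
      return 0;
    }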
OpSize; let Defs = [EAX], Uses = [DX] in def IN32rr : I<0xED, RawFrm, (outs), (ins), - "in{l}\t{%dx, %eax|%EAX, %DX}", []>; + "in{l}\t{%dx, %eax|EAX, DX}", []>; let Defs = [AL] in def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port), - "in{b}\t{$port, %al|%AL, $port}", []>; + "in{b}\t{$port, %al|AL, $port}", []>; let Defs = [AX] in def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), - "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize; + "in{w}\t{$port, %ax|AX, $port}", []>, OpSize; let Defs = [EAX] in def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), - "in{l}\t{$port, %eax|%EAX, $port}", []>; + "in{l}\t{$port, %eax|EAX, $port}", []>; let Uses = [DX, AL] in def OUT8rr : I<0xEE, RawFrm, (outs), (ins), - "out{b}\t{%al, %dx|%DX, %AL}", []>; + "out{b}\t{%al, %dx|DX, AL}", []>; let Uses = [DX, AX] in def OUT16rr : I<0xEF, RawFrm, (outs), (ins), - "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize; + "out{w}\t{%ax, %dx|DX, AX}", []>, OpSize; let Uses = [DX, EAX] in def OUT32rr : I<0xEF, RawFrm, (outs), (ins), - "out{l}\t{%eax, %dx|%DX, %EAX}", []>; + "out{l}\t{%eax, %dx|DX, EAX}", []>; let Uses = [AL] in def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port), - "out{b}\t{%al, $port|$port, %AL}", []>; + "out{b}\t{%al, $port|$port, AL}", []>; let Uses = [AX] in def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), - "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize; + "out{w}\t{%ax, $port|$port, AX}", []>, OpSize; let Uses = [EAX] in def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), - "out{l}\t{%eax, $port|$port, %EAX}", []>; + "out{l}\t{%eax, $port|$port, EAX}", []>; def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>; def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize; @@ -229,65 +229,65 @@ def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t{$src}", []>, TB; def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), - "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize; def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), - "push{l}\t%cs", []>, Requires<[In32BitMode]>; + "push{l}\t{%cs|CS}", []>, Requires<[In32BitMode]>; def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), - "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%ss|SS}", []>, Requires<[In32BitMode]>, OpSize; def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), - "push{l}\t%ss", []>, Requires<[In32BitMode]>; + "push{l}\t{%ss|SS}", []>, Requires<[In32BitMode]>; def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), - "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%ds|DS}", []>, Requires<[In32BitMode]>, OpSize; def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), - "push{l}\t%ds", []>, Requires<[In32BitMode]>; + "push{l}\t{%ds|DS}", []>, Requires<[In32BitMode]>; def PUSHES16 : I<0x06, RawFrm, (outs), (ins), - "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%es|ES}", []>, Requires<[In32BitMode]>, OpSize; def PUSHES32 : I<0x06, RawFrm, (outs), (ins), - "push{l}\t%es", []>, Requires<[In32BitMode]>; + "push{l}\t{%es|ES}", []>, Requires<[In32BitMode]>; def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), - "push{w}\t%fs", []>, OpSize, TB; + "push{w}\t{%fs|FS}", []>, OpSize, TB; def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), - "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>; + "push{l}\t{%fs|FS}", []>, TB, Requires<[In32BitMode]>; def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), - "push{w}\t%gs", []>, OpSize, TB; + "push{w}\t{%gs|GS}", []>, OpSize, TB; def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), - "push{l}\t%gs", []>, TB, 
Requires<[In32BitMode]>; + "push{l}\t{%gs|GS}", []>, TB, Requires<[In32BitMode]>; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), - "push{q}\t%fs", []>, TB; + "push{q}\t{%fs|FS}", []>, TB; def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), - "push{q}\t%gs", []>, TB; + "push{q}\t{%gs|GS}", []>, TB; // No "pop cs" instruction. def POPSS16 : I<0x17, RawFrm, (outs), (ins), - "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>; + "pop{w}\t{%ss|SS}", []>, OpSize, Requires<[In32BitMode]>; def POPSS32 : I<0x17, RawFrm, (outs), (ins), - "pop{l}\t%ss", []> , Requires<[In32BitMode]>; + "pop{l}\t{%ss|SS}", []> , Requires<[In32BitMode]>; def POPDS16 : I<0x1F, RawFrm, (outs), (ins), - "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>; + "pop{w}\t{%ds|DS}", []>, OpSize, Requires<[In32BitMode]>; def POPDS32 : I<0x1F, RawFrm, (outs), (ins), - "pop{l}\t%ds", []> , Requires<[In32BitMode]>; + "pop{l}\t{%ds|DS}", []> , Requires<[In32BitMode]>; def POPES16 : I<0x07, RawFrm, (outs), (ins), - "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>; + "pop{w}\t{%es|ES}", []>, OpSize, Requires<[In32BitMode]>; def POPES32 : I<0x07, RawFrm, (outs), (ins), - "pop{l}\t%es", []> , Requires<[In32BitMode]>; + "pop{l}\t{%es|ES}", []> , Requires<[In32BitMode]>; def POPFS16 : I<0xa1, RawFrm, (outs), (ins), - "pop{w}\t%fs", []>, OpSize, TB; + "pop{w}\t{%fs|FS}", []>, OpSize, TB; def POPFS32 : I<0xa1, RawFrm, (outs), (ins), - "pop{l}\t%fs", []>, TB , Requires<[In32BitMode]>; + "pop{l}\t{%fs|FS}", []>, TB , Requires<[In32BitMode]>; def POPFS64 : I<0xa1, RawFrm, (outs), (ins), - "pop{q}\t%fs", []>, TB; + "pop{q}\t{%fs|FS}", []>, TB; def POPGS16 : I<0xa9, RawFrm, (outs), (ins), - "pop{w}\t%gs", []>, OpSize, TB; + "pop{w}\t{%gs|GS}", []>, OpSize, TB; def POPGS32 : I<0xa9, RawFrm, (outs), (ins), - "pop{l}\t%gs", []>, TB , Requires<[In32BitMode]>; + "pop{l}\t{%gs|GS}", []>, TB , Requires<[In32BitMode]>; def POPGS64 : I<0xa9, RawFrm, (outs), (ins), - "pop{q}\t%gs", []>, TB; + "pop{q}\t{%gs|GS}", []>, TB; def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), @@ -400,12 +400,29 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; +//===----------------------------------------------------------------------===// +// XSAVE instructions let Defs = [RDX, RAX], Uses = [RCX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; let Uses = [RDX, RAX, RCX] in def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; +let Uses = [RDX, RAX] in { + def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), + "xsave\t$dst", []>, TB; + def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), + "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), + "xrstor\t$dst", []>, TB; + def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), + "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), + "xsaveopt\t$dst", []>, TB; + def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), + "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; +} + //===----------------------------------------------------------------------===// // VIA PadLock crypto instructions let Defs = [RAX, RDI], Uses = [RDX, RDI] in @@ -427,3 +444,24 @@ let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in { } let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in def MONTMUL : 
I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6; + +//===----------------------------------------------------------------------===// +// FS/GS Base Instructions +let Predicates = [In64BitMode] in { + def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins), + "rdfsbase{l}\t$dst", []>, TB, XS; + def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins), + "rdfsbase{q}\t$dst", []>, TB, XS; + def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins), + "rdgsbase{l}\t$dst", []>, TB, XS; + def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins), + "rdgsbase{q}\t$dst", []>, TB, XS; + def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$dst), + "wrfsbase{l}\t$dst", []>, TB, XS; + def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$dst), + "wrfsbase{q}\t$dst", []>, TB, XS; + def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$dst), + "wrgsbase{l}\t$dst", []>, TB, XS; + def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$dst), + "wrgsbase{q}\t$dst", []>, TB, XS; +} diff --git a/contrib/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm/lib/Target/X86/X86InstrVMX.td index daf61e4..09a7a7d0c 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrVMX.td +++ b/contrib/llvm/lib/Target/X86/X86InstrVMX.td @@ -16,9 +16,15 @@ // VMX instructions // 66 0F 38 80 -def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; +def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), + "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8; +def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), + "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8; // 66 0F 38 81 -def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; +def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), + "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8; +def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), + "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8; // 0F 01 C1 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp index e385335..50bc14d 100644 --- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -372,15 +372,10 @@ ReSimplify: case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; - case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; - case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break; - case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; - case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break; case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; - case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break; case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; - case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; + case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break; case X86::MOV16r0: LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 @@ -468,6 +463,18 @@ ReSimplify: case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break; case X86::JG_4: OutMI.setOpcode(X86::JG_1); break; + // Atomic load and store require a 
separate pseudo-inst because Acquire + // implies mayStore and Release implies mayLoad; fix these to regular MOV + // instructions here + case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify; + case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify; + case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify; + case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify; + case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify; + case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify; + case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify; + case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify; + // We don't currently select the correct instruction form for instructions // which have a short %eax, etc. form. Handle this by custom lowering, for // now. @@ -585,6 +592,8 @@ static void LowerTlsAddr(MCStreamer &OutStreamer, } void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { + OutStreamer.EmitCodeRegion(); + X86MCInstLower MCInstLowering(Mang, *MF, *this); switch (MI->getOpcode()) { case TargetOpcode::DBG_VALUE: @@ -601,7 +610,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (OutStreamer.hasRawTextSupport()) OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER")); return; - + case X86::EH_RETURN: case X86::EH_RETURN64: { diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 06043ec..b0bb313 100644 --- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -53,10 +53,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// relocation models. unsigned GlobalBaseReg; - /// ReserveFP - whether the function should reserve the frame pointer - /// when allocating, even if there may not actually be a frame pointer used. - bool ReserveFP; - /// VarArgsFrameIndex - FrameIndex for start of varargs area. int VarArgsFrameIndex; /// RegSaveFrameIndex - X86-64 vararg func register save area. @@ -65,6 +61,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { unsigned VarArgsGPOffset; /// VarArgsFPOffset - X86-64 vararg func fp reg offset. unsigned VarArgsFPOffset; + /// ArgumentStackSize - The number of bytes on stack consumed by the arguments + /// being passed on the stack. 
+ unsigned ArgumentStackSize; public: X86MachineFunctionInfo() : ForceFramePointer(false), @@ -77,7 +76,8 @@ public: VarArgsFrameIndex(0), RegSaveFrameIndex(0), VarArgsGPOffset(0), - VarArgsFPOffset(0) {} + VarArgsFPOffset(0), + ArgumentStackSize(0) {} explicit X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), @@ -87,11 +87,11 @@ public: TailCallReturnAddrDelta(0), SRetReturnReg(0), GlobalBaseReg(0), - ReserveFP(false), VarArgsFrameIndex(0), RegSaveFrameIndex(0), VarArgsGPOffset(0), - VarArgsFPOffset(0) {} + VarArgsFPOffset(0), + ArgumentStackSize(0) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -114,9 +114,6 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } - bool getReserveFP() const { return ReserveFP; } - void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; } - int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; } @@ -128,6 +125,9 @@ public: unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; } void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; } + + unsigned getArgumentStackSize() const { return ArgumentStackSize; } + void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp index f2faf59..c1ac9f3 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -53,7 +52,13 @@ ForceStackAlign("force-align-stack", X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) - : X86GenRegisterInfo(), TM(tm), TII(tii) { + : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() + ? X86::RIP : X86::EIP, + X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false), + X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true)), + TM(tm), TII(tii) { + X86_MC::InitLLVM2SEHRegisterMapping(this); + // Cache some information. const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); Is64Bit = Subtarget->is64Bit(); @@ -70,40 +75,6 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, } } -static unsigned getFlavour(const X86Subtarget *Subtarget, bool isEH) { - if (!Subtarget->is64Bit()) { - if (Subtarget->isTargetDarwin()) { - if (isEH) - return DWARFFlavour::X86_32_DarwinEH; - else - return DWARFFlavour::X86_32_Generic; - } else if (Subtarget->isTargetCygMing()) { - // Unsupported by now, just quick fallback - return DWARFFlavour::X86_32_Generic; - } else { - return DWARFFlavour::X86_32_Generic; - } - } - return DWARFFlavour::X86_64; -} - -/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF -/// specific numbering, used in debug info and exception tables. 
-int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { - const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); - unsigned Flavour = getFlavour(Subtarget, isEH); - - return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour); -} - -/// getLLVMRegNum - This function maps DWARF register numbers to LLVM register. -int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); - unsigned Flavour = getFlavour(Subtarget, isEH); - - return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour); -} - /// getCompactUnwindRegNum - This function maps the register to the number for /// compact unwind encoding. Return -1 if the register isn't valid. int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const { @@ -121,7 +92,7 @@ int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const { int X86RegisterInfo::getSEHRegNum(unsigned i) const { - int reg = getX86RegNum(i); + int reg = X86_MC::getX86RegNum(i); switch (i) { case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: @@ -140,96 +111,16 @@ X86RegisterInfo::getSEHRegNum(unsigned i) const { return reg; } -/// getX86RegNum - This function maps LLVM register identifiers to their X86 -/// specific numbering, which is used in various places encoding instructions. -unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { - switch(RegNo) { - case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; - case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; - case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; - case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; - case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH: - return N86::ESP; - case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH: - return N86::EBP; - case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH: - return N86::ESI; - case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH: - return N86::EDI; - - case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: - return N86::EAX; - case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: - return N86::ECX; - case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: - return N86::EDX; - case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: - return N86::EBX; - case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: - return N86::ESP; - case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: - return N86::EBP; - case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: - return N86::ESI; - case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: - return N86::EDI; - - case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3: - case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: - return RegNo-X86::ST0; - - case X86::XMM0: case X86::XMM8: - case X86::YMM0: case X86::YMM8: case X86::MM0: - return 0; - case X86::XMM1: case X86::XMM9: - case X86::YMM1: case X86::YMM9: case X86::MM1: - return 1; - case X86::XMM2: case X86::XMM10: - case X86::YMM2: case X86::YMM10: case X86::MM2: - return 2; - case X86::XMM3: case X86::XMM11: - case X86::YMM3: case X86::YMM11: case X86::MM3: - return 3; - case X86::XMM4: case X86::XMM12: - case X86::YMM4: case X86::YMM12: case X86::MM4: - return 4; - case X86::XMM5: case X86::XMM13: - case X86::YMM5: case 
X86::YMM13: case X86::MM5: - return 5; - case X86::XMM6: case X86::XMM14: - case X86::YMM6: case X86::YMM14: case X86::MM6: - return 6; - case X86::XMM7: case X86::XMM15: - case X86::YMM7: case X86::YMM15: case X86::MM7: - return 7; - - case X86::ES: return 0; - case X86::CS: return 1; - case X86::SS: return 2; - case X86::DS: return 3; - case X86::FS: return 4; - case X86::GS: return 5; - - case X86::CR0: case X86::CR8 : case X86::DR0: return 0; - case X86::CR1: case X86::CR9 : case X86::DR1: return 1; - case X86::CR2: case X86::CR10: case X86::DR2: return 2; - case X86::CR3: case X86::CR11: case X86::DR3: return 3; - case X86::CR4: case X86::CR12: case X86::DR4: return 4; - case X86::CR5: case X86::CR13: case X86::DR5: return 5; - case X86::CR6: case X86::CR14: case X86::DR6: return 6; - case X86::CR7: case X86::CR15: case X86::DR7: return 7; - - // Pseudo index registers are equivalent to a "none" - // scaled index (See Intel Manual 2A, table 2-3) - case X86::EIZ: - case X86::RIZ: - return 4; - - default: - assert(isVirtualRegister(RegNo) && "Unknown physical register!"); - llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); - return 0; - } +const TargetRegisterClass * +X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, + unsigned Idx) const { + // The sub_8bit sub-register index is more constrained in 32-bit mode. + // It behaves just like the sub_8bit_hi index. + if (!Is64Bit && Idx == X86::sub_8bit) + Idx = X86::sub_8bit_hi; + + // Forward to TableGen's default version. + return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx); } const TargetRegisterClass * @@ -355,8 +246,19 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass* X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ + // Don't allow super-classes of GR8_NOREX. This class is only used after + // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied + // to the full GR8 register class in 64-bit mode, so we cannot allow the + // register class inflation. + // + // The GR8_NOREX class is always used in a way that won't be constrained to a + // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the + // full GR8 class. + if (RC == X86::GR8_NOREXRegisterClass) + return RC; + const TargetRegisterClass *Super = RC; - TargetRegisterClass::sc_iterator I = RC->superclasses_begin(); + TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); do { switch (Super->getID()) { case X86::GR8RegClassID: @@ -741,11 +643,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -unsigned X86RegisterInfo::getRARegister() const { - return Is64Bit ? X86::RIP // Should have dwarf #16. - : X86::EIP; // Should have dwarf #8. -} - unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); return TFI->hasFP(MF) ?
FramePtr : StackPtr; @@ -948,7 +845,7 @@ namespace { for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) { - FuncInfo->setReserveFP(true); + FuncInfo->setForceFramePointer(true); return true; } } diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h index a12eb12..7d39c68 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h @@ -24,22 +24,6 @@ namespace llvm { class TargetInstrInfo; class X86TargetMachine; -/// N86 namespace - Native X86 register numbers -/// -namespace N86 { - enum { - EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7 - }; -} - -/// DWARFFlavour - Flavour of dwarf regnumbers -/// -namespace DWARFFlavour { - enum { - X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2 - }; -} - class X86RegisterInfo : public X86GenRegisterInfo { public: X86TargetMachine &TM; @@ -73,11 +57,6 @@ public: /// register identifier. static unsigned getX86RegNum(unsigned RegNo); - /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum - /// (created by TableGen) for target dependencies. - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - // FIXME: This should be tablegen'd like getDwarfRegNum is int getSEHRegNum(unsigned i) const; @@ -95,6 +74,9 @@ public: getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const; + virtual const TargetRegisterClass * + getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const; + const TargetRegisterClass* getLargestLegalSuperClass(const TargetRegisterClass *RC) const; @@ -136,7 +118,6 @@ public: int SPAdj, RegScavenger *RS = NULL) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getStackRegister() const { return StackPtr; } // FIXME: Move to FrameInfo diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td index 203722a..9a7db36 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td @@ -390,6 +390,13 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64, (GR32_NOREX sub_32bit)]; } +// GR32_NOAX - GR32 registers except EAX. Used by AddRegFrm of XCHG32 in 64-bit +// mode to prevent encoding using the 0x90 NOP encoding. xchg %eax, %eax needs +// to clear the upper 32 bits of RAX, so it is not a NOP. +def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)> { + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; +} + // GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; @@ -455,8 +462,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)]; } -def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256, - (sequence "YMM%u", 0, 15)> { +def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + 256, (sequence "YMM%u", 0, 15)> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)]; } diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp index 02754f9..6406bce 100644 --- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -54,7 +54,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { EVT IntPtr = TLI.getPointerTy(); - const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp index 5e6c659..7064dd0 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp @@ -16,9 +16,11 @@ #include "X86InstrInfo.h" #include "llvm/GlobalValue.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Host.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC @@ -185,24 +187,53 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); - if ((EDX >> 15) & 1) HasCMov = true; ToggleFeature(X86::FeatureCMOV); - if ((EDX >> 23) & 1) X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); - if ((EDX >> 25) & 1) X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); - if ((EDX >> 26) & 1) X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); - if (ECX & 0x1) X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); - if ((ECX >> 9) & 1) X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3); - if ((ECX >> 19) & 1) X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41); - if ((ECX >> 20) & 1) X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42); + if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); } + if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); } + if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); } + if ((EDX >> 26) & 1) { X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); } + if (ECX & 0x1) { X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); } + if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);} + if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);} + if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);} // FIXME: AVX codegen support is not ready. 
- //if ((ECX >> 28) & 1) { HasAVX = true; } ToggleFeature(X86::FeatureAVX); + //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); } bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; - HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); ToggleFeature(X86::FeatureCLMUL); - HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); ToggleFeature(X86::FeatureFMA3); - HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); ToggleFeature(X86::FeaturePOPCNT); - HasAES = IsIntel && ((ECX >> 25) & 0x1); ToggleFeature(X86::FeatureAES); + if (IsIntel && ((ECX >> 1) & 0x1)) { + HasCLMUL = true; + ToggleFeature(X86::FeatureCLMUL); + } + if (IsIntel && ((ECX >> 12) & 0x1)) { + HasFMA3 = true; + ToggleFeature(X86::FeatureFMA3); + } + if (IsIntel && ((ECX >> 22) & 0x1)) { + HasMOVBE = true; + ToggleFeature(X86::FeatureMOVBE); + } + if (IsIntel && ((ECX >> 23) & 0x1)) { + HasPOPCNT = true; + ToggleFeature(X86::FeaturePOPCNT); + } + if (IsIntel && ((ECX >> 25) & 0x1)) { + HasAES = true; + ToggleFeature(X86::FeatureAES); + } + if (IsIntel && ((ECX >> 29) & 0x1)) { + HasF16C = true; + ToggleFeature(X86::FeatureF16C); + } + if (IsIntel && ((ECX >> 30) & 0x1)) { + HasRDRAND = true; + ToggleFeature(X86::FeatureRDRAND); + } + + if ((ECX >> 13) & 0x1) { + HasCmpxchg16b = true; + ToggleFeature(X86::FeatureCMPXCHG16B); + } if (IsIntel || IsAMD) { // Determine if bit test memory instructions are slow. @@ -224,6 +255,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasX86_64 = true; ToggleFeature(X86::Feature64Bit); } + if ((ECX >> 5) & 0x1) { + HasLZCNT = true; + ToggleFeature(X86::FeatureLZCNT); + } if (IsAMD && ((ECX >> 6) & 0x1)) { HasSSE4A = true; ToggleFeature(X86::FeatureSSE4A); @@ -251,14 +286,21 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasCLMUL(false) , HasFMA3(false) , HasFMA4(false) + , HasMOVBE(false) + , HasRDRAND(false) + , HasF16C(false) + , HasLZCNT(false) + , HasBMI(false) , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) + , HasCmpxchg16b(false) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) , TargetTriple(TT) - , In64BitMode(is64Bit) { + , In64BitMode(is64Bit) + , InNaClMode(false) { // Determine default and user specified characteristics if (!FS.empty() || !CPU.empty()) { std::string CPUName = CPU; @@ -304,6 +346,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, if (In64BitMode) ToggleFeature(X86::Mode64Bit); + if (isTargetNaCl()) { + InNaClMode = true; + ToggleFeature(X86::ModeNaCl); + } + if (HasAVX) X86SSELevel = NoMMXSSE; @@ -313,6 +360,9 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); + if(EnableSegmentedStacks && !isTargetELF()) + report_fatal_error("Segmented stacks are only implemented on ELF."); + // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both // 32 and 64 bit) and for all 64-bit targets. if (StackAlignOverride) diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index 6d22027..3258d3d 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -90,6 +90,21 @@ protected: /// HasFMA4 - Target has 4-operand fused multiply-add bool HasFMA4; + /// HasMOVBE - True if the processor has the MOVBE instruction. 
+ bool HasMOVBE; + + /// HasRDRAND - True if the processor has the RDRAND instruction. + bool HasRDRAND; + + /// HasF16C - Processor has 16-bit floating point conversion instructions. + bool HasF16C; + + /// HasLZCNT - Processor has LZCNT instruction. + bool HasLZCNT; + + /// HasBMI - Processor has BMI1 instructions. + bool HasBMI; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -100,6 +115,10 @@ protected: /// operands. This may require setting a feature bit in the processor. bool HasVectorUAMem; + /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction; + /// this is true for most x86-64 chips, but not the first AMD chips. + bool HasCmpxchg16b; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -115,6 +134,9 @@ private: /// In64BitMode - True if compiling for 64-bit, false for 32-bit. bool In64BitMode; + /// InNaClMode - True if compiling for Native Client target. + bool InNaClMode; + public: /// This constructor initializes the data members to match that @@ -165,9 +187,15 @@ public: bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } + bool hasMOVBE() const { return HasMOVBE; } + bool hasRDRAND() const { return HasRDRAND; } + bool hasF16C() const { return HasF16C; } + bool hasLZCNT() const { return HasLZCNT; } + bool hasBMI() const { return HasBMI; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } + bool hasCmpxchg16b() const { return HasCmpxchg16b; } const Triple &getTargetTriple() const { return TargetTriple; } @@ -185,6 +213,11 @@ public: return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing(); } bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } + bool isTargetNaCl() const { + return TargetTriple.getOS() == Triple::NativeClient; + } + bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } + bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; } bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; } @@ -199,7 +232,8 @@ public: } bool isTargetWin64() const { - return In64BitMode && (isTargetMingw() || isTargetWindows()); + // FIXME: x86_64-cygwin has not been released yet. 
+ return In64BitMode && (isTargetCygMing() || isTargetWindows()); } bool isTargetEnvMacho() const { diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp index 9cab0e0..15c6c4e 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -16,65 +16,32 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; -static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, - MCContext &Ctx, TargetAsmBackend &TAB, - raw_ostream &_OS, - MCCodeEmitter *_Emitter, - bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) - return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); - - if (TheTriple.isOSWindows()) - return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); - - return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); -} - extern "C" void LLVMInitializeX86Target() { // Register the target. RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target); RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target); - - // Register the code emitter. - TargetRegistry::RegisterCodeEmitter(TheX86_32Target, - createX86MCCodeEmitter); - TargetRegistry::RegisterCodeEmitter(TheX86_64Target, - createX86MCCodeEmitter); - - // Register the asm backend. - TargetRegistry::RegisterAsmBackend(TheX86_32Target, - createX86_32AsmBackend); - TargetRegistry::RegisterAsmBackend(TheX86_64Target, - createX86_64AsmBackend); - - // Register the object streamer. - TargetRegistry::RegisterObjectStreamer(TheX86_32Target, - createMCStreamer); - TargetRegistry::RegisterObjectStreamer(TheX86_64Target, - createMCStreamer); } -X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : X86TargetMachine(T, TT, CPU, FS, false), +X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : X86TargetMachine(T, TT, CPU, FS, RM, CM, false), DataLayout(getSubtargetImpl()->isTargetDarwin() ? - "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" : + "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-" + "n8:16:32-S128" : (getSubtargetImpl()->isTargetCygMing() || getSubtargetImpl()->isTargetWindows()) ? 
- "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" : - "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"), + "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-" + "n8:16:32-S32" : + "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-" + "n8:16:32-S128"), InstrInfo(*this), TSInfo(*this), TLInfo(*this), @@ -82,11 +49,12 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, } -X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : X86TargetMachine(T, TT, CPU, FS, true), - DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"), +X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : X86TargetMachine(T, TT, CPU, FS, RM, CM, true), + DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + "n8:16:32:64-S128"), InstrInfo(*this), TSInfo(*this), TLInfo(*this), @@ -95,52 +63,14 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, /// X86TargetMachine ctor - Create an X86 target. /// -X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS), +X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, + bool is64Bit) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), ELFWriterInfo(is64Bit, true) { - DefRelocModel = getRelocationModel(); - - // If no relocation model was picked, default as appropriate for the target. - if (getRelocationModel() == Reloc::Default) { - // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode. - // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we - // use static relocation model by default. - if (Subtarget.isTargetDarwin()) { - if (Subtarget.is64Bit()) - setRelocationModel(Reloc::PIC_); - else - setRelocationModel(Reloc::DynamicNoPIC); - } else if (Subtarget.isTargetWin64()) - setRelocationModel(Reloc::PIC_); - else - setRelocationModel(Reloc::Static); - } - - assert(getRelocationModel() != Reloc::Default && - "Relocation mode not picked"); - - // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC - // is defined as a model for code which may be used in static or dynamic - // executables but not necessarily a shared library. On X86-32 we just - // compile in -static mode, in x86-64 we use PIC. - if (getRelocationModel() == Reloc::DynamicNoPIC) { - if (is64Bit) - setRelocationModel(Reloc::PIC_); - else if (!Subtarget.isTargetDarwin()) - setRelocationModel(Reloc::Static); - } - - // If we are on Darwin, disallow static relocation model in X86-64 mode, since - // the Mach-O file format doesn't support it. - if (getRelocationModel() == Reloc::Static && - Subtarget.isTargetDarwin() && - is64Bit) - setRelocationModel(Reloc::PIC_); - // Determine the PICStyle based on the target selected. if (getRelocationModel() == Reloc::Static) { // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. @@ -161,16 +91,20 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, Subtarget.setPICStyle(PICStyles::GOT); } - // Finally, if we have "none" as our PIC style, force to static mode. 
- if (Subtarget.getPICStyle() == PICStyles::None) - setRelocationModel(Reloc::Static); - // default to hard float ABI if (FloatABIType == FloatABI::Default) FloatABIType = FloatABI::Hard; } //===----------------------------------------------------------------------===// +// Command line options for x86 +//===----------------------------------------------------------------------===// +static cl::opt<bool> +UseVZeroUpper("x86-use-vzeroupper", + cl::desc("Minimize AVX to SSE transition penalty"), + cl::init(false)); + +//===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// @@ -200,46 +134,25 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { - PM.add(createSSEDomainFixPass()); - return true; + bool ShouldPrint = false; + if (OptLevel != CodeGenOpt::None && + (Subtarget.hasSSE2() || Subtarget.hasAVX())) { + PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass)); + ShouldPrint = true; } - return false; + + if (Subtarget.hasAVX() && UseVZeroUpper) { + PM.add(createX86IssueVZeroUpperPass()); + ShouldPrint = true; + } + + return ShouldPrint; } bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { - // FIXME: Move this to TargetJITInfo! - // On Darwin, do not override 64-bit setting made in X86TargetMachine(). - if (DefRelocModel == Reloc::Default && - (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) { - setRelocationModel(Reloc::Static); - Subtarget.setPICStyle(PICStyles::None); - } - - PM.add(createX86JITCodeEmitterPass(*this, JCE)); return false; } - -void X86TargetMachine::setCodeModelForStatic() { - - if (getCodeModel() != CodeModel::Default) return; - - // For static codegen, if we're not already set, use Small codegen. - setCodeModel(CodeModel::Small); -} - - -void X86TargetMachine::setCodeModelForJIT() { - - if (getCodeModel() != CodeModel::Default) return; - - // 64-bit JIT places everything in the same buffer except external functions. - if (Subtarget.is64Bit()) - setCodeModel(CodeModel::Large); - else - setCodeModel(CodeModel::Small); -} diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h index 885334a..d1569aa 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h @@ -29,21 +29,17 @@ namespace llvm { class formatted_raw_ostream; +class StringRef; class X86TargetMachine : public LLVMTargetMachine { X86Subtarget Subtarget; X86FrameLowering FrameLowering; X86ELFWriterInfo ELFWriterInfo; - Reloc::Model DefRelocModel; // Reloc model before it's overridden. -private: - // We have specific defaults for X86. 
- virtual void setCodeModelForJIT(); - virtual void setCodeModelForStatic(); - public: - X86TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, + X86TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit); virtual const X86InstrInfo *getInstrInfo() const { @@ -87,8 +83,9 @@ class X86_32TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86JITInfo JITInfo; public: - X86_32TargetMachine(const Target &T, const std::string &M, - const std::string &CPU, const std::string &FS); + X86_32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; @@ -113,8 +110,9 @@ class X86_64TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86JITInfo JITInfo; public: - X86_64TargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + X86_64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp index 1231798..991f322 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp @@ -43,79 +43,3 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI) const { return Mang->getSymbol(GV); } - -unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; - else - return DW_EH_PE_absptr; -} - -unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const { - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - else - return DW_EH_PE_absptr; -} - -unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const { - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - else - return DW_EH_PE_absptr; -} - -unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const { - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; - else - return DW_EH_PE_absptr; -} - -unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const { - CodeModel::Model Model = TM.getCodeModel(); - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || - Model == CodeModel::Medium ? - DW_EH_PE_sdata4 : DW_EH_PE_sdata8); - - if (Model == CodeModel::Small || Model == CodeModel::Medium) - return DW_EH_PE_udata4; - - return DW_EH_PE_absptr; -} - -unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { - CodeModel::Model Model = TM.getCodeModel(); - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_pcrel | (Model == CodeModel::Small ? 
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8); - - if (Model == CodeModel::Small) - return DW_EH_PE_udata4; - - return DW_EH_PE_absptr; -} - -unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const { - if (CFI) - return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - - return DW_EH_PE_udata4; -} - -unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const { - CodeModel::Model Model = TM.getCodeModel(); - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || - Model == CodeModel::Medium ? - DW_EH_PE_sdata4 : DW_EH_PE_sdata8); - - if (Model == CodeModel::Small) - return DW_EH_PE_udata4; - - return DW_EH_PE_absptr; -} diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h index e21b5bf..d7adf27 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h +++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h @@ -33,28 +33,6 @@ namespace llvm { MachineModuleInfo *MMI) const; }; - class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF { - const X86TargetMachine &TM; - public: - X8632_ELFTargetObjectFile(const X86TargetMachine &tm) - :TM(tm) { } - virtual unsigned getPersonalityEncoding() const; - virtual unsigned getLSDAEncoding() const; - virtual unsigned getFDEEncoding(bool CFI) const; - virtual unsigned getTTypeEncoding() const; - }; - - class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF { - const X86TargetMachine &TM; - public: - X8664_ELFTargetObjectFile(const X86TargetMachine &tm) - :TM(tm) { } - virtual unsigned getPersonalityEncoding() const; - virtual unsigned getLSDAEncoding() const; - virtual unsigned getFDEEncoding(bool CFI) const; - virtual unsigned getTTypeEncoding() const; - }; - } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp new file mode 100644 index 0000000..3958494 --- /dev/null +++ b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp @@ -0,0 +1,105 @@ +//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which inserts x86 AVX vzeroupper instructions +// before calls to SSE encoded functions. This avoids the transition latency +// penalty when transferring control between AVX encoded instructions and old +// SSE encoding mode.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-codegen" +#include "X86.h" +#include "X86InstrInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/GlobalValue.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +STATISTIC(NumVZU, "Number of vzeroupper instructions inserted"); + +namespace { + struct VZeroUpperInserter : public MachineFunctionPass { + static char ID; + VZeroUpperInserter() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); + + virtual const char *getPassName() const { return "X86 vzeroupper inserter";} + + private: + const TargetInstrInfo *TII; // Machine instruction info. + MachineBasicBlock *MBB; // Current basic block. + }; + char VZeroUpperInserter::ID = 0; +} + +FunctionPass *llvm::createX86IssueVZeroUpperPass() { + return new VZeroUpperInserter(); +} + +/// runOnMachineFunction - Loop over all of the basic blocks, inserting +/// vzeroupper instructions before function calls. +bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + bool Changed = false; + + // Process all basic blocks in the function. + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + Changed |= processBasicBlock(MF, *BB); + + return Changed; +} + +static bool isCallToModuleFn(const MachineInstr *MI) { + assert(MI->getDesc().isCall() && "Isn't a call instruction"); + + for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + if (!MO.isGlobal()) + continue; + + const GlobalValue *GV = MO.getGlobal(); + GlobalValue::LinkageTypes LT = GV->getLinkage(); + if (GV->isInternalLinkage(LT) || GV->isPrivateLinkage(LT) || + (GV->isExternalLinkage(LT) && !GV->isDeclaration())) + return true; + + return false; + } + return false; +} + +/// processBasicBlock - Loop over all of the instructions in the basic block, +/// inserting vzeroupper instructions before function calls. +bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, + MachineBasicBlock &BB) { + bool Changed = false; + MBB = &BB; + + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { + MachineInstr *MI = I; + DebugLoc dl = I->getDebugLoc(); + + // Insert a vzeroupper instruction before each control transfer + // to functions outside this module. + if (MI->getDesc().isCall() && !isCallToModuleFn(MI)) { + BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER)); + ++NumVZU; + Changed = true; + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index c3b3dc9..0000000 --- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_llvm_library(LLVMXCoreDesc - XCoreMCTargetDesc.cpp - XCoreMCAsmInfo.cpp - ) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
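For context on the VZeroUpperInserter pass added above: once a 256-bit AVX instruction has executed, the upper halves of the YMM registers are live, and a later legacy-SSE encoded instruction pays a hardware state-transition penalty until vzeroupper (or vzeroall) clears them. A minimal sketch of the kind of mixed code the pass is aimed at; the names process and legacy_sse_kernel are hypothetical, with the callee assumed to be an external function compiled without AVX:

#include <immintrin.h>

// Assumed external function built with legacy SSE encodings (not part of this patch).
extern "C" void legacy_sse_kernel(float *dst, const float *src);

void process(float *dst, const float *src) {
  __m256 v = _mm256_loadu_ps(src);  // 256-bit AVX load: upper YMM halves now dirty
  _mm256_storeu_ps(dst, v);
  // The pass would insert a vzeroupper before this call, since the callee is
  // not defined in this module and may execute SSE-encoded instructions.
  legacy_sse_kernel(dst, src);
}

Note that the pass is opt-in at this stage: per the X86TargetMachine.cpp hunk earlier, it is only scheduled when the subtarget has AVX and the -x86-use-vzeroupper flag is given.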
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile deleted file mode 100644 index de61543..0000000 --- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/XCore/TargetDesc/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMXCoreDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 939d97c..276e841 100644 --- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -13,10 +13,11 @@ #include "XCoreMCTargetDesc.h" #include "XCoreMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "XCoreGenInstrInfo.inc" @@ -35,8 +36,10 @@ static MCInstrInfo *createXCoreMCInstrInfo() { return X; } -extern "C" void LLVMInitializeXCoreMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo); +static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitXCoreMCRegisterInfo(X, XCore::LR); + return X; } static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -46,11 +49,40 @@ static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializeXCoreMCSubtargetInfo() { - TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget, - createXCoreMCSubtargetInfo); +static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) { + MCAsmInfo *MAI = new XCoreMCAsmInfo(T, TT); + + // Initial state of the frame pointer is SP. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(XCore::SP, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM); + return X; } -extern "C" void LLVMInitializeXCoreMCAsmInfo() { - RegisterMCAsmInfo<XCoreMCAsmInfo> X(TheXCoreTarget); +// Force static initialization. +extern "C" void LLVMInitializeXCoreTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheXCoreTarget, createXCoreMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheXCoreTarget, + createXCoreMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheXCoreTarget, createXCoreMCRegisterInfo); + + // Register the MC subtarget info. 
+ TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget, + createXCoreMCSubtargetInfo); } diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp index 7aa8965..9a0971d 100644 --- a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp +++ b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp @@ -9,7 +9,7 @@ #include "XCore.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; Target llvm::TheXCoreTarget; diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp index 1a43714..8906b24 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -20,6 +20,7 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -32,11 +33,11 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cctype> @@ -51,6 +52,7 @@ static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional, namespace { class XCoreAsmPrinter : public AsmPrinter { const XCoreSubtarget &Subtarget; + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); public: explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){} @@ -79,6 +81,7 @@ namespace { void EmitFunctionEntryLabel(); void EmitInstruction(const MachineInstr *MI); void EmitFunctionBodyEnd(); + virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; }; } // end of anonymous namespace @@ -88,7 +91,7 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) { assert(((GV->hasExternalLinkage() || GV->hasWeakLinkage()) || GV->hasLinkOnceLinkage()) && "Unexpected linkage"); - if (const ArrayType *ATy = dyn_cast<ArrayType>( + if (ArrayType *ATy = dyn_cast<ArrayType>( cast<PointerType>(GV->getType())->getElementType())) { OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); // FIXME: MCStreamerize. @@ -261,16 +264,57 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } +void XCoreAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { + unsigned NOps = MI->getNumOperands(); + assert(NOps == 4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. 
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); + OS << ']'; + OS << "+"; + printOperand(MI, NOps-2, OS); +} + +MachineLocation XCoreAsmPrinter:: +getDebugValueLocation(const MachineInstr *MI) const { + // Handles frame addresses emitted in XCoreInstrInfo::emitFrameIndexDebugValue. + assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && + "Unexpected MachineOperand types"); + return MachineLocation(MI->getOperand(0).getReg(), + MI->getOperand(1).getImm()); +} + void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream O(Str); - // Check for mov mnemonic - if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm()) - O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", " - << getRegisterName(MI->getOperand(1).getReg()); - else - printInstruction(MI, O); + switch (MI->getOpcode()) { + case XCore::DBG_VALUE: { + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + SmallString<128> TmpStr; + raw_svector_ostream OS(TmpStr); + PrintDebugValueComment(MI, OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + return; + } + case XCore::ADD_2rus: + if (MI->getOperand(2).getImm() == 0) { + O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", " + << getRegisterName(MI->getOperand(1).getReg()); + OutStreamer.EmitRawText(O.str()); + return; + } + break; + } + printInstruction(MI, O); OutStreamer.EmitRawText(O.str()); } diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp index 0578220..7f8b169 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp @@ -100,7 +100,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); bool FP = hasFP(MF); - bool Nested = MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::Nest); + bool Nested = MF.getFunction()-> + getAttributes().hasAttrSomewhere(Attribute::Nest); if (Nested) { loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII); @@ -270,14 +271,6 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, } } -void XCoreFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) - const { - // Initial state of the frame pointer is SP. 
- MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(XCore::SP, 0); - Moves.push_back(MachineMove(0, Dst, Src)); -} - bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h index 7da19f0..c591e93 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h +++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h @@ -42,8 +42,6 @@ namespace llvm { bool hasFP(const MachineFunction &MF) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp index a8dd847..4dac1ce 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -169,9 +169,14 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { CurDAG->getTargetConstantPool(ConstantInt::get( Type::getInt32Ty(*CurDAG->getContext()), Val), TLI.getPointerTy()); - return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, - MVT::Other, CPIdx, - CurDAG->getEntryNode()); + SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, + MVT::Other, CPIdx, + CurDAG->getEntryNode()); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = MF->getMachineMemOperand( + MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 4, 4); + cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1); + return node; } break; } diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 6d040e0..2afe0e3 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -81,6 +81,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Use i32 for setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? // XCore does not have the NodeTypes below. setOperationAction(ISD::BR_CC, MVT::Other, Expand); @@ -147,7 +148,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // TRAMPOLINE is custom lowered. 
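
The hunks that follow split the old ISD::TRAMPOLINE node in two: INIT_TRAMPOLINE writes the stub into a caller-provided buffer (the five chained stores below), while ADJUST_TRAMPOLINE yields the callable address, which on XCore is the buffer itself. A conceptual C++ model of that split, under loudly stated assumptions: the word values are placeholders, not XCore encodings, and which slots hold the static chain and function address is illustrative only:

    // Conceptual model of the INIT/ADJUST trampoline split; not the real
    // lowering and not real XCore instruction encodings.
    #include <cstdint>

    struct Trampoline { uint32_t Words[5]; };  // five words, matching the
                                               // five stores in the lowering

    void initTrampolineModel(Trampoline *T, uint32_t Chain, uint32_t Fn) {
      T->Words[0] = 0;     // placeholder stub instruction word
      T->Words[1] = 0;     // placeholder stub instruction word
      T->Words[2] = 0;     // placeholder stub instruction word
      T->Words[3] = Chain; // static chain value (slot assignment illustrative)
      T->Words[4] = Fn;    // nested function address (ditto)
    }

    const void *adjustTrampolineModel(const Trampoline *T) {
      return T;            // XCore: the initialized stub is directly callable
    }
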
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); + setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); maxStoresPerMemset = maxStoresPerMemsetOptSize = 4; maxStoresPerMemmove = maxStoresPerMemmoveOptSize @@ -180,7 +182,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADD: case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); + case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); + case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); default: llvm_unreachable("unimplemented operand"); return SDValue(); @@ -252,8 +255,8 @@ static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) { DAG.getConstant(Intrinsic::xcore_getid, MVT::i32)); } -static inline bool isZeroLengthArray(const Type *Ty) { - const ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty); +static inline bool isZeroLengthArray(Type *Ty) { + ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty); return AT && (AT->getNumElements() == 0); } @@ -275,7 +278,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const llvm_unreachable("Thread local object not a GlobalVariable?"); return SDValue(); } - const Type *Ty = cast<PointerType>(GV->getType())->getElementType(); + Type *Ty = cast<PointerType>(GV->getType())->getElementType(); if (!Ty->isSized() || isZeroLengthArray(Ty)) { #ifndef NDEBUG errs() << "Size of thread local object " << GVar->getName() @@ -465,7 +468,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } // Lower to a call to __misaligned_load(BasePtr). - const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -524,7 +527,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const } // Lower to a call to __misaligned_store(BasePtr, Value). 
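
As the hunk above shows, an unaligned i32 load is lowered to a runtime call __misaligned_load(BasePtr). A sketch of the semantics that helper must provide, assembling the word byte-by-byte; XCore is little-endian (the target's DataLayout string begins with "e-"). This models the contract only, not the shipped runtime routine:

    // Semantics of __misaligned_load(BasePtr): a 32-bit little-endian load
    // from a possibly unaligned address. Model code, not the XCore runtime.
    #include <cstdint>

    extern "C" uint32_t misaligned_load_model(const void *P) {
      const uint8_t *B = static_cast<const uint8_t *>(P);
      return static_cast<uint32_t>(B[0])
           | static_cast<uint32_t>(B[1]) << 8
           | static_cast<uint32_t>(B[2]) << 16
           | static_cast<uint32_t>(B[3]) << 24;
    }
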
- const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -789,7 +792,12 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, } SDValue XCoreTargetLowering:: -LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { +LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + return Op.getOperand(0); +} + +SDValue XCoreTargetLowering:: +LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -841,9 +849,7 @@ LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(TrmpAddr, 16), false, false, 0); - SDValue Ops[] = - { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5); } //===----------------------------------------------------------------------===// @@ -1148,10 +1154,10 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, int offset = 0; // Save remaining registers, storing higher register numbers at a higher // address - for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) { + for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) { // Create a stack slot int FI = MFI->CreateFixedObject(4, offset, true); - if (i == FirstVAReg) { + if (i == (int)FirstVAReg) { XFI->setVarArgsFrameIndex(FI); } offset -= StackSlotSize; @@ -1409,7 +1415,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // operands are constant canonicalize smallest to RHS. if ((N0C && !N1C) || (N0C && N1C && N0C->getZExtValue() < N1C->getZExtValue())) - return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT), N1, N0, N2, N3); + return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT), + N1, N0, N2, N3); // lmul(x, 0, a, b) if (N1C && N1C->isNullValue()) { @@ -1548,7 +1555,7 @@ static inline bool isImmUs4(int64_t val) /// by AM is legal for this target, for a load/store of the specified type. 
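
The signedness change in the vararg register-save loop above fixes a wraparound hazard: counting down with an unsigned index and the condition "i >= FirstVAReg" never terminates when FirstVAReg is 0, because --i wraps past zero to UINT_MAX. A tiny demonstration of the fixed form (4 stands in for array_lengthof(ArgRegs)):

    // Why the loop index changed from unsigned to int.
    #include <cassert>

    int countDown(int First) {              // fixed form, as in the patch
      int N = 0;
      for (int i = 4 - 1; i >= First; --i)  // terminates even for First == 0
        ++N;
      // With "unsigned i" and First == 0, the condition would always hold
      // and --i would wrap, so the loop would never exit.
      return N;
    }

    int main() { assert(countDown(0) == 4 && countDown(3) == 1); }
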
bool XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const { + Type *Ty) const { if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h index 9c803be..d6c5b32 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h @@ -101,7 +101,7 @@ namespace llvm { MachineBasicBlock *MBB) const; virtual bool isLegalAddressingMode(const AddrMode &AM, - const Type *Ty) const; + Type *Ty) const; private: const XCoreTargetMachine &TM; @@ -145,7 +145,8 @@ namespace llvm { SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::pair<unsigned, const TargetRegisterClass*> diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp index f90481f..a0946a1 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp @@ -17,11 +17,10 @@ #include "llvm/MC/MCContext.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_CTOR #include "XCoreGenInstrInfo.inc" @@ -387,6 +386,15 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addImm(0); } +MachineInstr* +XCoreInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, + uint64_t Offset, const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(XCore::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + /// ReverseBranchCondition - Return the inverse opcode of the /// specified Branch instruction. bool XCoreInstrInfo:: diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h index 840b1e1..d354802 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h @@ -78,6 +78,11 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; virtual bool ReverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td index 55c7527..4d2e93b 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td @@ -572,7 +572,7 @@ def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr), [(store GRRegs:$val, ADDRdpii:$addr)]>; //let Uses = [CP] in .. 
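
The new emitFrameIndexDebugValue hook above builds a DBG_VALUE with exactly the four operands that getDebugValueLocation and PrintDebugValueComment earlier in this patch assert on: a frame index (rewritten to a register by frame-index elimination), a zero immediate, the variable's offset, and the variable metadata. A model of that layout with illustrative types, not the real MachineInstrBuilder API:

    // Operand layout produced by emitFrameIndexDebugValue (model only):
    //   op 0: frame index   op 1: imm 0   op 2: offset   op 3: variable MD
    #include <cassert>

    struct DbgValueOps {
      int         FrameIx;   // op 0
      long long   Zero;      // op 1, always 0 here
      long long   Offset;    // op 2
      const void *Variable;  // op 3, stands in for const MDNode*
    };

    DbgValueOps makeFrameIndexDbgValue(int FrameIx, long long Offset,
                                       const void *Var) {
      return {FrameIx, 0, Offset, Var};  // mirrors the four builder calls
    }

    int main() {
      DbgValueOps Ops = makeFrameIndexDbgValue(1, 8, nullptr);
      assert(Ops.Zero == 0 && Ops.FrameIx == 1 && Ops.Offset == 8);
    }
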
-let mayLoad = 1, isReMaterializable = 1 in +let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in defm LDWCP : FRU6_LRU6_cp<"ldw">; let Uses = [SP] in { @@ -739,7 +739,7 @@ def LDAP_lu10_ba : _FLU10<(outs), let isCall=1, // All calls clobber the link register and the non-callee-saved registers: -Defs = [R0, R1, R2, R3, R11, LR] in { +Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { def BL_u10 : _FU10< (outs), (ins calltarget:$target, variable_ops), @@ -754,7 +754,7 @@ def BL_lu10 : _FLU10< } // Two operand short -// TODO eet, eef, testwct, tsetmr, sext (reg), zext (reg) +// TODO eet, eef, tsetmr def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>; @@ -764,15 +764,25 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), [(set GRRegs:$dst, (ineg GRRegs:$b))]>; let Constraints = "$src1 = $dst" in { -let neverHasSideEffects = 1 in def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), - "sext $dst, $src2", - []>; + "sext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, + immBitp:$src2))]>; + +def SEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), + "sext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, + GRRegs:$src2))]>; -let neverHasSideEffects = 1 in def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), - "zext $dst, $src2", - []>; + "zext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, + immBitp:$src2))]>; + +def ZEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), + "zext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, + GRRegs:$src2))]>; def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), "andnot $dst, $src2", @@ -819,7 +829,8 @@ def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), let Constraints = "$src = $dst" in def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), "outshr res[$r], $src", - [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>; + [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, + GRRegs:$src))]>; def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), "inct $dst, res[$r]", @@ -836,7 +847,8 @@ def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), let Constraints = "$src = $dst" in def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), "inshr $dst, res[$r]", - [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>; + [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, + GRRegs:$src))]>; def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "chkct res[$r], $val", @@ -846,6 +858,14 @@ def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val), "chkct res[$r], $val", [(int_xcore_chkct GRRegs:$r, immUs:$val)]>; +def TESTCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "testct $dst, res[$src]", + [(set GRRegs:$dst, (int_xcore_testct GRRegs:$src))]>; + +def TESTWCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "testwct $dst, res[$src]", + [(set GRRegs:$dst, (int_xcore_testwct GRRegs:$src))]>; + def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "setd res[$r], $val", [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; @@ -871,7 +891,6 @@ def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>; // Two operand long -// TODO endin, peek, // getd, testlcl def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", @@ -917,6 +936,14 @@ def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), "setpsc res[$src1], 
$src2", [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>; +def PEEK_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "peek $dst, res[$src]", + [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>; + +def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "endin $dst, res[$src]", + [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>; + // One operand short // TODO edu, eeu, waitet, waitef, tstart, clrtp // setdp, setcp, setev, kcall @@ -960,7 +987,7 @@ def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src), let isCall=1, // All calls clobber the link register and the non-callee-saved registers: -Defs = [R0, R1, R2, R3, R11, LR] in { +Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops), "bla $addr", [(XCoreBranchLink GRRegs:$addr)]>; @@ -974,10 +1001,15 @@ def FREER_1r : _F1R<(outs), (ins GRRegs:$r), "freer res[$r]", [(int_xcore_freer GRRegs:$r)]>; -let Uses=[R11] in +let Uses=[R11] in { def SETV_1r : _F1R<(outs), (ins GRRegs:$r), - "setv res[$r], r11", - [(int_xcore_setv GRRegs:$r, R11)]>; + "setv res[$r], r11", + [(int_xcore_setv GRRegs:$r, R11)]>; + +def SETEV_1r : _F1R<(outs), (ins GRRegs:$r), + "setev res[$r], r11", + [(int_xcore_setev GRRegs:$r, R11)]>; +} def EEU_1r : _F1R<(outs), (ins GRRegs:$r), "eeu res[$r]", @@ -985,15 +1017,24 @@ def EEU_1r : _F1R<(outs), (ins GRRegs:$r), // Zero operand short // TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, -// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret, +// stet, getkep, getksp, setkep, getid, kret, dcall, dret, // dentsp, drestsp def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>; -let Defs = [R11] in +let Defs = [R11] in { def GETID_0R : _F0R<(outs), (ins), - "get r11, id", - [(set R11, (int_xcore_getid))]>; + "get r11, id", + [(set R11, (int_xcore_getid))]>; + +def GETED_0R : _F0R<(outs), (ins), + "get r11, ed", + [(set R11, (int_xcore_geted))]>; + +def GETET_0R : _F0R<(outs), (ins), + "get r11, et", + [(set R11, (int_xcore_getet))]>; +} def SSYNC_0r : _F0R<(outs), (ins), "ssync", diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp index 357a4a0..1b78b37 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" @@ -39,7 +38,7 @@ using namespace llvm; XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii) - : XCoreGenRegisterInfo(), TII(tii) { + : XCoreGenRegisterInfo(XCore::LR), TII(tii) { } // helper functions @@ -321,20 +320,8 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value); } -int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -int XCoreRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { - return XCoreGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); -} - unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); return TFI->hasFP(MF) ? 
XCore::R10 : XCore::SP; } - -unsigned XCoreRegisterInfo::getRARegister() const { - return XCore::LR; -} diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h index 801d9eb..5c28f39 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h +++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h @@ -60,7 +60,6 @@ public: int SPAdj, RegScavenger *RS = NULL) const; // Debug information queries. - unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; //! Return the array of argument passing registers @@ -74,10 +73,6 @@ public: //! Return whether to emit frame moves static bool needsFrameMoves(const MachineFunction &MF); - - //! Get DWARF debugging register number - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - int getLLVMRegNum(unsigned RegNum, bool isEH) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp index ad069bf..b4e9927 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreSubtarget.cpp @@ -13,7 +13,7 @@ #include "XCoreSubtarget.h" #include "XCore.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp index 342966a..fdc5d35 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp @@ -14,15 +14,15 @@ #include "XCore.h" #include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; /// XCoreTargetMachine ctor - Create an ILP32 architecture model /// -XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, - const std::string &FS) - : LLVMTargetMachine(T, TT, CPU, FS), +XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), Subtarget(TT, CPU, FS), DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" "i16:16:32-i32:32:32-i64:32:32-n32"), diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h index 6235ac3..83d09d6d 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h +++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h @@ -32,8 +32,9 @@ class XCoreTargetMachine : public LLVMTargetMachine { XCoreTargetLowering TLInfo; XCoreSelectionDAGInfo TSInfo; public: - XCoreTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS); + XCoreTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const XCoreFrameLowering *getFrameLowering() const { |
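
The XCoreTargetMachine constructor change above replaces const std::string& parameters with StringRef (and threads Reloc::Model/CodeModel::Model through to LLVMTargetMachine). The StringRef part is a cost optimization: a pointer-plus-length view binds to string literals and substrings without allocating. A sketch of the idea using std::string_view as a stand-in for llvm::StringRef, with TargetMachineModel as a hypothetical name:

    // Why the ctor takes StringRef: views avoid temporary std::string copies
    // at every call site and layer of forwarding. C++17; model code only.
    #include <cassert>
    #include <string>
    #include <string_view>

    struct TargetMachineModel {
      std::string Triple, CPU, FS;   // owned copies are made exactly once
      TargetMachineModel(std::string_view TT, std::string_view CPU,
                         std::string_view FS)
          : Triple(TT), CPU(CPU), FS(FS) {}
    };

    int main() {
      // Literals bind directly; no std::string temporaries are created.
      TargetMachineModel TM("xcore-unknown-unknown", "generic", "");
      assert(TM.Triple == "xcore-unknown-unknown");
    }
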