diff options
Diffstat (limited to 'include/llvm/Target')
-rw-r--r-- | include/llvm/Target/Target.td | 74 | ||||
-rw-r--r-- | include/llvm/Target/TargetCallingConv.h | 6 | ||||
-rw-r--r-- | include/llvm/Target/TargetData.h | 8 | ||||
-rw-r--r-- | include/llvm/Target/TargetELFWriterInfo.h | 3 | ||||
-rw-r--r-- | include/llvm/Target/TargetInstrInfo.h | 188 | ||||
-rw-r--r-- | include/llvm/Target/TargetItinerary.td | 136 | ||||
-rw-r--r-- | include/llvm/Target/TargetLibraryInfo.h | 187 | ||||
-rw-r--r-- | include/llvm/Target/TargetLowering.h | 152 | ||||
-rw-r--r-- | include/llvm/Target/TargetMachine.h | 9 | ||||
-rw-r--r-- | include/llvm/Target/TargetOptions.h | 51 | ||||
-rw-r--r-- | include/llvm/Target/TargetRegisterInfo.h | 189 | ||||
-rw-r--r-- | include/llvm/Target/TargetSchedule.td | 131 | ||||
-rw-r--r-- | include/llvm/Target/TargetSelectionDAG.td | 12 |
13 files changed, 822 insertions, 324 deletions
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index fa1ec55..24be2b1 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -64,18 +64,6 @@ class Register<string n, list<string> altNames = []> { // register. list<RegAltNameIndex> RegAltNameIndices = []; - // CompositeIndices - Specify subreg indices that don't correspond directly to - // a register in SubRegs and are not inherited. The following formats are - // supported: - // - // (a) Identity - Reg:a == Reg - // (a b) Alias - Reg:a == Reg:b - // (a b,c) Composite - Reg:a == (Reg:b):c - // - // This can be used to disambiguate a sub-sub-register that exists in more - // than one subregister and other weird stuff. - list<dag> CompositeIndices = []; - // DwarfNumbers - Numbers used internally by gcc/gdb to identify the register. // These values can be determined by locating the <target>.h file in the // directory llvmgcc/gcc/config/<target>/ and looking for REGISTER_NAMES. The @@ -96,6 +84,9 @@ class Register<string n, list<string> altNames = []> { // x86 register AX is covered by its sub-registers AL and AH, but EAX is not // covered by its sub-register AX. bit CoveredBySubRegs = 0; + + // HWEncoding - The target specific hardware encoding for this register. + bits<16> HWEncoding = 0; } // RegisterWithSubRegs - This can be used to define instances of Register which @@ -108,13 +99,20 @@ class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> { let SubRegs = subregs; } +// DAGOperand - An empty base class that unifies RegisterClass's and other forms +// of Operand's that are legal as type qualifiers in DAG patterns. This should +// only ever be used for defining multiclasses that are polymorphic over both +// RegisterClass's and other Operand's. +class DAGOperand { } + // RegisterClass - Now that all of the registers are defined, and aliases // between registers are defined, specify which registers belong to which // register classes. 
This also defines the default allocation order of // registers by register allocators. // class RegisterClass<string namespace, list<ValueType> regTypes, int alignment, - dag regList, RegAltNameIndex idx = NoRegAltName> { + dag regList, RegAltNameIndex idx = NoRegAltName> + : DAGOperand { string Namespace = namespace; // RegType - Specify the list ValueType of the registers in this register @@ -151,10 +149,6 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment, // a valid alternate name for the given index. RegAltNameIndex altNameIndex = idx; - // SubRegClasses - Specify the register class of subregisters as a list of - // dags: (RegClass SubRegIndex, SubRegindex, ...) - list<dag> SubRegClasses = []; - // isAllocatable - Specify that the register class can be used for virtual // registers and register allocation. Some register classes are only used to // model instruction operand constraints, and should have isAllocatable = 0. @@ -192,7 +186,8 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment, // also in the second set. // // (sequence "R%u", 0, 15) -> [R0, R1, ..., R15]. Generate a sequence of -// numbered registers. +// numbered registers. Takes an optional 4th operand which is a stride to use +// when generating the sequence. // // (shl GPR, 4) - Remove the first N elements. // @@ -245,9 +240,6 @@ class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs> { // SubRegIndices - N SubRegIndex instances. This provides the names of the // sub-registers in the synthesized super-registers. list<SubRegIndex> SubRegIndices = Indices; - - // Compose sub-register indices like in a normal Register. - list<dag> CompositeIndices = []; } @@ -402,6 +394,13 @@ class Instruction { string AsmMatchConverter = ""; + /// TwoOperandAliasConstraint - Enable TableGen to auto-generate a + /// two-operand matcher inst-alias for a three operand instruction. 
+ /// For example, the arm instruction "add r3, r3, r5" can be written + /// as "add r3, r5". The constraint is of the same form as a tied-operand + /// constraint. For example, "$Rn = $Rd". + string TwoOperandAliasConstraint = ""; + ///@} } @@ -431,6 +430,10 @@ class Predicate<string cond> { /// e.g. "ModeThumb,FeatureThumb2" is translated to /// "(Bits & ModeThumb) != 0 && (Bits & FeatureThumb2) != 0". string AssemblerCondString = ""; + + /// PredicateName - User-level name to use for the predicate. Mainly for use + /// in diagnostics such as missing feature errors in the asm matcher. + string PredicateName = ""; } /// NoHonorSignDependentRounding - This predicate is true if support for @@ -512,6 +515,11 @@ class AsmOperandClass { /// to immediates or registers and are very instruction specific (as flags to /// set in a processor register, coprocessor number, ...). string ParserMethod = ?; + + // The diagnostic type to present when referencing this operand in a + // match failure error message. By default, use a generic "invalid operand" + // diagnostic. The target AsmParser maps these codes to text. + string DiagnosticType = ""; } def ImmAsmOperand : AsmOperandClass { @@ -521,7 +529,7 @@ def ImmAsmOperand : AsmOperandClass { /// Operand Types - These provide the built-in operand types that may be used /// by a target. Targets can optionally provide their own operand types as /// needed, though this should not be needed for RISC targets. -class Operand<ValueType ty> { +class Operand<ValueType ty> : DAGOperand { ValueType Type = ty; string PrintMethod = "printOperand"; string EncoderMethod = ""; @@ -541,7 +549,8 @@ class Operand<ValueType ty> { AsmOperandClass ParserMatchClass = ImmAsmOperand; } -class RegisterOperand<RegisterClass regclass, string pm = "printOperand"> { +class RegisterOperand<RegisterClass regclass, string pm = "printOperand"> + : DAGOperand { // RegClass - The register class of the operand. 
RegisterClass RegClass = regclass; // PrintMethod - The target method to call to print register operands of @@ -729,7 +738,7 @@ class AsmParser { def DefaultAsmParser : AsmParser; //===----------------------------------------------------------------------===// -// AsmParserVariant - Subtargets can have multiple different assembly parsers +// AsmParserVariant - Subtargets can have multiple different assembly parsers // (e.g. AT&T vs Intel syntax on X86 for example). This class can be // implemented by targets to describe such variants. // @@ -754,9 +763,10 @@ def DefaultAsmParserVariant : AsmParserVariant; /// AssemblerPredicate - This is a Predicate that can be used when the assembler /// matches instructions and aliases. -class AssemblerPredicate<string cond> { +class AssemblerPredicate<string cond, string name = ""> { bit AssemblerMatcherPredicate = 1; string AssemblerCondString = cond; + string PredicateName = name; } /// TokenAlias - This class allows targets to define assembler token @@ -861,7 +871,7 @@ class Target { // AssemblyParsers - The AsmParser instances available for this target. list<AsmParser> AssemblyParsers = [DefaultAsmParser]; - /// AssemblyParserVariants - The AsmParserVariant instances available for + /// AssemblyParserVariants - The AsmParserVariant instances available for /// this target. list<AsmParserVariant> AssemblyParserVariants = [DefaultAsmParserVariant]; @@ -909,6 +919,10 @@ class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f> { // string Name = n; + // SchedModel - The machine model for scheduling and instruction cost. + // + SchedMachineModel SchedModel = NoSchedModel; + // ProcItin - The scheduling information for the target processor. 
// ProcessorItineraries ProcItin = pi; @@ -917,6 +931,14 @@ class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f> { list<SubtargetFeature> Features = f; } +// ProcessorModel allows subtargets to specify the more general +// SchedMachineModel instead of a ProcessorItinerary. Subtargets will +// gradually move to this newer form. +class ProcessorModel<string n, SchedMachineModel m, list<SubtargetFeature> f> + : Processor<n, NoItineraries, f> { + let SchedModel = m; +} + //===----------------------------------------------------------------------===// // Pull in the common support for calling conventions. // diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h index a6251e7..f8cebef 100644 --- a/include/llvm/Target/TargetCallingConv.h +++ b/include/llvm/Target/TargetCallingConv.h @@ -36,16 +36,16 @@ namespace ISD { static const uint64_t ByValOffs = 4; static const uint64_t Nest = 1ULL<<5; ///< Nested fn static chain static const uint64_t NestOffs = 5; - static const uint64_t ByValAlign = 0xFULL << 6; //< Struct alignment + static const uint64_t ByValAlign = 0xFULL << 6; ///< Struct alignment static const uint64_t ByValAlignOffs = 6; static const uint64_t Split = 1ULL << 10; static const uint64_t SplitOffs = 10; static const uint64_t OrigAlign = 0x1FULL<<27; static const uint64_t OrigAlignOffs = 27; - static const uint64_t ByValSize = 0xffffffffULL << 32; //< Struct size + static const uint64_t ByValSize = 0xffffffffULL << 32; ///< Struct size static const uint64_t ByValSizeOffs = 32; - static const uint64_t One = 1ULL; //< 1 of this type, for shifts + static const uint64_t One = 1ULL; ///< 1 of this type, for shifts uint64_t Flags; public: diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h index d116f39..4f94ab7 100644 --- a/include/llvm/Target/TargetData.h +++ b/include/llvm/Target/TargetData.h @@ -53,10 +53,10 @@ enum AlignTypeEnum { /// @note The unusual order of elements 
in the structure attempts to reduce /// padding and make the structure slightly more cache friendly. struct TargetAlignElem { - AlignTypeEnum AlignType : 8; //< Alignment type (AlignTypeEnum) - unsigned ABIAlign; //< ABI alignment for this type/bitw - unsigned PrefAlign; //< Pref. alignment for this type/bitw - uint32_t TypeBitWidth; //< Type bit width + AlignTypeEnum AlignType : 8; ///< Alignment type (AlignTypeEnum) + unsigned ABIAlign; ///< ABI alignment for this type/bitw + unsigned PrefAlign; ///< Pref. alignment for this type/bitw + uint32_t TypeBitWidth; ///< Type bit width /// Initializer static TargetAlignElem get(AlignTypeEnum align_type, unsigned abi_align, diff --git a/include/llvm/Target/TargetELFWriterInfo.h b/include/llvm/Target/TargetELFWriterInfo.h index 114295e..5e48629 100644 --- a/include/llvm/Target/TargetELFWriterInfo.h +++ b/include/llvm/Target/TargetELFWriterInfo.h @@ -43,7 +43,8 @@ namespace llvm { EM_ARM = 40, // ARM EM_ALPHA = 41, // DEC Alpha EM_SPARCV9 = 43, // SPARC V9 - EM_X86_64 = 62 // AMD64 + EM_X86_64 = 62, // AMD64 + EM_HEXAGON = 164 // Qualcomm Hexagon }; // ELF File classes diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index d1e380c..a18b030 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_TARGET_TARGETINSTRINFO_H #define LLVM_TARGET_TARGETINSTRINFO_H +#include "llvm/ADT/SmallSet.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,6 +28,7 @@ class MachineMemOperand; class MachineRegisterInfo; class MDNode; class MCInst; +class MCSchedModel; class SDNode; class ScheduleHazardRecognizer; class SelectionDAG; @@ -57,7 +59,8 @@ public: /// class constraint for OpNum, or NULL. 
const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo *TRI, + const MachineFunction &MF) const; /// isTriviallyReMaterializable - Return true if the instruction is trivially /// rematerializable, meaning it has no side effects and requires no operands @@ -185,14 +188,6 @@ public: const MachineInstr *Orig, const TargetRegisterInfo &TRI) const = 0; - /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the - /// two-addrss instruction inserted by two-address pass. - virtual void scheduleTwoAddrSource(MachineInstr *SrcMI, - MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const { - // Do nothing. - } - /// duplicate - Create a duplicate of the Orig instruction in MF. This is like /// MachineFunction::CloneMachineInstr(), but the target may update operands /// that are required to be unique. @@ -319,7 +314,7 @@ public: /// being executed is given by Probability, and Confidence is a measure /// of our confidence that it will be properly predicted. virtual - bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, const BranchProbability &Probability) const { return false; @@ -347,7 +342,7 @@ public: /// Probability, and Confidence is a measure of our confidence that it /// will be properly predicted. virtual bool - isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, + isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, const BranchProbability &Probability) const { return false; } @@ -368,6 +363,56 @@ public: return false; } + /// canInsertSelect - Return true if it is possible to insert a select + /// instruction that chooses between TrueReg and FalseReg based on the + /// condition code in Cond. 
+ /// + /// When successful, also return the latency in cycles from TrueReg, + /// FalseReg, and Cond to the destination register. The Cond latency should + /// compensate for a conditional branch being removed. For example, if a + /// conditional branch has a 3 cycle latency from the condition code read, + /// and a cmov instruction has a 2 cycle latency from the condition code + /// read, CondCycles should be returned as -1. + /// + /// @param MBB Block where select instruction would be inserted. + /// @param Cond Condition returned by AnalyzeBranch. + /// @param TrueReg Virtual register to select when Cond is true. + /// @param FalseReg Virtual register to select when Cond is false. + /// @param CondCycles Latency from Cond+Branch to select output. + /// @param TrueCycles Latency from TrueReg to select output. + /// @param FalseCycles Latency from FalseReg to select output. + virtual bool canInsertSelect(const MachineBasicBlock &MBB, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, + int &TrueCycles, int &FalseCycles) const { + return false; + } + + /// insertSelect - Insert a select instruction into MBB before I that will + /// copy TrueReg to DstReg when Cond is true, and FalseReg to DstReg when + /// Cond is false. + /// + /// This function can only be called after canInsertSelect() returned true. + /// The condition in Cond comes from AnalyzeBranch, and it can be assumed + /// that the same flags or registers required by Cond are available at the + /// insertion point. + /// + /// @param MBB Block where select instruction should be inserted. + /// @param I Insertion point. + /// @param DL Source location for debugging. + /// @param DstReg Virtual register to be defined by select instruction. + /// @param Cond Condition as computed by AnalyzeBranch. + /// @param TrueReg Virtual register to copy when Cond is true. + /// @param FalseReg Virtual register to copy when Cond is false. 
+ virtual void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned TrueReg, unsigned FalseReg) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertSelect!"); + } + /// copyPhysReg - Emit instructions to copy a pair of physical registers. virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, @@ -608,6 +653,13 @@ public: CreateTargetHazardRecognizer(const TargetMachine *TM, const ScheduleDAG *DAG) const = 0; + /// CreateTargetMIHazardRecognizer - Allocate and return a hazard recognizer + /// to use for this target when scheduling the machine instructions before + /// register allocation. + virtual ScheduleHazardRecognizer* + CreateTargetMIHazardRecognizer(const InstrItineraryData*, + const ScheduleDAG *DAG) const = 0; + /// CreateTargetPostRAHazardRecognizer - Allocate and return a hazard /// recognizer to use for this target when scheduling the machine instructions /// after register allocation. @@ -615,23 +667,40 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, const ScheduleDAG *DAG) const = 0; - /// AnalyzeCompare - For a comparison instruction, return the source register - /// in SrcReg and the value it compares against in CmpValue. Return true if - /// the comparison instruction can be analyzed. - virtual bool AnalyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, int &Mask, int &Value) const { + /// analyzeCompare - For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. 
+ virtual bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { return false; } - /// OptimizeCompareInstr - See if the comparison instruction can be converted + /// optimizeCompareInstr - See if the comparison instruction can be converted /// into something more efficient. E.g., on ARM most instructions can set the /// flags register, obviating the need for a separate CMP. - virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, - unsigned SrcReg, int Mask, int Value, + virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, const MachineRegisterInfo *MRI) const { return false; } + /// optimizeLoadInstr - Try to remove the load by folding it to a register + /// operand at the use. We fold the load instructions if and only if the + /// def and use are in the same BB. We only look at one load and see + /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register + /// defined by the load we are trying to fold. DefMI returns the machine + /// instruction that defines FoldAsLoadDefReg, and the function returns + /// the machine instruction generated due to folding. + virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI, + const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const { + return 0; + } + /// FoldImmediate - 'Reg' is known to be defined by a move immediate /// instruction, try to fold the immediate into the use instruction. virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, @@ -640,9 +709,11 @@ public: } /// getNumMicroOps - Return the number of u-operations the given machine - /// instruction will be decoded to on the target cpu. + /// instruction will be decoded to on the target cpu. The itinerary's + /// IssueWidth is the number of microops that can be dispatched each + /// cycle. An instruction with zero microops takes no dispatch resources. 
virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, - const MachineInstr *MI) const; + const MachineInstr *MI) const = 0; /// isZeroCost - Return true for pseudo instructions that don't consume any /// machine resources in their current form. These are common cases that the @@ -652,18 +723,45 @@ public: return Opcode <= TargetOpcode::COPY; } + virtual int getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const = 0; + /// getOperandLatency - Compute and return the use operand latency of a given /// pair of def and use. /// In most cases, the static scheduling itinerary was enough to determine the /// operand latency. But it may not be possible for instructions with variable /// number of defs / uses. + /// + /// This is a raw interface to the itinerary that may be directly overridden by + /// a target. Use computeOperandLatency to get the best estimate of latency. virtual int getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const; - - virtual int getOperandLatency(const InstrItineraryData *ItinData, - SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const = 0; + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, + unsigned UseIdx) const = 0; + + /// computeOperandLatency - Compute and return the latency of the given data + /// dependent def and use when the operand indices are already known. + /// + /// FindMin may be set to get the minimum vs. expected latency. + unsigned computeOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx, + bool FindMin = false) const; + + /// computeOperandLatency - Compute and return the latency of the given data + /// dependent def and use. DefMI must be a valid def. UseMI may be NULL for + /// an unknown use. 
If the subtarget allows, this may or may not need to call + /// getOperandLatency(). + /// + /// FindMin may be set to get the minimum vs. expected latency. Minimum + /// latency is used for scheduling groups, while expected latency is for + /// instruction cost and critical path. + unsigned computeOperandLatency(const InstrItineraryData *ItinData, + const TargetRegisterInfo *TRI, + const MachineInstr *DefMI, + const MachineInstr *UseMI, + unsigned Reg, bool FindMin) const; /// getOutputLatency - Compute and return the output dependency latency of a /// a given pair of defs which both target the same register. This is usually @@ -677,13 +775,17 @@ public: /// getInstrLatency - Compute the instruction latency of a given instruction. /// If the instruction has higher cost when predicated, it's returned via /// PredCost. - virtual int getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, - unsigned *PredCost = 0) const; + virtual unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = 0) const = 0; virtual int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const = 0; + /// Return the default expected latency for a def based on its opcode. + unsigned defaultDefLatency(const MCSchedModel *SchedModel, + const MachineInstr *DefMI) const; + /// isHighLatencyDef - Return true if this opcode has high latency to its /// result. virtual bool isHighLatencyDef(int opc) const { return false; } @@ -705,7 +807,7 @@ public: /// if the target considered it 'low'. virtual bool hasLowDefLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx) const; + const MachineInstr *DefMI, unsigned DefIdx) const = 0; /// verifyInstruction - Perform target specific instruction verification. 
virtual @@ -862,20 +964,40 @@ public: virtual bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const; - using TargetInstrInfo::getOperandLatency; + virtual int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const; - using TargetInstrInfo::getInstrLatency; + virtual int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const; + virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr *MI) const; + + virtual unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = 0) const; + + virtual + bool hasLowDefLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx) const; + + virtual int getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, + unsigned UseIdx) const; + bool usePreRAHazardRecognizer() const; virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetMachine*, const ScheduleDAG*) const; virtual ScheduleHazardRecognizer * + CreateTargetMIHazardRecognizer(const InstrItineraryData*, + const ScheduleDAG*) const; + + virtual ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, const ScheduleDAG*) const; }; diff --git a/include/llvm/Target/TargetItinerary.td b/include/llvm/Target/TargetItinerary.td new file mode 100644 index 0000000..cc74006 --- /dev/null +++ b/include/llvm/Target/TargetItinerary.td @@ -0,0 +1,136 @@ +//===- TargetItinerary.td - Target Itinierary Description --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the target-independent scheduling interfaces +// which should be implemented by each target that uses instruction +// itineraries for scheduling. Itineraries are detailed reservation +// tables for each instruction class. They are most appropriate for +// in-order machines with complicated scheduling or bundling constraints. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Processor functional unit - These values represent the function units +// available across all chip sets for the target. Eg., IntUnit, FPUnit, ... +// These may be independent values for each chip set or may be shared across +// all chip sets of the target. Each functional unit is treated as a resource +// during scheduling and has an effect on instruction order based on availability +// during a time interval. +// +class FuncUnit; + +//===----------------------------------------------------------------------===// +// Pipeline bypass / forwarding - These values specify the symbolic names of +// pipeline bypasses which can be used to forward results of instructions +// that are forwarded to uses. +class Bypass; +def NoBypass : Bypass; + +class ReservationKind<bits<1> val> { + int Value = val; +} + +def Required : ReservationKind<0>; +def Reserved : ReservationKind<1>; + +//===----------------------------------------------------------------------===// +// Instruction stage - These values represent a non-pipelined step in +// the execution of an instruction. Cycles represents the number of +// discrete time slots needed to complete the stage. Units represent +// the choice of functional units that can be used to complete the +// stage. Eg. IntUnit1, IntUnit2. 
NextCycles indicates how many +// cycles should elapse from the start of this stage to the start of +// the next stage in the itinerary. For example: +// +// A stage is specified in one of two ways: +// +// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles +// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit +// + +class InstrStage<int cycles, list<FuncUnit> units, + int timeinc = -1, + ReservationKind kind = Required> { + int Cycles = cycles; // length of stage in machine cycles + list<FuncUnit> Units = units; // choice of functional units + int TimeInc = timeinc; // cycles till start of next stage + int Kind = kind.Value; // kind of FU reservation +} + +//===----------------------------------------------------------------------===// +// Instruction itinerary - An itinerary represents a sequential series of steps +// required to complete an instruction. Itineraries are represented as lists of +// instruction stages. +// + +//===----------------------------------------------------------------------===// +// Instruction itinerary classes - These values represent 'named' instruction +// itinerary. Using named itineraries simplifies managing groups of +// instructions across chip sets. An instruction uses the same itinerary class +// across all chip sets. Thus a new chip set can be added without modifying +// instruction information. +// +class InstrItinClass; +def NoItinerary : InstrItinClass; + +//===----------------------------------------------------------------------===// +// Instruction itinerary data - These values provide a runtime map of an +// instruction itinerary class (name) to its itinerary data. +// +// NumMicroOps represents the number of micro-operations that each instruction +// in the class are decoded to. If the number is zero, then it means the +// instruction can decode into variable number of micro-ops and it must be +// determined dynamically. 
This directly relates to the itinerary's +// global IssueWidth property, which constrains the number of microops +// that can issue per cycle. +// +// OperandCycles are optional "cycle counts". They specify the cycle after +// instruction issue the values which correspond to specific operand indices +// are defined or read. Bypasses are optional "pipeline forwarding paths", if +// a def by an instruction is available on a specific bypass and the use can +// read from the same bypass, then the operand use latency is reduced by one. +// +// InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Pipe1]>, +// InstrStage<1, [A9_AGU]>], +// [3, 1], [A9_LdBypass]>, +// InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], +// [1, 1], [NoBypass, A9_LdBypass]>, +// +// In this example, the instruction of IIC_iLoad_i reads its input on cycle 1 +// (after issue) and the result of the load is available on cycle 3. The result +// is available via forwarding path A9_LdBypass. If it's used by the first +// source operand of instructions of IIC_iMVNr class, then the operand latency +// is reduced by 1. +class InstrItinData<InstrItinClass Class, list<InstrStage> stages, + list<int> operandcycles = [], + list<Bypass> bypasses = [], int uops = 1> { + InstrItinClass TheClass = Class; + int NumMicroOps = uops; + list<InstrStage> Stages = stages; + list<int> OperandCycles = operandcycles; + list<Bypass> Bypasses = bypasses; +} + +//===----------------------------------------------------------------------===// +// Processor itineraries - These values represent the set of all itinerary +// classes for a given chip set. +// +// Set property values to -1 to use the default. +// See InstrItineraryProps for comments and defaults. 
+class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp, + list<InstrItinData> iid> { + list<FuncUnit> FU = fu; + list<Bypass> BP = bp; + list<InstrItinData> IID = iid; +} + +// NoItineraries - A marker that can be used by processors without schedule +// info. Subtargets using NoItineraries can bypass the scheduler's +// expensive HazardRecognizer because no reservation table is needed. +def NoItineraries : ProcessorItineraries<[], [], []>; diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h index c8cacf2..ea2874f 100644 --- a/include/llvm/Target/TargetLibraryInfo.h +++ b/include/llvm/Target/TargetLibraryInfo.h @@ -18,36 +18,47 @@ namespace llvm { namespace LibFunc { enum Func { + /// int __cxa_atexit(void (*f)(void *), void *p, void *d); + cxa_atexit, + /// void __cxa_guard_abort(guard_t *guard); + /// guard_t is int64_t in Itanium ABI or int32_t on ARM eabi. + cxa_guard_abort, + /// int __cxa_guard_acquire(guard_t *guard); + cxa_guard_acquire, + /// void __cxa_guard_release(guard_t *guard); + cxa_guard_release, + /// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size); + memcpy_chk, /// double acos(double x); acos, - /// long double acosl(long double x); - acosl, /// float acosf(float x); acosf, + /// long double acosl(long double x); + acosl, /// double asin(double x); asin, - /// long double asinl(long double x); - asinl, /// float asinf(float x); asinf, + /// long double asinl(long double x); + asinl, /// double atan(double x); atan, - /// long double atanl(long double x); - atanl, - /// float atanf(float x); - atanf, /// double atan2(double y, double x); atan2, - /// long double atan2l(long double y, long double x); - atan2l, /// float atan2f(float y, float x); atan2f, + /// long double atan2l(long double y, long double x); + atan2l, + /// float atanf(float x); + atanf, + /// long double atanl(long double x); + atanl, /// double ceil(double x); ceil, - /// long double ceill(long double x); - 
ceill, /// float ceilf(float x); ceilf, + /// long double ceill(long double x); + ceill, /// double copysign(double x, double y); copysign, /// float copysignf(float x, float y); @@ -56,54 +67,56 @@ namespace llvm { copysignl, /// double cos(double x); cos, - /// long double cosl(long double x); - cosl, /// float cosf(float x); cosf, /// double cosh(double x); cosh, - /// long double coshl(long double x); - coshl, /// float coshf(float x); coshf, + /// long double coshl(long double x); + coshl, + /// long double cosl(long double x); + cosl, /// double exp(double x); exp, - /// long double expl(long double x); - expl, - /// float expf(float x); - expf, /// double exp2(double x); exp2, - /// long double exp2l(long double x); - exp2l, /// float exp2f(float x); exp2f, + /// long double exp2l(long double x); + exp2l, + /// float expf(float x); + expf, + /// long double expl(long double x); + expl, /// double expm1(double x); expm1, - /// long double expm1l(long double x); - expm1l, /// float expm1f(float x); expm1f, + /// long double expm1l(long double x); + expm1l, /// double fabs(double x); fabs, - /// long double fabsl(long double x); - fabsl, /// float fabsf(float x); fabsf, + /// long double fabsl(long double x); + fabsl, + /// int fiprintf(FILE *stream, const char *format, ...); + fiprintf, /// double floor(double x); floor, - /// long double floorl(long double x); - floorl, /// float floorf(float x); floorf, - /// int fiprintf(FILE *stream, const char *format, ...); - fiprintf, + /// long double floorl(long double x); + floorl, /// double fmod(double x, double y); fmod, - /// long double fmodl(long double x, long double y); - fmodl, /// float fmodf(float x, float y); fmodf, + /// long double fmodl(long double x, long double y); + fmodl, + /// int fputc(int c, FILE *stream); + fputc, /// int fputs(const char *s, FILE *stream); fputs, /// size_t fwrite(const void *ptr, size_t size, size_t nitems, @@ -113,28 +126,32 @@ namespace llvm { iprintf, /// double log(double 
x); log, - /// long double logl(long double x); - logl, - /// float logf(float x); - logf, - /// double log2(double x); - log2, - /// double long double log2l(long double x); - log2l, - /// float log2f(float x); - log2f, /// double log10(double x); log10, - /// long double log10l(long double x); - log10l, /// float log10f(float x); log10f, + /// long double log10l(long double x); + log10l, /// double log1p(double x); log1p, - /// long double log1pl(long double x); - log1pl, /// float log1pf(float x); log1pf, + /// long double log1pl(long double x); + log1pl, + /// double log2(double x); + log2, + /// float log2f(float x); + log2f, + /// double long double log2l(long double x); + log2l, + /// float logf(float x); + logf, + /// long double logl(long double x); + logl, + /// void *memchr(const void *s, int c, size_t n); + memchr, + /// int memcmp(const void *s1, const void *s2, size_t n); + memcmp, /// void *memcpy(void *s1, const void *s2, size_t n); memcpy, /// void *memmove(void *s1, const void *s2, size_t n); @@ -155,6 +172,10 @@ namespace llvm { powf, /// long double powl(long double x, long double y); powl, + /// int putchar(int c); + putchar, + /// int puts(const char *s); + puts, /// double rint(double x); rint, /// float rintf(float x); @@ -169,51 +190,58 @@ namespace llvm { roundl, /// double sin(double x); sin, - /// long double sinl(long double x); - sinl, /// float sinf(float x); sinf, /// double sinh(double x); sinh, - /// long double sinhl(long double x); - sinhl, /// float sinhf(float x); sinhf, + /// long double sinhl(long double x); + sinhl, + /// long double sinl(long double x); + sinl, /// int siprintf(char *str, const char *format, ...); siprintf, /// double sqrt(double x); sqrt, - /// long double sqrtl(long double x); - sqrtl, /// float sqrtf(float x); sqrtf, + /// long double sqrtl(long double x); + sqrtl, + /// char *strcat(char *s1, const char *s2); + strcat, + /// char *strchr(const char *s, int c); + strchr, + /// char *strcpy(char *s1, 
const char *s2); + strcpy, + /// size_t strlen(const char *s); + strlen, + /// char *strncat(char *s1, const char *s2, size_t n); + strncat, + /// int strncmp(const char *s1, const char *s2, size_t n); + strncmp, + /// char *strncpy(char *s1, const char *s2, size_t n); + strncpy, + /// size_t strnlen(const char *s, size_t maxlen); + strnlen, /// double tan(double x); tan, - /// long double tanl(long double x); - tanl, /// float tanf(float x); tanf, /// double tanh(double x); tanh, - /// long double tanhl(long double x); - tanhl, /// float tanhf(float x); tanhf, + /// long double tanhl(long double x); + tanhl, + /// long double tanl(long double x); + tanl, /// double trunc(double x); trunc, /// float truncf(float x); truncf, /// long double truncl(long double x); truncl, - /// int __cxa_atexit(void (*f)(void *), void *p, void *d); - cxa_atexit, - /// void __cxa_guard_abort(guard_t *guard); - /// guard_t is int64_t in Itanium ABI or int32_t on ARM eabi. - cxa_guard_abort, - /// int __cxa_guard_acquire(guard_t *guard); - cxa_guard_acquire, - /// void __cxa_guard_release(guard_t *guard); - cxa_guard_release, NumLibFuncs }; @@ -247,12 +275,41 @@ public: TargetLibraryInfo(const Triple &T); explicit TargetLibraryInfo(const TargetLibraryInfo &TLI); + /// getLibFunc - Search for a particular function name. If it is one of the + /// known library functions, return true and set F to the corresponding value. + bool getLibFunc(StringRef funcName, LibFunc::Func &F) const; + /// has - This function is used by optimizations that want to match on or form /// a given library function. bool has(LibFunc::Func F) const { return getState(F) != Unavailable; } + /// hasOptimizedCodeGen - Return true if the function is both available as + /// a builtin and a candidate for optimized code generation. 
+ bool hasOptimizedCodeGen(LibFunc::Func F) const { + if (getState(F) == Unavailable) + return false; + switch (F) { + default: break; + case LibFunc::copysign: case LibFunc::copysignf: case LibFunc::copysignl: + case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl: + case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl: + case LibFunc::cos: case LibFunc::cosf: case LibFunc::cosl: + case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl: + case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl: + case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl: + case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill: + case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl: + case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: + case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l: + case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l: + case LibFunc::memcmp: + return true; + } + return false; + } + StringRef getName(LibFunc::Func F) const { AvailabilityState State = getState(F); if (State == Unavailable) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 720c9df..acf0419 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -25,6 +25,7 @@ #include "llvm/CallingConv.h" #include "llvm/InlineAsm.h" #include "llvm/Attributes.h" +#include "llvm/Support/CallSite.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/Support/DebugLoc.h" @@ -50,6 +51,7 @@ namespace llvm { template<typename T> class SmallVectorImpl; class TargetData; class TargetRegisterClass; + class TargetLibraryInfo; class TargetLoweringObjectFile; class Value; @@ -150,6 +152,12 @@ public: /// that should be avoided. 
bool isJumpExpensive() const { return JumpIsExpensive; } + /// isPredictableSelectExpensive - Return true if selects are only cheaper + /// than branches if the branch is unlikely to be predicted right. + bool isPredictableSelectExpensive() const { + return predictableSelectIsExpensive; + } + /// getSetCCResultType - Return the ValueType of the result of SETCC /// operations. Also used to obtain the target's preferred type for /// the condition operand of SELECT and BRCOND nodes. In the case of @@ -358,7 +366,9 @@ public: /// for it. LegalizeAction getOperationAction(unsigned Op, EVT VT) const { if (VT.isExtended()) return Expand; - assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); + // If a target-specific SDNode requires legalization, require the target + // to provide custom legalization for it. + if (Op > array_lengthof(OpActions[0])) return Custom; unsigned I = (unsigned) VT.getSimpleVT().SimpleTy; return (LegalizeAction)OpActions[I][Op]; } @@ -670,6 +680,12 @@ public: return UseUnderscoreLongJmp; } + /// supportJumpTables - return whether the target can generate code for + /// jump tables. + bool supportJumpTables() const { + return SupportJumpTables; + } + /// getStackPointerRegisterToSaveRestore - If a physical register, this /// specifies the register that llvm.savestack/llvm.restorestack should save /// and restore. @@ -984,6 +1000,12 @@ protected: UseUnderscoreLongJmp = Val; } + /// setSupportJumpTables - Indicate whether the target can generate code for + /// jump tables. + void setSupportJumpTables(bool Val) { + SupportJumpTables = Val; + } + /// setStackPointerRegisterToSaveRestore - If set to a physical register, this /// specifies the register that llvm.savestack/llvm.restorestack should save /// and restore. 
@@ -1169,7 +1191,7 @@ protected: ShouldFoldAtomicFences = fold; } - /// setInsertFencesForAtomic - Set if the the DAG builder should + /// setInsertFencesForAtomic - Set if the DAG builder should /// automatically insert fences and reduce the order of atomic memory /// operations to Monotonic. void setInsertFencesForAtomic(bool fence) { @@ -1197,11 +1219,6 @@ public: llvm_unreachable("Not Implemented"); } - /// LowerCallTo - This function lowers an abstract call to a function into an - /// actual call. This returns a pair of operands. The first element is the - /// return value for the function (if RetTy is not VoidTy). The second - /// element is the outgoing token chain. It calls LowerCall to do the actual - /// lowering. struct ArgListEntry { SDValue Node; Type* Ty; @@ -1217,13 +1234,72 @@ public: isSRet(false), isNest(false), isByVal(false), Alignment(0) { } }; typedef std::vector<ArgListEntry> ArgListTy; - std::pair<SDValue, SDValue> - LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt, - bool isVarArg, bool isInreg, unsigned NumFixedArgs, - CallingConv::ID CallConv, bool isTailCall, - bool doesNotRet, bool isReturnValueUsed, - SDValue Callee, ArgListTy &Args, - SelectionDAG &DAG, DebugLoc dl) const; + + /// CallLoweringInfo - This structure contains all information that is + /// necessary for lowering calls. It is passed to TLI::LowerCallTo when the + /// SelectionDAG builder needs to lower a call, and targets will see this + /// struct in their LowerCall implementation. + struct CallLoweringInfo { + SDValue Chain; + Type *RetTy; + bool RetSExt : 1; + bool RetZExt : 1; + bool IsVarArg : 1; + bool IsInReg : 1; + bool DoesNotReturn : 1; + bool IsReturnValueUsed : 1; + + // IsTailCall should be modified by implementations of + // TargetLowering::LowerCall that perform tail call conversions. 
+ bool IsTailCall; + + unsigned NumFixedArgs; + CallingConv::ID CallConv; + SDValue Callee; + ArgListTy &Args; + SelectionDAG &DAG; + DebugLoc DL; + ImmutableCallSite *CS; + SmallVector<ISD::OutputArg, 32> Outs; + SmallVector<SDValue, 32> OutVals; + SmallVector<ISD::InputArg, 32> Ins; + + + /// CallLoweringInfo - Constructs a call lowering context based on the + /// ImmutableCallSite \p cs. + CallLoweringInfo(SDValue chain, Type *retTy, + FunctionType *FTy, bool isTailCall, SDValue callee, + ArgListTy &args, SelectionDAG &dag, DebugLoc dl, + ImmutableCallSite &cs) + : Chain(chain), RetTy(retTy), RetSExt(cs.paramHasAttr(0, Attribute::SExt)), + RetZExt(cs.paramHasAttr(0, Attribute::ZExt)), IsVarArg(FTy->isVarArg()), + IsInReg(cs.paramHasAttr(0, Attribute::InReg)), + DoesNotReturn(cs.doesNotReturn()), + IsReturnValueUsed(!cs.getInstruction()->use_empty()), + IsTailCall(isTailCall), NumFixedArgs(FTy->getNumParams()), + CallConv(cs.getCallingConv()), Callee(callee), Args(args), DAG(dag), + DL(dl), CS(&cs) {} + + /// CallLoweringInfo - Constructs a call lowering context based on the + /// provided call information. + CallLoweringInfo(SDValue chain, Type *retTy, bool retSExt, bool retZExt, + bool isVarArg, bool isInReg, unsigned numFixedArgs, + CallingConv::ID callConv, bool isTailCall, + bool doesNotReturn, bool isReturnValueUsed, SDValue callee, + ArgListTy &args, SelectionDAG &dag, DebugLoc dl) + : Chain(chain), RetTy(retTy), RetSExt(retSExt), RetZExt(retZExt), + IsVarArg(isVarArg), IsInReg(isInReg), DoesNotReturn(doesNotReturn), + IsReturnValueUsed(isReturnValueUsed), IsTailCall(isTailCall), + NumFixedArgs(numFixedArgs), CallConv(callConv), Callee(callee), + Args(args), DAG(dag), DL(dl), CS(NULL) {} + }; + + /// LowerCallTo - This function lowers an abstract call to a function into an + /// actual call. This returns a pair of operands. The first element is the + /// return value for the function (if RetTy is not VoidTy). 
The second + /// element is the outgoing token chain. It calls LowerCall to do the actual + /// lowering. + std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const; /// LowerCall - This hook must be implemented to lower calls into the /// the specified DAG. The outgoing arguments to the call are described @@ -1232,13 +1308,7 @@ public: /// InVals array with legal-type return values from the call, and return /// the resulting token chain value. virtual SDValue - LowerCall(SDValue /*Chain*/, SDValue /*Callee*/, - CallingConv::ID /*CallConv*/, bool /*isVarArg*/, - bool /*doesNotRet*/, bool &/*isTailCall*/, - const SmallVectorImpl<ISD::OutputArg> &/*Outs*/, - const SmallVectorImpl<SDValue> &/*OutVals*/, - const SmallVectorImpl<ISD::InputArg> &/*Ins*/, - DebugLoc /*dl*/, SelectionDAG &/*DAG*/, + LowerCall(CallLoweringInfo &/*CLI*/, SmallVectorImpl<SDValue> &/*InVals*/) const { llvm_unreachable("Not Implemented"); } @@ -1251,7 +1321,7 @@ public: /// registers. If false is returned, an sret-demotion is performed. /// virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/, - MachineFunction &/*MF*/, bool /*isVarArg*/, + MachineFunction &/*MF*/, bool /*isVarArg*/, const SmallVectorImpl<ISD::OutputArg> &/*Outs*/, LLVMContext &/*Context*/) const { @@ -1346,7 +1416,8 @@ public: /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel *createFastISel(FunctionLoweringInfo &) const { + virtual FastISel *createFastISel(FunctionLoweringInfo &, + const TargetLibraryInfo *) const { return 0; } @@ -1602,6 +1673,14 @@ public: return false; } + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. 
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { + return false; + } + /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. @@ -1665,13 +1744,6 @@ private: const TargetData *TD; const TargetLoweringObjectFile &TLOF; - /// We are in the process of implementing a new TypeLegalization action - /// which is the promotion of vector elements. This feature is under - /// development. Until this feature is complete, it is only enabled using a - /// flag. We pass this flag using a member because of circular dep issues. - /// This member will be removed with the flag once we complete the transition. - bool mayPromoteElements; - /// PointerTy - The type to use for pointers, usually i32 or i64. /// MVT PointerTy; @@ -1708,6 +1780,10 @@ private: /// llvm.longjmp. Defaults to false. bool UseUnderscoreLongJmp; + /// SupportJumpTables - Whether the target can generate code for jumptables. + /// If it's not true, then each jumptable must be lowered into if-then-else's. + bool SupportJumpTables; + /// BooleanContents - Information about the contents of the high-bits in /// boolean values held in a type wider than i1. See getBooleanContents. BooleanContent BooleanContents; @@ -1875,9 +1951,8 @@ private: if (NumElts == 1) return LegalizeKind(TypeScalarizeVector, EltVT); - // If we allow the promotion of vector elements using a flag, - // then try to widen vector elements until a legal type is found. - if (mayPromoteElements && EltVT.isInteger()) { + // Try to widen vector elements until a legal type is found. + if (EltVT.isInteger()) { // Vectors with a number of elements that is not a power of two are always // widened, for example <3 x float> -> <4 x float>. if (!VT.isPow2VectorType()) { @@ -2028,14 +2103,14 @@ protected: /// optimization. 
bool benefitFromCodePlacementOpt; + /// predictableSelectIsExpensive - Tells the code generator that select is + /// more expensive than a branch if the branch is usually predicted right. + bool predictableSelectIsExpensive; + private: /// isLegalRC - Return true if the value types that can be represented by the /// specified register class are all legal. bool isLegalRC(const TargetRegisterClass *RC) const; - - /// hasLegalSuperRegRegClasses - Return true if the specified register class - /// has one or more super-reg register classes that are legal. - bool hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const; }; /// GetReturnInfo - Given an LLVM IR type and return type attributes, @@ -2043,8 +2118,7 @@ private: /// the offsets, if the return value is being lowered to memory. void GetReturnInfo(Type* ReturnType, Attributes attr, SmallVectorImpl<ISD::OutputArg> &Outs, - const TargetLowering &TLI, - SmallVectorImpl<uint64_t> *Offsets = 0); + const TargetLowering &TLI); } // end llvm namespace diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 1a05604..e4bf32b 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -14,6 +14,7 @@ #ifndef LLVM_TARGET_TARGETMACHINE_H #define LLVM_TARGET_TARGETMACHINE_H +#include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/StringRef.h" @@ -247,7 +248,9 @@ public: virtual bool addPassesToEmitFile(PassManagerBase &, formatted_raw_ostream &, CodeGenFileType, - bool /*DisableVerify*/ = true) { + bool /*DisableVerify*/ = true, + AnalysisID StartAfter = 0, + AnalysisID StopAfter = 0) { return true; } @@ -297,7 +300,9 @@ public: virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - bool DisableVerify = true); + bool DisableVerify = true, + AnalysisID StartAfter = 0, + AnalysisID StopAfter = 0); /// addPassesToEmitMachineCode - Add passes 
to the specified pass manager to /// get machine code emitted. This uses a JITCodeEmitter object to handle diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 12a2757..d1a07d1 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -30,20 +30,28 @@ namespace llvm { }; } + namespace FPOpFusion { + enum FPOpFusionMode { + Fast, // Enable fusion of FP ops wherever it's profitable. + Standard, // Only allow fusion of 'blessed' ops (currently just fmuladd). + Strict // Never fuse FP-ops. + }; + } + class TargetOptions { public: TargetOptions() : PrintMachineCode(false), NoFramePointerElim(false), NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false), - NoExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false), + UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false), UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false), JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false), GuaranteedTailCallOpt(false), DisableTailCalls(false), - StackAlignmentOverride(0), RealignStack(true), - DisableJumpTables(false), EnableFastISel(false), + StackAlignmentOverride(0), RealignStack(true), EnableFastISel(false), PositionIndependentExecutable(false), EnableSegmentedStacks(false), - TrapFuncName(""), FloatABIType(FloatABI::Default) + UseInitArray(false), TrapFuncName(""), FloatABIType(FloatABI::Default), + AllowFPOpFusion(FPOpFusion::Standard) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs @@ -74,14 +82,6 @@ namespace llvm { unsigned LessPreciseFPMADOption : 1; bool LessPreciseFPMAD() const; - /// NoExcessFPPrecision - This flag is enabled when the - /// -disable-excess-fp-precision flag is specified on the command line. - /// When this flag is off (the default), the code generator is allowed to - /// produce results that are "more precise" than IEEE allows. 
This includes - /// use of FMA-like operations and use of the X86 FP registers without - /// rounding all over the place. - unsigned NoExcessFPPrecision : 1; - /// UnsafeFPMath - This flag is enabled when the /// -enable-unsafe-fp-math flag is specified on the command line. When /// this flag is off (the default), the code generator is not allowed to @@ -155,10 +155,6 @@ namespace llvm { /// automatically realigned, if needed. unsigned RealignStack : 1; - /// DisableJumpTables - This flag indicates jump tables should not be - /// generated. - unsigned DisableJumpTables : 1; - /// EnableFastISel - This flag enables fast-path instruction selection /// which trades away generated code quality in favor of reducing /// compile time. @@ -172,6 +168,10 @@ namespace llvm { unsigned EnableSegmentedStacks : 1; + /// UseInitArray - Use .init_array instead of .ctors for static + /// constructors. + unsigned UseInitArray : 1; + /// getTrapFunctionName - If this returns a non-empty string, this means /// isel should lower Intrinsic::trap to a call to the specified function /// name instead of an ISD::TRAP node. @@ -185,6 +185,25 @@ namespace llvm { /// Such a combination is unfortunately popular (e.g. arm-apple-darwin). /// Hard presumes that the normal FP ABI is used. FloatABI::ABIType FloatABIType; + + /// AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option. + /// This controls the creation of fused FP ops that store intermediate + /// results in higher precision than IEEE allows (E.g. FMAs). + /// + /// Fast mode - allows formation of fused FP ops whenever they're + /// profitable. + /// Standard mode - allow fusion only for 'blessed' FP ops. At present the + /// only blessed op is the fmuladd intrinsic. In the future more blessed ops + /// may be added. + /// Strict mode - allow fusion only if/when it can be proven that the excess + /// precision won't effect the result. + /// + /// Note: This option only controls formation of fused ops by the optimizers. 
+ /// Fused operations that are explicitly specified (e.g. FMA via the + /// llvm.fma.* intrinsic) will always be honored, regardless of the value of + /// this option. + FPOpFusion::FPOpFusionMode AllowFPOpFusion; + }; } // End llvm namespace diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 6ddd364..df4d900 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -42,9 +42,9 @@ public: // Instance variables filled by tablegen, do not use! const MCRegisterClass *MC; const vt_iterator VTs; - const unsigned *SubClassMask; + const uint32_t *SubClassMask; + const uint16_t *SuperRegIndices; const sc_iterator SuperClasses; - const sc_iterator SuperRegClasses; ArrayRef<uint16_t> (*OrderFunc)(const MachineFunction&); /// getID() - Return the register class ID number. @@ -119,18 +119,6 @@ public: return I; } - /// superregclasses_begin / superregclasses_end - Loop over all of - /// the superreg register classes of this register class. - sc_iterator superregclasses_begin() const { - return SuperRegClasses; - } - - sc_iterator superregclasses_end() const { - sc_iterator I = SuperRegClasses; - while (*I != NULL) ++I; - return I; - } - /// hasSubClass - return true if the specified TargetRegisterClass /// is a proper sub-class of this TargetRegisterClass. bool hasSubClass(const TargetRegisterClass *RC) const { @@ -163,6 +151,18 @@ public: return SubClassMask; } + /// getSuperRegIndices - Returns a 0-terminated list of sub-register indices + /// that project some super-register class into this register class. The list + /// has an entry for each Idx such that: + /// + /// There exists SuperRC where: + /// For all Reg in SuperRC: + /// this->contains(Reg:Idx) + /// + const uint16_t *getSuperRegIndices() const { + return SuperRegIndices; + } + /// getSuperClasses - Returns a NULL terminated list of super-classes. 
The /// classes are ordered by ID which is also a topological ordering from large /// to small classes. The list does NOT include the current class. @@ -301,6 +301,11 @@ public: const TargetRegisterClass * getMinimalPhysRegClass(unsigned Reg, EVT VT = MVT::Other) const; + /// getAllocatableClass - Return the maximal subclass of the given register + /// class that is alloctable, or NULL. + const TargetRegisterClass * + getAllocatableClass(const TargetRegisterClass *RC) const; + /// getAllocatableSet - Returns a bitset indexed by register number /// indicating if a register is allocatable or not. If a register class is /// specified, returns the subset for the class. @@ -332,9 +337,23 @@ public: if (regA == regB) return true; if (isVirtualRegister(regA) || isVirtualRegister(regB)) return false; - for (const uint16_t *regList = getOverlaps(regA)+1; *regList; ++regList) { - if (*regList == regB) return true; - } + + // Regunits are numerically ordered. Find a common unit. + MCRegUnitIterator RUA(regA, this); + MCRegUnitIterator RUB(regB, this); + do { + if (*RUA == *RUB) return true; + if (*RUA < *RUB) ++RUA; + else ++RUB; + } while (RUA.isValid() && RUB.isValid()); + return false; + } + + /// hasRegUnit - Returns true if Reg contains RegUnit. + bool hasRegUnit(unsigned Reg, unsigned RegUnit) const { + for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) + if (*Units == RegUnit) + return true; return false; } @@ -346,10 +365,10 @@ public: /// isSuperRegister - Returns true if regB is a super-register of regA. /// - bool isSuperRegister(unsigned regA, unsigned regB) const { - for (const uint16_t *regList = getSuperRegisters(regA); *regList;++regList){ - if (*regList == regB) return true; - } + bool isSuperRegister(unsigned RegA, unsigned RegB) const { + for (MCSuperRegIterator I(RegA, this); I.isValid(); ++I) + if (*I == RegB) + return true; return false; } @@ -416,7 +435,7 @@ public: /// TableGen will synthesize missing A sub-classes. 
virtual const TargetRegisterClass * getMatchingSuperRegClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, unsigned Idx) const =0; + const TargetRegisterClass *B, unsigned Idx) const; /// getSubClassWithSubReg - Returns the largest legal sub-class of RC that /// supports the sub-register index Idx. @@ -431,7 +450,10 @@ public: /// /// TableGen will synthesize missing RC sub-classes. virtual const TargetRegisterClass * - getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const =0; + getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const { + assert(Idx == 0 && "Target has no sub-registers"); + return RC; + } /// composeSubRegIndices - Return the subregister index you get from composing /// two subregister indices. @@ -450,6 +472,34 @@ public: return b; } + /// getCommonSuperRegClass - Find a common super-register class if it exists. + /// + /// Find a register class, SuperRC and two sub-register indices, PreA and + /// PreB, such that: + /// + /// 1. PreA + SubA == PreB + SubB (using composeSubRegIndices()), and + /// + /// 2. For all Reg in SuperRC: Reg:PreA in RCA and Reg:PreB in RCB, and + /// + /// 3. SuperRC->getSize() >= max(RCA->getSize(), RCB->getSize()). + /// + /// SuperRC will be chosen such that no super-class of SuperRC satisfies the + /// requirements, and there is no register class with a smaller spill size + /// that satisfies the requirements. + /// + /// SubA and SubB must not be 0. Use getMatchingSuperRegClass() instead. + /// + /// Either of the PreA and PreB sub-register indices may be returned as 0. In + /// that case, the returned register class will be a sub-class of the + /// corresponding argument register class. + /// + /// The function returns NULL if no register class can be found. 
+ /// + const TargetRegisterClass* + getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, + const TargetRegisterClass *RCB, unsigned SubB, + unsigned &PreA, unsigned &PreB) const; + //===--------------------------------------------------------------------===// // Register Class Information // @@ -479,7 +529,8 @@ public: /// getPointerRegClass - Returns a TargetRegisterClass used for pointer /// values. If a target supports multiple different pointer register classes, /// kind specifies which one is indicated. - virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const { + virtual const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const { llvm_unreachable("Target didn't implement getPointerRegClass!"); } @@ -515,13 +566,16 @@ public: return 0; } - /// Get the weight in units of pressure for this register class. +// Get the weight in units of pressure for this register class. virtual const RegClassWeight &getRegClassWeight( const TargetRegisterClass *RC) const = 0; /// Get the number of dimensions of register pressure. virtual unsigned getNumRegPressureSets() const = 0; + /// Get the name of this register unit pressure set. + virtual const char *getRegPressureSetName(unsigned Idx) const = 0; + /// Get the register unit pressure limit for this dimension. /// This limit must be adjusted dynamically for reserved registers. virtual unsigned getRegPressureSetLimit(unsigned Idx) const = 0; @@ -609,6 +663,12 @@ public: return false; } + /// trackLivenessAfterRegAlloc - returns true if the live-ins should be tracked + /// after register allocation. + virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return false; + } + /// needsStackRealignment - true if storage within the function requires the /// stack pointer to be aligned more than the normal calling convention calls /// for. 
@@ -708,6 +768,62 @@ public: }; +//===----------------------------------------------------------------------===// +// SuperRegClassIterator +//===----------------------------------------------------------------------===// +// +// Iterate over the possible super-registers for a given register class. The +// iterator will visit a list of pairs (Idx, Mask) corresponding to the +// possible classes of super-registers. +// +// Each bit mask will have at least one set bit, and each set bit in Mask +// corresponds to a SuperRC such that: +// +// For all Reg in SuperRC: Reg:Idx is in RC. +// +// The iterator can include (0, RC->getSubClassMask()) as the first entry which +// also satisfies the above requirement, assuming Reg:0 == Reg. +// +class SuperRegClassIterator { + const unsigned RCMaskWords; + unsigned SubReg; + const uint16_t *Idx; + const uint32_t *Mask; + +public: + /// Create a SuperRegClassIterator that visits all the super-register classes + /// of RC. When IncludeSelf is set, also include the (0, sub-classes) entry. + SuperRegClassIterator(const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + bool IncludeSelf = false) + : RCMaskWords((TRI->getNumRegClasses() + 31) / 32), + SubReg(0), + Idx(RC->getSuperRegIndices()), + Mask(RC->getSubClassMask()) { + if (!IncludeSelf) + ++*this; + } + + /// Returns true if this iterator is still pointing at a valid entry. + bool isValid() const { return Idx; } + + /// Returns the current sub-register index. + unsigned getSubReg() const { return SubReg; } + + /// Returns the bit mask of register classes that getSubReg() projects into + /// RC. + const uint32_t *getMask() const { return Mask; } + + /// Advance iterator to the next entry. 
+ void operator++() { + assert(isValid() && "Cannot move iterator past end."); + Mask += RCMaskWords; + SubReg = *Idx++; + if (!SubReg) + Idx = 0; + } +}; + // This is useful when building IndexedMaps keyed on virtual registers struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> { unsigned operator()(unsigned Reg) const { @@ -742,6 +858,29 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) { return OS; } +/// PrintRegUnit - Helper class for printing register units on a raw_ostream. +/// +/// Register units are named after their root registers: +/// +/// AL - Single root. +/// FP0~ST7 - Dual roots. +/// +/// Usage: OS << PrintRegUnit(Unit, TRI) << '\n'; +/// +class PrintRegUnit { + const TargetRegisterInfo *TRI; + unsigned Unit; +public: + PrintRegUnit(unsigned unit, const TargetRegisterInfo *tri) + : TRI(tri), Unit(unit) {} + void print(raw_ostream&) const; +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const PrintRegUnit &PR) { + PR.print(OS); + return OS; +} + } // End llvm namespace #endif diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index 97ea82a..4dc488d 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -1,10 +1,10 @@ //===- TargetSchedule.td - Target Independent Scheduling ---*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// // // This file defines the target-independent scheduling interfaces which should @@ -12,119 +12,30 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Processor functional unit - These values represent the function units -// available across all chip sets for the target. Eg., IntUnit, FPUnit, ... -// These may be independent values for each chip set or may be shared across -// all chip sets of the target. Each functional unit is treated as a resource -// during scheduling and has an affect instruction order based on availability -// during a time interval. -// -class FuncUnit; - -//===----------------------------------------------------------------------===// -// Pipeline bypass / forwarding - These values specifies the symbolic names of -// pipeline bypasses which can be used to forward results of instructions -// that are forwarded to uses. -class Bypass; -def NoBypass : Bypass; - -class ReservationKind<bits<1> val> { - int Value = val; -} - -def Required : ReservationKind<0>; -def Reserved : ReservationKind<1>; +include "llvm/Target/TargetItinerary.td" -//===----------------------------------------------------------------------===// -// Instruction stage - These values represent a non-pipelined step in -// the execution of an instruction. Cycles represents the number of -// discrete time slots needed to complete the stage. Units represent -// the choice of functional units that can be used to complete the -// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many -// cycles should elapse from the start of this stage to the start of -// the next stage in the itinerary. 
For example: -// -// A stage is specified in one of two ways: -// -// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles -// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit +// The SchedMachineModel is defined by subtargets for three categories of data: +// 1) Basic properties for coarse grained instruction cost model. +// 2) Scheduler Read/Write resources for simple per-opcode cost model. +// 3) Instruction itineraries for detailed reservation tables. // +// Default values for basic properties are defined in MCSchedModel. "-1" +// indicates that the property is not overridden by the target description. +class SchedMachineModel { + int IssueWidth = -1; // Max instructions that may be scheduled per cycle. + int MinLatency = -1; // Determines which instructions are allowed in a group. + // (-1) inorder (0) ooo, (1): inorder +var latencies. + int LoadLatency = -1; // Cycles for loads to access the cache. + int HighLatency = -1; // Approximation of cycles for "high latency" ops. + int MispredictPenalty = -1; // Extra cycles for a mispredicted branch. -class InstrStage<int cycles, list<FuncUnit> units, - int timeinc = -1, - ReservationKind kind = Required> { - int Cycles = cycles; // length of stage in machine cycles - list<FuncUnit> Units = units; // choice of functional units - int TimeInc = timeinc; // cycles till start of next stage - int Kind = kind.Value; // kind of FU reservation -} + ProcessorItineraries Itineraries = NoItineraries; -//===----------------------------------------------------------------------===// -// Instruction itinerary - An itinerary represents a sequential series of steps -// required to complete an instruction. Itineraries are represented as lists of -// instruction stages. -// - -//===----------------------------------------------------------------------===// -// Instruction itinerary classes - These values represent 'named' instruction -// itinerary. 
Using named itineraries simplifies managing groups of -// instructions across chip sets. An instruction uses the same itinerary class -// across all chip sets. Thus a new chip set can be added without modifying -// instruction information. -// -// NumMicroOps represents the number of micro-operations that each instruction -// in the class are decoded to. If the number is zero, then it means the -// instruction can decode into variable number of micro-ops and it must be -// determined dynamically. -// -class InstrItinClass<int ops = 1> { - int NumMicroOps = ops; + bit NoModel = 0; // Special tag to indicate missing machine model. } -def NoItinerary : InstrItinClass; -//===----------------------------------------------------------------------===// -// Instruction itinerary data - These values provide a runtime map of an -// instruction itinerary class (name) to its itinerary data. -// -// OperandCycles are optional "cycle counts". They specify the cycle after -// instruction issue the values which correspond to specific operand indices -// are defined or read. Bypasses are optional "pipeline forwarding pathes", if -// a def by an instruction is available on a specific bypass and the use can -// read from the same bypass, then the operand use latency is reduced by one. -// -// InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Pipe1]>, -// InstrStage<1, [A9_AGU]>], -// [3, 1], [A9_LdBypass]>, -// InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], -// [1, 1], [NoBypass, A9_LdBypass]>, -// -// In this example, the instruction of IIC_iLoadi reads its input on cycle 1 -// (after issue) and the result of the load is available on cycle 3. The result -// is available via forwarding path A9_LdBypass. If it's used by the first -// source operand of instructions of IIC_iMVNr class, then the operand latency -// is reduced by 1. 
-class InstrItinData<InstrItinClass Class, list<InstrStage> stages, - list<int> operandcycles = [], - list<Bypass> bypasses = []> { - InstrItinClass TheClass = Class; - list<InstrStage> Stages = stages; - list<int> OperandCycles = operandcycles; - list<Bypass> Bypasses = bypasses; -} - -//===----------------------------------------------------------------------===// -// Processor itineraries - These values represent the set of all itinerary -// classes for a given chip set. -// -class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp, - list<InstrItinData> iid> { - list<FuncUnit> FU = fu; - list<Bypass> BP = bp; - list<InstrItinData> IID = iid; +def NoSchedModel : SchedMachineModel { + let NoModel = 1; } -// NoItineraries - A marker that can be used by processors without schedule -// info. -def NoItineraries : ProcessorItineraries<[], [], []>; - +// TODO: Define classes for processor and scheduler resources. diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index f55cf0e..3f81c06 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -404,11 +404,16 @@ def brind : SDNode<"ISD::BRIND" , SDTBrind, [SDNPHasChain]>; def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>; def trap : SDNode<"ISD::TRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; +def debugtrap : SDNode<"ISD::DEBUGTRAP" , SDTNone, + [SDNPHasChain, SDNPSideEffect]>; def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +def readcyclecounter : SDNode<"ISD::READCYCLECOUNTER", SDTIntLeaf, + [SDNPHasChain, SDNPSideEffect]>; + def membarrier : SDNode<"ISD::MEMBARRIER" , SDTMemBarrier, [SDNPHasChain, SDNPSideEffect]>; @@ -593,6 +598,13 @@ def not : PatFrag<(ops node:$in), (xor node:$in, -1)>; def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>; def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>; +// null_frag - The null pattern operator is 
used in multiclass instantiations +// which accept an SDPatternOperator for use in matching patterns for internal +// definitions. When expanding a pattern, if the null fragment is referenced +// in the expansion, the pattern is discarded and it is as-if '[]' had been +// specified. This allows multiclasses to have the isel patterns be optional. +def null_frag : SDPatternOperator; + // load fragments. def unindexedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{ return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; |