Diffstat (limited to 'lib/Target/ARM')
41 files changed, 17955 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
new file mode 100644
index 0000000..ac7de91
--- /dev/null
+++ b/lib/Target/ARM/ARM.h
@@ -0,0 +1,121 @@
+//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+
+class ARMTargetMachine;
+class FunctionPass;
+class MachineCodeEmitter;
+class JITCodeEmitter;
+class raw_ostream;
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+  // The CondCodes constants map directly to the 4-bit encoding of the
+  // condition field for predicated instructions.
+  enum CondCodes {
+    EQ,
+    NE,
+    HS,
+    LO,
+    MI,
+    PL,
+    VS,
+    VC,
+    HI,
+    LS,
+    GE,
+    LT,
+    GT,
+    LE,
+    AL
+  };
+
+  inline static CondCodes getOppositeCondition(CondCodes CC){
+    switch (CC) {
+    default: assert(0 && "Unknown condition code");
+    case EQ: return NE;
+    case NE: return EQ;
+    case HS: return LO;
+    case LO: return HS;
+    case MI: return PL;
+    case PL: return MI;
+    case VS: return VC;
+    case VC: return VS;
+    case HI: return LS;
+    case LS: return HI;
+    case GE: return LT;
+    case LT: return GE;
+    case GT: return LE;
+    case LE: return GT;
+    }
+  }
+}
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+  switch (CC) {
+  default: assert(0 && "Unknown condition code");
+  case ARMCC::EQ:  return "eq";
+  case ARMCC::NE:  return "ne";
+  case ARMCC::HS:  return "hs";
+  case ARMCC::LO:  return "lo";
+  case ARMCC::MI:  return "mi";
+  case ARMCC::PL:  return "pl";
+  case ARMCC::VS:  return "vs";
+  case ARMCC::VC:  return "vc";
+  case ARMCC::HI:  return "hi";
+  case ARMCC::LS:  return "ls";
+  case ARMCC::GE:  return "ge";
+  case ARMCC::LT:  return "lt";
+  case ARMCC::GT:  return "gt";
+  case ARMCC::LE:  return "le";
+  case ARMCC::AL:  return "al";
+  }
+}
+
+FunctionPass *createARMISelDag(ARMTargetMachine &TM);
+FunctionPass *createARMCodePrinterPass(raw_ostream &O,
+                                       ARMTargetMachine &TM,
+                                       CodeGenOpt::Level OptLevel,
+                                       bool Verbose);
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+                                       MachineCodeEmitter &MCE);
+
+FunctionPass *createARMCodeEmitterPass( ARMTargetMachine &TM,
+                                        MachineCodeEmitter &MCE);
+FunctionPass *createARMJITCodeEmitterPass( ARMTargetMachine &TM,
+                                           JITCodeEmitter &JCE);
+
+FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMConstantIslandPass();
+
+} // end namespace llvm;
+
+// Defines symbolic names for ARM registers.  This defines a mapping from
+// register name to register number.
+//
+#include "ARMGenRegisterNames.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#include "ARMGenInstrNames.inc"
+
+
+#endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
new file mode 100644
index 0000000..4ac6857
--- /dev/null
+++ b/lib/Target/ARM/ARM.td
@@ -0,0 +1,136 @@
+//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// ARM Subtarget features. +// + +def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", + "ARM v4T">; +def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", + "ARM v5T">; +def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", + "ARM v5TE, v5TEj, v5TExp">; +def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", + "ARM v6">; +def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", + "ARM v7A">; +def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", + "Enable VFP2 instructions">; +def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3", + "Enable VFP3 instructions">; +def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON", + "Enable NEON instructions">; +def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", + "Enable Thumb2 instructions">; + +//===----------------------------------------------------------------------===// +// ARM Processors supported. +// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + +// V4 Processors. +def : Proc<"generic", []>; +def : Proc<"arm8", []>; +def : Proc<"arm810", []>; +def : Proc<"strongarm", []>; +def : Proc<"strongarm110", []>; +def : Proc<"strongarm1100", []>; +def : Proc<"strongarm1110", []>; + +// V4T Processors. +def : Proc<"arm7tdmi", [ArchV4T]>; +def : Proc<"arm7tdmi-s", [ArchV4T]>; +def : Proc<"arm710t", [ArchV4T]>; +def : Proc<"arm720t", [ArchV4T]>; +def : Proc<"arm9", [ArchV4T]>; +def : Proc<"arm9tdmi", [ArchV4T]>; +def : Proc<"arm920", [ArchV4T]>; +def : Proc<"arm920t", [ArchV4T]>; +def : Proc<"arm922t", [ArchV4T]>; +def : Proc<"arm940t", [ArchV4T]>; +def : Proc<"ep9312", [ArchV4T]>; + +// V5T Processors. +def : Proc<"arm10tdmi", [ArchV5T]>; +def : Proc<"arm1020t", [ArchV5T]>; + +// V5TE Processors. +def : Proc<"arm9e", [ArchV5TE]>; +def : Proc<"arm926ej-s", [ArchV5TE]>; +def : Proc<"arm946e-s", [ArchV5TE]>; +def : Proc<"arm966e-s", [ArchV5TE]>; +def : Proc<"arm968e-s", [ArchV5TE]>; +def : Proc<"arm10e", [ArchV5TE]>; +def : Proc<"arm1020e", [ArchV5TE]>; +def : Proc<"arm1022e", [ArchV5TE]>; +def : Proc<"xscale", [ArchV5TE]>; +def : Proc<"iwmmxt", [ArchV5TE]>; + +// V6 Processors. 
+def : Proc<"arm1136j-s", [ArchV6]>; +def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>; +def : Proc<"arm1176jz-s", [ArchV6]>; +def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; +def : Proc<"mpcorenovfp", [ArchV6]>; +def : Proc<"mpcore", [ArchV6, FeatureVFP2]>; + +def : Proc<"arm1156t2-s", [ArchV6, FeatureThumb2]>; +def : Proc<"arm1156t2f-s", [ArchV6, FeatureThumb2, FeatureVFP2]>; + +def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "ARMRegisterInfo.td" + +include "ARMCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "ARMInstrInfo.td" + +def ARMInstrInfo : InstrInfo { + // Define how we want to layout our target-specific information field. + let TSFlagsFields = ["AddrModeBits", + "SizeFlag", + "IndexModeBits", + "isUnaryDataProc", + "Form"]; + let TSFlagsShifts = [0, + 4, + 7, + 9, + 10]; +} + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def ARM : Target { + // Pull in Instruction Info: + let InstructionSet = ARMInstrInfo; +} diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h new file mode 100644 index 0000000..6d9b9ee --- /dev/null +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -0,0 +1,394 @@ +//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM addressing mode implementation stuff. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H +#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H + +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> + +namespace llvm { + +/// ARM_AM - ARM Addressing Mode Stuff +namespace ARM_AM { + enum ShiftOpc { + no_shift = 0, + asr, + lsl, + lsr, + ror, + rrx + }; + + enum AddrOpc { + add = '+', sub = '-' + }; + + static inline const char *getShiftOpcStr(ShiftOpc Op) { + switch (Op) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::asr: return "asr"; + case ARM_AM::lsl: return "lsl"; + case ARM_AM::lsr: return "lsr"; + case ARM_AM::ror: return "ror"; + case ARM_AM::rrx: return "rrx"; + } + } + + static inline ShiftOpc getShiftOpcForNode(SDValue N) { + switch (N.getOpcode()) { + default: return ARM_AM::no_shift; + case ISD::SHL: return ARM_AM::lsl; + case ISD::SRL: return ARM_AM::lsr; + case ISD::SRA: return ARM_AM::asr; + case ISD::ROTR: return ARM_AM::ror; + //case ISD::ROTL: // Only if imm -> turn into ROTR. + // Can't handle RRX here, because it would require folding a flag into + // the addressing mode. :( This causes us to miss certain things. 
+ //case ARMISD::RRX: return ARM_AM::rrx; + } + } + + enum AMSubMode { + bad_am_submode = 0, + ia, + ib, + da, + db + }; + + static inline const char *getAMSubModeStr(AMSubMode Mode) { + switch (Mode) { + default: assert(0 && "Unknown addressing sub-mode!"); + case ARM_AM::ia: return "ia"; + case ARM_AM::ib: return "ib"; + case ARM_AM::da: return "da"; + case ARM_AM::db: return "db"; + } + } + + static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) { + switch (Mode) { + default: assert(0 && "Unknown addressing sub-mode!"); + case ARM_AM::ia: return isLD ? "fd" : "ea"; + case ARM_AM::ib: return isLD ? "ed" : "fa"; + case ARM_AM::da: return isLD ? "fa" : "ed"; + case ARM_AM::db: return isLD ? "ea" : "fd"; + } + } + + /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits. + /// + static inline unsigned rotr32(unsigned Val, unsigned Amt) { + assert(Amt < 32 && "Invalid rotate amount"); + return (Val >> Amt) | (Val << ((32-Amt)&31)); + } + + /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits. + /// + static inline unsigned rotl32(unsigned Val, unsigned Amt) { + assert(Amt < 32 && "Invalid rotate amount"); + return (Val << Amt) | (Val >> ((32-Amt)&31)); + } + + //===--------------------------------------------------------------------===// + // Addressing Mode #1: shift_operand with registers + //===--------------------------------------------------------------------===// + // + // This 'addressing mode' is used for arithmetic instructions. It can + // represent things like: + // reg + // reg [asr|lsl|lsr|ror|rrx] reg + // reg [asr|lsl|lsr|ror|rrx] imm + // + // This is stored three operands [rega, regb, opc]. The first is the base + // reg, the second is the shift amount (or reg0 if not present or imm). The + // third operand encodes the shift opcode and the imm if a reg isn't present. + // + static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) { + return ShOp | (Imm << 3); + } + static inline unsigned getSORegOffset(unsigned Op) { + return Op >> 3; + } + static inline ShiftOpc getSORegShOp(unsigned Op) { + return (ShiftOpc)(Op & 7); + } + + /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return + /// the 8-bit imm value. + static inline unsigned getSOImmValImm(unsigned Imm) { + return Imm & 0xFF; + } + /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return + /// the rotate amount. + static inline unsigned getSOImmValRot(unsigned Imm) { + return (Imm >> 8) * 2; + } + + /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand, + /// computing the rotate amount to use. If this immediate value cannot be + /// handled with a single shifter-op, determine a good rotate amount that will + /// take a maximal chunk of bits out of the immediate. + static inline unsigned getSOImmValRotate(unsigned Imm) { + // 8-bit (or less) immediates are trivially shifter_operands with a rotate + // of zero. + if ((Imm & ~255U) == 0) return 0; + + // Use CTZ to compute the rotate amount. + unsigned TZ = CountTrailingZeros_32(Imm); + + // Rotate amount must be even. Something like 0x200 must be rotated 8 bits, + // not 9. + unsigned RotAmt = TZ & ~1; + + // If we can handle this spread, return it. + if ((rotr32(Imm, RotAmt) & ~255U) == 0) + return (32-RotAmt)&31; // HW rotates right, not left. + + // For values like 0xF000000F, we should skip the first run of ones, then + // retry the hunt. 
+ if (Imm & 1) { + unsigned TrailingOnes = CountTrailingZeros_32(~Imm); + if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF + // Restart the search for a high-order bit after the initial seconds of + // ones. + unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1)); + + // Rotate amount must be even. + unsigned RotAmt2 = TZ2 & ~1; + + // If this fits, use it. + if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0) + return (32-RotAmt2)&31; // HW rotates right, not left. + } + } + + // Otherwise, we have no way to cover this span of bits with a single + // shifter_op immediate. Return a chunk of bits that will be useful to + // handle. + return (32-RotAmt)&31; // HW rotates right, not left. + } + + /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit + /// into an shifter_operand immediate operand, return the 12-bit encoding for + /// it. If not, return -1. + static inline int getSOImmVal(unsigned Arg) { + // 8-bit (or less) immediates are trivially shifter_operands with a rotate + // of zero. + if ((Arg & ~255U) == 0) return Arg; + + unsigned RotAmt = getSOImmValRotate(Arg); + + // If this cannot be handled with a single shifter_op, bail out. + if (rotr32(~255U, RotAmt) & Arg) + return -1; + + // Encode this correctly. + return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8); + } + + /// isSOImmTwoPartVal - Return true if the specified value can be obtained by + /// or'ing together two SOImmVal's. + static inline bool isSOImmTwoPartVal(unsigned V) { + // If this can be handled with a single shifter_op, bail out. + V = rotr32(~255U, getSOImmValRotate(V)) & V; + if (V == 0) + return false; + + // If this can be handled with two shifter_op's, accept. + V = rotr32(~255U, getSOImmValRotate(V)) & V; + return V == 0; + } + + /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, + /// return the first chunk of it. + static inline unsigned getSOImmTwoPartFirst(unsigned V) { + return rotr32(255U, getSOImmValRotate(V)) & V; + } + + /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, + /// return the second chunk of it. + static inline unsigned getSOImmTwoPartSecond(unsigned V) { + // Mask out the first hunk. + V = rotr32(~255U, getSOImmValRotate(V)) & V; + + // Take what's left. + assert(V == (rotr32(255U, getSOImmValRotate(V)) & V)); + return V; + } + + /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed + /// by a left shift. Returns the shift amount to use. + static inline unsigned getThumbImmValShift(unsigned Imm) { + // 8-bit (or less) immediates are trivially immediate operand with a shift + // of zero. + if ((Imm & ~255U) == 0) return 0; + + // Use CTZ to compute the shift amount. + return CountTrailingZeros_32(Imm); + } + + /// isThumbImmShiftedVal - Return true if the specified value can be obtained + /// by left shifting a 8-bit immediate. + static inline bool isThumbImmShiftedVal(unsigned V) { + // If this can be handled with + V = (~255U << getThumbImmValShift(V)) & V; + return V == 0; + } + + /// getThumbImmNonShiftedVal - If V is a value that satisfies + /// isThumbImmShiftedVal, return the non-shiftd value. + static inline unsigned getThumbImmNonShiftedVal(unsigned V) { + return V >> getThumbImmValShift(V); + } + + //===--------------------------------------------------------------------===// + // Addressing Mode #2 + //===--------------------------------------------------------------------===// + // + // This is used for most simple load/store instructions. 
+ // + // addrmode2 := reg +/- reg shop imm + // addrmode2 := reg +/- imm12 + // + // The first operand is always a Reg. The second operand is a reg if in + // reg/reg form, otherwise it's reg#0. The third field encodes the operation + // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. + // + // If this addressing mode is a frame index (before prolog/epilog insertion + // and code rewriting), this operand will have the form: FI#, reg0, <offs> + // with no shift amount for the frame offset. + // + static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) { + assert(Imm12 < (1 << 12) && "Imm too large!"); + bool isSub = Opc == sub; + return Imm12 | ((int)isSub << 12) | (SO << 13); + } + static inline unsigned getAM2Offset(unsigned AM2Opc) { + return AM2Opc & ((1 << 12)-1); + } + static inline AddrOpc getAM2Op(unsigned AM2Opc) { + return ((AM2Opc >> 12) & 1) ? sub : add; + } + static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) { + return (ShiftOpc)(AM2Opc >> 13); + } + + + //===--------------------------------------------------------------------===// + // Addressing Mode #3 + //===--------------------------------------------------------------------===// + // + // This is used for sign-extending loads, and load/store-pair instructions. + // + // addrmode3 := reg +/- reg + // addrmode3 := reg +/- imm8 + // + // The first operand is always a Reg. The second operand is a reg if in + // reg/reg form, otherwise it's reg#0. The third field encodes the operation + // in bit 8, the immediate in bits 0-7. + + /// getAM3Opc - This function encodes the addrmode3 opc field. + static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) { + bool isSub = Opc == sub; + return ((int)isSub << 8) | Offset; + } + static inline unsigned char getAM3Offset(unsigned AM3Opc) { + return AM3Opc & 0xFF; + } + static inline AddrOpc getAM3Op(unsigned AM3Opc) { + return ((AM3Opc >> 8) & 1) ? sub : add; + } + + //===--------------------------------------------------------------------===// + // Addressing Mode #4 + //===--------------------------------------------------------------------===// + // + // This is used for load / store multiple instructions. + // + // addrmode4 := reg, <mode> + // + // The four modes are: + // IA - Increment after + // IB - Increment before + // DA - Decrement after + // DB - Decrement before + // + // If the 4th bit (writeback)is set, then the base register is updated after + // the memory transfer. + + static inline AMSubMode getAM4SubMode(unsigned Mode) { + return (AMSubMode)(Mode & 0x7); + } + + static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) { + return (int)SubMode | ((int)WB << 3); + } + + static inline bool getAM4WBFlag(unsigned Mode) { + return (Mode >> 3) & 1; + } + + //===--------------------------------------------------------------------===// + // Addressing Mode #5 + //===--------------------------------------------------------------------===// + // + // This is used for coprocessor instructions, such as FP load/stores. + // + // addrmode5 := reg +/- imm8*4 + // + // The first operand is always a Reg. The third field encodes the operation + // in bit 8, the immediate in bits 0-7. + // + // This can also be used for FP load/store multiple ops. The third field encodes + // writeback mode in bit 8, the number of registers (or 2 times the number of + // registers for DPR ops) in bits 0-7. 
In addition, bit 9-11 encodes one of the + // following two sub-modes: + // + // IA - Increment after + // DB - Decrement before + + /// getAM5Opc - This function encodes the addrmode5 opc field. + static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { + bool isSub = Opc == sub; + return ((int)isSub << 8) | Offset; + } + static inline unsigned char getAM5Offset(unsigned AM5Opc) { + return AM5Opc & 0xFF; + } + static inline AddrOpc getAM5Op(unsigned AM5Opc) { + return ((AM5Opc >> 8) & 1) ? sub : add; + } + + /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and + /// FSTM instructions. + static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB, + unsigned char Offset) { + assert((SubMode == ia || SubMode == db) && + "Illegal addressing mode 5 sub-mode!"); + return ((int)SubMode << 9) | ((int)WB << 8) | Offset; + } + static inline AMSubMode getAM5SubMode(unsigned AM5Opc) { + return (AMSubMode)((AM5Opc >> 9) & 0x7); + } + static inline bool getAM5WBFlag(unsigned AM5Opc) { + return ((AM5Opc >> 8) & 1); + } + +} // end namespace ARM_AM +} // end namespace llvm + +#endif + diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h new file mode 100644 index 0000000..3b38375 --- /dev/null +++ b/lib/Target/ARM/ARMBuildAttrs.h @@ -0,0 +1,64 @@ +//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains enumerations and support routines for ARM build attributes +// as defined in ARM ABI addenda document (ABI release 2.07). +// +//===----------------------------------------------------------------------===// + +#ifndef __TARGET_ARMBUILDATTRS_H__ +#define __TARGET_ARMBUILDATTRS_H__ + +namespace ARMBuildAttrs { + enum { + File = 1, + Section = 2, + Symbol = 3, + CPU_raw_name = 4, + CPU_name = 5, + CPU_arch = 6, + CPU_arch_profile = 7, + ARM_ISA_use = 8, + THUMB_ISA_use = 9, + VFP_arch = 10, + WMMX_arch = 11, + Advanced_SIMD_arch = 12, + PCS_config = 13, + ABI_PCS_R9_use = 14, + ABI_PCS_RW_data = 15, + ABI_PCS_RO_data = 16, + ABI_PCS_GOT_use = 17, + ABI_PCS_wchar_t = 18, + ABI_FP_rounding = 19, + ABI_FP_denormal = 20, + ABI_FP_exceptions = 21, + ABI_FP_user_exceptions = 22, + ABI_FP_number_model = 23, + ABI_align8_needed = 24, + ABI_align8_preserved = 25, + ABI_enum_size = 26, + ABI_HardFP_use = 27, + ABI_VFP_args = 28, + ABI_WMMX_args = 29, + ABI_optimization_goals = 30, + ABI_FP_optimization_goals = 31, + compatibility = 32, + CPU_unaligned_access = 34, + VFP_HP_extension = 36, + ABI_FP_16bit_format = 38, + nodefaults = 64, + also_compatible_with = 65, + T2EE_use = 66, + conformance = 67, + Virtualization_use = 68, + MPextension_use = 70 + }; +} + +#endif // __TARGET_ARMBUILDATTRS_H__ diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td new file mode 100644 index 0000000..6cd786e --- /dev/null +++ b/lib/Target/ARM/ARMCallingConv.td @@ -0,0 +1,87 @@ +//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for ARM architecture. +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget<string F, CCAction A>: + CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>; + +/// CCIfAlign - Match of the original alignment of the arg +class CCIfAlign<string Align, CCAction A>: + CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; + +//===----------------------------------------------------------------------===// +// ARM APCS Calling Convention +//===----------------------------------------------------------------------===// +def CC_ARM_APCS : CallingConv<[ + + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // f64 is passed in pairs of GPRs, possibly split onto the stack + CCIfType<[f64], CCCustom<"CC_ARM_APCS_Custom_f64">>, + + CCIfType<[f32], CCBitConvertToType<i32>>, + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + + CCIfType<[i32], CCAssignToStack<4, 4>>, + CCIfType<[f64], CCAssignToStack<8, 4>> +]>; + +def RetCC_ARM_APCS : CallingConv<[ + CCIfType<[f32], CCBitConvertToType<i32>>, + CCIfType<[f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>, + + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM AAPCS (EABI) Calling Convention +//===----------------------------------------------------------------------===// +def CC_ARM_AAPCS : CallingConv<[ + + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // i64/f64 is passed in even pairs of GPRs + // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register + // (and the same is true for f64 if VFP is not enabled) + CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, + CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, + + CCIfType<[f32], CCBitConvertToType<i32>>, + CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&" + "ArgFlags.getOrigAlign() != 8", + CCAssignToReg<[R0, R1, R2, R3]>>>, + + CCIfType<[i32], CCAssignToStack<4, 4>>, + CCIfType<[f64], CCAssignToStack<8, 8>> +]>; + +def RetCC_ARM_AAPCS : CallingConv<[ + CCIfType<[f32], CCBitConvertToType<i32>>, + CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, + + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM Calling Convention Dispatch +//===----------------------------------------------------------------------===// + +def CC_ARM : CallingConv<[ + CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>, + CCDelegateTo<CC_ARM_APCS> +]>; + +def RetCC_ARM : CallingConv<[ + CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>, + CCDelegateTo<RetCC_ARM_APCS> +]>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp new file mode 100644 index 0000000..44fac12 --- /dev/null +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -0,0 +1,1411 @@ +//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the pass that transforms the ARM machine instructions into +// relocatable machine code. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMInstrInfo.h" +#include "ARMRelocations.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#ifndef NDEBUG +#include <iomanip> +#endif +using namespace llvm; + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + + class ARMCodeEmitter { + public: + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + unsigned getBinaryCodeForInstr(const MachineInstr &MI); + }; + + template<class CodeEmitter> + class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass, + public ARMCodeEmitter { + ARMJITInfo *JTI; + const ARMInstrInfo *II; + const TargetData *TD; + TargetMachine &TM; + CodeEmitter &MCE; + const std::vector<MachineConstantPoolEntry> *MCPEs; + const std::vector<MachineJumpTableEntry> *MJTEs; + bool IsPIC; + + public: + static char ID; + explicit Emitter(TargetMachine &tm, CodeEmitter &mce) + : MachineFunctionPass(&ID), JTI(0), II(0), TD(0), TM(tm), + MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + Emitter(TargetMachine &tm, CodeEmitter &mce, + const ARMInstrInfo &ii, const TargetData &td) + : MachineFunctionPass(&ID), JTI(0), II(&ii), TD(&td), TM(tm), + MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "ARM Machine Code Emitter"; + } + + void emitInstruction(const MachineInstr &MI); + + private: + + void emitWordLE(unsigned Binary); + + void emitDWordLE(uint64_t Binary); + + void emitConstPoolInstruction(const MachineInstr &MI); + + void emitMOVi2piecesInstruction(const MachineInstr &MI); + + void emitLEApcrelJTInstruction(const MachineInstr &MI); + + void emitPseudoMoveInstruction(const MachineInstr &MI); + + void addPCLabel(unsigned LabelID); + + void emitPseudoInstruction(const MachineInstr &MI); + + unsigned getMachineSoRegOpValue(const MachineInstr &MI, + const TargetInstrDesc &TID, + const MachineOperand &MO, + unsigned OpIdx); + + unsigned getMachineSoImmOpValue(unsigned SoImm); + + unsigned getAddrModeSBit(const MachineInstr &MI, + const TargetInstrDesc &TID) const; + + void emitDataProcessingInstruction(const MachineInstr &MI, + unsigned ImplicitRd = 0, + unsigned ImplicitRn = 0); + + void emitLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRd = 0, + unsigned ImplicitRn = 0); + + void emitMiscLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRn = 0); + + void emitLoadStoreMultipleInstruction(const 
MachineInstr &MI); + + void emitMulFrmInstruction(const MachineInstr &MI); + + void emitExtendInstruction(const MachineInstr &MI); + + void emitMiscArithInstruction(const MachineInstr &MI); + + void emitBranchInstruction(const MachineInstr &MI); + + void emitInlineJumpTable(unsigned JTIndex); + + void emitMiscBranchInstruction(const MachineInstr &MI); + + void emitVFPArithInstruction(const MachineInstr &MI); + + void emitVFPConversionInstruction(const MachineInstr &MI); + + void emitVFPLoadStoreInstruction(const MachineInstr &MI); + + void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI); + + void emitMiscInstruction(const MachineInstr &MI); + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO); + unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) { + return getMachineOpValue(MI, MI.getOperand(OpIdx)); + } + + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. + /// + unsigned getShiftOp(unsigned Imm) const ; + + /// Routines that handle operands which add machine relocations which are + /// fixed up by the relocation stage. + void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + bool NeedStub, intptr_t ACPV = 0); + void emitExternalSymbolAddress(const char *ES, unsigned Reloc); + void emitConstPoolAddress(unsigned CPI, unsigned Reloc); + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc); + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, + intptr_t JTBase = 0); + }; + template <class CodeEmitter> + char Emitter<CodeEmitter>::ID = 0; +} + +/// createARMCodeEmitterPass - Return a pass that emits the collected ARM code +/// to the specified MCE object. + +namespace llvm { + +FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM, + MachineCodeEmitter &MCE) { + return new Emitter<MachineCodeEmitter>(TM, MCE); +} +FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM, + JITCodeEmitter &JCE) { + return new Emitter<JITCodeEmitter>(TM, JCE); +} + +} // end namespace llvm + +template<class CodeEmitter> +bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { + assert((MF.getTarget().getRelocationModel() != Reloc::Default || + MF.getTarget().getRelocationModel() != Reloc::Static) && + "JIT relocation model must be set to static or default!"); + II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo(); + TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData(); + JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo(); + MCPEs = &MF.getConstantPool()->getConstants(); + MJTEs = &MF.getJumpTableInfo()->getJumpTables(); + IsPIC = TM.getRelocationModel() == Reloc::PIC_; + JTI->Initialize(MF, IsPIC); + + do { + DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n"; + MCE.startFunction(MF); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB) { + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) + emitInstruction(*I); + } + } while (MCE.finishFunction(MF)); + + return false; +} + +/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. 
+/// +template<class CodeEmitter> +unsigned Emitter<CodeEmitter>::getShiftOp(unsigned Imm) const { + switch (ARM_AM::getAM2ShiftOpc(Imm)) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::asr: return 2; + case ARM_AM::lsl: return 0; + case ARM_AM::lsr: return 1; + case ARM_AM::ror: + case ARM_AM::rrx: return 3; + } + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +template<class CodeEmitter> +unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) { + if (MO.isReg()) + return ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + else if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch); + else if (MO.isCPI()) { + const TargetInstrDesc &TID = MI.getDesc(); + // For VFP load, the immediate offset is multiplied by 4. + unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) + ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry; + emitConstPoolAddress(MO.getIndex(), Reloc); + } else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch); + else { + cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; + abort(); + } + return 0; +} + +/// emitGlobalAddress - Emit the specified address to the code stream. +/// +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + bool NeedStub, intptr_t ACPV) { + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + GV, ACPV, NeedStub)); +} + +/// emitExternalSymbolAddress - Arrange for the address of an external symbol to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES, + unsigned Reloc) { + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES)); +} + +/// emitConstPoolAddress - Arrange for the address of an constant pool +/// to be emitted to the current location in the function, and allow it to be PC +/// relative. +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, + unsigned Reloc) { + // Tell JIT emitter we'll resolve the address. + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, 0, true)); +} + +/// emitJumpTableAddress - Arrange for the address of a jump table to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTIndex, + unsigned Reloc) { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTIndex, 0, true)); +} + +/// emitMachineBasicBlock - Emit the specified address basic block. 
+template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc, intptr_t JTBase) { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB, JTBase)); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitWordLE(unsigned Binary) { +#ifndef NDEBUG + DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') + << Binary << std::dec << "\n"; +#endif + MCE.emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitDWordLE(uint64_t Binary) { +#ifndef NDEBUG + DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') + << (unsigned)Binary << std::dec << "\n"; + DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') + << (unsigned)(Binary >> 32) << std::dec << "\n"; +#endif + MCE.emitDWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI) { + DOUT << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI; + + NumEmitted++; // Keep track of the # of mi's emitted + switch (MI.getDesc().TSFlags & ARMII::FormMask) { + default: { + assert(0 && "Unhandled instruction encoding format!"); + break; + } + case ARMII::Pseudo: + emitPseudoInstruction(MI); + break; + case ARMII::DPFrm: + case ARMII::DPSoRegFrm: + emitDataProcessingInstruction(MI); + break; + case ARMII::LdFrm: + case ARMII::StFrm: + emitLoadStoreInstruction(MI); + break; + case ARMII::LdMiscFrm: + case ARMII::StMiscFrm: + emitMiscLoadStoreInstruction(MI); + break; + case ARMII::LdStMulFrm: + emitLoadStoreMultipleInstruction(MI); + break; + case ARMII::MulFrm: + emitMulFrmInstruction(MI); + break; + case ARMII::ExtFrm: + emitExtendInstruction(MI); + break; + case ARMII::ArithMiscFrm: + emitMiscArithInstruction(MI); + break; + case ARMII::BrFrm: + emitBranchInstruction(MI); + break; + case ARMII::BrMiscFrm: + emitMiscBranchInstruction(MI); + break; + // VFP instructions. + case ARMII::VFPUnaryFrm: + case ARMII::VFPBinaryFrm: + emitVFPArithInstruction(MI); + break; + case ARMII::VFPConv1Frm: + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + case ARMII::VFPConv4Frm: + case ARMII::VFPConv5Frm: + emitVFPConversionInstruction(MI); + break; + case ARMII::VFPLdStFrm: + emitVFPLoadStoreInstruction(MI); + break; + case ARMII::VFPLdStMulFrm: + emitVFPLoadStoreMultipleInstruction(MI); + break; + case ARMII::VFPMiscFrm: + emitMiscInstruction(MI); + break; + } +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) { + unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. + unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. + const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; + + // Remember the CONSTPOOL_ENTRY address for later relocation. + JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue()); + + // Emit constpool island entry. In most cases, the actual values will be + // resolved and relocated after code emission. 
+ if (MCPE.isMachineConstantPoolEntry()) { + ARMConstantPoolValue *ACPV = + static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); + + DOUT << " ** ARM constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'; + + GlobalValue *GV = ACPV->getGV(); + if (GV) { + assert(!ACPV->isStub() && "Don't know how to deal this yet!"); + if (ACPV->isNonLazyPointer()) + MCE.addRelocation(MachineRelocation::getIndirectSymbol( + MCE.getCurrentPCOffset(), ARM::reloc_arm_machine_cp_entry, GV, + (intptr_t)ACPV, false)); + else + emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, + ACPV->isStub() || isa<Function>(GV), (intptr_t)ACPV); + } else { + assert(!ACPV->isNonLazyPointer() && "Don't know how to deal this yet!"); + emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute); + } + emitWordLE(0); + } else { + Constant *CV = MCPE.Val.ConstVal; + +#ifndef NDEBUG + DOUT << " ** Constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " "; + if (const Function *F = dyn_cast<Function>(CV)) + DOUT << F->getName(); + else + DOUT << *CV; + DOUT << '\n'; +#endif + + if (GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV)); + emitWordLE(0); + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + uint32_t Val = *(uint32_t*)CI->getValue().getRawData(); + emitWordLE(Val); + } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + if (CFP->getType() == Type::FloatTy) + emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + else if (CFP->getType() == Type::DoubleTy) + emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + else { + assert(0 && "Unable to handle this constantpool entry!"); + abort(); + } + } else { + assert(0 && "Unable to handle this constantpool entry!"); + abort(); + } + } +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + assert(MO1.isImm() && "Not a valid so_imm value!"); + unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm()); + unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm()); + + // Emit the 'mov' instruction. + unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of <shifter_op> + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V1)); + emitWordLE(Binary); + + // Now the 'orr' instruction. + Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode Rn. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift; + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of <shifter_op> + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V2)); + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitLEApcrelJTInstruction(const MachineInstr &MI) { + // It's basically add r, pc, (LJTI - $+8) + + const TargetInstrDesc &TID = MI.getDesc(); + + // Emit the 'add' instruction. 
+ unsigned Binary = 0x4 << 21; // add: Insts{24-31} = 0b0100 + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, TID); + + // Encode Rd. + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode Rn which is PC. + Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + + // Encode the displacement. + // Set bit I(25) to identify this is the immediate form of <shifter_op>. + Binary |= 1 << ARMII::I_BitShift; + emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitPseudoMoveInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag) + Binary |= 1 << ARMII::S_BitShift; + + // Encode register def if there is one. + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode the shift operation. + switch (Opcode) { + default: break; + case ARM::MOVrx: + // rrx + Binary |= 0x6 << 4; + break; + case ARM::MOVsrl_flag: + // lsr #1 + Binary |= (0x2 << 4) | (1 << 7); + break; + case ARM::MOVsra_flag: + // asr #1 + Binary |= (0x4 << 4) | (1 << 7); + break; + } + + // Encode register Rm. + Binary |= getMachineOpValue(MI, 1); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::addPCLabel(unsigned LabelID) { + DOUT << " ** LPC" << LabelID << " @ " + << (void*)MCE.getCurrentPCValue() << '\n'; + JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; + switch (Opcode) { + default: + abort(); // FIXME: + case TargetInstrInfo::INLINEASM: { + // We allow inline assembler nodes with empty bodies - they can + // implicitly define registers, which is ok for JIT. + if (MI.getOperand(0).getSymbolName()[0]) { + assert(0 && "JIT does not support inline asm!\n"); + abort(); + } + break; + } + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getImm()); + break; + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::DECLARE: + case ARM::DWARF_LOC: + // Do nothing. + break; + case ARM::CONSTPOOL_ENTRY: + emitConstPoolInstruction(MI); + break; + case ARM::PICADD: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // PICADD is just an add instruction that implicitly read pc. + emitDataProcessingInstruction(MI, 0, ARM::PC); + break; + } + case ARM::PICLDR: + case ARM::PICLDRB: + case ARM::PICSTR: + case ARM::PICSTRB: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // These are just load / store instructions that implicitly read pc. + emitLoadStoreInstruction(MI, 0, ARM::PC); + break; + } + case ARM::PICLDRH: + case ARM::PICLDRSH: + case ARM::PICLDRSB: + case ARM::PICSTRH: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // These are just load / store instructions that implicitly read pc. 
+ emitMiscLoadStoreInstruction(MI, ARM::PC); + break; + } + case ARM::MOVi2pieces: + // Two instructions to materialize a constant. + emitMOVi2piecesInstruction(MI); + break; + case ARM::LEApcrelJT: + // Materialize jumptable address. + emitLEApcrelJTInstruction(MI); + break; + case ARM::MOVrx: + case ARM::MOVsrl_flag: + case ARM::MOVsra_flag: + emitPseudoMoveInstruction(MI); + break; + } +} + +template<class CodeEmitter> +unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue( + const MachineInstr &MI, + const TargetInstrDesc &TID, + const MachineOperand &MO, + unsigned OpIdx) { + unsigned Binary = getMachineOpValue(MI, MO); + + const MachineOperand &MO1 = MI.getOperand(OpIdx + 1); + const MachineOperand &MO2 = MI.getOperand(OpIdx + 2); + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); + + // Encode the shift opcode. + unsigned SBits = 0; + unsigned Rs = MO1.getReg(); + if (Rs) { + // Set shift operand (bit[7:4]). + // LSL - 0001 + // LSR - 0011 + // ASR - 0101 + // ROR - 0111 + // RRX - 0110 and bit[11:8] clear. + switch (SOpc) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x1; break; + case ARM_AM::lsr: SBits = 0x3; break; + case ARM_AM::asr: SBits = 0x5; break; + case ARM_AM::ror: SBits = 0x7; break; + case ARM_AM::rrx: SBits = 0x6; break; + } + } else { + // Set shift operand (bit[6:4]). + // LSL - 000 + // LSR - 010 + // ASR - 100 + // ROR - 110 + switch (SOpc) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x0; break; + case ARM_AM::lsr: SBits = 0x2; break; + case ARM_AM::asr: SBits = 0x4; break; + case ARM_AM::ror: SBits = 0x6; break; + } + } + Binary |= SBits << 4; + if (SOpc == ARM_AM::rrx) + return Binary; + + // Encode the shift operation Rs or shift_imm (except rrx). + if (Rs) { + // Encode Rs bit[11:8]. + assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); + return Binary | + (ARMRegisterInfo::getRegisterNumbering(Rs) << ARMII::RegRsShift); + } + + // Encode shift_imm bit[11:7]. + return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; +} + +template<class CodeEmitter> +unsigned Emitter<CodeEmitter>::getMachineSoImmOpValue(unsigned SoImm) { + // Encode rotate_imm. + unsigned Binary = (ARM_AM::getSOImmValRot(SoImm) >> 1) + << ARMII::SoRotImmShift; + + // Encode immed_8. + Binary |= ARM_AM::getSOImmValImm(SoImm); + return Binary; +} + +template<class CodeEmitter> +unsigned Emitter<CodeEmitter>::getAddrModeSBit(const MachineInstr &MI, + const TargetInstrDesc &TID) const { + for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ + const MachineOperand &MO = MI.getOperand(i-1); + if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) + return 1 << ARMII::S_BitShift; + } + return 0; +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitDataProcessingInstruction( + const MachineInstr &MI, + unsigned ImplicitRd, + unsigned ImplicitRn) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, TID); + + // Encode register def if there is one. + unsigned NumDefs = TID.getNumDefs(); + unsigned OpIdx = 0; + if (NumDefs) + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + else if (ImplicitRd) + // Special handling for implicit use (e.g. PC). 
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd) + << ARMII::RegRdShift); + + // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + // Encode first non-shifter register operand if there is one. + bool isUnary = TID.TSFlags & ARMII::UnaryDP; + if (!isUnary) { + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) + << ARMII::RegRnShift); + else { + Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; + ++OpIdx; + } + } + + // Encode shifter operand. + const MachineOperand &MO = MI.getOperand(OpIdx); + if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { + // Encode SoReg. + emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx)); + return; + } + + if (MO.isReg()) { + // Encode register Rm. + emitWordLE(Binary | ARMRegisterInfo::getRegisterNumbering(MO.getReg())); + return; + } + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of <shifter_op>. + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(MO.getImm()); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitLoadStoreInstruction( + const MachineInstr &MI, + unsigned ImplicitRd, + unsigned ImplicitRn) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Form = TID.TSFlags & ARMII::FormMask; + bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Operand 0 of a pre- and post-indexed store is the address base + // writeback. Skip it. + bool Skipped = false; + if (IsPrePost && Form == ARMII::StFrm) { + ++OpIdx; + Skipped = true; + } + + // Set first operand + if (ImplicitRd) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd) + << ARMII::RegRdShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + // Set second operand + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) + << ARMII::RegRnShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // If this is a two-address operand, skip it. e.g. LDR_PRE. + if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + const MachineOperand &MO2 = MI.getOperand(OpIdx); + unsigned AM2Opc = (ImplicitRn == ARM::PC) + ? 0 : MI.getOperand(OpIdx+1).getImm(); + + // Set bit U(23) according to sign of immed value (positive or negative). + Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) << + ARMII::U_BitShift); + if (!MO2.getReg()) { // is immediate + if (ARM_AM::getAM2Offset(AM2Opc)) + // Set the value of offset_12 field + Binary |= ARM_AM::getAM2Offset(AM2Opc); + emitWordLE(Binary); + return; + } + + // Set bit I(25), because this is not in immediate enconding. + Binary |= 1 << ARMII::I_BitShift; + assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); + // Set bit[3:0] to the corresponding Rm register + Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg()); + + // If this instr is in scaled register offset/index instruction, set + // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. 
+ if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) { + Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift + Binary |= ShImm << ARMII::ShiftShift; // shift_immed + } + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRn) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Form = TID.TSFlags & ARMII::FormMask; + bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Operand 0 of a pre- and post-indexed store is the address base + // writeback. Skip it. + bool Skipped = false; + if (IsPrePost && Form == ARMII::StMiscFrm) { + ++OpIdx; + Skipped = true; + } + + // Set first operand + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + // Set second operand + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) + << ARMII::RegRnShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // If this is a two-address operand, skip it. e.g. LDRH_POST. + if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + const MachineOperand &MO2 = MI.getOperand(OpIdx); + unsigned AM3Opc = (ImplicitRn == ARM::PC) + ? 0 : MI.getOperand(OpIdx+1).getImm(); + + // Set bit U(23) according to sign of immed value (positive or negative) + Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) << + ARMII::U_BitShift); + + // If this instr is in register offset/index encoding, set bit[3:0] + // to the corresponding Rm register. + if (MO2.getReg()) { + Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg()); + emitWordLE(Binary); + return; + } + + // This instr is in immediate offset/index encoding, set bit 22 to 1. + Binary |= 1 << ARMII::AM3_I_BitShift; + if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) { + // Set operands + Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH + Binary |= (ImmOffs & 0xF); // immedL + } + + emitWordLE(Binary); +} + +static unsigned getAddrModeUPBits(unsigned Mode) { + unsigned Binary = 0; + + // Set addressing mode by modifying bits U(23) and P(24) + // IA - Increment after - bit U = 1 and bit P = 0 + // IB - Increment before - bit U = 1 and bit P = 1 + // DA - Decrement after - bit U = 0 and bit P = 0 + // DB - Decrement before - bit U = 0 and bit P = 1 + switch (Mode) { + default: assert(0 && "Unknown addressing sub-mode!"); + case ARM_AM::da: break; + case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break; + case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break; + case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break; + } + + return Binary; +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitLoadStoreMultipleInstruction( + const MachineInstr &MI) { + // Part of binary is determined by TableGn. 
+ unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Set base address operand + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift; + + // Set addressing mode by modifying bits U(23) and P(24) + const MachineOperand &MO = MI.getOperand(1); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); + + // Set bit W(21) + if (ARM_AM::getAM4WBFlag(MO.getImm())) + Binary |= 0x1 << ARMII::W_BitShift; + + // Set registers + for (unsigned i = 4, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + break; + unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + RegNum < 16); + Binary |= 0x1 << RegNum; + } + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMulFrmInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, TID); + + // 32x32->64bit operations have two destination registers. The number + // of register definitions will tell us if that's what we're dealing with. + unsigned OpIdx = 0; + if (TID.getNumDefs() == 2) + Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift; + + // Encode Rm + Binary |= getMachineOpValue(MI, OpIdx++); + + // Encode Rs + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift; + + // Many multiple instructions (e.g. MLA) have three src operands. Encode + // it as Rn (for multiply, that's in the same offset as RdLo. + if (TID.getNumOperands() > OpIdx && + !TID.OpInfo[OpIdx].isPredicate() && + !TID.OpInfo[OpIdx].isOptionalDef()) + Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift; + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitExtendInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + const MachineOperand &MO1 = MI.getOperand(OpIdx++); + const MachineOperand &MO2 = MI.getOperand(OpIdx); + if (MO2.isReg()) { + // Two register operand form. + // Encode Rn. + Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift; + + // Encode Rm. + Binary |= getMachineOpValue(MI, MO2); + ++OpIdx; + } else { + Binary |= getMachineOpValue(MI, MO1); + } + + // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand. + if (MI.getOperand(OpIdx).isImm() && + !TID.OpInfo[OpIdx].isPredicate() && + !TID.OpInfo[OpIdx].isOptionalDef()) + Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift; + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMiscArithInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. 
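+ // Two operand shapes reach this routine (instruction choices are
+ // illustrative, assuming they use this format): a two-register form like
+ //   clz r0, r1                 -> only Rd and Rm are encoded,
+ // and a three-register form with a shift like
+ //   pkhbt r0, r1, r2, lsl #8   -> Rd, Rn, Rm and shift_imm are encoded.
+ // The early return below takes the first shape when the first source
+ // operand is followed only by the predicate (or by nothing at all).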
+ unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + const MachineOperand &MO = MI.getOperand(OpIdx++); + if (OpIdx == TID.getNumOperands() || + TID.OpInfo[OpIdx].isPredicate() || + TID.OpInfo[OpIdx].isOptionalDef()) { + // Encode Rm and it's done. + Binary |= getMachineOpValue(MI, MO); + emitWordLE(Binary); + return; + } + + // Encode Rn. + Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift; + + // Encode Rm. + Binary |= getMachineOpValue(MI, OpIdx++); + + // Encode shift_imm. + unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); + assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); + Binary |= ShiftAmt << ARMII::ShiftShift; + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitBranchInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + if (TID.Opcode == ARM::TPsoft) + abort(); // FIXME + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Set signed_immed_24 field + Binary |= getMachineOpValue(MI, 0); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitInlineJumpTable(unsigned JTIndex) { + // Remember the base address of the inline jump table. + uintptr_t JTBase = MCE.getCurrentPCValue(); + JTI->addJumpTableBaseAddr(JTIndex, JTBase); + DOUT << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase << '\n'; + + // Now emit the jump table entries. + const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs; + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { + if (IsPIC) + // DestBB address - JT base. + emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase); + else + // Absolute DestBB address. + emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute); + emitWordLE(0); + } +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Handle jump tables. + if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) { + // First emit a ldr pc, [] instruction. + emitDataProcessingInstruction(MI, ARM::PC); + + // Then emit the inline jump table. + unsigned JTIndex = (TID.Opcode == ARM::BR_JTr) + ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); + emitInlineJumpTable(JTIndex); + return; + } else if (TID.Opcode == ARM::BR_JTm) { + // First emit a ldr pc, [] instruction. + emitLoadStoreInstruction(MI, ARM::PC); + + // Then emit the inline jump table. + emitInlineJumpTable(MI.getOperand(3).getIndex()); + return; + } + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + if (TID.Opcode == ARM::BX_RET) + // The return register is LR. 
+ Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR); + else + // otherwise, set the return register + Binary |= getMachineOpValue(MI, 0); + + emitWordLE(Binary); +} + +static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegD = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = false; + RegD = ARMRegisterInfo::getRegisterNumbering(RegD, isSPVFP); + if (!isSPVFP) + Binary |= RegD << ARMII::RegRdShift; + else { + Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift; + Binary |= (RegD & 0x01) << ARMII::D_BitShift; + } + return Binary; +} + +static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegN = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = false; + RegN = ARMRegisterInfo::getRegisterNumbering(RegN, isSPVFP); + if (!isSPVFP) + Binary |= RegN << ARMII::RegRnShift; + else { + Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift; + Binary |= (RegN & 0x01) << ARMII::N_BitShift; + } + return Binary; +} + +static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegM = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = false; + RegM = ARMRegisterInfo::getRegisterNumbering(RegM, isSPVFP); + if (!isSPVFP) + Binary |= RegM; + else { + Binary |= ((RegM & 0x1E) >> 1); + Binary |= (RegM & 0x01) << ARMII::M_BitShift; + } + return Binary; +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitVFPArithInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + assert((Binary & ARMII::D_BitShift) == 0 && + (Binary & ARMII::N_BitShift) == 0 && + (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!"); + + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, OpIdx++); + + // If this is a two-address operand, skip it, e.g. FMACD. + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + // Encode Dn / Sn. + if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm) + Binary |= encodeVFPRn(MI, OpIdx++); + + if (OpIdx == TID.getNumOperands() || + TID.OpInfo[OpIdx].isPredicate() || + TID.OpInfo[OpIdx].isOptionalDef()) { + // FCMPEZD etc. has only one operand. + emitWordLE(Binary); + return; + } + + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, OpIdx); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitVFPConversionInstruction( + const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Form = TID.TSFlags & ARMII::FormMask; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + switch (Form) { + default: break; + case ARMII::VFPConv1Frm: + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, 0); + break; + case ARMII::VFPConv4Frm: + // Encode Dn / Sn. + Binary |= encodeVFPRn(MI, 0); + break; + case ARMII::VFPConv5Frm: + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 0); + break; + } + + switch (Form) { + default: break; + case ARMII::VFPConv1Frm: + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 1); + break; + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + // Encode Dn / Sn. 
+ Binary |= encodeVFPRn(MI, 1); + break; + case ARMII::VFPConv4Frm: + case ARMII::VFPConv5Frm: + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, 1); + break; + } + + if (Form == ARMII::VFPConv5Frm) + // Encode Dn / Sn. + Binary |= encodeVFPRn(MI, 2); + else if (Form == ARMII::VFPConv3Frm) + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 2); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitVFPLoadStoreInstruction(const MachineInstr &MI) { + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, OpIdx++); + + // Encode address base. + const MachineOperand &Base = MI.getOperand(OpIdx++); + Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift; + + // If there is a non-zero immediate offset, encode it. + if (Base.isReg()) { + const MachineOperand &Offset = MI.getOperand(OpIdx); + if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) { + if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add) + Binary |= 1 << ARMII::U_BitShift; + Binary |= ImmOffs; + emitWordLE(Binary); + return; + } + } + + // If immediate offset is omitted, default to +0. + Binary |= 1 << ARMII::U_BitShift; + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitVFPLoadStoreMultipleInstruction( + const MachineInstr &MI) { + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Set base address operand + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift; + + // Set addressing mode by modifying bits U(23) and P(24) + const MachineOperand &MO = MI.getOperand(1); + Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm())); + + // Set bit W(21) + if (ARM_AM::getAM5WBFlag(MO.getImm())) + Binary |= 0x1 << ARMII::W_BitShift; + + // First register is encoded in Dd. + Binary |= encodeVFPRd(MI, 4); + + // Number of registers are encoded in offset field. + unsigned NumRegs = 1; + for (unsigned i = 5, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + break; + ++NumRegs; + } + Binary |= NumRegs * 2; + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMiscInstruction(const MachineInstr &MI) { + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + emitWordLE(Binary); +} + +#include "ARMGenCodeEmitter.inc" + diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp new file mode 100644 index 0000000..db723fe --- /dev/null +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -0,0 +1,1285 @@ +//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that splits the constant pool up into 'islands' +// which are scattered through-out the function. 
This is required due to the +// limited pc-relative displacements that ARM has. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-cp-islands" +#include "ARM.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMInstrInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumCPEs, "Number of constpool entries"); +STATISTIC(NumSplit, "Number of uncond branches inserted"); +STATISTIC(NumCBrFixed, "Number of cond branches fixed"); +STATISTIC(NumUBrFixed, "Number of uncond branches fixed"); + +namespace { + /// ARMConstantIslands - Due to limited PC-relative displacements, ARM + /// requires constant pool entries to be scattered among the instructions + /// inside a function. To do this, it completely ignores the normal LLVM + /// constant pool; instead, it places constants wherever it feels like with + /// special instructions. + /// + /// The terminology used in this pass includes: + /// Islands - Clumps of constants placed in the function. + /// Water - Potential places where an island could be formed. + /// CPE - A constant pool entry that has been placed somewhere, which + /// tracks a list of users. + class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass { + /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed + /// by MBB Number. The two-byte pads required for Thumb alignment are + /// counted as part of the following block (i.e., the offset and size for + /// a padded block will both be ==2 mod 4). + std::vector<unsigned> BBSizes; + + /// BBOffsets - the offset of each MBB in bytes, starting from 0. + /// The two-byte pads required for Thumb alignment are counted as part of + /// the following block. + std::vector<unsigned> BBOffsets; + + /// WaterList - A sorted list of basic blocks where islands could be placed + /// (i.e. blocks that don't fall through to the following block, due + /// to a return, unreachable, or unconditional branch). + std::vector<MachineBasicBlock*> WaterList; + + /// CPUser - One user of a constant pool, keeping the machine instruction + /// pointer, the constant pool being referenced, and the max displacement + /// allowed from the instruction to the CP. + struct CPUser { + MachineInstr *MI; + MachineInstr *CPEMI; + unsigned MaxDisp; + CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp) + : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {} + }; + + /// CPUsers - Keep track of all of the machine instructions that use various + /// constant pools and their max displacement. + std::vector<CPUser> CPUsers; + + /// CPEntry - One per constant pool entry, keeping the machine instruction + /// pointer, the constpool index, and the number of CPUser's which + /// reference this entry. + struct CPEntry { + MachineInstr *CPEMI; + unsigned CPI; + unsigned RefCount; + CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0) + : CPEMI(cpemi), CPI(cpi), RefCount(rc) {} + }; + + /// CPEntries - Keep track of all of the constant pool entry machine + /// instructions. For each original constpool index (i.e. those that + /// existed upon entry to this pass), it keeps a vector of entries. 
+ /// Original elements are cloned as we go along; the clones are + /// put in the vector of the original element, but have distinct CPIs. + std::vector<std::vector<CPEntry> > CPEntries; + + /// ImmBranch - One per immediate branch, keeping the machine instruction + /// pointer, conditional or unconditional, the max displacement, + /// and (if isCond is true) the corresponding unconditional branch + /// opcode. + struct ImmBranch { + MachineInstr *MI; + unsigned MaxDisp : 31; + bool isCond : 1; + int UncondBr; + ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr) + : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {} + }; + + /// ImmBranches - Keep track of all the immediate branch instructions. + /// + std::vector<ImmBranch> ImmBranches; + + /// PushPopMIs - Keep track of all the Thumb push / pop instructions. + /// + SmallVector<MachineInstr*, 4> PushPopMIs; + + /// HasFarJump - True if any far jump instruction has been emitted during + /// the branch fix up pass. + bool HasFarJump; + + const TargetInstrInfo *TII; + ARMFunctionInfo *AFI; + bool isThumb; + public: + static char ID; + ARMConstantIslands() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM constant island placement and branch shortening pass"; + } + + private: + void DoInitialPlacement(MachineFunction &Fn, + std::vector<MachineInstr*> &CPEMIs); + CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); + void InitialFunctionScan(MachineFunction &Fn, + const std::vector<MachineInstr*> &CPEMIs); + MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); + void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); + void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta); + bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); + int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); + bool LookForWater(CPUser&U, unsigned UserOffset, + MachineBasicBlock** NewMBB); + MachineBasicBlock* AcceptWater(MachineBasicBlock *WaterBB, + std::vector<MachineBasicBlock*>::iterator IP); + void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, + MachineBasicBlock** NewMBB); + bool HandleConstantPoolUser(MachineFunction &Fn, unsigned CPUserIndex); + void RemoveDeadCPEMI(MachineInstr *CPEMI); + bool RemoveUnusedCPEntries(); + bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, + MachineInstr *CPEMI, unsigned Disp, + bool DoDump); + bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water, + CPUser &U); + bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, + unsigned Disp, bool NegativeOK); + bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); + bool FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br); + bool FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br); + bool FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br); + bool UndoLRSpillRestore(); + + unsigned GetOffsetOf(MachineInstr *MI) const; + void dumpBBs(); + void verify(MachineFunction &Fn); + }; + char ARMConstantIslands::ID = 0; +} + +/// verify - check BBOffsets, BBSizes, alignment of islands +void ARMConstantIslands::verify(MachineFunction &Fn) { + assert(BBOffsets.size() == BBSizes.size()); + for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i) + assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]); + if (isThumb) { + for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + if (!MBB->empty() && + 
MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) + assert((BBOffsets[MBB->getNumber()]%4 == 0 && + BBSizes[MBB->getNumber()]%4 == 0) || + (BBOffsets[MBB->getNumber()]%4 != 0 && + BBSizes[MBB->getNumber()]%4 != 0)); + } + } +} + +/// print block size and offset information - debugging +void ARMConstantIslands::dumpBBs() { + for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) { + DOUT << "block " << J << " offset " << BBOffsets[J] << + " size " << BBSizes[J] << "\n"; + } +} + +/// createARMConstantIslandPass - returns an instance of the constpool +/// island pass. +FunctionPass *llvm::createARMConstantIslandPass() { + return new ARMConstantIslands(); +} + +bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { + MachineConstantPool &MCP = *Fn.getConstantPool(); + + TII = Fn.getTarget().getInstrInfo(); + AFI = Fn.getInfo<ARMFunctionInfo>(); + isThumb = AFI->isThumbFunction(); + + HasFarJump = false; + + // Renumber all of the machine basic blocks in the function, guaranteeing that + // the numbers agree with the position of the block in the function. + Fn.RenumberBlocks(); + + /// Thumb functions containing constant pools get 2-byte alignment. + /// This is so we can keep exact track of where the alignment padding goes. + /// Set default. + AFI->setAlign(isThumb ? 1U : 2U); + + // Perform the initial placement of the constant pool entries. To start with, + // we put them all at the end of the function. + std::vector<MachineInstr*> CPEMIs; + if (!MCP.isEmpty()) { + DoInitialPlacement(Fn, CPEMIs); + if (isThumb) + AFI->setAlign(2U); + } + + /// The next UID to take is the first unused one. + AFI->initConstPoolEntryUId(CPEMIs.size()); + + // Do the initial scan of the function, building up information about the + // sizes of each block, the location of all the water, and finding all of the + // constant pool users. + InitialFunctionScan(Fn, CPEMIs); + CPEMIs.clear(); + + /// Remove dead constant pool entries. + RemoveUnusedCPEntries(); + + // Iteratively place constant pool entries and fix up branches until there + // is no change. + bool MadeChange = false; + while (true) { + bool Change = false; + for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) + Change |= HandleConstantPoolUser(Fn, i); + DEBUG(dumpBBs()); + for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) + Change |= FixUpImmediateBr(Fn, ImmBranches[i]); + DEBUG(dumpBBs()); + if (!Change) + break; + MadeChange = true; + } + + // After a while, this might be made debug-only, but it is not expensive. + verify(Fn); + + // If LR has been forced spilled and no far jumps (i.e. BL) has been issued. + // Undo the spill / restore of LR if possible. + if (!HasFarJump && AFI->isLRSpilledForFarJump() && isThumb) + MadeChange |= UndoLRSpillRestore(); + + BBSizes.clear(); + BBOffsets.clear(); + WaterList.clear(); + CPUsers.clear(); + CPEntries.clear(); + ImmBranches.clear(); + PushPopMIs.clear(); + + return MadeChange; +} + +/// DoInitialPlacement - Perform the initial placement of the constant pool +/// entries. To start with, we put them all at the end of the function. +void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn, + std::vector<MachineInstr*> &CPEMIs) { + // Create the basic block to hold the CPE's. + MachineBasicBlock *BB = Fn.CreateMachineBasicBlock(); + Fn.push_back(BB); + + // Add all of the constants from the constant pool to the end block, use an + // identity mapping of CPI's to CPE's. 
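+ // Illustration of the initial layout (written informally): with three
+ // constants, the block just created at the end of the function holds
+ //   CONSTPOOL_ENTRY 0, cp#0, <size0>
+ //   CONSTPOOL_ENTRY 1, cp#1, <size1>
+ //   CONSTPOOL_ENTRY 2, cp#2, <size2>
+ // i.e. entry id == original constpool index. Clones created later by the
+ // placement loop keep the original index but are given fresh ids.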
+ const std::vector<MachineConstantPoolEntry> &CPs = + Fn.getConstantPool()->getConstants(); + + const TargetData &TD = *Fn.getTarget().getTargetData(); + for (unsigned i = 0, e = CPs.size(); i != e; ++i) { + unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); + // Verify that all constant pool entries are a multiple of 4 bytes. If not, + // we would have to pad them out or something so that instructions stay + // aligned. + assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!"); + MachineInstr *CPEMI = + BuildMI(BB, DebugLoc::getUnknownLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) + .addImm(i).addConstantPoolIndex(i).addImm(Size); + CPEMIs.push_back(CPEMI); + + // Add a new CPEntry, but no corresponding CPUser yet. + std::vector<CPEntry> CPEs; + CPEs.push_back(CPEntry(CPEMI, i)); + CPEntries.push_back(CPEs); + NumCPEs++; + DOUT << "Moved CPI#" << i << " to end of function as #" << i << "\n"; + } +} + +/// BBHasFallthrough - Return true if the specified basic block can fallthrough +/// into the block immediately after it. +static bool BBHasFallthrough(MachineBasicBlock *MBB) { + // Get the next machine basic block in the function. + MachineFunction::iterator MBBI = MBB; + if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function. + return false; + + MachineBasicBlock *NextBB = next(MBBI); + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I == NextBB) + return true; + + return false; +} + +/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI, +/// look up the corresponding CPEntry. +ARMConstantIslands::CPEntry +*ARMConstantIslands::findConstPoolEntry(unsigned CPI, + const MachineInstr *CPEMI) { + std::vector<CPEntry> &CPEs = CPEntries[CPI]; + // Number of entries per constpool index should be small, just do a + // linear search. + for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { + if (CPEs[i].CPEMI == CPEMI) + return &CPEs[i]; + } + return NULL; +} + +/// InitialFunctionScan - Do the initial scan of the function, building up +/// information about the sizes of each block, the location of all the water, +/// and finding all of the constant pool users. +void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, + const std::vector<MachineInstr*> &CPEMIs) { + unsigned Offset = 0; + for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + + // If this block doesn't fall through into the next MBB, then this is + // 'water' that a constant pool island could be placed. + if (!BBHasFallthrough(&MBB)) + WaterList.push_back(&MBB); + + unsigned MBBSize = 0; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + // Add instruction size to MBBSize. + MBBSize += TII->GetInstSizeInBytes(I); + + int Opc = I->getOpcode(); + if (I->getDesc().isBranch()) { + bool isCond = false; + unsigned Bits = 0; + unsigned Scale = 1; + int UOpc = Opc; + switch (Opc) { + case ARM::tBR_JTr: + // A Thumb table jump may involve padding; for the offsets to + // be right, functions containing these must be 4-byte aligned. 
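+ // Roughly: if this block would otherwise end at an offset that is
+ // 2 mod 4, the 2 bytes of padding counted into MBBSize below keep
+ // the following offsets word aligned.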
+ AFI->setAlign(2U); + if ((Offset+MBBSize)%4 != 0) + MBBSize += 2; // padding + continue; // Does not get an entry in ImmBranches + default: + continue; // Ignore other JT branches + case ARM::Bcc: + isCond = true; + UOpc = ARM::B; + // Fallthrough + case ARM::B: + Bits = 24; + Scale = 4; + break; + case ARM::tBcc: + isCond = true; + UOpc = ARM::tB; + Bits = 8; + Scale = 2; + break; + case ARM::tB: + Bits = 11; + Scale = 2; + break; + } + + // Record this immediate branch. + unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; + ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc)); + } + + if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET) + PushPopMIs.push_back(I); + + // Scan the instructions for constant pool operands. + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) + if (I->getOperand(op).isCPI()) { + // We found one. The addressing mode tells us the max displacement + // from the PC that this instruction permits. + + // Basic size info comes from the TSFlags field. + unsigned Bits = 0; + unsigned Scale = 1; + unsigned TSFlags = I->getDesc().TSFlags; + switch (TSFlags & ARMII::AddrModeMask) { + default: + // Constant pool entries can reach anything. + if (I->getOpcode() == ARM::CONSTPOOL_ENTRY) + continue; + if (I->getOpcode() == ARM::tLEApcrel) { + Bits = 8; // Taking the address of a CP entry. + break; + } + assert(0 && "Unknown addressing mode for CP reference!"); + case ARMII::AddrMode1: // AM1: 8 bits << 2 + Bits = 8; + Scale = 4; // Taking the address of a CP entry. + break; + case ARMII::AddrMode2: + Bits = 12; // +-offset_12 + break; + case ARMII::AddrMode3: + Bits = 8; // +-offset_8 + break; + // addrmode4 has no immediate offset. + case ARMII::AddrMode5: + Bits = 8; + Scale = 4; // +-(offset_8*4) + break; + case ARMII::AddrModeT1: + Bits = 5; // +offset_5 + break; + case ARMII::AddrModeT2: + Bits = 5; + Scale = 2; // +(offset_5*2) + break; + case ARMII::AddrModeT4: + Bits = 5; + Scale = 4; // +(offset_5*4) + break; + case ARMII::AddrModeTs: + Bits = 8; + Scale = 4; // +(offset_8*4) + break; + } + + // Remember that this is a user of a CP entry. + unsigned CPI = I->getOperand(op).getIndex(); + MachineInstr *CPEMI = CPEMIs[CPI]; + unsigned MaxOffs = ((1 << Bits)-1) * Scale; + CPUsers.push_back(CPUser(I, CPEMI, MaxOffs)); + + // Increment corresponding CPEntry reference count. + CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); + assert(CPE && "Cannot find a corresponding CPEntry!"); + CPE->RefCount++; + + // Instructions can only use one CP entry, don't bother scanning the + // rest of the operands. + break; + } + } + + // In thumb mode, if this block is a constpool island, we may need padding + // so it's aligned on 4 byte boundary. + if (isThumb && + !MBB.empty() && + MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY && + (Offset%4) != 0) + MBBSize += 2; + + BBSizes.push_back(MBBSize); + BBOffsets.push_back(Offset); + Offset += MBBSize; + } +} + +/// GetOffsetOf - Return the current offset of the specified machine instruction +/// from the start of the function. This offset changes as stuff is moved +/// around inside the function. +unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { + MachineBasicBlock *MBB = MI->getParent(); + + // The offset is composed of two things: the sum of the sizes of all MBB's + // before this instruction's block, and the offset from the start of the block + // it is in. 
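+ // Concretely (numbers illustrative): if MI's block starts at
+ // BBOffsets[n] = 0x40 and the instructions before MI in that block total
+ // 12 bytes, the result is 0x4C. The Thumb CONSTPOOL_ENTRY case below
+ // additionally counts the 2 bytes of alignment padding that belong to the
+ // start of such a block.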
+ unsigned Offset = BBOffsets[MBB->getNumber()]; + + // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has + // alignment padding, and compensate if so. + if (isThumb && + MI->getOpcode() == ARM::CONSTPOOL_ENTRY && + Offset%4 != 0) + Offset += 2; + + // Sum instructions before MI in MBB. + for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) { + assert(I != MBB->end() && "Didn't find MI in its own basic block?"); + if (&*I == MI) return Offset; + Offset += TII->GetInstSizeInBytes(I); + } +} + +/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB +/// ID. +static bool CompareMBBNumbers(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) { + return LHS->getNumber() < RHS->getNumber(); +} + +/// UpdateForInsertedWaterBlock - When a block is newly inserted into the +/// machine function, it upsets all of the block numbers. Renumber the blocks +/// and update the arrays that parallel this numbering. +void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { + // Renumber the MBB's to keep them consequtive. + NewBB->getParent()->RenumberBlocks(NewBB); + + // Insert a size into BBSizes to align it properly with the (newly + // renumbered) block numbers. + BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); + + // Likewise for BBOffsets. + BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); + + // Next, update WaterList. Specifically, we need to add NewMBB as having + // available water after it. + std::vector<MachineBasicBlock*>::iterator IP = + std::lower_bound(WaterList.begin(), WaterList.end(), NewBB, + CompareMBBNumbers); + WaterList.insert(IP, NewBB); +} + + +/// Split the basic block containing MI into two blocks, which are joined by +/// an unconditional branch. Update datastructures and renumber blocks to +/// account for this change and returns the newly created block. +MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { + MachineBasicBlock *OrigBB = MI->getParent(); + MachineFunction &MF = *OrigBB->getParent(); + + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF.CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MachineFunction::iterator MBBI = OrigBB; ++MBBI; + MF.insert(MBBI, NewBB); + + // Splice the instructions starting with MI over to NewBB. + NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); + + // Add an unconditional branch from OrigBB to NewBB. + // Note the new unconditional branch is not being recorded. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond to anything in the source. + BuildMI(OrigBB, DebugLoc::getUnknownLoc(), + TII->get(isThumb ? ARM::tB : ARM::B)).addMBB(NewBB); + NumSplit++; + + // Update the CFG. All succs of OrigBB are now succs of NewBB. + while (!OrigBB->succ_empty()) { + MachineBasicBlock *Succ = *OrigBB->succ_begin(); + OrigBB->removeSuccessor(Succ); + NewBB->addSuccessor(Succ); + + // This pass should be run after register allocation, so there should be no + // PHI nodes to update. + assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI) + && "PHI nodes should be eliminated by now!"); + } + + // OrigBB branches to NewBB. + OrigBB->addSuccessor(NewBB); + + // Update internal data structures to account for the newly inserted MBB. + // This is almost the same as UpdateForInsertedWaterBlock, except that + // the Water goes after OrigBB, not NewBB. 
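+ // Picture of the split (sketch):
+ //   before:  OrigBB: [ ..., MI, rest ]
+ //   after:   OrigBB: [ ..., B NewBB ]   <- new water lives after this
+ //            NewBB:  [ MI, rest ]
+ // which is why OrigBB rather than NewBB is added to WaterList below,
+ // unless OrigBB was already water, in which case NewBB is added instead.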
+ MF.RenumberBlocks(NewBB); + + // Insert a size into BBSizes to align it properly with the (newly + // renumbered) block numbers. + BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); + + // Likewise for BBOffsets. + BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); + + // Next, update WaterList. Specifically, we need to add OrigMBB as having + // available water after it (but not if it's already there, which happens + // when splitting before a conditional branch that is followed by an + // unconditional branch - in that case we want to insert NewBB). + std::vector<MachineBasicBlock*>::iterator IP = + std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB, + CompareMBBNumbers); + MachineBasicBlock* WaterBB = *IP; + if (WaterBB == OrigBB) + WaterList.insert(next(IP), NewBB); + else + WaterList.insert(IP, OrigBB); + + // Figure out how large the first NewMBB is. (It cannot + // contain a constpool_entry or tablejump.) + unsigned NewBBSize = 0; + for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); + I != E; ++I) + NewBBSize += TII->GetInstSizeInBytes(I); + + unsigned OrigBBI = OrigBB->getNumber(); + unsigned NewBBI = NewBB->getNumber(); + // Set the size of NewBB in BBSizes. + BBSizes[NewBBI] = NewBBSize; + + // We removed instructions from UserMBB, subtract that off from its size. + // Add 2 or 4 to the block to count the unconditional branch we added to it. + unsigned delta = isThumb ? 2 : 4; + BBSizes[OrigBBI] -= NewBBSize - delta; + + // ...and adjust BBOffsets for NewBB accordingly. + BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI]; + + // All BBOffsets following these blocks must be modified. + AdjustBBOffsetsAfter(NewBB, delta); + + return NewBB; +} + +/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool +/// reference) is within MaxDisp of TrialOffset (a proposed location of a +/// constant pool entry). +bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, + unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK) { + // On Thumb offsets==2 mod 4 are rounded down by the hardware for + // purposes of the displacement computation; compensate for that here. + // Effectively, the valid range of displacements is 2 bytes smaller for such + // references. + if (isThumb && UserOffset%4 !=0) + UserOffset -= 2; + // CPEs will be rounded up to a multiple of 4. + if (isThumb && TrialOffset%4 != 0) + TrialOffset += 2; + + if (UserOffset <= TrialOffset) { + // User before the Trial. + if (TrialOffset-UserOffset <= MaxDisp) + return true; + } else if (NegativeOK) { + if (UserOffset-TrialOffset <= MaxDisp) + return true; + } + return false; +} + +/// WaterIsInRange - Returns true if a CPE placed after the specified +/// Water (a basic block) will be in range for the specific MI. + +bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, + MachineBasicBlock* Water, CPUser &U) +{ + unsigned MaxDisp = U.MaxDisp; + MachineFunction::iterator I = next(MachineFunction::iterator(Water)); + unsigned CPEOffset = BBOffsets[Water->getNumber()] + + BBSizes[Water->getNumber()]; + + // If the CPE is to be inserted before the instruction, that will raise + // the offset of the instruction. (Currently applies only to ARM, so + // no alignment compensation attempted here.) 
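+ // Example (numbers illustrative): a user at offset 0x100 considering
+ // water that ends at 0x80 will itself be pushed forward once the entry is
+ // inserted there, so for an 8-byte entry the range check below uses a
+ // user offset of 0x108 rather than 0x100.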
+ if (CPEOffset < UserOffset) + UserOffset += U.CPEMI->getOperand(2).getImm(); + + return OffsetIsInRange (UserOffset, CPEOffset, MaxDisp, !isThumb); +} + +/// CPEIsInRange - Returns true if the distance between specific MI and +/// specific ConstPool entry instruction can fit in MI's displacement field. +bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, + MachineInstr *CPEMI, + unsigned MaxDisp, bool DoDump) { + unsigned CPEOffset = GetOffsetOf(CPEMI); + assert(CPEOffset%4 == 0 && "Misaligned CPE"); + + if (DoDump) { + DOUT << "User of CPE#" << CPEMI->getOperand(0).getImm() + << " max delta=" << MaxDisp + << " insn address=" << UserOffset + << " CPE address=" << CPEOffset + << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI; + } + + return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, !isThumb); +} + +#ifndef NDEBUG +/// BBIsJumpedOver - Return true of the specified basic block's only predecessor +/// unconditionally branches to its only successor. +static bool BBIsJumpedOver(MachineBasicBlock *MBB) { + if (MBB->pred_size() != 1 || MBB->succ_size() != 1) + return false; + + MachineBasicBlock *Succ = *MBB->succ_begin(); + MachineBasicBlock *Pred = *MBB->pred_begin(); + MachineInstr *PredMI = &Pred->back(); + if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB) + return PredMI->getOperand(0).getMBB() == Succ; + return false; +} +#endif // NDEBUG + +void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, + int delta) { + MachineFunction::iterator MBBI = BB; MBBI = next(MBBI); + for(unsigned i=BB->getNumber()+1; i<BB->getParent()->getNumBlockIDs(); i++) { + BBOffsets[i] += delta; + // If some existing blocks have padding, adjust the padding as needed, a + // bit tricky. delta can be negative so don't use % on that. + if (isThumb) { + MachineBasicBlock *MBB = MBBI; + if (!MBB->empty()) { + // Constant pool entries require padding. + if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { + unsigned oldOffset = BBOffsets[i] - delta; + if (oldOffset%4==0 && BBOffsets[i]%4!=0) { + // add new padding + BBSizes[i] += 2; + delta += 2; + } else if (oldOffset%4!=0 && BBOffsets[i]%4==0) { + // remove existing padding + BBSizes[i] -=2; + delta -= 2; + } + } + // Thumb jump tables require padding. They should be at the end; + // following unconditional branches are removed by AnalyzeBranch. + MachineInstr *ThumbJTMI = NULL; + if (prior(MBB->end())->getOpcode() == ARM::tBR_JTr) + ThumbJTMI = prior(MBB->end()); + if (ThumbJTMI) { + unsigned newMIOffset = GetOffsetOf(ThumbJTMI); + unsigned oldMIOffset = newMIOffset - delta; + if (oldMIOffset%4 == 0 && newMIOffset%4 != 0) { + // remove existing padding + BBSizes[i] -= 2; + delta -= 2; + } else if (oldMIOffset%4 != 0 && newMIOffset%4 == 0) { + // add new padding + BBSizes[i] += 2; + delta += 2; + } + } + if (delta==0) + return; + } + MBBI = next(MBBI); + } + } +} + +/// DecrementOldEntry - find the constant pool entry with index CPI +/// and instruction CPEMI, and decrement its refcount. If the refcount +/// becomes 0 remove the entry and instruction. Returns true if we removed +/// the entry, false if we didn't. + +bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { + // Find the old entry. Eliminate it if it is no longer used. 
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); + assert(CPE && "Unexpected!"); + if (--CPE->RefCount == 0) { + RemoveDeadCPEMI(CPEMI); + CPE->CPEMI = NULL; + NumCPEs--; + return true; + } + return false; +} + +/// LookForCPEntryInRange - see if the currently referenced CPE is in range; +/// if not, see if an in-range clone of the CPE is in range, and if so, +/// change the data structures so the user references the clone. Returns: +/// 0 = no existing entry found +/// 1 = entry found, and there were no code insertions or deletions +/// 2 = entry found, and there were code insertions or deletions +int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) +{ + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + + // Check to see if the CPE is already in-range. + if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, true)) { + DOUT << "In range\n"; + return 1; + } + + // No. Look for previously created clones of the CPE that are in range. + unsigned CPI = CPEMI->getOperand(1).getIndex(); + std::vector<CPEntry> &CPEs = CPEntries[CPI]; + for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { + // We already tried this one + if (CPEs[i].CPEMI == CPEMI) + continue; + // Removing CPEs can leave empty entries, skip + if (CPEs[i].CPEMI == NULL) + continue; + if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, false)) { + DOUT << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"; + // Point the CPUser node to the replacement + U.CPEMI = CPEs[i].CPEMI; + // Change the CPI in the instruction operand to refer to the clone. + for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j) + if (UserMI->getOperand(j).isCPI()) { + UserMI->getOperand(j).setIndex(CPEs[i].CPI); + break; + } + // Adjust the refcount of the clone... + CPEs[i].RefCount++; + // ...and the original. If we didn't remove the old entry, none of the + // addresses changed, so we don't need another pass. + return DecrementOldEntry(CPI, CPEMI) ? 2 : 1; + } + } + return 0; +} + +/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in +/// the specific unconditional branch instruction. +static inline unsigned getUnconditionalBrDisp(int Opc) { + return (Opc == ARM::tB) ? ((1<<10)-1)*2 : ((1<<23)-1)*4; +} + +/// AcceptWater - Small amount of common code factored out of the following. + +MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB, + std::vector<MachineBasicBlock*>::iterator IP) { + DOUT << "found water in range\n"; + // Remove the original WaterList entry; we want subsequent + // insertions in this vicinity to go after the one we're + // about to insert. This considerably reduces the number + // of times we have to move the same CPE more than once. + WaterList.erase(IP); + // CPE goes before following block (NewMBB). + return next(MachineFunction::iterator(WaterBB)); +} + +/// LookForWater - look for an existing entry in the WaterList in which +/// we can place the CPE referenced from U so it's within range of U's MI. +/// Returns true if found, false if not. If it returns true, *NewMBB +/// is set to the WaterList entry. +/// For ARM, we prefer the water that's farthest away. For Thumb, prefer +/// water that will not introduce padding to water that will; within each +/// group, prefer the water that's farthest away. 
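+/// For example (block numbers illustrative): if the water after BB#2, BB#5
+/// and BB#7 is all in range, ARM simply takes BB#7 (the scan below walks
+/// WaterList from the back); Thumb also takes BB#7 unless an island there
+/// would need 2 bytes of alignment padding, in which case it prefers the
+/// farthest padding-free candidate and only pads if every candidate would.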
+ +bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, + MachineBasicBlock** NewMBB) { + std::vector<MachineBasicBlock*>::iterator IPThatWouldPad; + MachineBasicBlock* WaterBBThatWouldPad = NULL; + if (!WaterList.empty()) { + for (std::vector<MachineBasicBlock*>::iterator IP = prior(WaterList.end()), + B = WaterList.begin();; --IP) { + MachineBasicBlock* WaterBB = *IP; + if (WaterIsInRange(UserOffset, WaterBB, U)) { + if (isThumb && + (BBOffsets[WaterBB->getNumber()] + + BBSizes[WaterBB->getNumber()])%4 != 0) { + // This is valid Water, but would introduce padding. Remember + // it in case we don't find any Water that doesn't do this. + if (!WaterBBThatWouldPad) { + WaterBBThatWouldPad = WaterBB; + IPThatWouldPad = IP; + } + } else { + *NewMBB = AcceptWater(WaterBB, IP); + return true; + } + } + if (IP == B) + break; + } + } + if (isThumb && WaterBBThatWouldPad) { + *NewMBB = AcceptWater(WaterBBThatWouldPad, IPThatWouldPad); + return true; + } + return false; +} + +/// CreateNewWater - No existing WaterList entry will work for +/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the +/// block is used if in range, and the conditional branch munged so control +/// flow is correct. Otherwise the block is split to create a hole with an +/// unconditional branch around it. In either case *NewMBB is set to a +/// block following which the new island can be inserted (the WaterList +/// is not adjusted). + +void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, + unsigned UserOffset, MachineBasicBlock** NewMBB) { + CPUser &U = CPUsers[CPUserIndex]; + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + MachineBasicBlock *UserMBB = UserMI->getParent(); + unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] + + BBSizes[UserMBB->getNumber()]; + assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]); + + // If the use is at the end of the block, or the end of the block + // is within range, make new water there. (The addition below is + // for the unconditional branch we will be adding: 4 bytes on ARM, + // 2 on Thumb. Possible Thumb alignment padding is allowed for + // inside OffsetIsInRange. + // If the block ends in an unconditional branch already, it is water, + // and is known to be out of range, so we'll always be adding a branch.) + if (&UserMBB->back() == UserMI || + OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb ? 2: 4), + U.MaxDisp, !isThumb)) { + DOUT << "Split at end of block\n"; + if (&UserMBB->back() == UserMI) + assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!"); + *NewMBB = next(MachineFunction::iterator(UserMBB)); + // Add an unconditional branch from UserMBB to fallthrough block. + // Record it for branch lengthening; this new branch will not get out of + // range, but if the preceding conditional branch is out of range, the + // targets will be exchanged, and the altered branch may be out of + // range, so the machinery has to know about it. + int UncondBr = isThumb ? ARM::tB : ARM::B; + BuildMI(UserMBB, DebugLoc::getUnknownLoc(), + TII->get(UncondBr)).addMBB(*NewMBB); + unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); + ImmBranches.push_back(ImmBranch(&UserMBB->back(), + MaxDisp, false, UncondBr)); + int delta = isThumb ? 2 : 4; + BBSizes[UserMBB->getNumber()] += delta; + AdjustBBOffsetsAfter(UserMBB, delta); + } else { + // What a big block. Find a place within the block to split it. 
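+ // As a rough numeric preview (values illustrative): an ARM load with a
+ // +-4095 byte range whose (pc-adjusted) offset is 0x1000 gets a first
+ // guess split point of 0x1000 + 4095 - 4 = 0x1FFB, which is then clamped
+ // below the start of the next block and backed off further as needed.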
+ // This is a little tricky on Thumb since instructions are 2 bytes + // and constant pool entries are 4 bytes: if instruction I references + // island CPE, and instruction I+1 references CPE', it will + // not work well to put CPE as far forward as possible, since then + // CPE' cannot immediately follow it (that location is 2 bytes + // farther away from I+1 than CPE was from I) and we'd need to create + // a new island. So, we make a first guess, then walk through the + // instructions between the one currently being looked at and the + // possible insertion point, and make sure any other instructions + // that reference CPEs will be able to use the same island area; + // if not, we back up the insertion point. + + // The 4 in the following is for the unconditional branch we'll be + // inserting (allows for long branch on Thumb). Alignment of the + // island is handled inside OffsetIsInRange. + unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4; + // This could point off the end of the block if we've already got + // constant pool entries following this block; only the last one is + // in the water list. Back past any possible branches (allow for a + // conditional and a maximally long unconditional). + if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1]) + BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] - + (isThumb ? 6 : 8); + unsigned EndInsertOffset = BaseInsertOffset + + CPEMI->getOperand(2).getImm(); + MachineBasicBlock::iterator MI = UserMI; + ++MI; + unsigned CPUIndex = CPUserIndex+1; + for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); + Offset < BaseInsertOffset; + Offset += TII->GetInstSizeInBytes(MI), + MI = next(MI)) { + if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) { + if (!OffsetIsInRange(Offset, EndInsertOffset, + CPUsers[CPUIndex].MaxDisp, !isThumb)) { + BaseInsertOffset -= (isThumb ? 2 : 4); + EndInsertOffset -= (isThumb ? 2 : 4); + } + // This is overly conservative, as we don't account for CPEMIs + // being reused within the block, but it doesn't matter much. + EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm(); + CPUIndex++; + } + } + DOUT << "Split in middle of big block\n"; + *NewMBB = SplitBlockBeforeInstr(prior(MI)); + } +} + +/// HandleConstantPoolUser - Analyze the specified user, checking to see if it +/// is out-of-range. If so, pick up the constant pool value and move it some +/// place in-range. Return true if we changed any addresses (thus must run +/// another pass of branch lengthening), false otherwise. +bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, + unsigned CPUserIndex) { + CPUser &U = CPUsers[CPUserIndex]; + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned Size = CPEMI->getOperand(2).getImm(); + MachineBasicBlock *NewMBB; + // Compute this only once, it's expensive. The 4 or 8 is the value the + // hardware keeps in the PC (2 insns ahead of the reference). + unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8); + + // Special case: tLEApcrel are two instructions MI's. The actual user is the + // second instruction. + if (UserMI->getOpcode() == ARM::tLEApcrel) + UserOffset += 2; + + // See if the current entry is within range, or there is a clone of it + // in range. + int result = LookForExistingCPEntry(U, UserOffset); + if (result==1) return false; + else if (result==2) return true; + + // No existing clone of this CPE is within range. + // We will be generating a new clone. 
Get a UID for it. + unsigned ID = AFI->createConstPoolEntryUId(); + + // Look for water where we can place this CPE. We look for the farthest one + // away that will work. Forward references only for now (although later + // we might find some that are backwards). + + if (!LookForWater(U, UserOffset, &NewMBB)) { + // No water found. + DOUT << "No water found\n"; + CreateNewWater(CPUserIndex, UserOffset, &NewMBB); + } + + // Okay, we know we can put an island before NewMBB now, do it! + MachineBasicBlock *NewIsland = Fn.CreateMachineBasicBlock(); + Fn.insert(NewMBB, NewIsland); + + // Update internal data structures to account for the newly inserted MBB. + UpdateForInsertedWaterBlock(NewIsland); + + // Decrement the old entry, and remove it if refcount becomes 0. + DecrementOldEntry(CPI, CPEMI); + + // Now that we have an island to add the CPE to, clone the original CPE and + // add it to the island. + U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(), + TII->get(ARM::CONSTPOOL_ENTRY)) + .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); + CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); + NumCPEs++; + + BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; + // Compensate for .align 2 in thumb mode. + if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0) + Size += 2; + // Increase the size of the island block to account for the new entry. + BBSizes[NewIsland->getNumber()] += Size; + AdjustBBOffsetsAfter(NewIsland, Size); + + // Finally, change the CPI in the instruction operand to be ID. + for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) + if (UserMI->getOperand(i).isCPI()) { + UserMI->getOperand(i).setIndex(ID); + break; + } + + DOUT << " Moved CPE to #" << ID << " CPI=" << CPI << "\t" << *UserMI; + + return true; +} + +/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update +/// sizes and offsets of impacted basic blocks. +void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { + MachineBasicBlock *CPEBB = CPEMI->getParent(); + unsigned Size = CPEMI->getOperand(2).getImm(); + CPEMI->eraseFromParent(); + BBSizes[CPEBB->getNumber()] -= Size; + // All succeeding offsets have the current size value added in, fix this. + if (CPEBB->empty()) { + // In thumb mode, the size of island may be padded by two to compensate for + // the alignment requirement. Then it will now be 2 when the block is + // empty, so fix this. + // All succeeding offsets have the current size value added in, fix this. + if (BBSizes[CPEBB->getNumber()] != 0) { + Size += BBSizes[CPEBB->getNumber()]; + BBSizes[CPEBB->getNumber()] = 0; + } + } + AdjustBBOffsetsAfter(CPEBB, -Size); + // An island has only one predecessor BB and one successor BB. Check if + // this BB's predecessor jumps directly to this BB's successor. This + // shouldn't happen currently. + assert(!BBIsJumpedOver(CPEBB) && "How did this happen?"); + // FIXME: remove the empty blocks after all the work is done? +} + +/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts +/// are zero. 
+bool ARMConstantIslands::RemoveUnusedCPEntries() { + unsigned MadeChange = false; + for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { + std::vector<CPEntry> &CPEs = CPEntries[i]; + for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { + if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { + RemoveDeadCPEMI(CPEs[j].CPEMI); + CPEs[j].CPEMI = NULL; + MadeChange = true; + } + } + } + return MadeChange; +} + +/// BBIsInRange - Returns true if the distance between specific MI and +/// specific BB can fit in MI's displacement field. +bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, + unsigned MaxDisp) { + unsigned PCAdj = isThumb ? 4 : 8; + unsigned BrOffset = GetOffsetOf(MI) + PCAdj; + unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + + DOUT << "Branch of destination BB#" << DestBB->getNumber() + << " from BB#" << MI->getParent()->getNumber() + << " max delta=" << MaxDisp + << " from " << GetOffsetOf(MI) << " to " << DestOffset + << " offset " << int(DestOffset-BrOffset) << "\t" << *MI; + + if (BrOffset <= DestOffset) { + // Branch before the Dest. + if (DestOffset-BrOffset <= MaxDisp) + return true; + } else { + if (BrOffset-DestOffset <= MaxDisp) + return true; + } + return false; +} + +/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far +/// away to fit in its displacement field. +bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); + + // Check to see if the DestBB is already in-range. + if (BBIsInRange(MI, DestBB, Br.MaxDisp)) + return false; + + if (!Br.isCond) + return FixUpUnconditionalBr(Fn, Br); + return FixUpConditionalBr(Fn, Br); +} + +/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is +/// too far away to fit in its displacement field. If the LR register has been +/// spilled in the epilogue, then we can use BL to implement a far jump. +/// Otherwise, add an intermediate branch instruction to a branch. +bool +ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *MBB = MI->getParent(); + assert(isThumb && "Expected a Thumb function!"); + + // Use BL to implement far jump. + Br.MaxDisp = (1 << 21) * 2; + MI->setDesc(TII->get(ARM::tBfar)); + BBSizes[MBB->getNumber()] += 2; + AdjustBBOffsetsAfter(MBB, 2); + HasFarJump = true; + NumUBrFixed++; + + DOUT << " Changed B to long jump " << *MI; + + return true; +} + +/// FixUpConditionalBr - Fix up a conditional branch whose destination is too +/// far away to fit in its displacement field. It is converted to an inverse +/// conditional branch + an unconditional branch to the destination. +bool +ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); + + // Add an unconditional branch to the destination and invert the branch + // condition to jump over it: + // blt L1 + // => + // bge L2 + // b L1 + // L2: + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm(); + CC = ARMCC::getOppositeCondition(CC); + unsigned CCReg = MI->getOperand(2).getReg(); + + // If the branch is at the end of its MBB and that has a fall-through block, + // direct the updated conditional branch to the fall-through block. Otherwise, + // split the MBB before the next instruction. 
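+ // Concretely (sketch): if the conditional branch is the last instruction
+ // and the block falls through, no split is needed - the inverted branch
+ // simply targets the fall-through block. A split is needed when something
+ // follows the branch (unless that is an unconditional branch whose
+ // destination can simply be swapped, handled first below) or when the
+ // block cannot fall through.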
+ MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *BMI = &MBB->back(); + bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); + + NumCBrFixed++; + if (BMI != MI) { + if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && + BMI->getOpcode() == Br.UncondBr) { + // Last MI in the BB is an unconditional branch. Can we simply invert the + // condition and swap destinations: + // beq L1 + // b L2 + // => + // bne L2 + // b L1 + MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); + if (BBIsInRange(MI, NewDest, Br.MaxDisp)) { + DOUT << " Invert Bcc condition and swap its destination with " << *BMI; + BMI->getOperand(0).setMBB(DestBB); + MI->getOperand(0).setMBB(NewDest); + MI->getOperand(1).setImm(CC); + return true; + } + } + } + + if (NeedSplit) { + SplitBlockBeforeInstr(MI); + // No need for the branch to the next block. We're adding an unconditional + // branch to the destination. + int delta = TII->GetInstSizeInBytes(&MBB->back()); + BBSizes[MBB->getNumber()] -= delta; + MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB)); + AdjustBBOffsetsAfter(SplitBB, -delta); + MBB->back().eraseFromParent(); + // BBOffsets[SplitBB] is wrong temporarily, fixed below + } + MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB)); + + DOUT << " Insert B to BB#" << DestBB->getNumber() + << " also invert condition and change dest. to BB#" + << NextBB->getNumber() << "\n"; + + // Insert a new conditional branch and a new unconditional branch. + // Also update the ImmBranch as well as adding a new entry for the new branch. + BuildMI(MBB, DebugLoc::getUnknownLoc(), + TII->get(MI->getOpcode())) + .addMBB(NextBB).addImm(CC).addReg(CCReg); + Br.MI = &MBB->back(); + BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BuildMI(MBB, DebugLoc::getUnknownLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); + BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); + ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); + + // Remove the old conditional branch. It may or may not still be in MBB. + BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI); + MI->eraseFromParent(); + + // The net size change is an addition of one unconditional branch. + int delta = TII->GetInstSizeInBytes(&MBB->back()); + AdjustBBOffsetsAfter(MBB, delta); + return true; +} + +/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills +/// LR / restores LR to pc. +bool ARMConstantIslands::UndoLRSpillRestore() { + bool MadeChange = false; + for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { + MachineInstr *MI = PushPopMIs[i]; + if (MI->getOpcode() == ARM::tPOP_RET && + MI->getOperand(0).getReg() == ARM::PC && + MI->getNumExplicitOperands() == 1) { + BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)); + MI->eraseFromParent(); + MadeChange = true; + } + } + return MadeChange; +} diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp new file mode 100644 index 0000000..3a038c9 --- /dev/null +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -0,0 +1,100 @@ +//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the ARM specific constantpool value class. +// +//===----------------------------------------------------------------------===// + +#include "ARMConstantPoolValue.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/GlobalValue.h" +#include "llvm/Type.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include <ostream> +using namespace llvm; + +ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id, + ARMCP::ARMCPKind k, + unsigned char PCAdj, + const char *Modif, + bool AddCA) + : MachineConstantPoolValue((const Type*)gv->getType()), + GV(gv), S(NULL), LabelId(id), Kind(k), PCAdjust(PCAdj), + Modifier(Modif), AddCurrentAddress(AddCA) {} + +ARMConstantPoolValue::ARMConstantPoolValue(const char *s, unsigned id, + ARMCP::ARMCPKind k, + unsigned char PCAdj, + const char *Modif, + bool AddCA) + : MachineConstantPoolValue((const Type*)Type::Int32Ty), + GV(NULL), S(s), LabelId(id), Kind(k), PCAdjust(PCAdj), + Modifier(Modif), AddCurrentAddress(AddCA) {} + +ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, + ARMCP::ARMCPKind k, + const char *Modif) + : MachineConstantPoolValue((const Type*)Type::Int32Ty), + GV(gv), S(NULL), LabelId(0), Kind(k), PCAdjust(0), + Modifier(Modif) {} + +int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + if (CPV->GV == GV && + CPV->S == S && + CPV->LabelId == LabelId && + CPV->Kind == Kind && + CPV->PCAdjust == PCAdjust) + return i; + } + } + + return -1; +} + +void +ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(GV); + ID.AddPointer(S); + ID.AddInteger(LabelId); + ID.AddInteger((unsigned)Kind); + ID.AddInteger(PCAdjust); +} + +void ARMConstantPoolValue::dump() const { + cerr << " " << *this; +} + +void ARMConstantPoolValue::print(std::ostream &O) const { + raw_os_ostream RawOS(O); + print(RawOS); +} + +void ARMConstantPoolValue::print(raw_ostream &O) const { + if (GV) + O << GV->getName(); + else + O << S; + if (isNonLazyPointer()) O << "$non_lazy_ptr"; + else if (isStub()) O << "$stub"; + if (Modifier) O << "(" << Modifier << ")"; + if (PCAdjust != 0) { + O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust; + if (AddCurrentAddress) O << "-."; + O << ")"; + } +} diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h new file mode 100644 index 0000000..d2b9066 --- /dev/null +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -0,0 +1,92 @@ +//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARM specific constantpool value class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H +#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H + +#include "llvm/CodeGen/MachineConstantPool.h" +#include <iosfwd> + +namespace llvm { + +class GlobalValue; + +namespace ARMCP { + enum ARMCPKind { + CPValue, + CPNonLazyPtr, + CPStub + }; +} + +/// ARMConstantPoolValue - ARM specific constantpool value. This is used to +/// represent PC relative displacement between the address of the load +/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)). +class ARMConstantPoolValue : public MachineConstantPoolValue { + GlobalValue *GV; // GlobalValue being loaded. + const char *S; // ExtSymbol being loaded. + unsigned LabelId; // Label id of the load. + ARMCP::ARMCPKind Kind; // non_lazy_ptr or stub? + unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative. + // 8 for ARM, 4 for Thumb. + const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8)) + bool AddCurrentAddress; + +public: + ARMConstantPoolValue(GlobalValue *gv, unsigned id, + ARMCP::ARMCPKind Kind = ARMCP::CPValue, + unsigned char PCAdj = 0, const char *Modifier = NULL, + bool AddCurrentAddress = false); + ARMConstantPoolValue(const char *s, unsigned id, + ARMCP::ARMCPKind Kind = ARMCP::CPValue, + unsigned char PCAdj = 0, const char *Modifier = NULL, + bool AddCurrentAddress = false); + ARMConstantPoolValue(GlobalValue *GV, ARMCP::ARMCPKind Kind, + const char *Modifier); + + + GlobalValue *getGV() const { return GV; } + const char *getSymbol() const { return S; } + const char *getModifier() const { return Modifier; } + bool hasModifier() const { return Modifier != NULL; } + bool mustAddCurrentAddress() const { return AddCurrentAddress; } + unsigned getLabelId() const { return LabelId; } + bool isNonLazyPointer() const { return Kind == ARMCP::CPNonLazyPtr; } + bool isStub() const { return Kind == ARMCP::CPStub; } + unsigned char getPCAdjustment() const { return PCAdjust; } + + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment); + + virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID); + + void print(std::ostream *O) const { if (O) print(*O); } + void print(std::ostream &O) const; + void print(raw_ostream *O) const { if (O) print(*O); } + void print(raw_ostream &O) const; + void dump() const; +}; + + inline std::ostream &operator<<(std::ostream &O, const ARMConstantPoolValue &V) { + V.print(O); + return O; +} + +inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { + V.print(O); + return O; +} + +} // End llvm namespace + +#endif diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h new file mode 100644 index 0000000..405b8f2 --- /dev/null +++ b/lib/Target/ARM/ARMFrameInfo.h @@ -0,0 +1,32 @@ +//===-- ARMTargetFrameInfo.h - Define TargetFrameInfo for ARM ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_FRAMEINFO_H +#define ARM_FRAMEINFO_H + +#include "ARM.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "ARMSubtarget.h" + +namespace llvm { + +class ARMFrameInfo : public TargetFrameInfo { +public: + explicit ARMFrameInfo(const ARMSubtarget &ST) + : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) { + } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp new file mode 100644 index 0000000..ca3a9cb --- /dev/null +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -0,0 +1,911 @@ +//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the ARM target. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMISelLowering.h" +#include "ARMTargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +//===--------------------------------------------------------------------===// +/// ARMDAGToDAGISel - ARM specific code to select ARM machine +/// instructions for SelectionDAG operations. +/// +namespace { +class ARMDAGToDAGISel : public SelectionDAGISel { + ARMTargetMachine &TM; + + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. 
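+  /// Select() below consults it (e.g. isThumb()) when choosing between ARM
+  /// and Thumb opcode forms.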
+ const ARMSubtarget *Subtarget; + +public: + explicit ARMDAGToDAGISel(ARMTargetMachine &tm) + : SelectionDAGISel(tm), TM(tm), + Subtarget(&TM.getSubtarget<ARMSubtarget>()) { + } + + virtual const char *getPassName() const { + return "ARM Instruction Selection"; + } + + SDNode *Select(SDValue Op); + virtual void InstructionSelect(); + bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode2Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode3(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode3Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset); + + bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, + SDValue &Label); + + bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset); + bool SelectThumbAddrModeRI5(SDValue Op, SDValue N, unsigned Scale, + SDValue &Base, SDValue &OffImm, + SDValue &Offset); + bool SelectThumbAddrModeS1(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm, SDValue &Offset); + bool SelectThumbAddrModeS2(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm, SDValue &Offset); + bool SelectThumbAddrModeS4(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm, SDValue &Offset); + bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm); + + bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, + SDValue &B, SDValue &C); + + // Include the pieces autogenerated from the target description. +#include "ARMGenDAGISel.inc" + +private: + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); +}; +} + +void ARMDAGToDAGISel::InstructionSelect() { + DEBUG(BB->dump()); + + SelectRoot(*CurDAG); + CurDAG->RemoveDeadNodes(); +} + +bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset, + SDValue &Opc) { + if (N.getOpcode() == ISD::MUL) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + // X * [3,5,9] -> X + X * [2,4,8] etc. + int RHSC = (int)RHS->getZExtValue(); + if (RHSC & 1) { + RHSC = RHSC & ~1; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + if (isPowerOf2_32(RHSC)) { + unsigned ShAmt = Log2_32(RHSC); + Base = Offset = N.getOperand(0); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, + ARM_AM::lsl), + MVT::i32); + return true; + } + } + } + } + + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, + ARM_AM::no_shift), + MVT::i32); + return true; + } + + // Match simple R +/- imm12 operands. + if (N.getOpcode() == ISD::ADD) + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC >= 0 && RHSC < 0x1000) || + (RHSC < 0 && RHSC > -0x1000)) { // 12 bits. 
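+        // e.g. (add X, 100) becomes [X, #+100] and (add X, -8) becomes
+        // [X, #-8]; offsets of +/-4096 or beyond fall through to the
+        // register +/- (possibly shifted) register forms below.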
+ Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + Offset = CurDAG->getRegister(0, MVT::i32); + + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC, + ARM_AM::no_shift), + MVT::i32); + return true; + } + } + + // Otherwise this is R +/- [possibly shifted] R + ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); + unsigned ShAmt = 0; + + Base = N.getOperand(0); + Offset = N.getOperand(1); + + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = + dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { + ShAmt = Sh->getZExtValue(); + Offset = N.getOperand(1).getOperand(0); + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + + // Try matching (R shl C) + (R). + if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) { + ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't + // fold it. + if (ConstantSDNode *Sh = + dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { + ShAmt = Sh->getZExtValue(); + Offset = N.getOperand(0).getOperand(0); + Base = N.getOperand(1); + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + } + + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc) { + unsigned Opcode = Op.getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { + int Val = (int)C->getZExtValue(); + if (Val >= 0 && Val < 0x1000) { // 12 bits. + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, + ARM_AM::no_shift), + MVT::i32); + return true; + } + } + + Offset = N; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + unsigned ShAmt = 0; + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ShAmt = Sh->getZExtValue(); + Offset = N.getOperand(0); + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), + MVT::i32); + return true; +} + + +bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset, + SDValue &Opc) { + if (N.getOpcode() == ISD::SUB) { + // X - C is canonicalize to X + -C, no need to handle it here. 
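+    // A register-register subtract does still reach this point, e.g.
+    // (sub X, Y) becomes the [X, -Y] form with a zero immediate.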
+ Base = N.getOperand(0); + Offset = N.getOperand(1); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32); + return true; + } + + if (N.getOpcode() != ISD::ADD) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); + return true; + } + + // If the RHS is +/- imm8, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC >= 0 && RHSC < 256) || + (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed. + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + Offset = CurDAG->getRegister(0, MVT::i32); + + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32); + return true; + } + } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc) { + unsigned Opcode = Op.getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { + int Val = (int)C->getZExtValue(); + if (Val >= 0 && Val < 256) { + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32); + return true; + } + } + + Offset = N; + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32); + return true; +} + + +bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset) { + if (N.getOpcode() != ISD::ADD) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + } + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), + MVT::i32); + return true; + } + + // If the RHS is +/- imm8, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC & 3) == 0) { // The constant is implicitly multiplied by 4. + RHSC >>= 2; + if ((RHSC >= 0 && RHSC < 256) || + (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed. 
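+        // e.g. an FP access at (add X, 40) becomes [X, #+40], stored as an
+        // offset of 10 words; reachable offsets are multiples of 4 within
+        // +/- 1020 bytes.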
+ Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), + MVT::i32); + return true; + } + } + } + + Base = N; + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Label) { + if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { + Offset = N.getOperand(0); + SDValue N1 = N.getOperand(1); + Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), + MVT::i32); + return true; + } + return false; +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset){ + // FIXME dl should come from the parent load or store, not the address + DebugLoc dl = Op.getDebugLoc(); + if (N.getOpcode() != ISD::ADD) { + Base = N; + // We must materialize a zero in a reg! Returning a constant here + // wouldn't work without additional code to position the node within + // ISel's topological ordering in a place where ISel will process it + // normally. Instead, just explicitly issue a tMOVri8 node! + Offset = SDValue(CurDAG->getTargetNode(ARM::tMOVi8, dl, MVT::i32, + CurDAG->getTargetConstant(0, MVT::i32)), 0); + return true; + } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + return true; +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N, + unsigned Scale, SDValue &Base, + SDValue &OffImm, SDValue &Offset) { + if (Scale == 4) { + SDValue TmpBase, TmpOffImm; + if (SelectThumbAddrModeSP(Op, N, TmpBase, TmpOffImm)) + return false; // We want to select tLDRspi / tSTRspi instead. + if (N.getOpcode() == ARMISD::Wrapper && + N.getOperand(0).getOpcode() == ISD::TargetConstantPool) + return false; // We want to select tLDRpci instead. + } + + if (N.getOpcode() != ISD::ADD) { + Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N; + Offset = CurDAG->getRegister(0, MVT::i32); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // Thumb does not have [sp, r] address mode. + RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); + RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1)); + if ((LHSR && LHSR->getReg() == ARM::SP) || + (RHSR && RHSR->getReg() == ARM::SP)) { + Base = N; + Offset = CurDAG->getRegister(0, MVT::i32); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // If the RHS is + imm5 * scale, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC & (Scale-1)) == 0) { // The constant is implicitly multiplied. 
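+      // e.g. for a word access (Scale == 4) an offset of 60 encodes as
+      // imm5 = 15; offsets that are not a multiple of Scale, or that exceed
+      // 31 * Scale, fall back to the reg + reg form at the bottom of this
+      // function.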
+ RHSC /= Scale; + if (RHSC >= 0 && RHSC < 32) { + Base = N.getOperand(0); + Offset = CurDAG->getRegister(0, MVT::i32); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm, + SDValue &Offset) { + return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset); +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm, + SDValue &Offset) { + return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset); +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm, + SDValue &Offset) { + return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset); +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm) { + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (N.getOpcode() != ISD::ADD) + return false; + + RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); + if (N.getOperand(0).getOpcode() == ISD::FrameIndex || + (LHSR && LHSR->getReg() == ARM::SP)) { + // If the RHS is + imm8 * scale, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC & 3) == 0) { // The constant is implicitly multiplied. + RHSC >>= 2; + if (RHSC >= 0 && RHSC < 256) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, + SDValue N, + SDValue &BaseReg, + SDValue &ShReg, + SDValue &Opc) { + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. + if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ShReg = CurDAG->getRegister(0, MVT::i32); + ShImmVal = RHS->getZExtValue() & 31; + } else { + ShReg = N.getOperand(1); + } + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); + return true; +} + +/// getAL - Returns a ARMCC::AL immediate node. +static inline SDValue getAL(SelectionDAG *CurDAG) { + return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32); +} + + +SDNode *ARMDAGToDAGISel::Select(SDValue Op) { + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + + if (N->isMachineOpcode()) + return NULL; // Already selected. 
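+  // The cases in the switch below need ARM-specific selection; anything not
+  // matched here falls through to the TableGen-generated matcher via the
+  // SelectCode call at the end of this function.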
+ + switch (N->getOpcode()) { + default: break; + case ISD::Constant: { + unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); + bool UseCP = true; + if (Subtarget->isThumb()) + UseCP = (Val > 255 && // MOV + ~Val > 255 && // MOV + MVN + !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL + else + UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV + ARM_AM::getSOImmVal(~Val) == -1 && // MVN + !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. + if (UseCP) { + SDValue CPIdx = + CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val), + TLI.getPointerTy()); + + SDNode *ResNode; + if (Subtarget->isThumb()) + ResNode = CurDAG->getTargetNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, + CPIdx, CurDAG->getEntryNode()); + else { + SDValue Ops[] = { + CPIdx, + CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(0, MVT::i32), + getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getEntryNode() + }; + ResNode=CurDAG->getTargetNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, + Ops, 6); + } + ReplaceUses(Op, SDValue(ResNode, 0)); + return NULL; + } + + // Other cases are autogenerated. + break; + } + case ISD::FrameIndex: { + // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + if (Subtarget->isThumb()) { + return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI, + CurDAG->getTargetConstant(0, MVT::i32)); + } else { + SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::ADDri, MVT::i32, Ops, 5); + } + } + case ISD::ADD: { + if (!Subtarget->isThumb()) + break; + // Select add sp, c to tADDhirr. + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(Op.getOperand(0)); + RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(Op.getOperand(1)); + if (LHSR && LHSR->getReg() == ARM::SP) { + std::swap(N0, N1); + std::swap(LHSR, RHSR); + } + if (RHSR && RHSR->getReg() == ARM::SP) { + SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVlor2hir, dl, + Op.getValueType(), N0, N0), 0); + return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1); + } + break; + } + case ISD::MUL: + if (Subtarget->isThumb()) + break; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RHSV = C->getZExtValue(); + if (!RHSV) break; + if (isPowerOf2_32(RHSV-1)) { // 2^n+1? + SDValue V = Op.getOperand(0); + unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1)); + SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(ShImm, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7); + } + if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
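+        // e.g. x * 7: RHSV+1 == 8 is a power of two, so this emits
+        // rsb Rd, Rn, Rn, lsl #3 -- i.e. (Rn << 3) - Rn -- instead of a
+        // multiply.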
+ SDValue V = Op.getOperand(0); + unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1)); + SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(ShImm, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); + } + } + break; + case ARMISD::FMRRD: + return CurDAG->getTargetNode(ARM::FMRRD, dl, MVT::i32, MVT::i32, + Op.getOperand(0), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)); + case ISD::UMUL_LOHI: { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5); + } + case ISD::SMUL_LOHI: { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); + } + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + MVT LoadedVT = LD->getMemoryVT(); + if (AM != ISD::UNINDEXED) { + SDValue Offset, AMOpc; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + bool Match = false; + if (LoadedVT == MVT::i32 && + SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; + Match = true; + } else if (LoadedVT == MVT::i16 && + SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) + ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) + : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); + } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { + if (LD->getExtensionType() == ISD::SEXTLOAD) { + if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; + } + } else { + if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; + } + } + } + + if (Match) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Ops, 6); + } + } + // Other cases are autogenerated. + break; + } + case ARMISD::BRCOND: { + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + unsigned Opc = Subtarget->isThumb() ? 
ARM::tBcc : ARM::Bcc; + SDValue Chain = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + SDValue N3 = Op.getOperand(3); + SDValue InFlag = Op.getOperand(4); + assert(N1.getOpcode() == ISD::BasicBlock); + assert(N2.getOpcode() == ISD::Constant); + assert(N3.getOpcode() == ISD::Register); + + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; + SDNode *ResNode = CurDAG->getTargetNode(Opc, dl, MVT::Other, + MVT::Flag, Ops, 5); + Chain = SDValue(ResNode, 0); + if (Op.getNode()->getNumValues() == 2) { + InFlag = SDValue(ResNode, 1); + ReplaceUses(SDValue(Op.getNode(), 1), InFlag); + } + ReplaceUses(SDValue(Op.getNode(), 0), SDValue(Chain.getNode(), Chain.getResNo())); + return NULL; + } + case ARMISD::CMOV: { + bool isThumb = Subtarget->isThumb(); + MVT VT = Op.getValueType(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + SDValue N3 = Op.getOperand(3); + SDValue InFlag = Op.getOperand(4); + assert(N2.getOpcode() == ISD::Constant); + assert(N3.getOpcode() == ISD::Register); + + // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Pattern complexity = 18 cost = 1 size = 0 + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (!isThumb && VT == MVT::i32 && + SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) { + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7); + } + + // Pattern: (ARMcmov:i32 GPR:i32:$false, + // (imm:i32)<<P:Predicate_so_imm>><<X:so_imm_XFORM>>:$true, + // (imm:i32):$cc) + // Emits: (MOVCCi:i32 GPR:i32:$false, + // (so_imm_XFORM:i32 (imm:i32):$true), (imm:i32):$cc) + // Pattern complexity = 10 cost = 1 size = 0 + if (VT == MVT::i32 && + N3.getOpcode() == ISD::Constant && + Predicate_so_imm(N3.getNode())) { + SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N1)->getZExtValue()), + MVT::i32); + Tmp1 = Transform_so_imm_XFORM(Tmp1.getNode()); + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCi, MVT::i32, Ops, 5); + } + + // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + // + // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Pattern complexity = 6 cost = 11 size = 0 + // + // Also FCPYScc and FCPYDcc. + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; + unsigned Opc = 0; + switch (VT.getSimpleVT()) { + default: assert(false && "Illegal conditional move type!"); + break; + case MVT::i32: + Opc = isThumb ? 
ARM::tMOVCCr : ARM::MOVCCr; + break; + case MVT::f32: + Opc = ARM::FCPYScc; + break; + case MVT::f64: + Opc = ARM::FCPYDcc; + break; + } + return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); + } + case ARMISD::CNEG: { + MVT VT = Op.getValueType(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + SDValue N3 = Op.getOperand(3); + SDValue InFlag = Op.getOperand(4); + assert(N2.getOpcode() == ISD::Constant); + assert(N3.getOpcode() == ISD::Register); + + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; + unsigned Opc = 0; + switch (VT.getSimpleVT()) { + default: assert(false && "Illegal conditional move type!"); + break; + case MVT::f32: + Opc = ARM::FNEGScc; + break; + case MVT::f64: + Opc = ARM::FNEGDcc; + break; + } + return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); + } + + case ISD::DECLARE: { + SDValue Chain = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1); + // FIXME: handle VLAs. + if (!FINode) { + ReplaceUses(Op.getValue(0), Chain); + return NULL; + } + if (N2.getOpcode() == ARMISD::PIC_ADD && isa<LoadSDNode>(N2.getOperand(0))) + N2 = N2.getOperand(0); + LoadSDNode *Ld = dyn_cast<LoadSDNode>(N2); + if (!Ld) { + ReplaceUses(Op.getValue(0), Chain); + return NULL; + } + SDValue BasePtr = Ld->getBasePtr(); + assert(BasePtr.getOpcode() == ARMISD::Wrapper && + isa<ConstantPoolSDNode>(BasePtr.getOperand(0)) && + "llvm.dbg.variable should be a constantpool node"); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(BasePtr.getOperand(0)); + GlobalValue *GV = 0; + if (CP->isMachineConstantPoolEntry()) { + ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)CP->getMachineCPVal(); + GV = ACPV->getGV(); + } else + GV = dyn_cast<GlobalValue>(CP->getConstVal()); + if (!GV) { + ReplaceUses(Op.getValue(0), Chain); + return NULL; + } + + SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(), + TLI.getPointerTy()); + SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy()); + SDValue Ops[] = { Tmp1, Tmp2, Chain }; + return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl, + MVT::Other, Ops, 3); + } + } + + return SelectCode(Op); +} + +bool ARMDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + std::vector<SDValue> &OutOps) { + assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); + + SDValue Base, Offset, Opc; + if (!SelectAddrMode2(Op, Op, Base, Offset, Opc)) + return true; + + OutOps.push_back(Base); + OutOps.push_back(Offset); + OutOps.push_back(Opc); + return false; +} + +/// createARMISelDag - This pass converts a legalized DAG into a +/// ARM-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) { + return new ARMDAGToDAGISel(TM); +} diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp new file mode 100644 index 0000000..c0fd9dc --- /dev/null +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -0,0 +1,2346 @@ +//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that ARM uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMISelLowering.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instruction.h" +#include "llvm/Intrinsics.h" +#include "llvm/GlobalValue.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + +ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) + : TargetLowering(TM), ARMPCLabelIndex(0) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + + if (Subtarget->isTargetDarwin()) { + // Uses VFP for Thumb libfuncs if available. + if (Subtarget->isThumb() && Subtarget->hasVFP2()) { + // Single-precision floating-point arithmetic. + setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); + setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); + setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp"); + setLibcallName(RTLIB::DIV_F32, "__divsf3vfp"); + + // Double-precision floating-point arithmetic. + setLibcallName(RTLIB::ADD_F64, "__adddf3vfp"); + setLibcallName(RTLIB::SUB_F64, "__subdf3vfp"); + setLibcallName(RTLIB::MUL_F64, "__muldf3vfp"); + setLibcallName(RTLIB::DIV_F64, "__divdf3vfp"); + + // Single-precision comparisons. + setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); + setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); + setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); + setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); + setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); + setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); + setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); + setLibcallName(RTLIB::O_F32, "__unordsf2vfp"); + + setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); + + // Double-precision comparisons. 
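+      // Mirrors the single-precision block above, using the corresponding
+      // *df2vfp routines and the same SETNE / SETEQ result checks.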
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); + setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); + setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); + setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); + setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); + setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); + setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); + setLibcallName(RTLIB::O_F64, "__unorddf2vfp"); + + setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); + + // Floating-point to integer conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); + + // Conversions between floating types. + setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); + + // Integer to floating-point conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. + // FIXME: There appears to be some naming inconsistency in ARM libgcc: + // e.g., __floatunsidf vs. __floatunssidfvfp. + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp"); + } + } + + // These libcalls are not available in 32-bit. + setLibcallName(RTLIB::SHL_I128, 0); + setLibcallName(RTLIB::SRL_I128, 0); + setLibcallName(RTLIB::SRA_I128, 0); + + if (Subtarget->isThumb()) + addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); + else + addRegisterClass(MVT::i32, ARM::GPRRegisterClass); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + addRegisterClass(MVT::f32, ARM::SPRRegisterClass); + addRegisterClass(MVT::f64, ARM::DPRRegisterClass); + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + } + computeRegisterProperties(); + + // ARM does not have f32 extending load. + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + // ARM does not have i1 sign extending load. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + + // ARM supports all 4 flavors of integer indexed load / store. + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::i1, Legal); + setIndexedLoadAction(im, MVT::i8, Legal); + setIndexedLoadAction(im, MVT::i16, Legal); + setIndexedLoadAction(im, MVT::i32, Legal); + setIndexedStoreAction(im, MVT::i1, Legal); + setIndexedStoreAction(im, MVT::i8, Legal); + setIndexedStoreAction(im, MVT::i16, Legal); + setIndexedStoreAction(im, MVT::i32, Legal); + } + + // i64 operation support. 
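+  // The Thumb mode supported here (Thumb1) has no long multiply, so all of
+  // these are expanded; ARM keeps the UMUL_LOHI / SMUL_LOHI nodes, which the
+  // instruction selector turns into UMULL / SMULL.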
+ if (Subtarget->isThumb()) { + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + } else { + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + if (!Subtarget->hasV6Ops()) + setOperationAction(ISD::MULHS, MVT::i32, Expand); + } + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL, MVT::i64, Custom); + setOperationAction(ISD::SRA, MVT::i64, Custom); + + // ARM does not have ROTL. + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + if (!Subtarget->hasV5TOps() || Subtarget->isThumb()) + setOperationAction(ISD::CTLZ, MVT::i32, Expand); + + // Only ARMv6 has BSWAP. + if (!Subtarget->hasV6Ops()) + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + + // These are expanded into libcalls. + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + + // Support label based line numbers. + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + + setOperationAction(ISD::RET, MVT::Other, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + + // Use the default implementation. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + if (!Subtarget->hasV6Ops()) { + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + } + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) + // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2. + setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); + + // We want to custom lower some of our intrinsics. 
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f64, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, Custom); + + // We don't support sin/cos/fmod/copysign/pow + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + } + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + + // int <-> fp are custom expanded into bit_convert + ARMISD ops. + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + } + + // We have target-specific dag combine patterns for the following nodes: + // ARMISD::FMRRD - No need to call setTargetDAGCombine + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); + + setStackPointerRegisterToSaveRestore(ARM::SP); + setSchedulingPreference(SchedulingForRegPressure); + setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); + setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2); + + maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type + // Do not enable CodePlacementOpt for now: it currently runs after the + // ARMConstantIslandPass and messes up branch relaxation and placement + // of constant islands. 
+ // benefitFromCodePlacementOpt = true; +} + +const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case ARMISD::Wrapper: return "ARMISD::Wrapper"; + case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; + case ARMISD::CALL: return "ARMISD::CALL"; + case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; + case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; + case ARMISD::tCALL: return "ARMISD::tCALL"; + case ARMISD::BRCOND: return "ARMISD::BRCOND"; + case ARMISD::BR_JT: return "ARMISD::BR_JT"; + case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; + case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; + case ARMISD::CMP: return "ARMISD::CMP"; + case ARMISD::CMPNZ: return "ARMISD::CMPNZ"; + case ARMISD::CMPFP: return "ARMISD::CMPFP"; + case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; + case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; + case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::CNEG: return "ARMISD::CNEG"; + + case ARMISD::FTOSI: return "ARMISD::FTOSI"; + case ARMISD::FTOUI: return "ARMISD::FTOUI"; + case ARMISD::SITOF: return "ARMISD::SITOF"; + case ARMISD::UITOF: return "ARMISD::UITOF"; + + case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; + case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; + case ARMISD::RRX: return "ARMISD::RRX"; + + case ARMISD::FMRRD: return "ARMISD::FMRRD"; + case ARMISD::FMDRR: return "ARMISD::FMDRR"; + + case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; + } +} + +//===----------------------------------------------------------------------===// +// Lowering Code +//===----------------------------------------------------------------------===// + +/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC +static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { + switch (CC) { + default: assert(0 && "Unknown condition code!"); + case ISD::SETNE: return ARMCC::NE; + case ISD::SETEQ: return ARMCC::EQ; + case ISD::SETGT: return ARMCC::GT; + case ISD::SETGE: return ARMCC::GE; + case ISD::SETLT: return ARMCC::LT; + case ISD::SETLE: return ARMCC::LE; + case ISD::SETUGT: return ARMCC::HI; + case ISD::SETUGE: return ARMCC::HS; + case ISD::SETULT: return ARMCC::LO; + case ISD::SETULE: return ARMCC::LS; + } +} + +/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It +/// returns true if the operands should be inverted to form the proper +/// comparison. 
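+/// Some predicates need two checks: e.g. SETUEQ (unordered or equal) returns
+/// EQ with a second condition of VS in CondCode2, and SETONE returns MI plus
+/// GT, so callers may have to emit two predicated operations.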
+static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, + ARMCC::CondCodes &CondCode2) { + bool Invert = false; + CondCode2 = ARMCC::AL; + switch (CC) { + default: assert(0 && "Unknown FP condition!"); + case ISD::SETEQ: + case ISD::SETOEQ: CondCode = ARMCC::EQ; break; + case ISD::SETGT: + case ISD::SETOGT: CondCode = ARMCC::GT; break; + case ISD::SETGE: + case ISD::SETOGE: CondCode = ARMCC::GE; break; + case ISD::SETOLT: CondCode = ARMCC::MI; break; + case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break; + case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; + case ISD::SETO: CondCode = ARMCC::VC; break; + case ISD::SETUO: CondCode = ARMCC::VS; break; + case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; + case ISD::SETUGT: CondCode = ARMCC::HI; break; + case ISD::SETUGE: CondCode = ARMCC::PL; break; + case ISD::SETLT: + case ISD::SETULT: CondCode = ARMCC::LT; break; + case ISD::SETLE: + case ISD::SETULE: CondCode = ARMCC::LE; break; + case ISD::SETNE: + case ISD::SETUNE: CondCode = ARMCC::NE; break; + } + return Invert; +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +// +// The lower operations present on calling convention works on this order: +// LowerCALL (virt regs --> phys regs, virt regs --> stack) +// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs) +// LowerRET (virt regs --> phys regs) +// LowerCALL (phys regs --> virt regs) +// +//===----------------------------------------------------------------------===// + +#include "ARMGenCallingConv.inc" + +// APCS f64 is in register pairs, possibly split to stack +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const unsigned LoRegList[] = { ARM::R1, + ARM::R2, + ARM::R3, + ARM::NoRegister }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 4; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + if (LoRegList[i] != ARM::NoRegister) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + else + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(4, 4), + MVT::i32, LocInfo)); + return true; // we handled it +} + +// AAPCS f64 is in aligned register pairs +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + return true; // we handled it +} + +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const 
unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + return true; // we handled it +} + +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. The returns a SDNode with the same number of values as the +/// ISD::CALL. +SDNode *ARMTargetLowering:: +LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG) { + + DebugLoc dl = TheCall->getDebugLoc(); + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + bool isVarArg = TheCall->isVarArg(); + CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); + CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM); + + SmallVector<SDValue, 8> ResultVals; + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + SDValue Val; + if (VA.needsCustom()) { + // Handle f64 as custom. + SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Lo.getValue(1); + InFlag = Lo.getValue(2); + VA = RVLocs[++i]; // skip ahead to next loc + SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Hi.getValue(1); + InFlag = Hi.getValue(2); + Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + } else { + Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + } + + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val); + break; + } + + ResultVals.push_back(Val); + } + + // Merge everything together with a MERGE_VALUES node. + ResultVals.push_back(Chain); + return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), + &ResultVals[0], ResultVals.size()).getNode(); +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. +/// Sometimes what we are copying is the end of a larger object, the part that +/// does not fit in registers. +static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + DebugLoc dl) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*AlwaysInline=*/false, NULL, 0, NULL, 0); +} + +/// LowerMemOpCallTo - Store the argument to the stack. 
+SDValue +ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, + const SDValue &StackPtr, + const CCValAssign &VA, SDValue Chain, + SDValue Arg, ISD::ArgFlagsTy Flags) { + DebugLoc dl = TheCall->getDebugLoc(); + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + if (Flags.isByVal()) { + return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); + } + return DAG.getStore(Chain, dl, Arg, PtrOff, + PseudoSourceValue::getStack(), LocMemOffset); +} + +/// LowerCALL - Lowering a ISD::CALL node into a callseq_start <- +/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter +/// nodes. +SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { + CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); + MVT RetVT = TheCall->getRetValType(0); + SDValue Chain = TheCall->getChain(); + unsigned CC = TheCall->getCallingConv(); + assert((CC == CallingConv::C || + CC == CallingConv::Fast) && "unknown calling convention"); + bool isVarArg = TheCall->isVarArg(); + SDValue Callee = TheCall->getCallee(); + DebugLoc dl = TheCall->getDebugLoc(); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCInfo.AnalyzeCallOperands(TheCall, CC_ARM); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog pass + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32); + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + + // Walk the register/memloc assignments, inserting copies/loads. In the case + // of tail call optimization, arguments are handled later. + for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); + i != e; + ++i, ++realArgIdx) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = TheCall->getArg(realArgIdx); + ISD::ArgFlagsTy Flags = TheCall->getArgFlags(realArgIdx); + + // Promote the value if needed. 
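+    // Note: narrow integers are sign-, zero-, or any-extended to the location type; BCvt re-types a value in place, e.g. an f32 passed in a GPR.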
+ switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + break; + } + + // f64 is passed in i32 pairs and must be combined + if (VA.needsCustom()) { + SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isRegLoc()) + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(1))); + else { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, fmrrd.getValue(1), + Flags)); + } + } else if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, Arg, Flags)); + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + bool isDirect = false; + bool isARMFunc = false; + bool isLocalARMFunc = false; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + GlobalValue *GV = G->getGlobal(); + isDirect = true; + bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + bool isStub = (isExt && Subtarget->isTargetDarwin()) && + getTargetMachine().getRelocationModel() != Reloc::Static; + isARMFunc = !Subtarget->isThumb() || isStub; + // ARM call to a local ARM function is predicable. + isLocalARMFunc = !Subtarget->isThumb() && !isExt; + // tBX takes a register source operand. 
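+    // Pre-v5T Thumb has no BLX, so the callee's address is loaded from a constant-pool stub and the call is made indirectly through a register.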
+ if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) { + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, + ARMCP::CPStub, 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, + getPointerTy(), Callee, PICLabel); + } else + Callee = DAG.getTargetGlobalAddress(GV, getPointerTy()); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + isDirect = true; + bool isStub = Subtarget->isTargetDarwin() && + getTargetMachine().getRelocationModel() != Reloc::Static; + isARMFunc = !Subtarget->isThumb() || isStub; + // tBX takes a register source operand. + const char *Sym = S->getSymbol(); + if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) { + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex, + ARMCP::CPStub, 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, + getPointerTy(), Callee, PICLabel); + } else + Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); + } + + // FIXME: handle tail calls differently. + unsigned CallOpc; + if (Subtarget->isThumb()) { + if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc)) + CallOpc = ARMISD::CALL_NOLINK; + else + CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; + } else { + CallOpc = (isDirect || Subtarget->hasV5TOps()) + ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) + : ARMISD::CALL_NOLINK; + } + if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb()) { + // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK + Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag); + InFlag = Chain.getValue(1); + } + + std::vector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + if (InFlag.getNode()) + Ops.push_back(InFlag); + // Returns a chain and a flag for retval copy to use. + Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag), + &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + if (RetVT != MVT::Other) + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. + return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), + Op.getResNo()); +} + +SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { + // The chain is always operand #0 + SDValue Chain = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + + // CCValAssign - represent the assignment of the return value to a location. 
+ SmallVector<CCValAssign, 16> RVLocs; + unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + + // CCState - Info about the registers and stack slots. + CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); + + // Analyze return values of ISD::RET. + CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM); + + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + + // Copy the result values into the output registers. + for (unsigned i = 0, realRVLocIdx = 0; + i != RVLocs.size(); + ++i, ++realRVLocIdx) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + // ISD::RET => ret chain, (regnum1,val1), ... + // So i*2+1 index only the regnums + SDValue Arg = Op.getOperand(realRVLocIdx*2+1); + + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + break; + } + + // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is + // available. + if (VA.needsCustom()) { + SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Flag); + } else + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + + // Guarantee that all emitted copies are + // stuck together, avoiding something bad. + Flag = Chain.getValue(1); + } + + SDValue result; + if (Flag.getNode()) + result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + else // Return Void + result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); + + return result; +} + +// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as +// their target countpart wrapped in the ARMISD::Wrapper node. Suppose N is +// one of the above mentioned nodes. It has to be wrapped because otherwise +// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only +// be used to form addressing mode. These wrapped nodes will be selected +// into MOVi. +static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { + MVT PtrVT = Op.getValueType(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + SDValue Res; + if (CP->isMachineConstantPoolEntry()) + Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, + CP->getAlignment()); + else + Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, + CP->getAlignment()); + return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); +} + +// Lower ISD::GlobalTLSAddress using the "general dynamic" model +SDValue +ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) { + DebugLoc dl = GA->getDebugLoc(); + MVT PtrVT = getPointerTy(); + unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, + PCAdj, "tlsgd", true); + SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0); + SDValue Chain = Argument.getValue(1); + + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); + + // call __tls_get_addr. + ArgListTy Args; + ArgListEntry Entry; + Entry.Node = Argument; + Entry.Ty = (const Type *) Type::Int32Ty; + Args.push_back(Entry); + // FIXME: is there useful debug info available here? + std::pair<SDValue, SDValue> CallResult = + LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false, + CallingConv::C, false, + DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + return CallResult.first; +} + +// Lower ISD::GlobalTLSAddress using the "initial exec" or +// "local exec" model. +SDValue +ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, + SelectionDAG &DAG) { + GlobalValue *GV = GA->getGlobal(); + DebugLoc dl = GA->getDebugLoc(); + SDValue Offset; + SDValue Chain = DAG.getEntryNode(); + MVT PtrVT = getPointerTy(); + // Get the Thread Pointer + SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); + + if (GV->isDeclaration()){ + // initial exec model + unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, + PCAdj, "gottpoff", true); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + Chain = Offset.getValue(1); + + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); + + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + } else { + // local exec model + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff"); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + } + + // The address of the thread local variable is the add of the thread + // pointer with the offset of the variable. 
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); +} + +SDValue +ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { + // TODO: implement the "local dynamic" model + assert(Subtarget->isTargetELF() && + "TLS not implemented for non-ELF targets"); + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + // If the relocation model is PIC, use the "General Dynamic" TLS Model, + // otherwise use the "Local Exec" TLS Model + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) + return LowerToTLSGeneralDynamicModel(GA, DAG); + else + return LowerToTLSExecModels(GA, DAG); +} + +SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) { + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + if (RelocM == Reloc::PIC_) { + bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT"); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + CPAddr, NULL, 0); + SDValue Chain = Result.getValue(1); + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); + if (!UseGOTOFF) + Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + return Result; + } else { + SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + } +} + +/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol +/// even in non-static mode. +static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) { + // If symbol visibility is hidden, the extra load is not needed if + // the symbol is definitely defined in the current translation unit. + bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage())) + return false; + return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker()); +} + +SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, + SelectionDAG &DAG) { + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + bool IsIndirect = GVIsIndirectSymbol(GV, RelocM); + SDValue CPAddr; + if (RelocM == Reloc::Static) + CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); + else { + unsigned PCAdj = (RelocM != Reloc::PIC_) + ? 0 : (Subtarget->isThumb() ? 4 : 8); + ARMCP::ARMCPKind Kind = IsIndirect ? 
ARMCP::CPNonLazyPtr + : ARMCP::CPValue; + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, + Kind, PCAdj); + CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + } + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue Chain = Result.getValue(1); + + if (RelocM == Reloc::PIC_) { + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + } + if (IsIndirect) + Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + + return Result; +} + +SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, + SelectionDAG &DAG){ + assert(Subtarget->isTargetELF() && + "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_", + ARMPCLabelIndex, + ARMCP::CPValue, PCAdj); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); +} + +SDValue +ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + DebugLoc dl = Op.getDebugLoc(); + switch (IntNo) { + default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::arm_thread_pointer: + return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); + case Intrinsic::eh_sjlj_setjmp: + SDValue Res = DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, + Op.getOperand(1)); + return Res; + } +} + +static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, + unsigned VarArgsFrameIndex) { + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + DebugLoc dl = Op.getDebugLoc(); + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); +} + +SDValue +ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + SDValue Root = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; + unsigned CC = MF.getFunction()->getCallingConv(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM); + + SmallVector<SDValue, 16> ArgValues; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored in registers. 
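+    // An f64 argument arrives as a pair of i32 locations (two GPRs, or a GPR plus a stack slot under APCS) and is reassembled with FMDRR below.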
+ if (VA.isRegLoc()) { + MVT RegVT = VA.getLocVT(); + TargetRegisterClass *RC; + if (AFI->isThumbFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + if (RegVT == MVT::f64) { + // f64 is passed in pairs of GPRs and must be combined. + RegVT = MVT::i32; + } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32))) + assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering"); + + // Transform the arguments stored in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); + + // f64 is passed in i32 pairs and must be combined. + if (VA.needsCustom()) { + SDValue ArgValue2; + + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isMemLoc()) { + // must be APCS to split like this + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); + + // Create load node to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0); + } else { + Reg = MF.addLiveIn(VA.getLocReg(), RC); + ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); + } + + ArgValue = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, + ArgValue, ArgValue2); + } + + // If this is an 8 or 16-bit value, it is really passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::SExt: + ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::ZExt: + ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + break; + } + + ArgValues.push_back(ArgValue); + + } else { // VA.isRegLoc() + + // sanity check + assert(VA.isMemLoc()); + assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); + + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); + + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0)); + } + } + + // varargs + if (isVarArg) { + static const unsigned GPRArgRegs[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 + }; + + unsigned NumGPRs = CCInfo.getFirstUnallocated + (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned VARegSize = (4 - NumGPRs) * 4; + unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); + unsigned ArgOffset = 0; + if (VARegSaveSize) { + // If this function is vararg, store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by deferencing + // the result of va_next. 
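+      // The save area sits at VarArgsFrameIndex; each remaining register of r0-r3 is stored out 4 bytes apart so the register and stack arguments form one contiguous block.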
+ AFI->setVarArgsRegSaveSize(VARegSaveSize); + ArgOffset = CCInfo.getNextStackOffset(); + VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + + VARegSaveSize - VARegSize); + SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + + SmallVector<SDValue, 4> MemOps; + for (; NumGPRs < 4; ++NumGPRs) { + TargetRegisterClass *RC; + if (AFI->isThumbFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, + DAG.getConstant(4, getPointerTy())); + } + if (!MemOps.empty()) + Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); + } else + // This will point to the next argument passed via stack. + VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset); + } + + ArgValues.push_back(Root); + + // Return the new list of results. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), + &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); +} + +/// isFloatingPointZero - Return true if this is +0.0. +static bool isFloatingPointZero(SDValue Op) { + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) + return CFP->getValueAPF().isPosZero(); + else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { + // Maybe this has already been legalized into the constant pool? + if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { + SDValue WrapperOp = Op.getOperand(1).getOperand(0); + if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) + if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + return CFP->getValueAPF().isPosZero(); + } + } + return false; +} + +static bool isLegalCmpImmediate(unsigned C, bool isThumb) { + return ( isThumb && (C & ~255U) == 0) || + (!isThumb && ARM_AM::getSOImmVal(C) != -1); +} + +/// Returns appropriate ARM CMP (cmp) and corresponding condition code for +/// the given operands. +static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, bool isThumb, + DebugLoc dl) { + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { + unsigned C = RHSC->getZExtValue(); + if (!isLegalCmpImmediate(C, isThumb)) { + // Constant does not fit, try adjusting it by one? + switch (CC) { + default: break; + case ISD::SETLT: + case ISD::SETGE: + if (isLegalCmpImmediate(C-1, isThumb)) { + CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; + RHS = DAG.getConstant(C-1, MVT::i32); + } + break; + case ISD::SETULT: + case ISD::SETUGE: + if (C > 0 && isLegalCmpImmediate(C-1, isThumb)) { + CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; + RHS = DAG.getConstant(C-1, MVT::i32); + } + break; + case ISD::SETLE: + case ISD::SETGT: + if (isLegalCmpImmediate(C+1, isThumb)) { + CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; + RHS = DAG.getConstant(C+1, MVT::i32); + } + break; + case ISD::SETULE: + case ISD::SETUGT: + if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb)) { + CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; + RHS = DAG.getConstant(C+1, MVT::i32); + } + break; + } + } + } + + ARMCC::CondCodes CondCode = IntCCToARMCC(CC); + ARMISD::NodeType CompareType; + switch (CondCode) { + default: + CompareType = ARMISD::CMP; + break; + case ARMCC::EQ: + case ARMCC::NE: + case ARMCC::MI: + case ARMCC::PL: + // Uses only N and Z Flags + CompareType = ARMISD::CMPNZ; + break; + } + ARMCC = DAG.getConstant(CondCode, MVT::i32); + return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); +} + +/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. +static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, + DebugLoc dl) { + SDValue Cmp; + if (!isFloatingPointZero(RHS)) + Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); + else + Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); + return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); +} + +static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + MVT VT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + SDValue TrueVal = Op.getOperand(2); + SDValue FalseVal = Op.getOperand(3); + DebugLoc dl = Op.getDebugLoc(); + + if (LHS.getValueType() == MVT::i32) { + SDValue ARMCC; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp); + } + + ARMCC::CondCodes CondCode, CondCode2; + if (FPCCToARMCC(CC, CondCode, CondCode2)) + std::swap(TrueVal, FalseVal); + + SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, + ARMCC, CCR, Cmp); + if (CondCode2 != ARMCC::AL) { + SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); + // FIXME: Needs another CMP because flag can have but one use. + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); + Result = DAG.getNode(ARMISD::CMOV, dl, VT, + Result, TrueVal, ARMCC2, CCR, Cmp2); + } + return Result; +} + +static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + DebugLoc dl = Op.getDebugLoc(); + + if (LHS.getValueType() == MVT::i32) { + SDValue ARMCC; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, + Chain, Dest, ARMCC, CCR,Cmp); + } + + assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); + ARMCC::CondCodes CondCode, CondCode2; + if (FPCCToARMCC(CC, CondCode, CondCode2)) + // Swap the LHS/RHS of the comparison if needed. 
+ std::swap(LHS, RHS); + + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + if (CondCode2 != ARMCC::AL) { + ARMCC = DAG.getConstant(CondCode2, MVT::i32); + SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) }; + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + } + return Res; +} + +SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { + SDValue Chain = Op.getOperand(0); + SDValue Table = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + MVT PTy = getPointerTy(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); + ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); + SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; + Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl, + Chain, Addr, NULL, 0); + Chain = Addr.getValue(1); + if (isPIC) + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); +} + +static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc = + Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI; + Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); +} + +static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc = + Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF; + + Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(Opc, dl, VT, Op); +} + +static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { + // Implement fcopysign with a fabs and a conditional fneg. + SDValue Tmp0 = Op.getOperand(0); + SDValue Tmp1 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + MVT VT = Op.getValueType(); + MVT SrcVT = Tmp1.getValueType(); + SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); + SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); + SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); +} + +SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces()) + ? 
ARM::R7 : ARM::R11; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + return FrameAddr; +} + +SDValue +ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff){ + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferrably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) + return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + MVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on. + while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} + +static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { + SDValue Op = N->getOperand(0); + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0) == MVT::f64) { + // Turn i64->f64 into FMDRR. 
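+    // The i64 operand is first split into its low and high i32 halves, which FMDRR then packs into a single f64.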
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, + DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + } + + // Turn f64->i64 into FMRRD. + SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); +} + +static SDValue ExpandSRx(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { + assert(N->getValueType(0) == MVT::i64 && + (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && + "Unknown shift to lower!"); + + // We only lower SRA, SRL of 1 here, all others use generic lowering. + if (!isa<ConstantSDNode>(N->getOperand(1)) || + cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) + return SDValue(); + + // If we are in thumb mode, we don't have RRX. + if (ST->isThumb()) return SDValue(); + + // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. + DebugLoc dl = N->getDebugLoc(); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), + DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), + DAG.getConstant(1, MVT::i32)); + + // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and + // captures the result into a carry flag. + unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); + + // The low part is an ARMISD::RRX operand, which shifts the carry in. + Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); + + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); +} + +SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) { + default: assert(0 && "Don't know how to custom lower this!"); abort(); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::GlobalAddress: + return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : + LowerGlobalAddressELF(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::CALL: return LowerCALL(Op, DAG); + case ISD::RET: return LowerRET(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget); + case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); + case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); + case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); + case ISD::RETURNADDR: break; + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); + case ISD::SRL: + case ISD::SRA: return ExpandSRx(Op.getNode(), DAG,Subtarget); + } + return SDValue(); +} + +/// ReplaceNodeResults - Replace the results of node with an illegal result +/// type with new values built out of custom code. 
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) { + switch (N->getOpcode()) { + default: + assert(0 && "Don't know how to custom expand this!"); + return; + case ISD::BIT_CONVERT: + Results.push_back(ExpandBIT_CONVERT(N, DAG)); + return; + case ISD::SRL: + case ISD::SRA: { + SDValue Res = ExpandSRx(N, DAG, Subtarget); + if (Res.getNode()) + Results.push_back(Res); + return; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Scheduler Hooks +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + switch (MI->getOpcode()) { + default: assert(false && "Unexpected instr type to insert"); + case ARM::tMOVCCr: { + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) + .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) + sinkMBB->addSuccessor(*i); + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + BB = sinkMBB; + BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Optimization Hooks +//===----------------------------------------------------------------------===// + +static +SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT VT = N->getValueType(0); + unsigned Opc = N->getOpcode(); + bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; + SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); + SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2); + ISD::CondCode CC = ISD::SETCC_INVALID; + + if (isSlctCC) { + CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); + } else { + SDValue CCOp = Slct.getOperand(0); + if (CCOp.getOpcode() == ISD::SETCC) + CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); + } + + bool DoXform = false; + bool InvCC = false; + assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && + "Bad input!"); + + if (LHS.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(LHS)->isNullValue()) { + DoXform = true; + } else if (CC != ISD::SETCC_INVALID && + RHS.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(RHS)->isNullValue()) { + std::swap(LHS, RHS); + SDValue Op0 = Slct.getOperand(0); + MVT OpVT = isSlctCC ? Op0.getValueType() : + Op0.getOperand(0).getValueType(); + bool isInt = OpVT.isInteger(); + CC = ISD::getSetCCInverse(CC, isInt); + + if (!TLI.isCondCodeLegal(CC, OpVT)) + return SDValue(); // Inverse operator isn't legal. + + DoXform = true; + InvCC = true; + } + + if (DoXform) { + SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); + if (isSlctCC) + return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, + Slct.getOperand(0), Slct.getOperand(1), CC); + SDValue CCOp = Slct.getOperand(0); + if (InvCC) + CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), + CCOp.getOperand(0), CCOp.getOperand(1), CC); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + CCOp, OtherOp, Result); + } + return SDValue(); +} + +/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. +static SDValue PerformADDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // added by evan in r37685 with no testcase. + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) + if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N0, N1, DCI); + if (Result.getNode()) return Result; + } + if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + +/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. +static SDValue PerformSUBCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // added by evan in r37685 with no testcase. + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) + if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + + +/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD. 
+static SDValue PerformFMRRDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // fmrrd(fmdrr x, y) -> x,y + SDValue InDouble = N->getOperand(0); + if (InDouble.getOpcode() == ARMISD::FMDRR) + return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); + return SDValue(); +} + +SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: return PerformADDCombine(N, DCI); + case ISD::SUB: return PerformSUBCombine(N, DCI); + case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI); + } + + return SDValue(); +} + +/// isLegalAddressImmediate - Return true if the integer value can be used +/// as the offset of the target addressing mode for load / store of the +/// given type. +static bool isLegalAddressImmediate(int64_t V, MVT VT, + const ARMSubtarget *Subtarget) { + if (V == 0) + return true; + + if (!VT.isSimple()) + return false; + + if (Subtarget->isThumb()) { + if (V < 0) + return false; + + unsigned Scale = 1; + switch (VT.getSimpleVT()) { + default: return false; + case MVT::i1: + case MVT::i8: + // Scale == 1; + break; + case MVT::i16: + // Scale == 2; + Scale = 2; + break; + case MVT::i32: + // Scale == 4; + Scale = 4; + break; + } + + if ((V & (Scale - 1)) != 0) + return false; + V /= Scale; + return V == (V & ((1LL << 5) - 1)); + } + + if (V < 0) + V = - V; + switch (VT.getSimpleVT()) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i32: + // +- imm12 + return V == (V & ((1LL << 12) - 1)); + case MVT::i16: + // +- imm8 + return V == (V & ((1LL << 8) - 1)); + case MVT::f32: + case MVT::f64: + if (!Subtarget->hasVFP2()) + return false; + if ((V & 3) != 0) + return false; + V >>= 2; + return V == (V & ((1LL << 8) - 1)); + } +} + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. +bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + MVT VT = getValueType(Ty, true); + if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) + return false; + + // Can never fold addr of global into load/store. + if (AM.BaseGV) + return false; + + switch (AM.Scale) { + case 0: // no scale reg, must be "r+i" or "r", or "i". + break; + case 1: + if (Subtarget->isThumb()) + return false; + // FALL THROUGH. + default: + // ARM doesn't support any R+R*scale+imm addr modes. + if (AM.BaseOffs) + return false; + + if (!VT.isSimple()) + return false; + + int Scale = AM.Scale; + switch (VT.getSimpleVT()) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i32: + case MVT::i64: + // This assumes i64 is legalized to a pair of i32. If not (i.e. + // ldrd / strd are used, then its address mode is same as i16. + // r + r + if (Scale < 0) Scale = -Scale; + if (Scale == 1) + return true; + // r + r << imm + return isPowerOf2_32(Scale & ~1); + case MVT::i16: + // r + r + if (((unsigned)AM.HasBaseReg + Scale) <= 2) + return true; + return false; + + case MVT::isVoid: + // Note, we allow "void" uses (basically, uses that aren't loads or + // stores), because arm allows folding a scale into many arithmetic + // operations. This should be made more precise and revisited later. + + // Allow r << imm, but the imm has to be a multiple of two. 
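+      // That is, the scale must be a power of two greater than one, i.e. a genuine left shift by at least one bit.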
+ if (AM.Scale & 1) return false; + return isPowerOf2_32(AM.Scale); + } + break; + } + return true; +} + +static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) + return false; + + if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { + // AddressingMode 3 + Base = Ptr->getOperand(0); + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -256) { + isInc = false; + Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); + return true; + } + } + isInc = (Ptr->getOpcode() == ISD::ADD); + Offset = Ptr->getOperand(1); + return true; + } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { + // AddressingMode 2 + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -0x1000) { + isInc = false; + Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); + Base = Ptr->getOperand(0); + return true; + } + } + + if (Ptr->getOpcode() == ISD::ADD) { + isInc = true; + ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); + if (ShOpcVal != ARM_AM::no_shift) { + Base = Ptr->getOperand(1); + Offset = Ptr->getOperand(0); + } else { + Base = Ptr->getOperand(0); + Offset = Ptr->getOperand(1); + } + return true; + } + + isInc = (Ptr->getOpcode() == ISD::ADD); + Base = Ptr->getOperand(0); + Offset = Ptr->getOperand(1); + return true; + } + + // FIXME: Use FLDM / FSTM to emulate indexed FP load / store. + return false; +} + +/// getPreIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if the node's address +/// can be legally represented as pre-indexed load / store address. +bool +ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + if (Subtarget->isThumb()) + return false; + + MVT VT; + SDValue Ptr; + bool isSEXTLoad = false; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + Ptr = LD->getBasePtr(); + VT = LD->getMemoryVT(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + Ptr = ST->getBasePtr(); + VT = ST->getMemoryVT(); + } else + return false; + + bool isInc; + bool isLegal = getIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (isLegal) { + AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; + return true; + } + return false; +} + +/// getPostIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if this node can be +/// combined with a load / store to form a post-indexed load / store. +bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + if (Subtarget->isThumb()) + return false; + + MVT VT; + SDValue Ptr; + bool isSEXTLoad = false; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + VT = LD->getMemoryVT(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + VT = ST->getMemoryVT(); + } else + return false; + + bool isInc; + bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (isLegal) { + AM = isInc ? 
ISD::POST_INC : ISD::POST_DEC; + return true; + } + return false; +} + +void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + switch (Op.getOpcode()) { + default: break; + case ARMISD::CMOV: { + // Bits are known zero/one if known on the LHS and RHS. + DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + if (KnownZero == 0 && KnownOne == 0) return; + + APInt KnownZeroRHS, KnownOneRHS; + DAG.ComputeMaskedBits(Op.getOperand(1), Mask, + KnownZeroRHS, KnownOneRHS, Depth+1); + KnownZero &= KnownZeroRHS; + KnownOne &= KnownOneRHS; + return; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +ARMTargetLowering::ConstraintType +ARMTargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'l': return C_RegisterClass; + case 'w': return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +std::pair<unsigned, const TargetRegisterClass*> +ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + // GCC RS6000 Constraint Letters + switch (Constraint[0]) { + case 'l': + if (Subtarget->isThumb()) + return std::make_pair(0U, ARM::tGPRRegisterClass); + else + return std::make_pair(0U, ARM::GPRRegisterClass); + case 'r': + return std::make_pair(0U, ARM::GPRRegisterClass); + case 'w': + if (VT == MVT::f32) + return std::make_pair(0U, ARM::SPRRegisterClass); + if (VT == MVT::f64) + return std::make_pair(0U, ARM::DPRRegisterClass); + break; + } + } + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +std::vector<unsigned> ARMTargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint.size() != 1) + return std::vector<unsigned>(); + + switch (Constraint[0]) { // GCC ARM Constraint Letters + default: break; + case 'l': + return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + 0); + case 'r': + return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R9, ARM::R10, ARM::R11, + ARM::R12, ARM::LR, 0); + case 'w': + if (VT == MVT::f32) + return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12,ARM::S13,ARM::S14,ARM::S15, + ARM::S16,ARM::S17,ARM::S18,ARM::S19, + ARM::S20,ARM::S21,ARM::S22,ARM::S23, + ARM::S24,ARM::S25,ARM::S26,ARM::S27, + ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); + if (VT == MVT::f64) + return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7, + ARM::D8, ARM::D9, ARM::D10,ARM::D11, + ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); + break; + } + + return std::vector<unsigned>(); +} + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. 
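+/// For instance, with the Thumb 'I' case handled below, an immediate of 200 is accepted +/// (it lies in [0,255]) while 300 is not, so nothing is pushed onto Ops for it.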
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, + char Constraint, + bool hasMemory, + std::vector<SDValue>&Ops, + SelectionDAG &DAG) const { + SDValue Result(0, 0); + + switch (Constraint) { + default: break; + case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + if (!C) + return; + + int64_t CVal64 = C->getSExtValue(); + int CVal = (int) CVal64; + // None of these constraints allow values larger than 32 bits. Check + // that the value fits in an int. + if (CVal != CVal64) + return; + + switch (Constraint) { + case 'I': + if (Subtarget->isThumb()) { + // This must be a constant between 0 and 255, for ADD immediates. + if (CVal >= 0 && CVal <= 255) + break; + } else { + // A constant that can be used as an immediate value in a + // data-processing instruction. + if (ARM_AM::getSOImmVal(CVal) != -1) + break; + } + return; + + case 'J': + if (Subtarget->isThumb()) { + // This must be a constant between -255 and -1, for negated ADD + // immediates. This can be used in GCC with an "n" modifier that + // prints the negated value, for use with SUB instructions. It is + // not useful otherwise but is implemented for compatibility. + if (CVal >= -255 && CVal <= -1) + break; + } else { + // This must be a constant between -4095 and 4095. It is not clear + // what this constraint is intended for. Implemented for + // compatibility with GCC. + if (CVal >= -4095 && CVal <= 4095) + break; + } + return; + + case 'K': + if (Subtarget->isThumb()) { + // A 32-bit value where only one byte has a nonzero value. Exclude + // zero to match GCC. This constraint is used by GCC internally for + // constants that can be loaded with a move/shift combination. + // It is not useful otherwise but is implemented for compatibility. + if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) + break; + } else { + // A constant whose bitwise inverse can be used as an immediate + // value in a data-processing instruction. This can be used in GCC + // with a "B" modifier that prints the inverted value, for use with + // BIC and MVN instructions. It is not useful otherwise but is + // implemented for compatibility. + if (ARM_AM::getSOImmVal(~CVal) != -1) + break; + } + return; + + case 'L': + if (Subtarget->isThumb()) { + // This must be a constant between -7 and 7, + // for 3-operand ADD/SUB immediate instructions. + if (CVal >= -7 && CVal < 7) + break; + } else { + // A constant whose negation can be used as an immediate value in a + // data-processing instruction. This can be used in GCC with an "n" + // modifier that prints the negated value, for use with SUB + // instructions. It is not useful otherwise but is implemented for + // compatibility. + if (ARM_AM::getSOImmVal(-CVal) != -1) + break; + } + return; + + case 'M': + if (Subtarget->isThumb()) { + // This must be a multiple of 4 between 0 and 1020, for + // ADD sp + immediate. + if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) + break; + } else { + // A power of two or a constant between 0 and 32. This is used in + // GCC for the shift amount on shifted register operands, but it is + // useful in general for any shift amounts. + if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) + break; + } + return; + + case 'N': + if (Subtarget->isThumb()) { + // This must be a constant between 0 and 31, for shift amounts. 
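+        // (There is no ARM-mode alternative for 'N'; a non-Thumb target falls through to +        // the return below and the constraint is rejected.)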
+ if (CVal >= 0 && CVal <= 31) + break; + } + return; + + case 'O': + if (Subtarget->isThumb()) { + // This must be a multiple of 4 between -508 and 508, for + // ADD/SUB sp = sp + immediate. + if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) + break; + } + return; + } + Result = DAG.getTargetConstant(CVal, Op.getValueType()); + break; + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, + Ops, DAG); +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h new file mode 100644 index 0000000..2dab2db --- /dev/null +++ b/lib/Target/ARM/ARMISelLowering.h @@ -0,0 +1,184 @@ +//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that ARM uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMISELLOWERING_H +#define ARMISELLOWERING_H + +#include "ARMSubtarget.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include <vector> + +namespace llvm { + class ARMConstantPoolValue; + + namespace ARMISD { + // ARM Specific DAG Nodes + enum NodeType { + // Start the numbering where the builtin ops and target ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + Wrapper, // Wrapper - A wrapper node for TargetConstantPool, + // TargetExternalSymbol, and TargetGlobalAddress. + WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable + + CALL, // Function call. + CALL_PRED, // Function call that's predicable. + CALL_NOLINK, // Function call with branch not branch-and-link. + tCALL, // Thumb function call. + BRCOND, // Conditional branch. + BR_JT, // Jumptable branch. + RET_FLAG, // Return with a flag operand. + + PIC_ADD, // Add with a PC operand and a PIC label. + + CMP, // ARM compare instructions. + CMPNZ, // ARM compare that uses only N or Z flags. + CMPFP, // ARM VFP compare instruction, sets FPSCR. + CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. + FMSTAT, // ARM fmstat instruction. + CMOV, // ARM conditional move instructions. + CNEG, // ARM conditional negate instructions. + + FTOSI, // FP to sint within a FP register. + FTOUI, // FP to uint within a FP register. + SITOF, // sint to FP within a FP register. + UITOF, // uint to FP within a FP register. + + SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. + SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. + RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. + + FMRRD, // double to two gprs. + FMDRR, // Two gprs to double. + + EH_SJLJ_SETJMP, // SjLj exception handling setjmp + EH_SJLJ_LONGJMP, // SjLj exception handling longjmp + + THREAD_POINTER + }; + } + + //===--------------------------------------------------------------------===// + // ARMTargetLowering - ARM Implementation of the TargetLowering interface + + class ARMTargetLowering : public TargetLowering { + int VarArgsFrameIndex; // FrameIndex for start of varargs area. 
+ public: + explicit ARMTargetLowering(TargetMachine &TM); + + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. + /// + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG); + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + virtual const char *getTargetNodeName(unsigned Opcode) const; + + virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; + + /// getPreIndexedAddressParts - returns true by value, base pointer and + /// offset pointer and addressing mode by reference if the node's address + /// can be legally represented as pre-indexed load / store address. + virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + + /// getPostIndexedAddressParts - returns true by value, base pointer and + /// offset pointer and addressing mode by reference if this node can be + /// combined with a load / store to form a post-indexed load / store. + virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + + virtual void computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const; + ConstraintType getConstraintType(const std::string &Constraint) const; + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + std::vector<unsigned> + getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops + /// vector. If it is invalid, don't add anything to Ops. If hasMemory is + /// true it means one of the asm constraint of the inline asm instruction + /// being processed is 'm'. + virtual void LowerAsmOperandForConstraint(SDValue Op, + char ConstraintLetter, + bool hasMemory, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const; + + virtual const ARMSubtarget* getSubtarget() { + return Subtarget; + } + + private: + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; + + /// ARMPCLabelIndex - Keep track the number of ARM PC labels created. 
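+    /// (a fresh id is handed out for each PC-relative label so that every label is unique).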
+ /// + unsigned ARMPCLabelIndex; + + SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, + const SDValue &StackPtr, const CCValAssign &VA, + SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags); + SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG); + SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); + SDValue LowerRET(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG); + SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, + SelectionDAG &DAG); + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); + SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff); + }; +} + +#endif // ARMISELLOWERING_H diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td new file mode 100644 index 0000000..9a1e1c2 --- /dev/null +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -0,0 +1,868 @@ +//===- ARMInstrFormats.td - ARM Instruction Formats --*- tablegen -*---------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// ARM Instruction Format Definitions. +// + +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format<bits<5> val> { + bits<5> Value = val; +} + +def Pseudo : Format<0>; +def MulFrm : Format<1>; +def BrFrm : Format<2>; +def BrMiscFrm : Format<3>; + +def DPFrm : Format<4>; +def DPSoRegFrm : Format<5>; + +def LdFrm : Format<6>; +def StFrm : Format<7>; +def LdMiscFrm : Format<8>; +def StMiscFrm : Format<9>; +def LdStMulFrm : Format<10>; + +def ArithMiscFrm : Format<11>; +def ExtFrm : Format<12>; + +def VFPUnaryFrm : Format<13>; +def VFPBinaryFrm : Format<14>; +def VFPConv1Frm : Format<15>; +def VFPConv2Frm : Format<16>; +def VFPConv3Frm : Format<17>; +def VFPConv4Frm : Format<18>; +def VFPConv5Frm : Format<19>; +def VFPLdStFrm : Format<20>; +def VFPLdStMulFrm : Format<21>; +def VFPMiscFrm : Format<22>; + +def ThumbFrm : Format<23>; + +// Misc flag for data processing instructions that indicates whether +// the instruction has a Rn register operand. +class UnaryDP { bit isUnaryDataProc = 1; } + +//===----------------------------------------------------------------------===// + +// ARM Instruction templates. 
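+// InstARM is the common base class: it records the addressing mode, size, index mode and +// format in the TSFlags fields used by the machine code emitter. The I class below appends +// the predicate operand, sI additionally appends the optional cc_out operand, XI adds +// neither, and the A*/AX* families then fix the size and addressing mode per encoding group.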
+// + +class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im, + Format f, string cstr> + : Instruction { + field bits<32> Inst; + + let Namespace = "ARM"; + + // TSFlagsFields + AddrMode AM = am; + bits<4> AddrModeBits = AM.Value; + + SizeFlagVal SZ = sz; + bits<3> SizeFlag = SZ.Value; + + IndexMode IM = im; + bits<2> IndexModeBits = IM.Value; + + Format F = f; + bits<5> Form = F.Value; + + // + // Attributes specific to ARM instructions... + // + bit isUnaryDataProc = 0; + + let Constraints = cstr; +} + +class PseudoInst<dag oops, dag iops, string asm, list<dag> pattern> + : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, ""> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; +} + +// Almost all ARM instructions are predicable. +class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, + IndexMode im, Format f, string opc, string asm, string cstr, + list<dag> pattern> + : InstARM<am, sz, im, f, cstr> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let Pattern = pattern; + list<Predicate> Predicates = [IsARM]; +} + +// Same as I except it can optionally modify CPSR. Note it's modeled as +// an input operand since by default it's a zero register. It will +// become an implicit def once it's "flipped". +class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, + IndexMode im, Format f, string opc, string asm, string cstr, + list<dag> pattern> + : InstARM<am, sz, im, f, cstr> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); + let Pattern = pattern; + list<Predicate> Predicates = [IsARM]; +} + +// Special cases +class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, + IndexMode im, Format f, string asm, string cstr, list<dag> pattern> + : InstARM<am, sz, im, f, cstr> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list<Predicate> Predicates = [IsARM]; +} + +class AI<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern>; +class AsI<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern>; +class AXI<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, asm, + "", pattern>; + +// Ctrl flow instructions +class ABI<bits<4> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, opc, + asm, "", pattern> { + let Inst{27-24} = opcod; +} +class ABXI<bits<4> opcod, dag oops, dag iops, string asm, list<dag> pattern> + : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, asm, + "", pattern> { + let Inst{27-24} = opcod; +} +class ABXIx2<dag oops, dag iops, string asm, list<dag> pattern> + : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, asm, + "", pattern>; + +// BR_JT instructions +class JTI<dag oops, dag iops, string asm, list<dag> pattern> + : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, + asm, "", pattern>; + +// addrmode1 instructions +class AI1<bits<4> opcod, dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, 
AddrMode1, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{24-21} = opcod; + let Inst{27-26} = {0,0}; +} +class AsI1<bits<4> opcod, dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{24-21} = opcod; + let Inst{27-26} = {0,0}; +} +class AXI1<bits<4> opcod, dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, asm, + "", pattern> { + let Inst{24-21} = opcod; + let Inst{27-26} = {0,0}; +} +class AI1x2<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, opc, + asm, "", pattern>; + + +// addrmode2 loads and stores +class AI2<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{27-26} = {0,1}; +} + +// loads +class AI2ldw<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2ldw<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2ldb<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2ldb<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// stores +class AI2stw<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2stw<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2stb<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2stb<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// 
Pre-indexed loads +class AI2ldwpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2ldbpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// Pre-indexed stores +class AI2stwpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2stbpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// Post-indexed loads +class AI2ldwpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} +class AI2ldbpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} + +// Post-indexed stores +class AI2stwpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} +class AI2stbpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} + +// addrmode3 instructions +class AI3<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern>; +class AXI3<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, asm, + "", pattern>; + +// loads +class AI3ldh<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3ldh<dag oops, dag iops, 
Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldsh<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3ldsh<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldsb<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3ldsb<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldd<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} + +// stores +class AI3sth<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3sth<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3std<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} + +// Pre-indexed loads +class AI3ldhpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} 
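+// (AI3ldshpr and AI3ldsbpr below differ from AI3ldhpr only in the S and H bits, giving the +// sign-extended halfword and sign-extended byte pre-indexed loads respectively.)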
+class AI3ldshpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldsbpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} + +// Pre-indexed stores +class AI3sthpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} + +// Post-indexed loads +class AI3ldhpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} +class AI3ldshpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} +class AI3ldsbpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} + +// Post-indexed stores +class AI3sthpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} + + +// addrmode4 instructions +class AXI4ld<dag oops, dag iops, Format f, string asm, list<dag> pattern> + : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, asm, + "", pattern> { + let Inst{20} = 1; // L bit + let Inst{22} = 0; // S bit + let Inst{27-25} = 0b100; +} +class AXI4st<dag oops, dag iops, Format f, string asm, list<dag> pattern> + : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, asm, + "", pattern> { + let Inst{20} = 0; // L bit + let Inst{22} = 0; // S bit + let Inst{27-25} = 0b100; +} + +// Unsigned multiply, multiply-accumulate instructions. 
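+// (AMul1I fixes bits 7-4 to 0b1001, the multiply encoding group, and hard-codes S = 0; +// AsMul1I does not set the S bit, since its sI base class carries the optional cc_out operand.)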
+class AMul1I<bits<7> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc, + asm, "", pattern> { + let Inst{7-4} = 0b1001; + let Inst{20} = 0; // S bit + let Inst{27-21} = opcod; +} +class AsMul1I<bits<7> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc, + asm, "", pattern> { + let Inst{7-4} = 0b1001; + let Inst{27-21} = opcod; +} + +// Most significant word multiply +class AMul2I<bits<7> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc, + asm, "", pattern> { + let Inst{7-4} = 0b1001; + let Inst{20} = 1; + let Inst{27-21} = opcod; +} + +// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y> +class AMulxyI<bits<7> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc, + asm, "", pattern> { + let Inst{4} = 0; + let Inst{7} = 1; + let Inst{20} = 0; + let Inst{27-21} = opcod; +} + +// Extend instructions. +class AExtI<bits<8> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, opc, + asm, "", pattern> { + let Inst{7-4} = 0b0111; + let Inst{27-20} = opcod; +} + +// Misc Arithmetic instructions. +class AMiscA1I<bits<8> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, opc, + asm, "", pattern> { + let Inst{27-20} = opcod; +} + +//===----------------------------------------------------------------------===// + +// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode. +class ARMPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM]; +} +class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM, HasV5TE]; +} +class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM, HasV6]; +} + +//===----------------------------------------------------------------------===// +// +// Thumb Instruction Format Definitions. +// + + +// TI - Thumb instruction. 
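+// ThumbI carries the Thumb addressing mode and size; the TI/TI1/TI2/TI4/TIs aliases below +// fix the addressing mode (AddrModeT1/T2/T4/Ts), TIt marks the two-address forms, and TIx2 +// is the 4-byte form used for the BL/BLX pairs.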
+ +class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz, + string asm, string cstr, list<dag> pattern> + : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> { + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asm; + let Pattern = pattern; + list<Predicate> Predicates = [IsThumb]; +} + +class TI<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>; +class TI1<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeT1, Size2Bytes, asm, "", pattern>; +class TI2<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeT2, Size2Bytes, asm, "", pattern>; +class TI4<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeT4, Size2Bytes, asm, "", pattern>; +class TIs<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeTs, Size2Bytes, asm, "", pattern>; + +// Two-address instructions +class TIt<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>; + +// BL, BLX(1) are translated by assembler into two instructions +class TIx2<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>; + +// BR_JT instructions +class TJTI<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>; + + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM VFP Instruction templates. +// + +// ARM VFP addrmode5 loads and stores +class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, + string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, + VFPLdStFrm, opc, asm, "", pattern> { + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1011; +} + +class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, + string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, + VFPLdStFrm, opc, asm, "", pattern> { + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1010; +} + +// Load / store multiple +class AXSI5<dag oops, dag iops, string asm, list<dag> pattern> + : XI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, + VFPLdStMulFrm, asm, "", pattern> { + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-25} = 0b110; + let Inst{11-8} = 0b1011; +} + +class AXDI5<dag oops, dag iops, string asm, list<dag> pattern> + : XI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, + VFPLdStMulFrm, asm, "", pattern> { + // TODO: Mark the instructions with the appropriate subtarget info. 
+ let Inst{27-25} = 0b110; + let Inst{11-8} = 0b1010; +} + + +// Double precision, unary +class ADuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, + string opc, string asm, list<dag> pattern> + : AI<oops, iops, VFPUnaryFrm, opc, asm, pattern> { + let Inst{27-20} = opcod1; + let Inst{19-16} = opcod2; + let Inst{11-8} = 0b1011; + let Inst{7-4} = opcod3; +} + +// Double precision, binary +class ADbI<bits<8> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AI<oops, iops, VFPBinaryFrm, opc, asm, pattern> { + let Inst{27-20} = opcod; + let Inst{11-8} = 0b1011; +} + +// Single precision, unary +class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, + string opc, string asm, list<dag> pattern> + : AI<oops, iops, VFPUnaryFrm, opc, asm, pattern> { + // Bits 22 (D bit) and 5 (M bit) will be changed during instruction encoding. + let Inst{27-20} = opcod1; + let Inst{19-16} = opcod2; + let Inst{11-8} = 0b1010; + let Inst{7-4} = opcod3; +} + +// Single precision, binary +class ASbI<bits<8> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AI<oops, iops, VFPBinaryFrm, opc, asm, pattern> { + // Bit 22 (D bit) can be changed during instruction encoding. + let Inst{27-20} = opcod; + let Inst{11-8} = 0b1010; +} + +// VFP conversion instructions +class AVConv1I<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, + dag oops, dag iops, string opc, string asm, list<dag> pattern> + : AI<oops, iops, VFPConv1Frm, opc, asm, pattern> { + let Inst{27-20} = opcod1; + let Inst{19-16} = opcod2; + let Inst{11-8} = opcod3; + let Inst{6} = 1; +} + +class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f, + string opc, string asm, list<dag> pattern> + : AI<oops, iops, f, opc, asm, pattern> { + let Inst{27-20} = opcod1; + let Inst{11-8} = opcod2; + let Inst{4} = 1; +} + +class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, opc, asm, pattern>; + +class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, opc, asm, pattern>; + +class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, opc, asm, pattern>; + +class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, opc, asm, pattern>; + +//===----------------------------------------------------------------------===// + + +// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode. +class ThumbPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb]; +} + +class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb, HasV5T]; +} diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp new file mode 100644 index 0000000..4b0dbb5 --- /dev/null +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -0,0 +1,1025 @@ +//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ARMInstrInfo.h" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMGenInstrInfo.inc" +#include "ARMMachineFunctionInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +static cl::opt<bool> +EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, + cl::desc("Enable ARM 2-addr to 3-addr conv")); + +static inline +const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { + return MIB.addImm((int64_t)ARMCC::AL).addReg(0); +} + +static inline +const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} + +ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) + : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)), + RI(*this, STI) { +} + + +/// Return true if the instruction is a register to register move and +/// leave the source and dest operands in the passed parameters. +/// +bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned& SrcSubIdx, unsigned& DstSubIdx) const { + SrcSubIdx = DstSubIdx = 0; // No sub-registers. + + unsigned oc = MI.getOpcode(); + switch (oc) { + default: + return false; + case ARM::FCPYS: + case ARM::FCPYD: + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + case ARM::MOVr: + case ARM::tMOVr: + case ARM::tMOVhir2lor: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2hir: + assert(MI.getDesc().getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "Invalid ARM MOV instruction"); + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + } +} + +unsigned ARMInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::LDR: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FLDD: + case ARM::FLDS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::tRestore: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned ARMInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::STR: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FSTD: + case ARM::FSTS: + if (MI->getOperand(1).isFI() && + 
MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::tSpill: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + const MachineInstr *Orig) const { + DebugLoc dl = Orig->getDebugLoc(); + if (Orig->getOpcode() == ARM::MOVi2pieces) { + RI.emitLoadConstPool(MBB, I, DestReg, Orig->getOperand(1).getImm(), + Orig->getOperand(2).getImm(), + Orig->getOperand(3).getReg(), this, false, dl); + return; + } + + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MI->getOperand(0).setReg(DestReg); + MBB.insert(I, MI); +} + +static unsigned getUnindexedOpcode(unsigned Opc) { + switch (Opc) { + default: break; + case ARM::LDR_PRE: + case ARM::LDR_POST: + return ARM::LDR; + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + return ARM::LDRH; + case ARM::LDRB_PRE: + case ARM::LDRB_POST: + return ARM::LDRB; + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + return ARM::LDRSH; + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + return ARM::LDRSB; + case ARM::STR_PRE: + case ARM::STR_POST: + return ARM::STR; + case ARM::STRH_PRE: + case ARM::STRH_POST: + return ARM::STRH; + case ARM::STRB_PRE: + case ARM::STRB_POST: + return ARM::STRB; + } + return 0; +} + +MachineInstr * +ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + if (!EnableARM3Addr) + return NULL; + + MachineInstr *MI = MBBI; + MachineFunction &MF = *MI->getParent()->getParent(); + unsigned TSFlags = MI->getDesc().TSFlags; + bool isPre = false; + switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { + default: return NULL; + case ARMII::IndexModePre: + isPre = true; + break; + case ARMII::IndexModePost: + break; + } + + // Try splitting an indexed load/store to an un-indexed one plus an add/sub + // operation. + unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); + if (MemOpc == 0) + return NULL; + + MachineInstr *UpdateMI = NULL; + MachineInstr *MemMI = NULL; + unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); + const TargetInstrDesc &TID = MI->getDesc(); + unsigned NumOps = TID.getNumOperands(); + bool isLoad = !TID.mayStore(); + const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); + const MachineOperand &Base = MI->getOperand(2); + const MachineOperand &Offset = MI->getOperand(NumOps-3); + unsigned WBReg = WB.getReg(); + unsigned BaseReg = Base.getReg(); + unsigned OffReg = Offset.getReg(); + unsigned OffImm = MI->getOperand(NumOps-2).getImm(); + ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); + switch (AddrMode) { + default: + assert(false && "Unknown indexed op!"); + return NULL; + case ARMII::AddrMode2: { + bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; + unsigned Amt = ARM_AM::getAM2Offset(OffImm); + if (OffReg == 0) { + int SOImmVal = ARM_AM::getSOImmVal(Amt); + if (SOImmVal == -1) + // Can't encode it in a so_imm operand. This transformation will + // add more than 1 instruction. Abandon! + return NULL; + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? 
ARM::SUBri : ARM::ADDri), WBReg) + .addReg(BaseReg).addImm(SOImmVal) + .addImm(Pred).addReg(0).addReg(0); + } else if (Amt != 0) { + ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); + unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg) + .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) + .addImm(Pred).addReg(0).addReg(0); + } else + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) + .addReg(BaseReg).addReg(OffReg) + .addImm(Pred).addReg(0).addReg(0); + break; + } + case ARMII::AddrMode3 : { + bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; + unsigned Amt = ARM_AM::getAM3Offset(OffImm); + if (OffReg == 0) + // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) + .addReg(BaseReg).addImm(Amt) + .addImm(Pred).addReg(0).addReg(0); + else + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) + .addReg(BaseReg).addReg(OffReg) + .addImm(Pred).addReg(0).addReg(0); + break; + } + } + + std::vector<MachineInstr*> NewMIs; + if (isPre) { + if (isLoad) + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc), MI->getOperand(0).getReg()) + .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); + else + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc)).addReg(MI->getOperand(1).getReg()) + .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); + NewMIs.push_back(MemMI); + NewMIs.push_back(UpdateMI); + } else { + if (isLoad) + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc), MI->getOperand(0).getReg()) + .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); + else + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc)).addReg(MI->getOperand(1).getReg()) + .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); + if (WB.isDead()) + UpdateMI->getOperand(0).setIsDead(); + NewMIs.push_back(UpdateMI); + NewMIs.push_back(MemMI); + } + + // Transfer LiveVariables states, kill / dead info. + if (LV) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + + LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); + if (MO.isDef()) { + MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; + if (MO.isDead()) + LV->addVirtualRegisterDead(Reg, NewMI); + } + if (MO.isUse() && MO.isKill()) { + for (unsigned j = 0; j < 2; ++j) { + // Look at the two new MI's in reverse order. + MachineInstr *NewMI = NewMIs[j]; + if (!NewMI->readsRegister(Reg)) + continue; + LV->addVirtualRegisterKilled(Reg, NewMI); + if (VI.removeKill(MI)) + VI.Kills.push_back(NewMI); + break; + } + } + } + } + } + + MFI->insert(MBBI, NewMIs[1]); + MFI->insert(MBBI, NewMIs[0]); + return NewMIs[0]; +} + +// Branch analysis. +bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. 
+ unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (LastOpc == ARM::B || LastOpc == ARM::tB) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(LastInst->getOperand(1)); + Cond.push_back(LastInst->getOperand(2)); + return false; + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with ARM::B/ARM::tB and a ARM::Bcc/ARM::tBcc, handle it. + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) || + (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(SecondLastInst->getOperand(1)); + Cond.push_back(SecondLastInst->getOperand(2)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed, so remove it. + if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) && + (LastOpc == ARM::B || LastOpc == ARM::tB)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // ...likewise if it ends with a branch table followed by an unconditional + // branch. The branch folder can create these, and we must get rid of them for + // correctness of Thumb constant islands. + if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm || + SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr) && + (LastOpc == ARM::B || LastOpc == ARM::tB)) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } + + // Otherwise, can't handle this. + return true; +} + + +unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B; + int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc; + + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (I->getOpcode() != BccOpc) + return 1; + + // Remove the branch. + I->eraseFromParent(); + return 2; +} + +unsigned +ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const { + // FIXME this should probably have a DebugLoc argument + DebugLoc dl = DebugLoc::getUnknownLoc(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B; + int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc; + + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "ARM branch conditions have two components!"); + + if (FBB == 0) { + if (Cond.empty()) // Unconditional branch? 
+ BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB); + else + BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); + return 1; + } + + // Two-way conditional branch. + BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); + BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB); + return 2; +} + +bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (!AFI->isThumbFunction()) { + if (DestRC == ARM::GPRRegisterClass) { + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) + .addReg(SrcReg))); + return true; + } + } else { + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg); + return true; + } + } else if (DestRC == ARM::tGPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); + return true; + } + } + } + if (DestRC != SrcRC) { + // Not yet supported! + return false; + } + + + if (DestRC == ARM::SPRRegisterClass) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) + .addReg(SrcReg)); + else if (DestRC == ARM::DPRRegisterClass) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) + .addReg(SrcReg)); + else + return false; + + return true; +} + +void ARMInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert (!AFI->isThumbFunction()); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addReg(0).addImm(0)); + } else if (RC == ARM::tGPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert (AFI->isThumbFunction()); + BuildMI(MBB, I, DL, get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); + } else if (RC == ARM::DPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0)); + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0)); + } +} + +void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const{ + DebugLoc DL = DebugLoc::getUnknownLoc(); + unsigned Opc = 0; + if (RC == ARM::GPRRegisterClass) { + ARMFunctionInfo *AFI = 
MF.getInfo<ARMFunctionInfo>(); + if (AFI->isThumbFunction()) { + Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR; + MachineInstrBuilder MIB = + BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; + } + Opc = ARM::STR; + } else if (RC == ARM::DPRRegisterClass) { + Opc = ARM::FSTD; + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + Opc = ARM::FSTS; + } + + MachineInstrBuilder MIB = + BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + AddDefaultPred(MIB); + NewMIs.push_back(MIB); + return; +} + +void ARMInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert (!AFI->isThumbFunction()); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) + .addFrameIndex(FI).addReg(0).addImm(0)); + } else if (RC == ARM::tGPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert (AFI->isThumbFunction()); + BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) + .addFrameIndex(FI).addImm(0); + } else if (RC == ARM::DPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg) + .addFrameIndex(FI).addImm(0)); + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg) + .addFrameIndex(FI).addImm(0)); + } +} + +void ARMInstrInfo:: +loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + unsigned Opc = 0; + if (RC == ARM::GPRRegisterClass) { + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (AFI->isThumbFunction()) { + Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; + } + Opc = ARM::LDR; + } else if (RC == ARM::DPRRegisterClass) { + Opc = ARM::FLDD; + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + Opc = ARM::FLDS; + } + + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + AddDefaultPred(MIB); + NewMIs.push_back(MIB); + return; +} + +bool ARMInstrInfo:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (!AFI->isThumbFunction() || CSI.empty()) + return false; + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH)); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + // Add the callee-saved register as live-in. It's killed at the spill. 
+ MBB.addLiveIn(Reg); + MIB.addReg(Reg, RegState::Kill); + } + return true; +} + +bool ARMInstrInfo:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (!AFI->isThumbFunction() || CSI.empty()) + return false; + + bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc()); + MBB.insert(MI, PopMI); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (Reg == ARM::LR) { + // Special epilogue for vararg functions. See emitEpilogue + if (isVarArg) + continue; + Reg = ARM::PC; + PopMI->setDesc(get(ARM::tPOP_RET)); + MBB.erase(MI); + } + PopMI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + return true; +} + +MachineInstr *ARMInstrInfo:: +foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, int FI) const { + if (Ops.size() != 1) return NULL; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + MachineInstr *NewMI = NULL; + switch (Opc) { + default: break; + case ARM::MOVr: { + if (MI->getOperand(4).getReg() == ARM::CPSR) + // If it is updating CPSR, then it cannot be folded. + break; + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); + } + break; + } + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + break; + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg)) + // tRestore cannot target a high register operand. 
+ break; + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addImm(0); + } + break; + } + case ARM::FCPYS: { + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) + .addReg(SrcReg).addFrameIndex(FI) + .addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS), DstReg) + .addFrameIndex(FI) + .addImm(0).addImm(Pred).addReg(PredReg); + } + break; + } + case ARM::FCPYD: { + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } + break; + } + } + + return NewMI; +} + +bool ARMInstrInfo:: +canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const { + if (Ops.size() != 1) return false; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: break; + case ARM::MOVr: + // If it is updating CPSR, then it cannot be folded. + return MI->getOperand(4).getReg() != ARM::CPSR; + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + return false; + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg)) + // tRestore cannot target a high register operand. + return false; + } + return true; + } + case ARM::FCPYS: + case ARM::FCPYD: + return true; + } + + return false; +} + +bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case ARM::BX_RET: // Return. + case ARM::LDM_RET: + case ARM::tBX_RET: + case ARM::tBX_RET_vararg: + case ARM::tPOP_RET: + case ARM::B: + case ARM::tB: // Uncond branch. + case ARM::tBR_JTr: + case ARM::BR_JTr: // Jumptable branch. + case ARM::BR_JTm: // Jumptable branch through mem. + case ARM::BR_JTadd: // Jumptable branch add to pc. 
+ return true; + default: return false; + } +} + +bool ARMInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); + Cond[0].setImm(ARMCC::getOppositeCondition(CC)); + return false; +} + +bool ARMInstrInfo::isPredicated(const MachineInstr *MI) const { + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; +} + +bool ARMInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + unsigned Opc = MI->getOpcode(); + if (Opc == ARM::B || Opc == ARM::tB) { + MI->setDesc(get(Opc == ARM::B ? ARM::Bcc : ARM::tBcc)); + MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); + MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); + return true; + } + + int PIdx = MI->findFirstPredOperandIdx(); + if (PIdx != -1) { + MachineOperand &PMO = MI->getOperand(PIdx); + PMO.setImm(Pred[0].getImm()); + MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); + return true; + } + return false; +} + +bool ARMInstrInfo:: +SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + if (Pred1.size() > 2 || Pred2.size() > 2) + return false; + + ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); + ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); + if (CC1 == CC2) + return true; + + switch (CC1) { + default: + return false; + case ARMCC::AL: + return true; + case ARMCC::HS: + return CC2 == ARMCC::HI; + case ARMCC::LS: + return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; + case ARMCC::GE: + return CC2 == ARMCC::GT; + case ARMCC::LE: + return CC2 == ARMCC::LT; + } +} + +bool ARMInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.getImplicitDefs() && !TID.hasOptionalDef()) + return false; + + bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == ARM::CPSR) { + Pred.push_back(MO); + Found = true; + } + } + + return Found; +} + + +/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing +static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, + unsigned JTI) DISABLE_INLINE; +static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, + unsigned JTI) { + return JT[JTI].MBBs.size(); +} + +/// GetInstSize - Return the size of the specified MachineInstr. +/// +unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo(); + + // Basic size info comes from the TSFlags field. + const TargetInstrDesc &TID = MI->getDesc(); + unsigned TSFlags = TID.TSFlags; + + switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { + default: { + // If this machine instr is an inline asm, measure it. 
+ if (MI->getOpcode() == ARM::INLINEASM) + return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName()); + if (MI->isLabel()) + return 0; + switch (MI->getOpcode()) { + default: + assert(0 && "Unknown or unset size field for instr!"); + break; + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::DECLARE: + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + return 0; + } + break; + } + case ARMII::Size8Bytes: return 8; // Arm instruction x 2. + case ARMII::Size4Bytes: return 4; // Arm instruction. + case ARMII::Size2Bytes: return 2; // Thumb instruction. + case ARMII::SizeSpecial: { + switch (MI->getOpcode()) { + case ARM::CONSTPOOL_ENTRY: + // If this machine instr is a constant pool entry, its size is recorded as + // operand #2. + return MI->getOperand(2).getImm(); + case ARM::Int_eh_sjlj_setjmp: return 12; + case ARM::BR_JTr: + case ARM::BR_JTm: + case ARM::BR_JTadd: + case ARM::tBR_JTr: { + // These are jumptable branches, i.e. a branch followed by an inlined + // jumptable. The size is 4 + 4 * number of entries. + unsigned NumOps = TID.getNumOperands(); + MachineOperand JTOP = + MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2)); + unsigned JTI = JTOP.getIndex(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + assert(JTI < JT.size()); + // Thumb instructions are 2 byte aligned, but JT entries are 4 byte + // 4 aligned. The assembler / linker may add 2 byte padding just before + // the JT entries. The size does not include this padding; the + // constant islands pass does separate bookkeeping for it. + // FIXME: If we know the size of the function is less than (1 << 16) *2 + // bytes, we can use 16-bit entries instead. Then there won't be an + // alignment issue. + return getNumJTEntries(JT, JTI) * 4 + + (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4); + } + default: + // Otherwise, pseudo-instruction sizes are zero. + return 0; + } + } + } + return 0; // Not reached +} diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h new file mode 100644 index 0000000..13ff3fe --- /dev/null +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -0,0 +1,258 @@ +//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMINSTRUCTIONINFO_H +#define ARMINSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "ARMRegisterInfo.h" +#include "ARM.h" + +namespace llvm { + class ARMSubtarget; + +/// ARMII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace ARMII { + enum { + //===------------------------------------------------------------------===// + // Instruction Flags. + + //===------------------------------------------------------------------===// + // This four-bit field describes the addressing mode used. 
+ + AddrModeMask = 0xf, + AddrModeNone = 0, + AddrMode1 = 1, + AddrMode2 = 2, + AddrMode3 = 3, + AddrMode4 = 4, + AddrMode5 = 5, + AddrModeT1 = 6, + AddrModeT2 = 7, + AddrModeT4 = 8, + AddrModeTs = 9, // i8 * 4 for pc and sp relative data + + // Size* - Flags to keep track of the size of an instruction. + SizeShift = 4, + SizeMask = 7 << SizeShift, + SizeSpecial = 1, // 0 byte pseudo or special case. + Size8Bytes = 2, + Size4Bytes = 3, + Size2Bytes = 4, + + // IndexMode - Unindex, pre-indexed, or post-indexed. Only valid for load + // and store ops + IndexModeShift = 7, + IndexModeMask = 3 << IndexModeShift, + IndexModePre = 1, + IndexModePost = 2, + + //===------------------------------------------------------------------===// + // Misc flags. + + // UnaryDP - Indicates this is a unary data processing instruction, i.e. + // it doesn't have a Rn operand. + UnaryDP = 1 << 9, + + //===------------------------------------------------------------------===// + // Instruction encoding formats. + // + FormShift = 10, + FormMask = 0x1f << FormShift, + + // Pseudo instructions + Pseudo = 0 << FormShift, + + // Multiply instructions + MulFrm = 1 << FormShift, + + // Branch instructions + BrFrm = 2 << FormShift, + BrMiscFrm = 3 << FormShift, + + // Data Processing instructions + DPFrm = 4 << FormShift, + DPSoRegFrm = 5 << FormShift, + + // Load and Store + LdFrm = 6 << FormShift, + StFrm = 7 << FormShift, + LdMiscFrm = 8 << FormShift, + StMiscFrm = 9 << FormShift, + LdStMulFrm = 10 << FormShift, + + // Miscellaneous arithmetic instructions + ArithMiscFrm = 11 << FormShift, + + // Extend instructions + ExtFrm = 12 << FormShift, + + // VFP formats + VFPUnaryFrm = 13 << FormShift, + VFPBinaryFrm = 14 << FormShift, + VFPConv1Frm = 15 << FormShift, + VFPConv2Frm = 16 << FormShift, + VFPConv3Frm = 17 << FormShift, + VFPConv4Frm = 18 << FormShift, + VFPConv5Frm = 19 << FormShift, + VFPLdStFrm = 20 << FormShift, + VFPLdStMulFrm = 21 << FormShift, + VFPMiscFrm = 22 << FormShift, + + // Thumb format + ThumbFrm = 23 << FormShift, + + //===------------------------------------------------------------------===// + // Field shifts - such shifts are used to set field while generating + // machine instructions. + M_BitShift = 5, + ShiftImmShift = 5, + ShiftShift = 7, + N_BitShift = 7, + ImmHiShift = 8, + SoRotImmShift = 8, + RegRsShift = 8, + ExtRotImmShift = 10, + RegRdLoShift = 12, + RegRdShift = 12, + RegRdHiShift = 16, + RegRnShift = 16, + S_BitShift = 20, + W_BitShift = 21, + AM3_I_BitShift = 22, + D_BitShift = 22, + U_BitShift = 23, + P_BitShift = 24, + I_BitShift = 25, + CondShift = 28 + }; +} + +class ARMInstrInfo : public TargetInstrInfoImpl { + const ARMRegisterInfo RI; +public: + explicit ARMInstrInfo(const ARMSubtarget &STI); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const ARMRegisterInfo &getRegisterInfo() const { return RI; } + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. 
+ virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned DestReg, const MachineInstr *Orig) const; + + virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + + // Branch analysis. + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const; + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + virtual bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const; + + virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; + virtual + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + + // Predication support. + virtual bool isPredicated(const MachineInstr *MI) const; + + ARMCC::CondCodes getPredicate(const MachineInstr *MI) const { + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 ? 
(ARMCC::CondCodes)MI->getOperand(PIdx).getImm() + : ARMCC::AL; + } + + virtual + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const; + + virtual + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + + /// GetInstSize - Returns the size of the specified MachineInstr. + /// + virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; +}; + +} + +#endif diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td new file mode 100644 index 0000000..680e772 --- /dev/null +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -0,0 +1,1390 @@ +//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the ARM instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM specific DAG Nodes. +// + +// Type profiles. +def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; + +def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; + +def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; + +def SDT_ARMCMov : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>]>; + +def SDT_ARMBrcond : SDTypeProfile<0, 2, + [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; + +def SDT_ARMBrJT : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + +def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; + +def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; + +def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; +def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + +// Node definitions. 
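The SDNode records that follow give TableGen names to the target-specific opcodes that the C++ lowering code produces. As a rough illustration (a sketch, not the actual ARMISelLowering implementation; the helper name is made up), a node such as ARMISD::Wrapper is created with the ordinary SelectionDAG API and is then matched by the ARMWrapper pattern defined below:

// Sketch only: wrap a constant-pool address in an ARMISD::Wrapper node.
static SDValue lowerCPWrapperExample(ConstantPoolSDNode *CP, SelectionDAG &DAG,
                                     DebugLoc dl) {
  SDValue CPAddr = DAG.getTargetConstantPool(CP->getConstVal(), MVT::i32,
                                             CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
}
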
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; +def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; + +def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, + [SDNPHasChain, SDNPOutFlag]>; +def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInFlag]>; + +def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, + [SDNPInFlag]>; +def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov, + [SDNPInFlag]>; + +def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + +def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, + [SDNPHasChain]>; + +def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, + [SDNPOutFlag]>; + +def ARMcmpNZ : SDNode<"ARMISD::CMPNZ", SDT_ARMCmp, + [SDNPOutFlag]>; + +def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; + +def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; +def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; +def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>; + +def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; +def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; + +//===----------------------------------------------------------------------===// +// ARM Instruction Predicate Definitions. +// +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def IsThumb : Predicate<"Subtarget->isThumb()">; +def IsThumb2 : Predicate<"Subtarget->isThumb2()">; +def IsARM : Predicate<"!Subtarget->isThumb()">; + +//===----------------------------------------------------------------------===// +// ARM Flag Definitions. + +class RegConstraint<string C> { + string Constraints = C; +} + +//===----------------------------------------------------------------------===// +// ARM specific transformation functions and pattern fragments. +// + +// so_imm_XFORM - Return a so_imm value packed into the format described for +// so_imm def below. +def so_imm_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(N->getZExtValue()), + MVT::i32); +}]>; + +// so_imm_neg_XFORM - Return a so_imm value packed into the format described for +// so_imm_neg def below. +def so_imm_neg_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(-(int)N->getZExtValue()), + MVT::i32); +}]>; + +// so_imm_not_XFORM - Return a so_imm value packed into the format described for +// so_imm_not def below. +def so_imm_not_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(~(int)N->getZExtValue()), + MVT::i32); +}]>; + +// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24. +def rot_imm : PatLeaf<(i32 imm), [{ + int32_t v = (int32_t)N->getZExtValue(); + return v == 8 || v == 16 || v == 24; +}]>; + +/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15]. 
+def imm1_15 : PatLeaf<(i32 imm), [{ + return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16; +}]>; + +/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. +def imm16_31 : PatLeaf<(i32 imm), [{ + return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32; +}]>; + +def so_imm_neg : + PatLeaf<(imm), [{ + return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1; + }], so_imm_neg_XFORM>; + +def so_imm_not : + PatLeaf<(imm), [{ + return ARM_AM::getSOImmVal(~(int)N->getZExtValue()) != -1; + }], so_imm_not_XFORM>; + +// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. +def sext_16_node : PatLeaf<(i32 GPR:$a), [{ + return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17; +}]>; + +class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; +class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>; + +//===----------------------------------------------------------------------===// +// Operand Definitions. +// + +// Branch target. +def brtarget : Operand<OtherVT>; + +// A list of registers separated by comma. Used by load/store multiple. +def reglist : Operand<i32> { + let PrintMethod = "printRegisterList"; +} + +// An operand for the CONSTPOOL_ENTRY pseudo-instruction. +def cpinst_operand : Operand<i32> { + let PrintMethod = "printCPInstOperand"; +} + +def jtblock_operand : Operand<i32> { + let PrintMethod = "printJTBlockOperand"; +} + +// Local PC labels. +def pclabel : Operand<i32> { + let PrintMethod = "printPCLabel"; +} + +// shifter_operand operands: so_reg and so_imm. +def so_reg : Operand<i32>, // reg reg imm + ComplexPattern<i32, 3, "SelectShifterOperandReg", + [shl,srl,sra,rotr]> { + let PrintMethod = "printSORegOperand"; + let MIOperandInfo = (ops GPR, GPR, i32imm); +} + +// so_imm - Match a 32-bit shifter_operand immediate operand, which is an +// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are +// represented in the imm field in the same 12-bit form that they are encoded +// into so_imm instructions: the 8-bit immediate is the least significant bits +// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11]. +def so_imm : Operand<i32>, + PatLeaf<(imm), + [{ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; }], + so_imm_XFORM> { + let PrintMethod = "printSOImmOperand"; +} + +// Break so_imm's up into two pieces. This handles immediates with up to 16 +// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to +// get the first/second pieces. +def so_imm2part : Operand<i32>, + PatLeaf<(imm), [{ + return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); + }]> { + let PrintMethod = "printSOImm2PartOperand"; +} + +def so_imm2part_1 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32); +}]>; + +def so_imm2part_2 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32); +}]>; + + +// Define ARM specific addressing modes. 
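Before the addressing-mode operands below, a quick illustration of what the so_imm predicate defined above accepts. The test that ARM_AM::getSOImmVal performs is, in spirit, the following (a standalone sketch, not the actual helper):

// True iff V fits ARM's "8-bit value rotated right by an even amount" form.
static bool isSOImmEncodable(uint32_t V) {
  for (unsigned Rot = 0; Rot != 32; Rot += 2) {
    // Rotating V left by Rot undoes a rotate-right by Rot in the encoding.
    uint32_t Rotated = (V << Rot) | (Rot ? (V >> (32 - Rot)) : 0);
    if (Rotated <= 0xFF)
      return true;
  }
  return false;
}

For example 0xFF000000 (0xFF ror 8) and 0x000003FC (0xFF ror 30) pass, while 0x00FFFF00 does not; that kind of constant is what so_imm2part splits into two encodable pieces.
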
+ +// addrmode2 := reg +/- reg shop imm +// addrmode2 := reg +/- imm12 +// +def addrmode2 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode2", []> { + let PrintMethod = "printAddrMode2Operand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} + +def am2offset : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> { + let PrintMethod = "printAddrMode2OffsetOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmode3 := reg +/- reg +// addrmode3 := reg +/- imm8 +// +def addrmode3 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode3", []> { + let PrintMethod = "printAddrMode3Operand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} + +def am3offset : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> { + let PrintMethod = "printAddrMode3OffsetOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmode4 := reg, <mode|W> +// +def addrmode4 : Operand<i32>, + ComplexPattern<i32, 2, "", []> { + let PrintMethod = "printAddrMode4Operand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmode5 := reg +/- imm8*4 +// +def addrmode5 : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode5", []> { + let PrintMethod = "printAddrMode5Operand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmodepc := pc + reg +// +def addrmodepc : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrModePC", []> { + let PrintMethod = "printAddrModePCOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// ARM Predicate operand. Default to 14 = always (AL). Second part is CC +// register whose default is 0 (no register). +def pred : PredicateOperand<OtherVT, (ops i32imm, CCR), + (ops (i32 14), (i32 zero_reg))> { + let PrintMethod = "printPredicateOperand"; +} + +// Conditional code result for instructions whose 's' bit is set, e.g. subs. +// +def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> { + let PrintMethod = "printSBitModifierOperand"; +} + +//===----------------------------------------------------------------------===// +// ARM Instruction flags. These need to match ARMInstrInfo.h. +// + +// Addressing mode. +class AddrMode<bits<4> val> { + bits<4> Value = val; +} +def AddrModeNone : AddrMode<0>; +def AddrMode1 : AddrMode<1>; +def AddrMode2 : AddrMode<2>; +def AddrMode3 : AddrMode<3>; +def AddrMode4 : AddrMode<4>; +def AddrMode5 : AddrMode<5>; +def AddrModeT1 : AddrMode<6>; +def AddrModeT2 : AddrMode<7>; +def AddrModeT4 : AddrMode<8>; +def AddrModeTs : AddrMode<9>; + +// Instruction size. +class SizeFlagVal<bits<3> val> { + bits<3> Value = val; +} +def SizeInvalid : SizeFlagVal<0>; // Unset. +def SizeSpecial : SizeFlagVal<1>; // Pseudo or special. +def Size8Bytes : SizeFlagVal<2>; +def Size4Bytes : SizeFlagVal<3>; +def Size2Bytes : SizeFlagVal<4>; + +// Load / store index mode. +class IndexMode<bits<2> val> { + bits<2> Value = val; +} +def IndexModeNone : IndexMode<0>; +def IndexModePre : IndexMode<1>; +def IndexModePost : IndexMode<2>; + +//===----------------------------------------------------------------------===// + +include "ARMInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Multiclass helpers... +// + +/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a +/// binop that produces a value. 
+multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode> { + def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, + opc, " $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>; + def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, + opc, " $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>; + def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + opc, " $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>; +} + +/// ASI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the +/// instruction modifies the CSPR register. +let Defs = [CPSR] in { +multiclass ASI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode> { + def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, + opc, "s $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>; + def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, + opc, "s $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>; + def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + opc, "s $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>; +} +} + +/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test +/// patterns. Similar to AsI1_bin_irs except the instruction does not produce +/// a explicit result, only implicitly set CPSR. +let Defs = [CPSR] in { +multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { + def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, + opc, " $a, $b", + [(opnode GPR:$a, so_imm:$b)]>; + def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, + opc, " $a, $b", + [(opnode GPR:$a, GPR:$b)]>; + def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + opc, " $a, $b", + [(opnode GPR:$a, so_reg:$b)]>; +} +} + +/// AI_unary_rrot - A unary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +/// FIXME: Remove the 'r' variant. Its rot_imm is zero. +multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> { + def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$Src), + opc, " $dst, $Src", + [(set GPR:$dst, (opnode GPR:$Src))]>, + Requires<[IsARM, HasV6]> { + let Inst{19-16} = 0b1111; + } + def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$Src, i32imm:$rot), + opc, " $dst, $Src, ror $rot", + [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>, + Requires<[IsARM, HasV6]> { + let Inst{19-16} = 0b1111; + } +} + +/// AI_bin_rrot - A binary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> { + def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), + opc, " $dst, $LHS, $RHS", + [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>, + Requires<[IsARM, HasV6]>; + def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + opc, " $dst, $LHS, $RHS, ror $rot", + [(set GPR:$dst, (opnode GPR:$LHS, + (rotr GPR:$RHS, rot_imm:$rot)))]>, + Requires<[IsARM, HasV6]>; +} + +/// AsXI1_bin_c_irs - Same as AsI1_bin_irs but without the predicate operand and +/// setting carry bit. But it can optionally set CPSR. 
+let Uses = [CPSR] in { +multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> { + def ri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s), + DPFrm, !strconcat(opc, "${s} $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>; + def rr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b, cc_out:$s), + DPFrm, !strconcat(opc, "${s} $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>; + def rs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s), + DPSoRegFrm, !strconcat(opc, "${s} $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>; +} +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +// + +/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in +/// the function. The first operand is the ID# for this instruction, the second +/// is the index into the MachineConstantPool that this is, the third is the +/// size in bytes of this constant pool entry. +let isNotDuplicable = 1 in +def CONSTPOOL_ENTRY : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), + "${instid:label} ${cpidx:cpentry}", []>; + +let Defs = [SP], Uses = [SP] in { +def ADJCALLSTACKUP : +PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), + "@ ADJCALLSTACKUP $amt1", + [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; + +def ADJCALLSTACKDOWN : +PseudoInst<(outs), (ins i32imm:$amt, pred:$p), + "@ ADJCALLSTACKDOWN $amt", + [(ARMcallseq_start timm:$amt)]>; +} + +def DWARF_LOC : +PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), + ".loc $file, $line, $col", + [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; + + +// Address computation and loads and stores in PIC mode. 
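PICADD below places a local label immediately before an add of pc. In ARM state a pc read returns the address of the current instruction plus 8, so if the source register holds a constant-pool word containing Sym minus (L + 8), where L is the address of the add, the add yields the absolute address of Sym without needing an absolute relocation. A small sketch with hypothetical values L and Sym:

// Illustration only; L and Sym stand for the add's address and the symbol.
static uint32_t picAddResult(uint32_t L, uint32_t Sym) {
  uint32_t PC   = L + 8;       // what pc reads as at the add
  uint32_t Slot = Sym - PC;    // what the PIC constant-pool entry holds
  return PC + Slot;            // == Sym
}
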
+let isNotDuplicable = 1 in { +def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), + Pseudo, "$cp:\n\tadd$p $dst, pc, $a", + [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; + +let AddedComplexity = 10 in { +let canFoldAsLoad = 1 in +def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr$p $dst, $addr", + [(set GPR:$dst, (load addrmodepc:$addr))]>; + +def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}h $dst, $addr", + [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>; + +def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}b $dst, $addr", + [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>; + +def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}sh $dst, $addr", + [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>; + +def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}sb $dst, $addr", + [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>; +} +let AddedComplexity = 10 in { +def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tstr$p $src, $addr", + [(store GPR:$src, addrmodepc:$addr)]>; + +def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tstr${p}h $src, $addr", + [(truncstorei16 GPR:$src, addrmodepc:$addr)]>; + +def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tstr${p}b $src, $addr", + [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; +} +} // isNotDuplicable = 1 + +//===----------------------------------------------------------------------===// +// Control Flow Instructions. +// + +let isReturn = 1, isTerminator = 1 in + def BX_RET : AI<(outs), (ins), BrMiscFrm, "bx", " lr", [(ARMretflag)]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; +} + +// FIXME: remove when we have a way to marking a MI with these properties. +// FIXME: $dst1 should be a def. But the extra ops must be in the end of the +// operand list. +// FIXME: Should pc be an implicit operand like PICADD, etc? +let isReturn = 1, isTerminator = 1 in + def LDM_RET : AXI4ld<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), + LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", + []>; + +let isCall = 1, + Defs = [R0, R1, R2, R3, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in { + def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), + "bl ${func:call}", + [(ARMcall tglobaladdr:$func)]>; + + def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops), + "bl", " ${func:call}", + [(ARMcall_pred tglobaladdr:$func)]>; + + // ARMv5T and above + def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + "blx $func", + [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T]> { + let Inst{7-4} = 0b0011; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + } + + let Uses = [LR] in { + // ARMv4T + def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops), + "mov lr, pc\n\tbx $func", + [(ARMcall_nolink GPR:$func)]>; + } +} + +let isBranch = 1, isTerminator = 1 in { + // B is "predicable" since it can be xformed into a Bcc. 
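That rewrite is what ARMInstrInfo::PredicateInstruction earlier in this patch does when the if-converter hands it a predicate; condensed, the B/tB case is:

  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::B || Opc == ARM::tB) {
    MI->setDesc(get(Opc == ARM::B ? ARM::Bcc : ARM::tBcc));
    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));        // cond
    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); // CPSR
    return true;
  }
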
+ let isBarrier = 1 in { + let isPredicable = 1 in + def B : ABXI<0b1010, (outs), (ins brtarget:$target), "b $target", + [(br bb:$target)]>; + + let isNotDuplicable = 1, isIndirectBranch = 1 in { + def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), + "mov pc, $target \n$jt", + [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { + let Inst{20} = 0; // S Bit + let Inst{24-21} = 0b1101; + let Inst{27-26} = {0,0}; + } + def BR_JTm : JTI<(outs), + (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id), + "ldr pc, $target \n$jt", + [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, + imm:$id)]> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; + } + def BR_JTadd : JTI<(outs), + (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), + "add pc, $target, $idx \n$jt", + [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, + imm:$id)]> { + let Inst{20} = 0; // S bit + let Inst{24-21} = 0b0100; + let Inst{27-26} = {0,0}; + } + } // isNotDuplicable = 1, isIndirectBranch = 1 + } // isBarrier = 1 + + // FIXME: should be able to write a pattern for ARMBrcond, but can't use + // a two-value operand where a dag node expects two operands. :( + def Bcc : ABI<0b1010, (outs), (ins brtarget:$target), + "b", " $target", + [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>; +} + +//===----------------------------------------------------------------------===// +// Load / store Instructions. +// + +// Load +let canFoldAsLoad = 1 in +def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, + "ldr", " $dst, $addr", + [(set GPR:$dst, (load addrmode2:$addr))]>; + +// Special LDR for loads from non-pc-relative constpools. +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, + "ldr", " $dst, $addr", []>; + +// Loads with zero extension +def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "h $dst, $addr", + [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; + +def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, + "ldr", "b $dst, $addr", + [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; + +// Loads with sign extension +def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sh $dst, $addr", + [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; + +def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sb $dst, $addr", + [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; + +let mayLoad = 1 in { +// Load doubleword +def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "d $dst, $addr", + []>, Requires<[IsARM, HasV5T]>; + +// Indexed loads +def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode2:$addr), LdFrm, + "ldr", " $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am2offset:$offset), LdFrm, + "ldr", " $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, + "ldr", "h $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, + "ldr", "h $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode2:$addr), LdFrm, + "ldr", "b $dst, $addr!", "$addr.base = $base_wb", []>; + 
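The _PRE and _POST variants here differ only in when the updated base is written back; in rough C terms (illustrative register values only):

// ldr r0, [r1, #4]!   pre-indexed:  r1 = r1 + 4;           r0 = *(uint32_t *)r1;
// ldr r0, [r1], #4    post-indexed: r0 = *(uint32_t *)r1;  r1 = r1 + 4;

The "$addr.base = $base_wb" and "$base = $base_wb" constraints tie the written-back base register to the corresponding input operand.
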
+def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdFrm, + "ldr", "b $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sh $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, + "ldr", "sh $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sb $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, + "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>; +} + +// Store +def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, + "str", " $src, $addr", + [(store GPR:$src, addrmode2:$addr)]>; + +// Stores with truncate +def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, + "str", "h $src, $addr", + [(truncstorei16 GPR:$src, addrmode3:$addr)]>; + +def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, + "str", "b $src, $addr", + [(truncstorei8 GPR:$src, addrmode2:$addr)]>; + +// Store doubleword +let mayStore = 1 in +def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, + "str", "d $src, $addr", + []>, Requires<[IsARM, HasV5T]>; + +// Indexed stores +def STR_PRE : AI2stwpr<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm, + "str", " $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>; + +def STR_POST : AI2stwpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + "str", " $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, + (post_store GPR:$src, GPR:$base, am2offset:$offset))]>; + +def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, + "str", "h $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; + +def STRH_POST: AI3sthpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, + "str", "h $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, (post_truncsti16 GPR:$src, + GPR:$base, am3offset:$offset))]>; + +def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + "str", "b $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, + GPR:$base, am2offset:$offset))]>; + +def STRB_POST: AI2stbpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + "str", "b $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, (post_truncsti8 GPR:$src, + GPR:$base, am2offset:$offset))]>; + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +// FIXME: $dst1 should be a def. +let mayLoad = 1 in +def LDM : AXI4ld<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), + LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", + []>; + +let mayStore = 1 in +def STM : AXI4st<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops), + LdStMulFrm, "stm${p}${addr:submode} $addr, $src1", + []>; + +//===----------------------------------------------------------------------===// +// Move Instructions. 
+// + +def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, + "mov", " $dst, $src", []>, UnaryDP; +def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, + "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP; + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, + "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP; + +def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, + "mov", " $dst, $src, rrx", + [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP; + +// These aren't really mov instructions, but we have to define them this way +// due to flag operands. + +let Defs = [CPSR] in { +def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, + "mov", "s $dst, $src, lsr #1", + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; +def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, + "mov", "s $dst, $src, asr #1", + [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP; +} + +//===----------------------------------------------------------------------===// +// Extend Instructions. +// + +// Sign extenders + +defm SXTB : AI_unary_rrot<0b01101010, + "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; +defm SXTH : AI_unary_rrot<0b01101011, + "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; + +defm SXTAB : AI_bin_rrot<0b01101010, + "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; +defm SXTAH : AI_bin_rrot<0b01101011, + "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; + +// TODO: SXT(A){B|H}16 + +// Zero extenders + +let AddedComplexity = 16 in { +defm UXTB : AI_unary_rrot<0b01101110, + "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; +defm UXTH : AI_unary_rrot<0b01101111, + "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; +defm UXTB16 : AI_unary_rrot<0b01101100, + "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; + +def : ARMV6Pat<(and (shl GPR:$Src, 8), 0xFF00FF), + (UXTB16r_rot GPR:$Src, 24)>; +def : ARMV6Pat<(and (srl GPR:$Src, 8), 0xFF00FF), + (UXTB16r_rot GPR:$Src, 8)>; + +defm UXTAB : AI_bin_rrot<0b01101110, "uxtab", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; +defm UXTAH : AI_bin_rrot<0b01101111, "uxtah", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; +} + +// This isn't safe in general, the add is two 16-bit units, not a 32-bit add. +//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>; + +// TODO: UXT(A){B|H}16 + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. +// + +defm ADD : AsI1_bin_irs<0b0100, "add", + BinOpFrag<(add node:$LHS, node:$RHS)>>; +defm SUB : AsI1_bin_irs<0b0010, "sub", + BinOpFrag<(sub node:$LHS, node:$RHS)>>; + +// ADD and SUB with 's' bit set. +defm ADDS : ASI1_bin_s_irs<0b0100, "add", + BinOpFrag<(addc node:$LHS, node:$RHS)>>; +defm SUBS : ASI1_bin_s_irs<0b0010, "sub", + BinOpFrag<(subc node:$LHS, node:$RHS)>>; + +// FIXME: Do not allow ADC / SBC to be predicated for now. +defm ADC : AsXI1_bin_c_irs<0b0101, "adc", + BinOpFrag<(adde node:$LHS, node:$RHS)>>; +defm SBC : AsXI1_bin_c_irs<0b0110, "sbc", + BinOpFrag<(sube node:$LHS, node:$RHS)>>; + +// These don't define reg/reg forms, because they are handled above. 
+def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, + "rsb", " $dst, $a, $b", + [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]>; + +def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + "rsb", " $dst, $a, $b", + [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]>; + +// RSB with 's' bit set. +let Defs = [CPSR] in { +def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, + "rsb", "s $dst, $a, $b", + [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]>; +def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + "rsb", "s $dst, $a, $b", + [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>; +} + +// FIXME: Do not allow RSC to be predicated for now. But they can set CPSR. +let Uses = [CPSR] in { +def RSCri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s), + DPFrm, "rsc${s} $dst, $a, $b", + [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>; +def RSCrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s), + DPSoRegFrm, "rsc${s} $dst, $a, $b", + [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>; +} + +// (sub X, imm) gets canonicalized to (add X, -imm). Match this form. +def : ARMPat<(add GPR:$src, so_imm_neg:$imm), + (SUBri GPR:$src, so_imm_neg:$imm)>; + +//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), +// (SUBSri GPR:$src, so_imm_neg:$imm)>; +//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm), +// (SBCri GPR:$src, so_imm_neg:$imm)>; + +// Note: These are implemented in C++ code, because they have to generate +// ADD/SUBrs instructions, which use a complex pattern that a xform function +// cannot produce. +// (mul X, 2^n+1) -> (add (X << n), X) +// (mul X, 2^n-1) -> (rsb X, (X << n)) + + +//===----------------------------------------------------------------------===// +// Bitwise Instructions. +// + +defm AND : AsI1_bin_irs<0b0000, "and", + BinOpFrag<(and node:$LHS, node:$RHS)>>; +defm ORR : AsI1_bin_irs<0b1100, "orr", + BinOpFrag<(or node:$LHS, node:$RHS)>>; +defm EOR : AsI1_bin_irs<0b0001, "eor", + BinOpFrag<(xor node:$LHS, node:$RHS)>>; +defm BIC : AsI1_bin_irs<0b1110, "bic", + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; + +def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, + "mvn", " $dst, $src", + [(set GPR:$dst, (not GPR:$src))]>, UnaryDP; +def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, + "mvn", " $dst, $src", + [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, + "mvn", " $dst, $imm", + [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP; + +def : ARMPat<(and GPR:$src, so_imm_not:$imm), + (BICri GPR:$src, so_imm_not:$imm)>; + +//===----------------------------------------------------------------------===// +// Multiply Instructions. 
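The plain 32-bit multiplies come first; the AI_smul and AI_smla multiclasses further down add the v5TE halfword forms, whose patterns select on which 16-bit half of each source is used. In C terms, the cases shown below amount to (sketch only):

// smulbb rd, ra, rb : product of the signed bottom halfwords
static int32_t smulbb(int32_t a, int32_t b) { return (int16_t)a * (int16_t)b; }
// smultt rd, ra, rb : product of the signed top halfwords
static int32_t smultt(int32_t a, int32_t b) { return (a >> 16) * (b >> 16); }
// smlabb rd, ra, rb, rc : smulbb plus an accumulator
static int32_t smlabb(int32_t a, int32_t b, int32_t acc) {
  return acc + (int16_t)a * (int16_t)b;
}
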
+// + +def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + "mul", " $dst, $a, $b", + [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; + +def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "mla", " $dst, $a, $b, $c", + [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; + +// Extra precision multiplies with low / high results +def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "smull", " $ldst, $hdst, $a, $b", []>; + +def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "umull", " $ldst, $hdst, $a, $b", []>; + +// Multiply + accumulate +def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "smlal", " $ldst, $hdst, $a, $b", []>; + +def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "umlal", " $ldst, $hdst, $a, $b", []>; + +def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "umaal", " $ldst, $hdst, $a, $b", []>, + Requires<[IsARM, HasV6]>; + +// Most significant word multiply +def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + "smmul", " $dst, $a, $b", + [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0001; + let Inst{15-12} = 0b1111; +} + +def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "smmla", " $dst, $a, $b, $c", + [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0001; +} + + +def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "smmls", " $dst, $a, $b, $c", + [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1101; +} + +multiclass AI_smul<string opc, PatFrag opnode> { + def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "bb"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; + } + + def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "bt"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; + } + + def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "tb"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sra GPR:$a, 16), + (sext_inreg GPR:$b, i16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; + } + + def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "tt"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sra GPR:$a, 16), + (sra GPR:$b, 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; + } + + def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "wb"), " $dst, $a, $b", + [(set GPR:$dst, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), 16))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; + } + + def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "wt"), " $dst, $a, $b", + [(set GPR:$dst, (sra (opnode GPR:$a, + (sra GPR:$b, 16)), 16))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; + } +} + + +multiclass AI_smla<string opc, PatFrag opnode> { + def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + 
!strconcat(opc, "bb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, + (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; + } + + def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, 16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; + } + + def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + (sext_inreg GPR:$b, i16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; + } + + def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + (sra GPR:$b, 16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; + } + + def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; + } + + def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sra GPR:$b, 16)), 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; + } +} + +defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; +defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; + +// TODO: Halfword multiple accumulate long: SMLAL<x><y> +// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD + +//===----------------------------------------------------------------------===// +// Misc. Arithmetic Instructions. 
+// + +def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), + "clz", " $dst, $src", + [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> { + let Inst{7-4} = 0b0001; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), + "rev", " $dst, $src", + [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0011; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), + "rev16", " $dst, $src", + [(set GPR:$dst, + (or (and (srl GPR:$src, 8), 0xFF), + (or (and (shl GPR:$src, 8), 0xFF00), + (or (and (srl GPR:$src, 8), 0xFF0000), + (and (shl GPR:$src, 8), 0xFF000000)))))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1011; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), + "revsh", " $dst, $src", + [(set GPR:$dst, + (sext_inreg + (or (srl (and GPR:$src, 0xFF00), 8), + (shl GPR:$src, 8)), i16))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1011; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), + (ins GPR:$src1, GPR:$src2, i32imm:$shamt), + "pkhbt", " $dst, $src1, $src2, LSL $shamt", + [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), + (and (shl GPR:$src2, (i32 imm:$shamt)), + 0xFFFF0000)))]>, + Requires<[IsARM, HasV6]> { + let Inst{6-4} = 0b001; +} + +// Alternate cases for PKHBT where identities eliminate some nodes. +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), + (PKHBT GPR:$src1, GPR:$src2, 0)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), + (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; + + +def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), + (ins GPR:$src1, GPR:$src2, i32imm:$shamt), + "pkhtb", " $dst, $src1, $src2, ASR $shamt", + [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), + (and (sra GPR:$src2, imm16_31:$shamt), + 0xFFFF)))]>, Requires<[IsARM, HasV6]> { + let Inst{6-4} = 0b101; +} + +// Alternate cases for PKHTB where identities eliminate some nodes. Note that +// a shift amount of 0 is *not legal* here, it is PKHBT instead. +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, 16)), + (PKHTB GPR:$src1, GPR:$src2, 16)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), + (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), + (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; + +//===----------------------------------------------------------------------===// +// Comparison Instructions... +// + +defm CMP : AI1_cmp_irs<0b1010, "cmp", + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; +defm CMN : AI1_cmp_irs<0b1011, "cmn", + BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; + +// Note that TST/TEQ don't set all the same flags that CMP does! 
+defm TST : AI1_cmp_irs<0b1000, "tst",
+ BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>>;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>>;
+
+defm CMPnz : AI1_cmp_irs<0b1010, "cmp",
+ BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
+defm CMNnz : AI1_cmp_irs<0b1011, "cmn",
+ BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>;
+
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpNZ GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+def MOVCCs : AI1<0b1101, (outs GPR:$dst),
+ (ins GPR:$false, so_reg:$true), DPSoRegFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+def MOVCCi : AI1<0b1101, (outs GPR:$dst),
+ (ins GPR:$false, so_imm:$true), DPFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo,
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p),
+ Pseudo,
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR] in {
+ def TPsoft : ABXI<0b1011, (outs), (ins),
+ "bl __aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is a three instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+let Defs = + [ R0, R1, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, + D0, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15 ] in { + def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src), + AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, + "add r0, pc, #4\n\t" + "str r0, [$src, #+4]\n\t" + "mov r0, #0 @ eh_setjmp", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; +} + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +// + +// ConstantPool, GlobalAddress, and JumpTable +def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>; +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; +def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (LEApcrelJT tjumptable:$dst, imm:$id)>; + +// Large immediate handling. + +// Two piece so_imms. +let isReMaterializable = 1 in +def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo, + "mov", " $dst, $src", + [(set GPR:$dst, so_imm2part:$src)]>; + +def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), + (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), + (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; + +// TODO: add,sub,and, 3-instr forms? + + +// Direct calls +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; + +// zextload i1 -> zextload i8 +def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; + +// extload -> zextload +def : ARMPat<(extloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; +def : ARMPat<(extloadi8 addrmode2:$addr), (LDRB addrmode2:$addr)>; +def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>; + +def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; +def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; + +// smul* and smla* +def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra (shl GPR:$b, 16), 16)), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16)), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, 16)), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16)), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, 16), sext_16_node:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), 16), + (SMULWB GPR:$a, GPR:$b)>; + +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, 16), 16), + (sra (shl GPR:$b, 16), 16))), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, sext_16_node:$b)), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, (sra GPR:$b, 16))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16))), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, 16), sext_16_node:$b)), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16)), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add 
GPR:$acc, + (sra (mul GPR:$a, sext_16_node:$b), 16)), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; + +//===----------------------------------------------------------------------===// +// Thumb Support +// + +include "ARMInstrThumb.td" + +//===----------------------------------------------------------------------===// +// Floating Point Support +// + +include "ARMInstrVFP.td" diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td new file mode 100644 index 0000000..ffb83a8 --- /dev/null +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -0,0 +1,562 @@ +//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Thumb instruction set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Thumb specific DAG Nodes. +// + +def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def imm_neg_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); +}]>; +def imm_comp_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32); +}]>; + + +/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7]. +def imm0_7 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getZExtValue() < 8; +}]>; +def imm0_7_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)-N->getZExtValue() < 8; +}], imm_neg_XFORM>; + +def imm0_255 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getZExtValue() < 256; +}]>; +def imm0_255_comp : PatLeaf<(i32 imm), [{ + return ~((uint32_t)N->getZExtValue()) < 256; +}]>; + +def imm8_255 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256; +}]>; +def imm8_255_neg : PatLeaf<(i32 imm), [{ + unsigned Val = -N->getZExtValue(); + return Val >= 8 && Val < 256; +}], imm_neg_XFORM>; + +// Break imm's up into two pieces: an immediate + a left shift. +// This uses thumb_immshifted to match and thumb_immshifted_val and +// thumb_immshifted_shamt to get the val/shift pieces. +def thumb_immshifted : PatLeaf<(imm), [{ + return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue()); +}]>; + +def thumb_immshifted_val : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +def thumb_immshifted_shamt : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +// Define Thumb specific addressing modes. 
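As a side note before the addressing-mode definitions below: the two-piece immediate handling just defined (thumb_immshifted plus its two SDNodeXForms) rests on ARM_AM::isThumbImmShiftedVal, getThumbImmValShift and getThumbImmNonShiftedVal. A rough, illustrative C++ sketch of that split is shown here; the helper names prefixed with "sketch" are hypothetical and stand in for the real ARMAddressingModes.h routines, whose exact implementation may differ.

// Illustrative sketch only: a constant qualifies for the thumb_immshifted
// PatLeaf when it is an 8-bit value shifted left, e.g. 0xAB00 == 0xAB << 8.
// The two SDNodeXForms above then recover the 8-bit part and the shift so
// the constant can be materialized as tMOVi8 followed by tLSLri.
#include <cstdint>

static unsigned sketchValShift(uint32_t Imm) {      // stands in for getThumbImmValShift
  if ((Imm & ~255u) == 0) return 0;                 // already fits in 8 bits
  unsigned Shift = 0;
  while ((Imm & 1) == 0) { Imm >>= 1; ++Shift; }    // count trailing zeros
  return Shift;
}

static bool sketchIsThumbImmShiftedVal(uint32_t Imm) {  // stands in for isThumbImmShiftedVal
  return ((Imm >> sketchValShift(Imm)) & ~255u) == 0;   // 8-bit value once shifted down
}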
+ +// t_addrmode_rr := reg + reg +// +def t_addrmode_rr : Operand<i32>, + ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> { + let PrintMethod = "printThumbAddrModeRROperand"; + let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); +} + +// t_addrmode_s4 := reg + reg +// reg + imm5 * 4 +// +def t_addrmode_s4 : Operand<i32>, + ComplexPattern<i32, 3, "SelectThumbAddrModeS4", []> { + let PrintMethod = "printThumbAddrModeS4Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +} + +// t_addrmode_s2 := reg + reg +// reg + imm5 * 2 +// +def t_addrmode_s2 : Operand<i32>, + ComplexPattern<i32, 3, "SelectThumbAddrModeS2", []> { + let PrintMethod = "printThumbAddrModeS2Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +} + +// t_addrmode_s1 := reg + reg +// reg + imm5 +// +def t_addrmode_s1 : Operand<i32>, + ComplexPattern<i32, 3, "SelectThumbAddrModeS1", []> { + let PrintMethod = "printThumbAddrModeS1Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +} + +// t_addrmode_sp := sp + imm8 * 4 +// +def t_addrmode_sp : Operand<i32>, + ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> { + let PrintMethod = "printThumbAddrModeSPOperand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +// + +let Defs = [SP], Uses = [SP] in { +def tADJCALLSTACKUP : +PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "@ tADJCALLSTACKUP $amt1", + [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb]>; + +def tADJCALLSTACKDOWN : +PseudoInst<(outs), (ins i32imm:$amt), + "@ tADJCALLSTACKDOWN $amt", + [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb]>; +} + +let isNotDuplicable = 1 in +def tPICADD : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp), + "$cp:\n\tadd $dst, pc", + [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>; + +//===----------------------------------------------------------------------===// +// Control Flow Instructions. +// + +let isReturn = 1, isTerminator = 1 in { + def tBX_RET : TI<(outs), (ins), "bx lr", [(ARMretflag)]>; + // Alternative return instruction used by vararg functions. + def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), "bx $target", []>; +} + +// FIXME: remove when we have a way to marking a MI with these properties. 
+let isReturn = 1, isTerminator = 1 in +def tPOP_RET : TI<(outs reglist:$dst1, variable_ops), (ins), + "pop $dst1", []>; + +let isCall = 1, + Defs = [R0, R1, R2, R3, LR, + D0, D1, D2, D3, D4, D5, D6, D7] in { + def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops), + "bl ${func:call}", + [(ARMtcall tglobaladdr:$func)]>; + // ARMv5T and above + def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops), + "blx ${func:call}", + [(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>; + def tBLXr : TI<(outs), (ins tGPR:$func, variable_ops), + "blx $func", + [(ARMtcall tGPR:$func)]>, Requires<[HasV5T]>; + // ARMv4T + def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops), + "cpy lr, pc\n\tbx $func", + [(ARMcall_nolink tGPR:$func)]>; +} + +let isBranch = 1, isTerminator = 1 in { + let isBarrier = 1 in { + let isPredicable = 1 in + def tB : TI<(outs), (ins brtarget:$target), "b $target", + [(br bb:$target)]>; + + // Far jump + def tBfar : TIx2<(outs), (ins brtarget:$target), "bl $target\t@ far jump",[]>; + + def tBR_JTr : TJTI<(outs), + (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), + "cpy pc, $target \n\t.align\t2\n$jt", + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; + } +} + +// FIXME: should be able to write a pattern for ARMBrcond, but can't use +// a two-value operand where a dag node expects two operands. :( +let isBranch = 1, isTerminator = 1 in + def tBcc : TI<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target", + [/*(ARMbrcond bb:$target, imm:$cc)*/]>; + +//===----------------------------------------------------------------------===// +// Load Store Instructions. +// + +let canFoldAsLoad = 1 in +def tLDR : TI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), + "ldr $dst, $addr", + [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; + +def tLDRB : TI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), + "ldrb $dst, $addr", + [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>; + +def tLDRH : TI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), + "ldrh $dst, $addr", + [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>; + +def tLDRSB : TI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), + "ldrsb $dst, $addr", + [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; + +def tLDRSH : TI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), + "ldrsh $dst, $addr", + [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; + +let canFoldAsLoad = 1 in +def tLDRspi : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), + "ldr $dst, $addr", + [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>; + +// Special instruction for restore. It cannot clobber condition register +// when it's expanded by eliminateCallFramePseudoInstr(). +let canFoldAsLoad = 1, mayLoad = 1 in +def tRestore : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), + "ldr $dst, $addr", []>; + +// Load tconstpool +let canFoldAsLoad = 1 in +def tLDRpci : TIs<(outs tGPR:$dst), (ins i32imm:$addr), + "ldr $dst, $addr", + [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; + +// Special LDR for loads from non-pc-relative constpools. 
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +def tLDRcp : TIs<(outs tGPR:$dst), (ins i32imm:$addr), + "ldr $dst, $addr", []>; + +def tSTR : TI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), + "str $src, $addr", + [(store tGPR:$src, t_addrmode_s4:$addr)]>; + +def tSTRB : TI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), + "strb $src, $addr", + [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>; + +def tSTRH : TI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), + "strh $src, $addr", + [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>; + +def tSTRspi : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), + "str $src, $addr", + [(store tGPR:$src, t_addrmode_sp:$addr)]>; + +let mayStore = 1 in { +// Special instruction for spill. It cannot clobber condition register +// when it's expanded by eliminateCallFramePseudoInstr(). +def tSpill : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), + "str $src, $addr", []>; +} + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +// TODO: A7-44: LDMIA - load multiple + +let mayLoad = 1 in +def tPOP : TI<(outs reglist:$dst1, variable_ops), (ins), + "pop $dst1", []>; + +let mayStore = 1 in +def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops), + "push $src1", []>; + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. +// + +// Add with carry +def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "adc $dst, $rhs", + [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>; + +def tADDS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "add $dst, $lhs, $rhs", + [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>; + + +def tADDi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "add $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>; + +def tADDi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "add $dst, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>; + +def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "add $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>; + +def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs), + "add $dst, $rhs @ addhirr", []>; + +def tADDrPCi : TI<(outs tGPR:$dst), (ins i32imm:$rhs), + "add $dst, pc, $rhs * 4", []>; + +def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), + "add $dst, $sp, $rhs * 4 @ addrspi", []>; + +def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + "add $dst, $rhs * 4", []>; + +def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "and $dst, $rhs", + [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>; + +def tASRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "asr $dst, $lhs, $rhs", + [(set tGPR:$dst, (sra tGPR:$lhs, imm:$rhs))]>; + +def tASRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "asr $dst, $rhs", + [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>; + +def tBIC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "bic $dst, $rhs", + [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>; + + +def tCMN : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmn $lhs, $rhs", + [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; + +def tCMPi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs), + "cmp $lhs, $rhs", + [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>; + +def tCMPr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmp $lhs, $rhs", + [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>; + +def tTST : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "tst $lhs, $rhs", + [(ARMcmpNZ (and tGPR:$lhs, 
tGPR:$rhs), 0)]>; + +def tCMNNZ : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmn $lhs, $rhs", + [(ARMcmpNZ tGPR:$lhs, (ineg tGPR:$rhs))]>; + +def tCMPNZi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs), + "cmp $lhs, $rhs", + [(ARMcmpNZ tGPR:$lhs, imm0_255:$rhs)]>; + +def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmp $lhs, $rhs", + [(ARMcmpNZ tGPR:$lhs, tGPR:$rhs)]>; + +// TODO: A7-37: CMP(3) - cmp hi regs + +def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "eor $dst, $rhs", + [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>; + +def tLSLri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "lsl $dst, $lhs, $rhs", + [(set tGPR:$dst, (shl tGPR:$lhs, imm:$rhs))]>; + +def tLSLrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "lsl $dst, $rhs", + [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>; + +def tLSRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "lsr $dst, $lhs, $rhs", + [(set tGPR:$dst, (srl tGPR:$lhs, imm:$rhs))]>; + +def tLSRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "lsr $dst, $rhs", + [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>; + +// FIXME: This is not rematerializable because mov changes the condition code. +def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src), + "mov $dst, $src", + [(set tGPR:$dst, imm0_255:$src)]>; + +// TODO: A7-73: MOV(2) - mov setting flag. + + +// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy', +// which is MOV(3). This also supports high registers. +def tMOVr : TI<(outs tGPR:$dst), (ins tGPR:$src), + "cpy $dst, $src", []>; +def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src), + "cpy $dst, $src\t@ hir2lor", []>; +def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src), + "cpy $dst, $src\t@ lor2hir", []>; +def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src), + "cpy $dst, $src\t@ hir2hir", []>; + +def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "mul $dst, $rhs", + [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; + +def tMVN : TI<(outs tGPR:$dst), (ins tGPR:$src), + "mvn $dst, $src", + [(set tGPR:$dst, (not tGPR:$src))]>; + +def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src), + "neg $dst, $src", + [(set tGPR:$dst, (ineg tGPR:$src))]>; + +def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "orr $dst, $rhs", + [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>; + + +def tREV : TI<(outs tGPR:$dst), (ins tGPR:$src), + "rev $dst, $src", + [(set tGPR:$dst, (bswap tGPR:$src))]>, + Requires<[IsThumb, HasV6]>; + +def tREV16 : TI<(outs tGPR:$dst), (ins tGPR:$src), + "rev16 $dst, $src", + [(set tGPR:$dst, + (or (and (srl tGPR:$src, 8), 0xFF), + (or (and (shl tGPR:$src, 8), 0xFF00), + (or (and (srl tGPR:$src, 8), 0xFF0000), + (and (shl tGPR:$src, 8), 0xFF000000)))))]>, + Requires<[IsThumb, HasV6]>; + +def tREVSH : TI<(outs tGPR:$dst), (ins tGPR:$src), + "revsh $dst, $src", + [(set tGPR:$dst, + (sext_inreg + (or (srl (and tGPR:$src, 0xFFFF), 8), + (shl tGPR:$src, 8)), i16))]>, + Requires<[IsThumb, HasV6]>; + +def tROR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "ror $dst, $rhs", + [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>; + + +// Subtract with carry +def tSBC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "sbc $dst, $rhs", + [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>; + +def tSUBS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "sub $dst, $lhs, $rhs", + [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>; + + +// TODO: A7-96: STMIA - store multiple. 
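An aside on the rev16/revsh selection patterns used just above (and in the ARM-mode REV16/REVSH definitions earlier): the nested or/and/shl/srl trees match what is, written out in plain C++, a byte swap within each halfword (rev16) and a byte-swapped, sign-extended low halfword (revsh). The following is only an illustrative sketch with hypothetical helper names, not code from the backend.

#include <cstdint>

// Equivalent of the REV16/tREV16 pattern: swap the two bytes inside each
// 16-bit half of the word, e.g. 0xAABBCCDD -> 0xBBAADDCC.
static uint32_t rev16(uint32_t X) {
  return ((X >> 8) & 0x00FF00FFu) | ((X << 8) & 0xFF00FF00u);
}

// Equivalent of the REVSH/tREVSH pattern: byte-swap the low halfword, then
// sign-extend the 16-bit result to 32 bits.
static int32_t revsh(uint32_t X) {
  uint16_t Swapped = (uint16_t)(((X & 0xFF00u) >> 8) | (X << 8));
  return (int32_t)(int16_t)Swapped;
}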
+ +def tSUBi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "sub $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>; + +def tSUBi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "sub $dst, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>; + +def tSUBrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "sub $dst, $lhs, $rhs", + [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>; + +def tSUBspi : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "sub $dst, $rhs * 4", []>; + +def tSXTB : TI<(outs tGPR:$dst), (ins tGPR:$src), + "sxtb $dst, $src", + [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>, + Requires<[IsThumb, HasV6]>; +def tSXTH : TI<(outs tGPR:$dst), (ins tGPR:$src), + "sxth $dst, $src", + [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>, + Requires<[IsThumb, HasV6]>; + + +def tUXTB : TI<(outs tGPR:$dst), (ins tGPR:$src), + "uxtb $dst, $src", + [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>, + Requires<[IsThumb, HasV6]>; +def tUXTH : TI<(outs tGPR:$dst), (ins tGPR:$src), + "uxth $dst, $src", + [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>, + Requires<[IsThumb, HasV6]>; + + +// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation. +// Expanded by the scheduler into a branch sequence. +let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler. + def tMOVCCr : + PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), + "@ tMOVCCr $cc", + [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; + +// tLEApcrel - Load a pc-relative address into a register without offending the +// assembler. +def tLEApcrel : TIx2<(outs tGPR:$dst), (ins i32imm:$label), + !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(", + "${:private}PCRELL${:uid}+4))\n"), + !strconcat("\tmov $dst, #PCRELV${:uid}\n", + "${:private}PCRELL${:uid}:\n\tadd $dst, pc")), + []>; + +def tLEApcrelJT : TIx2<(outs tGPR:$dst), (ins i32imm:$label, i32imm:$id), + !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(", + "${:private}PCRELL${:uid}+4))\n"), + !strconcat("\tmov $dst, #PCRELV${:uid}\n", + "${:private}PCRELL${:uid}:\n\tadd $dst, pc")), + []>; + +//===----------------------------------------------------------------------===// +// TLS Instructions +// + +// __aeabi_read_tp preserves the registers r1-r3. 
+let isCall = 1, + Defs = [R0, LR] in { + def tTPsoft : TIx2<(outs), (ins), + "bl __aeabi_read_tp", + [(set R0, ARMthread_pointer)]>; +} + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +// + +// ConstantPool, GlobalAddress +def : ThumbPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>; +def : ThumbPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; + +// JumpTable +def : ThumbPat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (tLEApcrelJT tjumptable:$dst, imm:$id)>; + +// Direct calls +def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>; +def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>; + +// Indirect calls to ARM routines +def : ThumbV5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>; + +// zextload i1 -> zextload i8 +def : ThumbPat<(zextloadi1 t_addrmode_s1:$addr), + (tLDRB t_addrmode_s1:$addr)>; + +// extload -> zextload +def : ThumbPat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; +def : ThumbPat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; +def : ThumbPat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>; + +// Large immediate handling. + +// Two piece imms. +def : ThumbPat<(i32 thumb_immshifted:$src), + (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)), + (thumb_immshifted_shamt imm:$src))>; + +def : ThumbPat<(i32 imm0_255_comp:$src), + (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td new file mode 100644 index 0000000..168fb45 --- /dev/null +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -0,0 +1,12 @@ +//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Thumb2 instruction set. +// +//===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td new file mode 100644 index 0000000..0247daf --- /dev/null +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -0,0 +1,398 @@ +//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the ARM VFP instruction set. 
+// +//===----------------------------------------------------------------------===// + +def SDT_FTOI : +SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; +def SDT_ITOF : +SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; +def SDT_CMPFP0 : +SDTypeProfile<0, 1, [SDTCisFP<0>]>; +def SDT_FMDRR : +SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>]>; + +def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; +def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; +def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; +def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; +def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; +def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; +def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>; + +//===----------------------------------------------------------------------===// +// Load / store Instructions. +// + +let canFoldAsLoad = 1 in { +def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), + "fldd", " $dst, $addr", + [(set DPR:$dst, (load addrmode5:$addr))]>; + +def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), + "flds", " $dst, $addr", + [(set SPR:$dst, (load addrmode5:$addr))]>; +} // canFoldAsLoad + +def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), + "fstd", " $src, $addr", + [(store DPR:$src, addrmode5:$addr)]>; + +def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), + "fsts", " $src, $addr", + [(store SPR:$src, addrmode5:$addr)]>; + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +let mayLoad = 1 in { +def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, + variable_ops), + "fldm${addr:submode}d${p} ${addr:base}, $dst1", + []> { + let Inst{20} = 1; +} + +def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, + variable_ops), + "fldm${addr:submode}s${p} ${addr:base}, $dst1", + []> { + let Inst{20} = 1; +} +} + +let mayStore = 1 in { +def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, + variable_ops), + "fstm${addr:submode}d${p} ${addr:base}, $src1", + []> { + let Inst{20} = 0; +} + +def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, + variable_ops), + "fstm${addr:submode}s${p} ${addr:base}, $src1", + []> { + let Inst{20} = 0; +} +} // mayStore + +// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores + +//===----------------------------------------------------------------------===// +// FP Binary Operations. +// + +def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "faddd", " $dst, $a, $b", + [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; + +def FADDS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fadds", " $dst, $a, $b", + [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; + +// These are encoded as unary instructions. 
+def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), + "fcmped", " $a, $b", + [(arm_cmpfp DPR:$a, DPR:$b)]>; + +def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), + "fcmpes", " $a, $b", + [(arm_cmpfp SPR:$a, SPR:$b)]>; + +def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fdivd", " $dst, $a, $b", + [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; + +def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fdivs", " $dst, $a, $b", + [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; + +def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fmuld", " $dst, $a, $b", + [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; + +def FMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fmuls", " $dst, $a, $b", + [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; + +def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fnmuld", " $dst, $a, $b", + [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { + let Inst{6} = 1; +} + +def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fnmuls", " $dst, $a, $b", + [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { + let Inst{6} = 1; +} + +// Match reassociated forms only if not sign dependent rounding. +def : Pat<(fmul (fneg DPR:$a), DPR:$b), + (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; +def : Pat<(fmul (fneg SPR:$a), SPR:$b), + (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + + +def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fsubd", " $dst, $a, $b", + [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { + let Inst{6} = 1; +} + +def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fsubs", " $dst, $a, $b", + [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { + let Inst{6} = 1; +} + +//===----------------------------------------------------------------------===// +// FP Unary Operations. +// + +def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), + "fabsd", " $dst, $a", + [(set DPR:$dst, (fabs DPR:$a))]>; + +def FABSS : ASuI<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), + "fabss", " $dst, $a", + [(set SPR:$dst, (fabs SPR:$a))]>; + +def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), + "fcmpezd", " $a", + [(arm_cmpfp0 DPR:$a)]>; + +def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), + "fcmpezs", " $a", + [(arm_cmpfp0 SPR:$a)]>; + +def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), + "fcvtds", " $dst, $a", + [(set DPR:$dst, (fextend SPR:$a))]>; + +// Special case encoding: bits 11-8 is 0b1011. 
+def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, + "fcvtsd", " $dst, $a", + [(set SPR:$dst, (fround DPR:$a))]> { + let Inst{27-23} = 0b11101; + let Inst{21-16} = 0b110111; + let Inst{11-8} = 0b1011; + let Inst{7-4} = 0b1100; +} + +def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), + "fcpyd", " $dst, $a", []>; + +def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), + "fcpys", " $dst, $a", []>; + +def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), + "fnegd", " $dst, $a", + [(set DPR:$dst, (fneg DPR:$a))]>; + +def FNEGS : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), + "fnegs", " $dst, $a", + [(set SPR:$dst, (fneg SPR:$a))]>; + +def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), + "fsqrtd", " $dst, $a", + [(set DPR:$dst, (fsqrt DPR:$a))]>; + +def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), + "fsqrts", " $dst, $a", + [(set SPR:$dst, (fsqrt SPR:$a))]>; + +//===----------------------------------------------------------------------===// +// FP <-> GPR Copies. Int <-> FP Conversions. +// + +def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), + "fmrs", " $dst, $src", + [(set GPR:$dst, (bitconvert SPR:$src))]>; + +def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), + "fmsr", " $dst, $src", + [(set SPR:$dst, (bitconvert GPR:$src))]>; + +def FMRRD : AVConv3I<0b11000101, 0b1011, + (outs GPR:$dst1, GPR:$dst2), (ins DPR:$src), + "fmrrd", " $dst1, $dst2, $src", + [/* FIXME: Can't write pattern for multiple result instr*/]>; + +// FMDHR: GPR -> SPR +// FMDLR: GPR -> SPR + +def FMDRR : AVConv5I<0b11000100, 0b1011, + (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), + "fmdrr", " $dst, $src1, $src2", + [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; + +// FMRDH: SPR -> GPR +// FMRDL: SPR -> GPR +// FMRRS: SPR -> GPR +// FMRX : SPR system reg -> GPR + +// FMSRR: GPR -> SPR + +// FMXR: GPR -> VFP Sstem reg + + +// Int to FP: + +def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), + "fsitod", " $dst, $a", + [(set DPR:$dst, (arm_sitof SPR:$a))]> { + let Inst{7} = 1; +} + +def FSITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a), + "fsitos", " $dst, $a", + [(set SPR:$dst, (arm_sitof SPR:$a))]> { + let Inst{7} = 1; +} + +def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), + "fuitod", " $dst, $a", + [(set DPR:$dst, (arm_uitof SPR:$a))]>; + +def FUITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a), + "fuitos", " $dst, $a", + [(set SPR:$dst, (arm_uitof SPR:$a))]>; + +// FP to Int: +// Always set Z bit in the instruction, i.e. "round towards zero" variants. 
+ +def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, + (outs SPR:$dst), (ins DPR:$a), + "ftosizd", " $dst, $a", + [(set SPR:$dst, (arm_ftosi DPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +def FTOSIZS : AVConv1I<0b11101011, 0b1101, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + "ftosizs", " $dst, $a", + [(set SPR:$dst, (arm_ftosi SPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, + (outs SPR:$dst), (ins DPR:$a), + "ftouizd", " $dst, $a", + [(set SPR:$dst, (arm_ftoui DPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +def FTOUIZS : AVConv1I<0b11101011, 0b1100, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + "ftouizs", " $dst, $a", + [(set SPR:$dst, (arm_ftoui SPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +//===----------------------------------------------------------------------===// +// FP FMA Operations. +// + +def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fmacd", " $dst, $a, $b", + [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fmacs", " $dst, $a, $b", + [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fmscd", " $dst, $a, $b", + [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fmscs", " $dst, $a, $b", + [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fnmacd", " $dst, $a, $b", + [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fnmacs", " $dst, $a, $b", + [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fnmscd", " $dst, $a, $b", + [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fnmscs", " $dst, $a, $b", + [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +//===----------------------------------------------------------------------===// +// FP Conditional moves. 
+//
+
+def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100,
+ (outs DPR:$dst), (ins DPR:$false, DPR:$true),
+ "fcpyd", " $dst, $true",
+ [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100,
+ (outs SPR:$dst), (ins SPR:$false, SPR:$true),
+ "fcpys", " $dst, $true",
+ [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
+ (outs DPR:$dst), (ins DPR:$false, DPR:$true),
+ "fnegd", " $dst, $true",
+ [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
+ (outs SPR:$dst), (ins SPR:$false, SPR:$true),
+ "fnegs", " $dst, $true",
+ [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+let Defs = [CPSR] in
+def FMSTAT : AI<(outs), (ins), VFPMiscFrm, "fmstat", "", [(arm_fmstat)]> {
+ let Inst{27-20} = 0b11101111;
+ let Inst{19-16} = 0b0001;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{4} = 1;
+}
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 0000000..e551c41
--- /dev/null
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,298 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ abort();
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us (we need
+// to preserve more context and manipulate the stack directly). Instead,
+// write our own wrapper, which does things our way, so we have complete
+// control over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+ void ARMCompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "ARMCompilationCallback\n"
+ ASMPREFIX "ARMCompilationCallback:\n"
+ // Save caller-saved registers since they may contain stuff
+ // for the real target function right now.
We have to act as if this + // whole compilation callback doesn't exist as far as the caller is + // concerned, so we can't just preserve the callee saved regs. + "stmdb sp!, {r0, r1, r2, r3, lr}\n" +#ifndef __SOFTFP__ + "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" +#endif + // The LR contains the address of the stub function on entry. + // pass it as the argument to the C part of the callback + "mov r0, lr\n" + "sub sp, sp, #4\n" + // Call the C portion of the callback + "bl " ASMPREFIX "ARMCompilationCallbackC\n" + "add sp, sp, #4\n" + // Restoring the LR to the return address of the function that invoked + // the stub and de-allocating the stack space for it requires us to + // swap the two saved LR values on the stack, as they're backwards + // for what we need since the pop instruction has a pre-determined + // order for the registers. + // +--------+ + // 0 | LR | Original return address + // +--------+ + // 1 | LR | Stub address (start of stub) + // 2-5 | R3..R0 | Saved registers (we need to preserve all regs) + // 6-20 | D0..D7 | Saved VFP registers + // +--------+ + // +#ifndef __SOFTFP__ + // Restore VFP caller-saved registers. + "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" +#endif + // + // We need to exchange the values in slots 0 and 1 so we can + // return to the address in slot 1 with the address in slot 0 + // restored to the LR. + "ldr r0, [sp,#20]\n" + "ldr r1, [sp,#16]\n" + "str r1, [sp,#20]\n" + "str r0, [sp,#16]\n" + // Return to the (newly modified) stub to invoke the real function. + // The above twiddling of the saved return addresses allows us to + // deallocate everything, including the LR the stub saved, all in one + // pop instruction. + "ldmia sp!, {r0, r1, r2, r3, lr, pc}\n" + ); +#else // Not an ARM host + void ARMCompilationCallback() { + assert(0 && "Cannot call ARMCompilationCallback() on a non-ARM arch!\n"); + abort(); + } +#endif +} + +/// ARMCompilationCallbackC - This is the target-specific function invoked +/// by the function stub when we did not know the real target of a call. +/// This function must locate the start of the stub or call site and pass +/// it into the JIT compiler function. +extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) { + // Get the address of the compiled code for this function. + intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr); + + // Rewrite the call target... so that we don't end up here every time we + // execute the call. We're replacing the first two instructions of the + // stub with: + // ldr pc, [pc,#-4] + // <addr> + if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) { + cerr << "ERROR: Unable to mark stub writable\n"; + abort(); + } + *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4] + *(intptr_t *)(StubAddr+4) = NewVal; + if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) { + cerr << "ERROR: Unable to mark stub executable\n"; + abort(); + } +} + +TargetJITInfo::LazyResolverFn +ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) { + JITCompilerFunction = F; + return ARMCompilationCallback; +} + +void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, + JITCodeEmitter &JCE) { + JCE.startGVStub(GV, 4, 4); + JCE.emitWordLE((intptr_t)Ptr); + void *PtrAddr = JCE.finishGVStub(GV); + addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); + return PtrAddr; +} + +void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + // If this is just a call to an external function, emit a branch instead of a + // call. 
The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
+ // Branch to the corresponding function addr.
+ if (IsPIC) {
+ // The stub is 16-byte size and 4-aligned.
+ intptr_t LazyPtr = getIndirectSymAddr(Fn);
+ if (!LazyPtr) {
+ // In PIC mode, the function stub is loading a lazy-ptr.
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
+ if (F)
+ DOUT << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '"
+ << F->getName() << "'\n";
+ else
+ DOUT << "JIT: Stub emitted at [" << LazyPtr
+ << "] for external function at '" << Fn << "'\n";
+ }
+ JCE.startGVStub(F, 16, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
+ JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
+ JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
+ JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
+ } else {
+ // The stub is 8-byte size and 4-aligned.
+ JCE.startGVStub(F, 8, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ JCE.emitWordLE((intptr_t)Fn); // addr of function
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 8);
+ }
+ } else {
+ // The compilation callback will overwrite the first two words of this
+ // stub with indirect branch instructions targeting the compiled code.
+ // This stub sets the return address to restart the stub, so that
+ // the new branch will be invoked when we come back.
+ //
+ // Branch and link to the compilation callback.
+ // The stub is 16-byte size and 4-byte aligned.
+ JCE.startGVStub(F, 16, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ // Save LR so the callback can determine which stub called it.
+ // The compilation callback is responsible for popping this prior
+ // to returning.
+ JCE.emitWordLE(0xe92d4000); // push {lr}
+ // Set the return address to go back to the start of this stub.
+ JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
+ // Invoke the compilation callback.
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ // The address of the compilation callback.
+ JCE.emitWordLE((intptr_t)ARMCompilationCallback);
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
+ }
+
+ return JCE.finishGVStub(F);
+}
+
+intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
+ ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
+ switch (RT) {
+ default:
+ return (intptr_t)(MR->getResultPointer());
+ case ARM::reloc_arm_pic_jt:
+ // Destination address - jump table base.
+ return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
+ case ARM::reloc_arm_jt_base:
+ // Jump table base address.
+ return getJumpTableBaseAddr(MR->getJumpTableIndex());
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ // Constant pool entry address.
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex()); + case ARM::reloc_arm_machine_cp_entry: { + ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal(); + assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) && + "Can't handle this machine constant pool entry yet!"); + intptr_t Addr = (intptr_t)(MR->getResultPointer()); + Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment(); + return Addr; + } + } +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = resolveRelocDestAddr(MR); + switch ((ARM::RelocationType)MR->getRelocationType()) { + case ARM::reloc_arm_cp_entry: + case ARM::reloc_arm_vfp_cp_entry: + case ARM::reloc_arm_relative: { + // It is necessary to calculate the correct PC relative value. We + // subtract the base addr from the target addr to form a byte offset. + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + // If the result is positive, set bit U(23) to 1. + if (ResultPtr >= 0) + *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift; + else { + // Otherwise, obtain the absolute value and set bit U(23) to 0. + *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift); + ResultPtr = - ResultPtr; + } + // Set the immed value calculated. + // VFP immediate offset is multiplied by 4. + if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) + ResultPtr = ResultPtr >> 2; + *((intptr_t*)RelocPos) |= ResultPtr; + // Set register Rn to PC. + *((intptr_t*)RelocPos) |= + ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + break; + } + case ARM::reloc_arm_pic_jt: + case ARM::reloc_arm_machine_cp_entry: + case ARM::reloc_arm_absolute: { + // These addresses have already been resolved. + *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr; + break; + } + case ARM::reloc_arm_branch: { + // It is necessary to calculate the correct value of signed_immed_24 + // field. We subtract the base addr from the target addr to form a + // byte offset, which must be inside the range -33554432 and +33554428. + // Then, we set the signed_immed_24 field of the instruction to bits + // [25:2] of the byte offset. More details ARM-ARM p. A4-11. + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2; + assert(ResultPtr >= -33554432 && ResultPtr <= 33554428); + *((intptr_t*)RelocPos) |= ResultPtr; + break; + } + case ARM::reloc_arm_jt_base: { + // JT base - (instruction addr + 8) + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + *((intptr_t*)RelocPos) |= ResultPtr; + break; + } + } + } +} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h new file mode 100644 index 0000000..7dfeed8 --- /dev/null +++ b/lib/Target/ARM/ARMJITInfo.h @@ -0,0 +1,178 @@ +//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the ARMJITInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMJITINFO_H +#define ARMJITINFO_H + +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + class ARMTargetMachine; + + class ARMJITInfo : public TargetJITInfo { + // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding + // CONSTPOOL_ENTRY addresses. + SmallVector<intptr_t, 16> ConstPoolId2AddrMap; + + // JumpTableId2AddrMap - A map from inline jumptable ids to the + // corresponding inline jump table bases. + SmallVector<intptr_t, 16> JumpTableId2AddrMap; + + // PCLabelMap - A map from PC labels to addresses. + DenseMap<unsigned, intptr_t> PCLabelMap; + + // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol) + // addresses to their indirect symbol addresses. + DenseMap<void*, intptr_t> Sym2IndirectSymMap; + + // IsPIC - True if the relocation model is PIC. This is used to determine + // how to codegen function stubs. + bool IsPIC; + + public: + explicit ARMJITInfo() : IsPIC(false) { useGOT = false; } + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + virtual void replaceMachineCodeForFunction(void *Old, void *New); + + /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object + /// to emit an indirect symbol which contains the address of the specified + /// ptr. + virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE); + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + virtual void *emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE); + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + virtual void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase); + + /// hasCustomConstantPool - Allows a target to specify that constant + /// pool address resolution is handled by the target. + virtual bool hasCustomConstantPool() const { return true; } + + /// hasCustomJumpTables - Allows a target to specify that jumptables + /// are emitted by the target. + virtual bool hasCustomJumpTables() const { return true; } + + /// allocateSeparateGVMemory - If true, globals should be placed in + /// separately allocated heap memory rather than in the same + /// code memory allocated by JITCodeEmitter. + virtual bool allocateSeparateGVMemory() const { +#ifdef __APPLE__ + return true; +#else + return false; +#endif + } + + /// Initialize - Initialize internal stage for the function being JITted. + /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize + /// jump table ids to jump table bases map; remember if codegen relocation + /// model is PIC. 
+ void Initialize(const MachineFunction &MF, bool isPIC) { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + ConstPoolId2AddrMap.resize(AFI->getNumConstPoolEntries()); + JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); + IsPIC = isPIC; + } + + /// getConstantPoolEntryAddr - The ARM target puts all constant + /// pool entries into constant islands. This returns the address of the + /// constant pool entry of the specified index. + intptr_t getConstantPoolEntryAddr(unsigned CPI) const { + assert(CPI < ConstPoolId2AddrMap.size()); + return ConstPoolId2AddrMap[CPI]; + } + + /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address + /// where its associated value is stored. When relocations are processed, + /// this value will be used to resolve references to the constant. + void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) { + assert(CPI < ConstPoolId2AddrMap.size()); + ConstPoolId2AddrMap[CPI] = Addr; + } + + /// getJumpTableBaseAddr - The ARM target inline all jump tables within + /// text section of the function. This returns the address of the base of + /// the jump table of the specified index. + intptr_t getJumpTableBaseAddr(unsigned JTI) const { + assert(JTI < JumpTableId2AddrMap.size()); + return JumpTableId2AddrMap[JTI]; + } + + /// addJumpTableBaseAddr - Map a jump table index to the address where + /// the corresponding inline jump table is emitted. When relocations are + /// processed, this value will be used to resolve references to the + /// jump table. + void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) { + assert(JTI < JumpTableId2AddrMap.size()); + JumpTableId2AddrMap[JTI] = Addr; + } + + /// getPCLabelAddr - Retrieve the address of the PC label of the specified id. + intptr_t getPCLabelAddr(unsigned Id) const { + DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id); + assert(I != PCLabelMap.end()); + return I->second; + } + + /// addPCLabelAddr - Remember the address of the specified PC label. + void addPCLabelAddr(unsigned Id, intptr_t Addr) { + PCLabelMap.insert(std::make_pair(Id, Addr)); + } + + /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the + /// specified symbol located at address. Returns 0 if the indirect symbol + /// has not been emitted. + intptr_t getIndirectSymAddr(void *Addr) const { + DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr); + if (I != Sym2IndirectSymMap.end()) + return I->second; + return 0; + } + + /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to + /// its indirect symbol address. + void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) { + Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr)); + } + + private: + /// resolveRelocDestAddr - Resolve the resulting address of the relocation + /// if it's not already solved. Constantpool entries must be resolved by + /// ARM target. + intptr_t resolveRelocDestAddr(MachineRelocation *MR) const; + }; +} + +#endif diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp new file mode 100644 index 0000000..047552f --- /dev/null +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -0,0 +1,778 @@ +//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
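The accessors above implement a simple record-then-resolve protocol: the code emitter records the address of each constant-pool entry, jump-table base, and PC label as it lays them out, and resolveRelocDestAddr() reads those addresses back when relocations are applied. A minimal sketch of that bookkeeping for the constant-pool case, in standard C++ with std::vector standing in for the SmallVector used above:

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative only: an index -> emitted-address map, sized up front and
// filled in as each CONSTPOOL_ENTRY is emitted, then queried at reloc time.
struct ConstPoolAddrMap {
  std::vector<intptr_t> Addrs;
  void init(unsigned NumEntries) { Addrs.assign(NumEntries, 0); }
  void record(unsigned CPI, intptr_t Addr) {
    assert(CPI < Addrs.size());
    Addrs[CPI] = Addr;
  }
  intptr_t lookup(unsigned CPI) const {
    assert(CPI < Addrs.size());
    return Addrs[CPI];
  }
};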
+// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that performs load / store related peephole +// optimizations. This pass should be run after register allocation. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-ldst-opt" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +STATISTIC(NumLDMGened , "Number of ldm instructions generated"); +STATISTIC(NumSTMGened , "Number of stm instructions generated"); +STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); +STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); + +namespace { + struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass { + static char ID; + ARMLoadStoreOpt() : MachineFunctionPass(&ID) {} + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + ARMFunctionInfo *AFI; + RegScavenger *RS; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM load / store optimization pass"; + } + + private: + struct MemOpQueueEntry { + int Offset; + unsigned Position; + MachineBasicBlock::iterator MBBI; + bool Merged; + MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i) + : Offset(o), Position(p), MBBI(i), Merged(false) {}; + }; + typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; + typedef MemOpQueue::iterator MemOpQueueIter; + + SmallVector<MachineBasicBlock::iterator, 4> + MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, + int Opcode, unsigned Size, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, MemOpQueue &MemOps); + + void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); + bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); + bool MergeReturnIntoLDM(MachineBasicBlock &MBB); + }; + char ARMLoadStoreOpt::ID = 0; +} + +/// createARMLoadStoreOptimizationPass - returns an instance of the load / store +/// optimization pass. +FunctionPass *llvm::createARMLoadStoreOptimizationPass() { + return new ARMLoadStoreOpt(); +} + +static int getLoadStoreMultipleOpcode(int Opcode) { + switch (Opcode) { + case ARM::LDR: + NumLDMGened++; + return ARM::LDM; + case ARM::STR: + NumSTMGened++; + return ARM::STM; + case ARM::FLDS: + NumFLDMGened++; + return ARM::FLDMS; + case ARM::FSTS: + NumFSTMGened++; + return ARM::FSTMS; + case ARM::FLDD: + NumFLDMGened++; + return ARM::FLDMD; + case ARM::FSTD: + NumFSTMGened++; + return ARM::FSTMD; + default: abort(); + } + return 0; +} + +/// mergeOps - Create and insert a LDM or STM with Base as base register and +/// registers in Regs as the register operands that would be loaded / stored. +/// It returns true if the transformation is done. 
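A typical case, with illustrative registers and a zero starting offset (so the ia sub-mode is chosen):

    ldr r0, [r4]
    ldr r1, [r4, #4]
    ldr r2, [r4, #8]
    ldr r3, [r4, #12]
  =>
    ldmia r4, {r0, r1, r2, r3}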
+static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + int Offset, unsigned Base, bool BaseKill, int Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, + SmallVector<std::pair<unsigned, bool>, 8> &Regs, + const TargetInstrInfo *TII) { + // FIXME would it be better to take a DL from one of the loads arbitrarily? + DebugLoc dl = DebugLoc::getUnknownLoc(); + // Only a single register to load / store. Don't bother. + unsigned NumRegs = Regs.size(); + if (NumRegs <= 1) + return false; + + ARM_AM::AMSubMode Mode = ARM_AM::ia; + bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR; + if (isAM4 && Offset == 4) + Mode = ARM_AM::ib; + else if (isAM4 && Offset == -4 * (int)NumRegs + 4) + Mode = ARM_AM::da; + else if (isAM4 && Offset == -4 * (int)NumRegs) + Mode = ARM_AM::db; + else if (Offset != 0) { + // If starting offset isn't zero, insert a MI to materialize a new base. + // But only do so if it is cost effective, i.e. merging more than two + // loads / stores. + if (NumRegs <= 2) + return false; + + unsigned NewBase; + if (Opcode == ARM::LDR) + // If it is a load, then just use one of the destination register to + // use as the new base. + NewBase = Regs[NumRegs-1].first; + else { + // Use the scratch register to use as a new base. + NewBase = Scratch; + if (NewBase == 0) + return false; + } + int BaseOpc = ARM::ADDri; + if (Offset < 0) { + BaseOpc = ARM::SUBri; + Offset = - Offset; + } + int ImmedOffset = ARM_AM::getSOImmVal(Offset); + if (ImmedOffset == -1) + return false; // Probably not worth it then. + + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset) + .addImm(Pred).addReg(PredReg).addReg(0); + Base = NewBase; + BaseKill = true; // New base is always killed right its use. + } + + bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD; + bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + Opcode = getLoadStoreMultipleOpcode(Opcode); + MachineInstrBuilder MIB = (isAM4) + ? BuildMI(MBB, MBBI, dl, TII->get(Opcode)) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) + : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs)) + .addImm(Pred).addReg(PredReg); + for (unsigned i = 0; i != NumRegs; ++i) + MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) + | getKillRegState(Regs[i].second)); + + return true; +} + +/// MergeLDR_STR - Merge a number of load / store instructions into one or more +/// load / store multiple instructions. 
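For AM4 (word) transfers the candidates must have ascending register numbers as the offsets increase, so a run can be cut short. An illustrative case:

    ldr r2, [r4]
    ldr r3, [r4, #4]
    ldr r1, [r4, #8]
  =>
    ldmia r4, {r2, r3}
    ldr   r1, [r4, #8]

The first two loads merge; r1 breaks the ascending-register requirement, so the routine emits what it has and recurses on the remaining ops, which here stay as a single unmerged load.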
+SmallVector<MachineBasicBlock::iterator, 4> +ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, + unsigned Base, int Opcode, unsigned Size, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, MemOpQueue &MemOps) { + SmallVector<MachineBasicBlock::iterator, 4> Merges; + bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR; + int Offset = MemOps[SIndex].Offset; + int SOffset = Offset; + unsigned Pos = MemOps[SIndex].Position; + MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; + unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg(); + unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg); + bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill(); + + SmallVector<std::pair<unsigned,bool>, 8> Regs; + Regs.push_back(std::make_pair(PReg, isKill)); + for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { + int NewOffset = MemOps[i].Offset; + unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg(); + unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); + isKill = MemOps[i].MBBI->getOperand(0).isKill(); + // AM4 - register numbers in ascending order. + // AM5 - consecutive register numbers in ascending order. + if (NewOffset == Offset + (int)Size && + ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) { + Offset += Size; + Regs.push_back(std::make_pair(Reg, isKill)); + PRegNum = RegNum; + } else { + // Can't merge this in. Try merge the earlier ones first. + if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg, + Scratch, Regs, TII)) { + Merges.push_back(prior(Loc)); + for (unsigned j = SIndex; j < i; ++j) { + MBB.erase(MemOps[j].MBBI); + MemOps[j].Merged = true; + } + } + SmallVector<MachineBasicBlock::iterator, 4> Merges2 = + MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps); + Merges.append(Merges2.begin(), Merges2.end()); + return Merges; + } + + if (MemOps[i].Position > Pos) { + Pos = MemOps[i].Position; + Loc = MemOps[i].MBBI; + } + } + + bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; + if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg, + Scratch, Regs, TII)) { + Merges.push_back(prior(Loc)); + for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) { + MBB.erase(MemOps[i].MBBI); + MemOps[i].Merged = true; + } + } + + return Merges; +} + +/// getInstrPredicate - If instruction is predicated, returns its predicate +/// condition, otherwise returns AL. It also returns the condition code +/// register by reference. 
+static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) { + int PIdx = MI->findFirstPredOperandIdx(); + if (PIdx == -1) { + PredReg = 0; + return ARMCC::AL; + } + + PredReg = MI->getOperand(PIdx+1).getReg(); + return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); +} + +static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, + unsigned Bytes, ARMCC::CondCodes Pred, + unsigned PredReg) { + unsigned MyPredReg = 0; + return (MI && MI->getOpcode() == ARM::SUBri && + MI->getOperand(0).getReg() == Base && + MI->getOperand(1).getReg() == Base && + ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes && + getInstrPredicate(MI, MyPredReg) == Pred && + MyPredReg == PredReg); +} + +static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, + unsigned Bytes, ARMCC::CondCodes Pred, + unsigned PredReg) { + unsigned MyPredReg = 0; + return (MI && MI->getOpcode() == ARM::ADDri && + MI->getOperand(0).getReg() == Base && + MI->getOperand(1).getReg() == Base && + ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes && + getInstrPredicate(MI, MyPredReg) == Pred && + MyPredReg == PredReg); +} + +static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return 0; + case ARM::LDR: + case ARM::STR: + case ARM::FLDS: + case ARM::FSTS: + return 4; + case ARM::FLDD: + case ARM::FSTD: + return 8; + case ARM::LDM: + case ARM::STM: + return (MI->getNumOperands() - 4) * 4; + case ARM::FLDMS: + case ARM::FSTMS: + case ARM::FLDMD: + case ARM::FSTMD: + return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; + } +} + +/// mergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base +/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible: +/// +/// stmia rn, <ra, rb, rc> +/// rn := rn + 4 * 3; +/// => +/// stmia rn!, <ra, rb, rc> +/// +/// rn := rn - 4 * 3; +/// ldmia rn, <ra, rb, rc> +/// => +/// ldmdb rn!, <ra, rb, rc> +static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool &Advance, + MachineBasicBlock::iterator &I) { + MachineInstr *MI = MBBI; + unsigned Base = MI->getOperand(0).getReg(); + unsigned Bytes = getLSMultipleTransferSize(MI); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + int Opcode = MI->getOpcode(); + bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM; + + if (isAM4) { + if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())) + return false; + + // Can't use the updating AM4 sub-mode if the base register is also a dest + // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 
+ for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).getReg() == Base) + return false; + } + + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); + if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + if (Mode == ARM_AM::ia && + isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true)); + MBB.erase(PrevMBBI); + return true; + } else if (Mode == ARM_AM::ib && + isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true)); + MBB.erase(PrevMBBI); + return true; + } + } + + if (MBBI != MBB.end()) { + MachineBasicBlock::iterator NextMBBI = next(MBBI); + if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && + isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); + if (NextMBBI == I) { + Advance = true; + ++I; + } + MBB.erase(NextMBBI); + return true; + } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && + isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); + if (NextMBBI == I) { + Advance = true; + ++I; + } + MBB.erase(NextMBBI); + return true; + } + } + } else { + // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops. + if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())) + return false; + + ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); + unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); + if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + if (Mode == ARM_AM::ia && + isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset)); + MBB.erase(PrevMBBI); + return true; + } + } + + if (MBBI != MBB.end()) { + MachineBasicBlock::iterator NextMBBI = next(MBBI); + if (Mode == ARM_AM::ia && + isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset)); + if (NextMBBI == I) { + Advance = true; + ++I; + } + MBB.erase(NextMBBI); + } + return true; + } + } + + return false; +} + +static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { + switch (Opc) { + case ARM::LDR: return ARM::LDR_PRE; + case ARM::STR: return ARM::STR_PRE; + case ARM::FLDS: return ARM::FLDMS; + case ARM::FLDD: return ARM::FLDMD; + case ARM::FSTS: return ARM::FSTMS; + case ARM::FSTD: return ARM::FSTMD; + default: abort(); + } + return 0; +} + +static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { + switch (Opc) { + case ARM::LDR: return ARM::LDR_POST; + case ARM::STR: return ARM::STR_POST; + case ARM::FLDS: return ARM::FLDMS; + case ARM::FLDD: return ARM::FLDMD; + case ARM::FSTS: return ARM::FSTMS; + case ARM::FSTD: return ARM::FSTMD; + default: abort(); + } + return 0; +} + +/// mergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base +/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: +static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const TargetInstrInfo *TII, + bool &Advance, + MachineBasicBlock::iterator &I) { + MachineInstr *MI = MBBI; + unsigned Base = MI->getOperand(1).getReg(); + bool BaseKill = MI->getOperand(1).isKill(); + unsigned Bytes = getLSMultipleTransferSize(MI); + int Opcode = MI->getOpcode(); + DebugLoc dl = MI->getDebugLoc(); + bool isAM2 = Opcode == 
ARM::LDR || Opcode == ARM::STR; + if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) || + (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)) + return false; + + bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + // Can't do the merge if the destination register is the same as the would-be + // writeback register. + if (isLd && MI->getOperand(0).getReg() == Base) + return false; + + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + bool DoMerge = false; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + unsigned NewOpc = 0; + if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + DoMerge = true; + AddSub = ARM_AM::sub; + NewOpc = getPreIndexedLoadStoreOpcode(Opcode); + } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes, + Pred, PredReg)) { + DoMerge = true; + NewOpc = getPreIndexedLoadStoreOpcode(Opcode); + } + if (DoMerge) + MBB.erase(PrevMBBI); + } + + if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator NextMBBI = next(MBBI); + if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + DoMerge = true; + AddSub = ARM_AM::sub; + NewOpc = getPostIndexedLoadStoreOpcode(Opcode); + } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + DoMerge = true; + NewOpc = getPostIndexedLoadStoreOpcode(Opcode); + } + if (DoMerge) { + if (NextMBBI == I) { + Advance = true; + ++I; + } + MBB.erase(NextMBBI); + } + } + + if (!DoMerge) + return false; + + bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD; + unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift) + : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia, + true, isDPR ? 2 : 1); + if (isLd) { + if (isAM2) + // LDR_PRE, LDR_POST; + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + else + // FLDMS, FLDMD + BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Offset).addImm(Pred).addReg(PredReg) + .addReg(MI->getOperand(0).getReg(), RegState::Define); + } else { + MachineOperand &MO = MI->getOperand(0); + if (isAM2) + // STR_PRE, STR_POST; + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + .addReg(MO.getReg(), getKillRegState(BaseKill)) + .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + else + // FSTMS, FSTMD + BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset) + .addImm(Pred).addReg(PredReg) + .addReg(MO.getReg(), getKillRegState(MO.isKill())); + } + MBB.erase(MBBI); + + return true; +} + +/// isMemoryOp - Returns true if instruction is a memory operations (that this +/// pass is capable of operating on). +static bool isMemoryOp(MachineInstr *MI) { + int Opcode = MI->getOpcode(); + switch (Opcode) { + default: break; + case ARM::LDR: + case ARM::STR: + return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; + case ARM::FLDS: + case ARM::FSTS: + return MI->getOperand(1).isReg(); + case ARM::FLDD: + case ARM::FSTD: + return MI->getOperand(1).isReg(); + } + return false; +} + +/// AdvanceRS - Advance register scavenger to just before the earliest memory +/// op that is being merged. 
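mergeBaseUpdateLoadStore() above is the single-instruction analogue of the LDM/STM base-update folding: a neighbouring add or sub of the base register becomes the writeback of a pre- or post-indexed load/store. Two illustrative cases, with registers and offsets chosen for exposition:

    sub r3, r3, #4
    str r2, [r3]
  =>
    str r2, [r3, #-4]!

    ldr r0, [r5]
    add r5, r5, #4
  =>
    ldr r0, [r5], #4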
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { + MachineBasicBlock::iterator Loc = MemOps[0].MBBI; + unsigned Position = MemOps[0].Position; + for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { + if (MemOps[i].Position < Position) { + Position = MemOps[i].Position; + Loc = MemOps[i].MBBI; + } + } + + if (Loc != MBB.begin()) + RS->forward(prior(Loc)); +} + +/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR +/// ops of the same base and incrementing offset into LDM / STM ops. +bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { + unsigned NumMerges = 0; + unsigned NumMemOps = 0; + MemOpQueue MemOps; + unsigned CurrBase = 0; + int CurrOpc = -1; + unsigned CurrSize = 0; + ARMCC::CondCodes CurrPred = ARMCC::AL; + unsigned CurrPredReg = 0; + unsigned Position = 0; + + RS->enterBasicBlock(&MBB); + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + bool Advance = false; + bool TryMerge = false; + bool Clobber = false; + + bool isMemOp = isMemoryOp(MBBI); + if (isMemOp) { + int Opcode = MBBI->getOpcode(); + bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; + unsigned Size = getLSMultipleTransferSize(MBBI); + unsigned Base = MBBI->getOperand(1).getReg(); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); + unsigned NumOperands = MBBI->getDesc().getNumOperands(); + unsigned OffField = MBBI->getOperand(NumOperands-3).getImm(); + int Offset = isAM2 + ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; + if (isAM2) { + if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } else { + if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } + // Watch out for: + // r4 := ldr [r5] + // r5 := ldr [r5, #4] + // r6 := ldr [r5, #8] + // + // The second ldr has effectively broken the chain even though it + // looks like the later ldr(s) use the same base register. Try to + // merge the ldr's so far, including this one. But don't try to + // combine the following ldr(s). + Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg()); + if (CurrBase == 0 && !Clobber) { + // Start of a new chain. + CurrBase = Base; + CurrOpc = Opcode; + CurrSize = Size; + CurrPred = Pred; + CurrPredReg = PredReg; + MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); + NumMemOps++; + Advance = true; + } else { + if (Clobber) { + TryMerge = true; + Advance = true; + } + + if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { + // No need to match PredReg. + // Continue adding to the queue. + if (Offset > MemOps.back().Offset) { + MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); + NumMemOps++; + Advance = true; + } else { + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); + I != E; ++I) { + if (Offset < I->Offset) { + MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI)); + NumMemOps++; + Advance = true; + break; + } else if (Offset == I->Offset) { + // Collision! This can't be merged! + break; + } + } + } + } + } + } + + if (Advance) { + ++Position; + ++MBBI; + } else + TryMerge = true; + + if (TryMerge) { + if (NumMemOps > 1) { + // Try to find a free register to use as a new base in case it's needed. + // First advance to the instruction just before the start of the chain. + AdvanceRS(MBB, MemOps); + // Find a scratch register. Make sure it's a call clobbered register or + // a spilled callee-saved register. 
+ unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true); + if (!Scratch) + Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, + AFI->getSpilledCSRegisters()); + // Process the load / store instructions. + RS->forward(prior(MBBI)); + + // Merge ops. + SmallVector<MachineBasicBlock::iterator,4> MBBII = + MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, + CurrPred, CurrPredReg, Scratch, MemOps); + + // Try folding preceeding/trailing base inc/dec into the generated + // LDM/STM ops. + for (unsigned i = 0, e = MBBII.size(); i < e; ++i) + if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI)) + NumMerges++; + NumMerges += MBBII.size(); + + // Try folding preceeding/trailing base inc/dec into those load/store + // that were not merged to form LDM/STM ops. + for (unsigned i = 0; i != NumMemOps; ++i) + if (!MemOps[i].Merged) + if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI)) + NumMerges++; + + // RS may be pointing to an instruction that's deleted. + RS->skipTo(prior(MBBI)); + } + + CurrBase = 0; + CurrOpc = -1; + CurrSize = 0; + CurrPred = ARMCC::AL; + CurrPredReg = 0; + if (NumMemOps) { + MemOps.clear(); + NumMemOps = 0; + } + + // If iterator hasn't been advanced and this is not a memory op, skip it. + // It can't start a new chain anyway. + if (!Advance && !isMemOp && MBBI != E) { + ++Position; + ++MBBI; + } + } + } + return NumMerges > 0; +} + +/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op +/// (bx lr) into the preceeding stack restore so it directly restore the value +/// of LR into pc. +/// ldmfd sp!, {r7, lr} +/// bx lr +/// => +/// ldmfd sp!, {r7, pc} +bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + if (MBB.empty()) return false; + + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) { + MachineInstr *PrevMI = prior(MBBI); + if (PrevMI->getOpcode() == ARM::LDM) { + MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); + if (MO.getReg() == ARM::LR) { + PrevMI->setDesc(TII->get(ARM::LDM_RET)); + MO.setReg(ARM::PC); + MBB.erase(MBBI); + return true; + } + } + } + return false; +} + +bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + const TargetMachine &TM = Fn.getTarget(); + AFI = Fn.getInfo<ARMFunctionInfo>(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + RS = new RegScavenger(); + + bool Modified = false; + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + Modified |= LoadStoreMultipleOpti(MBB); + Modified |= MergeReturnIntoLDM(MBB); + } + + delete RS; + return Modified; +} diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h new file mode 100644 index 0000000..6662be1 --- /dev/null +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -0,0 +1,238 @@ +//====- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares ARM-specific per-machine-function information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMMACHINEFUNCTIONINFO_H +#define ARMMACHINEFUNCTIONINFO_H + +#include "ARMSubtarget.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/BitVector.h" + +namespace llvm { + +/// ARMFunctionInfo - This class is derived from MachineFunction private +/// ARM target-specific information for each MachineFunction. +class ARMFunctionInfo : public MachineFunctionInfo { + + /// isThumb - True if this function is compiled under Thumb mode. + /// Used to initialized Align, so must precede it. + bool isThumb; + + /// Align - required alignment. ARM functions and Thumb functions with + /// constant pools require 4-byte alignment; other Thumb functions + /// require only 2-byte alignment. + unsigned Align; + + /// VarArgsRegSaveSize - Size of the register save area for vararg functions. + /// + unsigned VarArgsRegSaveSize; + + /// HasStackFrame - True if this function has a stack frame. Set by + /// processFunctionBeforeCalleeSavedScan(). + bool HasStackFrame; + + /// LRSpilledForFarJump - True if the LR register has been for spilled to + /// enable far jump. + bool LRSpilledForFarJump; + + /// R3IsLiveIn - True if R3 is live in to this function. + /// FIXME: Remove when register scavenger for Thumb is done. + bool R3IsLiveIn; + + /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer + /// spill stack offset. + unsigned FramePtrSpillOffset; + + /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved + /// register spills areas. For Mac OS X: + /// + /// GPR callee-saved (1) : r4, r5, r6, r7, lr + /// -------------------------------------------- + /// GPR callee-saved (2) : r8, r10, r11 + /// -------------------------------------------- + /// DPR callee-saved : d8 - d15 + unsigned GPRCS1Offset; + unsigned GPRCS2Offset; + unsigned DPRCSOffset; + + /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills + /// areas. + unsigned GPRCS1Size; + unsigned GPRCS2Size; + unsigned DPRCSSize; + + /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices + /// which belong to these spill areas. + BitVector GPRCS1Frames; + BitVector GPRCS2Frames; + BitVector DPRCSFrames; + + /// SpilledCSRegs - A BitVector mask of all spilled callee-saved registers. + /// + BitVector SpilledCSRegs; + + /// JumpTableUId - Unique id for jumptables. + /// + unsigned JumpTableUId; + + unsigned ConstPoolEntryUId; + +public: + ARMFunctionInfo() : + isThumb(false), + Align(2U), + VarArgsRegSaveSize(0), HasStackFrame(false), + LRSpilledForFarJump(false), R3IsLiveIn(false), + FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), + GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), + JumpTableUId(0), ConstPoolEntryUId(0) {} + + ARMFunctionInfo(MachineFunction &MF) : + isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), + Align(isThumb ? 
1U : 2U), + VarArgsRegSaveSize(0), HasStackFrame(false), + LRSpilledForFarJump(false), R3IsLiveIn(false), + FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), + GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), + SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), + JumpTableUId(0), ConstPoolEntryUId(0) {} + + bool isThumbFunction() const { return isThumb; } + + unsigned getAlign() const { return Align; } + void setAlign(unsigned a) { Align = a; } + + unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; } + void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; } + + bool hasStackFrame() const { return HasStackFrame; } + void setHasStackFrame(bool s) { HasStackFrame = s; } + + bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; } + void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; } + + // FIXME: Remove when register scavenger for Thumb is done. + bool isR3LiveIn() const { return R3IsLiveIn; } + void setR3IsLiveIn(bool l) { R3IsLiveIn = l; } + + unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; } + void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; } + + unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; } + unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; } + unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; } + + void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; } + void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; } + void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; } + + unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } + unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } + unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } + + void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } + void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } + void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } + + bool isGPRCalleeSavedArea1Frame(int fi) const { + if (fi < 0 || fi >= (int)GPRCS1Frames.size()) + return false; + return GPRCS1Frames[fi]; + } + bool isGPRCalleeSavedArea2Frame(int fi) const { + if (fi < 0 || fi >= (int)GPRCS2Frames.size()) + return false; + return GPRCS2Frames[fi]; + } + bool isDPRCalleeSavedAreaFrame(int fi) const { + if (fi < 0 || fi >= (int)DPRCSFrames.size()) + return false; + return DPRCSFrames[fi]; + } + + void addGPRCalleeSavedArea1Frame(int fi) { + if (fi >= 0) { + int Size = GPRCS1Frames.size(); + if (fi >= Size) { + Size *= 2; + if (fi >= Size) + Size = fi+1; + GPRCS1Frames.resize(Size); + } + GPRCS1Frames[fi] = true; + } + } + void addGPRCalleeSavedArea2Frame(int fi) { + if (fi >= 0) { + int Size = GPRCS2Frames.size(); + if (fi >= Size) { + Size *= 2; + if (fi >= Size) + Size = fi+1; + GPRCS2Frames.resize(Size); + } + GPRCS2Frames[fi] = true; + } + } + void addDPRCalleeSavedAreaFrame(int fi) { + if (fi >= 0) { + int Size = DPRCSFrames.size(); + if (fi >= Size) { + Size *= 2; + if (fi >= Size) + Size = fi+1; + DPRCSFrames.resize(Size); + } + DPRCSFrames[fi] = true; + } + } + + void setCSRegisterIsSpilled(unsigned Reg) { + SpilledCSRegs.set(Reg); + } + + bool isCSRegisterSpilled(unsigned Reg) const { + return SpilledCSRegs[Reg]; + } + + const BitVector &getSpilledCSRegisters() const { + return SpilledCSRegs; + } + + unsigned createJumpTableUId() { + return JumpTableUId++; + } + + unsigned getNumJumpTables() const { + return 
JumpTableUId; + } + + void initConstPoolEntryUId(unsigned UId) { + ConstPoolEntryUId = UId; + } + + unsigned getNumConstPoolEntries() const { + return ConstPoolEntryUId; + } + + unsigned createConstPoolEntryUId() { + return ConstPoolEntryUId++; + } +}; +} // End llvm namespace + +#endif // ARMMACHINEFUNCTIONINFO_H diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp new file mode 100644 index 0000000..199858f --- /dev/null +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -0,0 +1,1528 @@ +//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include <algorithm> +using namespace llvm; + +static cl::opt<bool> ThumbRegScavenging("enable-thumb-reg-scavenging", + cl::Hidden, + cl::desc("Enable register scavenging on Thumb")); + +unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) { + using namespace ARM; + switch (RegEnum) { + case R0: case S0: case D0: return 0; + case R1: case S1: case D1: return 1; + case R2: case S2: case D2: return 2; + case R3: case S3: case D3: return 3; + case R4: case S4: case D4: return 4; + case R5: case S5: case D5: return 5; + case R6: case S6: case D6: return 6; + case R7: case S7: case D7: return 7; + case R8: case S8: case D8: return 8; + case R9: case S9: case D9: return 9; + case R10: case S10: case D10: return 10; + case R11: case S11: case D11: return 11; + case R12: case S12: case D12: return 12; + case SP: case S13: case D13: return 13; + case LR: case S14: case D14: return 14; + case PC: case S15: case D15: return 15; + case S16: return 16; + case S17: return 17; + case S18: return 18; + case S19: return 19; + case S20: return 20; + case S21: return 21; + case S22: return 22; + case S23: return 23; + case S24: return 24; + case S25: return 25; + case S26: return 26; + case S27: return 27; + case S28: return 28; + case S29: return 29; + case S30: return 30; + case S31: return 31; + default: + assert(0 && "Unknown ARM register!"); + abort(); + } +} + +unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum, + bool &isSPVFP) { + isSPVFP = false; + + using namespace ARM; + switch (RegEnum) { + default: + assert(0 && "Unknown ARM register!"); + abort(); + case R0: case D0: return 0; + case R1: case D1: return 1; + case R2: case D2: return 2; + case R3: case D3: return 3; + case 
R4: case D4: return 4; + case R5: case D5: return 5; + case R6: case D6: return 6; + case R7: case D7: return 7; + case R8: case D8: return 8; + case R9: case D9: return 9; + case R10: case D10: return 10; + case R11: case D11: return 11; + case R12: case D12: return 12; + case SP: case D13: return 13; + case LR: case D14: return 14; + case PC: case D15: return 15; + + case S0: case S1: case S2: case S3: + case S4: case S5: case S6: case S7: + case S8: case S9: case S10: case S11: + case S12: case S13: case S14: case S15: + case S16: case S17: case S18: case S19: + case S20: case S21: case S22: case S23: + case S24: case S25: case S26: case S27: + case S28: case S29: case S30: case S31: { + isSPVFP = true; + switch (RegEnum) { + default: return 0; // Avoid compile time warning. + case S0: return 0; + case S1: return 1; + case S2: return 2; + case S3: return 3; + case S4: return 4; + case S5: return 5; + case S6: return 6; + case S7: return 7; + case S8: return 8; + case S9: return 9; + case S10: return 10; + case S11: return 11; + case S12: return 12; + case S13: return 13; + case S14: return 14; + case S15: return 15; + case S16: return 16; + case S17: return 17; + case S18: return 18; + case S19: return 19; + case S20: return 20; + case S21: return 21; + case S22: return 22; + case S23: return 23; + case S24: return 24; + case S25: return 25; + case S26: return 26; + case S27: return 27; + case S28: return 28; + case S29: return 29; + case S30: return 30; + case S31: return 31; + } + } + } +} + +ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) + : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), + TII(tii), STI(sti), + FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) { +} + +static inline +const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { + return MIB.addImm((int64_t)ARMCC::AL).addReg(0); +} + +static inline +const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} + +/// emitLoadConstPool - Emits a load from constpool to materialize the +/// specified immediate. +void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + unsigned Pred, unsigned PredReg, + const TargetInstrInfo *TII, + bool isThumb, + DebugLoc dl) const { + MachineFunction &MF = *MBB.getParent(); + MachineConstantPool *ConstantPool = MF.getConstantPool(); + Constant *C = ConstantInt::get(Type::Int32Ty, Val); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); + if (isThumb) + BuildMI(MBB, MBBI, dl, + TII->get(ARM::tLDRcp),DestReg).addConstantPoolIndex(Idx); + else + BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg) + .addConstantPoolIndex(Idx) + .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); +} + +const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { + return &ARM::GPRRegClass; +} + +/// isLowRegister - Returns true if the register is low register r0-r7. 
+/// +bool ARMRegisterInfo::isLowRegister(unsigned Reg) const { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + return true; + default: + return false; + } +} + +const TargetRegisterClass* +ARMRegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { + if (STI.isThumb()) { + if (isLowRegister(Reg)) + return ARM::tGPRRegisterClass; + switch (Reg) { + default: + break; + case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: + case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: + return ARM::GPRRegisterClass; + } + } + return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); +} + +const unsigned* +ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const unsigned CalleeSavedRegs[] = { + ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8, + ARM::R7, ARM::R6, ARM::R5, ARM::R4, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + + static const unsigned DarwinCalleeSavedRegs[] = { + ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4, + ARM::R11, ARM::R10, ARM::R9, ARM::R8, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs; +} + +const TargetRegisterClass* const * +ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = { + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass, + &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + return STI.isThumb() ? ThumbCalleeSavedRegClasses : CalleeSavedRegClasses; +} + +BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + // FIXME: avoid re-calculating this everytime. + BitVector Reserved(getNumRegs()); + Reserved.set(ARM::SP); + Reserved.set(ARM::PC); + if (STI.isTargetDarwin() || hasFP(MF)) + Reserved.set(FramePtr); + // Some targets reserve R9. + if (STI.isR9Reserved()) + Reserved.set(ARM::R9); + return Reserved; +} + +bool +ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { + switch (Reg) { + default: break; + case ARM::SP: + case ARM::PC: + return true; + case ARM::R7: + case ARM::R11: + if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF))) + return true; + break; + case ARM::R9: + return STI.isR9Reserved(); + } + + return false; +} + +bool +ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + return ThumbRegScavenging || !AFI->isThumbFunction(); +} + +/// hasFP - Return true if the specified function should have a dedicated frame +/// pointer register. This is true if the function has variable sized allocas +/// or if frame pointer elimination is disabled. 
+/// +bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return NoFramePointerElim || MFI->hasVarSizedObjects(); +} + +// hasReservedCallFrame - Under normal circumstances, when a frame pointer is +// not required, we reserve argument space for call sites in the function +// immediately on entry to the current function. This eliminates the need for +// add/sub sp brackets around call sites. Returns true if the call frame is +// included as part of the stack frame. +bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + unsigned CFSize = FFI->getMaxCallFrameSize(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + // It's not always a good idea to include the call frame as part of the + // stack frame. ARM (especially Thumb) has small immediate offset to + // address the stack frame. So a large call frame can cause poor codegen + // and may even makes it impossible to scavenge a register. + if (AFI->isThumbFunction()) { + if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 + return false; + } else { + if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 + return false; + } + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// emitARMRegPlusImmediate - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in ARM code. +static +void emitARMRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const TargetInstrInfo &TII, + DebugLoc dl) { + bool isSub = NumBytes < 0; + if (isSub) NumBytes = -NumBytes; + + while (NumBytes) { + unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); + unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); + assert(ThisVal && "Didn't extract field correctly"); + + // We will handle these bits from offset, clear them. + NumBytes &= ~ThisVal; + + // Get the properly encoded SOImmVal field. + int SOImmVal = ARM_AM::getSOImmVal(ThisVal); + assert(SOImmVal != -1 && "Bit extraction didn't work?"); + + // Build the new ADD / SUB. + BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(SOImmVal) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + BaseReg = DestReg; + } +} + +/// calcNumMI - Returns the number of instructions required to materialize +/// the specific add / sub r, c instruction. +static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, + unsigned NumBits, unsigned Scale) { + unsigned NumMIs = 0; + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + + if (Opc == ARM::tADDrSPi) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + NumMIs++; + NumBits = 8; + Scale = 1; // Followed by a number of tADDi8. + Chunk = ((1 << NumBits) - 1) * Scale; + } + + NumMIs += Bytes / Chunk; + if ((Bytes % Chunk) != 0) + NumMIs++; + if (ExtraOpc) + NumMIs++; + return NumMIs; +} + +/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. Materialize the immediate +/// in a register using mov / mvn sequences or load the immediate from a +/// constpool entry. 
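emitARMRegPlusImmediate() above peels an arbitrary byte offset into pieces that each fit an ARM data-processing immediate (an 8-bit value rotated right by an even amount), emitting one ADDri or SUBri per piece; the Thumb helpers that follow do the analogous thing with small unrotated immediates. A rough standalone sketch of that chunking, counting how many add/sub instructions an offset needs; it uses a simplified even-rotation mask rather than LLVM's getSOImmValRotate/rotr32 helpers, so treat it as illustrative:

#include <cstdint>

// Illustrative: peel one 8-bit chunk, aligned to an even bit position, per
// iteration, the way the materialization loop above strips one encodable
// immediate at a time.
static unsigned numAddSubChunks(uint32_t Bytes) {
  unsigned Count = 0;
  while (Bytes) {
    unsigned TZ = 0;
    while (!(Bytes & (1u << TZ))) ++TZ;   // position of the lowest set bit
    TZ &= ~1u;                            // rotations come in even amounts
    Bytes &= ~(0xFFu << TZ);              // remove one 8-bit chunk
    ++Count;
  }
  return Count;
}

For example, an offset of 0x404 needs two instructions (#4 and #0x400), since its significant bits span nine positions, more than a single rotated 8-bit immediate can cover.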
+static +void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, bool CanChangeCC, + const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + bool isHigh = !MRI.isLowRegister(DestReg) || + (BaseReg != 0 && !MRI.isLowRegister(BaseReg)); + bool isSub = false; + // Subtract doesn't have high register version. Load the negative value + // if either base or dest register is a high register. Also, if do not + // issue sub as part of the sequence if condition register is to be + // preserved. + if (NumBytes < 0 && !isHigh && CanChangeCC) { + isSub = true; + NumBytes = -NumBytes; + } + unsigned LdReg = DestReg; + if (DestReg == ARM::SP) { + assert(BaseReg == ARM::SP && "Unexpected!"); + LdReg = ARM::R3; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + } + + if (NumBytes <= 255 && NumBytes >= 0) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + else if (NumBytes < 0 && NumBytes >= -255) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg) + .addReg(LdReg, RegState::Kill); + } else + MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, ARMCC::AL, 0, &TII, + true, dl); + + // Emit add / sub. + int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); + const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, + TII.get(Opc), DestReg); + if (DestReg == ARM::SP || isSub) + MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); + else + MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); + if (DestReg == ARM::SP) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) + .addReg(ARM::R12, RegState::Kill); +} + +/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. +static +void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + bool isSub = NumBytes < 0; + unsigned Bytes = (unsigned)NumBytes; + if (isSub) Bytes = -NumBytes; + bool isMul4 = (Bytes & 3) == 0; + bool isTwoAddr = false; + bool DstNotEqBase = false; + unsigned NumBits = 1; + unsigned Scale = 1; + int Opc = 0; + int ExtraOpc = 0; + + if (DestReg == BaseReg && BaseReg == ARM::SP) { + assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); + NumBits = 7; + Scale = 4; + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + isTwoAddr = true; + } else if (!isSub && BaseReg == ARM::SP) { + // r1 = add sp, 403 + // => + // r1 = add sp, 100 * 4 + // r1 = add r1, 3 + if (!isMul4) { + Bytes &= ~3; + ExtraOpc = ARM::tADDi3; + } + NumBits = 8; + Scale = 4; + Opc = ARM::tADDrSPi; + } else { + // sp = sub sp, c + // r1 = sub sp, c + // r8 = sub sp, c + if (DestReg != BaseReg) + DstNotEqBase = true; + NumBits = 8; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + + unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); + unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; + if (NumMIs > Threshold) { + // This will expand into too many instructions. Load the immediate from a + // constpool entry. 
+ emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, + MRI, dl); + return; + } + + if (DstNotEqBase) { + if (MRI.isLowRegister(DestReg) && MRI.isLowRegister(BaseReg)) { + // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) + unsigned Chunk = (1 << 3) - 1; + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(ThisVal); + } else { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(BaseReg, RegState::Kill); + } + BaseReg = DestReg; + } + + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + while (Bytes) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + ThisVal /= Scale; + // Build the new tADD / tSUB. + if (isTwoAddr) + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(DestReg).addImm(ThisVal); + else { + bool isKill = BaseReg != ARM::SP; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); + BaseReg = DestReg; + + if (Opc == ARM::tADDrSPi) { + // r4 = add sp, imm + // r4 = add r4, imm + // ... + NumBits = 8; + Scale = 1; + Chunk = ((1 << NumBits) - 1) * Scale; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + } + } + + if (ExtraOpc) + BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(((unsigned)NumBytes) & 3); +} + +static +void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, + bool isThumb, const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + if (isThumb) + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, + MRI, dl); + else + emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, + Pred, PredReg, TII, dl); +} + +void ARMRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + // Replace the pseudo instruction with a new instruction... + unsigned Opc = Old->getOpcode(); + bool isThumb = AFI->isThumbFunction(); + ARMCC::CondCodes Pred = isThumb + ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(1).getImm(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. + unsigned PredReg = isThumb ? 0 : Old->getOperand(2).getReg(); + emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII, *this, dl); + } else { + // Note: PredReg is operand 3 for ADJCALLSTACKUP. + unsigned PredReg = isThumb ? 
0 : Old->getOperand(3).getReg(); + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII, *this, dl); + } + } + } + MBB.erase(I); +} + +/// emitThumbConstant - Emit a series of instructions to materialize a +/// constant. +static void emitThumbConstant(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Imm, + const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + bool isSub = Imm < 0; + if (isSub) Imm = -Imm; + + int Chunk = (1 << 8) - 1; + int ThisVal = (Imm > Chunk) ? Chunk : Imm; + Imm -= ThisVal; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal); + if (Imm > 0) + emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); + if (isSub) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg) + .addReg(DestReg, RegState::Kill); +} + +/// findScratchRegister - Find a 'free' ARM register. If register scavenger +/// is not being used, R12 is available. Otherwise, try for a call-clobbered +/// register first and then a spilled callee-saved register if that fails. +static +unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC, + ARMFunctionInfo *AFI) { + unsigned Reg = RS ? RS->FindUnusedReg(RC, true) : (unsigned) ARM::R12; + assert (!AFI->isThumbFunction()); + if (Reg == 0) + // Try a already spilled CS register. + Reg = RS->FindUnusedReg(RC, AFI->getSpilledCSRegisters()); + + return Reg; +} + +void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const{ + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + bool isThumb = AFI->isThumbFunction(); + DebugLoc dl = MI.getDebugLoc(); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + unsigned FrameReg = ARM::SP; + int FrameIndex = MI.getOperand(i).getIndex(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MF.getFrameInfo()->getStackSize() + SPAdj; + + if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex)) + Offset -= AFI->getDPRCalleeSavedAreaOffset(); + else if (hasFP(MF)) { + assert(SPAdj == 0 && "Unexpected"); + // There is alloca()'s in this function, must reference off the frame + // pointer instead. + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } + + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + bool isSub = false; + + // Memory operands in inline assembly always use AddrMode2. + if (Opcode == ARM::INLINEASM) + AddrMode = ARMII::AddrMode2; + + if (Opcode == ARM::ADDri) { + Offset += MI.getOperand(i+1).getImm(); + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::MOVr)); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(i+1); + return; + } else if (Offset < 0) { + Offset = -Offset; + isSub = true; + MI.setDesc(TII.get(ARM::SUBri)); + } + + // Common case: small offset, fits into instruction. 
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset); + if (ImmedOffset != -1) { + // Replace the FrameIndex with sp / fp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(ImmedOffset); + return; + } + + // Otherwise, we fallback to common code below to form the imm offset with + // a sequence of ADDri instructions. First though, pull as much of the imm + // into this ADDri as possible. + unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); + unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); + + // We will handle these bits from offset, clear them. + Offset &= ~ThisImmVal; + + // Get the properly encoded SOImmVal field. + int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal); + assert(ThisSOImmVal != -1 && "Bit extraction didn't work?"); + MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal); + } else if (Opcode == ARM::tADDrSPi) { + Offset += MI.getOperand(i+1).getImm(); + + // Can't use tADDrSPi if it's based off the frame pointer. + unsigned NumBits = 0; + unsigned Scale = 1; + if (FrameReg != ARM::SP) { + Opcode = ARM::tADDi3; + MI.setDesc(TII.get(ARM::tADDi3)); + NumBits = 3; + } else { + NumBits = 8; + Scale = 4; + assert((Offset & 3) == 0 && + "Thumb add/sub sp, #imm immediate must be multiple of 4!"); + } + + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::tMOVhir2lor)); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(i+1); + return; + } + + // Common case: small offset, fits into instruction. + unsigned Mask = (1 << NumBits) - 1; + if (((Offset / Scale) & ~Mask) == 0) { + // Replace the FrameIndex with sp / fp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); + return; + } + + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned Bytes = (Offset > 0) ? Offset : -Offset; + unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); + // MI would expand into a large number of instructions. Don't try to + // simplify the immediate. 
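+    // (NumMIs, computed by calcNumMI above, estimates how many add / sub
+    // instructions the full expansion would take.)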
+ if (NumMIs > 2) { + emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, + *this, dl); + MBB.erase(II); + return; + } + + if (Offset > 0) { + // Translate r0 = add sp, imm to + // r0 = add sp, 255*4 + // r0 = add r0, (imm - 255*4) + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Mask); + Offset = (Offset - Mask * Scale); + MachineBasicBlock::iterator NII = next(II); + emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, + *this, dl); + } else { + // Translate r0 = add sp, -imm to + // r0 = -imm (this is then translated into a series of instructons) + // r0 = add r0, sp + emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); + MI.setDesc(TII.get(ARM::tADDhirr)); + MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); + MI.getOperand(i+1).ChangeToRegister(FrameReg, false); + } + return; + } else { + unsigned ImmIdx = 0; + int InstrOffs = 0; + unsigned NumBits = 0; + unsigned Scale = 1; + switch (AddrMode) { + case ARMII::AddrMode2: { + ImmIdx = i+2; + InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 12; + break; + } + case ARMII::AddrMode3: { + ImmIdx = i+2; + InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + break; + } + case ARMII::AddrMode5: { + ImmIdx = i+1; + InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + Scale = 4; + break; + } + case ARMII::AddrModeTs: { + ImmIdx = i+1; + InstrOffs = MI.getOperand(ImmIdx).getImm(); + NumBits = (FrameReg == ARM::SP) ? 8 : 5; + Scale = 4; + break; + } + default: + assert(0 && "Unsupported addressing mode!"); + abort(); + break; + } + + Offset += InstrOffs * Scale; + assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); + if (Offset < 0 && !isThumb) { + Offset = -Offset; + isSub = true; + } + + // Common case: small offset, fits into instruction. + MachineOperand &ImmOp = MI.getOperand(ImmIdx); + int ImmedOffset = Offset / Scale; + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { + // Replace the FrameIndex with sp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + return; + } + + bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; + if (AddrMode == ARMII::AddrModeTs) { + // Thumb tLDRspi, tSTRspi. These will change to instructions that use + // a different base register. + NumBits = 5; + Mask = (1 << NumBits) - 1; + } + // If this is a thumb spill / restore, we will be using a constpool load to + // materialize the offset. + if (AddrMode == ARMII::AddrModeTs && isThumSpillRestore) + ImmOp.ChangeToImmediate(0); + else { + // Otherwise, it didn't fit. Pull in what we can to simplify the immed. + ImmedOffset = ImmedOffset & Mask; + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); + } + } + + // If we get here, the immediate doesn't fit into the instruction. We folded + // as much as possible above, handle the rest, providing a register that is + // SP+LargeImm. 
+  assert(Offset && "This code isn't needed if offset already handled!");
+
+  if (isThumb) {
+    if (Desc.mayLoad()) {
+      // Use the destination register to materialize sp + offset.
+      unsigned TmpReg = MI.getOperand(0).getReg();
+      bool UseRR = false;
+      if (Opcode == ARM::tRestore) {
+        if (FrameReg == ARM::SP)
+          emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                   Offset, false, TII, *this, dl);
+        else {
+          emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII,
+                            true, dl);
+          UseRR = true;
+        }
+      } else
+        emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                  *this, dl);
+      MI.setDesc(TII.get(ARM::tLDR));
+      MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+      if (UseRR)
+        // Use [reg, reg] addrmode.
+        MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+      else // tLDR has an extra register operand.
+        MI.addOperand(MachineOperand::CreateReg(0, false));
+    } else if (Desc.mayStore()) {
+      // FIXME! This is horrific!!! We need register scavenging.
+      // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+      // also an ABI register, so it's possible that it is the register being
+      // stored here. If that's the case, we do the following:
+      //   r12 = r2
+      //   Use r2 to materialize sp + offset
+      //   str r3, r2
+      //   r2 = r12
+      unsigned ValReg = MI.getOperand(0).getReg();
+      unsigned TmpReg = ARM::R3;
+      bool UseRR = false;
+      if (ValReg == ARM::R3) {
+        BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+          .addReg(ARM::R2, RegState::Kill);
+        TmpReg = ARM::R2;
+      }
+      if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+        BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+          .addReg(ARM::R3, RegState::Kill);
+      if (Opcode == ARM::tSpill) {
+        if (FrameReg == ARM::SP)
+          emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                   Offset, false, TII, *this, dl);
+        else {
+          emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII,
+                            true, dl);
+          UseRR = true;
+        }
+      } else
+        emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                  *this, dl);
+      MI.setDesc(TII.get(ARM::tSTR));
+      MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+      if (UseRR) // Use [reg, reg] addrmode.
+        MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+      else // tSTR has an extra register operand.
+        MI.addOperand(MachineOperand::CreateReg(0, false));
+
+      MachineBasicBlock::iterator NII = next(II);
+      if (ValReg == ARM::R3)
+        BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
+          .addReg(ARM::R12, RegState::Kill);
+      if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+        BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+          .addReg(ARM::R12, RegState::Kill);
+    } else
+      assert(false && "Unexpected opcode!");
+  } else {
+    // Insert a set of r12 with the full address: r12 = sp + offset
+    // If the offset we have is too large to fit into the instruction, we need
+    // to form it with a series of ADDri's. Do this by taking 8-bit chunks
+    // out of 'Offset'.
+    unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI);
+    if (ScratchReg == 0)
+      // No register is "free". Scavenge a register.
+      ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj);
+    int PIdx = MI.findFirstPredOperandIdx();
+    ARMCC::CondCodes Pred = (PIdx == -1)
+      ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+    unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+    emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg,
+                            isSub ?
-Offset : Offset, Pred, PredReg, TII, dl); + MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); + } +} + +static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + int Offset = 0; + for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -FFI->getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + if (FFI->isDeadObjectIndex(i)) + continue; + Offset += FFI->getObjectSize(i); + unsigned Align = FFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + } + return (unsigned)Offset; +} + +void +ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + // This tells PEI to spill the FP as if it is any other callee-save register + // to take advantage the eliminateFrameIndex machinery. This also ensures it + // is spilled in the order specified by getCalleeSavedRegs() to make it easier + // to combine multiple loads / stores. + bool CanEliminateFrame = true; + bool CS1Spilled = false; + bool LRSpilled = false; + unsigned NumGPRSpills = 0; + SmallVector<unsigned, 4> UnspilledCS1GPRs; + SmallVector<unsigned, 4> UnspilledCS2GPRs; + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Don't spill FP if the frame can be eliminated. This is determined + // by scanning the callee-save registers to see if any is used. + const unsigned *CSRegs = getCalleeSavedRegs(); + const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + bool Spilled = false; + if (MF.getRegInfo().isPhysRegUsed(Reg)) { + AFI->setCSRegisterIsSpilled(Reg); + Spilled = true; + CanEliminateFrame = false; + } else { + // Check alias registers too. + for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) { + if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { + Spilled = true; + CanEliminateFrame = false; + } + } + } + + if (CSRegClasses[i] == &ARM::GPRRegClass) { + if (Spilled) { + NumGPRSpills++; + + if (!STI.isTargetDarwin()) { + if (Reg == ARM::LR) + LRSpilled = true; + CS1Spilled = true; + continue; + } + + // Keep track if LR and any of R4, R5, R6, and R7 is spilled. + switch (Reg) { + case ARM::LR: + LRSpilled = true; + // Fallthrough + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + CS1Spilled = true; + break; + default: + break; + } + } else { + if (!STI.isTargetDarwin()) { + UnspilledCS1GPRs.push_back(Reg); + continue; + } + + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + UnspilledCS1GPRs.push_back(Reg); + break; + default: + UnspilledCS2GPRs.push_back(Reg); + break; + } + } + } + } + + bool ForceLRSpill = false; + if (!LRSpilled && AFI->isThumbFunction()) { + unsigned FnSize = TII.GetFunctionSizeInBytes(MF); + // Force LR to be spilled if the Thumb function size is > 2048. This enables + // use of BL to implement far jump. If it turns out that it's not needed + // then the branch fix up path will undo it. + if (FnSize >= (1 << 11)) { + CanEliminateFrame = false; + ForceLRSpill = true; + } + } + + bool ExtraCSSpill = false; + if (!CanEliminateFrame || hasFP(MF)) { + AFI->setHasStackFrame(true); + + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. + // Spill LR as well so we can fold BX_RET to the registers restore (LDM). 
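+    // (The restore LDM can then pop directly into PC instead of popping into
+    // LR and issuing a separate return instruction.)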
+    if (!LRSpilled && CS1Spilled) {
+      MF.getRegInfo().setPhysRegUsed(ARM::LR);
+      AFI->setCSRegisterIsSpilled(ARM::LR);
+      NumGPRSpills++;
+      UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+                                       UnspilledCS1GPRs.end(),
+                                       (unsigned)ARM::LR));
+      ForceLRSpill = false;
+      ExtraCSSpill = true;
+    }
+
+    // Darwin ABI requires FP to point to the stack slot that contains the
+    // previous FP.
+    if (STI.isTargetDarwin() || hasFP(MF)) {
+      MF.getRegInfo().setPhysRegUsed(FramePtr);
+      NumGPRSpills++;
+    }
+
+    // If the stack and doubles are 8-byte aligned and we are spilling an odd
+    // number of GPRs, spill one extra callee-save GPR so we won't have to pad
+    // between the integer and double callee-save areas.
+    unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+    if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+          unsigned Reg = UnspilledCS1GPRs[i];
+          // Don't spill a high register if the function is Thumb.
+          if (!AFI->isThumbFunction() || isLowRegister(Reg) || Reg == ARM::LR) {
+            MF.getRegInfo().setPhysRegUsed(Reg);
+            AFI->setCSRegisterIsSpilled(Reg);
+            if (!isReservedReg(MF, Reg))
+              ExtraCSSpill = true;
+            break;
+          }
+        }
+      } else if (!UnspilledCS2GPRs.empty() &&
+                 !AFI->isThumbFunction()) {
+        unsigned Reg = UnspilledCS2GPRs.front();
+        MF.getRegInfo().setPhysRegUsed(Reg);
+        AFI->setCSRegisterIsSpilled(Reg);
+        if (!isReservedReg(MF, Reg))
+          ExtraCSSpill = true;
+      }
+    }
+
+    // Estimate if we might need to scavenge a register at some point in order
+    // to materialize a stack offset. If so, either spill one additional
+    // callee-saved register or reserve a special spill slot to facilitate
+    // register scavenging.
+    if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) {
+      MachineFrameInfo *MFI = MF.getFrameInfo();
+      unsigned Size = estimateStackSize(MF, MFI);
+      unsigned Limit = (1 << 12) - 1;
+      for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+        for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+          for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+            if (I->getOperand(i).isFI()) {
+              unsigned Opcode = I->getOpcode();
+              const TargetInstrDesc &Desc = TII.get(Opcode);
+              unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+              if (AddrMode == ARMII::AddrMode3) {
+                Limit = (1 << 8) - 1;
+                goto DoneEstimating;
+              } else if (AddrMode == ARMII::AddrMode5) {
+                unsigned ThisLimit = ((1 << 8) - 1) * 4;
+                if (ThisLimit < Limit)
+                  Limit = ThisLimit;
+              }
+            }
+        }
+    DoneEstimating:
+      if (Size >= Limit) {
+        // If any non-reserved CS register isn't spilled, just spill one or two
+        // extra. That should take care of it!
+        unsigned NumExtras = TargetAlign / 4;
+        SmallVector<unsigned, 2> Extras;
+        while (NumExtras && !UnspilledCS1GPRs.empty()) {
+          unsigned Reg = UnspilledCS1GPRs.back();
+          UnspilledCS1GPRs.pop_back();
+          if (!isReservedReg(MF, Reg)) {
+            Extras.push_back(Reg);
+            NumExtras--;
+          }
+        }
+        while (NumExtras && !UnspilledCS2GPRs.empty()) {
+          unsigned Reg = UnspilledCS2GPRs.back();
+          UnspilledCS2GPRs.pop_back();
+          if (!isReservedReg(MF, Reg)) {
+            Extras.push_back(Reg);
+            NumExtras--;
+          }
+        }
+        if (Extras.size() && NumExtras == 0) {
+          for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+            MF.getRegInfo().setPhysRegUsed(Extras[i]);
+            AFI->setCSRegisterIsSpilled(Extras[i]);
+          }
+        } else {
+          // Reserve a slot closest to SP or frame pointer.
+          const TargetRegisterClass *RC = &ARM::GPRRegClass;
+          RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                             RC->getAlignment()));
+        }
+      }
+    }
+  }
+
+  if (ForceLRSpill) {
+    MF.getRegInfo().setPhysRegUsed(ARM::LR);
+    AFI->setCSRegisterIsSpilled(ARM::LR);
+    AFI->setLRIsSpilledForFarJump(true);
+  }
+}
+
+/// Move the iterator past the next group of callee-save load / store ops for
+/// the particular spill area (1: integer area 1, 2: integer area 2,
+/// 3: fp area, 0: don't care).
+static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator &MBBI,
+                                   int Opc, unsigned Area,
+                                   const ARMSubtarget &STI) {
+  while (MBBI != MBB.end() &&
+         MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFI()) {
+    if (Area != 0) {
+      bool Done = false;
+      unsigned Category = 0;
+      switch (MBBI->getOperand(0).getReg()) {
+      case ARM::R4:  case ARM::R5:  case ARM::R6:  case ARM::R7:
+      case ARM::LR:
+        Category = 1;
+        break;
+      case ARM::R8:  case ARM::R9:  case ARM::R10: case ARM::R11:
+        Category = STI.isTargetDarwin() ? 2 : 1;
+        break;
+      case ARM::D8:  case ARM::D9:  case ARM::D10: case ARM::D11:
+      case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+        Category = 3;
+        break;
+      default:
+        Done = true;
+        break;
+      }
+      if (Done || Category != Area)
+        break;
+    }
+
+    ++MBBI;
+  }
+}
+
+void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  bool isThumb = AFI->isThumbFunction();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  if (isThumb) {
+    // Check if R3 is live in. It might have to be used as a scratch register.
+    for (MachineRegisterInfo::livein_iterator I = MF.getRegInfo().livein_begin(),
+         E = MF.getRegInfo().livein_end(); I != E; ++I) {
+      if (I->first == ARM::R3) {
+        AFI->setR3IsLiveIn(true);
+        break;
+      }
+    }
+
+    // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+    NumBytes = (NumBytes + 3) & ~3;
+    MFI->setStackSize(NumBytes);
+  }
+
+  // Determine the sizes of the callee-save spill areas and record which frame
+  // index belongs to which area.
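+  // Area 1 holds R4-R7 and LR (plus R8-R11 on non-Darwin targets), area 2
+  // holds R8-R11 on Darwin, and the DPR area holds D8-D15.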
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + int FramePtrSpillFI = 0; + + if (VARegSaveSize) + emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII, + *this, dl); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + return; + } + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + int FI = CSI[i].getFrameIdx(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + break; + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + if (STI.isTargetDarwin()) { + AFI->addGPRCalleeSavedArea2Frame(FI); + GPRCS2Size += 4; + } else { + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + } + break; + default: + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } + } + + if (!isThumb) { + // Build the new SUBri to adjust SP for integer callee-save spill area 1. + emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII, *this, dl); + movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI); + } else if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + ++MBBI; + if (MBBI != MBB.end()) + dl = MBBI->getDebugLoc(); + } + + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if (STI.isTargetDarwin() || hasFP(MF)) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri), + FramePtr) + .addFrameIndex(FramePtrSpillFI).addImm(0); + if (!isThumb) AddDefaultCC(AddDefaultPred(MIB)); + } + + if (!isThumb) { + // Build the new SUBri to adjust SP for integer callee-save spill area 2. + emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII, *this, dl); + + // Build the new SUBri to adjust SP for FP callee-save spill area. + movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI); + emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII, *this, dl); + } + + // Determine starting offsets of spill areas. + unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; + unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); + AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); + AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + + NumBytes = DPRCSOffset; + if (NumBytes) { + // Insert it after all the callee-save spills. 
+ if (!isThumb) + movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI); + emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + } + + if(STI.isTargetELF() && hasFP(MF)) { + MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - + AFI->getFramePtrSpillOffset()); + } + + AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); + AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedAreaSize(DPRCSSize); +} + +static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + +static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { + return ((MI->getOpcode() == ARM::FLDD || + MI->getOpcode() == ARM::LDR || + MI->getOpcode() == ARM::tRestore) && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); +} + +void ARMRegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + assert((MBBI->getOpcode() == ARM::BX_RET || + MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET) && + "Can only insert epilog into returning blocks"); + DebugLoc dl = MBBI->getDebugLoc(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + bool isThumb = AFI->isThumbFunction(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + int NumBytes = (int)MFI->getStackSize(); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + } else { + // Unwind MBBI to point to first LDR / FLDD. + const unsigned *CSRegs = getCalleeSavedRegs(); + if (MBBI != MBB.begin()) { + do + --MBBI; + while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); + if (!isCSRestore(MBBI, CSRegs)) + ++MBBI; + } + + // Move SP to start of FP callee save spill area. + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedAreaSize()); + if (isThumb) { + if (hasFP(MF)) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (NumBytes) + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, + TII, *this, dl); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP) + .addReg(FramePtr); + } else { + if (MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && + prior(MBBI)->getOpcode() == ARM::tPOP) { + MachineBasicBlock::iterator PMBBI = prior(MBBI); + emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, + *this, dl); + } else + emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, + *this, dl); + } + } else { + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. 
+ if (AFI->getGPRCalleeSavedArea2Size() || + AFI->getDPRCalleeSavedAreaSize() || + AFI->getDPRCalleeSavedAreaOffset()|| + hasFP(MF)) { + if (NumBytes) + BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr) + .addImm(NumBytes) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + } + } else if (NumBytes) { + emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII, *this, dl); + } + + // Move SP to start of integer callee save spill area 2. + movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI); + emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), ARMCC::AL, 0, + false, TII, *this, dl); + + // Move SP to start of integer callee save spill area 1. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI); + emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), ARMCC::AL, 0, + false, TII, *this, dl); + + // Move SP to SP upon entry to the function. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI); + emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), ARMCC::AL, 0, + false, TII, *this, dl); + } + } + + if (VARegSaveSize) { + if (isThumb) + // Epilogue for vararg functions: pop LR to R3 and branch off it. + // FIXME: Verify this is still ok when R3 is no longer being reserved. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3); + + emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII, + *this, dl); + + if (isThumb) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); + MBB.erase(MBBI); + } + } +} + +unsigned ARMRegisterInfo::getRARegister() const { + return ARM::LR; +} + +unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const { + if (STI.isTargetDarwin() || hasFP(MF)) + return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11; + else + return ARM::SP; +} + +unsigned ARMRegisterInfo::getEHExceptionRegister() const { + assert(0 && "What is the exception register"); + return 0; +} + +unsigned ARMRegisterInfo::getEHHandlerRegister() const { + assert(0 && "What is the exception handler register"); + return 0; +} + +int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); +} + +#include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h new file mode 100644 index 0000000..e1d9efb --- /dev/null +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -0,0 +1,102 @@ +//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMREGISTERINFO_H +#define ARMREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "ARMGenRegisterInfo.h.inc" + +namespace llvm { + class ARMSubtarget; + class TargetInstrInfo; + class Type; + +struct ARMRegisterInfo : public ARMGenRegisterInfo { + const TargetInstrInfo &TII; + const ARMSubtarget &STI; +private: + /// FramePtr - ARM physical register used as frame ptr. 
+ unsigned FramePtr; + +public: + ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); + + /// emitLoadConstPool - Emits a load from constpool to materialize the + /// specified immediate. + void emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + unsigned Pred, unsigned PredReg, + const TargetInstrInfo *TII, bool isThumb, + DebugLoc dl) const; + + /// getRegisterNumbering - Given the enum value for some register, e.g. + /// ARM::LR, return the number that it corresponds to (e.g. 14). + static unsigned getRegisterNumbering(unsigned RegEnum); + + /// Same as previous getRegisterNumbering except it returns true in isSPVFP + /// if the register is a single precision VFP register. + static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP); + + /// getPointerRegClass - Return the register class to use to hold pointers. + /// This is used for addressing modes. + const TargetRegisterClass *getPointerRegClass() const; + + /// Code Generation virtual methods... + const TargetRegisterClass * + getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; + const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* + getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool hasFP(const MachineFunction &MF) const; + + bool hasReservedCallFrame(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + // Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(MachineFunction &MF) const; + + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + int getDwarfRegNum(unsigned RegNum, bool isEH) const; + + bool isLowRegister(unsigned Reg) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td new file mode 100644 index 0000000..e8daf74 --- /dev/null +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -0,0 +1,221 @@ +//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the ARM register file +//===----------------------------------------------------------------------===// + +// Registers are identified with 4-bit ID numbers. 
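+// (The 4-bit field matches the ARM register encoding; the single-precision
+// VFP registers S0-S31 need five bits, which is why the separate ARMFReg
+// class below carries a 5-bit number.)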
+class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> { + field bits<4> Num; + let Namespace = "ARM"; + let SubRegs = subregs; +} + +class ARMFReg<bits<5> num, string n> : Register<n> { + field bits<5> Num; + let Namespace = "ARM"; +} + +// Integer registers +def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>; +def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>; +def R2 : ARMReg< 2, "r2">, DwarfRegNum<[2]>; +def R3 : ARMReg< 3, "r3">, DwarfRegNum<[3]>; +def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>; +def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>; +def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>; +def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>; +def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>; +def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>; +def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>; +def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>; +def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>; +def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>; +def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>; +def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>; + +// Float registers +def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">; +def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">; +def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">; +def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">; +def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">; +def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">; +def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">; +def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">; +def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">; +def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">; +def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">; +def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">; +def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">; +def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">; +def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">; +def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">; + +// Aliases of the F* registers used to hold 64-bit fp values (doubles) +def D0 : ARMReg< 0, "d0", [S0, S1]>; +def D1 : ARMReg< 1, "d1", [S2, S3]>; +def D2 : ARMReg< 2, "d2", [S4, S5]>; +def D3 : ARMReg< 3, "d3", [S6, S7]>; +def D4 : ARMReg< 4, "d4", [S8, S9]>; +def D5 : ARMReg< 5, "d5", [S10, S11]>; +def D6 : ARMReg< 6, "d6", [S12, S13]>; +def D7 : ARMReg< 7, "d7", [S14, S15]>; +def D8 : ARMReg< 8, "d8", [S16, S17]>; +def D9 : ARMReg< 9, "d9", [S18, S19]>; +def D10 : ARMReg<10, "d10", [S20, S21]>; +def D11 : ARMReg<11, "d11", [S22, S23]>; +def D12 : ARMReg<12, "d12", [S24, S25]>; +def D13 : ARMReg<13, "d13", [S26, S27]>; +def D14 : ARMReg<14, "d14", [S28, S29]>; +def D15 : ARMReg<15, "d15", [S30, S31]>; + +// Current Program Status Register. +def CPSR : ARMReg<0, "cpsr">; + +// Register classes. +// +// pc == Program Counter +// lr == Link Register +// sp == Stack Pointer +// r12 == ip (scratch) +// r7 == Frame Pointer (thumb-style backtraces) +// r11 == Frame Pointer (arm-style backtraces) +// r10 == Stack Limit +// +def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, + R7, R8, R9, R10, R12, R11, + LR, SP, PC]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + // FIXME: We are reserving r12 in case the PEI needs to use it to + // generate large stack offset. Make it available once we have register + // scavenging. Similarly r3 is reserved in Thumb mode for now. 
+ let MethodBodies = [{ + // FP is R11, R9 is available. + static const unsigned ARM_GPR_AO_1[] = { + ARM::R3, ARM::R2, ARM::R1, ARM::R0, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R9, ARM::R10, + ARM::R11 }; + // FP is R11, R9 is not available. + static const unsigned ARM_GPR_AO_2[] = { + ARM::R3, ARM::R2, ARM::R1, ARM::R0, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R10, + ARM::R11 }; + // FP is R7, R9 is available. + static const unsigned ARM_GPR_AO_3[] = { + ARM::R3, ARM::R2, ARM::R1, ARM::R0, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, + ARM::R8, ARM::R9, ARM::R10,ARM::R11, + ARM::R7 }; + // FP is R7, R9 is not available. + static const unsigned ARM_GPR_AO_4[] = { + ARM::R3, ARM::R2, ARM::R1, ARM::R0, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, + ARM::R8, ARM::R10,ARM::R11, + ARM::R7 }; + + GPRClass::iterator + GPRClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + if (Subtarget.useThumbBacktraces()) { + if (Subtarget.isR9Reserved()) + return ARM_GPR_AO_4; + else + return ARM_GPR_AO_3; + } else { + if (Subtarget.isR9Reserved()) + return ARM_GPR_AO_2; + else + return ARM_GPR_AO_1; + } + } + + GPRClass::iterator + GPRClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + GPRClass::iterator I; + + if (Subtarget.useThumbBacktraces()) { + if (Subtarget.isR9Reserved()) { + I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned)); + } else { + I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned)); + } + } else { + if (Subtarget.isR9Reserved()) { + I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned)); + } else { + I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned)); + } + } + + // Mac OS X requires FP not to be clobbered for backtracing purpose. + return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; + } + }]; +} + +// Thumb registers are R0-R7 normally. Some instructions can still use +// the general GPR register class above (MOV, e.g.) +def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + // FIXME: We are reserving r3 in Thumb mode in case the PEI needs to use it + // to generate large stack offset. Make it available once we have register + // scavenging. + let MethodBodies = [{ + static const unsigned THUMB_tGPR_AO[] = { + ARM::R2, ARM::R1, ARM::R0, + ARM::R4, ARM::R5, ARM::R6, ARM::R7 }; + + // FP is R7, only low registers available. + tGPRClass::iterator + tGPRClass::allocation_order_begin(const MachineFunction &MF) const { + return THUMB_tGPR_AO; + } + + tGPRClass::iterator + tGPRClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + tGPRClass::iterator I = + THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned)); + // Mac OS X requires FP not to be clobbered for backtracing purpose. + return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? 
I-1 : I; + } + }]; +} + +def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, + S23, S24, S25, S26, S27, S28, S29, S30, S31]>; + +// ARM requires only word alignment for double. It's more performant if it +// is double-word alignment though. +def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8, + D9, D10, D11, D12, D13, D14, D15]>; + +// Condition code registers. +def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h new file mode 100644 index 0000000..2cc2950 --- /dev/null +++ b/lib/Target/ARM/ARMRelocations.h @@ -0,0 +1,56 @@ +//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ARM target-specific relocation types. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMRELOCATIONS_H +#define ARMRELOCATIONS_H + +#include "llvm/CodeGen/MachineRelocation.h" + +namespace llvm { + namespace ARM { + enum RelocationType { + // reloc_arm_absolute - Absolute relocation, just add the relocated value + // to the value already in memory. + reloc_arm_absolute, + + // reloc_arm_relative - PC relative relocation, add the relocated value to + // the value already in memory, after we adjust it for where the PC is. + reloc_arm_relative, + + // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose + // addresses are kept locally in a map. + reloc_arm_cp_entry, + + // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset + // should be divided by 4. + reloc_arm_vfp_cp_entry, + + // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool + // entry. + reloc_arm_machine_cp_entry, + + // reloc_arm_jt_base - PC relative relocation for jump tables whose + // addresses are kept locally in a map. + reloc_arm_jt_base, + + // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base. + reloc_arm_pic_jt, + + // reloc_arm_branch - Branch address relocation. + reloc_arm_branch + }; + } +} + +#endif + diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp new file mode 100644 index 0000000..ef78cd5 --- /dev/null +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -0,0 +1,84 @@ +//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARM specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "ARMSubtarget.h" +#include "ARMGenSubtarget.inc" +#include "llvm/Module.h" +using namespace llvm; + +ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, + bool isThumb) + : ARMArchVersion(V4T) + , ARMFPUType(None) + , IsThumb(isThumb) + , ThumbMode(Thumb1) + , UseThumbBacktraces(false) + , IsR9Reserved(false) + , stackAlignment(4) + , CPUString("generic") + , TargetType(isELF) // Default to ELF unless otherwise specified. 
+ , TargetABI(ARM_ABI_APCS) { + // Determine default and user specified characteristics + + // Parse features string. + CPUString = ParseSubtargetFeatures(FS, CPUString); + + // Set the boolean corresponding to the current target triple, or the default + // if one cannot be determined, to true. + const std::string& TT = M.getTargetTriple(); + unsigned Len = TT.length(); + unsigned Idx = 0; + + if (Len >= 5 && TT.substr(0, 4) == "armv") + Idx = 4; + else if (Len >= 6 && TT.substr(0, 6) == "thumb") { + IsThumb = true; + if (Len >= 7 && TT[5] == 'v') + Idx = 6; + } + if (Idx) { + unsigned SubVer = TT[Idx]; + if (SubVer > '4' && SubVer <= '9') { + if (SubVer >= '7') + ARMArchVersion = V7A; + else if (SubVer == '6') + ARMArchVersion = V6; + else if (SubVer == '5') { + ARMArchVersion = V5T; + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') + ARMArchVersion = V5TE; + } + } + } + + if (Len >= 10) { + if (TT.find("-darwin") != std::string::npos) + // arm-darwin + TargetType = isDarwin; + } else if (TT.empty()) { +#if defined(__APPLE__) + TargetType = isDarwin; +#endif + } + + if (TT.find("eabi") != std::string::npos) + TargetABI = ARM_ABI_AAPCS; + + if (isAAPCS_ABI()) + stackAlignment = 8; + + if (isTargetDarwin()) { + UseThumbBacktraces = true; + IsR9Reserved = true; + } +} diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h new file mode 100644 index 0000000..8b469cf --- /dev/null +++ b/lib/Target/ARM/ARMSubtarget.h @@ -0,0 +1,122 @@ +//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the ARM specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMSUBTARGET_H +#define ARMSUBTARGET_H + +#include "llvm/Target/TargetSubtarget.h" +#include <string> + +namespace llvm { +class Module; + +class ARMSubtarget : public TargetSubtarget { +protected: + enum ARMArchEnum { + V4T, V5T, V5TE, V6, V7A + }; + + enum ARMFPEnum { + None, VFPv2, VFPv3, NEON + }; + + enum ThumbTypeEnum { + Thumb1, + Thumb2 + }; + + /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE, + /// V6, V6T2, V7A. + ARMArchEnum ARMArchVersion; + + /// ARMFPUType - Floating Point Unit type. + ARMFPEnum ARMFPUType; + + /// IsThumb - True if we are in thumb mode, false if in ARM mode. + bool IsThumb; + + /// ThumbMode - Indicates supported Thumb version. + ThumbTypeEnum ThumbMode; + + /// UseThumbBacktraces - True if we use thumb style backtraces. + bool UseThumbBacktraces; + + /// IsR9Reserved - True if R9 is a not available as general purpose register. + bool IsR9Reserved; + + /// stackAlignment - The minimum alignment known to hold of the stack frame on + /// entry to the function and which must be maintained by every function. + unsigned stackAlignment; + + /// CPUString - String name of used CPU. + std::string CPUString; + + public: + enum { + isELF, isDarwin + } TargetType; + + enum { + ARM_ABI_APCS, + ARM_ABI_AAPCS // ARM EABI + } TargetABI; + + /// This constructor initializes the data members to match that + /// of the specified module. 
+ /// + ARMSubtarget(const Module &M, const std::string &FS, bool isThumb); + + /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size + /// that still makes it profitable to inline the call. + unsigned getMaxInlineSizeThreshold() const { + // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb. + // Change this once Thumb ldmia / stmia support is added. + return isThumb() ? 0 : 64; + } + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + std::string ParseSubtargetFeatures(const std::string &FS, + const std::string &CPU); + + bool hasV4TOps() const { return ARMArchVersion >= V4T; } + bool hasV5TOps() const { return ARMArchVersion >= V5T; } + bool hasV5TEOps() const { return ARMArchVersion >= V5TE; } + bool hasV6Ops() const { return ARMArchVersion >= V6; } + bool hasV7Ops() const { return ARMArchVersion >= V7A; } + + bool hasVFP2() const { return ARMFPUType >= VFPv2; } + bool hasVFP3() const { return ARMFPUType >= VFPv3; } + bool hasNEON() const { return ARMFPUType >= NEON; } + + bool isTargetDarwin() const { return TargetType == isDarwin; } + bool isTargetELF() const { return TargetType == isELF; } + + bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } + bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } + + bool isThumb() const { return IsThumb; } + bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } + + bool useThumbBacktraces() const { return UseThumbBacktraces; } + bool isR9Reserved() const { return IsR9Reserved; } + + const std::string & getCPUString() const { return CPUString; } + + /// getStackAlignment - Returns the minimum alignment known to hold of the + /// stack frame on entry to the function and which must be maintained by every + /// function for this subtarget. + unsigned getStackAlignment() const { return stackAlignment; } +}; +} // End llvm namespace + +#endif // ARMSUBTARGET_H diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp new file mode 100644 index 0000000..4107dcc --- /dev/null +++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp @@ -0,0 +1,291 @@ +//===-- ARMTargetAsmInfo.cpp - ARM asm properties ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the ARMTargetAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "ARMTargetAsmInfo.h" +#include "ARMTargetMachine.h" +#include <cstring> +#include <cctype> +using namespace llvm; + + +const char *const llvm::arm_asm_table[] = { + "{r0}", "r0", + "{r1}", "r1", + "{r2}", "r2", + "{r3}", "r3", + "{r4}", "r4", + "{r5}", "r5", + "{r6}", "r6", + "{r7}", "r7", + "{r8}", "r8", + "{r9}", "r9", + "{r10}", "r10", + "{r11}", "r11", + "{r12}", "r12", + "{r13}", "r13", + "{r14}", "r14", + "{lr}", "lr", + "{sp}", "sp", + "{ip}", "ip", + "{fp}", "fp", + "{sl}", "sl", + "{memory}", "memory", + "{cc}", "cc", + 0,0}; + +ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM): + ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + + GlobalPrefix = "_"; + PrivateGlobalPrefix = "L"; + LessPrivateGlobalPrefix = "l"; + StringConstantPrefix = "\1LC"; + BSSSection = 0; // no BSS section + ZeroDirective = "\t.space\t"; + ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill + SetDirective = "\t.set\t"; + WeakRefDirective = "\t.weak_reference\t"; + WeakDefDirective = "\t.weak_definition "; + HiddenDirective = "\t.private_extern\t"; + ProtectedDirective = NULL; + JumpTableDataSection = ".const"; + CStringSection = "\t.cstring"; + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + NeedsIndirectEncoding = true; + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorsSection = ".constructor"; + StaticDtorsSection = ".destructor"; + } else { + StaticCtorsSection = ".mod_init_func"; + StaticDtorsSection = ".mod_term_func"; + } + + // In non-PIC modes, emit a special label before jump tables so that the + // linker can perform more accurate dead code stripping. + if (TM.getRelocationModel() != Reloc::PIC_) { + // Emit a local label that is preserved until the linker runs. 
+ JumpTableSpecialLabelPrefix = "l"; + } + + NeedsSet = true; + DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; + DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; + DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; + DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; + DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; + DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; + DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; + DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; + DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; + DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; + DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; +} + +ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): + ARMTargetAsmInfo<ELFTargetAsmInfo>(TM) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + + NeedsSet = false; + HasLEB128 = true; + AbsoluteDebugSectionOffsets = true; + CStringSection = ".rodata.str"; + PrivateGlobalPrefix = ".L"; + WeakRefDirective = "\t.weak\t"; + SetDirective = "\t.set\t"; + DwarfRequiresFrameSection = false; + DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",%progbits"; + DwarfInfoSection = "\t.section\t.debug_info,\"\",%progbits"; + DwarfLineSection = "\t.section\t.debug_line,\"\",%progbits"; + DwarfFrameSection = "\t.section\t.debug_frame,\"\",%progbits"; + DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",%progbits"; + DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",%progbits"; + DwarfStrSection = "\t.section\t.debug_str,\"\",%progbits"; + DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits"; + DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits"; + DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits"; + DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits"; + + if (Subtarget->isAAPCS_ABI()) { + StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array"; + StaticDtorsSection = "\t.section .fini_array,\"aw\",%fini_array"; + } else { + StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits"; + StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits"; + } +} + +/// Count the number of comma-separated arguments. +/// Do not try to detect errors. +template <class BaseTAI> +unsigned ARMTargetAsmInfo<BaseTAI>::countArguments(const char* p) const { + unsigned count = 0; + while (*p && isspace(*p) && *p != '\n') + p++; + count++; + while (*p && *p!='\n' && + strncmp(p, BaseTAI::CommentString, + strlen(BaseTAI::CommentString))!=0) { + if (*p==',') + count++; + p++; + } + return count; +} + +/// Count the length of a string enclosed in quote characters. +/// Do not try to detect errors. +template <class BaseTAI> +unsigned ARMTargetAsmInfo<BaseTAI>::countString(const char* p) const { + unsigned count = 0; + while (*p && isspace(*p) && *p!='\n') + p++; + if (!*p || *p != '\"') + return count; + while (*++p && *p != '\"') + count++; + return count; +} + +/// ARM-specific version of TargetAsmInfo::getInlineAsmLength. +template <class BaseTAI> +unsigned ARMTargetAsmInfo<BaseTAI>::getInlineAsmLength(const char *s) const { + // Make a lowercase-folded version of s for counting purposes. + char *q, *s_copy = (char *)malloc(strlen(s) + 1); + strcpy(s_copy, s); + for (q=s_copy; *q; q++) + *q = tolower(*q); + const char *Str = s_copy; + + // Count the number of bytes in the asm. 
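+  // Scan the string, tracking instruction boundaries and whether we are still
+  // in the text section; only text-section contents contribute to the
+  // returned length.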
+ bool atInsnStart = true; + bool inTextSection = true; + unsigned Length = 0; + for (; *Str; ++Str) { + if (atInsnStart) { + // Skip whitespace + while (*Str && isspace(*Str) && *Str != '\n') + Str++; + // Skip label + for (const char* p = Str; *p && !isspace(*p); p++) + if (*p == ':') { + Str = p+1; + while (*Str && isspace(*Str) && *Str != '\n') + Str++; + break; + } + + if (*Str == 0) break; + + // Ignore everything from comment char(s) to EOL + if (strncmp(Str, BaseTAI::CommentString, + strlen(BaseTAI::CommentString)) == 0) + atInsnStart = false; + // FIXME do something like the following for non-Darwin + else if (*Str == '.' && Subtarget->isTargetDarwin()) { + // Directive. + atInsnStart = false; + + // Some change the section, but don't generate code. + if (strncmp(Str, ".literal4", strlen(".literal4"))==0 || + strncmp(Str, ".literal8", strlen(".literal8"))==0 || + strncmp(Str, ".const", strlen(".const"))==0 || + strncmp(Str, ".constructor", strlen(".constructor"))==0 || + strncmp(Str, ".cstring", strlen(".cstring"))==0 || + strncmp(Str, ".data", strlen(".data"))==0 || + strncmp(Str, ".destructor", strlen(".destructor"))==0 || + strncmp(Str, ".fvmlib_init0", strlen(".fvmlib_init0"))==0 || + strncmp(Str, ".fvmlib_init1", strlen(".fvmlib_init1"))==0 || + strncmp(Str, ".mod_init_func", strlen(".mod_init_func"))==0 || + strncmp(Str, ".mod_term_func", strlen(".mod_term_func"))==0 || + strncmp(Str, ".picsymbol_stub", strlen(".picsymbol_stub"))==0 || + strncmp(Str, ".symbol_stub", strlen(".symbol_stub"))==0 || + strncmp(Str, ".static_data", strlen(".static_data"))==0 || + strncmp(Str, ".section", strlen(".section"))==0 || + strncmp(Str, ".lazy_symbol_pointer", strlen(".lazy_symbol_pointer"))==0 || + strncmp(Str, ".non_lazy_symbol_pointer", strlen(".non_lazy_symbol_pointer"))==0 || + strncmp(Str, ".dyld", strlen(".dyld"))==0 || + strncmp(Str, ".const_data", strlen(".const_data"))==0 || + strncmp(Str, ".objc", strlen(".objc"))==0 || //// many directives + strncmp(Str, ".static_const", strlen(".static_const"))==0) + inTextSection=false; + else if (strncmp(Str, ".text", strlen(".text"))==0) + inTextSection = true; + // Some can't really be handled without implementing significant pieces + // of an assembler. Others require dynamic adjustment of block sizes in + // AdjustBBOffsetsAfter; it's a big compile-time speed hit to check every + // instruction in there, and none of these are currently used in the kernel. + else if (strncmp(Str, ".macro", strlen(".macro"))==0 || + strncmp(Str, ".if", strlen(".if"))==0 || + strncmp(Str, ".align", strlen(".align"))==0 || + strncmp(Str, ".fill", strlen(".fill"))==0 || + strncmp(Str, ".space", strlen(".space"))==0 || + strncmp(Str, ".zerofill", strlen(".zerofill"))==0 || + strncmp(Str, ".p2align", strlen(".p2align"))==0 || + strncmp(Str, ".p2alignw", strlen(".p2alignw"))==0 || + strncmp(Str, ".p2alignl", strlen(".p2alignl"))==0 || + strncmp(Str, ".align32", strlen(".p2align32"))==0 || + strncmp(Str, ".include", strlen(".include"))==0) + cerr << "Directive " << Str << " in asm may lead to invalid offsets for" << + " constant pools (the assembler will tell you if this happens).\n"; + // Some generate code, but this is only interesting in the text section. 
+ else if (inTextSection) { + if (strncmp(Str, ".long", strlen(".long"))==0) + Length += 4*countArguments(Str+strlen(".long")); + else if (strncmp(Str, ".short", strlen(".short"))==0) + Length += 2*countArguments(Str+strlen(".short")); + else if (strncmp(Str, ".byte", strlen(".byte"))==0) + Length += 1*countArguments(Str+strlen(".byte")); + else if (strncmp(Str, ".single", strlen(".single"))==0) + Length += 4*countArguments(Str+strlen(".single")); + else if (strncmp(Str, ".double", strlen(".double"))==0) + Length += 8*countArguments(Str+strlen(".double")); + else if (strncmp(Str, ".quad", strlen(".quad"))==0) + Length += 16*countArguments(Str+strlen(".quad")); + else if (strncmp(Str, ".ascii", strlen(".ascii"))==0) + Length += countString(Str+strlen(".ascii")); + else if (strncmp(Str, ".asciz", strlen(".asciz"))==0) + Length += countString(Str+strlen(".asciz"))+1; + } + } else if (inTextSection) { + // An instruction + atInsnStart = false; + if (Subtarget->isThumb()) { + // BL and BLX <non-reg> are 4 bytes, all others 2. + if (strncmp(Str, "blx", strlen("blx"))==0) { + const char* p = Str+3; + while (*p && isspace(*p)) + p++; + if (*p == 'r' || *p=='R') + Length += 2; // BLX reg + else + Length += 4; // BLX non-reg + } else if (strncmp(Str, "bl", strlen("bl"))==0) + Length += 4; // BL + else + Length += 2; // Thumb anything else + } + else + Length += 4; // ARM + } + } + if (*Str == '\n' || *Str == BaseTAI::SeparatorChar) + atInsnStart = true; + } + free(s_copy); + return Length; +} + +// Instantiate default implementation. +TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>); diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h new file mode 100644 index 0000000..9e6f856 --- /dev/null +++ b/lib/Target/ARM/ARMTargetAsmInfo.h @@ -0,0 +1,64 @@ +//=====-- ARMTargetAsmInfo.h - ARM asm properties -------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the ARMTargetAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMTARGETASMINFO_H +#define ARMTARGETASMINFO_H + +#include "ARMTargetMachine.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/ELFTargetAsmInfo.h" +#include "llvm/Target/DarwinTargetAsmInfo.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + + extern const char *const arm_asm_table[]; + + template <class BaseTAI> + struct ARMTargetAsmInfo : public BaseTAI { + explicit ARMTargetAsmInfo(const ARMTargetMachine &TM): + BaseTAI(TM) { + BaseTAI::AsmTransCBE = arm_asm_table; + + BaseTAI::AlignmentIsInBytes = false; + BaseTAI::Data64bitsDirective = 0; + BaseTAI::CommentString = "@"; + BaseTAI::ConstantPoolSection = "\t.text\n"; + BaseTAI::COMMDirectiveTakesAlignment = false; + BaseTAI::InlineAsmStart = "@ InlineAsm Start"; + BaseTAI::InlineAsmEnd = "@ InlineAsm End"; + BaseTAI::LCOMMDirective = "\t.lcomm\t"; + } + + const ARMSubtarget *Subtarget; + + virtual unsigned getInlineAsmLength(const char *Str) const; + unsigned countArguments(const char *p) const; + unsigned countString(const char *p) const; + }; + + typedef ARMTargetAsmInfo<TargetAsmInfo> ARMGenericTargetAsmInfo; + + EXTERN_TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>); + + struct ARMDarwinTargetAsmInfo : public ARMTargetAsmInfo<DarwinTargetAsmInfo> { + explicit ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM); + }; + + struct ARMELFTargetAsmInfo : public ARMTargetAsmInfo<ELFTargetAsmInfo> { + explicit ARMELFTargetAsmInfo(const ARMTargetMachine &TM); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp new file mode 100644 index 0000000..1dc7d19 --- /dev/null +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -0,0 +1,242 @@ +//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "ARMTargetMachine.h" +#include "ARMTargetAsmInfo.h" +#include "ARMFrameInfo.h" +#include "ARM.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden, + cl::desc("Disable load store optimization pass")); +static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden, + cl::desc("Disable if-conversion pass")); + +/// ARMTargetMachineModule - Note that this is used on hosts that cannot link +/// in a library unless there are references into the library. In particular, +/// it seems that it is not possible to get things to work on Win32 without +/// this. Though it is unused, do not remove it. +extern "C" int ARMTargetMachineModule; +int ARMTargetMachineModule = 0; + +// Register the target. 
+static RegisterTarget<ARMTargetMachine> X("arm", "ARM"); +static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb"); + +// No assembler printer by default +ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0; + +/// ThumbTargetMachine - Create an Thumb architecture model. +/// +unsigned ThumbTargetMachine::getJITMatchQuality() { +#if defined(__thumb__) + return 10; +#endif + return 0; +} + +unsigned ThumbTargetMachine::getModuleMatchQuality(const Module &M) { + std::string TT = M.getTargetTriple(); + // Match thumb-foo-bar, as well as things like thumbv5blah-* + if (TT.size() >= 6 && + (TT.substr(0, 6) == "thumb-" || TT.substr(0, 6) == "thumbv")) + return 20; + + // If the target triple is something non-thumb, we don't match. + if (!TT.empty()) return 0; + + if (M.getEndianness() == Module::LittleEndian && + M.getPointerSize() == Module::Pointer32) + return 10; // Weak match + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + + return getJITMatchQuality()/2; +} + +ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS) + : ARMTargetMachine(M, FS, true) { +} + +/// TargetMachine ctor - Create an ARM architecture model. +/// +ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS, + bool isThumb) + : Subtarget(M, FS, isThumb), + DataLayout(Subtarget.isAPCS_ABI() ? + // APCS ABI + (isThumb ? + std::string("e-p:32:32-f64:32:32-i64:32:32-" + "i16:16:32-i8:8:32-i1:8:32-a:0:32") : + std::string("e-p:32:32-f64:32:32-i64:32:32")) : + // AAPCS ABI + (isThumb ? + std::string("e-p:32:32-f64:64:64-i64:64:64-" + "i16:16:32-i8:8:32-i1:8:32-a:0:32") : + std::string("e-p:32:32-f64:64:64-i64:64:64"))), + InstrInfo(Subtarget), + FrameInfo(Subtarget), + JITInfo(), + TLInfo(*this) { + DefRelocModel = getRelocationModel(); +} + +unsigned ARMTargetMachine::getJITMatchQuality() { +#if defined(__arm__) + return 10; +#endif + return 0; +} + +unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) { + std::string TT = M.getTargetTriple(); + // Match arm-foo-bar, as well as things like armv5blah-* + if (TT.size() >= 4 && + (TT.substr(0, 4) == "arm-" || TT.substr(0, 4) == "armv")) + return 20; + // If the target triple is something non-arm, we don't match. + if (!TT.empty()) return 0; + + if (M.getEndianness() == Module::LittleEndian && + M.getPointerSize() == Module::Pointer32) + return 10; // Weak match + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + + return getJITMatchQuality()/2; +} + + +const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const { + switch (Subtarget.TargetType) { + case ARMSubtarget::isDarwin: + return new ARMDarwinTargetAsmInfo(*this); + case ARMSubtarget::isELF: + return new ARMELFTargetAsmInfo(*this); + default: + return new ARMGenericTargetAsmInfo(*this); + } +} + + +// Pass Pipeline Configuration +bool ARMTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + PM.add(createARMISelDag(*this)); + return false; +} + +bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // FIXME: temporarily disabling load / store optimization pass for Thumb mode. 
+ if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) + PM.add(createARMLoadStoreOptimizationPass()); + + if (OptLevel != CodeGenOpt::None && + !DisableIfConversion && !Subtarget.isThumb()) + PM.add(createIfConverterPass()); + + PM.add(createARMConstantIslandPass()); + return true; +} + +bool ARMTargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + raw_ostream &Out) { + // Output assembly language. + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + + return false; +} + + +bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE) { + // FIXME: Move this to TargetJITInfo! + if (DefRelocModel == Reloc::Default) + setRelocationModel(Reloc::Static); + + // Machine code emitter pass for ARM. + PM.add(createARMCodeEmitterPass(*this, MCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &JCE) { + // FIXME: Move this to TargetJITInfo! + if (DefRelocModel == Reloc::Default) + setRelocationModel(Reloc::Static); + + // Machine code emitter pass for ARM. + PM.add(createARMJITCodeEmitterPass(*this, JCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE) { + // Machine code emitter pass for ARM. + PM.add(createARMCodeEmitterPass(*this, MCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &JCE) { + // Machine code emitter pass for ARM. + PM.add(createARMJITCodeEmitterPass(*this, JCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + + diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h new file mode 100644 index 0000000..916a8aa --- /dev/null +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -0,0 +1,104 @@ +//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the ARM specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMTARGETMACHINE_H +#define ARMTARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "ARMInstrInfo.h" +#include "ARMFrameInfo.h" +#include "ARMJITInfo.h" +#include "ARMSubtarget.h" +#include "ARMISelLowering.h" + +namespace llvm { + +class Module; + +class ARMTargetMachine : public LLVMTargetMachine { + ARMSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + ARMInstrInfo InstrInfo; + ARMFrameInfo FrameInfo; + ARMJITInfo JITInfo; + ARMTargetLowering TLInfo; + Reloc::Model DefRelocModel; // Reloc model before it's overridden. + +protected: + // To avoid having target depend on the asmprinter stuff libraries, asmprinter + // set this functions to ctor pointer at startup time if they are linked in. + typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + ARMTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + +public: + ARMTargetMachine(const Module &M, const std::string &FS, bool isThumb = false); + + virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual ARMJITInfo *getJITInfo() { return &JITInfo; } + virtual const ARMRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } + virtual ARMTargetLowering *getTargetLowering() const { + return const_cast<ARMTargetLowering*>(&TLInfo); + } + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } + + static unsigned getModuleMatchQuality(const Module &M); + static unsigned getJITMatchQuality(); + + virtual const TargetAsmInfo *createTargetAsmInfo() const; + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + bool DumpAsm, MachineCodeEmitter &MCE); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + bool DumpAsm, JITCodeEmitter &MCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &MCE); +}; + +/// ThumbTargetMachine - Thumb target machine. 
+/// +class ThumbTargetMachine : public ARMTargetMachine { +public: + ThumbTargetMachine(const Module &M, const std::string &FS); + + static unsigned getJITMatchQuality(); + static unsigned getModuleMatchQuality(const Module &M); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp new file mode 100644 index 0000000..d908cf4 --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -0,0 +1,1117 @@ +//===-- ARMAsmPrinter.cpp - ARM LLVM assembly writer ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format ARM assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "ARM.h" +#include "ARMBuildAttrs.h" +#include "ARMTargetMachine.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <cctype> +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +namespace { + class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter { + DwarfWriter *DW; + MachineModuleInfo *MMI; + + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when printing asm code for different targets. + const ARMSubtarget *Subtarget; + + /// AFI - Keep a pointer to ARMFunctionInfo for the current + /// MachineFunction. + ARMFunctionInfo *AFI; + + /// MCP - Keep a pointer to constantpool entries of the current + /// MachineFunction. + const MachineConstantPool *MCP; + + /// We name each basic block in a Function with a unique number, so + /// that we can consistently refer to them later. This is cleared + /// at the beginning of each call to runOnMachineFunction(). + /// + typedef std::map<const Value *, unsigned> ValueMapTy; + ValueMapTy NumberForBB; + + /// GVNonLazyPtrs - Keeps the set of GlobalValues that require + /// non-lazy-pointers for indirect access. + StringSet<> GVNonLazyPtrs; + + /// HiddenGVNonLazyPtrs - Keeps the set of GlobalValues with hidden + /// visibility that require non-lazy-pointers for indirect access. + StringSet<> HiddenGVNonLazyPtrs; + + /// FnStubs - Keeps the set of external function GlobalAddresses that the + /// asm printer should generate stubs for. + StringSet<> FnStubs; + + /// True if asm printer is printing a series of CONSTPOOL_ENTRY. 
+ bool InCPMode; + public: + explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM, + const TargetAsmInfo *T, CodeGenOpt::Level OL, + bool V) + : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL), + InCPMode(false) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + } + + virtual const char *getPassName() const { + return "ARM Assembly Printer"; + } + + void printOperand(const MachineInstr *MI, int opNum, + const char *Modifier = 0); + void printSOImmOperand(const MachineInstr *MI, int opNum); + void printSOImm2PartOperand(const MachineInstr *MI, int opNum); + void printSORegOperand(const MachineInstr *MI, int opNum); + void printAddrMode2Operand(const MachineInstr *MI, int OpNo); + void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo); + void printAddrMode3Operand(const MachineInstr *MI, int OpNo); + void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo); + void printAddrMode4Operand(const MachineInstr *MI, int OpNo, + const char *Modifier = 0); + void printAddrMode5Operand(const MachineInstr *MI, int OpNo, + const char *Modifier = 0); + void printAddrModePCOperand(const MachineInstr *MI, int OpNo, + const char *Modifier = 0); + void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo); + void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo, + unsigned Scale); + void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNo); + void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo); + void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo); + void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo); + void printPredicateOperand(const MachineInstr *MI, int opNum); + void printSBitModifierOperand(const MachineInstr *MI, int opNum); + void printPCLabel(const MachineInstr *MI, int opNum); + void printRegisterList(const MachineInstr *MI, int opNum); + void printCPInstOperand(const MachineInstr *MI, int opNum, + const char *Modifier); + void printJTBlockOperand(const MachineInstr *MI, int opNum); + + virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode); + + void printModuleLevelGV(const GlobalVariable* GVar); + bool printInstruction(const MachineInstr *MI); // autogenerated. + void printMachineInstruction(const MachineInstr *MI); + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + /// EmitMachineConstantPoolValue - Print a machine constantpool value to + /// the .s file. + virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + printDataDirective(MCPV->getType()); + + ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); + GlobalValue *GV = ACPV->getGV(); + std::string Name = GV ? 
Mang->getValueName(GV) : TAI->getGlobalPrefix(); + if (!GV) + Name += ACPV->getSymbol(); + if (ACPV->isNonLazyPointer()) { + if (GV->hasHiddenVisibility()) + HiddenGVNonLazyPtrs.insert(Name); + else + GVNonLazyPtrs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } else if (ACPV->isStub()) { + FnStubs.insert(Name); + printSuffixedName(Name, "$stub"); + } else + O << Name; + if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; + if (ACPV->getPCAdjustment() != 0) { + O << "-(" << TAI->getPrivateGlobalPrefix() << "PC" + << utostr(ACPV->getLabelId()) + << "+" << (unsigned)ACPV->getPCAdjustment(); + if (ACPV->mustAddCurrentAddress()) + O << "-."; + O << ")"; + } + O << "\n"; + + // If the constant pool value is a extern weak symbol, remember to emit + // the weak reference. + if (GV && GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AsmPrinter::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<DwarfWriter>(); + } + }; +} // end of anonymous namespace + +#include "ARMGenAsmWriter.inc" + +/// runOnMachineFunction - This uses the printInstruction() +/// method to print assembly for each instruction. +/// +bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + this->MF = &MF; + + AFI = MF.getInfo<ARMFunctionInfo>(); + MCP = MF.getConstantPool(); + + SetupMachineFunction(MF); + O << "\n"; + + // NOTE: we don't print out constant pools here, they are handled as + // instructions. + + O << "\n"; + // Print out labels for the function. + const Function *F = MF.getFunction(); + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::PrivateLinkage: + case Function::InternalLinkage: + SwitchToTextSection("\t.text", F); + break; + case Function::ExternalLinkage: + SwitchToTextSection("\t.text", F); + O << "\t.globl\t" << CurrentFnName << "\n"; + break; + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + if (Subtarget->isTargetDarwin()) { + SwitchToTextSection( + ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F); + O << "\t.globl\t" << CurrentFnName << "\n"; + O << "\t.weak_definition\t" << CurrentFnName << "\n"; + } else { + O << TAI->getWeakRefDirective() << CurrentFnName << "\n"; + } + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + if (AFI->isThumbFunction()) { + EmitAlignment(1, F, AFI->getAlign()); + O << "\t.code\t16\n"; + O << "\t.thumb_func"; + if (Subtarget->isTargetDarwin()) + O << "\t" << CurrentFnName; + O << "\n"; + InCPMode = false; + } else + EmitAlignment(2, F); + + O << CurrentFnName << ":\n"; + // Emit pre-function debug information. + DW->BeginFunction(&MF); + + if (Subtarget->isTargetDarwin()) { + // If the function is empty, then we need to emit *something*. Otherwise, + // the function's label might be associated with something that it wasn't + // meant to be associated with. We emit a noop in this situation. + MachineFunction::iterator I = MF.begin(); + + if (++I == MF.end() && MF.front().empty()) + O << "\tnop\n"; + } + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. 
+ if (I != MF.begin()) { + printBasicBlockLabel(I, true, true, VerboseAsm); + O << '\n'; + } + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + printMachineInstruction(II); + } + } + + if (TAI->hasDotTypeDotSizeDirective()) + O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n"; + + // Emit post-function debug information. + DW->EndFunction(&MF); + + O.flush(); + + return false; +} + +void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + const char *Modifier) { + const MachineOperand &MO = MI->getOperand(opNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + else + assert(0 && "not implemented"); + break; + case MachineOperand::MO_Immediate: { + if (!Modifier || strcmp(Modifier, "no_hash") != 0) + O << "#"; + + O << MO.getImm(); + break; + } + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + case MachineOperand::MO_GlobalAddress: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); + GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + if (isExt && isCallOp && Subtarget->isTargetDarwin() && + TM.getRelocationModel() != Reloc::Static) { + printSuffixedName(Name, "$stub"); + FnStubs.insert(Name); + } else + O << Name; + + printOffset(MO.getOffset()); + + if (isCallOp && Subtarget->isTargetELF() && + TM.getRelocationModel() == Reloc::PIC_) + O << "(PLT)"; + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + break; + } + case MachineOperand::MO_ExternalSymbol: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); + std::string Name(TAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + if (isCallOp && Subtarget->isTargetDarwin() && + TM.getRelocationModel() != Reloc::Static) { + printSuffixedName(Name, "$stub"); + FnStubs.insert(Name); + } else + O << Name; + if (isCallOp && Subtarget->isTargetELF() && + TM.getRelocationModel() == Reloc::PIC_) + O << "(PLT)"; + break; + } + case MachineOperand::MO_ConstantPoolIndex: + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + case MachineOperand::MO_JumpTableIndex: + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + default: + O << "<unknown operand type>"; abort (); break; + } +} + +static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm, + const TargetAsmInfo *TAI) { + assert(V < (1 << 12) && "Not a valid so_imm value!"); + unsigned Imm = ARM_AM::getSOImmValImm(V); + unsigned Rot = ARM_AM::getSOImmValRot(V); + + // Print low-level immediate formation info, per + // A5.1.3: "Data-processing operands - Immediate". + if (Rot) { + O << "#" << Imm << ", " << Rot; + // Pretty printed version. + if (VerboseAsm) + O << ' ' << TAI->getCommentString() + << ' ' << (int)ARM_AM::rotr32(Imm, Rot); + } else { + O << "#" << Imm; + } +} + +/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit +/// immediate in bits 0-7. 
+void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isImm() && "Not a valid so_imm value!"); + printSOImm(O, MO.getImm(), VerboseAsm, TAI); +} + +/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov' +/// followed by an 'orr' to materialize. +void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isImm() && "Not a valid so_imm value!"); + unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImm()); + unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImm()); + printSOImm(O, ARM_AM::getSOImmVal(V1), VerboseAsm, TAI); + O << "\n\torr"; + printPredicateOperand(MI, 2); + O << " "; + printOperand(MI, 0); + O << ", "; + printOperand(MI, 0); + O << ", "; + printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI); +} + +// so_reg is a 4-operand unit corresponding to register forms of the A5.1 +// "Addressing Mode 1 - Data-processing operands" forms. This includes: +// REG 0 0 - e.g. R5 +// REG REG 0,SH_OPC - e.g. R5, ROR R3 +// REG 0 IMM,SH_OPC - e.g. R5, LSL #3 +void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) + << " "; + + if (MO2.getReg()) { + assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); + O << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } +} + +void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (!MO2.getReg()) { + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. 
+ O << ", #" + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAM2Offset(MO3.getImm()); + O << "]"; + return; + } + + O << ", " + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) + << " #" << ShImm; + O << "]"; +} + +void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (!MO1.getReg()) { + unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); + assert(ImmOffs && "Malformed indexed load / store!"); + O << "#" + << (char)ARM_AM::getAM2Op(MO2.getImm()) + << ImmOffs; + return; + } + + O << (char)ARM_AM::getAM2Op(MO2.getImm()) + << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) + << " #" << ShImm; +} + +void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (MO2.getReg()) { + O << ", " + << (char)ARM_AM::getAM3Op(MO3.getImm()) + << TM.getRegisterInfo()->get(MO2.getReg()).AsmName + << "]"; + return; + } + + if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) + O << ", #" + << (char)ARM_AM::getAM3Op(MO3.getImm()) + << ImmOffs; + O << "]"; +} + +void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (MO1.getReg()) { + O << (char)ARM_AM::getAM3Op(MO2.getImm()) + << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + return; + } + + unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); + assert(ImmOffs && "Malformed indexed load / store!"); + O << "#" + << (char)ARM_AM::getAM3Op(MO2.getImm()) + << ImmOffs; +} + +void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, + const char *Modifier) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); + if (Modifier && strcmp(Modifier, "submode") == 0) { + if (MO1.getReg() == ARM::SP) { + bool isLDM = (MI->getOpcode() == ARM::LDM || + MI->getOpcode() == ARM::LDM_RET); + O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); + } else + O << ARM_AM::getAMSubModeStr(Mode); + } else { + printOperand(MI, Op); + if (ARM_AM::getAM4WBFlag(MO2.getImm())) + O << "!"; + } +} + +void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, + const char *Modifier) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, Op); + return; + } + + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + + if (Modifier && strcmp(Modifier, "submode") == 0) { + ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); + if (MO1.getReg() == ARM::SP) { + bool isFLDM = (MI->getOpcode() == ARM::FLDMD || + MI->getOpcode() == ARM::FLDMS); + O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); + } else + O << ARM_AM::getAMSubModeStr(Mode); + return; + } else if (Modifier && strcmp(Modifier, "base") == 0) { + // Used for FSTM{D|S} and LSTM{D|S} operations. + O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + if (ARM_AM::getAM5WBFlag(MO2.getImm())) + O << "!"; + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { + O << ", #" + << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ImmOffs*4; + } + O << "]"; +} + +void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, + const char *Modifier) { + if (Modifier && strcmp(Modifier, "label") == 0) { + printPCLabel(MI, Op+1); + return; + } + + const MachineOperand &MO1 = MI->getOperand(Op); + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, + unsigned Scale) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, Op); + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + if (MO3.getReg()) + O << ", " << TM.getRegisterInfo()->get(MO3.getReg()).AsmName; + else if (unsigned ImmOffs = MO2.getImm()) { + O << ", #" << ImmOffs; + if (Scale > 1) + O << " * " << Scale; + } + O << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeS1Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 1); +} +void +ARMAsmPrinter::printThumbAddrModeS2Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 2); +} +void +ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 4); +} + +void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs << " * 4"; + O << "]"; +} + +void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImm(); + if (CC != ARMCC::AL) + O << ARMCondCodeToString(CC); +} + +void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int opNum){ + unsigned Reg = MI->getOperand(opNum).getReg(); + if (Reg) { + assert(Reg == ARM::CPSR && "Expect ARM CPSR register!"); + O << 's'; + } +} + +void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) { + int Id = (int)MI->getOperand(opNum).getImm(); + O << TAI->getPrivateGlobalPrefix() << "PC" << Id; +} + +void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) { + O << "{"; + for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) { + printOperand(MI, i); + if (i != e-1) O << ", "; + } + O << "}"; +} + +void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo, + const char *Modifier) { + assert(Modifier && "This operand only works with a modifier!"); + // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the + // data itself. 
+ if (!strcmp(Modifier, "label")) { + unsigned ID = MI->getOperand(OpNo).getImm(); + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << ID << ":\n"; + } else { + assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE"); + unsigned CPI = MI->getOperand(OpNo).getIndex(); + + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; + + if (MCPE.isMachineConstantPoolEntry()) { + EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); + } else { + EmitGlobalConstant(MCPE.Val.ConstVal); + // remember to emit the weak reference + if (const GlobalValue *GV = dyn_cast<GlobalValue>(MCPE.Val.ConstVal)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + } + } +} + +void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id + unsigned JTI = MO1.getIndex(); + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImm() << ":\n"; + + const char *JTEntryDirective = TAI->getJumpTableDirective(); + if (!JTEntryDirective) + JTEntryDirective = TAI->getData32bitsDirective(); + + const MachineFunction *MF = MI->getParent()->getParent(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_; + std::set<MachineBasicBlock*> JTSets; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + if (UseSet && JTSets.insert(MBB).second) + printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB); + + O << JTEntryDirective << ' '; + if (UseSet) + O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImm() + << "_set_" << MBB->getNumber(); + else if (TM.getRelocationModel() == Reloc::PIC_) { + printBasicBlockLabel(MBB, false, false, false); + // If the arch uses custom Jump Table directives, don't calc relative to JT + if (!TAI->getJumpTableDirective()) + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm(); + } else + printBasicBlockLabel(MBB, false, false, false); + if (i != e-1) + O << '\n'; + } +} + + +bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode){ + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'a': // Don't print "#" before a global var name or constant. + case 'c': // Don't print "$" before a global var name or constant. + printOperand(MI, OpNo, "no_hash"); + return false; + case 'P': // Print a VFP double precision register. + printOperand(MI, OpNo); + return false; + case 'Q': + if (TM.getTargetData()->isLittleEndian()) + break; + // Fallthrough + case 'R': + if (TM.getTargetData()->isBigEndian()) + break; + // Fallthrough + case 'H': // Write second word of DI / DF reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. 
+ } + } + + printOperand(MI, OpNo); + return false; +} + +bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + printAddrMode2Operand(MI, OpNo); + return false; +} + +void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + int Opc = MI->getOpcode(); + switch (Opc) { + case ARM::CONSTPOOL_ENTRY: + if (!InCPMode && AFI->isThumbFunction()) { + EmitAlignment(2); + InCPMode = true; + } + break; + default: { + if (InCPMode && AFI->isThumbFunction()) + InCPMode = false; + }} + + // Call the autogenerated instruction printer routines. + printInstruction(MI); +} + +bool ARMAsmPrinter::doInitialization(Module &M) { + + bool Result = AsmPrinter::doInitialization(M); + + // Emit initial debug information. + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + assert(MMI); + DW = getAnalysisIfAvailable<DwarfWriter>(); + assert(DW && "Dwarf Writer is not available"); + DW->BeginModule(&M, MMI, O, this, TAI); + + // Darwin wants symbols to be quoted if they have complex names. + if (Subtarget->isTargetDarwin()) + Mang->setUseQuotes(true); + + // Emit ARM Build Attributes + if (Subtarget->isTargetELF()) { + // CPU Type + std::string CPUString = Subtarget->getCPUString(); + if (CPUString != "generic") + O << "\t.cpu " << CPUString << '\n'; + + // FIXME: Emit FPU type + if (Subtarget->hasVFP2()) + O << "\t.eabi_attribute " << ARMBuildAttrs::VFP_arch << ", 2\n"; + + // Signal various FP modes. + if (!UnsafeFPMath) + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_denormal << ", 1\n" + << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_exceptions << ", 1\n"; + + if (FiniteOnlyFPMath()) + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_number_model << ", 1\n"; + else + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_number_model << ", 3\n"; + + // 8-bytes alignment stuff. + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_needed << ", 1\n" + << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_preserved << ", 1\n"; + + // FIXME: Should we signal R9 usage? + } + + return Result; +} + +/// PrintUnmangledNameSafely - Print out the printable characters in the name. +/// Don't print things like \\n or \\0. +static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { + for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); + Name != E; ++Name) + if (isprint(*Name)) + OS << *Name; +} + +void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { + const TargetData *TD = TM.getTargetData(); + + if (!GVar->hasInitializer()) // External global require no code + return; + + // Check to see if this is a special global used by LLVM, if so, emit it. 
+ + if (EmitSpecialLLVMGlobal(GVar)) { + if (Subtarget->isTargetDarwin() && + TM.getRelocationModel() == Reloc::Static) { + if (GVar->getName() == "llvm.global_ctors") + O << ".reference .constructors_used\n"; + else if (GVar->getName() == "llvm.global_dtors") + O << ".reference .destructors_used\n"; + } + return; + } + + std::string name = Mang->getValueName(GVar); + Constant *C = GVar->getInitializer(); + const Type *Type = C->getType(); + unsigned Size = TD->getTypeAllocSize(Type); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + bool isDarwin = Subtarget->isTargetDarwin(); + + printVisibility(name, GVar->getVisibility()); + + if (Subtarget->isTargetELF()) + O << "\t.type " << name << ",%object\n"; + + if (C->isNullValue() && !GVar->hasSection() && !GVar->isThreadLocal() && + !(isDarwin && + TAI->SectionKindForGlobal(GVar) == SectionKind::RODataMergeStr)) { + // FIXME: This seems to be pretty darwin-specific + + if (GVar->hasExternalLinkage()) { + SwitchToSection(TAI->SectionForGlobal(GVar)); + if (const char *Directive = TAI->getZeroFillDirective()) { + O << "\t.globl\t" << name << "\n"; + O << Directive << "__DATA, __common, " << name << ", " + << Size << ", " << Align << "\n"; + return; + } + } + + if (GVar->hasLocalLinkage() || GVar->isWeakForLinker()) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (isDarwin) { + if (GVar->hasLocalLinkage()) { + O << TAI->getLCOMMDirective() << name << "," << Size + << ',' << Align; + } else if (GVar->hasCommonLinkage()) { + O << TAI->getCOMMDirective() << name << "," << Size + << ',' << Align; + } else { + SwitchToSection(TAI->SectionForGlobal(GVar)); + O << "\t.globl " << name << '\n' + << TAI->getWeakDefDirective() << name << '\n'; + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << TAI->getCommentString() << ' '; + PrintUnmangledNameSafely(GVar, O); + } + O << '\n'; + EmitGlobalConstant(C); + return; + } + } else if (TAI->getLCOMMDirective() != NULL) { + if (GVar->hasLocalLinkage()) { + O << TAI->getLCOMMDirective() << name << "," << Size; + } else { + O << TAI->getCOMMDirective() << name << "," << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + } else { + SwitchToSection(TAI->SectionForGlobal(GVar)); + if (GVar->hasLocalLinkage()) + O << "\t.local\t" << name << "\n"; + O << TAI->getCOMMDirective() << name << "," << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + if (VerboseAsm) { + O << "\t\t" << TAI->getCommentString() << " "; + PrintUnmangledNameSafely(GVar, O); + } + O << "\n"; + return; + } + } + + SwitchToSection(TAI->SectionForGlobal(GVar)); + switch (GVar->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + if (isDarwin) { + O << "\t.globl " << name << "\n" + << "\t.weak_definition " << name << "\n"; + } else { + O << "\t.weak " << name << "\n"; + } + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. 
+ case GlobalValue::ExternalLinkage: + O << "\t.globl " << name << "\n"; + // FALL THROUGH + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + assert(0 && "Unknown linkage type!"); + break; + } + + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << TAI->getCommentString() << " "; + PrintUnmangledNameSafely(GVar, O); + } + O << "\n"; + if (TAI->hasDotTypeDotSizeDirective()) + O << "\t.size " << name << ", " << Size << "\n"; + + // If the initializer is a extern weak symbol, remember to emit the weak + // reference! + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + EmitGlobalConstant(C); + O << '\n'; +} + + +bool ARMAsmPrinter::doFinalization(Module &M) { + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + printModuleLevelGV(I); + + if (Subtarget->isTargetDarwin()) { + SwitchToDataSection(""); + + // Output stubs for dynamically-linked functions + for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end(); + i != e; ++i) { + if (TM.getRelocationModel() == Reloc::PIC_) + SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs," + "none,16", 0); + else + SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs," + "none,12", 0); + + EmitAlignment(2); + O << "\t.code\t32\n"; + + const char *p = i->getKeyData(); + printSuffixedName(p, "$stub"); + O << ":\n"; + O << "\t.indirect_symbol " << p << "\n"; + O << "\tldr ip, "; + printSuffixedName(p, "$slp"); + O << "\n"; + if (TM.getRelocationModel() == Reloc::PIC_) { + printSuffixedName(p, "$scv"); + O << ":\n"; + O << "\tadd ip, pc, ip\n"; + } + O << "\tldr pc, [ip, #0]\n"; + printSuffixedName(p, "$slp"); + O << ":\n"; + O << "\t.long\t"; + printSuffixedName(p, "$lazy_ptr"); + if (TM.getRelocationModel() == Reloc::PIC_) { + O << "-("; + printSuffixedName(p, "$scv"); + O << "+8)\n"; + } else + O << "\n"; + SwitchToDataSection(".lazy_symbol_pointer", 0); + printSuffixedName(p, "$lazy_ptr"); + O << ":\n"; + O << "\t.indirect_symbol " << p << "\n"; + O << "\t.long\tdyld_stub_binding_helper\n"; + } + O << "\n"; + + // Output non-lazy-pointers for external and common global variables. + if (!GVNonLazyPtrs.empty()) { + SwitchToDataSection("\t.non_lazy_symbol_pointer", 0); + for (StringSet<>::iterator i = GVNonLazyPtrs.begin(), + e = GVNonLazyPtrs.end(); i != e; ++i) { + const char *p = i->getKeyData(); + printSuffixedName(p, "$non_lazy_ptr"); + O << ":\n"; + O << "\t.indirect_symbol " << p << "\n"; + O << "\t.long\t0\n"; + } + } + + if (!HiddenGVNonLazyPtrs.empty()) { + SwitchToSection(TAI->getDataSection()); + for (StringSet<>::iterator i = HiddenGVNonLazyPtrs.begin(), + e = HiddenGVNonLazyPtrs.end(); i != e; ++i) { + const char *p = i->getKeyData(); + EmitAlignment(2); + printSuffixedName(p, "$non_lazy_ptr"); + O << ":\n"; + O << "\t.long " << p << "\n"; + } + } + + + // Emit initial debug information. + DW->EndModule(); + + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. Since LLVM never + // generates code that does this, it is always safe to set. + O << "\t.subsections_via_symbols\n"; + } else { + // Emit final debug information for ELF. 
+ DW->EndModule(); + } + + return AsmPrinter::doFinalization(M); +} + +/// createARMCodePrinterPass - Returns a pass that prints the ARM +/// assembly code for a MachineFunction to the given output stream, +/// using the given target machine description. This should work +/// regardless of whether the function is in SSA form. +/// +FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o, + ARMTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose) { + return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); +} + +namespace { + static struct Register { + Register() { + ARMTargetMachine::registerAsmPrinter(createARMCodePrinterPass); + } + } Registrator; +} diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000..524a748 --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_partially_linked_object(LLVMARMAsmPrinter + ARMAsmPrinter.cpp + ) + +target_name_of_partially_linked_object(LLVMARMCodeGen n) + +add_dependencies(LLVMARMAsmPrinter ${n}) diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile new file mode 100644 index 0000000..ce36cec --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMAsmPrinter + +# Hack: we need to include 'main' arm target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt new file mode 100644 index 0000000..2ac40f5 --- /dev/null +++ b/lib/Target/ARM/CMakeLists.txt @@ -0,0 +1,27 @@ +set(LLVM_TARGET_DEFINITIONS ARM.td) + +tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(ARMGenRegisterNames.inc -gen-register-enums) +tablegen(ARMGenRegisterInfo.inc -gen-register-desc) +tablegen(ARMGenInstrNames.inc -gen-instr-enums) +tablegen(ARMGenInstrInfo.inc -gen-instr-desc) +tablegen(ARMGenCodeEmitter.inc -gen-emitter) +tablegen(ARMGenAsmWriter.inc -gen-asm-writer) +tablegen(ARMGenDAGISel.inc -gen-dag-isel) +tablegen(ARMGenCallingConv.inc -gen-callingconv) +tablegen(ARMGenSubtarget.inc -gen-subtarget) + +add_llvm_target(ARMCodeGen + ARMCodeEmitter.cpp + ARMConstantIslandPass.cpp + ARMConstantPoolValue.cpp + ARMInstrInfo.cpp + ARMISelDAGToDAG.cpp + ARMISelLowering.cpp + ARMJITInfo.cpp + ARMLoadStoreOptimizer.cpp + ARMRegisterInfo.cpp + ARMSubtarget.cpp + ARMTargetAsmInfo.cpp + ARMTargetMachine.cpp + ) diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile new file mode 100644 index 0000000..9a3b9be --- /dev/null +++ b/lib/Target/ARM/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+LIBRARYNAME = LLVMARMCodeGen +TARGET = ARM + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ + ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ + ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ + ARMGenDAGISel.inc ARMGenSubtarget.inc \ + ARMGenCodeEmitter.inc ARMGenCallingConv.inc + +DIRS = AsmPrinter + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt new file mode 100644 index 0000000..4d3200b --- /dev/null +++ b/lib/Target/ARM/README-Thumb.txt @@ -0,0 +1,228 @@ +//===---------------------------------------------------------------------===// +// Random ideas for the ARM backend (Thumb specific). +//===---------------------------------------------------------------------===// + +* Add support for compiling functions in both ARM and Thumb mode, then taking + the smallest. + +* Add support for compiling individual basic blocks in thumb mode, when in a + larger ARM function. This can be used for presumed cold code, like paths + to abort (failure path of asserts), EH handling code, etc. + +* Thumb doesn't have normal pre/post increment addressing modes, but you can + load/store 32-bit integers with pre/postinc by using load/store multiple + instrs with a single register. + +* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add + and cmp instructions can use high registers. Also, we can use them as + temporaries to spill values into. + +* In thumb mode, short, byte, and bool preferred alignments are currently set + to 4 to accommodate ISA restriction (i.e. add sp, #imm, imm must be multiple + of 4). + +//===---------------------------------------------------------------------===// + +Potential jumptable improvements: + +* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit + jumptable entries (e.g. (L1 - L2) >> 1). Or even smaller entries if the + function is even smaller. This also applies to ARM. + +* Thumb jumptable codegen can improve given some help from the assembler. This + is what we generate right now: + + .set PCRELV0, (LJTI1_0_0-(LPCRELL0+4)) +LPCRELL0: + mov r1, #PCRELV0 + add r1, pc + ldr r0, [r0, r1] + cpy pc, r0 + .align 2 +LJTI1_0_0: + .long LBB1_3 + ... + +Note there is another pc relative add that we can take advantage of. 
+ add r1, pc, #imm_8 * 4
+
+We should be able to generate:
+
+LPCRELL0:
+ add r1, LJTI1_0_0
+ ldr r0, [r0, r1]
+ cpy pc, r0
+ .align 2
+LJTI1_0_0:
+ .long LBB1_3
+
+if the assembler can translate the add to:
+ add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc)
+
+Note the assembler also does something similar to constpool load:
+LPCRELL0:
+ ldr r0, LCPI1_0
+=>
+ ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc)
+
+
+//===----------------------------------------------------------------------===//
+
+We compile the following:
+
+define i16 @func_entry_2E_ce(i32 %i) {
+ switch i32 %i, label %bb12.exitStub [
+ i32 0, label %bb4.exitStub
+ i32 1, label %bb9.exitStub
+ i32 2, label %bb4.exitStub
+ i32 3, label %bb4.exitStub
+ i32 7, label %bb9.exitStub
+ i32 8, label %bb.exitStub
+ i32 9, label %bb9.exitStub
+ ]
+
+bb12.exitStub:
+ ret i16 0
+
+bb4.exitStub:
+ ret i16 1
+
+bb9.exitStub:
+ ret i16 2
+
+bb.exitStub:
+ ret i16 3
+}
+
+into:
+
+_func_entry_2E_ce:
+ mov r2, #1
+ lsl r2, r0
+ cmp r0, #9
+ bhi LBB1_4 @bb12.exitStub
+LBB1_1: @newFuncRoot
+ mov r1, #13
+ tst r2, r1
+ bne LBB1_5 @bb4.exitStub
+LBB1_2: @newFuncRoot
+ ldr r1, LCPI1_0
+ tst r2, r1
+ bne LBB1_6 @bb9.exitStub
+LBB1_3: @newFuncRoot
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+ bne LBB1_7 @bb.exitStub
+LBB1_4: @bb12.exitStub
+ mov r0, #0
+ bx lr
+LBB1_5: @bb4.exitStub
+ mov r0, #1
+ bx lr
+LBB1_6: @bb9.exitStub
+ mov r0, #2
+ bx lr
+LBB1_7: @bb.exitStub
+ mov r0, #3
+ bx lr
+LBB1_8:
+ .align 2
+LCPI1_0:
+ .long 642
+
+
+gcc compiles to:
+
+ cmp r0, #9
+ @ lr needed for prologue
+ bhi L2
+ ldr r3, L11
+ mov r2, #1
+ mov r1, r2, asl r0
+ ands r0, r3, r2, asl r0
+ movne r0, #2
+ bxne lr
+ tst r1, #13
+ beq L9
+L3:
+ mov r0, r2
+ bx lr
+L9:
+ tst r1, #256
+ movne r0, #3
+ bxne lr
+L2:
+ mov r0, #0
+ bx lr
+L12:
+ .align 2
+L11:
+ .long 642
+
+
+GCC is doing a couple of clever things here:
+ 1. It is predicating one of the returns. This isn't a clear win though: in
+ cases where that return isn't taken, it is replacing one condbranch with
+ two 'ne' predicated instructions.
+ 2. It is sinking the shift of "1 << i" into the tst, and using ands instead of
+ tst. This will probably require whole function isel.
+ 3. GCC emits:
+ tst r1, #256
+ we emit:
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+
+
+//===----------------------------------------------------------------------===//
+
+When spilling in thumb mode and the sp offset is too large to fit in the ldr /
+str offset field, we load the offset from a constpool entry and add it to sp:
+
+ldr r2, LCPI
+add r2, sp
+ldr r2, [r2]
+
+These instructions preserve the condition code which is important if the spill
+is between a cmp and a bcc instruction. However, we can use the (potentially)
+cheaper sequence if we know it's ok to clobber the condition register.
+
+add r2, sp, #255 * 4
+add r2, #132
+ldr r2, [r2, #7 * 4]
+
+This is especially bad when dynamic alloca is used. All the fixed size stack
+objects are referenced off the frame pointer with negative offsets. See
+oggenc for an example.
+
+//===----------------------------------------------------------------------===//
+
+We are reserving R3 as a scratch register under thumb mode. So if it is live in
+to the function, we save / restore R3 to / from R12. Until register scavenging
+is done, we should save R3 to a high callee saved reg at emitPrologue time
+(when hasFP is true or stack size is large) and restore R3 from that register
+instead. This allows us to at least get rid of the save to r12 every time it is
+used.
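+
+A rough sketch of the difference (the high callee-saved register used below,
+r8, is only illustrative; emitPrologue would pick whatever register is
+actually free):
+
+today, repeated around every use of the scratch register:
+ cpy r12, r3 @ save live-in r3
+ @ ... use r3 as a scratch register ...
+ cpy r3, r12 @ restore r3
+
+proposed, once per function when hasFP is true or the stack is large:
+ cpy r8, r3 @ prologue: stash live-in r3 in a callee-saved high reg
+ @ ... r3 usable as a scratch register throughout the body ...
+ cpy r3, r8 @ epilogue: restore r3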
+
+//===---------------------------------------------------------------------===//
+
+Poor codegen test/CodeGen/ARM/select.ll f7:
+
+ ldr r5, LCPI1_0
+LPC0:
+ add r5, pc
+ ldr r6, LCPI1_1
+ ldr r2, LCPI1_2
+ cpy r3, r6
+ cpy lr, pc
+ bx r5
+
+//===---------------------------------------------------------------------===//
+
+Make register allocator / spiller smarter so we can re-materialize "mov r, imm",
+etc. Almost all Thumb instructions clobber condition code.
+
+//===---------------------------------------------------------------------===//
+
+Add ldmia, stmia support.
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
new file mode 100644
index 0000000..068c441e
--- /dev/null
+++ b/lib/Target/ARM/README.txt
@@ -0,0 +1,554 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend.
+//===---------------------------------------------------------------------===//
+
+Reimplement 'select' in terms of 'SEL'.
+
+* We would really like to support UXTAB16, but we need to prove that the
+  add doesn't need to overflow between the two 16-bit chunks.
+
+* Implement pre/post increment support. (e.g. PR935)
+* Coalesce stack slots!
+* Implement smarter constant generation for binops with large immediates.
+
+* Consider materializing FP constants like 0.0f and 1.0f using integer
+  immediate instructions then copy to FPU. Slower than load into FPU?
+
+//===---------------------------------------------------------------------===//
+
+Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
+time regalloc happens, these values are now in a 32-bit register, usually with
+the top-bits known to be sign or zero extended. If spilled, we should be able
+to spill these to an 8-bit or 16-bit stack slot, zero or sign extending as part
+of the reload.
+
+Doing this reduces the size of the stack frame (important for thumb etc), and
+also increases the likelihood that we will be able to reload multiple values
+from the stack with a single load.
+
+//===---------------------------------------------------------------------===//
+
+The constant island pass is in good shape. Some cleanups might be desirable,
+but there is unlikely to be much improvement in the generated code.
+
+1. There may be some advantage to trying to be smarter about the initial
+placement, rather than putting everything at the end.
+
+2. There might be some compile-time efficiency to be had by representing
+consecutive islands as a single block rather than multiple blocks.
+
+3. Use a priority queue to sort constant pool users in inverse order of
+   position so we always process the one closest to the end of the function
+   first. This may simplify CreateNewWater.
+
+//===---------------------------------------------------------------------===//
+
+Eliminate copysign custom expansion. We are still generating crappy code with
+default expansion + if-conversion.
+
+//===---------------------------------------------------------------------===//
+
+Eliminate one instruction from:
+
+define i32 @_Z6slow4bii(i32 %x, i32 %y) {
+ %tmp = icmp sgt i32 %x, %y
+ %retval = select i1 %tmp, i32 %x, i32 %y
+ ret i32 %retval
+}
+
+__Z6slow4bii:
+ cmp r0, r1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+=>
+
+__Z6slow4bii:
+ cmp r0, r1
+ movle r0, r1
+ bx lr
+
+//===---------------------------------------------------------------------===//
+
+Implement long long "X-3" with instructions that fold the immediate in. These
+were disabled due to badness with the ARM carry flag on subtracts.
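+
+//===---------------------------------------------------------------------===//
+
+For reference on the previous note, a minimal illustration (not code from the
+backend; the exact register assignment is an assumption) of what folding the
+immediate into a long long subtract should look like. ARM's carry flag after a
+subtract is the inverted borrow, which is the subtlety that got the earlier
+patterns disabled.
+
+long long sub3(long long x) { return x - 3; }
+
+/* desired lowering, assuming the low word of x is in r0 and the high word in r1:
+     subs r0, r0, #3   @ sets carry = NOT borrow
+     sbc  r1, r1, #0   @ consumes the carry for the high word
+*/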
+
+//===---------------------------------------------------------------------===//
+
+We currently compile abs:
+int foo(int p) { return p < 0 ? -p : p; }
+
+into:
+
+_foo:
+ rsb r1, r0, #0
+ cmn r0, #1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+
+This is very, uh, literal. This could be a 3 operation sequence:
+  t = (p sra 31);
+  res = (p xor t)-t
+
+Which would be better. This occurs in png decode.
+
+//===---------------------------------------------------------------------===//
+
+More load / store optimizations:
+1) Look past instructions without side-effects (not load, store, branch, etc.)
+   when forming the list of loads / stores to optimize.
+
+2) Smarter register allocation?
+We are probably missing some opportunities to use ldm / stm. Consider:
+
+ldr r5, [r0]
+ldr r4, [r0, #4]
+
+This cannot be merged into an ldm. Perhaps we will need to do the transformation
+before register allocation. Then teach the register allocator to allocate a
+chunk of consecutive registers.
+
+3) Better representation for block transfer? This is from Olden/power:
+
+ fldd d0, [r4]
+ fstd d0, [r4, #+32]
+ fldd d0, [r4, #+8]
+ fstd d0, [r4, #+40]
+ fldd d0, [r4, #+16]
+ fstd d0, [r4, #+48]
+ fldd d0, [r4, #+24]
+ fstd d0, [r4, #+56]
+
+If we can spare the registers, it would be better to use fldm and fstm here.
+Need major register allocator enhancement though.
+
+4) Can we recognize the relative position of constantpool entries? i.e. Treat
+
+ ldr r0, LCPI17_3
+ ldr r1, LCPI17_4
+ ldr r2, LCPI17_5
+
+ as
+ ldr r0, LCPI17
+ ldr r1, LCPI17+4
+ ldr r2, LCPI17+8
+
+ Then the ldr's can be combined into a single ldm. See Olden/power.
+
+Note for ARM v4 gcc uses ldmia to load a pair of 32-bit values to represent a
+double 64-bit FP constant:
+
+ adr r0, L6
+ ldmia r0, {r0-r1}
+
+ .align 2
+L6:
+ .long -858993459
+ .long 1074318540
+
+5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
+ldrd/strd instead if there are only two destination registers that form an
+odd/even pair. However, we probably would pay a penalty if the address is not
+aligned on an 8-byte boundary. This requires more information on load / store
+nodes (and MI's?) than we currently carry.
+
+6) struct copies appear to be done field by field
+instead of by words, at least sometimes:
+
+struct foo { int x; short s; char c1; char c2; };
+void cpy(struct foo*a, struct foo*b) { *a = *b; }
+
+llvm code (-O2)
+ ldrb r3, [r1, #+6]
+ ldr r2, [r1]
+ ldrb r12, [r1, #+7]
+ ldrh r1, [r1, #+4]
+ str r2, [r0]
+ strh r1, [r0, #+4]
+ strb r3, [r0, #+6]
+ strb r12, [r0, #+7]
+gcc code (-O2)
+ ldmia r1, {r1-r2}
+ stmia r0, {r1-r2}
+
+In this benchmark, poor handling of aggregate copies has shown up as
+having a large effect on size, and possibly speed as well (we don't have
+a good way to measure on ARM).
+
+//===---------------------------------------------------------------------===//
+
+* Consider this silly example:
+
+double bar(double x) {
+  double r = foo(3.1);
+  return x+r;
+}
+
+_bar:
+ stmfd sp!, {r4, r5, r7, lr}
+ add r7, sp, #8
+ mov r4, r0
+ mov r5, r1
+ fldd d0, LCPI1_0
+ fmrrd r0, r1, d0
+ bl _foo
+ fmdrr d0, r4, r5
+ fmsr s2, r0
+ fsitod d1, s2
+ faddd d0, d1, d0
+ fmrrd r0, r1, d0
+ ldmfd sp!, {r4, r5, r7, pc}
+
+Ignore the prologue and epilogue stuff for a second. Note
+ mov r4, r0
+ mov r5, r1
+the copies to callee-save registers and the fact they are only being used by the
+fmdrr instruction. It would have been better had the fmdrr been scheduled
+before the call and placed the result in a callee-save DPR register. The two
+mov ops would not have been necessary.
+
+//===---------------------------------------------------------------------===//
+
+Calling convention related stuff:
+
+* gcc's parameter passing implementation is terrible and we suffer as a result:
+
+e.g.
+struct s {
+ double d1;
+ int s1;
+};
+
+void foo(struct s S) {
+ printf("%g, %d\n", S.d1, S.s1);
+}
+
+'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
+then reloads them to r1, r2, and r3 before issuing the call (r0 contains the
+address of the format string):
+
+ stmfd sp!, {r7, lr}
+ add r7, sp, #0
+ sub sp, sp, #12
+ stmia sp, {r0, r1, r2}
+ ldmia sp, {r1-r2}
+ ldr r0, L5
+ ldr r3, [sp, #8]
+L2:
+ add r0, pc, r0
+ bl L_printf$stub
+
+Instead of an stmia, an ldmia, and an ldr, wouldn't it be better to do three
+moves?
+
+* Returning an aggregate type is even worse:
+
+e.g.
+struct s foo(void) {
+  struct s S = {1.1, 2};
+  return S;
+}
+
+ mov ip, r0
+ ldr r0, L5
+ sub sp, sp, #12
+L2:
+ add r0, pc, r0
+ @ lr needed for prologue
+ ldmia r0, {r0, r1, r2}
+ stmia sp, {r0, r1, r2}
+ stmia ip, {r0, r1, r2}
+ mov r0, ip
+ add sp, sp, #12
+ bx lr
+
+r0 (and later ip) is the hidden parameter from the caller in which to store the
+returned value. The first ldmia loads the constants into r0, r1, r2. The last
+stmia stores r0, r1, r2 into the address passed in. However, there is one
+additional stmia that stores r0, r1, and r2 to some stack location. That store
+is dead.
+
+The llvm-gcc generated code looks like this:
+
+csretcc void %foo(%struct.s* %agg.result) {
+entry:
+ %S = alloca %struct.s, align 4  ; <%struct.s*> [#uses=1]
+ %memtmp = alloca %struct.s  ; <%struct.s*> [#uses=1]
+ cast %struct.s* %S to sbyte*  ; <sbyte*>:0 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
+ cast %struct.s* %agg.result to sbyte*  ; <sbyte*>:1 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
+ cast %struct.s* %memtmp to sbyte*  ; <sbyte*>:2 [#uses=1]
+ call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
+ ret void
+}
+
+llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from the
+constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
+into a number of loads and stores, or 2) custom lower memcpy (of small size) to
+be ldmia / stmia. I think option 2 is better but the current register
+allocator cannot allocate a chunk of registers at a time.
+
+A feasible temporary solution is to use specific physical registers at
+lowering time for small (<= 4 words?) transfer sizes.
+
+* The ARM CSRet calling convention requires the hidden argument to be returned
+by the callee.
+
+//===---------------------------------------------------------------------===//
+
+We can definitely do a better job on BB placements to eliminate some branches.
+It's very common to see llvm-generated assembly code that looks like this:
+
+LBB3:
+ ...
+LBB4:
+...
+ beq LBB3
+ b LBB2
+
+If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
+then eliminate the beq and turn the unconditional branch to LBB2 into a bne.
+
+See McCat/18-imp/ComputeBoundingBoxes for an example.
+
+//===---------------------------------------------------------------------===//
+
+Register scavenging is now implemented. The example in the previous version
+of this document produces optimal code at -O2.
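+
+//===---------------------------------------------------------------------===//
+
+Relating to the aggregate-return memcpy note above: a sketch (illustrative
+only, names invented) of what option (1) amounts to for the 12-byte struct
+case -- expanding the small memcpy into word-sized loads and stores, which the
+load / store optimizer could later merge into ldmia / stmia.
+
+void copy12(unsigned *dst, const unsigned *src) {
+  /* three word copies instead of a memcpy call; candidates for ldm / stm */
+  dst[0] = src[0];
+  dst[1] = src[1];
+  dst[2] = src[2];
+}
+
+Whether the merge actually happens still depends on the register allocator
+handing back consecutive registers, as noted above.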
+
+//===---------------------------------------------------------------------===//
+
+Pre-/post- indexed load / stores:
+
+1) We should not make the pre/post- indexed load/store transform if the base ptr
+is guaranteed to be live beyond the load/store. This can happen if the base
+ptr is live out of the block in which we are performing the optimization. e.g.
+
+mov r1, r2
+ldr r3, [r1], #4
+...
+
+vs.
+
+ldr r3, [r2]
+add r1, r2, #4
+...
+
+In most cases, this is just a wasted optimization. However, sometimes it can
+negatively impact performance because two-address code is more restrictive
+when it comes to scheduling.
+
+Unfortunately, liveout information is currently unavailable during DAG combine
+time.
+
+2) Consider splitting an indexed load / store into a pair of add/sub + load/store
+   to solve #1 (in TwoAddressInstructionPass.cpp).
+
+3) Enhance LSR to generate more opportunities for indexed ops.
+
+4) Once we have added support for multiple result patterns, write indexed load
+   patterns instead of C++ instruction selection code.
+
+5) Use FLDM / FSTM to emulate indexed FP load / store.
+
+//===---------------------------------------------------------------------===//
+
+We should add i64 support to take advantage of the 64-bit load / stores.
+We can add a pseudo i64 register class containing pseudo registers that are
+register pairs. All other ops (e.g. add, sub) would be expanded as usual.
+
+We need to add pseudo instructions (e.g. gethi / getlo) to extract i32 registers
+from the i64 register. These are single moves which can be eliminated if the
+destination register is a sub-register of the source. We should implement proper
+subreg support in the register allocator to coalesce these away.
+
+There are other minor issues such as multiple instructions for a spill / restore
+/ move.
+
+//===---------------------------------------------------------------------===//
+
+Implement support for some more tricky ways to materialize immediates. For
+example, to get 0xffff8000, we can use:
+
+mov r9, #&3f8000
+sub r9, r9, #&400000
+
+//===---------------------------------------------------------------------===//
+
+We sometimes generate multiple add / sub instructions to update sp in prologue
+and epilogue if the inc / dec value is too large to fit in a single immediate
+operand. In some cases, perhaps it might be better to load the value from a
+constantpool instead.
+
+//===---------------------------------------------------------------------===//
+
+GCC generates significantly better code for this function:
+
+int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) {
+    int i = 0;
+
+    if (StackPtr != 0) {
+       while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768)))
+          Line[i++] = Stack[--StackPtr];
+        if (LineLen > 32768)
+        {
+            while (StackPtr != 0 && i < LineLen)
+            {
+                i++;
+                --StackPtr;
+            }
+        }
+    }
+    return StackPtr;
+}
+
+//===---------------------------------------------------------------------===//
+
+This should compile to the mlas instruction:
+int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; }
+
+//===---------------------------------------------------------------------===//
+
+At some point, we should triage these to see if they still apply to us:
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663
+
+http://www.inf.u-szeged.hu/gcc-arm/
+http://citeseer.ist.psu.edu/debus04linktime.html
+
+//===---------------------------------------------------------------------===//
+
+gcc generates smaller code for this function at -O2 or -Os:
+
+void foo(signed char* p) {
+  if (*p == 3)
+    bar();
+  else if (*p == 4)
+    baz();
+  else if (*p == 5)
+    quux();
+}
+
+llvm decides it's a good idea to turn the repeated if...else into a
+binary tree, as if it were a switch; the resulting code requires one fewer
+compare-and-branch when *p<=2 or *p==5, the same number when *p==4
+or *p>6, and one more when *p==3. So it should be a speed win
+(on balance). However, the revised code is larger, with 4 conditional
+branches instead of 3.
+
+More seriously, there is a byte->word extend before
+each comparison, where there should be only one, and the condition codes
+are not remembered when the same two values are compared twice.
+
+//===---------------------------------------------------------------------===//
+
+More register scavenging work:
+
+1. Use the register scavenger to track frame indices materialized into registers
+   (those that do not fit in addressing modes) to allow reuse in the same BB.
+2. Finish scavenging for Thumb.
+3. We know some spills and restores are unnecessary. The issue is that once live
+   intervals are merged, they are never split. So every def is spilled
+   and every use requires a restore if the register allocator decides the
+   resulting live interval is not assigned a physical register. It may be
+   possible (with the help of the scavenger) to turn some spill / restore
+   pairs into register copies.
+
+//===---------------------------------------------------------------------===//
+
+More LSR enhancements possible:
+
+1. Teach LSR about pre- and post- indexed ops to allow the iv increment to be
+   merged into a load / store.
+2. Allow iv reuse even when a type conversion is required. For example, i8
+   and i32 load / store addressing modes are identical.
+
+
+//===---------------------------------------------------------------------===//
+
+This:
+
+int foo(int a, int b, int c, int d) {
+  long long acc = (long long)a * (long long)b;
+  acc += (long long)c * (long long)d;
+  return (int)(acc >> 32);
+}
+
+Should compile to use SMLAL (Signed Multiply Accumulate Long), which multiplies
+two signed 32-bit values to produce a 64-bit value, and accumulates this with
+a 64-bit value.
+
+We currently get this with both v4 and v6:
+
+_foo:
+ smull r1, r0, r1, r0
+ smull r3, r2, r3, r2
+ adds r3, r3, r1
+ adc r0, r2, r0
+ bx lr
+
+//===---------------------------------------------------------------------===//
+
+This:
+ #include <algorithm>
+ std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
+ { return std::make_pair(a + b, a + b < a); }
+ bool no_overflow(unsigned a, unsigned b)
+ { return !full_add(a, b).second; }
+
+Should compile to:
+
+_Z8full_addjj:
+ adds r2, r1, r2
+ movcc r1, #0
+ movcs r1, #1
+ str r2, [r0, #0]
+ strb r1, [r0, #4]
+ mov pc, lr
+
+_Z11no_overflowjj:
+ cmn r0, r1
+ movcs r0, #0
+ movcc r0, #1
+ mov pc, lr
+
+not:
+
+__Z8full_addjj:
+ add r3, r2, r1
+ str r3, [r0]
+ mov r2, #1
+ mov r12, #0
+ cmp r3, r1
+ movlo r12, r2
+ str r12, [r0, #+4]
+ bx lr
+__Z11no_overflowjj:
+ add r3, r1, r0
+ mov r2, #1
+ mov r1, #0
+ cmp r3, r0
+ movhs r1, r2
+ mov r0, r1
+ bx lr
+
+//===---------------------------------------------------------------------===//
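+
+A side note on why the desired no_overflow sequence works (illustrative C, not
+generated code; the function name is invented): unsigned a + b wraps exactly
+when the sum is smaller than either operand, and "cmn r0, r1" computes r0 + r1
+purely for the flags, so carry-set is precisely the overflow case. The
+equivalent branch-free source form is:
+
+int no_overflow_ref(unsigned a, unsigned b) { return a + b >= a; }
+
+//===---------------------------------------------------------------------===//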