diff options
Diffstat (limited to 'contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp | 4931 |
1 files changed, 1952 insertions, 2979 deletions
diff --git a/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp b/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp index b0939c9..cd27fbc 100644 --- a/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp +++ b/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp @@ -18,8 +18,9 @@ // CodeGen library. // // Additional validation code can be generated by this file when runHeader() is -// called, rather than the normal run() entry point. A complete set of tests -// for Neon intrinsics can be generated by calling the runTests() entry point. +// called, rather than the normal run() entry point. +// +// See also the documentation in include/clang/Basic/arm_neon.td. // //===----------------------------------------------------------------------===// @@ -31,315 +32,477 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" #include "llvm/TableGen/TableGenBackend.h" #include <string> +#include <sstream> +#include <vector> +#include <map> +#include <algorithm> using namespace llvm; -enum OpKind { - OpNone, - OpUnavailable, - OpAdd, - OpAddl, - OpAddlHi, - OpAddw, - OpAddwHi, - OpSub, - OpSubl, - OpSublHi, - OpSubw, - OpSubwHi, - OpMul, - OpMla, - OpMlal, - OpMullHi, - OpMullHiN, - OpMlalHi, - OpMlalHiN, - OpMls, - OpMlsl, - OpMlslHi, - OpMlslHiN, - OpMulN, - OpMlaN, - OpMlsN, - OpFMlaN, - OpFMlsN, - OpMlalN, - OpMlslN, - OpMulLane, - OpMulXLane, - OpMullLane, - OpMullHiLane, - OpMlaLane, - OpMlsLane, - OpMlalLane, - OpMlalHiLane, - OpMlslLane, - OpMlslHiLane, - OpQDMullLane, - OpQDMullHiLane, - OpQDMlalLane, - OpQDMlalHiLane, - OpQDMlslLane, - OpQDMlslHiLane, - OpQDMulhLane, - OpQRDMulhLane, - OpFMSLane, - OpFMSLaneQ, - OpTrn1, - OpZip1, - OpUzp1, - OpTrn2, - OpZip2, - OpUzp2, - OpEq, - OpGe, - OpLe, - OpGt, - OpLt, - OpNeg, - OpNot, - OpAnd, - OpOr, - OpXor, - OpAndNot, - OpOrNot, - OpCast, - OpConcat, - OpDup, - OpDupLane, - OpHi, - OpLo, - OpSelect, - OpRev16, - OpRev32, - OpRev64, - OpXtnHi, - OpSqxtunHi, - OpQxtnHi, - OpFcvtnHi, - OpFcvtlHi, - OpFcvtxnHi, - OpReinterpret, - OpAddhnHi, - OpRAddhnHi, - OpSubhnHi, - OpRSubhnHi, - OpAbdl, - OpAbdlHi, - OpAba, - OpAbal, - OpAbalHi, - OpQDMullHi, - OpQDMullHiN, - OpQDMlalHi, - OpQDMlalHiN, - OpQDMlslHi, - OpQDMlslHiN, - OpDiv, - OpLongHi, - OpNarrowHi, - OpMovlHi, - OpCopyLane, - OpCopyQLane, - OpCopyLaneQ, - OpScalarMulLane, - OpScalarMulLaneQ, - OpScalarMulXLane, - OpScalarMulXLaneQ, - OpScalarVMulXLane, - OpScalarVMulXLaneQ, - OpScalarQDMullLane, - OpScalarQDMullLaneQ, - OpScalarQDMulHiLane, - OpScalarQDMulHiLaneQ, - OpScalarQRDMulHiLane, - OpScalarQRDMulHiLaneQ, - OpScalarGetLane, - OpScalarSetLane -}; +namespace { + +// While globals are generally bad, this one allows us to perform assertions +// liberally and somehow still trace them back to the def they indirectly +// came from. +static Record *CurrentRecord = nullptr; +static void assert_with_loc(bool Assertion, const std::string &Str) { + if (!Assertion) { + if (CurrentRecord) + PrintFatalError(CurrentRecord->getLoc(), Str); + else + PrintFatalError(Str); + } +} enum ClassKind { ClassNone, - ClassI, // generic integer instruction, e.g., "i8" suffix - ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix - ClassW, // width-specific instruction, e.g., "8" suffix - ClassB, // bitcast arguments with enum argument to specify type - ClassL, // Logical instructions which are op instructions - // but we need to not emit any suffix for in our - // tests. - ClassNoTest // Instructions which we do not test since they are - // not TRUE instructions. + ClassI, // generic integer instruction, e.g., "i8" suffix + ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix + ClassW, // width-specific instruction, e.g., "8" suffix + ClassB, // bitcast arguments with enum argument to specify type + ClassL, // Logical instructions which are op instructions + // but we need to not emit any suffix for in our + // tests. + ClassNoTest // Instructions which we do not test since they are + // not TRUE instructions. }; /// NeonTypeFlags - Flags to identify the types for overloaded Neon /// builtins. These must be kept in sync with the flags in /// include/clang/Basic/TargetBuiltins.h. -namespace { -class NeonTypeFlags { - enum { - EltTypeMask = 0xf, - UnsignedFlag = 0x10, - QuadFlag = 0x20 - }; - uint32_t Flags; +namespace NeonTypeFlags { +enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; + +enum EltType { + Int8, + Int16, + Int32, + Int64, + Poly8, + Poly16, + Poly64, + Poly128, + Float16, + Float32, + Float64 +}; +} + +class Intrinsic; +class NeonEmitter; +class Type; +class Variable; + +//===----------------------------------------------------------------------===// +// TypeSpec +//===----------------------------------------------------------------------===// +/// A TypeSpec is just a simple wrapper around a string, but gets its own type +/// for strong typing purposes. +/// +/// A TypeSpec can be used to create a type. +class TypeSpec : public std::string { public: - enum EltType { - Int8, - Int16, - Int32, - Int64, - Poly8, - Poly16, - Poly64, - Float16, - Float32, - Float64 - }; + static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { + std::vector<TypeSpec> Ret; + TypeSpec Acc; + for (char I : Str.str()) { + if (islower(I)) { + Acc.push_back(I); + Ret.push_back(TypeSpec(Acc)); + Acc.clear(); + } else { + Acc.push_back(I); + } + } + return Ret; + } +}; - NeonTypeFlags(unsigned F) : Flags(F) {} - NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) { - if (IsUnsigned) - Flags |= UnsignedFlag; - if (IsQuad) - Flags |= QuadFlag; +//===----------------------------------------------------------------------===// +// Type +//===----------------------------------------------------------------------===// + +/// A Type. Not much more to say here. +class Type { +private: + TypeSpec TS; + + bool Float, Signed, Void, Poly, Constant, Pointer; + // ScalarForMangling and NoManglingQ are really not suited to live here as + // they are not related to the type. But they live in the TypeSpec (not the + // prototype), so this is really the only place to store them. + bool ScalarForMangling, NoManglingQ; + unsigned Bitwidth, ElementBitwidth, NumVectors; + +public: + Type() + : Float(false), Signed(false), Void(true), Poly(false), Constant(false), + Pointer(false), ScalarForMangling(false), NoManglingQ(false), + Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} + + Type(TypeSpec TS, char CharMod) + : TS(TS), Float(false), Signed(false), Void(false), Poly(false), + Constant(false), Pointer(false), ScalarForMangling(false), + NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { + applyModifier(CharMod); + } + + /// Returns a type representing "void". + static Type getVoid() { return Type(); } + + bool operator==(const Type &Other) const { return str() == Other.str(); } + bool operator!=(const Type &Other) const { return !operator==(Other); } + + // + // Query functions + // + bool isScalarForMangling() const { return ScalarForMangling; } + bool noManglingQ() const { return NoManglingQ; } + + bool isPointer() const { return Pointer; } + bool isFloating() const { return Float; } + bool isInteger() const { return !Float && !Poly; } + bool isSigned() const { return Signed; } + bool isScalar() const { return NumVectors == 0; } + bool isVector() const { return NumVectors > 0; } + bool isFloat() const { return Float && ElementBitwidth == 32; } + bool isDouble() const { return Float && ElementBitwidth == 64; } + bool isHalf() const { return Float && ElementBitwidth == 16; } + bool isPoly() const { return Poly; } + bool isChar() const { return ElementBitwidth == 8; } + bool isShort() const { return !Float && ElementBitwidth == 16; } + bool isInt() const { return !Float && ElementBitwidth == 32; } + bool isLong() const { return !Float && ElementBitwidth == 64; } + bool isVoid() const { return Void; } + unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } + unsigned getSizeInBits() const { return Bitwidth; } + unsigned getElementSizeInBits() const { return ElementBitwidth; } + unsigned getNumVectors() const { return NumVectors; } + + // + // Mutator functions + // + void makeUnsigned() { Signed = false; } + void makeSigned() { Signed = true; } + void makeInteger(unsigned ElemWidth, bool Sign) { + Float = false; + Poly = false; + Signed = Sign; + ElementBitwidth = ElemWidth; + } + void makeScalar() { + Bitwidth = ElementBitwidth; + NumVectors = 0; + } + void makeOneVector() { + assert(isVector()); + NumVectors = 1; + } + void doubleLanes() { + assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); + Bitwidth = 128; + } + void halveLanes() { + assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); + Bitwidth = 64; } - uint32_t getFlags() const { return Flags; } + /// Return the C string representation of a type, which is the typename + /// defined in stdint.h or arm_neon.h. + std::string str() const; + + /// Return the string representation of a type, which is an encoded + /// string for passing to the BUILTIN() macro in Builtins.def. + std::string builtin_str() const; + + /// Return the value in NeonTypeFlags for this type. + unsigned getNeonEnum() const; + + /// Parse a type from a stdint.h or arm_neon.h typedef name, + /// for example uint32x2_t or int64_t. + static Type fromTypedefName(StringRef Name); + +private: + /// Creates the type based on the typespec string in TS. + /// Sets "Quad" to true if the "Q" or "H" modifiers were + /// seen. This is needed by applyModifier as some modifiers + /// only take effect if the type size was changed by "Q" or "H". + void applyTypespec(bool &Quad); + /// Applies a prototype modifier to the type. + void applyModifier(char Mod); }; -} // end anonymous namespace -namespace { +//===----------------------------------------------------------------------===// +// Variable +//===----------------------------------------------------------------------===// + +/// A variable is a simple class that just has a type and a name. +class Variable { + Type T; + std::string N; + +public: + Variable() : T(Type::getVoid()), N("") {} + Variable(Type T, std::string N) : T(T), N(N) {} + + Type getType() const { return T; } + std::string getName() const { return "__" + N; } +}; + +//===----------------------------------------------------------------------===// +// Intrinsic +//===----------------------------------------------------------------------===// + +/// The main grunt class. This represents an instantiation of an intrinsic with +/// a particular typespec and prototype. +class Intrinsic { + friend class DagEmitter; + + /// The Record this intrinsic was created from. + Record *R; + /// The unmangled name and prototype. + std::string Name, Proto; + /// The input and output typespecs. InTS == OutTS except when + /// CartesianProductOfTypes is 1 - this is the case for vreinterpret. + TypeSpec OutTS, InTS; + /// The base class kind. Most intrinsics use ClassS, which has full type + /// info for integers (s32/u32). Some use ClassI, which doesn't care about + /// signedness (i32), while some (ClassB) have no type at all, only a width + /// (32). + ClassKind CK; + /// The list of DAGs for the body. May be empty, in which case we should + /// emit a builtin call. + ListInit *Body; + /// The architectural #ifdef guard. + std::string Guard; + /// Set if the Unvailable bit is 1. This means we don't generate a body, + /// just an "unavailable" attribute on a declaration. + bool IsUnavailable; + /// Is this intrinsic safe for big-endian? or does it need its arguments + /// reversing? + bool BigEndianSafe; + + /// The types of return value [0] and parameters [1..]. + std::vector<Type> Types; + /// The local variables defined. + std::map<std::string, Variable> Variables; + /// NeededEarly - set if any other intrinsic depends on this intrinsic. + bool NeededEarly; + /// UseMacro - set if we should implement using a macro or unset for a + /// function. + bool UseMacro; + /// The set of intrinsics that this intrinsic uses/requires. + std::set<Intrinsic *> Dependencies; + /// The "base type", which is Type('d', OutTS). InBaseType is only + /// different if CartesianProductOfTypes = 1 (for vreinterpret). + Type BaseType, InBaseType; + /// The return variable. + Variable RetVar; + /// A postfix to apply to every variable. Defaults to "". + std::string VariablePostfix; + + NeonEmitter &Emitter; + std::stringstream OS; + +public: + Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, + TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, + StringRef Guard, bool IsUnavailable, bool BigEndianSafe) + : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS), + CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable), + BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false), + BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) { + // If this builtin takes an immediate argument, we need to #define it rather + // than use a standard declaration, so that SemaChecking can range check + // the immediate passed by the user. + if (Proto.find('i') != std::string::npos) + UseMacro = true; + + // Pointer arguments need to use macros to avoid hiding aligned attributes + // from the pointer type. + if (Proto.find('p') != std::string::npos || + Proto.find('c') != std::string::npos) + UseMacro = true; + + // It is not permitted to pass or return an __fp16 by value, so intrinsics + // taking a scalar float16_t must be implemented as macros. + if (OutTS.find('h') != std::string::npos && + Proto.find('s') != std::string::npos) + UseMacro = true; + + // Modify the TypeSpec per-argument to get a concrete Type, and create + // known variables for each. + // Types[0] is the return value. + Types.push_back(Type(OutTS, Proto[0])); + for (unsigned I = 1; I < Proto.size(); ++I) + Types.push_back(Type(InTS, Proto[I])); + } + + /// Get the Record that this intrinsic is based off. + Record *getRecord() const { return R; } + /// Get the set of Intrinsics that this intrinsic calls. + /// this is the set of immediate dependencies, NOT the + /// transitive closure. + const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } + /// Get the architectural guard string (#ifdef). + std::string getGuard() const { return Guard; } + /// Get the non-mangled name. + std::string getName() const { return Name; } + + /// Return true if the intrinsic takes an immediate operand. + bool hasImmediate() const { + return Proto.find('i') != std::string::npos; + } + /// Return the parameter index of the immediate operand. + unsigned getImmediateIdx() const { + assert(hasImmediate()); + unsigned Idx = Proto.find('i'); + assert(Idx > 0 && "Can't return an immediate!"); + return Idx - 1; + } + + /// Return true if the intrinsic takes an splat operand. + bool hasSplat() const { return Proto.find('a') != std::string::npos; } + /// Return the parameter index of the splat operand. + unsigned getSplatIdx() const { + assert(hasSplat()); + unsigned Idx = Proto.find('a'); + assert(Idx > 0 && "Can't return a splat!"); + return Idx - 1; + } + + unsigned getNumParams() const { return Proto.size() - 1; } + Type getReturnType() const { return Types[0]; } + Type getParamType(unsigned I) const { return Types[I + 1]; } + Type getBaseType() const { return BaseType; } + /// Return the raw prototype string. + std::string getProto() const { return Proto; } + + /// Return true if the prototype has a scalar argument. + /// This does not return true for the "splat" code ('a'). + bool protoHasScalar(); + + /// Return the index that parameter PIndex will sit at + /// in a generated function call. This is often just PIndex, + /// but may not be as things such as multiple-vector operands + /// and sret parameters need to be taken into accont. + unsigned getGeneratedParamIdx(unsigned PIndex) { + unsigned Idx = 0; + if (getReturnType().getNumVectors() > 1) + // Multiple vectors are passed as sret. + ++Idx; + + for (unsigned I = 0; I < PIndex; ++I) + Idx += std::max(1U, getParamType(I).getNumVectors()); + + return Idx; + } + + bool hasBody() const { return Body && Body->getValues().size() > 0; } + + void setNeededEarly() { NeededEarly = true; } + + bool operator<(const Intrinsic &Other) const { + // Sort lexicographically on a two-tuple (Guard, Name) + if (Guard != Other.Guard) + return Guard < Other.Guard; + return Name < Other.Name; + } + + ClassKind getClassKind(bool UseClassBIfScalar = false) { + if (UseClassBIfScalar && !protoHasScalar()) + return ClassB; + return CK; + } + + /// Return the name, mangled with type information. + /// If ForceClassS is true, use ClassS (u32/s32) instead + /// of the intrinsic's own type class. + std::string getMangledName(bool ForceClassS = false); + /// Return the type code for a builtin function call. + std::string getInstTypeCode(Type T, ClassKind CK); + /// Return the type string for a BUILTIN() macro in Builtins.def. + std::string getBuiltinTypeStr(); + + /// Generate the intrinsic, returning code. + std::string generate(); + /// Perform type checking and populate the dependency graph, but + /// don't generate code yet. + void indexBody(); + +private: + std::string mangleName(std::string Name, ClassKind CK); + + void initVariables(); + std::string replaceParamsIn(std::string S); + + void emitBodyAsBuiltinCall(); + + void generateImpl(bool ReverseArguments, + StringRef NamePrefix, StringRef CallPrefix); + void emitReturn(); + void emitBody(StringRef CallPrefix); + void emitShadowedArgs(); + void emitArgumentReversal(); + void emitReturnReversal(); + void emitReverseVariable(Variable &Dest, Variable &Src); + void emitNewLine(); + void emitClosingBrace(); + void emitOpeningBrace(); + void emitPrototype(StringRef NamePrefix); + + class DagEmitter { + Intrinsic &Intr; + StringRef CallPrefix; + + public: + DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : + Intr(Intr), CallPrefix(CallPrefix) { + } + std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName); + std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI); + std::pair<Type, std::string> emitDagSplat(DagInit *DI); + std::pair<Type, std::string> emitDagDup(DagInit *DI); + std::pair<Type, std::string> emitDagShuffle(DagInit *DI); + std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); + std::pair<Type, std::string> emitDagCall(DagInit *DI); + std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); + std::pair<Type, std::string> emitDagLiteral(DagInit *DI); + std::pair<Type, std::string> emitDagOp(DagInit *DI); + std::pair<Type, std::string> emitDag(DagInit *DI); + }; + +}; + +//===----------------------------------------------------------------------===// +// NeonEmitter +//===----------------------------------------------------------------------===// + class NeonEmitter { RecordKeeper &Records; - StringMap<OpKind> OpMap; - DenseMap<Record*, ClassKind> ClassMap; + DenseMap<Record *, ClassKind> ClassMap; + std::map<std::string, std::vector<Intrinsic *>> IntrinsicMap; + unsigned UniqueNumber; + + void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out); + void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); + void genOverloadTypeCheckCode(raw_ostream &OS, + SmallVectorImpl<Intrinsic *> &Defs); + void genIntrinsicRangeCheckCode(raw_ostream &OS, + SmallVectorImpl<Intrinsic *> &Defs); public: - NeonEmitter(RecordKeeper &R) : Records(R) { - OpMap["OP_NONE"] = OpNone; - OpMap["OP_UNAVAILABLE"] = OpUnavailable; - OpMap["OP_ADD"] = OpAdd; - OpMap["OP_ADDL"] = OpAddl; - OpMap["OP_ADDLHi"] = OpAddlHi; - OpMap["OP_ADDW"] = OpAddw; - OpMap["OP_ADDWHi"] = OpAddwHi; - OpMap["OP_SUB"] = OpSub; - OpMap["OP_SUBL"] = OpSubl; - OpMap["OP_SUBLHi"] = OpSublHi; - OpMap["OP_SUBW"] = OpSubw; - OpMap["OP_SUBWHi"] = OpSubwHi; - OpMap["OP_MUL"] = OpMul; - OpMap["OP_MLA"] = OpMla; - OpMap["OP_MLAL"] = OpMlal; - OpMap["OP_MULLHi"] = OpMullHi; - OpMap["OP_MULLHi_N"] = OpMullHiN; - OpMap["OP_MLALHi"] = OpMlalHi; - OpMap["OP_MLALHi_N"] = OpMlalHiN; - OpMap["OP_MLS"] = OpMls; - OpMap["OP_MLSL"] = OpMlsl; - OpMap["OP_MLSLHi"] = OpMlslHi; - OpMap["OP_MLSLHi_N"] = OpMlslHiN; - OpMap["OP_MUL_N"] = OpMulN; - OpMap["OP_MLA_N"] = OpMlaN; - OpMap["OP_MLS_N"] = OpMlsN; - OpMap["OP_FMLA_N"] = OpFMlaN; - OpMap["OP_FMLS_N"] = OpFMlsN; - OpMap["OP_MLAL_N"] = OpMlalN; - OpMap["OP_MLSL_N"] = OpMlslN; - OpMap["OP_MUL_LN"]= OpMulLane; - OpMap["OP_MULX_LN"]= OpMulXLane; - OpMap["OP_MULL_LN"] = OpMullLane; - OpMap["OP_MULLHi_LN"] = OpMullHiLane; - OpMap["OP_MLA_LN"]= OpMlaLane; - OpMap["OP_MLS_LN"]= OpMlsLane; - OpMap["OP_MLAL_LN"] = OpMlalLane; - OpMap["OP_MLALHi_LN"] = OpMlalHiLane; - OpMap["OP_MLSL_LN"] = OpMlslLane; - OpMap["OP_MLSLHi_LN"] = OpMlslHiLane; - OpMap["OP_QDMULL_LN"] = OpQDMullLane; - OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane; - OpMap["OP_QDMLAL_LN"] = OpQDMlalLane; - OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane; - OpMap["OP_QDMLSL_LN"] = OpQDMlslLane; - OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane; - OpMap["OP_QDMULH_LN"] = OpQDMulhLane; - OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane; - OpMap["OP_FMS_LN"] = OpFMSLane; - OpMap["OP_FMS_LNQ"] = OpFMSLaneQ; - OpMap["OP_TRN1"] = OpTrn1; - OpMap["OP_ZIP1"] = OpZip1; - OpMap["OP_UZP1"] = OpUzp1; - OpMap["OP_TRN2"] = OpTrn2; - OpMap["OP_ZIP2"] = OpZip2; - OpMap["OP_UZP2"] = OpUzp2; - OpMap["OP_EQ"] = OpEq; - OpMap["OP_GE"] = OpGe; - OpMap["OP_LE"] = OpLe; - OpMap["OP_GT"] = OpGt; - OpMap["OP_LT"] = OpLt; - OpMap["OP_NEG"] = OpNeg; - OpMap["OP_NOT"] = OpNot; - OpMap["OP_AND"] = OpAnd; - OpMap["OP_OR"] = OpOr; - OpMap["OP_XOR"] = OpXor; - OpMap["OP_ANDN"] = OpAndNot; - OpMap["OP_ORN"] = OpOrNot; - OpMap["OP_CAST"] = OpCast; - OpMap["OP_CONC"] = OpConcat; - OpMap["OP_HI"] = OpHi; - OpMap["OP_LO"] = OpLo; - OpMap["OP_DUP"] = OpDup; - OpMap["OP_DUP_LN"] = OpDupLane; - OpMap["OP_SEL"] = OpSelect; - OpMap["OP_REV16"] = OpRev16; - OpMap["OP_REV32"] = OpRev32; - OpMap["OP_REV64"] = OpRev64; - OpMap["OP_XTN"] = OpXtnHi; - OpMap["OP_SQXTUN"] = OpSqxtunHi; - OpMap["OP_QXTN"] = OpQxtnHi; - OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi; - OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi; - OpMap["OP_VCVTX_HI"] = OpFcvtxnHi; - OpMap["OP_REINT"] = OpReinterpret; - OpMap["OP_ADDHNHi"] = OpAddhnHi; - OpMap["OP_RADDHNHi"] = OpRAddhnHi; - OpMap["OP_SUBHNHi"] = OpSubhnHi; - OpMap["OP_RSUBHNHi"] = OpRSubhnHi; - OpMap["OP_ABDL"] = OpAbdl; - OpMap["OP_ABDLHi"] = OpAbdlHi; - OpMap["OP_ABA"] = OpAba; - OpMap["OP_ABAL"] = OpAbal; - OpMap["OP_ABALHi"] = OpAbalHi; - OpMap["OP_QDMULLHi"] = OpQDMullHi; - OpMap["OP_QDMULLHi_N"] = OpQDMullHiN; - OpMap["OP_QDMLALHi"] = OpQDMlalHi; - OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN; - OpMap["OP_QDMLSLHi"] = OpQDMlslHi; - OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN; - OpMap["OP_DIV"] = OpDiv; - OpMap["OP_LONG_HI"] = OpLongHi; - OpMap["OP_NARROW_HI"] = OpNarrowHi; - OpMap["OP_MOVL_HI"] = OpMovlHi; - OpMap["OP_COPY_LN"] = OpCopyLane; - OpMap["OP_COPYQ_LN"] = OpCopyQLane; - OpMap["OP_COPY_LNQ"] = OpCopyLaneQ; - OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane; - OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ; - OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane; - OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ; - OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane; - OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ; - OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane; - OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ; - OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane; - OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ; - OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane; - OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ; - OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane; - OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane; + /// Called by Intrinsic - this attempts to get an intrinsic that takes + /// the given types as arguments. + Intrinsic *getIntrinsic(StringRef Name, ArrayRef<Type> Types); + + /// Called by Intrinsic - returns a globally-unique number. + unsigned getUniqueNumber() { return UniqueNumber++; } + NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { Record *SI = R.getClass("SInst"); Record *II = R.getClass("IInst"); Record *WI = R.getClass("WInst"); @@ -367,2821 +530,1633 @@ public: // runTests - Emit tests for all the Neon intrinsics. void runTests(raw_ostream &o); - -private: - void emitIntrinsic(raw_ostream &OS, Record *R, - StringMap<ClassKind> &EmittedMap); - void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap, - bool isA64GenBuiltinDef); - void genOverloadTypeCheckCode(raw_ostream &OS, - StringMap<ClassKind> &A64IntrinsicMap, - bool isA64TypeCheck); - void genIntrinsicRangeCheckCode(raw_ostream &OS, - StringMap<ClassKind> &A64IntrinsicMap, - bool isA64RangeCheck); - void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, - bool isA64TestGen); }; + } // end anonymous namespace -/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs, -/// which each StringRef representing a single type declared in the string. -/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing -/// 2xfloat and 4xfloat respectively. -static void ParseTypes(Record *r, std::string &s, - SmallVectorImpl<StringRef> &TV) { - const char *data = s.data(); - int len = 0; - - for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { - if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U' - || data[len] == 'H' || data[len] == 'S') - continue; +//===----------------------------------------------------------------------===// +// Type implementation +//===----------------------------------------------------------------------===// - switch (data[len]) { - case 'c': - case 's': - case 'i': - case 'l': - case 'h': - case 'f': - case 'd': - break; - default: - PrintFatalError(r->getLoc(), - "Unexpected letter: " + std::string(data + len, 1)); - } - TV.push_back(StringRef(data, len + 1)); - data += len + 1; - len = -1; - } +std::string Type::str() const { + if (Void) + return "void"; + std::string S; + + if (!Signed && isInteger()) + S += "u"; + + if (Poly) + S += "poly"; + else if (Float) + S += "float"; + else + S += "int"; + + S += utostr(ElementBitwidth); + if (isVector()) + S += "x" + utostr(getNumElements()); + if (NumVectors > 1) + S += "x" + utostr(NumVectors); + S += "_t"; + + if (Constant) + S += " const"; + if (Pointer) + S += " *"; + + return S; } -/// Widen - Convert a type code into the next wider type. char -> short, -/// short -> int, etc. -static char Widen(const char t) { - switch (t) { - case 'c': - return 's'; - case 's': - return 'i'; - case 'i': - return 'l'; - case 'h': - return 'f'; - case 'f': - return 'd'; - default: - PrintFatalError("unhandled type in widen!"); +std::string Type::builtin_str() const { + std::string S; + if (isVoid()) + return "v"; + + if (Pointer) + // All pointers are void pointers. + S += "v"; + else if (isInteger()) + switch (ElementBitwidth) { + case 8: S += "c"; break; + case 16: S += "s"; break; + case 32: S += "i"; break; + case 64: S += "Wi"; break; + case 128: S += "LLLi"; break; + default: llvm_unreachable("Unhandled case!"); + } + else + switch (ElementBitwidth) { + case 16: S += "h"; break; + case 32: S += "f"; break; + case 64: S += "d"; break; + default: llvm_unreachable("Unhandled case!"); + } + + if (isChar() && !Pointer) + // Make chars explicitly signed. + S = "S" + S; + else if (isInteger() && !Pointer && !Signed) + S = "U" + S; + + if (isScalar()) { + if (Constant) S += "C"; + if (Pointer) S += "*"; + return S; } + + std::string Ret; + for (unsigned I = 0; I < NumVectors; ++I) + Ret += "V" + utostr(getNumElements()) + S; + + return Ret; } -/// Narrow - Convert a type code into the next smaller type. short -> char, -/// float -> half float, etc. -static char Narrow(const char t) { - switch (t) { - case 's': - return 'c'; - case 'i': - return 's'; - case 'l': - return 'i'; - case 'f': - return 'h'; - case 'd': - return 'f'; - default: - PrintFatalError("unhandled type in narrow!"); +unsigned Type::getNeonEnum() const { + unsigned Addend; + switch (ElementBitwidth) { + case 8: Addend = 0; break; + case 16: Addend = 1; break; + case 32: Addend = 2; break; + case 64: Addend = 3; break; + case 128: Addend = 4; break; + default: llvm_unreachable("Unhandled element bitwidth!"); } -} -static std::string GetNarrowTypestr(StringRef ty) -{ - std::string s; - for (size_t i = 0, end = ty.size(); i < end; i++) { - switch (ty[i]) { - case 's': - s += 'c'; - break; - case 'i': - s += 's'; - break; - case 'l': - s += 'i'; - break; - default: - s += ty[i]; - break; - } + unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; + if (Poly) { + // Adjustment needed because Poly32 doesn't exist. + if (Addend >= 2) + --Addend; + Base = (unsigned)NeonTypeFlags::Poly8 + Addend; } + if (Float) { + assert(Addend != 0 && "Float8 doesn't exist!"); + Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); + } + + if (Bitwidth == 128) + Base |= (unsigned)NeonTypeFlags::QuadFlag; + if (isInteger() && !Signed) + Base |= (unsigned)NeonTypeFlags::UnsignedFlag; - return s; + return Base; } -/// For a particular StringRef, return the base type code, and whether it has -/// the quad-vector, polynomial, or unsigned modifiers set. -static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { - unsigned off = 0; - // ignore scalar. - if (ty[off] == 'S') { - ++off; +Type Type::fromTypedefName(StringRef Name) { + Type T; + T.Void = false; + T.Float = false; + T.Poly = false; + + if (Name.front() == 'u') { + T.Signed = false; + Name = Name.drop_front(); + } else { + T.Signed = true; } - // remember quad. - if (ty[off] == 'Q' || ty[off] == 'H') { - quad = true; - ++off; + + if (Name.startswith("float")) { + T.Float = true; + Name = Name.drop_front(5); + } else if (Name.startswith("poly")) { + T.Poly = true; + Name = Name.drop_front(4); + } else { + assert(Name.startswith("int")); + Name = Name.drop_front(3); } - // remember poly. - if (ty[off] == 'P') { - poly = true; - ++off; + unsigned I = 0; + for (I = 0; I < Name.size(); ++I) { + if (!isdigit(Name[I])) + break; } + Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); + Name = Name.drop_front(I); - // remember unsigned. - if (ty[off] == 'U') { - usgn = true; - ++off; + T.Bitwidth = T.ElementBitwidth; + T.NumVectors = 1; + + if (Name.front() == 'x') { + Name = Name.drop_front(); + unsigned I = 0; + for (I = 0; I < Name.size(); ++I) { + if (!isdigit(Name[I])) + break; + } + unsigned NumLanes; + Name.substr(0, I).getAsInteger(10, NumLanes); + Name = Name.drop_front(I); + T.Bitwidth = T.ElementBitwidth * NumLanes; + } else { + // Was scalar. + T.NumVectors = 0; + } + if (Name.front() == 'x') { + Name = Name.drop_front(); + unsigned I = 0; + for (I = 0; I < Name.size(); ++I) { + if (!isdigit(Name[I])) + break; + } + Name.substr(0, I).getAsInteger(10, T.NumVectors); + Name = Name.drop_front(I); } - // base type to get the type string for. - return ty[off]; + assert(Name.startswith("_t") && "Malformed typedef!"); + return T; } -/// ModType - Transform a type code and its modifiers based on a mod code. The -/// mod code definitions may be found at the top of arm_neon.td. -static char ModType(const char mod, char type, bool &quad, bool &poly, - bool &usgn, bool &scal, bool &cnst, bool &pntr) { - switch (mod) { - case 't': - if (poly) { - poly = false; - usgn = true; - } - break; - case 'b': - scal = true; - case 'u': - usgn = true; - poly = false; - if (type == 'f') - type = 'i'; - if (type == 'd') - type = 'l'; +void Type::applyTypespec(bool &Quad) { + std::string S = TS; + ScalarForMangling = false; + Void = false; + Poly = Float = false; + ElementBitwidth = ~0U; + Signed = true; + NumVectors = 1; + + for (char I : S) { + switch (I) { + case 'S': + ScalarForMangling = true; break; - case '$': - scal = true; - case 'x': - usgn = false; - poly = false; - if (type == 'f') - type = 'i'; - if (type == 'd') - type = 'l'; - break; - case 'o': - scal = true; - type = 'd'; - usgn = false; - break; - case 'y': - scal = true; - case 'f': - if (type == 'h') - quad = true; - type = 'f'; - usgn = false; + case 'H': + NoManglingQ = true; + Quad = true; break; - case 'F': - type = 'd'; - usgn = false; + case 'Q': + Quad = true; break; - case 'g': - quad = false; + case 'P': + Poly = true; break; - case 'B': - case 'C': - case 'D': - case 'j': - quad = true; - break; - case 'w': - type = Widen(type); - quad = true; - break; - case 'n': - type = Widen(type); - break; - case 'i': - type = 'i'; - scal = true; - break; - case 'l': - type = 'l'; - scal = true; - usgn = true; - break; - case 'z': - type = Narrow(type); - scal = true; - break; - case 'r': - type = Widen(type); - scal = true; - break; - case 's': - case 'a': - scal = true; - break; - case 'k': - quad = true; + case 'U': + Signed = false; break; case 'c': - cnst = true; - case 'p': - pntr = true; - scal = true; + ElementBitwidth = 8; break; case 'h': - type = Narrow(type); - if (type == 'h') - quad = false; - break; - case 'q': - type = Narrow(type); - quad = true; - break; - case 'e': - type = Narrow(type); - usgn = true; - break; - case 'm': - type = Narrow(type); - quad = false; - break; - default: - break; - } - return type; -} - -static bool IsMultiVecProto(const char p) { - return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D')); -} - -/// TypeString - for a modifier and type, generate the name of the typedef for -/// that type. QUc -> uint8x8_t. -static std::string TypeString(const char mod, StringRef typestr) { - bool quad = false; - bool poly = false; - bool usgn = false; - bool scal = false; - bool cnst = false; - bool pntr = false; - - if (mod == 'v') - return "void"; - if (mod == 'i') - return "int"; - - // base type to get the type string for. - char type = ClassifyType(typestr, quad, poly, usgn); - - // Based on the modifying character, change the type and width if necessary. - type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); - - SmallString<128> s; - - if (usgn) - s.push_back('u'); - - switch (type) { - case 'c': - s += poly ? "poly8" : "int8"; - if (scal) - break; - s += quad ? "x16" : "x8"; - break; + Float = true; + // Fall through case 's': - s += poly ? "poly16" : "int16"; - if (scal) - break; - s += quad ? "x8" : "x4"; + ElementBitwidth = 16; break; + case 'f': + Float = true; + // Fall through case 'i': - s += "int32"; - if (scal) - break; - s += quad ? "x4" : "x2"; + ElementBitwidth = 32; break; + case 'd': + Float = true; + // Fall through case 'l': - s += (poly && !usgn)? "poly64" : "int64"; - if (scal) - break; - s += quad ? "x2" : "x1"; - break; - case 'h': - s += "float16"; - if (scal) - break; - s += quad ? "x8" : "x4"; + ElementBitwidth = 64; break; - case 'f': - s += "float32"; - if (scal) - break; - s += quad ? "x4" : "x2"; - break; - case 'd': - s += "float64"; - if (scal) - break; - s += quad ? "x2" : "x1"; + case 'k': + ElementBitwidth = 128; + // Poly doesn't have a 128x1 type. + if (Poly) + NumVectors = 0; break; - default: - PrintFatalError("unhandled type!"); + llvm_unreachable("Unhandled type code!"); + } } + assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); - if (mod == '2' || mod == 'B') - s += "x2"; - if (mod == '3' || mod == 'C') - s += "x3"; - if (mod == '4' || mod == 'D') - s += "x4"; - - // Append _t, finishing the type string typedef type. - s += "_t"; - - if (cnst) - s += " const"; - - if (pntr) - s += " *"; - - return s.str(); + Bitwidth = Quad ? 128 : 64; } -/// BuiltinTypeString - for a modifier and type, generate the clang -/// BuiltinsARM.def prototype code for the function. See the top of clang's -/// Builtins.def for a description of the type strings. -static std::string BuiltinTypeString(const char mod, StringRef typestr, - ClassKind ck, bool ret) { - bool quad = false; - bool poly = false; - bool usgn = false; - bool scal = false; - bool cnst = false; - bool pntr = false; - - if (mod == 'v') - return "v"; // void - if (mod == 'i') - return "i"; // int - - // base type to get the type string for. - char type = ClassifyType(typestr, quad, poly, usgn); - - // Based on the modifying character, change the type and width if necessary. - type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); - - // All pointers are void* pointers. Change type to 'v' now. - if (pntr) { - usgn = false; - poly = false; - type = 'v'; - } - // Treat half-float ('h') types as unsigned short ('s') types. - if (type == 'h') { - type = 's'; - usgn = true; - } - usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && - scal && type != 'f' && type != 'd'); - - if (scal) { - SmallString<128> s; - - if (usgn) - s.push_back('U'); - else if (type == 'c') - s.push_back('S'); // make chars explicitly signed - - if (type == 'l') // 64-bit long - s += "LLi"; - else - s.push_back(type); +void Type::applyModifier(char Mod) { + bool AppliedQuad = false; + applyTypespec(AppliedQuad); - if (cnst) - s.push_back('C'); - if (pntr) - s.push_back('*'); - return s.str(); - } - - // Since the return value must be one type, return a vector type of the - // appropriate width which we will bitcast. An exception is made for - // returning structs of 2, 3, or 4 vectors which are returned in a sret-like - // fashion, storing them to a pointer arg. - if (ret) { - if (IsMultiVecProto(mod)) - return "vv*"; // void result with void* first argument - if (mod == 'f' || (ck != ClassB && type == 'f')) - return quad ? "V4f" : "V2f"; - if (mod == 'F' || (ck != ClassB && type == 'd')) - return quad ? "V2d" : "V1d"; - if (ck != ClassB && type == 's') - return quad ? "V8s" : "V4s"; - if (ck != ClassB && type == 'i') - return quad ? "V4i" : "V2i"; - if (ck != ClassB && type == 'l') - return quad ? "V2LLi" : "V1LLi"; - - return quad ? "V16Sc" : "V8Sc"; - } - - // Non-return array types are passed as individual vectors. - if (mod == '2' || mod == 'B') - return quad ? "V16ScV16Sc" : "V8ScV8Sc"; - if (mod == '3' || mod == 'C') - return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc"; - if (mod == '4' || mod == 'D') - return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc"; - - if (mod == 'f' || (ck != ClassB && type == 'f')) - return quad ? "V4f" : "V2f"; - if (mod == 'F' || (ck != ClassB && type == 'd')) - return quad ? "V2d" : "V1d"; - if (ck != ClassB && type == 's') - return quad ? "V8s" : "V4s"; - if (ck != ClassB && type == 'i') - return quad ? "V4i" : "V2i"; - if (ck != ClassB && type == 'l') - return quad ? "V2LLi" : "V1LLi"; - - return quad ? "V16Sc" : "V8Sc"; -} - -/// InstructionTypeCode - Computes the ARM argument character code and -/// quad status for a specific type string and ClassKind. -static void InstructionTypeCode(const StringRef &typeStr, - const ClassKind ck, - bool &quad, - std::string &typeCode) { - bool poly = false; - bool usgn = false; - char type = ClassifyType(typeStr, quad, poly, usgn); - - switch (type) { - case 'c': - switch (ck) { - case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break; - case ClassI: typeCode = "i8"; break; - case ClassW: typeCode = "8"; break; - default: break; - } + switch (Mod) { + case 'v': + Void = true; break; - case 's': - switch (ck) { - case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break; - case ClassI: typeCode = "i16"; break; - case ClassW: typeCode = "16"; break; - default: break; + case 't': + if (Poly) { + Poly = false; + Signed = false; } break; + case 'b': + Signed = false; + Float = false; + Poly = false; + NumVectors = 0; + Bitwidth = ElementBitwidth; + break; + case '$': + Signed = true; + Float = false; + Poly = false; + NumVectors = 0; + Bitwidth = ElementBitwidth; + break; + case 'u': + Signed = false; + Poly = false; + Float = false; + break; + case 'x': + Signed = true; + assert(!Poly && "'u' can't be used with poly types!"); + Float = false; + break; + case 'o': + Bitwidth = ElementBitwidth = 64; + NumVectors = 0; + Float = true; + break; + case 'y': + Bitwidth = ElementBitwidth = 32; + NumVectors = 0; + Float = true; + break; + case 'f': + // Special case - if we're half-precision, a floating + // point argument needs to be 128-bits (double size). + if (isHalf()) + Bitwidth = 128; + Float = true; + ElementBitwidth = 32; + break; + case 'F': + Float = true; + ElementBitwidth = 64; + break; + case 'g': + if (AppliedQuad) + Bitwidth /= 2; + break; + case 'j': + if (!AppliedQuad) + Bitwidth *= 2; + break; + case 'w': + ElementBitwidth *= 2; + Bitwidth *= 2; + break; + case 'n': + ElementBitwidth *= 2; + break; case 'i': - switch (ck) { - case ClassS: typeCode = usgn ? "u32" : "s32"; break; - case ClassI: typeCode = "i32"; break; - case ClassW: typeCode = "32"; break; - default: break; - } + Float = false; + Poly = false; + ElementBitwidth = Bitwidth = 32; + NumVectors = 0; + Signed = true; break; case 'l': - switch (ck) { - case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break; - case ClassI: typeCode = "i64"; break; - case ClassW: typeCode = "64"; break; - default: break; - } + Float = false; + Poly = false; + ElementBitwidth = Bitwidth = 64; + NumVectors = 0; + Signed = false; + break; + case 'z': + ElementBitwidth /= 2; + Bitwidth = ElementBitwidth; + NumVectors = 0; + break; + case 'r': + ElementBitwidth *= 2; + Bitwidth = ElementBitwidth; + NumVectors = 0; + break; + case 's': + case 'a': + Bitwidth = ElementBitwidth; + NumVectors = 0; + break; + case 'k': + Bitwidth *= 2; + break; + case 'c': + Constant = true; + // Fall through + case 'p': + Pointer = true; + Bitwidth = ElementBitwidth; + NumVectors = 0; break; case 'h': - switch (ck) { - case ClassS: - case ClassI: typeCode = "f16"; break; - case ClassW: typeCode = "16"; break; - default: break; - } + ElementBitwidth /= 2; break; - case 'f': - switch (ck) { - case ClassS: - case ClassI: typeCode = "f32"; break; - case ClassW: typeCode = "32"; break; - default: break; - } + case 'q': + ElementBitwidth /= 2; + Bitwidth *= 2; + break; + case 'e': + ElementBitwidth /= 2; + Signed = false; + break; + case 'm': + ElementBitwidth /= 2; + Bitwidth /= 2; break; case 'd': - switch (ck) { - case ClassS: - case ClassI: - typeCode += "f64"; - break; - case ClassW: - PrintFatalError("unhandled type!"); - default: - break; - } + break; + case '2': + NumVectors = 2; + break; + case '3': + NumVectors = 3; + break; + case '4': + NumVectors = 4; + break; + case 'B': + NumVectors = 2; + if (!AppliedQuad) + Bitwidth *= 2; + break; + case 'C': + NumVectors = 3; + if (!AppliedQuad) + Bitwidth *= 2; + break; + case 'D': + NumVectors = 4; + if (!AppliedQuad) + Bitwidth *= 2; break; default: - PrintFatalError("unhandled type!"); + llvm_unreachable("Unhandled character!"); } } -static char Insert_BHSD_Suffix(StringRef typestr){ - unsigned off = 0; - if(typestr[off++] == 'S'){ - while(typestr[off] == 'Q' || typestr[off] == 'H'|| - typestr[off] == 'P' || typestr[off] == 'U') - ++off; - switch (typestr[off]){ - default : break; - case 'c' : return 'b'; - case 's' : return 'h'; - case 'i' : - case 'f' : return 's'; - case 'l' : - case 'd' : return 'd'; +//===----------------------------------------------------------------------===// +// Intrinsic implementation +//===----------------------------------------------------------------------===// + +std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) { + char typeCode = '\0'; + bool printNumber = true; + + if (CK == ClassB) + return ""; + + if (T.isPoly()) + typeCode = 'p'; + else if (T.isInteger()) + typeCode = T.isSigned() ? 's' : 'u'; + else + typeCode = 'f'; + + if (CK == ClassI) { + switch (typeCode) { + default: + break; + case 's': + case 'u': + case 'p': + typeCode = 'i'; + break; } } - return 0; -} - -static bool endsWith_xN(std::string const &name) { - if (name.length() > 3) { - if (name.compare(name.length() - 3, 3, "_x2") == 0 || - name.compare(name.length() - 3, 3, "_x3") == 0 || - name.compare(name.length() - 3, 3, "_x4") == 0) - return true; + if (CK == ClassB) { + typeCode = '\0'; } - return false; -} -/// MangleName - Append a type or width suffix to a base neon function name, -/// and insert a 'q' in the appropriate location if type string starts with 'Q'. -/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc. -/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used. -static std::string MangleName(const std::string &name, StringRef typestr, - ClassKind ck) { - if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" || - name == "vcvt_f64_f32") - return name; + std::string S; + if (typeCode != '\0') + S.push_back(typeCode); + if (printNumber) + S += utostr(T.getElementSizeInBits()); - bool quad = false; - std::string typeCode = ""; + return S; +} - InstructionTypeCode(typestr, ck, quad, typeCode); +std::string Intrinsic::getBuiltinTypeStr() { + ClassKind LocalCK = getClassKind(true); + std::string S; - std::string s = name; + Type RetT = getReturnType(); + if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && + !RetT.isFloating()) + RetT.makeInteger(RetT.getElementSizeInBits(), false); - if (typeCode.size() > 0) { - // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN. - if (endsWith_xN(s)) - s.insert(s.length() - 3, "_" + typeCode); - else - s += "_" + typeCode; - } + // Since the return value must be one type, return a vector type of the + // appropriate width which we will bitcast. An exception is made for + // returning structs of 2, 3, or 4 vectors which are returned in a sret-like + // fashion, storing them to a pointer arg. + if (RetT.getNumVectors() > 1) { + S += "vv*"; // void result with void* first argument + } else { + if (RetT.isPoly()) + RetT.makeInteger(RetT.getElementSizeInBits(), false); + if (!RetT.isScalar() && !RetT.isSigned()) + RetT.makeSigned(); - if (ck == ClassB) - s += "_v"; + bool ForcedVectorFloatingType = Proto[0] == 'F' || Proto[0] == 'f'; + if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType) + // Cast to vector of 8-bit elements. + RetT.makeInteger(8, true); - // Insert a 'q' before the first '_' character so that it ends up before - // _lane or _n on vector-scalar operations. - if (typestr.find("Q") != StringRef::npos) { - size_t pos = s.find('_'); - s = s.insert(pos, "q"); - } - char ins = Insert_BHSD_Suffix(typestr); - if(ins){ - size_t pos = s.find('_'); - s = s.insert(pos, &ins, 1); + S += RetT.builtin_str(); } - return s; -} + for (unsigned I = 0; I < getNumParams(); ++I) { + Type T = getParamType(I); + if (T.isPoly()) + T.makeInteger(T.getElementSizeInBits(), false); -static void PreprocessInstruction(const StringRef &Name, - const std::string &InstName, - std::string &Prefix, - bool &HasNPostfix, - bool &HasLanePostfix, - bool &HasDupPostfix, - bool &IsSpecialVCvt, - size_t &TBNumber) { - // All of our instruction name fields from arm_neon.td are of the form - // <instructionname>_... - // Thus we grab our instruction name via computation of said Prefix. - const size_t PrefixEnd = Name.find_first_of('_'); - // If InstName is passed in, we use that instead of our name Prefix. - Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName; - - const StringRef Postfix = Name.slice(PrefixEnd, Name.size()); - - HasNPostfix = Postfix.count("_n"); - HasLanePostfix = Postfix.count("_lane"); - HasDupPostfix = Postfix.count("_dup"); - IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt"); - - if (InstName.compare("vtbl") == 0 || - InstName.compare("vtbx") == 0) { - // If we have a vtblN/vtbxN instruction, use the instruction's ASCII - // encoding to get its true value. - TBNumber = Name[Name.size()-1] - 48; - } -} + bool ForcedFloatingType = Proto[I + 1] == 'F' || Proto[I + 1] == 'f'; + if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType) + T.makeInteger(8, true); + // Halves always get converted to 8-bit elements. + if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) + T.makeInteger(8, true); -/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have -/// extracted, generate a FileCheck pattern for a Load Or Store -static void -GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef, - const std::string& OutTypeCode, - const bool &IsQuad, - const bool &HasDupPostfix, - const bool &HasLanePostfix, - const size_t Count, - std::string &RegisterSuffix) { - const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1"); - // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang - // will output a series of v{ld,st}1s, so we have to handle it specially. - if ((Count == 3 || Count == 4) && IsQuad) { - RegisterSuffix += "{"; - for (size_t i = 0; i < Count; i++) { - RegisterSuffix += "d{{[0-9]+}}"; - if (HasDupPostfix) { - RegisterSuffix += "[]"; - } - if (HasLanePostfix) { - RegisterSuffix += "[{{[0-9]+}}]"; - } - if (i < Count-1) { - RegisterSuffix += ", "; - } - } - RegisterSuffix += "}"; - } else { - - // Handle normal loads and stores. - RegisterSuffix += "{"; - for (size_t i = 0; i < Count; i++) { - RegisterSuffix += "d{{[0-9]+}}"; - if (HasDupPostfix) { - RegisterSuffix += "[]"; - } - if (HasLanePostfix) { - RegisterSuffix += "[{{[0-9]+}}]"; - } - if (IsQuad && !HasLanePostfix) { - RegisterSuffix += ", d{{[0-9]+}}"; - if (HasDupPostfix) { - RegisterSuffix += "[]"; - } - } - if (i < Count-1) { - RegisterSuffix += ", "; - } - } - RegisterSuffix += "}, [r{{[0-9]+}}"; + if (LocalCK == ClassI) + T.makeSigned(); - // We only include the alignment hint if we have a vld1.*64 or - // a dup/lane instruction. - if (IsLDSTOne) { - if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") { - RegisterSuffix += ":" + OutTypeCode; - } - } + // Constant indices are always just "int". + if (hasImmediate() && getImmediateIdx() == I) + T.makeInteger(32, true); - RegisterSuffix += "]"; + S += T.builtin_str(); } -} -static bool HasNPostfixAndScalarArgs(const StringRef &NameRef, - const bool &HasNPostfix) { - return (NameRef.count("vmla") || - NameRef.count("vmlal") || - NameRef.count("vmlsl") || - NameRef.count("vmull") || - NameRef.count("vqdmlal") || - NameRef.count("vqdmlsl") || - NameRef.count("vqdmulh") || - NameRef.count("vqdmull") || - NameRef.count("vqrdmulh")) && HasNPostfix; -} + // Extra constant integer to hold type class enum for this function, e.g. s8 + if (LocalCK == ClassB) + S += "i"; -static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef, - const bool &HasLanePostfix) { - return (NameRef.count("vmla") || - NameRef.count("vmls") || - NameRef.count("vmlal") || - NameRef.count("vmlsl") || - (NameRef.count("vmul") && NameRef.size() == 3)|| - NameRef.count("vqdmlal") || - NameRef.count("vqdmlsl") || - NameRef.count("vqdmulh") || - NameRef.count("vqrdmulh")) && HasLanePostfix; + return S; } -static bool IsSpecialLaneMultiply(const StringRef &NameRef, - const bool &HasLanePostfix, - const bool &IsQuad) { - const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh")) - && IsQuad; - const bool IsVMull = NameRef.count("mull") && !IsQuad; - return (IsVMulOrMulh || IsVMull) && HasLanePostfix; +std::string Intrinsic::getMangledName(bool ForceClassS) { + // Check if the prototype has a scalar operand with the type of the vector + // elements. If not, bitcasting the args will take care of arg checking. + // The actual signedness etc. will be taken care of with special enums. + ClassKind LocalCK = CK; + if (!protoHasScalar()) + LocalCK = ClassB; + + return mangleName(Name, ForceClassS ? ClassS : LocalCK); } -static void NormalizeProtoForRegisterPatternCreation(const std::string &Name, - const std::string &Proto, - const bool &HasNPostfix, - const bool &IsQuad, - const bool &HasLanePostfix, - const bool &HasDupPostfix, - std::string &NormedProto) { - // Handle generic case. - const StringRef NameRef(Name); - for (size_t i = 0, end = Proto.size(); i < end; i++) { - switch (Proto[i]) { - case 'u': - case 'f': - case 'F': - case 'd': - case 's': - case 'x': - case 't': - case 'n': - NormedProto += IsQuad? 'q' : 'd'; - break; - case 'w': - case 'k': - NormedProto += 'q'; - break; - case 'g': - case 'j': - case 'h': - case 'e': - NormedProto += 'd'; - break; - case 'i': - NormedProto += HasLanePostfix? 'a' : 'i'; - break; - case 'a': - if (HasLanePostfix) { - NormedProto += 'a'; - } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) { - NormedProto += IsQuad? 'q' : 'd'; - } else { - NormedProto += 'i'; - } - break; - } +std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) { + std::string typeCode = getInstTypeCode(BaseType, LocalCK); + std::string S = Name; + + if (Name == "vcvt_f32_f16" || Name == "vcvt_f32_f64" || + Name == "vcvt_f64_f32") + return Name; + + if (typeCode.size() > 0) { + // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. + if (Name.size() >= 3 && isdigit(Name.back()) && + Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') + S.insert(S.length() - 3, "_" + typeCode); + else + S += "_" + typeCode; } - // Handle Special Cases. - const bool IsNotVExt = !NameRef.count("vext"); - const bool IsVPADAL = NameRef.count("vpadal"); - const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef, - HasLanePostfix); - const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix, - IsQuad); - - if (IsSpecialLaneMul) { - // If - NormedProto[2] = NormedProto[3]; - NormedProto.erase(3); - } else if (NormedProto.size() == 4 && - NormedProto[0] == NormedProto[1] && - IsNotVExt) { - // If NormedProto.size() == 4 and the first two proto characters are the - // same, ignore the first. - NormedProto = NormedProto.substr(1, 3); - } else if (Is5OpLaneAccum) { - // If we have a 5 op lane accumulator operation, we take characters 1,2,4 - std::string tmp = NormedProto.substr(1,2); - tmp += NormedProto[4]; - NormedProto = tmp; - } else if (IsVPADAL) { - // If we have VPADAL, ignore the first character. - NormedProto = NormedProto.substr(0, 2); - } else if (NameRef.count("vdup") && NormedProto.size() > 2) { - // If our instruction is a dup instruction, keep only the first and - // last characters. - std::string tmp = ""; - tmp += NormedProto[0]; - tmp += NormedProto[NormedProto.size()-1]; - NormedProto = tmp; + if (BaseType != InBaseType) { + // A reinterpret - out the input base type at the end. + S += "_" + getInstTypeCode(InBaseType, LocalCK); } -} -/// GenerateRegisterCheckPatterns - Given a bunch of data we have -/// extracted, generate a FileCheck pattern to check that an -/// instruction's arguments are correct. -static void GenerateRegisterCheckPattern(const std::string &Name, - const std::string &Proto, - const std::string &OutTypeCode, - const bool &HasNPostfix, - const bool &IsQuad, - const bool &HasLanePostfix, - const bool &HasDupPostfix, - const size_t &TBNumber, - std::string &RegisterSuffix) { - - RegisterSuffix = ""; - - const StringRef NameRef(Name); - const StringRef ProtoRef(Proto); - - if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) { - return; + if (LocalCK == ClassB) + S += "_v"; + + // Insert a 'q' before the first '_' character so that it ends up before + // _lane or _n on vector-scalar operations. + if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { + size_t Pos = S.find('_'); + S.insert(Pos, "q"); + } + + char Suffix = '\0'; + if (BaseType.isScalarForMangling()) { + switch (BaseType.getElementSizeInBits()) { + case 8: Suffix = 'b'; break; + case 16: Suffix = 'h'; break; + case 32: Suffix = 's'; break; + case 64: Suffix = 'd'; break; + default: llvm_unreachable("Bad suffix!"); + } + } + if (Suffix != '\0') { + size_t Pos = S.find('_'); + S.insert(Pos, &Suffix, 1); } - const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst"); - const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx"); + return S; +} - if (IsLoadStore) { - // Grab N value from v{ld,st}N using its ascii representation. - const size_t Count = NameRef[3] - 48; +std::string Intrinsic::replaceParamsIn(std::string S) { + while (S.find('$') != std::string::npos) { + size_t Pos = S.find('$'); + size_t End = Pos + 1; + while (isalpha(S[End])) + ++End; - GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad, - HasDupPostfix, HasLanePostfix, - Count, RegisterSuffix); - } else if (IsTBXOrTBL) { - RegisterSuffix += "d{{[0-9]+}}, {"; - for (size_t i = 0; i < TBNumber-1; i++) { - RegisterSuffix += "d{{[0-9]+}}, "; - } - RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}"; - } else { - // Handle a normal instruction. - if (NameRef.count("vget") || NameRef.count("vset")) - return; - - // We first normalize our proto, since we only need to emit 4 - // different types of checks, yet have more than 4 proto types - // that map onto those 4 patterns. - std::string NormalizedProto(""); - NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad, - HasLanePostfix, HasDupPostfix, - NormalizedProto); - - for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) { - const char &c = NormalizedProto[i]; - switch (c) { - case 'q': - RegisterSuffix += "q{{[0-9]+}}, "; - break; + std::string VarName = S.substr(Pos + 1, End - Pos - 1); + assert_with_loc(Variables.find(VarName) != Variables.end(), + "Variable not defined!"); + S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); + } - case 'd': - RegisterSuffix += "d{{[0-9]+}}, "; - break; + return S; +} - case 'i': - RegisterSuffix += "#{{[0-9]+}}, "; - break; +void Intrinsic::initVariables() { + Variables.clear(); - case 'a': - RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], "; - break; - } - } + // Modify the TypeSpec per-argument to get a concrete Type, and create + // known variables for each. + for (unsigned I = 1; I < Proto.size(); ++I) { + char NameC = '0' + (I - 1); + std::string Name = "p"; + Name.push_back(NameC); - // Remove extra ", ". - RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2); + Variables[Name] = Variable(Types[I], Name + VariablePostfix); } + RetVar = Variable(Types[0], "ret" + VariablePostfix); } -/// GenerateChecksForIntrinsic - Given a specific instruction name + -/// typestr + class kind, generate the proper set of FileCheck -/// Patterns to check for. We could just return a string, but instead -/// use a vector since it provides us with the extra flexibility of -/// emitting multiple checks, which comes in handy for certain cases -/// like mla where we want to check for 2 different instructions. -static void GenerateChecksForIntrinsic(const std::string &Name, - const std::string &Proto, - StringRef &OutTypeStr, - StringRef &InTypeStr, - ClassKind Ck, - const std::string &InstName, - bool IsHiddenLOp, - std::vector<std::string>& Result) { - - // If Ck is a ClassNoTest instruction, just return so no test is - // emitted. - if(Ck == ClassNoTest) - return; +void Intrinsic::emitPrototype(StringRef NamePrefix) { + if (UseMacro) + OS << "#define "; + else + OS << "__ai " << Types[0].str() << " "; - if (Name == "vcvt_f32_f16") { - Result.push_back("vcvt.f32.f16"); - return; - } + OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; + for (unsigned I = 0; I < getNumParams(); ++I) { + if (I != 0) + OS << ", "; - // Now we preprocess our instruction given the data we have to get the - // data that we need. - // Create a StringRef for String Manipulation of our Name. - const StringRef NameRef(Name); - // Instruction Prefix. - std::string Prefix; - // The type code for our out type string. - std::string OutTypeCode; - // To handle our different cases, we need to check for different postfixes. - // Is our instruction a quad instruction. - bool IsQuad = false; - // Our instruction is of the form <instructionname>_n. - bool HasNPostfix = false; - // Our instruction is of the form <instructionname>_lane. - bool HasLanePostfix = false; - // Our instruction is of the form <instructionname>_dup. - bool HasDupPostfix = false; - // Our instruction is a vcvt instruction which requires special handling. - bool IsSpecialVCvt = false; - // If we have a vtbxN or vtblN instruction, this is set to N. - size_t TBNumber = -1; - // Register Suffix - std::string RegisterSuffix; - - PreprocessInstruction(NameRef, InstName, Prefix, - HasNPostfix, HasLanePostfix, HasDupPostfix, - IsSpecialVCvt, TBNumber); - - InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode); - GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad, - HasLanePostfix, HasDupPostfix, TBNumber, - RegisterSuffix); - - // In the following section, we handle a bunch of special cases. You can tell - // a special case by the fact we are returning early. - - // If our instruction is a logical instruction without postfix or a - // hidden LOp just return the current Prefix. - if (Ck == ClassL || IsHiddenLOp) { - Result.push_back(Prefix + " " + RegisterSuffix); - return; - } + char NameC = '0' + I; + std::string Name = "p"; + Name.push_back(NameC); + assert(Variables.find(Name) != Variables.end()); + Variable &V = Variables[Name]; - // If we have a vmov, due to the many different cases, some of which - // vary within the different intrinsics generated for a single - // instruction type, just output a vmov. (e.g. given an instruction - // A, A.u32 might be vmov and A.u8 might be vmov.8). - // - // FIXME: Maybe something can be done about this. The two cases that we care - // about are vmov as an LType and vmov as a WType. - if (Prefix == "vmov") { - Result.push_back(Prefix + " " + RegisterSuffix); - return; + if (!UseMacro) + OS << V.getType().str() << " "; + OS << V.getName(); } - // In the following section, we handle special cases. + OS << ")"; +} - if (OutTypeCode == "64") { - // If we have a 64 bit vdup/vext and are handling an uint64x1_t - // type, the intrinsic will be optimized away, so just return - // nothing. On the other hand if we are handling an uint64x2_t - // (i.e. quad instruction), vdup/vmov instructions should be - // emitted. - if (Prefix == "vdup" || Prefix == "vext") { - if (IsQuad) { - Result.push_back("{{vmov|vdup}}"); - } - return; - } +void Intrinsic::emitOpeningBrace() { + if (UseMacro) + OS << " __extension__ ({"; + else + OS << " {"; + emitNewLine(); +} - // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with - // multiple register operands. - bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3" - || Prefix == "vld4"; - bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3" - || Prefix == "vst4"; - if (MultiLoadPrefix || MultiStorePrefix) { - Result.push_back(NameRef.slice(0, 3).str() + "1.64"); - return; - } +void Intrinsic::emitClosingBrace() { + if (UseMacro) + OS << "})"; + else + OS << "}"; +} - // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of - // emitting said instructions. So return a check for - // vldr/vstr/vmov/str instead. - if (HasLanePostfix || HasDupPostfix) { - if (Prefix == "vst1") { - Result.push_back("{{str|vstr|vmov}}"); - return; - } else if (Prefix == "vld1") { - Result.push_back("{{ldr|vldr|vmov}}"); - return; - } +void Intrinsic::emitNewLine() { + if (UseMacro) + OS << " \\\n"; + else + OS << "\n"; +} + +void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { + if (Dest.getType().getNumVectors() > 1) { + emitNewLine(); + + for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { + OS << " " << Dest.getName() << ".val[" << utostr(K) << "] = " + << "__builtin_shufflevector(" + << Src.getName() << ".val[" << utostr(K) << "], " + << Src.getName() << ".val[" << utostr(K) << "]"; + for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) + OS << ", " << utostr(J); + OS << ");"; + emitNewLine(); } + } else { + OS << " " << Dest.getName() + << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); + for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) + OS << ", " << utostr(J); + OS << ");"; + emitNewLine(); } +} - // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are - // sometimes disassembled as vtrn.32. We use a regex to handle both - // cases. - if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") { - Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix); +void Intrinsic::emitArgumentReversal() { + if (BigEndianSafe) return; - } - // Currently on most ARM processors, we do not use vmla/vmls for - // quad floating point operations. Instead we output vmul + vadd. So - // check if we have one of those instructions and just output a - // check for vmul. - if (OutTypeCode == "f32") { - if (Prefix == "vmls") { - Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); - Result.push_back("vsub." + OutTypeCode); - return; - } else if (Prefix == "vmla") { - Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix); - Result.push_back("vadd." + OutTypeCode); - return; - } - } + // Reverse all vector arguments. + for (unsigned I = 0; I < getNumParams(); ++I) { + std::string Name = "p" + utostr(I); + std::string NewName = "rev" + utostr(I); - // If we have vcvt, get the input type from the instruction name - // (which should be of the form instname_inputtype) and append it - // before the output type. - if (Prefix == "vcvt") { - const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1); - Prefix += "." + inTypeCode; - } + Variable &V = Variables[Name]; + Variable NewV(V.getType(), NewName + VariablePostfix); - // Append output type code to get our final mangled instruction. - Prefix += "." + OutTypeCode; + if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) + continue; - Result.push_back(Prefix + " " + RegisterSuffix); + OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; + emitReverseVariable(NewV, V); + V = NewV; + } } -/// UseMacro - Examine the prototype string to determine if the intrinsic -/// should be defined as a preprocessor macro instead of an inline function. -static bool UseMacro(const std::string &proto) { - // If this builtin takes an immediate argument, we need to #define it rather - // than use a standard declaration, so that SemaChecking can range check - // the immediate passed by the user. - if (proto.find('i') != std::string::npos) - return true; - - // Pointer arguments need to use macros to avoid hiding aligned attributes - // from the pointer type. - if (proto.find('p') != std::string::npos || - proto.find('c') != std::string::npos) - return true; - - return false; +void Intrinsic::emitReturnReversal() { + if (BigEndianSafe) + return; + if (!getReturnType().isVector() || getReturnType().isVoid() || + getReturnType().getNumElements() == 1) + return; + emitReverseVariable(RetVar, RetVar); } -/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is -/// defined as a macro should be accessed directly instead of being first -/// assigned to a local temporary. -static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { - // True for constant ints (i), pointers (p) and const pointers (c). - return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); -} -// Generate the string "(argtype a, argtype b, ...)" -static std::string GenArgs(const std::string &proto, StringRef typestr, - const std::string &name) { - bool define = UseMacro(proto); - char arg = 'a'; - - std::string s; - s += "("; - - for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { - if (define) { - // Some macro arguments are used directly instead of being assigned - // to local temporaries; prepend an underscore prefix to make their - // names consistent with the local temporaries. - if (MacroArgUsedDirectly(proto, i)) - s += "__"; - } else { - s += TypeString(proto[i], typestr) + " __"; - } - s.push_back(arg); - //To avoid argument being multiple defined, add extra number for renaming. - if (name == "vcopy_lane" || name == "vcopy_laneq") - s.push_back('1'); - if ((i + 1) < e) - s += ", "; - } - - s += ")"; - return s; -} - -// Macro arguments are not type-checked like inline function arguments, so -// assign them to local temporaries to get the right type checking. -static std::string GenMacroLocals(const std::string &proto, StringRef typestr, - const std::string &name ) { - char arg = 'a'; - std::string s; - bool generatedLocal = false; +void Intrinsic::emitShadowedArgs() { + // Macro arguments are not type-checked like inline function arguments, + // so assign them to local temporaries to get the right type checking. + if (!UseMacro) + return; - for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { + for (unsigned I = 0; I < getNumParams(); ++I) { // Do not create a temporary for an immediate argument. // That would defeat the whole point of using a macro! - if (MacroArgUsedDirectly(proto, i)) + if (hasImmediate() && Proto[I+1] == 'i') + continue; + // Do not create a temporary for pointer arguments. The input + // pointer may have an alignment hint. + if (getParamType(I).isPointer()) continue; - generatedLocal = true; - bool extranumber = false; - if (name == "vcopy_lane" || name == "vcopy_laneq") - extranumber = true; - - s += TypeString(proto[i], typestr) + " __"; - s.push_back(arg); - if(extranumber) - s.push_back('1'); - s += " = ("; - s.push_back(arg); - if(extranumber) - s.push_back('1'); - s += "); "; - } - - if (generatedLocal) - s += "\\\n "; - return s; -} -// Use the vmovl builtin to sign-extend or zero-extend a vector. -static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { - std::string s, high; - high = h ? "_high" : ""; - s = MangleName("vmovl" + high, typestr, ClassS); - s += "(" + a + ")"; - return s; -} + std::string Name = "p" + utostr(I); -// Get the high 64-bit part of a vector -static std::string GetHigh(const std::string &a, StringRef typestr) { - std::string s; - s = MangleName("vget_high", typestr, ClassS); - s += "(" + a + ")"; - return s; -} + assert(Variables.find(Name) != Variables.end()); + Variable &V = Variables[Name]; -// Gen operation with two operands and get high 64-bit for both of two operands. -static std::string Gen2OpWith2High(StringRef typestr, - const std::string &op, - const std::string &a, - const std::string &b) { - std::string s; - std::string Op1 = GetHigh(a, typestr); - std::string Op2 = GetHigh(b, typestr); - s = MangleName(op, typestr, ClassS); - s += "(" + Op1 + ", " + Op2 + ");"; - return s; -} + std::string NewName = "s" + utostr(I); + Variable V2(V.getType(), NewName + VariablePostfix); -// Gen operation with three operands and get high 64-bit of the latter -// two operands. -static std::string Gen3OpWith2High(StringRef typestr, - const std::string &op, - const std::string &a, - const std::string &b, - const std::string &c) { - std::string s; - std::string Op1 = GetHigh(b, typestr); - std::string Op2 = GetHigh(c, typestr); - s = MangleName(op, typestr, ClassS); - s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; - return s; + OS << " " << V2.getType().str() << " " << V2.getName() << " = " + << V.getName() << ";"; + emitNewLine(); + + V = V2; + } } -// Gen combine operation by putting a on low 64-bit, and b on high 64-bit. -static std::string GenCombine(std::string typestr, - const std::string &a, - const std::string &b) { - std::string s; - s = MangleName("vcombine", typestr, ClassS); - s += "(" + a + ", " + b + ")"; - return s; +// We don't check 'a' in this function, because for builtin function the +// argument matching to 'a' uses a vector type splatted from a scalar type. +bool Intrinsic::protoHasScalar() { + return (Proto.find('s') != std::string::npos || + Proto.find('z') != std::string::npos || + Proto.find('r') != std::string::npos || + Proto.find('b') != std::string::npos || + Proto.find('$') != std::string::npos || + Proto.find('y') != std::string::npos || + Proto.find('o') != std::string::npos); } -static std::string Duplicate(unsigned nElts, StringRef typestr, - const std::string &a) { - std::string s; +void Intrinsic::emitBodyAsBuiltinCall() { + std::string S; - s = "(" + TypeString('d', typestr) + "){ "; - for (unsigned i = 0; i != nElts; ++i) { - s += a; - if ((i + 1) < nElts) - s += ", "; + // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit + // sret-like argument. + bool SRet = getReturnType().getNumVectors() >= 2; + + StringRef N = Name; + if (hasSplat()) { + // Call the non-splat builtin: chop off the "_n" suffix from the name. + assert(N.endswith("_n")); + N = N.drop_back(2); } - s += " }"; - return s; -} + ClassKind LocalCK = CK; + if (!protoHasScalar()) + LocalCK = ClassB; -static std::string SplatLane(unsigned nElts, const std::string &vec, - const std::string &lane) { - std::string s = "__builtin_shufflevector(" + vec + ", " + vec; - for (unsigned i = 0; i < nElts; ++i) - s += ", " + lane; - s += ")"; - return s; -} + if (!getReturnType().isVoid() && !SRet) + S += "(" + RetVar.getType().str() + ") "; -static std::string RemoveHigh(const std::string &name) { - std::string s = name; - std::size_t found = s.find("_high_"); - if (found == std::string::npos) - PrintFatalError("name should contain \"_high_\" for high intrinsics"); - s.replace(found, 5, ""); - return s; -} + S += "__builtin_neon_" + mangleName(N, LocalCK) + "("; -static unsigned GetNumElements(StringRef typestr, bool &quad) { - quad = false; - bool dummy = false; - char type = ClassifyType(typestr, quad, dummy, dummy); - unsigned nElts = 0; - switch (type) { - case 'c': nElts = 8; break; - case 's': nElts = 4; break; - case 'i': nElts = 2; break; - case 'l': nElts = 1; break; - case 'h': nElts = 4; break; - case 'f': nElts = 2; break; - case 'd': - nElts = 1; - break; - default: - PrintFatalError("unhandled type!"); - } - if (quad) nElts <<= 1; - return nElts; -} + if (SRet) + S += "&" + RetVar.getName() + ", "; -// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. -static std::string GenOpString(const std::string &name, OpKind op, - const std::string &proto, StringRef typestr) { - bool quad; - unsigned nElts = GetNumElements(typestr, quad); - bool define = UseMacro(proto); + for (unsigned I = 0; I < getNumParams(); ++I) { + Variable &V = Variables["p" + utostr(I)]; + Type T = V.getType(); - std::string ts = TypeString(proto[0], typestr); - std::string s; - if (!define) { - s = "return "; - } + // Handle multiple-vector values specially, emitting each subvector as an + // argument to the builtin. + if (T.getNumVectors() > 1) { + // Check if an explicit cast is needed. + std::string Cast; + if (T.isChar() || T.isPoly() || !T.isSigned()) { + Type T2 = T; + T2.makeOneVector(); + T2.makeInteger(8, /*Signed=*/true); + Cast = "(" + T2.str() + ")"; + } - switch(op) { - case OpAdd: - s += "__a + __b;"; - break; - case OpAddl: - s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";"; - break; - case OpAddlHi: - s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";"; - break; - case OpAddw: - s += "__a + " + Extend(typestr, "__b") + ";"; - break; - case OpAddwHi: - s += "__a + " + Extend(typestr, "__b", 1) + ";"; - break; - case OpSub: - s += "__a - __b;"; - break; - case OpSubl: - s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";"; - break; - case OpSublHi: - s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";"; - break; - case OpSubw: - s += "__a - " + Extend(typestr, "__b") + ";"; - break; - case OpSubwHi: - s += "__a - " + Extend(typestr, "__b", 1) + ";"; - break; - case OpMulN: - s += "__a * " + Duplicate(nElts, typestr, "__b") + ";"; - break; - case OpMulLane: - s += "__a * " + SplatLane(nElts, "__b", "__c") + ";"; - break; - case OpMulXLane: - s += MangleName("vmulx", typestr, ClassS) + "(__a, " + - SplatLane(nElts, "__b", "__c") + ");"; - break; - case OpMul: - s += "__a * __b;"; - break; - case OpFMlaN: - s += MangleName("vfma", typestr, ClassS); - s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");"; - break; - case OpFMlsN: - s += MangleName("vfms", typestr, ClassS); - s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");"; - break; - case OpMullLane: - s += MangleName("vmull", typestr, ClassS) + "(__a, " + - SplatLane(nElts, "__b", "__c") + ");"; - break; - case OpMullHiLane: - s += MangleName("vmull", typestr, ClassS) + "(" + - GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; - break; - case OpMlaN: - s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");"; - break; - case OpMlaLane: - s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");"; - break; - case OpMla: - s += "__a + (__b * __c);"; - break; - case OpMlalN: - s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + - Duplicate(nElts, typestr, "__c") + ");"; - break; - case OpMlalLane: - s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + - SplatLane(nElts, "__c", "__d") + ");"; - break; - case OpMlalHiLane: - s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" + - GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; - break; - case OpMlal: - s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; - break; - case OpMullHi: - s += Gen2OpWith2High(typestr, "vmull", "__a", "__b"); - break; - case OpMullHiN: - s += MangleName("vmull_n", typestr, ClassS); - s += "(" + GetHigh("__a", typestr) + ", __b);"; - return s; - case OpMlalHi: - s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c"); - break; - case OpMlalHiN: - s += MangleName("vmlal_n", typestr, ClassS); - s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; - return s; - case OpMlsN: - s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");"; - break; - case OpMlsLane: - s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");"; - break; - case OpFMSLane: - s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; - s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; - s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; - s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; - break; - case OpFMSLaneQ: - s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; - s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; - s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; - s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; - break; - case OpMls: - s += "__a - (__b * __c);"; - break; - case OpMlslN: - s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + - Duplicate(nElts, typestr, "__c") + ");"; - break; - case OpMlslLane: - s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + - SplatLane(nElts, "__c", "__d") + ");"; - break; - case OpMlslHiLane: - s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" + - GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; - break; - case OpMlsl: - s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; - break; - case OpMlslHi: - s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c"); - break; - case OpMlslHiN: - s += MangleName("vmlsl_n", typestr, ClassS); - s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; - break; - case OpQDMullLane: - s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + - SplatLane(nElts, "__b", "__c") + ");"; - break; - case OpQDMullHiLane: - s += MangleName("vqdmull", typestr, ClassS) + "(" + - GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; - break; - case OpQDMlalLane: - s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " + - SplatLane(nElts, "__c", "__d") + ");"; - break; - case OpQDMlalHiLane: - s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " + - GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; - break; - case OpQDMlslLane: - s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " + - SplatLane(nElts, "__c", "__d") + ");"; - break; - case OpQDMlslHiLane: - s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " + - GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; - break; - case OpQDMulhLane: - s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + - SplatLane(nElts, "__b", "__c") + ");"; - break; - case OpQRDMulhLane: - s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + - SplatLane(nElts, "__b", "__c") + ");"; - break; - case OpEq: - s += "(" + ts + ")(__a == __b);"; - break; - case OpGe: - s += "(" + ts + ")(__a >= __b);"; - break; - case OpLe: - s += "(" + ts + ")(__a <= __b);"; - break; - case OpGt: - s += "(" + ts + ")(__a > __b);"; - break; - case OpLt: - s += "(" + ts + ")(__a < __b);"; - break; - case OpNeg: - s += " -__a;"; - break; - case OpNot: - s += " ~__a;"; - break; - case OpAnd: - s += "__a & __b;"; - break; - case OpOr: - s += "__a | __b;"; - break; - case OpXor: - s += "__a ^ __b;"; - break; - case OpAndNot: - s += "__a & ~__b;"; - break; - case OpOrNot: - s += "__a | ~__b;"; - break; - case OpCast: - s += "(" + ts + ")__a;"; - break; - case OpConcat: - s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a"; - s += ", (int64x1_t)__b, 0, 1);"; - break; - case OpHi: - // nElts is for the result vector, so the source is twice that number. - s += "__builtin_shufflevector(__a, __a"; - for (unsigned i = nElts; i < nElts * 2; ++i) - s += ", " + utostr(i); - s+= ");"; - break; - case OpLo: - s += "__builtin_shufflevector(__a, __a"; - for (unsigned i = 0; i < nElts; ++i) - s += ", " + utostr(i); - s+= ");"; - break; - case OpDup: - s += Duplicate(nElts, typestr, "__a") + ";"; - break; - case OpDupLane: - s += SplatLane(nElts, "__a", "__b") + ";"; - break; - case OpSelect: - // ((0 & 1) | (~0 & 2)) - s += "(" + ts + ")"; - ts = TypeString(proto[1], typestr); - s += "((__a & (" + ts + ")__b) | "; - s += "(~__a & (" + ts + ")__c));"; - break; - case OpRev16: - s += "__builtin_shufflevector(__a, __a"; - for (unsigned i = 2; i <= nElts; i += 2) - for (unsigned j = 0; j != 2; ++j) - s += ", " + utostr(i - j - 1); - s += ");"; - break; - case OpRev32: { - unsigned WordElts = nElts >> (1 + (int)quad); - s += "__builtin_shufflevector(__a, __a"; - for (unsigned i = WordElts; i <= nElts; i += WordElts) - for (unsigned j = 0; j != WordElts; ++j) - s += ", " + utostr(i - j - 1); - s += ");"; - break; - } - case OpRev64: { - unsigned DblWordElts = nElts >> (int)quad; - s += "__builtin_shufflevector(__a, __a"; - for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts) - for (unsigned j = 0; j != DblWordElts; ++j) - s += ", " + utostr(i - j - 1); - s += ");"; - break; - } - case OpXtnHi: { - s = TypeString(proto[1], typestr) + " __a1 = " + - MangleName("vmovn", typestr, ClassS) + "(__b);\n " + - "return __builtin_shufflevector(__a, __a1"; - for (unsigned i = 0; i < nElts * 4; ++i) - s += ", " + utostr(i); - s += ");"; - break; - } - case OpSqxtunHi: { - s = TypeString(proto[1], typestr) + " __a1 = " + - MangleName("vqmovun", typestr, ClassS) + "(__b);\n " + - "return __builtin_shufflevector(__a, __a1"; - for (unsigned i = 0; i < nElts * 4; ++i) - s += ", " + utostr(i); - s += ");"; - break; - } - case OpQxtnHi: { - s = TypeString(proto[1], typestr) + " __a1 = " + - MangleName("vqmovn", typestr, ClassS) + "(__b);\n " + - "return __builtin_shufflevector(__a, __a1"; - for (unsigned i = 0; i < nElts * 4; ++i) - s += ", " + utostr(i); - s += ");"; - break; - } - case OpFcvtnHi: { - std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16"; - s = TypeString(proto[1], typestr) + " __a1 = " + - MangleName(FName, typestr, ClassS) + "(__b);\n " + - "return __builtin_shufflevector(__a, __a1"; - for (unsigned i = 0; i < nElts * 4; ++i) - s += ", " + utostr(i); - s += ");"; - break; - } - case OpFcvtlHi: { - std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32"; - s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) + - ";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);"; - break; - } - case OpFcvtxnHi: { - s = TypeString(proto[1], typestr) + " __a1 = " + - MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " + - "return __builtin_shufflevector(__a, __a1"; - for (unsigned i = 0; i < nElts * 4; ++i) - s += ", " + utostr(i); - s += ");"; - break; - } - case OpUzp1: - s += "__builtin_shufflevector(__a, __b"; - for (unsigned i = 0; i < nElts; i++) - s += ", " + utostr(2*i); - s += ");"; - break; - case OpUzp2: - s += "__builtin_shufflevector(__a, __b"; - for (unsigned i = 0; i < nElts; i++) - s += ", " + utostr(2*i+1); - s += ");"; - break; - case OpZip1: - s += "__builtin_shufflevector(__a, __b"; - for (unsigned i = 0; i < (nElts/2); i++) - s += ", " + utostr(i) + ", " + utostr(i+nElts); - s += ");"; - break; - case OpZip2: - s += "__builtin_shufflevector(__a, __b"; - for (unsigned i = nElts/2; i < nElts; i++) - s += ", " + utostr(i) + ", " + utostr(i+nElts); - s += ");"; - break; - case OpTrn1: - s += "__builtin_shufflevector(__a, __b"; - for (unsigned i = 0; i < (nElts/2); i++) - s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts); - s += ");"; - break; - case OpTrn2: - s += "__builtin_shufflevector(__a, __b"; - for (unsigned i = 0; i < (nElts/2); i++) - s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts); - s += ");"; - break; - case OpAbdl: { - std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)"; - if (typestr[0] != 'U') { - // vabd results are always unsigned and must be zero-extended. - std::string utype = "U" + typestr.str(); - s += "(" + TypeString(proto[0], typestr) + ")"; - abd = "(" + TypeString('d', utype) + ")" + abd; - s += Extend(utype, abd) + ";"; - } else { - s += Extend(typestr, abd) + ";"; + for (unsigned J = 0; J < T.getNumVectors(); ++J) + S += Cast + V.getName() + ".val[" + utostr(J) + "], "; + continue; } - break; - } - case OpAbdlHi: - s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b"); - break; - case OpAddhnHi: { - std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)"; - s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn); - s += ";"; - break; - } - case OpRAddhnHi: { - std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)"; - s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn); - s += ";"; - break; - } - case OpSubhnHi: { - std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)"; - s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn); - s += ";"; - break; - } - case OpRSubhnHi: { - std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)"; - s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn); - s += ";"; - break; - } - case OpAba: - s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);"; - break; - case OpAbal: - s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);"; - break; - case OpAbalHi: - s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c"); - break; - case OpQDMullHi: - s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b"); - break; - case OpQDMullHiN: - s += MangleName("vqdmull_n", typestr, ClassS); - s += "(" + GetHigh("__a", typestr) + ", __b);"; - return s; - case OpQDMlalHi: - s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c"); - break; - case OpQDMlalHiN: - s += MangleName("vqdmlal_n", typestr, ClassS); - s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; - return s; - case OpQDMlslHi: - s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c"); - break; - case OpQDMlslHiN: - s += MangleName("vqdmlsl_n", typestr, ClassS); - s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; - return s; - case OpDiv: - s += "__a / __b;"; - break; - case OpMovlHi: { - s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " + - MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s; - s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS); - s += "(__a1, 0);"; - break; - } - case OpLongHi: { - // Another local variable __a1 is needed for calling a Macro, - // or using __a will have naming conflict when Macro expanding. - s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " + - MangleName("vget_high", typestr, ClassS) + "(__a); \\\n"; - s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) + - "(__a1, __b);"; - break; - } - case OpNarrowHi: { - s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " + - MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));"; - break; - } - case OpCopyLane: { - s += TypeString('s', typestr) + " __c2 = " + - MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " + - MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);"; - break; - } - case OpCopyQLane: { - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode + - "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);"; - break; - } - case OpCopyLaneQ: { - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode + - "(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);"; - break; - } - case OpScalarMulLane: { - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode + - "(__b, __c);\\\n __a * __d1;"; - break; - } - case OpScalarMulLaneQ: { - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode + - "(__b, __c);\\\n __a * __d1;"; - break; - } - case OpScalarMulXLane: { - bool dummy = false; - char type = ClassifyType(typestr, dummy, dummy, dummy); - if (type == 'f') type = 's'; - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode + - "(__b, __c);\\\n vmulx" + type + "_" + - typeCode + "(__a, __d1);"; - break; - } - case OpScalarMulXLaneQ: { - bool dummy = false; - char type = ClassifyType(typestr, dummy, dummy, dummy); - if (type == 'f') type = 's'; - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + - typeCode + "(__b, __c);\\\n vmulx" + type + - "_" + typeCode + "(__a, __d1);"; - break; - } - case OpScalarVMulXLane: { - bool dummy = false; - char type = ClassifyType(typestr, dummy, dummy, dummy); - if (type == 'f') type = 's'; - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += TypeString('s', typestr) + " __d1 = vget_lane_" + - typeCode + "(__a, 0);\\\n" + - " " + TypeString('s', typestr) + " __e1 = vget_lane_" + - typeCode + "(__b, __c);\\\n" + - " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" + - typeCode + "(__d1, __e1);\\\n" + - " " + TypeString('d', typestr) + " __g1;\\\n" + - " vset_lane_" + typeCode + "(__f1, __g1, __c);"; - break; - } + std::string Arg; + Type CastToType = T; + if (hasSplat() && I == getSplatIdx()) { + Arg = "(" + BaseType.str() + ") {"; + for (unsigned J = 0; J < BaseType.getNumElements(); ++J) { + if (J != 0) + Arg += ", "; + Arg += V.getName(); + } + Arg += "}"; - case OpScalarVMulXLaneQ: { - bool dummy = false; - char type = ClassifyType(typestr, dummy, dummy, dummy); - if (type == 'f') type = 's'; - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += TypeString('s', typestr) + " __d1 = vget_lane_" + - typeCode + "(__a, 0);\\\n" + - " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" + - typeCode + "(__b, __c);\\\n" + - " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" + - typeCode + "(__d1, __e1);\\\n" + - " " + TypeString('d', typestr) + " __g1;\\\n" + - " vset_lane_" + typeCode + "(__f1, __g1, 0);"; - break; - } - case OpScalarQDMullLane: { - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + - "vget_lane_" + typeCode + "(b, __c));"; - break; - } - case OpScalarQDMullLaneQ: { - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + - "vgetq_lane_" + typeCode + "(b, __c));"; - break; - } - case OpScalarQDMulHiLane: { - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + - "vget_lane_" + typeCode + "(__b, __c));"; - break; - } - case OpScalarQDMulHiLaneQ: { - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + - "vgetq_lane_" + typeCode + "(__b, __c));"; - break; - } - case OpScalarQRDMulHiLane: { - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + - "vget_lane_" + typeCode + "(__b, __c));"; - break; - } - case OpScalarQRDMulHiLaneQ: { - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + - "vgetq_lane_" + typeCode + "(__b, __c));"; - break; - } - case OpScalarGetLane:{ - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - if (quad) { - s += "int16x8_t __a1 = vreinterpretq_s16_f16(__a);\\\n"; - s += " vgetq_lane_s16(__a1, __b);"; + CastToType = BaseType; } else { - s += "int16x4_t __a1 = vreinterpret_s16_f16(__a);\\\n"; - s += " vget_lane_s16(__a1, __b);"; + Arg = V.getName(); } - break; - } - case OpScalarSetLane:{ - std::string typeCode = ""; - InstructionTypeCode(typestr, ClassS, quad, typeCode); - s += "int16_t __a1 = (int16_t)__a;\\\n"; - if (quad) { - s += " int16x8_t __b1 = vreinterpretq_s16_f16(b);\\\n"; - s += " int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c);\\\n"; - s += " vreinterpretq_f16_s16(__b2);"; - } else { - s += " int16x4_t __b1 = vreinterpret_s16_f16(b);\\\n"; - s += " int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c);\\\n"; - s += " vreinterpret_f16_s16(__b2);"; + + // Check if an explicit cast is needed. + if (CastToType.isVector()) { + CastToType.makeInteger(8, true); + Arg = "(" + CastToType.str() + ")" + Arg; } - break; + + S += Arg + ", "; } - default: - PrintFatalError("unknown OpKind!"); + // Extra constant integer to hold type class enum for this function, e.g. s8 + if (getClassKind(true) == ClassB) { + Type ThisTy = getReturnType(); + if (Proto[0] == 'v' || Proto[0] == 'f' || Proto[0] == 'F') + ThisTy = getParamType(0); + if (ThisTy.isPointer()) + ThisTy = getParamType(1); + + S += utostr(ThisTy.getNeonEnum()); + } else { + // Remove extraneous ", ". + S.pop_back(); + S.pop_back(); } - return s; + S += ");"; + + std::string RetExpr; + if (!SRet && !RetVar.getType().isVoid()) + RetExpr = RetVar.getName() + " = "; + + OS << " " << RetExpr << S; + emitNewLine(); } -static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { - unsigned mod = proto[0]; +void Intrinsic::emitBody(StringRef CallPrefix) { + std::vector<std::string> Lines; - if (mod == 'v' || mod == 'f' || mod == 'F') - mod = proto[1]; + assert(RetVar.getType() == Types[0]); + // Create a return variable, if we're not void. + if (!RetVar.getType().isVoid()) { + OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; + emitNewLine(); + } - bool quad = false; - bool poly = false; - bool usgn = false; - bool scal = false; - bool cnst = false; - bool pntr = false; + if (!Body || Body->getValues().size() == 0) { + // Nothing specific to output - must output a builtin. + emitBodyAsBuiltinCall(); + return; + } - // Base type to get the type string for. - char type = ClassifyType(typestr, quad, poly, usgn); + // We have a list of "things to output". The last should be returned. + for (auto *I : Body->getValues()) { + if (StringInit *SI = dyn_cast<StringInit>(I)) { + Lines.push_back(replaceParamsIn(SI->getAsString())); + } else if (DagInit *DI = dyn_cast<DagInit>(I)) { + DagEmitter DE(*this, CallPrefix); + Lines.push_back(DE.emitDag(DI).second + ";"); + } + } - // Based on the modifying character, change the type and width if necessary. - type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); + assert(Lines.size() && "Empty def?"); + if (!RetVar.getType().isVoid()) + Lines.back().insert(0, RetVar.getName() + " = "); - NeonTypeFlags::EltType ET; - switch (type) { - case 'c': - ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; - break; - case 's': - ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; - break; - case 'i': - ET = NeonTypeFlags::Int32; - break; - case 'l': - ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64; - break; - case 'h': - ET = NeonTypeFlags::Float16; - break; - case 'f': - ET = NeonTypeFlags::Float32; - break; - case 'd': - ET = NeonTypeFlags::Float64; - break; - default: - PrintFatalError("unhandled type!"); + for (auto &L : Lines) { + OS << " " << L; + emitNewLine(); } - NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g'); - return Flags.getFlags(); } -// We don't check 'a' in this function, because for builtin function the -// argument matching to 'a' uses a vector type splatted from a scalar type. -static bool ProtoHasScalar(const std::string proto) -{ - return (proto.find('s') != std::string::npos - || proto.find('z') != std::string::npos - || proto.find('r') != std::string::npos - || proto.find('b') != std::string::npos - || proto.find('$') != std::string::npos - || proto.find('y') != std::string::npos - || proto.find('o') != std::string::npos); +void Intrinsic::emitReturn() { + if (RetVar.getType().isVoid()) + return; + if (UseMacro) + OS << " " << RetVar.getName() << ";"; + else + OS << " return " << RetVar.getName() << ";"; + emitNewLine(); } -// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a) -static std::string GenBuiltin(const std::string &name, const std::string &proto, - StringRef typestr, ClassKind ck) { - std::string s; +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { + // At this point we should only be seeing a def. + DefInit *DefI = cast<DefInit>(DI->getOperator()); + std::string Op = DefI->getAsString(); + + if (Op == "cast" || Op == "bitcast") + return emitDagCast(DI, Op == "bitcast"); + if (Op == "shuffle") + return emitDagShuffle(DI); + if (Op == "dup") + return emitDagDup(DI); + if (Op == "splat") + return emitDagSplat(DI); + if (Op == "save_temp") + return emitDagSaveTemp(DI); + if (Op == "op") + return emitDagOp(DI); + if (Op == "call") + return emitDagCall(DI); + if (Op == "name_replace") + return emitDagNameReplace(DI); + if (Op == "literal") + return emitDagLiteral(DI); + assert_with_loc(false, "Unknown operation!"); + return std::make_pair(Type::getVoid(), ""); +} - // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit - // sret-like argument. - bool sret = IsMultiVecProto(proto[0]); +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { + std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); + if (DI->getNumArgs() == 2) { + // Unary op. + std::pair<Type, std::string> R = + emitDagArg(DI->getArg(1), DI->getArgName(1)); + return std::make_pair(R.first, Op + R.second); + } else { + assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); + std::pair<Type, std::string> R1 = + emitDagArg(DI->getArg(1), DI->getArgName(1)); + std::pair<Type, std::string> R2 = + emitDagArg(DI->getArg(2), DI->getArgName(2)); + assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); + return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); + } +} - bool define = UseMacro(proto); +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) { + std::vector<Type> Types; + std::vector<std::string> Values; + for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { + std::pair<Type, std::string> R = + emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1)); + Types.push_back(R.first); + Values.push_back(R.second); + } - // Check if the prototype has a scalar operand with the type of the vector - // elements. If not, bitcasting the args will take care of arg checking. - // The actual signedness etc. will be taken care of with special enums. - if (!ProtoHasScalar(proto)) - ck = ClassB; + // Look up the called intrinsic. + std::string N; + if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) + N = SI->getAsUnquotedString(); + else + N = emitDagArg(DI->getArg(0), "").second; + Intrinsic *Callee = Intr.Emitter.getIntrinsic(N, Types); + assert(Callee && "getIntrinsic should not return us nullptr!"); - if (proto[0] != 'v') { - std::string ts = TypeString(proto[0], typestr); + // Make sure the callee is known as an early def. + Callee->setNeededEarly(); + Intr.Dependencies.insert(Callee); - if (define) { - if (sret) - s += ts + " r; "; - else - s += "(" + ts + ")"; - } else if (sret) { - s += ts + " r; "; - } else { - s += "return (" + ts + ")"; - } + // Now create the call itself. + std::string S = CallPrefix.str() + Callee->getMangledName(true) + "("; + for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { + if (I != 0) + S += ", "; + S += Values[I]; } + S += ")"; - bool splat = proto.find('a') != std::string::npos; + return std::make_pair(Callee->getReturnType(), S); +} - s += "__builtin_neon_"; - if (splat) { - // Call the non-splat builtin: chop off the "_n" suffix from the name. - std::string vname(name, 0, name.size()-2); - s += MangleName(vname, typestr, ck); - } else { - s += MangleName(name, typestr, ck); +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, + bool IsBitCast){ + // (cast MOD* VAL) -> cast VAL to type given by MOD. + std::pair<Type, std::string> R = emitDagArg( + DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1)); + Type castToType = R.first; + for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { + + // MOD can take several forms: + // 1. $X - take the type of parameter / variable X. + // 2. The value "R" - take the type of the return type. + // 3. a type string + // 4. The value "U" or "S" to switch the signedness. + // 5. The value "H" or "D" to half or double the bitwidth. + // 6. The value "8" to convert to 8-bit (signed) integer lanes. + if (DI->getArgName(ArgIdx).size()) { + assert_with_loc(Intr.Variables.find(DI->getArgName(ArgIdx)) != + Intr.Variables.end(), + "Variable not found"); + castToType = Intr.Variables[DI->getArgName(ArgIdx)].getType(); + } else { + StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); + assert_with_loc(SI, "Expected string type or $Name for cast type"); + + if (SI->getAsUnquotedString() == "R") { + castToType = Intr.getReturnType(); + } else if (SI->getAsUnquotedString() == "U") { + castToType.makeUnsigned(); + } else if (SI->getAsUnquotedString() == "S") { + castToType.makeSigned(); + } else if (SI->getAsUnquotedString() == "H") { + castToType.halveLanes(); + } else if (SI->getAsUnquotedString() == "D") { + castToType.doubleLanes(); + } else if (SI->getAsUnquotedString() == "8") { + castToType.makeInteger(8, true); + } else { + castToType = Type::fromTypedefName(SI->getAsUnquotedString()); + assert_with_loc(!castToType.isVoid(), "Unknown typedef"); + } + } } - s += "("; - // Pass the address of the return variable as the first argument to sret-like - // builtins. - if (sret) - s += "&r, "; + std::string S; + if (IsBitCast) { + // Emit a reinterpret cast. The second operand must be an lvalue, so create + // a temporary. + std::string N = "reint"; + unsigned I = 0; + while (Intr.Variables.find(N) != Intr.Variables.end()) + N = "reint" + utostr(++I); + Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); - char arg = 'a'; - for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { - std::string args = std::string(&arg, 1); + Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " + << R.second << ";"; + Intr.emitNewLine(); - // Use the local temporaries instead of the macro arguments. - args = "__" + args; + S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; + } else { + // Emit a normal (static) cast. + S = "(" + castToType.str() + ")(" + R.second + ")"; + } - bool argQuad = false; - bool argPoly = false; - bool argUsgn = false; - bool argScalar = false; - bool dummy = false; - char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn); - argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar, - dummy, dummy); + return std::make_pair(castToType, S); +} - // Handle multiple-vector values specially, emitting each subvector as an - // argument to the __builtin. - unsigned NumOfVec = 0; - if (proto[i] >= '2' && proto[i] <= '4') { - NumOfVec = proto[i] - '0'; - } else if (proto[i] >= 'B' && proto[i] <= 'D') { - NumOfVec = proto[i] - 'A' + 1; +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ + // See the documentation in arm_neon.td for a description of these operators. + class LowHalf : public SetTheory::Operator { + public: + virtual void anchor() {} + virtual ~LowHalf() {} + virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, + ArrayRef<SMLoc> Loc) { + SetTheory::RecSet Elts2; + ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); + Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); } - - if (NumOfVec > 0) { - // Check if an explicit cast is needed. - if (argType != 'c' || argPoly || argUsgn) - args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args; - - for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) { - s += args + ".val[" + utostr(vi) + "]"; - if ((vi + 1) < ve) - s += ", "; + }; + class HighHalf : public SetTheory::Operator { + public: + virtual void anchor() {} + virtual ~HighHalf() {} + virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, + ArrayRef<SMLoc> Loc) { + SetTheory::RecSet Elts2; + ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); + Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); + } + }; + class Rev : public SetTheory::Operator { + unsigned ElementSize; + + public: + Rev(unsigned ElementSize) : ElementSize(ElementSize) {} + virtual void anchor() {} + virtual ~Rev() {} + virtual void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, + ArrayRef<SMLoc> Loc) { + SetTheory::RecSet Elts2; + ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); + + int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); + VectorSize /= ElementSize; + + std::vector<Record *> Revved; + for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { + for (int LI = VectorSize - 1; LI >= 0; --LI) { + Revved.push_back(Elts2[VI + LI]); + } } - if ((i + 1) < e) - s += ", "; - continue; + Elts.insert(Revved.begin(), Revved.end()); + } + }; + class MaskExpander : public SetTheory::Expander { + unsigned N; + + public: + MaskExpander(unsigned N) : N(N) {} + virtual void anchor() {} + virtual ~MaskExpander() {} + virtual void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) { + unsigned Addend = 0; + if (R->getName() == "mask0") + Addend = 0; + else if (R->getName() == "mask1") + Addend = N; + else + return; + for (unsigned I = 0; I < N; ++I) + Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); } + }; - if (splat && (i + 1) == e) - args = Duplicate(GetNumElements(typestr, argQuad), typestr, args); + // (shuffle arg1, arg2, sequence) + std::pair<Type, std::string> Arg1 = + emitDagArg(DI->getArg(0), DI->getArgName(0)); + std::pair<Type, std::string> Arg2 = + emitDagArg(DI->getArg(1), DI->getArgName(1)); + assert_with_loc(Arg1.first == Arg2.first, + "Different types in arguments to shuffle!"); + + SetTheory ST; + LowHalf LH; + HighHalf HH; + MaskExpander ME(Arg1.first.getNumElements()); + Rev R(Arg1.first.getElementSizeInBits()); + SetTheory::RecSet Elts; + ST.addOperator("lowhalf", &LH); + ST.addOperator("highhalf", &HH); + ST.addOperator("rev", &R); + ST.addExpander("MaskExpand", &ME); + ST.evaluate(DI->getArg(2), Elts, ArrayRef<SMLoc>()); + + std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; + for (auto &E : Elts) { + StringRef Name = E->getName(); + assert_with_loc(Name.startswith("sv"), + "Incorrect element kind in shuffle mask!"); + S += ", " + Name.drop_front(2).str(); + } + S += ")"; + + // Recalculate the return type - the shuffle may have halved or doubled it. + Type T(Arg1.first); + if (Elts.size() > T.getNumElements()) { + assert_with_loc( + Elts.size() == T.getNumElements() * 2, + "Can only double or half the number of elements in a shuffle!"); + T.doubleLanes(); + } else if (Elts.size() < T.getNumElements()) { + assert_with_loc( + Elts.size() == T.getNumElements() / 2, + "Can only double or half the number of elements in a shuffle!"); + T.halveLanes(); + } + + return std::make_pair(T, S); +} - // Check if an explicit cast is needed. - if ((splat || !argScalar) && - ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) { - std::string argTypeStr = "c"; - if (ck != ClassB) - argTypeStr = argType; - if (argQuad) - argTypeStr = "Q" + argTypeStr; - args = "(" + TypeString('d', argTypeStr) + ")" + args; - } +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { + assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); + std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0)); + assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); - s += args; - if ((i + 1) < e) - s += ", "; + Type T = Intr.getBaseType(); + assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); + std::string S = "(" + T.str() + ") {"; + for (unsigned I = 0; I < T.getNumElements(); ++I) { + if (I != 0) + S += ", "; + S += A.second; } + S += "}"; - // Extra constant integer to hold type class enum for this function, e.g. s8 - if (ck == ClassB) - s += ", " + utostr(GetNeonEnum(proto, typestr)); + return std::make_pair(T, S); +} - s += ");"; +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { + assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); + std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0)); + std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), DI->getArgName(1)); - if (proto[0] != 'v' && sret) { - if (define) - s += " r;"; - else - s += " return r;"; + assert_with_loc(B.first.isScalar(), + "splat() requires a scalar int as the second argument"); + + std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; + for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { + S += ", " + B.second; } - return s; + S += ")"; + + return std::make_pair(Intr.getBaseType(), S); } -static std::string GenBuiltinDef(const std::string &name, - const std::string &proto, - StringRef typestr, ClassKind ck) { - std::string s("BUILTIN(__builtin_neon_"); +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { + assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); + std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1)); - // If all types are the same size, bitcasting the args will take care - // of arg checking. The actual signedness etc. will be taken care of with - // special enums. - if (!ProtoHasScalar(proto)) - ck = ClassB; + assert_with_loc(!A.first.isVoid(), + "Argument to save_temp() must have non-void type!"); - s += MangleName(name, typestr, ck); - s += ", \""; + std::string N = DI->getArgName(0); + assert_with_loc(N.size(), "save_temp() expects a name as the first argument"); - for (unsigned i = 0, e = proto.size(); i != e; ++i) - s += BuiltinTypeString(proto[i], typestr, ck, i == 0); + assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), + "Variable already defined!"); + Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix); - // Extra constant integer to hold type class enum for this function, e.g. s8 - if (ck == ClassB) - s += "i"; + std::string S = + A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; - s += "\", \"n\")"; - return s; -} - -static std::string GenIntrinsic(const std::string &name, - const std::string &proto, - StringRef outTypeStr, StringRef inTypeStr, - OpKind kind, ClassKind classKind) { - assert(!proto.empty() && ""); - bool define = UseMacro(proto) && kind != OpUnavailable; - std::string s; - - // static always inline + return type - if (define) - s += "#define "; - else - s += "__ai " + TypeString(proto[0], outTypeStr) + " "; - - // Function name with type suffix - std::string mangledName = MangleName(name, outTypeStr, ClassS); - if (outTypeStr != inTypeStr) { - // If the input type is different (e.g., for vreinterpret), append a suffix - // for the input type. String off a "Q" (quad) prefix so that MangleName - // does not insert another "q" in the name. - unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); - StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); - mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); - } - s += mangledName; - - // Function arguments - s += GenArgs(proto, inTypeStr, name); - - // Definition. - if (define) { - s += " __extension__ ({ \\\n "; - s += GenMacroLocals(proto, inTypeStr, name); - } else if (kind == OpUnavailable) { - s += " __attribute__((unavailable));\n"; - return s; - } else - s += " {\n "; - - if (kind != OpNone) - s += GenOpString(name, kind, proto, outTypeStr); - else - s += GenBuiltin(name, proto, outTypeStr, classKind); - if (define) - s += " })"; - else - s += " }"; - s += "\n"; - return s; + return std::make_pair(Type::getVoid(), S); } -/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h -/// is comprised of type definitions and function declarations. -void NeonEmitter::run(raw_ostream &OS) { - OS << - "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------" - "---===\n" - " *\n" - " * Permission is hereby granted, free of charge, to any person obtaining " - "a copy\n" - " * of this software and associated documentation files (the \"Software\")," - " to deal\n" - " * in the Software without restriction, including without limitation the " - "rights\n" - " * to use, copy, modify, merge, publish, distribute, sublicense, " - "and/or sell\n" - " * copies of the Software, and to permit persons to whom the Software is\n" - " * furnished to do so, subject to the following conditions:\n" - " *\n" - " * The above copyright notice and this permission notice shall be " - "included in\n" - " * all copies or substantial portions of the Software.\n" - " *\n" - " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " - "EXPRESS OR\n" - " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " - "MERCHANTABILITY,\n" - " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " - "SHALL THE\n" - " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " - "OTHER\n" - " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " - "ARISING FROM,\n" - " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " - "DEALINGS IN\n" - " * THE SOFTWARE.\n" - " *\n" - " *===--------------------------------------------------------------------" - "---===\n" - " */\n\n"; - - OS << "#ifndef __ARM_NEON_H\n"; - OS << "#define __ARM_NEON_H\n\n"; +std::pair<Type, std::string> +Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { + std::string S = Intr.Name; - OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n"; - OS << "#error \"NEON support not enabled\"\n"; - OS << "#endif\n\n"; + assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); + std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); + std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); - OS << "#include <stdint.h>\n\n"; + size_t Idx = S.find(ToReplace); - // Emit NEON-specific scalar typedefs. - OS << "typedef float float32_t;\n"; - OS << "typedef __fp16 float16_t;\n"; + assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); + S.replace(Idx, ToReplace.size(), ReplaceWith); - OS << "#ifdef __aarch64__\n"; - OS << "typedef double float64_t;\n"; - OS << "#endif\n\n"; + return std::make_pair(Type::getVoid(), S); +} - // For now, signedness of polynomial types depends on target - OS << "#ifdef __aarch64__\n"; - OS << "typedef uint8_t poly8_t;\n"; - OS << "typedef uint16_t poly16_t;\n"; - OS << "typedef uint64_t poly64_t;\n"; - OS << "#else\n"; - OS << "typedef int8_t poly8_t;\n"; - OS << "typedef int16_t poly16_t;\n"; - OS << "#endif\n"; +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ + std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); + std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); + return std::make_pair(Type::fromTypedefName(Ty), Value); +} - // Emit Neon vector typedefs. - std::string TypedefTypes( - "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); - SmallVector<StringRef, 24> TDTypeVec; - ParseTypes(0, TypedefTypes, TDTypeVec); +std::pair<Type, std::string> +Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { + if (ArgName.size()) { + assert_with_loc(!Arg->isComplete(), + "Arguments must either be DAGs or names, not both!"); + assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), + "Variable not defined!"); + Variable &V = Intr.Variables[ArgName]; + return std::make_pair(V.getType(), V.getName()); + } - // Emit vector typedefs. - bool isA64 = false; - bool preinsert; - bool postinsert; - for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { - bool dummy, quad = false, poly = false; - char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); - preinsert = false; - postinsert = false; - - if (type == 'd' || (type == 'l' && poly)) { - preinsert = isA64? false: true; - isA64 = true; - } else { - postinsert = isA64? true: false; - isA64 = false; - } - if (postinsert) - OS << "#endif\n"; - if (preinsert) - OS << "#ifdef __aarch64__\n"; + assert(Arg && "Neither ArgName nor Arg?!"); + DagInit *DI = dyn_cast<DagInit>(Arg); + assert_with_loc(DI, "Arguments must either be DAGs or names!"); - if (poly) - OS << "typedef __attribute__((neon_polyvector_type("; - else - OS << "typedef __attribute__((neon_vector_type("; + return emitDag(DI); +} - unsigned nElts = GetNumElements(TDTypeVec[i], quad); - OS << utostr(nElts) << "))) "; - if (nElts < 10) - OS << " "; +std::string Intrinsic::generate() { + // Little endian intrinsics are simple and don't require any argument + // swapping. + OS << "#ifdef __LITTLE_ENDIAN__\n"; - OS << TypeString('s', TDTypeVec[i]); - OS << " " << TypeString('d', TDTypeVec[i]) << ";\n"; + generateImpl(false, "", ""); - } - postinsert = isA64? true: false; - if (postinsert) - OS << "#endif\n"; - OS << "\n"; + OS << "#else\n"; - // Emit struct typedefs. - isA64 = false; - for (unsigned vi = 2; vi != 5; ++vi) { - for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { - bool dummy, quad = false, poly = false; - char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); - preinsert = false; - postinsert = false; - - if (type == 'd' || (type == 'l' && poly)) { - preinsert = isA64? false: true; - isA64 = true; - } else { - postinsert = isA64? true: false; - isA64 = false; - } - if (postinsert) - OS << "#endif\n"; - if (preinsert) - OS << "#ifdef __aarch64__\n"; + // Big endian intrinsics are more complex. The user intended these + // intrinsics to operate on a vector "as-if" loaded by (V)LDR, + // but we load as-if (V)LD1. So we should swap all arguments and + // swap the return value too. + // + // If we call sub-intrinsics, we should call a version that does + // not re-swap the arguments! + generateImpl(true, "", "__noswap_"); - std::string ts = TypeString('d', TDTypeVec[i]); - std::string vs = TypeString('0' + vi, TDTypeVec[i]); - OS << "typedef struct " << vs << " {\n"; - OS << " " << ts << " val"; - OS << "[" << utostr(vi) << "]"; - OS << ";\n} "; - OS << vs << ";\n"; - OS << "\n"; - } + // If we're needed early, create a non-swapping variant for + // big-endian. + if (NeededEarly) { + generateImpl(false, "__noswap_", "__noswap_"); } - postinsert = isA64? true: false; - if (postinsert) - OS << "#endif\n"; - OS << "\n"; - - OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n"; + OS << "#endif\n\n"; - std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); + return OS.str(); +} - StringMap<ClassKind> EmittedMap; +void Intrinsic::generateImpl(bool ReverseArguments, + StringRef NamePrefix, StringRef CallPrefix) { + CurrentRecord = R; - // Emit vmovl, vmull and vabd intrinsics first so they can be used by other - // intrinsics. (Some of the saturating multiply instructions are also - // used to implement the corresponding "_lane" variants, but tablegen - // sorts the records into alphabetical order so that the "_lane" variants - // come after the intrinsics they use.) - emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap); + // If we call a macro, our local variables may be corrupted due to + // lack of proper lexical scoping. So, add a globally unique postfix + // to every variable. + // + // indexBody() should have set up the Dependencies set by now. + for (auto *I : Dependencies) + if (I->UseMacro) { + VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); + break; + } - // ARM intrinsics must be emitted before AArch64 intrinsics to ensure - // common intrinsics appear only once in the output stream. - // The check for uniquiness is done in emitIntrinsic. - // Emit ARM intrinsics. - for (unsigned i = 0, e = RV.size(); i != e; ++i) { - Record *R = RV[i]; + initVariables(); - // Skip AArch64 intrinsics; they will be emitted at the end. - bool isA64 = R->getValueAsBit("isA64"); - if (isA64) - continue; + emitPrototype(NamePrefix); - if (R->getName() != "VMOVL" && R->getName() != "VMULL" && - R->getName() != "VABD") - emitIntrinsic(OS, R, EmittedMap); + if (IsUnavailable) { + OS << " __attribute__((unavailable));"; + } else { + emitOpeningBrace(); + emitShadowedArgs(); + if (ReverseArguments) + emitArgumentReversal(); + emitBody(CallPrefix); + if (ReverseArguments) + emitReturnReversal(); + emitReturn(); + emitClosingBrace(); } + OS << "\n"; - // Emit AArch64-specific intrinsics. - OS << "#ifdef __aarch64__\n"; - - emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap); - emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap); + CurrentRecord = nullptr; +} - for (unsigned i = 0, e = RV.size(); i != e; ++i) { - Record *R = RV[i]; +void Intrinsic::indexBody() { + CurrentRecord = R; - // Skip ARM intrinsics already included above. - bool isA64 = R->getValueAsBit("isA64"); - if (!isA64) - continue; + initVariables(); + emitBody(""); + OS.str(""); - // Skip crypto temporarily, and will emit them all together at the end. - bool isCrypto = R->getValueAsBit("isCrypto"); - if (isCrypto) - continue; - - emitIntrinsic(OS, R, EmittedMap); - } + CurrentRecord = nullptr; +} - OS << "#ifdef __ARM_FEATURE_CRYPTO\n"; +//===----------------------------------------------------------------------===// +// NeonEmitter implementation +//===----------------------------------------------------------------------===// - for (unsigned i = 0, e = RV.size(); i != e; ++i) { - Record *R = RV[i]; +Intrinsic *NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) { + // First, look up the name in the intrinsic map. + assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), + ("Intrinsic '" + Name + "' not found!").str()); + std::vector<Intrinsic *> &V = IntrinsicMap[Name.str()]; + std::vector<Intrinsic *> GoodVec; + + // Create a string to print if we end up failing. + std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; + for (unsigned I = 0; I < Types.size(); ++I) { + if (I != 0) + ErrMsg += ", "; + ErrMsg += Types[I].str(); + } + ErrMsg += ")'\n"; + ErrMsg += "Available overloads:\n"; + + // Now, look through each intrinsic implementation and see if the types are + // compatible. + for (auto *I : V) { + ErrMsg += " - " + I->getReturnType().str() + " " + I->getMangledName(); + ErrMsg += "("; + for (unsigned A = 0; A < I->getNumParams(); ++A) { + if (A != 0) + ErrMsg += ", "; + ErrMsg += I->getParamType(A).str(); + } + ErrMsg += ")\n"; - // Skip crypto temporarily, and will emit them all together at the end. - bool isCrypto = R->getValueAsBit("isCrypto"); - if (!isCrypto) + if (I->getNumParams() != Types.size()) continue; - emitIntrinsic(OS, R, EmittedMap); + bool Good = true; + for (unsigned Arg = 0; Arg < Types.size(); ++Arg) { + if (I->getParamType(Arg) != Types[Arg]) { + Good = false; + break; + } + } + if (Good) + GoodVec.push_back(I); } - - OS << "#endif\n\n"; - OS << "#endif\n\n"; + assert_with_loc(GoodVec.size() > 0, + "No compatible intrinsic found - " + ErrMsg); + assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); - OS << "#undef __ai\n\n"; - OS << "#endif /* __ARM_NEON_H */\n"; + return GoodVec.front(); } -/// emitIntrinsic - Write out the arm_neon.h header file definitions for the -/// intrinsics specified by record R checking for intrinsic uniqueness. -void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R, - StringMap<ClassKind> &EmittedMap) { - std::string name = R->getValueAsString("Name"); +void NeonEmitter::createIntrinsic(Record *R, + SmallVectorImpl<Intrinsic *> &Out) { + std::string Name = R->getValueAsString("Name"); std::string Proto = R->getValueAsString("Prototype"); std::string Types = R->getValueAsString("Types"); + Record *OperationRec = R->getValueAsDef("Operation"); + bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes"); + bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); + std::string Guard = R->getValueAsString("ArchGuard"); + bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); + + // Set the global current record. This allows assert_with_loc to produce + // decent location information even when highly nested. + CurrentRecord = R; - SmallVector<StringRef, 16> TypeVec; - ParseTypes(R, Types, TypeVec); + ListInit *Body = OperationRec->getValueAsListInit("Ops"); - OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; + std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); - ClassKind classKind = ClassNone; + ClassKind CK = ClassNone; if (R->getSuperClasses().size() >= 2) - classKind = ClassMap[R->getSuperClasses()[1]]; - if (classKind == ClassNone && kind == OpNone) - PrintFatalError(R->getLoc(), "Builtin has no class kind"); - - for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { - if (kind == OpReinterpret) { - bool outQuad = false; - bool dummy = false; - (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); - for (unsigned srcti = 0, srcte = TypeVec.size(); - srcti != srcte; ++srcti) { - bool inQuad = false; - (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); - if (srcti == ti || inQuad != outQuad) + CK = ClassMap[R->getSuperClasses()[1]]; + + std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; + for (auto TS : TypeSpecs) { + if (CartesianProductOfTypes) { + Type DefaultT(TS, 'd'); + for (auto SrcTS : TypeSpecs) { + Type DefaultSrcT(SrcTS, 'd'); + if (TS == SrcTS || + DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) continue; - std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], - OpCast, ClassS); - if (EmittedMap.count(s)) - continue; - EmittedMap[s] = ClassS; - OS << s; + NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); } } else { - std::string s = - GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind); - if (EmittedMap.count(s)) - continue; - EmittedMap[s] = classKind; - OS << s; + NewTypeSpecs.push_back(std::make_pair(TS, TS)); } } - OS << "\n"; -} -static unsigned RangeFromType(const char mod, StringRef typestr) { - // base type to get the type string for. - bool quad = false, dummy = false; - char type = ClassifyType(typestr, quad, dummy, dummy); - type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy); - - switch (type) { - case 'c': - return (8 << (int)quad) - 1; - case 'h': - case 's': - return (4 << (int)quad) - 1; - case 'f': - case 'i': - return (2 << (int)quad) - 1; - case 'd': - case 'l': - return (1 << (int)quad) - 1; - default: - PrintFatalError("unhandled type!"); - } -} + std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end()); + std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()); -static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) { - // base type to get the type string for. - bool dummy = false; - char type = ClassifyType(typestr, dummy, dummy, dummy); - type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy); + for (auto &I : NewTypeSpecs) { + Intrinsic *IT = new Intrinsic(R, Name, Proto, I.first, I.second, CK, Body, + *this, Guard, IsUnavailable, BigEndianSafe); - switch (type) { - case 'c': - return 7; - case 'h': - case 's': - return 15; - case 'f': - case 'i': - return 31; - case 'd': - case 'l': - return 63; - default: - PrintFatalError("unhandled type!"); + IntrinsicMap[Name].push_back(IT); + Out.push_back(IT); } -} -/// Generate the ARM and AArch64 intrinsic range checking code for -/// shift/lane immediates, checking for unique declarations. -void -NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, - StringMap<ClassKind> &A64IntrinsicMap, - bool isA64RangeCheck) { - std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); - StringMap<OpKind> EmittedMap; + CurrentRecord = nullptr; +} - // Generate the intrinsic range checking code for shift/lane immediates. - if (isA64RangeCheck) - OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n"; - else - OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; +/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def +/// declaration of builtins, checking for unique builtin declarations. +void NeonEmitter::genBuiltinsDef(raw_ostream &OS, + SmallVectorImpl<Intrinsic *> &Defs) { + OS << "#ifdef GET_NEON_BUILTINS\n"; - for (unsigned i = 0, e = RV.size(); i != e; ++i) { - Record *R = RV[i]; + // We only want to emit a builtin once, and we want to emit them in + // alphabetical order, so use a std::set. + std::set<std::string> Builtins; - OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; - if (k != OpNone) + for (auto *Def : Defs) { + if (Def->hasBody()) continue; - - std::string name = R->getValueAsString("Name"); - std::string Proto = R->getValueAsString("Prototype"); - std::string Types = R->getValueAsString("Types"); - std::string Rename = name + "@" + Proto; - // Functions with 'a' (the splat code) in the type prototype should not get // their own builtin as they use the non-splat variant. - if (Proto.find('a') != std::string::npos) + if (Def->hasSplat()) continue; - // Functions which do not have an immediate do not need to have range - // checking code emitted. - size_t immPos = Proto.find('i'); - if (immPos == std::string::npos) - continue; - - SmallVector<StringRef, 16> TypeVec; - ParseTypes(R, Types, TypeVec); + std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \""; - if (R->getSuperClasses().size() < 2) - PrintFatalError(R->getLoc(), "Builtin has no class kind"); - - ClassKind ck = ClassMap[R->getSuperClasses()[1]]; - if (!ProtoHasScalar(Proto)) - ck = ClassB; - - // Do not include AArch64 range checks if not generating code for AArch64. - bool isA64 = R->getValueAsBit("isA64"); - if (!isA64RangeCheck && isA64) - continue; + S += Def->getBuiltinTypeStr(); + S += "\", \"n\")"; - // Include ARM range checks in AArch64 but only if ARM intrinsics are not - // redefined by AArch64 to handle new types. - if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { - ClassKind &A64CK = A64IntrinsicMap[Rename]; - if (A64CK == ck && ck != ClassNone) - continue; - } - - for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { - std::string namestr, shiftstr, rangestr; - - if (R->getValueAsBit("isVCVT_N")) { - // VCVT between floating- and fixed-point values takes an immediate - // in the range [1, 32] for f32, or [1, 64] for f64. - ck = ClassB; - if (name.find("32") != std::string::npos) - rangestr = "l = 1; u = 31"; // upper bound = l + u - else if (name.find("64") != std::string::npos) - rangestr = "l = 1; u = 63"; - else - PrintFatalError(R->getLoc(), - "Fixed point convert name should contains \"32\" or \"64\""); - - } else if (R->getValueAsBit("isScalarShift")) { - // Right shifts have an 'r' in the name, left shifts do not. Convert - // instructions have the same bounds and right shifts. - if (name.find('r') != std::string::npos || - name.find("cvt") != std::string::npos) - rangestr = "l = 1; "; - - unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]); - // Narrow shift has half the upper bound - if (R->getValueAsBit("isScalarNarrowShift")) - upBound /= 2; - - rangestr += "u = " + utostr(upBound); - } else if (R->getValueAsBit("isShift")) { - // Builtins which are overloaded by type will need to have their upper - // bound computed at Sema time based on the type constant. - shiftstr = ", true"; - - // Right shifts have an 'r' in the name, left shifts do not. - if (name.find('r') != std::string::npos) - rangestr = "l = 1; "; - - rangestr += "u = RFT(TV" + shiftstr + ")"; - } else { - // The immediate generally refers to a lane in the preceding argument. - assert(immPos > 0 && "unexpected immediate operand"); - rangestr = - "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti])); - } - // Make sure cases appear only once by uniquing them in a string map. - namestr = MangleName(name, TypeVec[ti], ck); - if (EmittedMap.count(namestr)) - continue; - EmittedMap[namestr] = OpNone; - - // Calculate the index of the immediate that should be range checked. - unsigned immidx = 0; - - // Builtins that return a struct of multiple vectors have an extra - // leading arg for the struct return. - if (IsMultiVecProto(Proto[0])) - ++immidx; - - // Add one to the index for each argument until we reach the immediate - // to be checked. Structs of vectors are passed as multiple arguments. - for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { - switch (Proto[ii]) { - default: - immidx += 1; - break; - case '2': - case 'B': - immidx += 2; - break; - case '3': - case 'C': - immidx += 3; - break; - case '4': - case 'D': - immidx += 4; - break; - case 'i': - ie = ii + 1; - break; - } - } - if (isA64RangeCheck) - OS << "case AArch64::BI__builtin_neon_"; - else - OS << "case ARM::BI__builtin_neon_"; - OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; " - << rangestr << "; break;\n"; - } + Builtins.insert(S); } + + for (auto &S : Builtins) + OS << S << "\n"; OS << "#endif\n\n"; } /// Generate the ARM and AArch64 overloaded type checking code for /// SemaChecking.cpp, checking for unique builtin declarations. -void -NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, - StringMap<ClassKind> &A64IntrinsicMap, - bool isA64TypeCheck) { - std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); - StringMap<OpKind> EmittedMap; - - // Generate the overloaded type checking code for SemaChecking.cpp - if (isA64TypeCheck) - OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n"; - else - OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; +void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, + SmallVectorImpl<Intrinsic *> &Defs) { + OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; + + // We record each overload check line before emitting because subsequent Inst + // definitions may extend the number of permitted types (i.e. augment the + // Mask). Use std::map to avoid sorting the table by hash number. + struct OverloadInfo { + uint64_t Mask; + int PtrArgNum; + bool HasConstPtr; + OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {} + }; + std::map<std::string, OverloadInfo> OverloadMap; - for (unsigned i = 0, e = RV.size(); i != e; ++i) { - Record *R = RV[i]; - OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; - if (k != OpNone) + for (auto *Def : Defs) { + // If the def has a body (that is, it has Operation DAGs), it won't call + // __builtin_neon_* so we don't need to generate a definition for it. + if (Def->hasBody()) continue; - - std::string Proto = R->getValueAsString("Prototype"); - std::string Types = R->getValueAsString("Types"); - std::string name = R->getValueAsString("Name"); - std::string Rename = name + "@" + Proto; - // Functions with 'a' (the splat code) in the type prototype should not get // their own builtin as they use the non-splat variant. - if (Proto.find('a') != std::string::npos) + if (Def->hasSplat()) continue; - // Functions which have a scalar argument cannot be overloaded, no need to // check them if we are emitting the type checking code. - if (ProtoHasScalar(Proto)) + if (Def->protoHasScalar()) continue; - SmallVector<StringRef, 16> TypeVec; - ParseTypes(R, Types, TypeVec); + uint64_t Mask = 0ULL; + Type Ty = Def->getReturnType(); + if (Def->getProto()[0] == 'v' || Def->getProto()[0] == 'f' || + Def->getProto()[0] == 'F') + Ty = Def->getParamType(0); + if (Ty.isPointer()) + Ty = Def->getParamType(1); - if (R->getSuperClasses().size() < 2) - PrintFatalError(R->getLoc(), "Builtin has no class kind"); + Mask |= 1ULL << Ty.getNeonEnum(); - // Do not include AArch64 type checks if not generating code for AArch64. - bool isA64 = R->getValueAsBit("isA64"); - if (!isA64TypeCheck && isA64) - continue; - - // Include ARM type check in AArch64 but only if ARM intrinsics - // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr - // redefined in AArch64 to handle an additional 2 x f64 type. - ClassKind ck = ClassMap[R->getSuperClasses()[1]]; - if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { - ClassKind &A64CK = A64IntrinsicMap[Rename]; - if (A64CK == ck && ck != ClassNone) - continue; - } - - int si = -1, qi = -1; - uint64_t mask = 0, qmask = 0; - for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { - // Generate the switch case(s) for this builtin for the type validation. - bool quad = false, poly = false, usgn = false; - (void) ClassifyType(TypeVec[ti], quad, poly, usgn); - - if (quad) { - qi = ti; - qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); - } else { - si = ti; - mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]); - } - } - - // Check if the builtin function has a pointer or const pointer argument. + // Check if the function has a pointer or const pointer argument. + std::string Proto = Def->getProto(); int PtrArgNum = -1; bool HasConstPtr = false; - for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) { - char ArgType = Proto[arg]; + for (unsigned I = 0; I < Def->getNumParams(); ++I) { + char ArgType = Proto[I + 1]; if (ArgType == 'c') { HasConstPtr = true; - PtrArgNum = arg - 1; + PtrArgNum = I; break; } if (ArgType == 'p') { - PtrArgNum = arg - 1; + PtrArgNum = I; break; } } // For sret builtins, adjust the pointer argument index. - if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0])) + if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) PtrArgNum += 1; + std::string Name = Def->getName(); // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, // and vst1_lane intrinsics. Using a pointer to the vector element // type with one of those operations causes codegen to select an aligned // load/store instruction. If you want an unaligned operation, // the pointer argument needs to have less alignment than element type, // so just accept any pointer type. - if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") { + if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") { PtrArgNum = -1; HasConstPtr = false; } - if (mask) { - if (isA64TypeCheck) - OS << "case AArch64::BI__builtin_neon_"; - else - OS << "case ARM::BI__builtin_neon_"; - OS << MangleName(name, TypeVec[si], ClassB) << ": mask = " - << "0x" << utohexstr(mask) << "ULL"; - if (PtrArgNum >= 0) - OS << "; PtrArgNum = " << PtrArgNum; - if (HasConstPtr) - OS << "; HasConstPtr = true"; - OS << "; break;\n"; - } - if (qmask) { - if (isA64TypeCheck) - OS << "case AArch64::BI__builtin_neon_"; - else - OS << "case ARM::BI__builtin_neon_"; - OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = " - << "0x" << utohexstr(qmask) << "ULL"; - if (PtrArgNum >= 0) - OS << "; PtrArgNum = " << PtrArgNum; - if (HasConstPtr) - OS << "; HasConstPtr = true"; - OS << "; break;\n"; + if (Mask) { + std::string Name = Def->getMangledName(); + OverloadMap.insert(std::make_pair(Name, OverloadInfo())); + OverloadInfo &OI = OverloadMap[Name]; + OI.Mask |= Mask; + OI.PtrArgNum |= PtrArgNum; + OI.HasConstPtr = HasConstPtr; } } + + for (auto &I : OverloadMap) { + OverloadInfo &OI = I.second; + + OS << "case NEON::BI__builtin_neon_" << I.first << ": "; + OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL"; + if (OI.PtrArgNum >= 0) + OS << "; PtrArgNum = " << OI.PtrArgNum; + if (OI.HasConstPtr) + OS << "; HasConstPtr = true"; + OS << "; break;\n"; + } OS << "#endif\n\n"; } -/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def -/// declaration of builtins, checking for unique builtin declarations. -void NeonEmitter::genBuiltinsDef(raw_ostream &OS, - StringMap<ClassKind> &A64IntrinsicMap, - bool isA64GenBuiltinDef) { - std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); - StringMap<OpKind> EmittedMap; +void +NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, + SmallVectorImpl<Intrinsic *> &Defs) { + OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; - // Generate BuiltinsARM.def and BuiltinsAArch64.def - if (isA64GenBuiltinDef) - OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n"; - else - OS << "#ifdef GET_NEON_BUILTINS\n"; + std::set<std::string> Emitted; - for (unsigned i = 0, e = RV.size(); i != e; ++i) { - Record *R = RV[i]; - OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; - if (k != OpNone) + for (auto *Def : Defs) { + if (Def->hasBody()) continue; - - std::string Proto = R->getValueAsString("Prototype"); - std::string name = R->getValueAsString("Name"); - std::string Rename = name + "@" + Proto; - // Functions with 'a' (the splat code) in the type prototype should not get // their own builtin as they use the non-splat variant. - if (Proto.find('a') != std::string::npos) + if (Def->hasSplat()) continue; - - std::string Types = R->getValueAsString("Types"); - SmallVector<StringRef, 16> TypeVec; - ParseTypes(R, Types, TypeVec); - - if (R->getSuperClasses().size() < 2) - PrintFatalError(R->getLoc(), "Builtin has no class kind"); - - ClassKind ck = ClassMap[R->getSuperClasses()[1]]; - - // Do not include AArch64 BUILTIN() macros if not generating - // code for AArch64 - bool isA64 = R->getValueAsBit("isA64"); - if (!isA64GenBuiltinDef && isA64) + // Functions which do not have an immediate do not need to have range + // checking code emitted. + if (!Def->hasImmediate()) + continue; + if (Emitted.find(Def->getMangledName()) != Emitted.end()) continue; - // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics - // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr - // redefined in AArch64 to handle an additional 2 x f64 type. - if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) { - ClassKind &A64CK = A64IntrinsicMap[Rename]; - if (A64CK == ck && ck != ClassNone) - continue; + std::string LowerBound, UpperBound; + + Record *R = Def->getRecord(); + if (R->getValueAsBit("isVCVT_N")) { + // VCVT between floating- and fixed-point values takes an immediate + // in the range [1, 32) for f32 or [1, 64) for f64. + LowerBound = "1"; + if (Def->getBaseType().getElementSizeInBits() == 32) + UpperBound = "31"; + else + UpperBound = "63"; + } else if (R->getValueAsBit("isScalarShift")) { + // Right shifts have an 'r' in the name, left shifts do not. Convert + // instructions have the same bounds and right shifts. + if (Def->getName().find('r') != std::string::npos || + Def->getName().find("cvt") != std::string::npos) + LowerBound = "1"; + + UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); + } else if (R->getValueAsBit("isShift")) { + // Builtins which are overloaded by type will need to have their upper + // bound computed at Sema time based on the type constant. + + // Right shifts have an 'r' in the name, left shifts do not. + if (Def->getName().find('r') != std::string::npos) + LowerBound = "1"; + UpperBound = "RFT(TV, true)"; + } else if (Def->getClassKind(true) == ClassB) { + // ClassB intrinsics have a type (and hence lane number) that is only + // known at runtime. + if (R->getValueAsBit("isLaneQ")) + UpperBound = "RFT(TV, false, true)"; + else + UpperBound = "RFT(TV, false, false)"; + } else { + // The immediate generally refers to a lane in the preceding argument. + assert(Def->getImmediateIdx() > 0); + Type T = Def->getParamType(Def->getImmediateIdx() - 1); + UpperBound = utostr(T.getNumElements() - 1); } - for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { - // Generate the declaration for this builtin, ensuring - // that each unique BUILTIN() macro appears only once in the output - // stream. - std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck); - if (EmittedMap.count(bd)) - continue; + // Calculate the index of the immediate that should be range checked. + unsigned Idx = Def->getNumParams(); + if (Def->hasImmediate()) + Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); - EmittedMap[bd] = OpNone; - OS << bd << "\n"; - } + OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " + << "i = " << Idx << ";"; + if (LowerBound.size()) + OS << " l = " << LowerBound << ";"; + if (UpperBound.size()) + OS << " u = " << UpperBound << ";"; + OS << " break;\n"; + + Emitted.insert(Def->getMangledName()); } + OS << "#endif\n\n"; } @@ -3192,222 +2167,220 @@ void NeonEmitter::genBuiltinsDef(raw_ostream &OS, void NeonEmitter::runHeader(raw_ostream &OS) { std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); - // build a map of AArch64 intriniscs to be used in uniqueness checks. - StringMap<ClassKind> A64IntrinsicMap; - for (unsigned i = 0, e = RV.size(); i != e; ++i) { - Record *R = RV[i]; + SmallVector<Intrinsic *, 128> Defs; + for (auto *R : RV) + createIntrinsic(R, Defs); - bool isA64 = R->getValueAsBit("isA64"); - if (!isA64) - continue; + // Generate shared BuiltinsXXX.def + genBuiltinsDef(OS, Defs); - ClassKind CK = ClassNone; - if (R->getSuperClasses().size() >= 2) - CK = ClassMap[R->getSuperClasses()[1]]; + // Generate ARM overloaded type checking code for SemaChecking.cpp + genOverloadTypeCheckCode(OS, Defs); - std::string Name = R->getValueAsString("Name"); - std::string Proto = R->getValueAsString("Prototype"); - std::string Rename = Name + "@" + Proto; - if (A64IntrinsicMap.count(Rename)) - continue; - A64IntrinsicMap[Rename] = CK; - } + // Generate ARM range checking code for shift/lane immediates. + genIntrinsicRangeCheckCode(OS, Defs); +} - // Generate BuiltinsARM.def for ARM - genBuiltinsDef(OS, A64IntrinsicMap, false); +/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h +/// is comprised of type definitions and function declarations. +void NeonEmitter::run(raw_ostream &OS) { + OS << "/*===---- arm_neon.h - ARM Neon intrinsics " + "------------------------------" + "---===\n" + " *\n" + " * Permission is hereby granted, free of charge, to any person " + "obtaining " + "a copy\n" + " * of this software and associated documentation files (the " + "\"Software\")," + " to deal\n" + " * in the Software without restriction, including without limitation " + "the " + "rights\n" + " * to use, copy, modify, merge, publish, distribute, sublicense, " + "and/or sell\n" + " * copies of the Software, and to permit persons to whom the Software " + "is\n" + " * furnished to do so, subject to the following conditions:\n" + " *\n" + " * The above copyright notice and this permission notice shall be " + "included in\n" + " * all copies or substantial portions of the Software.\n" + " *\n" + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " + "EXPRESS OR\n" + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " + "MERCHANTABILITY,\n" + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " + "SHALL THE\n" + " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " + "OTHER\n" + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " + "ARISING FROM,\n" + " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " + "DEALINGS IN\n" + " * THE SOFTWARE.\n" + " *\n" + " *===-----------------------------------------------------------------" + "---" + "---===\n" + " */\n\n"; - // Generate BuiltinsAArch64.def for AArch64 - genBuiltinsDef(OS, A64IntrinsicMap, true); + OS << "#ifndef __ARM_NEON_H\n"; + OS << "#define __ARM_NEON_H\n\n"; - // Generate ARM overloaded type checking code for SemaChecking.cpp - genOverloadTypeCheckCode(OS, A64IntrinsicMap, false); + OS << "#if !defined(__ARM_NEON)\n"; + OS << "#error \"NEON support not enabled\"\n"; + OS << "#endif\n\n"; - // Generate AArch64 overloaded type checking code for SemaChecking.cpp - genOverloadTypeCheckCode(OS, A64IntrinsicMap, true); + OS << "#include <stdint.h>\n\n"; - // Generate ARM range checking code for shift/lane immediates. - genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false); + // Emit NEON-specific scalar typedefs. + OS << "typedef float float32_t;\n"; + OS << "typedef __fp16 float16_t;\n"; - // Generate the AArch64 range checking code for shift/lane immediates. - genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true); -} + OS << "#ifdef __aarch64__\n"; + OS << "typedef double float64_t;\n"; + OS << "#endif\n\n"; -/// GenTest - Write out a test for the intrinsic specified by the name and -/// type strings, including the embedded patterns for FileCheck to match. -static std::string GenTest(const std::string &name, - const std::string &proto, - StringRef outTypeStr, StringRef inTypeStr, - bool isShift, bool isHiddenLOp, - ClassKind ck, const std::string &InstName, - bool isA64, - std::string & testFuncProto) { - assert(!proto.empty() && ""); - std::string s; - - // Function name with type suffix - std::string mangledName = MangleName(name, outTypeStr, ClassS); - if (outTypeStr != inTypeStr) { - // If the input type is different (e.g., for vreinterpret), append a suffix - // for the input type. String off a "Q" (quad) prefix so that MangleName - // does not insert another "q" in the name. - unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); - StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); - mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); - } - - // todo: GenerateChecksForIntrinsic does not generate CHECK - // for aarch64 instructions yet - std::vector<std::string> FileCheckPatterns; - if (!isA64) { - GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, - isHiddenLOp, FileCheckPatterns); - s+= "// CHECK_ARM: test_" + mangledName + "\n"; - } - s += "// CHECK_AARCH64: test_" + mangledName + "\n"; - - // Emit the FileCheck patterns. - // If for any reason we do not want to emit a check, mangledInst - // will be the empty string. - if (FileCheckPatterns.size()) { - for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(), - e = FileCheckPatterns.end(); - i != e; - ++i) { - s += "// CHECK_ARM: " + *i + "\n"; + // For now, signedness of polynomial types depends on target + OS << "#ifdef __aarch64__\n"; + OS << "typedef uint8_t poly8_t;\n"; + OS << "typedef uint16_t poly16_t;\n"; + OS << "typedef uint64_t poly64_t;\n"; + OS << "typedef __uint128_t poly128_t;\n"; + OS << "#else\n"; + OS << "typedef int8_t poly8_t;\n"; + OS << "typedef int16_t poly16_t;\n"; + OS << "#endif\n"; + + // Emit Neon vector typedefs. + std::string TypedefTypes( + "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); + std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); + + // Emit vector typedefs. + bool InIfdef = false; + for (auto &TS : TDTypeVec) { + bool IsA64 = false; + Type T(TS, 'd'); + if (T.isDouble() || (T.isPoly() && T.isLong())) + IsA64 = true; + + if (InIfdef && !IsA64) { + OS << "#endif\n"; + InIfdef = false; + } + if (!InIfdef && IsA64) { + OS << "#ifdef __aarch64__\n"; + InIfdef = true; } + + if (T.isPoly()) + OS << "typedef __attribute__((neon_polyvector_type("; + else + OS << "typedef __attribute__((neon_vector_type("; + + Type T2 = T; + T2.makeScalar(); + OS << utostr(T.getNumElements()) << "))) "; + OS << T2.str(); + OS << " " << T.str() << ";\n"; } + if (InIfdef) + OS << "#endif\n"; + OS << "\n"; - // Emit the start of the test function. + // Emit struct typedefs. + InIfdef = false; + for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { + for (auto &TS : TDTypeVec) { + bool IsA64 = false; + Type T(TS, 'd'); + if (T.isDouble() || (T.isPoly() && T.isLong())) + IsA64 = true; + + if (InIfdef && !IsA64) { + OS << "#endif\n"; + InIfdef = false; + } + if (!InIfdef && IsA64) { + OS << "#ifdef __aarch64__\n"; + InIfdef = true; + } - testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; - char arg = 'a'; - std::string comma; - for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { - // Do not create arguments for values that must be immediate constants. - if (proto[i] == 'i') - continue; - testFuncProto += comma + TypeString(proto[i], inTypeStr) + " "; - testFuncProto.push_back(arg); - comma = ", "; - } - testFuncProto += ")"; - - s+= testFuncProto; - s+= " {\n "; - - if (proto[0] != 'v') - s += "return "; - s += mangledName + "("; - arg = 'a'; - for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { - if (proto[i] == 'i') { - // For immediate operands, test the maximum value. - if (isShift) - s += "1"; // FIXME - else - // The immediate generally refers to a lane in the preceding argument. - s += utostr(RangeFromType(proto[i-1], inTypeStr)); - } else { - s.push_back(arg); + char M = '2' + (NumMembers - 2); + Type VT(TS, M); + OS << "typedef struct " << VT.str() << " {\n"; + OS << " " << T.str() << " val"; + OS << "[" << utostr(NumMembers) << "]"; + OS << ";\n} "; + OS << VT.str() << ";\n"; + OS << "\n"; } - if ((i + 1) < e) - s += ", "; } - s += ");\n}\n\n"; - return s; -} + if (InIfdef) + OS << "#endif\n"; + OS << "\n"; -/// Write out all intrinsic tests for the specified target, checking -/// for intrinsic test uniqueness. -void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, - bool isA64GenTest) { - if (isA64GenTest) - OS << "#ifdef __aarch64__\n"; + OS << "#define __ai static inline __attribute__((__always_inline__, " + "__nodebug__))\n\n"; + SmallVector<Intrinsic *, 128> Defs; std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); - for (unsigned i = 0, e = RV.size(); i != e; ++i) { - Record *R = RV[i]; - std::string name = R->getValueAsString("Name"); - std::string Proto = R->getValueAsString("Prototype"); - std::string Types = R->getValueAsString("Types"); - bool isShift = R->getValueAsBit("isShift"); - std::string InstName = R->getValueAsString("InstName"); - bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); - bool isA64 = R->getValueAsBit("isA64"); - - // do not include AArch64 intrinsic test if not generating - // code for AArch64 - if (!isA64GenTest && isA64) - continue; - - SmallVector<StringRef, 16> TypeVec; - ParseTypes(R, Types, TypeVec); + for (auto *R : RV) + createIntrinsic(R, Defs); + + for (auto *I : Defs) + I->indexBody(); + + std::stable_sort( + Defs.begin(), Defs.end(), + [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; }); + + // Only emit a def when its requirements have been met. + // FIXME: This loop could be made faster, but it's fast enough for now. + bool MadeProgress = true; + std::string InGuard = ""; + while (!Defs.empty() && MadeProgress) { + MadeProgress = false; + + for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); + I != Defs.end(); /*No step*/) { + bool DependenciesSatisfied = true; + for (auto *II : (*I)->getDependencies()) { + if (std::find(Defs.begin(), Defs.end(), II) != Defs.end()) + DependenciesSatisfied = false; + } + if (!DependenciesSatisfied) { + // Try the next one. + ++I; + continue; + } - ClassKind ck = ClassMap[R->getSuperClasses()[1]]; - OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; - if (kind == OpUnavailable) - continue; - for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { - if (kind == OpReinterpret) { - bool outQuad = false; - bool dummy = false; - (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); - for (unsigned srcti = 0, srcte = TypeVec.size(); - srcti != srcte; ++srcti) { - bool inQuad = false; - (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); - if (srcti == ti || inQuad != outQuad) - continue; - std::string testFuncProto; - std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], - isShift, isHiddenLOp, ck, InstName, isA64, - testFuncProto); - if (EmittedMap.count(testFuncProto)) - continue; - EmittedMap[testFuncProto] = kind; - OS << s << "\n"; - } - } else { - std::string testFuncProto; - std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, - isHiddenLOp, ck, InstName, isA64, testFuncProto); - if (EmittedMap.count(testFuncProto)) - continue; - EmittedMap[testFuncProto] = kind; - OS << s << "\n"; + // Emit #endif/#if pair if needed. + if ((*I)->getGuard() != InGuard) { + if (!InGuard.empty()) + OS << "#endif\n"; + InGuard = (*I)->getGuard(); + if (!InGuard.empty()) + OS << "#if " << InGuard << "\n"; } + + // Actually generate the intrinsic code. + OS << (*I)->generate(); + + MadeProgress = true; + I = Defs.erase(I); } } + assert(Defs.empty() && "Some requirements were not satisfied!"); + if (!InGuard.empty()) + OS << "#endif\n"; - if (isA64GenTest) - OS << "#endif\n"; -} -/// runTests - Write out a complete set of tests for all of the Neon -/// intrinsics. -void NeonEmitter::runTests(raw_ostream &OS) { - OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " - "apcs-gnu\\\n" - "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" - "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" - "\n" - "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" - "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n" - "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" - "\n" - "// REQUIRES: long_tests\n" - "\n" - "#include <arm_neon.h>\n" - "\n"; - - // ARM tests must be emitted before AArch64 tests to ensure - // tests for intrinsics that are common to ARM and AArch64 - // appear only once in the output stream. - // The check for uniqueness is done in genTargetTest. - StringMap<OpKind> EmittedMap; - - genTargetTest(OS, EmittedMap, false); - - genTargetTest(OS, EmittedMap, true); + OS << "\n"; + OS << "#undef __ai\n\n"; + OS << "#endif /* __ARM_NEON_H */\n"; } namespace clang { @@ -3418,6 +2391,6 @@ void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).runHeader(OS); } void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { - NeonEmitter(Records).runTests(OS); + llvm_unreachable("Neon test generation no longer implemented!"); } } // End namespace clang |