diff options
Diffstat (limited to 'contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp | 2403 |
1 files changed, 2403 insertions, 0 deletions
diff --git a/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp b/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp new file mode 100644 index 0000000..6e7bc90 --- /dev/null +++ b/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp @@ -0,0 +1,2403 @@ +//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tablegen backend is responsible for emitting arm_neon.h, which includes +// a declaration and definition of each function specified by the ARM NEON +// compiler interface. See ARM document DUI0348B. +// +// Each NEON instruction is implemented in terms of 1 or more functions which +// are suffixed with the element type of the input vectors. Functions may be +// implemented in terms of generic vector operations such as +, *, -, etc. or +// by calling a __builtin_-prefixed function which will be handled by clang's +// CodeGen library. +// +// Additional validation code can be generated by this file when runHeader() is +// called, rather than the normal run() entry point. +// +// See also the documentation in include/clang/Basic/arm_neon.td. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" +#include "llvm/TableGen/TableGenBackend.h" +#include <algorithm> +#include <deque> +#include <map> +#include <sstream> +#include <string> +#include <vector> +using namespace llvm; + +namespace { + +// While globals are generally bad, this one allows us to perform assertions +// liberally and somehow still trace them back to the def they indirectly +// came from. +static Record *CurrentRecord = nullptr; +static void assert_with_loc(bool Assertion, const std::string &Str) { + if (!Assertion) { + if (CurrentRecord) + PrintFatalError(CurrentRecord->getLoc(), Str); + else + PrintFatalError(Str); + } +} + +enum ClassKind { + ClassNone, + ClassI, // generic integer instruction, e.g., "i8" suffix + ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix + ClassW, // width-specific instruction, e.g., "8" suffix + ClassB, // bitcast arguments with enum argument to specify type + ClassL, // Logical instructions which are op instructions + // but we need to not emit any suffix for in our + // tests. + ClassNoTest // Instructions which we do not test since they are + // not TRUE instructions. +}; + +/// NeonTypeFlags - Flags to identify the types for overloaded Neon +/// builtins. These must be kept in sync with the flags in +/// include/clang/Basic/TargetBuiltins.h. +namespace NeonTypeFlags { +enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 }; + +enum EltType { + Int8, + Int16, + Int32, + Int64, + Poly8, + Poly16, + Poly64, + Poly128, + Float16, + Float32, + Float64 +}; +} + +class Intrinsic; +class NeonEmitter; +class Type; +class Variable; + +//===----------------------------------------------------------------------===// +// TypeSpec +//===----------------------------------------------------------------------===// + +/// A TypeSpec is just a simple wrapper around a string, but gets its own type +/// for strong typing purposes. +/// +/// A TypeSpec can be used to create a type. +class TypeSpec : public std::string { +public: + static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) { + std::vector<TypeSpec> Ret; + TypeSpec Acc; + for (char I : Str.str()) { + if (islower(I)) { + Acc.push_back(I); + Ret.push_back(TypeSpec(Acc)); + Acc.clear(); + } else { + Acc.push_back(I); + } + } + return Ret; + } +}; + +//===----------------------------------------------------------------------===// +// Type +//===----------------------------------------------------------------------===// + +/// A Type. Not much more to say here. +class Type { +private: + TypeSpec TS; + + bool Float, Signed, Immediate, Void, Poly, Constant, Pointer; + // ScalarForMangling and NoManglingQ are really not suited to live here as + // they are not related to the type. But they live in the TypeSpec (not the + // prototype), so this is really the only place to store them. + bool ScalarForMangling, NoManglingQ; + unsigned Bitwidth, ElementBitwidth, NumVectors; + +public: + Type() + : Float(false), Signed(false), Immediate(false), Void(true), Poly(false), + Constant(false), Pointer(false), ScalarForMangling(false), + NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} + + Type(TypeSpec TS, char CharMod) + : TS(TS), Float(false), Signed(false), Immediate(false), Void(false), + Poly(false), Constant(false), Pointer(false), ScalarForMangling(false), + NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { + applyModifier(CharMod); + } + + /// Returns a type representing "void". + static Type getVoid() { return Type(); } + + bool operator==(const Type &Other) const { return str() == Other.str(); } + bool operator!=(const Type &Other) const { return !operator==(Other); } + + // + // Query functions + // + bool isScalarForMangling() const { return ScalarForMangling; } + bool noManglingQ() const { return NoManglingQ; } + + bool isPointer() const { return Pointer; } + bool isFloating() const { return Float; } + bool isInteger() const { return !Float && !Poly; } + bool isSigned() const { return Signed; } + bool isImmediate() const { return Immediate; } + bool isScalar() const { return NumVectors == 0; } + bool isVector() const { return NumVectors > 0; } + bool isFloat() const { return Float && ElementBitwidth == 32; } + bool isDouble() const { return Float && ElementBitwidth == 64; } + bool isHalf() const { return Float && ElementBitwidth == 16; } + bool isPoly() const { return Poly; } + bool isChar() const { return ElementBitwidth == 8; } + bool isShort() const { return !Float && ElementBitwidth == 16; } + bool isInt() const { return !Float && ElementBitwidth == 32; } + bool isLong() const { return !Float && ElementBitwidth == 64; } + bool isVoid() const { return Void; } + unsigned getNumElements() const { return Bitwidth / ElementBitwidth; } + unsigned getSizeInBits() const { return Bitwidth; } + unsigned getElementSizeInBits() const { return ElementBitwidth; } + unsigned getNumVectors() const { return NumVectors; } + + // + // Mutator functions + // + void makeUnsigned() { Signed = false; } + void makeSigned() { Signed = true; } + void makeInteger(unsigned ElemWidth, bool Sign) { + Float = false; + Poly = false; + Signed = Sign; + Immediate = false; + ElementBitwidth = ElemWidth; + } + void makeImmediate(unsigned ElemWidth) { + Float = false; + Poly = false; + Signed = true; + Immediate = true; + ElementBitwidth = ElemWidth; + } + void makeScalar() { + Bitwidth = ElementBitwidth; + NumVectors = 0; + } + void makeOneVector() { + assert(isVector()); + NumVectors = 1; + } + void doubleLanes() { + assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!"); + Bitwidth = 128; + } + void halveLanes() { + assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!"); + Bitwidth = 64; + } + + /// Return the C string representation of a type, which is the typename + /// defined in stdint.h or arm_neon.h. + std::string str() const; + + /// Return the string representation of a type, which is an encoded + /// string for passing to the BUILTIN() macro in Builtins.def. + std::string builtin_str() const; + + /// Return the value in NeonTypeFlags for this type. + unsigned getNeonEnum() const; + + /// Parse a type from a stdint.h or arm_neon.h typedef name, + /// for example uint32x2_t or int64_t. + static Type fromTypedefName(StringRef Name); + +private: + /// Creates the type based on the typespec string in TS. + /// Sets "Quad" to true if the "Q" or "H" modifiers were + /// seen. This is needed by applyModifier as some modifiers + /// only take effect if the type size was changed by "Q" or "H". + void applyTypespec(bool &Quad); + /// Applies a prototype modifier to the type. + void applyModifier(char Mod); +}; + +//===----------------------------------------------------------------------===// +// Variable +//===----------------------------------------------------------------------===// + +/// A variable is a simple class that just has a type and a name. +class Variable { + Type T; + std::string N; + +public: + Variable() : T(Type::getVoid()), N("") {} + Variable(Type T, std::string N) : T(T), N(N) {} + + Type getType() const { return T; } + std::string getName() const { return "__" + N; } +}; + +//===----------------------------------------------------------------------===// +// Intrinsic +//===----------------------------------------------------------------------===// + +/// The main grunt class. This represents an instantiation of an intrinsic with +/// a particular typespec and prototype. +class Intrinsic { + friend class DagEmitter; + + /// The Record this intrinsic was created from. + Record *R; + /// The unmangled name and prototype. + std::string Name, Proto; + /// The input and output typespecs. InTS == OutTS except when + /// CartesianProductOfTypes is 1 - this is the case for vreinterpret. + TypeSpec OutTS, InTS; + /// The base class kind. Most intrinsics use ClassS, which has full type + /// info for integers (s32/u32). Some use ClassI, which doesn't care about + /// signedness (i32), while some (ClassB) have no type at all, only a width + /// (32). + ClassKind CK; + /// The list of DAGs for the body. May be empty, in which case we should + /// emit a builtin call. + ListInit *Body; + /// The architectural #ifdef guard. + std::string Guard; + /// Set if the Unvailable bit is 1. This means we don't generate a body, + /// just an "unavailable" attribute on a declaration. + bool IsUnavailable; + /// Is this intrinsic safe for big-endian? or does it need its arguments + /// reversing? + bool BigEndianSafe; + + /// The types of return value [0] and parameters [1..]. + std::vector<Type> Types; + /// The local variables defined. + std::map<std::string, Variable> Variables; + /// NeededEarly - set if any other intrinsic depends on this intrinsic. + bool NeededEarly; + /// UseMacro - set if we should implement using a macro or unset for a + /// function. + bool UseMacro; + /// The set of intrinsics that this intrinsic uses/requires. + std::set<Intrinsic *> Dependencies; + /// The "base type", which is Type('d', OutTS). InBaseType is only + /// different if CartesianProductOfTypes = 1 (for vreinterpret). + Type BaseType, InBaseType; + /// The return variable. + Variable RetVar; + /// A postfix to apply to every variable. Defaults to "". + std::string VariablePostfix; + + NeonEmitter &Emitter; + std::stringstream OS; + +public: + Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, + TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, + StringRef Guard, bool IsUnavailable, bool BigEndianSafe) + : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS), + CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable), + BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false), + BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) { + // If this builtin takes an immediate argument, we need to #define it rather + // than use a standard declaration, so that SemaChecking can range check + // the immediate passed by the user. + if (Proto.find('i') != std::string::npos) + UseMacro = true; + + // Pointer arguments need to use macros to avoid hiding aligned attributes + // from the pointer type. + if (Proto.find('p') != std::string::npos || + Proto.find('c') != std::string::npos) + UseMacro = true; + + // It is not permitted to pass or return an __fp16 by value, so intrinsics + // taking a scalar float16_t must be implemented as macros. + if (OutTS.find('h') != std::string::npos && + Proto.find('s') != std::string::npos) + UseMacro = true; + + // Modify the TypeSpec per-argument to get a concrete Type, and create + // known variables for each. + // Types[0] is the return value. + Types.emplace_back(OutTS, Proto[0]); + for (unsigned I = 1; I < Proto.size(); ++I) + Types.emplace_back(InTS, Proto[I]); + } + + /// Get the Record that this intrinsic is based off. + Record *getRecord() const { return R; } + /// Get the set of Intrinsics that this intrinsic calls. + /// this is the set of immediate dependencies, NOT the + /// transitive closure. + const std::set<Intrinsic *> &getDependencies() const { return Dependencies; } + /// Get the architectural guard string (#ifdef). + std::string getGuard() const { return Guard; } + /// Get the non-mangled name. + std::string getName() const { return Name; } + + /// Return true if the intrinsic takes an immediate operand. + bool hasImmediate() const { + return Proto.find('i') != std::string::npos; + } + /// Return the parameter index of the immediate operand. + unsigned getImmediateIdx() const { + assert(hasImmediate()); + unsigned Idx = Proto.find('i'); + assert(Idx > 0 && "Can't return an immediate!"); + return Idx - 1; + } + + /// Return true if the intrinsic takes an splat operand. + bool hasSplat() const { return Proto.find('a') != std::string::npos; } + /// Return the parameter index of the splat operand. + unsigned getSplatIdx() const { + assert(hasSplat()); + unsigned Idx = Proto.find('a'); + assert(Idx > 0 && "Can't return a splat!"); + return Idx - 1; + } + + unsigned getNumParams() const { return Proto.size() - 1; } + Type getReturnType() const { return Types[0]; } + Type getParamType(unsigned I) const { return Types[I + 1]; } + Type getBaseType() const { return BaseType; } + /// Return the raw prototype string. + std::string getProto() const { return Proto; } + + /// Return true if the prototype has a scalar argument. + /// This does not return true for the "splat" code ('a'). + bool protoHasScalar() const; + + /// Return the index that parameter PIndex will sit at + /// in a generated function call. This is often just PIndex, + /// but may not be as things such as multiple-vector operands + /// and sret parameters need to be taken into accont. + unsigned getGeneratedParamIdx(unsigned PIndex) { + unsigned Idx = 0; + if (getReturnType().getNumVectors() > 1) + // Multiple vectors are passed as sret. + ++Idx; + + for (unsigned I = 0; I < PIndex; ++I) + Idx += std::max(1U, getParamType(I).getNumVectors()); + + return Idx; + } + + bool hasBody() const { return Body && Body->getValues().size() > 0; } + + void setNeededEarly() { NeededEarly = true; } + + bool operator<(const Intrinsic &Other) const { + // Sort lexicographically on a two-tuple (Guard, Name) + if (Guard != Other.Guard) + return Guard < Other.Guard; + return Name < Other.Name; + } + + ClassKind getClassKind(bool UseClassBIfScalar = false) { + if (UseClassBIfScalar && !protoHasScalar()) + return ClassB; + return CK; + } + + /// Return the name, mangled with type information. + /// If ForceClassS is true, use ClassS (u32/s32) instead + /// of the intrinsic's own type class. + std::string getMangledName(bool ForceClassS = false) const; + /// Return the type code for a builtin function call. + std::string getInstTypeCode(Type T, ClassKind CK) const; + /// Return the type string for a BUILTIN() macro in Builtins.def. + std::string getBuiltinTypeStr(); + + /// Generate the intrinsic, returning code. + std::string generate(); + /// Perform type checking and populate the dependency graph, but + /// don't generate code yet. + void indexBody(); + +private: + std::string mangleName(std::string Name, ClassKind CK) const; + + void initVariables(); + std::string replaceParamsIn(std::string S); + + void emitBodyAsBuiltinCall(); + + void generateImpl(bool ReverseArguments, + StringRef NamePrefix, StringRef CallPrefix); + void emitReturn(); + void emitBody(StringRef CallPrefix); + void emitShadowedArgs(); + void emitArgumentReversal(); + void emitReturnReversal(); + void emitReverseVariable(Variable &Dest, Variable &Src); + void emitNewLine(); + void emitClosingBrace(); + void emitOpeningBrace(); + void emitPrototype(StringRef NamePrefix); + + class DagEmitter { + Intrinsic &Intr; + StringRef CallPrefix; + + public: + DagEmitter(Intrinsic &Intr, StringRef CallPrefix) : + Intr(Intr), CallPrefix(CallPrefix) { + } + std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName); + std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI); + std::pair<Type, std::string> emitDagSplat(DagInit *DI); + std::pair<Type, std::string> emitDagDup(DagInit *DI); + std::pair<Type, std::string> emitDagShuffle(DagInit *DI); + std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast); + std::pair<Type, std::string> emitDagCall(DagInit *DI); + std::pair<Type, std::string> emitDagNameReplace(DagInit *DI); + std::pair<Type, std::string> emitDagLiteral(DagInit *DI); + std::pair<Type, std::string> emitDagOp(DagInit *DI); + std::pair<Type, std::string> emitDag(DagInit *DI); + }; + +}; + +//===----------------------------------------------------------------------===// +// NeonEmitter +//===----------------------------------------------------------------------===// + +class NeonEmitter { + RecordKeeper &Records; + DenseMap<Record *, ClassKind> ClassMap; + std::map<std::string, std::deque<Intrinsic>> IntrinsicMap; + unsigned UniqueNumber; + + void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out); + void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs); + void genOverloadTypeCheckCode(raw_ostream &OS, + SmallVectorImpl<Intrinsic *> &Defs); + void genIntrinsicRangeCheckCode(raw_ostream &OS, + SmallVectorImpl<Intrinsic *> &Defs); + +public: + /// Called by Intrinsic - this attempts to get an intrinsic that takes + /// the given types as arguments. + Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types); + + /// Called by Intrinsic - returns a globally-unique number. + unsigned getUniqueNumber() { return UniqueNumber++; } + + NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { + Record *SI = R.getClass("SInst"); + Record *II = R.getClass("IInst"); + Record *WI = R.getClass("WInst"); + Record *SOpI = R.getClass("SOpInst"); + Record *IOpI = R.getClass("IOpInst"); + Record *WOpI = R.getClass("WOpInst"); + Record *LOpI = R.getClass("LOpInst"); + Record *NoTestOpI = R.getClass("NoTestOpInst"); + + ClassMap[SI] = ClassS; + ClassMap[II] = ClassI; + ClassMap[WI] = ClassW; + ClassMap[SOpI] = ClassS; + ClassMap[IOpI] = ClassI; + ClassMap[WOpI] = ClassW; + ClassMap[LOpI] = ClassL; + ClassMap[NoTestOpI] = ClassNoTest; + } + + // run - Emit arm_neon.h.inc + void run(raw_ostream &o); + + // runHeader - Emit all the __builtin prototypes used in arm_neon.h + void runHeader(raw_ostream &o); + + // runTests - Emit tests for all the Neon intrinsics. + void runTests(raw_ostream &o); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Type implementation +//===----------------------------------------------------------------------===// + +std::string Type::str() const { + if (Void) + return "void"; + std::string S; + + if (!Signed && isInteger()) + S += "u"; + + if (Poly) + S += "poly"; + else if (Float) + S += "float"; + else + S += "int"; + + S += utostr(ElementBitwidth); + if (isVector()) + S += "x" + utostr(getNumElements()); + if (NumVectors > 1) + S += "x" + utostr(NumVectors); + S += "_t"; + + if (Constant) + S += " const"; + if (Pointer) + S += " *"; + + return S; +} + +std::string Type::builtin_str() const { + std::string S; + if (isVoid()) + return "v"; + + if (Pointer) + // All pointers are void pointers. + S += "v"; + else if (isInteger()) + switch (ElementBitwidth) { + case 8: S += "c"; break; + case 16: S += "s"; break; + case 32: S += "i"; break; + case 64: S += "Wi"; break; + case 128: S += "LLLi"; break; + default: llvm_unreachable("Unhandled case!"); + } + else + switch (ElementBitwidth) { + case 16: S += "h"; break; + case 32: S += "f"; break; + case 64: S += "d"; break; + default: llvm_unreachable("Unhandled case!"); + } + + if (isChar() && !Pointer) + // Make chars explicitly signed. + S = "S" + S; + else if (isInteger() && !Pointer && !Signed) + S = "U" + S; + + // Constant indices are "int", but have the "constant expression" modifier. + if (isImmediate()) { + assert(isInteger() && isSigned()); + S = "I" + S; + } + + if (isScalar()) { + if (Constant) S += "C"; + if (Pointer) S += "*"; + return S; + } + + std::string Ret; + for (unsigned I = 0; I < NumVectors; ++I) + Ret += "V" + utostr(getNumElements()) + S; + + return Ret; +} + +unsigned Type::getNeonEnum() const { + unsigned Addend; + switch (ElementBitwidth) { + case 8: Addend = 0; break; + case 16: Addend = 1; break; + case 32: Addend = 2; break; + case 64: Addend = 3; break; + case 128: Addend = 4; break; + default: llvm_unreachable("Unhandled element bitwidth!"); + } + + unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend; + if (Poly) { + // Adjustment needed because Poly32 doesn't exist. + if (Addend >= 2) + --Addend; + Base = (unsigned)NeonTypeFlags::Poly8 + Addend; + } + if (Float) { + assert(Addend != 0 && "Float8 doesn't exist!"); + Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1); + } + + if (Bitwidth == 128) + Base |= (unsigned)NeonTypeFlags::QuadFlag; + if (isInteger() && !Signed) + Base |= (unsigned)NeonTypeFlags::UnsignedFlag; + + return Base; +} + +Type Type::fromTypedefName(StringRef Name) { + Type T; + T.Void = false; + T.Float = false; + T.Poly = false; + + if (Name.front() == 'u') { + T.Signed = false; + Name = Name.drop_front(); + } else { + T.Signed = true; + } + + if (Name.startswith("float")) { + T.Float = true; + Name = Name.drop_front(5); + } else if (Name.startswith("poly")) { + T.Poly = true; + Name = Name.drop_front(4); + } else { + assert(Name.startswith("int")); + Name = Name.drop_front(3); + } + + unsigned I = 0; + for (I = 0; I < Name.size(); ++I) { + if (!isdigit(Name[I])) + break; + } + Name.substr(0, I).getAsInteger(10, T.ElementBitwidth); + Name = Name.drop_front(I); + + T.Bitwidth = T.ElementBitwidth; + T.NumVectors = 1; + + if (Name.front() == 'x') { + Name = Name.drop_front(); + unsigned I = 0; + for (I = 0; I < Name.size(); ++I) { + if (!isdigit(Name[I])) + break; + } + unsigned NumLanes; + Name.substr(0, I).getAsInteger(10, NumLanes); + Name = Name.drop_front(I); + T.Bitwidth = T.ElementBitwidth * NumLanes; + } else { + // Was scalar. + T.NumVectors = 0; + } + if (Name.front() == 'x') { + Name = Name.drop_front(); + unsigned I = 0; + for (I = 0; I < Name.size(); ++I) { + if (!isdigit(Name[I])) + break; + } + Name.substr(0, I).getAsInteger(10, T.NumVectors); + Name = Name.drop_front(I); + } + + assert(Name.startswith("_t") && "Malformed typedef!"); + return T; +} + +void Type::applyTypespec(bool &Quad) { + std::string S = TS; + ScalarForMangling = false; + Void = false; + Poly = Float = false; + ElementBitwidth = ~0U; + Signed = true; + NumVectors = 1; + + for (char I : S) { + switch (I) { + case 'S': + ScalarForMangling = true; + break; + case 'H': + NoManglingQ = true; + Quad = true; + break; + case 'Q': + Quad = true; + break; + case 'P': + Poly = true; + break; + case 'U': + Signed = false; + break; + case 'c': + ElementBitwidth = 8; + break; + case 'h': + Float = true; + // Fall through + case 's': + ElementBitwidth = 16; + break; + case 'f': + Float = true; + // Fall through + case 'i': + ElementBitwidth = 32; + break; + case 'd': + Float = true; + // Fall through + case 'l': + ElementBitwidth = 64; + break; + case 'k': + ElementBitwidth = 128; + // Poly doesn't have a 128x1 type. + if (Poly) + NumVectors = 0; + break; + default: + llvm_unreachable("Unhandled type code!"); + } + } + assert(ElementBitwidth != ~0U && "Bad element bitwidth!"); + + Bitwidth = Quad ? 128 : 64; +} + +void Type::applyModifier(char Mod) { + bool AppliedQuad = false; + applyTypespec(AppliedQuad); + + switch (Mod) { + case 'v': + Void = true; + break; + case 't': + if (Poly) { + Poly = false; + Signed = false; + } + break; + case 'b': + Signed = false; + Float = false; + Poly = false; + NumVectors = 0; + Bitwidth = ElementBitwidth; + break; + case '$': + Signed = true; + Float = false; + Poly = false; + NumVectors = 0; + Bitwidth = ElementBitwidth; + break; + case 'u': + Signed = false; + Poly = false; + Float = false; + break; + case 'x': + Signed = true; + assert(!Poly && "'u' can't be used with poly types!"); + Float = false; + break; + case 'o': + Bitwidth = ElementBitwidth = 64; + NumVectors = 0; + Float = true; + break; + case 'y': + Bitwidth = ElementBitwidth = 32; + NumVectors = 0; + Float = true; + break; + case 'f': + Float = true; + ElementBitwidth = 32; + break; + case 'F': + Float = true; + ElementBitwidth = 64; + break; + case 'g': + if (AppliedQuad) + Bitwidth /= 2; + break; + case 'j': + if (!AppliedQuad) + Bitwidth *= 2; + break; + case 'w': + ElementBitwidth *= 2; + Bitwidth *= 2; + break; + case 'n': + ElementBitwidth *= 2; + break; + case 'i': + Float = false; + Poly = false; + ElementBitwidth = Bitwidth = 32; + NumVectors = 0; + Signed = true; + Immediate = true; + break; + case 'l': + Float = false; + Poly = false; + ElementBitwidth = Bitwidth = 64; + NumVectors = 0; + Signed = false; + Immediate = true; + break; + case 'z': + ElementBitwidth /= 2; + Bitwidth = ElementBitwidth; + NumVectors = 0; + break; + case 'r': + ElementBitwidth *= 2; + Bitwidth = ElementBitwidth; + NumVectors = 0; + break; + case 's': + case 'a': + Bitwidth = ElementBitwidth; + NumVectors = 0; + break; + case 'k': + Bitwidth *= 2; + break; + case 'c': + Constant = true; + // Fall through + case 'p': + Pointer = true; + Bitwidth = ElementBitwidth; + NumVectors = 0; + break; + case 'h': + ElementBitwidth /= 2; + break; + case 'q': + ElementBitwidth /= 2; + Bitwidth *= 2; + break; + case 'e': + ElementBitwidth /= 2; + Signed = false; + break; + case 'm': + ElementBitwidth /= 2; + Bitwidth /= 2; + break; + case 'd': + break; + case '2': + NumVectors = 2; + break; + case '3': + NumVectors = 3; + break; + case '4': + NumVectors = 4; + break; + case 'B': + NumVectors = 2; + if (!AppliedQuad) + Bitwidth *= 2; + break; + case 'C': + NumVectors = 3; + if (!AppliedQuad) + Bitwidth *= 2; + break; + case 'D': + NumVectors = 4; + if (!AppliedQuad) + Bitwidth *= 2; + break; + default: + llvm_unreachable("Unhandled character!"); + } +} + +//===----------------------------------------------------------------------===// +// Intrinsic implementation +//===----------------------------------------------------------------------===// + +std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { + char typeCode = '\0'; + bool printNumber = true; + + if (CK == ClassB) + return ""; + + if (T.isPoly()) + typeCode = 'p'; + else if (T.isInteger()) + typeCode = T.isSigned() ? 's' : 'u'; + else + typeCode = 'f'; + + if (CK == ClassI) { + switch (typeCode) { + default: + break; + case 's': + case 'u': + case 'p': + typeCode = 'i'; + break; + } + } + if (CK == ClassB) { + typeCode = '\0'; + } + + std::string S; + if (typeCode != '\0') + S.push_back(typeCode); + if (printNumber) + S += utostr(T.getElementSizeInBits()); + + return S; +} + +static bool isFloatingPointProtoModifier(char Mod) { + return Mod == 'F' || Mod == 'f'; +} + +std::string Intrinsic::getBuiltinTypeStr() { + ClassKind LocalCK = getClassKind(true); + std::string S; + + Type RetT = getReturnType(); + if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && + !RetT.isFloating()) + RetT.makeInteger(RetT.getElementSizeInBits(), false); + + // Since the return value must be one type, return a vector type of the + // appropriate width which we will bitcast. An exception is made for + // returning structs of 2, 3, or 4 vectors which are returned in a sret-like + // fashion, storing them to a pointer arg. + if (RetT.getNumVectors() > 1) { + S += "vv*"; // void result with void* first argument + } else { + if (RetT.isPoly()) + RetT.makeInteger(RetT.getElementSizeInBits(), false); + if (!RetT.isScalar() && !RetT.isSigned()) + RetT.makeSigned(); + + bool ForcedVectorFloatingType = isFloatingPointProtoModifier(Proto[0]); + if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType) + // Cast to vector of 8-bit elements. + RetT.makeInteger(8, true); + + S += RetT.builtin_str(); + } + + for (unsigned I = 0; I < getNumParams(); ++I) { + Type T = getParamType(I); + if (T.isPoly()) + T.makeInteger(T.getElementSizeInBits(), false); + + bool ForcedFloatingType = isFloatingPointProtoModifier(Proto[I + 1]); + if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType) + T.makeInteger(8, true); + // Halves always get converted to 8-bit elements. + if (T.isHalf() && T.isVector() && !T.isScalarForMangling()) + T.makeInteger(8, true); + + if (LocalCK == ClassI) + T.makeSigned(); + + if (hasImmediate() && getImmediateIdx() == I) + T.makeImmediate(32); + + S += T.builtin_str(); + } + + // Extra constant integer to hold type class enum for this function, e.g. s8 + if (LocalCK == ClassB) + S += "i"; + + return S; +} + +std::string Intrinsic::getMangledName(bool ForceClassS) const { + // Check if the prototype has a scalar operand with the type of the vector + // elements. If not, bitcasting the args will take care of arg checking. + // The actual signedness etc. will be taken care of with special enums. + ClassKind LocalCK = CK; + if (!protoHasScalar()) + LocalCK = ClassB; + + return mangleName(Name, ForceClassS ? ClassS : LocalCK); +} + +std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const { + std::string typeCode = getInstTypeCode(BaseType, LocalCK); + std::string S = Name; + + if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" || + Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32") + return Name; + + if (typeCode.size() > 0) { + // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN. + if (Name.size() >= 3 && isdigit(Name.back()) && + Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_') + S.insert(S.length() - 3, "_" + typeCode); + else + S += "_" + typeCode; + } + + if (BaseType != InBaseType) { + // A reinterpret - out the input base type at the end. + S += "_" + getInstTypeCode(InBaseType, LocalCK); + } + + if (LocalCK == ClassB) + S += "_v"; + + // Insert a 'q' before the first '_' character so that it ends up before + // _lane or _n on vector-scalar operations. + if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) { + size_t Pos = S.find('_'); + S.insert(Pos, "q"); + } + + char Suffix = '\0'; + if (BaseType.isScalarForMangling()) { + switch (BaseType.getElementSizeInBits()) { + case 8: Suffix = 'b'; break; + case 16: Suffix = 'h'; break; + case 32: Suffix = 's'; break; + case 64: Suffix = 'd'; break; + default: llvm_unreachable("Bad suffix!"); + } + } + if (Suffix != '\0') { + size_t Pos = S.find('_'); + S.insert(Pos, &Suffix, 1); + } + + return S; +} + +std::string Intrinsic::replaceParamsIn(std::string S) { + while (S.find('$') != std::string::npos) { + size_t Pos = S.find('$'); + size_t End = Pos + 1; + while (isalpha(S[End])) + ++End; + + std::string VarName = S.substr(Pos + 1, End - Pos - 1); + assert_with_loc(Variables.find(VarName) != Variables.end(), + "Variable not defined!"); + S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName()); + } + + return S; +} + +void Intrinsic::initVariables() { + Variables.clear(); + + // Modify the TypeSpec per-argument to get a concrete Type, and create + // known variables for each. + for (unsigned I = 1; I < Proto.size(); ++I) { + char NameC = '0' + (I - 1); + std::string Name = "p"; + Name.push_back(NameC); + + Variables[Name] = Variable(Types[I], Name + VariablePostfix); + } + RetVar = Variable(Types[0], "ret" + VariablePostfix); +} + +void Intrinsic::emitPrototype(StringRef NamePrefix) { + if (UseMacro) + OS << "#define "; + else + OS << "__ai " << Types[0].str() << " "; + + OS << NamePrefix.str() << mangleName(Name, ClassS) << "("; + + for (unsigned I = 0; I < getNumParams(); ++I) { + if (I != 0) + OS << ", "; + + char NameC = '0' + I; + std::string Name = "p"; + Name.push_back(NameC); + assert(Variables.find(Name) != Variables.end()); + Variable &V = Variables[Name]; + + if (!UseMacro) + OS << V.getType().str() << " "; + OS << V.getName(); + } + + OS << ")"; +} + +void Intrinsic::emitOpeningBrace() { + if (UseMacro) + OS << " __extension__ ({"; + else + OS << " {"; + emitNewLine(); +} + +void Intrinsic::emitClosingBrace() { + if (UseMacro) + OS << "})"; + else + OS << "}"; +} + +void Intrinsic::emitNewLine() { + if (UseMacro) + OS << " \\\n"; + else + OS << "\n"; +} + +void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) { + if (Dest.getType().getNumVectors() > 1) { + emitNewLine(); + + for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) { + OS << " " << Dest.getName() << ".val[" << utostr(K) << "] = " + << "__builtin_shufflevector(" + << Src.getName() << ".val[" << utostr(K) << "], " + << Src.getName() << ".val[" << utostr(K) << "]"; + for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) + OS << ", " << utostr(J); + OS << ");"; + emitNewLine(); + } + } else { + OS << " " << Dest.getName() + << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName(); + for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J) + OS << ", " << utostr(J); + OS << ");"; + emitNewLine(); + } +} + +void Intrinsic::emitArgumentReversal() { + if (BigEndianSafe) + return; + + // Reverse all vector arguments. + for (unsigned I = 0; I < getNumParams(); ++I) { + std::string Name = "p" + utostr(I); + std::string NewName = "rev" + utostr(I); + + Variable &V = Variables[Name]; + Variable NewV(V.getType(), NewName + VariablePostfix); + + if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1) + continue; + + OS << " " << NewV.getType().str() << " " << NewV.getName() << ";"; + emitReverseVariable(NewV, V); + V = NewV; + } +} + +void Intrinsic::emitReturnReversal() { + if (BigEndianSafe) + return; + if (!getReturnType().isVector() || getReturnType().isVoid() || + getReturnType().getNumElements() == 1) + return; + emitReverseVariable(RetVar, RetVar); +} + + +void Intrinsic::emitShadowedArgs() { + // Macro arguments are not type-checked like inline function arguments, + // so assign them to local temporaries to get the right type checking. + if (!UseMacro) + return; + + for (unsigned I = 0; I < getNumParams(); ++I) { + // Do not create a temporary for an immediate argument. + // That would defeat the whole point of using a macro! + if (hasImmediate() && Proto[I+1] == 'i') + continue; + // Do not create a temporary for pointer arguments. The input + // pointer may have an alignment hint. + if (getParamType(I).isPointer()) + continue; + + std::string Name = "p" + utostr(I); + + assert(Variables.find(Name) != Variables.end()); + Variable &V = Variables[Name]; + + std::string NewName = "s" + utostr(I); + Variable V2(V.getType(), NewName + VariablePostfix); + + OS << " " << V2.getType().str() << " " << V2.getName() << " = " + << V.getName() << ";"; + emitNewLine(); + + V = V2; + } +} + +// We don't check 'a' in this function, because for builtin function the +// argument matching to 'a' uses a vector type splatted from a scalar type. +bool Intrinsic::protoHasScalar() const { + return (Proto.find('s') != std::string::npos || + Proto.find('z') != std::string::npos || + Proto.find('r') != std::string::npos || + Proto.find('b') != std::string::npos || + Proto.find('$') != std::string::npos || + Proto.find('y') != std::string::npos || + Proto.find('o') != std::string::npos); +} + +void Intrinsic::emitBodyAsBuiltinCall() { + std::string S; + + // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit + // sret-like argument. + bool SRet = getReturnType().getNumVectors() >= 2; + + StringRef N = Name; + if (hasSplat()) { + // Call the non-splat builtin: chop off the "_n" suffix from the name. + assert(N.endswith("_n")); + N = N.drop_back(2); + } + + ClassKind LocalCK = CK; + if (!protoHasScalar()) + LocalCK = ClassB; + + if (!getReturnType().isVoid() && !SRet) + S += "(" + RetVar.getType().str() + ") "; + + S += "__builtin_neon_" + mangleName(N, LocalCK) + "("; + + if (SRet) + S += "&" + RetVar.getName() + ", "; + + for (unsigned I = 0; I < getNumParams(); ++I) { + Variable &V = Variables["p" + utostr(I)]; + Type T = V.getType(); + + // Handle multiple-vector values specially, emitting each subvector as an + // argument to the builtin. + if (T.getNumVectors() > 1) { + // Check if an explicit cast is needed. + std::string Cast; + if (T.isChar() || T.isPoly() || !T.isSigned()) { + Type T2 = T; + T2.makeOneVector(); + T2.makeInteger(8, /*Signed=*/true); + Cast = "(" + T2.str() + ")"; + } + + for (unsigned J = 0; J < T.getNumVectors(); ++J) + S += Cast + V.getName() + ".val[" + utostr(J) + "], "; + continue; + } + + std::string Arg; + Type CastToType = T; + if (hasSplat() && I == getSplatIdx()) { + Arg = "(" + BaseType.str() + ") {"; + for (unsigned J = 0; J < BaseType.getNumElements(); ++J) { + if (J != 0) + Arg += ", "; + Arg += V.getName(); + } + Arg += "}"; + + CastToType = BaseType; + } else { + Arg = V.getName(); + } + + // Check if an explicit cast is needed. + if (CastToType.isVector()) { + CastToType.makeInteger(8, true); + Arg = "(" + CastToType.str() + ")" + Arg; + } + + S += Arg + ", "; + } + + // Extra constant integer to hold type class enum for this function, e.g. s8 + if (getClassKind(true) == ClassB) { + Type ThisTy = getReturnType(); + if (Proto[0] == 'v' || isFloatingPointProtoModifier(Proto[0])) + ThisTy = getParamType(0); + if (ThisTy.isPointer()) + ThisTy = getParamType(1); + + S += utostr(ThisTy.getNeonEnum()); + } else { + // Remove extraneous ", ". + S.pop_back(); + S.pop_back(); + } + S += ");"; + + std::string RetExpr; + if (!SRet && !RetVar.getType().isVoid()) + RetExpr = RetVar.getName() + " = "; + + OS << " " << RetExpr << S; + emitNewLine(); +} + +void Intrinsic::emitBody(StringRef CallPrefix) { + std::vector<std::string> Lines; + + assert(RetVar.getType() == Types[0]); + // Create a return variable, if we're not void. + if (!RetVar.getType().isVoid()) { + OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; + emitNewLine(); + } + + if (!Body || Body->getValues().size() == 0) { + // Nothing specific to output - must output a builtin. + emitBodyAsBuiltinCall(); + return; + } + + // We have a list of "things to output". The last should be returned. + for (auto *I : Body->getValues()) { + if (StringInit *SI = dyn_cast<StringInit>(I)) { + Lines.push_back(replaceParamsIn(SI->getAsString())); + } else if (DagInit *DI = dyn_cast<DagInit>(I)) { + DagEmitter DE(*this, CallPrefix); + Lines.push_back(DE.emitDag(DI).second + ";"); + } + } + + assert(!Lines.empty() && "Empty def?"); + if (!RetVar.getType().isVoid()) + Lines.back().insert(0, RetVar.getName() + " = "); + + for (auto &L : Lines) { + OS << " " << L; + emitNewLine(); + } +} + +void Intrinsic::emitReturn() { + if (RetVar.getType().isVoid()) + return; + if (UseMacro) + OS << " " << RetVar.getName() << ";"; + else + OS << " return " << RetVar.getName() << ";"; + emitNewLine(); +} + +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) { + // At this point we should only be seeing a def. + DefInit *DefI = cast<DefInit>(DI->getOperator()); + std::string Op = DefI->getAsString(); + + if (Op == "cast" || Op == "bitcast") + return emitDagCast(DI, Op == "bitcast"); + if (Op == "shuffle") + return emitDagShuffle(DI); + if (Op == "dup") + return emitDagDup(DI); + if (Op == "splat") + return emitDagSplat(DI); + if (Op == "save_temp") + return emitDagSaveTemp(DI); + if (Op == "op") + return emitDagOp(DI); + if (Op == "call") + return emitDagCall(DI); + if (Op == "name_replace") + return emitDagNameReplace(DI); + if (Op == "literal") + return emitDagLiteral(DI); + assert_with_loc(false, "Unknown operation!"); + return std::make_pair(Type::getVoid(), ""); +} + +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) { + std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); + if (DI->getNumArgs() == 2) { + // Unary op. + std::pair<Type, std::string> R = + emitDagArg(DI->getArg(1), DI->getArgName(1)); + return std::make_pair(R.first, Op + R.second); + } else { + assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!"); + std::pair<Type, std::string> R1 = + emitDagArg(DI->getArg(1), DI->getArgName(1)); + std::pair<Type, std::string> R2 = + emitDagArg(DI->getArg(2), DI->getArgName(2)); + assert_with_loc(R1.first == R2.first, "Argument type mismatch!"); + return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second); + } +} + +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) { + std::vector<Type> Types; + std::vector<std::string> Values; + for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { + std::pair<Type, std::string> R = + emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1)); + Types.push_back(R.first); + Values.push_back(R.second); + } + + // Look up the called intrinsic. + std::string N; + if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) + N = SI->getAsUnquotedString(); + else + N = emitDagArg(DI->getArg(0), "").second; + Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types); + + // Make sure the callee is known as an early def. + Callee.setNeededEarly(); + Intr.Dependencies.insert(&Callee); + + // Now create the call itself. + std::string S = CallPrefix.str() + Callee.getMangledName(true) + "("; + for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) { + if (I != 0) + S += ", "; + S += Values[I]; + } + S += ")"; + + return std::make_pair(Callee.getReturnType(), S); +} + +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI, + bool IsBitCast){ + // (cast MOD* VAL) -> cast VAL to type given by MOD. + std::pair<Type, std::string> R = emitDagArg( + DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1)); + Type castToType = R.first; + for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) { + + // MOD can take several forms: + // 1. $X - take the type of parameter / variable X. + // 2. The value "R" - take the type of the return type. + // 3. a type string + // 4. The value "U" or "S" to switch the signedness. + // 5. The value "H" or "D" to half or double the bitwidth. + // 6. The value "8" to convert to 8-bit (signed) integer lanes. + if (DI->getArgName(ArgIdx).size()) { + assert_with_loc(Intr.Variables.find(DI->getArgName(ArgIdx)) != + Intr.Variables.end(), + "Variable not found"); + castToType = Intr.Variables[DI->getArgName(ArgIdx)].getType(); + } else { + StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx)); + assert_with_loc(SI, "Expected string type or $Name for cast type"); + + if (SI->getAsUnquotedString() == "R") { + castToType = Intr.getReturnType(); + } else if (SI->getAsUnquotedString() == "U") { + castToType.makeUnsigned(); + } else if (SI->getAsUnquotedString() == "S") { + castToType.makeSigned(); + } else if (SI->getAsUnquotedString() == "H") { + castToType.halveLanes(); + } else if (SI->getAsUnquotedString() == "D") { + castToType.doubleLanes(); + } else if (SI->getAsUnquotedString() == "8") { + castToType.makeInteger(8, true); + } else { + castToType = Type::fromTypedefName(SI->getAsUnquotedString()); + assert_with_loc(!castToType.isVoid(), "Unknown typedef"); + } + } + } + + std::string S; + if (IsBitCast) { + // Emit a reinterpret cast. The second operand must be an lvalue, so create + // a temporary. + std::string N = "reint"; + unsigned I = 0; + while (Intr.Variables.find(N) != Intr.Variables.end()) + N = "reint" + utostr(++I); + Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix); + + Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = " + << R.second << ";"; + Intr.emitNewLine(); + + S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + ""; + } else { + // Emit a normal (static) cast. + S = "(" + castToType.str() + ")(" + R.second + ")"; + } + + return std::make_pair(castToType, S); +} + +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){ + // See the documentation in arm_neon.td for a description of these operators. + class LowHalf : public SetTheory::Operator { + public: + void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, + ArrayRef<SMLoc> Loc) override { + SetTheory::RecSet Elts2; + ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); + Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2)); + } + }; + class HighHalf : public SetTheory::Operator { + public: + void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, + ArrayRef<SMLoc> Loc) override { + SetTheory::RecSet Elts2; + ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc); + Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end()); + } + }; + class Rev : public SetTheory::Operator { + unsigned ElementSize; + + public: + Rev(unsigned ElementSize) : ElementSize(ElementSize) {} + void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, + ArrayRef<SMLoc> Loc) override { + SetTheory::RecSet Elts2; + ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc); + + int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue(); + VectorSize /= ElementSize; + + std::vector<Record *> Revved; + for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) { + for (int LI = VectorSize - 1; LI >= 0; --LI) { + Revved.push_back(Elts2[VI + LI]); + } + } + + Elts.insert(Revved.begin(), Revved.end()); + } + }; + class MaskExpander : public SetTheory::Expander { + unsigned N; + + public: + MaskExpander(unsigned N) : N(N) {} + void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override { + unsigned Addend = 0; + if (R->getName() == "mask0") + Addend = 0; + else if (R->getName() == "mask1") + Addend = N; + else + return; + for (unsigned I = 0; I < N; ++I) + Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend))); + } + }; + + // (shuffle arg1, arg2, sequence) + std::pair<Type, std::string> Arg1 = + emitDagArg(DI->getArg(0), DI->getArgName(0)); + std::pair<Type, std::string> Arg2 = + emitDagArg(DI->getArg(1), DI->getArgName(1)); + assert_with_loc(Arg1.first == Arg2.first, + "Different types in arguments to shuffle!"); + + SetTheory ST; + SetTheory::RecSet Elts; + ST.addOperator("lowhalf", llvm::make_unique<LowHalf>()); + ST.addOperator("highhalf", llvm::make_unique<HighHalf>()); + ST.addOperator("rev", + llvm::make_unique<Rev>(Arg1.first.getElementSizeInBits())); + ST.addExpander("MaskExpand", + llvm::make_unique<MaskExpander>(Arg1.first.getNumElements())); + ST.evaluate(DI->getArg(2), Elts, None); + + std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second; + for (auto &E : Elts) { + StringRef Name = E->getName(); + assert_with_loc(Name.startswith("sv"), + "Incorrect element kind in shuffle mask!"); + S += ", " + Name.drop_front(2).str(); + } + S += ")"; + + // Recalculate the return type - the shuffle may have halved or doubled it. + Type T(Arg1.first); + if (Elts.size() > T.getNumElements()) { + assert_with_loc( + Elts.size() == T.getNumElements() * 2, + "Can only double or half the number of elements in a shuffle!"); + T.doubleLanes(); + } else if (Elts.size() < T.getNumElements()) { + assert_with_loc( + Elts.size() == T.getNumElements() / 2, + "Can only double or half the number of elements in a shuffle!"); + T.halveLanes(); + } + + return std::make_pair(T, S); +} + +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) { + assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument"); + std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0)); + assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument"); + + Type T = Intr.getBaseType(); + assert_with_loc(T.isVector(), "dup() used but default type is scalar!"); + std::string S = "(" + T.str() + ") {"; + for (unsigned I = 0; I < T.getNumElements(); ++I) { + if (I != 0) + S += ", "; + S += A.second; + } + S += "}"; + + return std::make_pair(T, S); +} + +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) { + assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments"); + std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0)); + std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), DI->getArgName(1)); + + assert_with_loc(B.first.isScalar(), + "splat() requires a scalar int as the second argument"); + + std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second; + for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) { + S += ", " + B.second; + } + S += ")"; + + return std::make_pair(Intr.getBaseType(), S); +} + +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) { + assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments"); + std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1)); + + assert_with_loc(!A.first.isVoid(), + "Argument to save_temp() must have non-void type!"); + + std::string N = DI->getArgName(0); + assert_with_loc(N.size(), "save_temp() expects a name as the first argument"); + + assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(), + "Variable already defined!"); + Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix); + + std::string S = + A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second; + + return std::make_pair(Type::getVoid(), S); +} + +std::pair<Type, std::string> +Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) { + std::string S = Intr.Name; + + assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!"); + std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); + std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); + + size_t Idx = S.find(ToReplace); + + assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!"); + S.replace(Idx, ToReplace.size(), ReplaceWith); + + return std::make_pair(Type::getVoid(), S); +} + +std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){ + std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString(); + std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString(); + return std::make_pair(Type::fromTypedefName(Ty), Value); +} + +std::pair<Type, std::string> +Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) { + if (ArgName.size()) { + assert_with_loc(!Arg->isComplete(), + "Arguments must either be DAGs or names, not both!"); + assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(), + "Variable not defined!"); + Variable &V = Intr.Variables[ArgName]; + return std::make_pair(V.getType(), V.getName()); + } + + assert(Arg && "Neither ArgName nor Arg?!"); + DagInit *DI = dyn_cast<DagInit>(Arg); + assert_with_loc(DI, "Arguments must either be DAGs or names!"); + + return emitDag(DI); +} + +std::string Intrinsic::generate() { + // Little endian intrinsics are simple and don't require any argument + // swapping. + OS << "#ifdef __LITTLE_ENDIAN__\n"; + + generateImpl(false, "", ""); + + OS << "#else\n"; + + // Big endian intrinsics are more complex. The user intended these + // intrinsics to operate on a vector "as-if" loaded by (V)LDR, + // but we load as-if (V)LD1. So we should swap all arguments and + // swap the return value too. + // + // If we call sub-intrinsics, we should call a version that does + // not re-swap the arguments! + generateImpl(true, "", "__noswap_"); + + // If we're needed early, create a non-swapping variant for + // big-endian. + if (NeededEarly) { + generateImpl(false, "__noswap_", "__noswap_"); + } + OS << "#endif\n\n"; + + return OS.str(); +} + +void Intrinsic::generateImpl(bool ReverseArguments, + StringRef NamePrefix, StringRef CallPrefix) { + CurrentRecord = R; + + // If we call a macro, our local variables may be corrupted due to + // lack of proper lexical scoping. So, add a globally unique postfix + // to every variable. + // + // indexBody() should have set up the Dependencies set by now. + for (auto *I : Dependencies) + if (I->UseMacro) { + VariablePostfix = "_" + utostr(Emitter.getUniqueNumber()); + break; + } + + initVariables(); + + emitPrototype(NamePrefix); + + if (IsUnavailable) { + OS << " __attribute__((unavailable));"; + } else { + emitOpeningBrace(); + emitShadowedArgs(); + if (ReverseArguments) + emitArgumentReversal(); + emitBody(CallPrefix); + if (ReverseArguments) + emitReturnReversal(); + emitReturn(); + emitClosingBrace(); + } + OS << "\n"; + + CurrentRecord = nullptr; +} + +void Intrinsic::indexBody() { + CurrentRecord = R; + + initVariables(); + emitBody(""); + OS.str(""); + + CurrentRecord = nullptr; +} + +//===----------------------------------------------------------------------===// +// NeonEmitter implementation +//===----------------------------------------------------------------------===// + +Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) { + // First, look up the name in the intrinsic map. + assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(), + ("Intrinsic '" + Name + "' not found!").str()); + auto &V = IntrinsicMap.find(Name.str())->second; + std::vector<Intrinsic *> GoodVec; + + // Create a string to print if we end up failing. + std::string ErrMsg = "looking up intrinsic '" + Name.str() + "("; + for (unsigned I = 0; I < Types.size(); ++I) { + if (I != 0) + ErrMsg += ", "; + ErrMsg += Types[I].str(); + } + ErrMsg += ")'\n"; + ErrMsg += "Available overloads:\n"; + + // Now, look through each intrinsic implementation and see if the types are + // compatible. + for (auto &I : V) { + ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName(); + ErrMsg += "("; + for (unsigned A = 0; A < I.getNumParams(); ++A) { + if (A != 0) + ErrMsg += ", "; + ErrMsg += I.getParamType(A).str(); + } + ErrMsg += ")\n"; + + if (I.getNumParams() != Types.size()) + continue; + + bool Good = true; + for (unsigned Arg = 0; Arg < Types.size(); ++Arg) { + if (I.getParamType(Arg) != Types[Arg]) { + Good = false; + break; + } + } + if (Good) + GoodVec.push_back(&I); + } + + assert_with_loc(GoodVec.size() > 0, + "No compatible intrinsic found - " + ErrMsg); + assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg); + + return *GoodVec.front(); +} + +void NeonEmitter::createIntrinsic(Record *R, + SmallVectorImpl<Intrinsic *> &Out) { + std::string Name = R->getValueAsString("Name"); + std::string Proto = R->getValueAsString("Prototype"); + std::string Types = R->getValueAsString("Types"); + Record *OperationRec = R->getValueAsDef("Operation"); + bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes"); + bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); + std::string Guard = R->getValueAsString("ArchGuard"); + bool IsUnavailable = OperationRec->getValueAsBit("Unavailable"); + + // Set the global current record. This allows assert_with_loc to produce + // decent location information even when highly nested. + CurrentRecord = R; + + ListInit *Body = OperationRec->getValueAsListInit("Ops"); + + std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types); + + ClassKind CK = ClassNone; + if (R->getSuperClasses().size() >= 2) + CK = ClassMap[R->getSuperClasses()[1]]; + + std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs; + for (auto TS : TypeSpecs) { + if (CartesianProductOfTypes) { + Type DefaultT(TS, 'd'); + for (auto SrcTS : TypeSpecs) { + Type DefaultSrcT(SrcTS, 'd'); + if (TS == SrcTS || + DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) + continue; + NewTypeSpecs.push_back(std::make_pair(TS, SrcTS)); + } + } else { + NewTypeSpecs.push_back(std::make_pair(TS, TS)); + } + } + + std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end()); + NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()), + NewTypeSpecs.end()); + auto &Entry = IntrinsicMap[Name]; + + for (auto &I : NewTypeSpecs) { + Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this, + Guard, IsUnavailable, BigEndianSafe); + Out.push_back(&Entry.back()); + } + + CurrentRecord = nullptr; +} + +/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def +/// declaration of builtins, checking for unique builtin declarations. +void NeonEmitter::genBuiltinsDef(raw_ostream &OS, + SmallVectorImpl<Intrinsic *> &Defs) { + OS << "#ifdef GET_NEON_BUILTINS\n"; + + // We only want to emit a builtin once, and we want to emit them in + // alphabetical order, so use a std::set. + std::set<std::string> Builtins; + + for (auto *Def : Defs) { + if (Def->hasBody()) + continue; + // Functions with 'a' (the splat code) in the type prototype should not get + // their own builtin as they use the non-splat variant. + if (Def->hasSplat()) + continue; + + std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \""; + + S += Def->getBuiltinTypeStr(); + S += "\", \"n\")"; + + Builtins.insert(S); + } + + for (auto &S : Builtins) + OS << S << "\n"; + OS << "#endif\n\n"; +} + +/// Generate the ARM and AArch64 overloaded type checking code for +/// SemaChecking.cpp, checking for unique builtin declarations. +void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, + SmallVectorImpl<Intrinsic *> &Defs) { + OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; + + // We record each overload check line before emitting because subsequent Inst + // definitions may extend the number of permitted types (i.e. augment the + // Mask). Use std::map to avoid sorting the table by hash number. + struct OverloadInfo { + uint64_t Mask; + int PtrArgNum; + bool HasConstPtr; + OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {} + }; + std::map<std::string, OverloadInfo> OverloadMap; + + for (auto *Def : Defs) { + // If the def has a body (that is, it has Operation DAGs), it won't call + // __builtin_neon_* so we don't need to generate a definition for it. + if (Def->hasBody()) + continue; + // Functions with 'a' (the splat code) in the type prototype should not get + // their own builtin as they use the non-splat variant. + if (Def->hasSplat()) + continue; + // Functions which have a scalar argument cannot be overloaded, no need to + // check them if we are emitting the type checking code. + if (Def->protoHasScalar()) + continue; + + uint64_t Mask = 0ULL; + Type Ty = Def->getReturnType(); + if (Def->getProto()[0] == 'v' || + isFloatingPointProtoModifier(Def->getProto()[0])) + Ty = Def->getParamType(0); + if (Ty.isPointer()) + Ty = Def->getParamType(1); + + Mask |= 1ULL << Ty.getNeonEnum(); + + // Check if the function has a pointer or const pointer argument. + std::string Proto = Def->getProto(); + int PtrArgNum = -1; + bool HasConstPtr = false; + for (unsigned I = 0; I < Def->getNumParams(); ++I) { + char ArgType = Proto[I + 1]; + if (ArgType == 'c') { + HasConstPtr = true; + PtrArgNum = I; + break; + } + if (ArgType == 'p') { + PtrArgNum = I; + break; + } + } + // For sret builtins, adjust the pointer argument index. + if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) + PtrArgNum += 1; + + std::string Name = Def->getName(); + // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, + // and vst1_lane intrinsics. Using a pointer to the vector element + // type with one of those operations causes codegen to select an aligned + // load/store instruction. If you want an unaligned operation, + // the pointer argument needs to have less alignment than element type, + // so just accept any pointer type. + if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") { + PtrArgNum = -1; + HasConstPtr = false; + } + + if (Mask) { + std::string Name = Def->getMangledName(); + OverloadMap.insert(std::make_pair(Name, OverloadInfo())); + OverloadInfo &OI = OverloadMap[Name]; + OI.Mask |= Mask; + OI.PtrArgNum |= PtrArgNum; + OI.HasConstPtr = HasConstPtr; + } + } + + for (auto &I : OverloadMap) { + OverloadInfo &OI = I.second; + + OS << "case NEON::BI__builtin_neon_" << I.first << ": "; + OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL"; + if (OI.PtrArgNum >= 0) + OS << "; PtrArgNum = " << OI.PtrArgNum; + if (OI.HasConstPtr) + OS << "; HasConstPtr = true"; + OS << "; break;\n"; + } + OS << "#endif\n\n"; +} + +void +NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, + SmallVectorImpl<Intrinsic *> &Defs) { + OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; + + std::set<std::string> Emitted; + + for (auto *Def : Defs) { + if (Def->hasBody()) + continue; + // Functions with 'a' (the splat code) in the type prototype should not get + // their own builtin as they use the non-splat variant. + if (Def->hasSplat()) + continue; + // Functions which do not have an immediate do not need to have range + // checking code emitted. + if (!Def->hasImmediate()) + continue; + if (Emitted.find(Def->getMangledName()) != Emitted.end()) + continue; + + std::string LowerBound, UpperBound; + + Record *R = Def->getRecord(); + if (R->getValueAsBit("isVCVT_N")) { + // VCVT between floating- and fixed-point values takes an immediate + // in the range [1, 32) for f32 or [1, 64) for f64. + LowerBound = "1"; + if (Def->getBaseType().getElementSizeInBits() == 32) + UpperBound = "31"; + else + UpperBound = "63"; + } else if (R->getValueAsBit("isScalarShift")) { + // Right shifts have an 'r' in the name, left shifts do not. Convert + // instructions have the same bounds and right shifts. + if (Def->getName().find('r') != std::string::npos || + Def->getName().find("cvt") != std::string::npos) + LowerBound = "1"; + + UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1); + } else if (R->getValueAsBit("isShift")) { + // Builtins which are overloaded by type will need to have their upper + // bound computed at Sema time based on the type constant. + + // Right shifts have an 'r' in the name, left shifts do not. + if (Def->getName().find('r') != std::string::npos) + LowerBound = "1"; + UpperBound = "RFT(TV, true)"; + } else if (Def->getClassKind(true) == ClassB) { + // ClassB intrinsics have a type (and hence lane number) that is only + // known at runtime. + if (R->getValueAsBit("isLaneQ")) + UpperBound = "RFT(TV, false, true)"; + else + UpperBound = "RFT(TV, false, false)"; + } else { + // The immediate generally refers to a lane in the preceding argument. + assert(Def->getImmediateIdx() > 0); + Type T = Def->getParamType(Def->getImmediateIdx() - 1); + UpperBound = utostr(T.getNumElements() - 1); + } + + // Calculate the index of the immediate that should be range checked. + unsigned Idx = Def->getNumParams(); + if (Def->hasImmediate()) + Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx()); + + OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": " + << "i = " << Idx << ";"; + if (LowerBound.size()) + OS << " l = " << LowerBound << ";"; + if (UpperBound.size()) + OS << " u = " << UpperBound << ";"; + OS << " break;\n"; + + Emitted.insert(Def->getMangledName()); + } + + OS << "#endif\n\n"; +} + +/// runHeader - Emit a file with sections defining: +/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. +/// 2. the SemaChecking code for the type overload checking. +/// 3. the SemaChecking code for validation of intrinsic immediate arguments. +void NeonEmitter::runHeader(raw_ostream &OS) { + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); + + SmallVector<Intrinsic *, 128> Defs; + for (auto *R : RV) + createIntrinsic(R, Defs); + + // Generate shared BuiltinsXXX.def + genBuiltinsDef(OS, Defs); + + // Generate ARM overloaded type checking code for SemaChecking.cpp + genOverloadTypeCheckCode(OS, Defs); + + // Generate ARM range checking code for shift/lane immediates. + genIntrinsicRangeCheckCode(OS, Defs); +} + +/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h +/// is comprised of type definitions and function declarations. +void NeonEmitter::run(raw_ostream &OS) { + OS << "/*===---- arm_neon.h - ARM Neon intrinsics " + "------------------------------" + "---===\n" + " *\n" + " * Permission is hereby granted, free of charge, to any person " + "obtaining " + "a copy\n" + " * of this software and associated documentation files (the " + "\"Software\")," + " to deal\n" + " * in the Software without restriction, including without limitation " + "the " + "rights\n" + " * to use, copy, modify, merge, publish, distribute, sublicense, " + "and/or sell\n" + " * copies of the Software, and to permit persons to whom the Software " + "is\n" + " * furnished to do so, subject to the following conditions:\n" + " *\n" + " * The above copyright notice and this permission notice shall be " + "included in\n" + " * all copies or substantial portions of the Software.\n" + " *\n" + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " + "EXPRESS OR\n" + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " + "MERCHANTABILITY,\n" + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " + "SHALL THE\n" + " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " + "OTHER\n" + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " + "ARISING FROM,\n" + " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " + "DEALINGS IN\n" + " * THE SOFTWARE.\n" + " *\n" + " *===-----------------------------------------------------------------" + "---" + "---===\n" + " */\n\n"; + + OS << "#ifndef __ARM_NEON_H\n"; + OS << "#define __ARM_NEON_H\n\n"; + + OS << "#if !defined(__ARM_NEON)\n"; + OS << "#error \"NEON support not enabled\"\n"; + OS << "#endif\n\n"; + + OS << "#include <stdint.h>\n\n"; + + // Emit NEON-specific scalar typedefs. + OS << "typedef float float32_t;\n"; + OS << "typedef __fp16 float16_t;\n"; + + OS << "#ifdef __aarch64__\n"; + OS << "typedef double float64_t;\n"; + OS << "#endif\n\n"; + + // For now, signedness of polynomial types depends on target + OS << "#ifdef __aarch64__\n"; + OS << "typedef uint8_t poly8_t;\n"; + OS << "typedef uint16_t poly16_t;\n"; + OS << "typedef uint64_t poly64_t;\n"; + OS << "typedef __uint128_t poly128_t;\n"; + OS << "#else\n"; + OS << "typedef int8_t poly8_t;\n"; + OS << "typedef int16_t poly16_t;\n"; + OS << "#endif\n"; + + // Emit Neon vector typedefs. + std::string TypedefTypes( + "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); + std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes); + + // Emit vector typedefs. + bool InIfdef = false; + for (auto &TS : TDTypeVec) { + bool IsA64 = false; + Type T(TS, 'd'); + if (T.isDouble() || (T.isPoly() && T.isLong())) + IsA64 = true; + + if (InIfdef && !IsA64) { + OS << "#endif\n"; + InIfdef = false; + } + if (!InIfdef && IsA64) { + OS << "#ifdef __aarch64__\n"; + InIfdef = true; + } + + if (T.isPoly()) + OS << "typedef __attribute__((neon_polyvector_type("; + else + OS << "typedef __attribute__((neon_vector_type("; + + Type T2 = T; + T2.makeScalar(); + OS << utostr(T.getNumElements()) << "))) "; + OS << T2.str(); + OS << " " << T.str() << ";\n"; + } + if (InIfdef) + OS << "#endif\n"; + OS << "\n"; + + // Emit struct typedefs. + InIfdef = false; + for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { + for (auto &TS : TDTypeVec) { + bool IsA64 = false; + Type T(TS, 'd'); + if (T.isDouble() || (T.isPoly() && T.isLong())) + IsA64 = true; + + if (InIfdef && !IsA64) { + OS << "#endif\n"; + InIfdef = false; + } + if (!InIfdef && IsA64) { + OS << "#ifdef __aarch64__\n"; + InIfdef = true; + } + + char M = '2' + (NumMembers - 2); + Type VT(TS, M); + OS << "typedef struct " << VT.str() << " {\n"; + OS << " " << T.str() << " val"; + OS << "[" << utostr(NumMembers) << "]"; + OS << ";\n} "; + OS << VT.str() << ";\n"; + OS << "\n"; + } + } + if (InIfdef) + OS << "#endif\n"; + OS << "\n"; + + OS << "#define __ai static inline __attribute__((__always_inline__, " + "__nodebug__))\n\n"; + + SmallVector<Intrinsic *, 128> Defs; + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); + for (auto *R : RV) + createIntrinsic(R, Defs); + + for (auto *I : Defs) + I->indexBody(); + + std::stable_sort( + Defs.begin(), Defs.end(), + [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; }); + + // Only emit a def when its requirements have been met. + // FIXME: This loop could be made faster, but it's fast enough for now. + bool MadeProgress = true; + std::string InGuard = ""; + while (!Defs.empty() && MadeProgress) { + MadeProgress = false; + + for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin(); + I != Defs.end(); /*No step*/) { + bool DependenciesSatisfied = true; + for (auto *II : (*I)->getDependencies()) { + if (std::find(Defs.begin(), Defs.end(), II) != Defs.end()) + DependenciesSatisfied = false; + } + if (!DependenciesSatisfied) { + // Try the next one. + ++I; + continue; + } + + // Emit #endif/#if pair if needed. + if ((*I)->getGuard() != InGuard) { + if (!InGuard.empty()) + OS << "#endif\n"; + InGuard = (*I)->getGuard(); + if (!InGuard.empty()) + OS << "#if " << InGuard << "\n"; + } + + // Actually generate the intrinsic code. + OS << (*I)->generate(); + + MadeProgress = true; + I = Defs.erase(I); + } + } + assert(Defs.empty() && "Some requirements were not satisfied!"); + if (!InGuard.empty()) + OS << "#endif\n"; + + OS << "\n"; + OS << "#undef __ai\n\n"; + OS << "#endif /* __ARM_NEON_H */\n"; +} + +namespace clang { +void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { + NeonEmitter(Records).run(OS); +} +void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { + NeonEmitter(Records).runHeader(OS); +} +void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { + llvm_unreachable("Neon test generation no longer implemented!"); +} +} // End namespace clang |