diff options
Diffstat (limited to 'contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp | 1508 |
1 files changed, 1325 insertions, 183 deletions
diff --git a/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp b/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp index 34b955e..b0939c9 100644 --- a/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp +++ b/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp @@ -40,31 +40,58 @@ enum OpKind { OpUnavailable, OpAdd, OpAddl, + OpAddlHi, OpAddw, + OpAddwHi, OpSub, OpSubl, + OpSublHi, OpSubw, + OpSubwHi, OpMul, OpMla, OpMlal, + OpMullHi, + OpMullHiN, + OpMlalHi, + OpMlalHiN, OpMls, OpMlsl, + OpMlslHi, + OpMlslHiN, OpMulN, OpMlaN, OpMlsN, + OpFMlaN, + OpFMlsN, OpMlalN, OpMlslN, OpMulLane, + OpMulXLane, OpMullLane, + OpMullHiLane, OpMlaLane, OpMlsLane, OpMlalLane, + OpMlalHiLane, OpMlslLane, + OpMlslHiLane, OpQDMullLane, + OpQDMullHiLane, OpQDMlalLane, + OpQDMlalHiLane, OpQDMlslLane, + OpQDMlslHiLane, OpQDMulhLane, OpQRDMulhLane, + OpFMSLane, + OpFMSLaneQ, + OpTrn1, + OpZip1, + OpUzp1, + OpTrn2, + OpZip2, + OpUzp2, OpEq, OpGe, OpLe, @@ -87,10 +114,49 @@ enum OpKind { OpRev16, OpRev32, OpRev64, + OpXtnHi, + OpSqxtunHi, + OpQxtnHi, + OpFcvtnHi, + OpFcvtlHi, + OpFcvtxnHi, OpReinterpret, + OpAddhnHi, + OpRAddhnHi, + OpSubhnHi, + OpRSubhnHi, OpAbdl, + OpAbdlHi, OpAba, - OpAbal + OpAbal, + OpAbalHi, + OpQDMullHi, + OpQDMullHiN, + OpQDMlalHi, + OpQDMlalHiN, + OpQDMlslHi, + OpQDMlslHiN, + OpDiv, + OpLongHi, + OpNarrowHi, + OpMovlHi, + OpCopyLane, + OpCopyQLane, + OpCopyLaneQ, + OpScalarMulLane, + OpScalarMulLaneQ, + OpScalarMulXLane, + OpScalarMulXLaneQ, + OpScalarVMulXLane, + OpScalarVMulXLaneQ, + OpScalarQDMullLane, + OpScalarQDMullLaneQ, + OpScalarQDMulHiLane, + OpScalarQDMulHiLaneQ, + OpScalarQRDMulHiLane, + OpScalarQRDMulHiLaneQ, + OpScalarGetLane, + OpScalarSetLane }; enum ClassKind { @@ -126,8 +192,10 @@ public: Int64, Poly8, Poly16, + Poly64, Float16, - Float32 + Float32, + Float64 }; NeonTypeFlags(unsigned F) : Flags(F) {} @@ -154,31 +222,58 @@ public: OpMap["OP_UNAVAILABLE"] = OpUnavailable; OpMap["OP_ADD"] = OpAdd; OpMap["OP_ADDL"] = OpAddl; + 
OpMap["OP_ADDLHi"] = OpAddlHi; OpMap["OP_ADDW"] = OpAddw; + OpMap["OP_ADDWHi"] = OpAddwHi; OpMap["OP_SUB"] = OpSub; OpMap["OP_SUBL"] = OpSubl; + OpMap["OP_SUBLHi"] = OpSublHi; OpMap["OP_SUBW"] = OpSubw; + OpMap["OP_SUBWHi"] = OpSubwHi; OpMap["OP_MUL"] = OpMul; OpMap["OP_MLA"] = OpMla; OpMap["OP_MLAL"] = OpMlal; + OpMap["OP_MULLHi"] = OpMullHi; + OpMap["OP_MULLHi_N"] = OpMullHiN; + OpMap["OP_MLALHi"] = OpMlalHi; + OpMap["OP_MLALHi_N"] = OpMlalHiN; OpMap["OP_MLS"] = OpMls; OpMap["OP_MLSL"] = OpMlsl; + OpMap["OP_MLSLHi"] = OpMlslHi; + OpMap["OP_MLSLHi_N"] = OpMlslHiN; OpMap["OP_MUL_N"] = OpMulN; OpMap["OP_MLA_N"] = OpMlaN; OpMap["OP_MLS_N"] = OpMlsN; + OpMap["OP_FMLA_N"] = OpFMlaN; + OpMap["OP_FMLS_N"] = OpFMlsN; OpMap["OP_MLAL_N"] = OpMlalN; OpMap["OP_MLSL_N"] = OpMlslN; OpMap["OP_MUL_LN"]= OpMulLane; + OpMap["OP_MULX_LN"]= OpMulXLane; OpMap["OP_MULL_LN"] = OpMullLane; + OpMap["OP_MULLHi_LN"] = OpMullHiLane; OpMap["OP_MLA_LN"]= OpMlaLane; OpMap["OP_MLS_LN"]= OpMlsLane; OpMap["OP_MLAL_LN"] = OpMlalLane; + OpMap["OP_MLALHi_LN"] = OpMlalHiLane; OpMap["OP_MLSL_LN"] = OpMlslLane; + OpMap["OP_MLSLHi_LN"] = OpMlslHiLane; OpMap["OP_QDMULL_LN"] = OpQDMullLane; + OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane; OpMap["OP_QDMLAL_LN"] = OpQDMlalLane; + OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane; OpMap["OP_QDMLSL_LN"] = OpQDMlslLane; + OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane; OpMap["OP_QDMULH_LN"] = OpQDMulhLane; OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane; + OpMap["OP_FMS_LN"] = OpFMSLane; + OpMap["OP_FMS_LNQ"] = OpFMSLaneQ; + OpMap["OP_TRN1"] = OpTrn1; + OpMap["OP_ZIP1"] = OpZip1; + OpMap["OP_UZP1"] = OpUzp1; + OpMap["OP_TRN2"] = OpTrn2; + OpMap["OP_ZIP2"] = OpZip2; + OpMap["OP_UZP2"] = OpUzp2; OpMap["OP_EQ"] = OpEq; OpMap["OP_GE"] = OpGe; OpMap["OP_LE"] = OpLe; @@ -201,10 +296,49 @@ public: OpMap["OP_REV16"] = OpRev16; OpMap["OP_REV32"] = OpRev32; OpMap["OP_REV64"] = OpRev64; + OpMap["OP_XTN"] = OpXtnHi; + OpMap["OP_SQXTUN"] = OpSqxtunHi; + OpMap["OP_QXTN"] = OpQxtnHi; + 
OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi; + OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi; + OpMap["OP_VCVTX_HI"] = OpFcvtxnHi; OpMap["OP_REINT"] = OpReinterpret; + OpMap["OP_ADDHNHi"] = OpAddhnHi; + OpMap["OP_RADDHNHi"] = OpRAddhnHi; + OpMap["OP_SUBHNHi"] = OpSubhnHi; + OpMap["OP_RSUBHNHi"] = OpRSubhnHi; OpMap["OP_ABDL"] = OpAbdl; + OpMap["OP_ABDLHi"] = OpAbdlHi; OpMap["OP_ABA"] = OpAba; OpMap["OP_ABAL"] = OpAbal; + OpMap["OP_ABALHi"] = OpAbalHi; + OpMap["OP_QDMULLHi"] = OpQDMullHi; + OpMap["OP_QDMULLHi_N"] = OpQDMullHiN; + OpMap["OP_QDMLALHi"] = OpQDMlalHi; + OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN; + OpMap["OP_QDMLSLHi"] = OpQDMlslHi; + OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN; + OpMap["OP_DIV"] = OpDiv; + OpMap["OP_LONG_HI"] = OpLongHi; + OpMap["OP_NARROW_HI"] = OpNarrowHi; + OpMap["OP_MOVL_HI"] = OpMovlHi; + OpMap["OP_COPY_LN"] = OpCopyLane; + OpMap["OP_COPYQ_LN"] = OpCopyQLane; + OpMap["OP_COPY_LNQ"] = OpCopyLaneQ; + OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane; + OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ; + OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane; + OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ; + OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane; + OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ; + OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane; + OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ; + OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane; + OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ; + OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane; + OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ; + OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane; + OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane; Record *SI = R.getClass("SInst"); Record *II = R.getClass("IInst"); @@ -235,7 +369,18 @@ public: void runTests(raw_ostream &o); private: - void emitIntrinsic(raw_ostream &OS, Record *R); + void emitIntrinsic(raw_ostream &OS, Record *R, + StringMap<ClassKind> &EmittedMap); + void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap, + bool 
isA64GenBuiltinDef); + void genOverloadTypeCheckCode(raw_ostream &OS, + StringMap<ClassKind> &A64IntrinsicMap, + bool isA64TypeCheck); + void genIntrinsicRangeCheckCode(raw_ostream &OS, + StringMap<ClassKind> &A64IntrinsicMap, + bool isA64RangeCheck); + void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, + bool isA64TestGen); }; } // end anonymous namespace @@ -249,7 +394,8 @@ static void ParseTypes(Record *r, std::string &s, int len = 0; for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { - if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U') + if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U' + || data[len] == 'H' || data[len] == 'S') continue; switch (data[len]) { @@ -259,6 +405,7 @@ static void ParseTypes(Record *r, std::string &s, case 'l': case 'h': case 'f': + case 'd': break; default: PrintFatalError(r->getLoc(), @@ -282,6 +429,8 @@ static char Widen(const char t) { return 'l'; case 'h': return 'f'; + case 'f': + return 'd'; default: PrintFatalError("unhandled type in widen!"); } @@ -299,18 +448,46 @@ static char Narrow(const char t) { return 'i'; case 'f': return 'h'; + case 'd': + return 'f'; default: PrintFatalError("unhandled type in narrow!"); } } +static std::string GetNarrowTypestr(StringRef ty) +{ + std::string s; + for (size_t i = 0, end = ty.size(); i < end; i++) { + switch (ty[i]) { + case 's': + s += 'c'; + break; + case 'i': + s += 's'; + break; + case 'l': + s += 'i'; + break; + default: + s += ty[i]; + break; + } + } + + return s; +} + /// For a particular StringRef, return the base type code, and whether it has /// the quad-vector, polynomial, or unsigned modifiers set. static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { unsigned off = 0; - + // ignore scalar. + if (ty[off] == 'S') { + ++off; + } // remember quad. 
- if (ty[off] == 'Q') { + if (ty[off] == 'Q' || ty[off] == 'H') { quad = true; ++off; } @@ -342,27 +519,52 @@ static char ModType(const char mod, char type, bool &quad, bool &poly, usgn = true; } break; + case 'b': + scal = true; case 'u': usgn = true; poly = false; if (type == 'f') type = 'i'; + if (type == 'd') + type = 'l'; break; + case '$': + scal = true; case 'x': usgn = false; poly = false; if (type == 'f') type = 'i'; + if (type == 'd') + type = 'l'; break; + case 'o': + scal = true; + type = 'd'; + usgn = false; + break; + case 'y': + scal = true; case 'f': if (type == 'h') quad = true; type = 'f'; usgn = false; break; + case 'F': + type = 'd'; + usgn = false; + break; case 'g': quad = false; break; + case 'B': + case 'C': + case 'D': + case 'j': + quad = true; + break; case 'w': type = Widen(type); quad = true; @@ -379,6 +581,14 @@ static char ModType(const char mod, char type, bool &quad, bool &poly, scal = true; usgn = true; break; + case 'z': + type = Narrow(type); + scal = true; + break; + case 'r': + type = Widen(type); + scal = true; + break; case 's': case 'a': scal = true; @@ -397,16 +607,28 @@ static char ModType(const char mod, char type, bool &quad, bool &poly, if (type == 'h') quad = false; break; + case 'q': + type = Narrow(type); + quad = true; + break; case 'e': type = Narrow(type); usgn = true; break; + case 'm': + type = Narrow(type); + quad = false; + break; default: break; } return type; } +static bool IsMultiVecProto(const char p) { + return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D')); +} + /// TypeString - for a modifier and type, generate the name of the typedef for /// that type. QUc -> uint8x8_t. static std::string TypeString(const char mod, StringRef typestr) { @@ -453,7 +675,7 @@ static std::string TypeString(const char mod, StringRef typestr) { s += quad ? "x4" : "x2"; break; case 'l': - s += "int64"; + s += (poly && !usgn)? "poly64" : "int64"; if (scal) break; s += quad ? 
"x2" : "x1"; @@ -470,15 +692,22 @@ static std::string TypeString(const char mod, StringRef typestr) { break; s += quad ? "x4" : "x2"; break; + case 'd': + s += "float64"; + if (scal) + break; + s += quad ? "x2" : "x1"; + break; + default: PrintFatalError("unhandled type!"); } - if (mod == '2') + if (mod == '2' || mod == 'B') s += "x2"; - if (mod == '3') + if (mod == '3' || mod == 'C') s += "x3"; - if (mod == '4') + if (mod == '4' || mod == 'D') s += "x4"; // Append _t, finishing the type string typedef type. @@ -527,7 +756,8 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr, type = 's'; usgn = true; } - usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f'); + usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && + scal && type != 'f' && type != 'd'); if (scal) { SmallString<128> s; @@ -554,10 +784,12 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr, // returning structs of 2, 3, or 4 vectors which are returned in a sret-like // fashion, storing them to a pointer arg. if (ret) { - if (mod >= '2' && mod <= '4') + if (IsMultiVecProto(mod)) return "vv*"; // void result with void* first argument if (mod == 'f' || (ck != ClassB && type == 'f')) return quad ? "V4f" : "V2f"; + if (mod == 'F' || (ck != ClassB && type == 'd')) + return quad ? "V2d" : "V1d"; if (ck != ClassB && type == 's') return quad ? "V8s" : "V4s"; if (ck != ClassB && type == 'i') @@ -569,15 +801,17 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr, } // Non-return array types are passed as individual vectors. - if (mod == '2') + if (mod == '2' || mod == 'B') return quad ? "V16ScV16Sc" : "V8ScV8Sc"; - if (mod == '3') + if (mod == '3' || mod == 'C') return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc"; - if (mod == '4') + if (mod == '4' || mod == 'D') return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc"; if (mod == 'f' || (ck != ClassB && type == 'f')) return quad ? 
"V4f" : "V2f"; + if (mod == 'F' || (ck != ClassB && type == 'd')) + return quad ? "V2d" : "V1d"; if (ck != ClassB && type == 's') return quad ? "V8s" : "V4s"; if (ck != ClassB && type == 'i') @@ -625,7 +859,7 @@ static void InstructionTypeCode(const StringRef &typeStr, break; case 'l': switch (ck) { - case ClassS: typeCode = usgn ? "u64" : "s64"; break; + case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break; case ClassI: typeCode = "i64"; break; case ClassW: typeCode = "64"; break; default: break; @@ -647,17 +881,60 @@ static void InstructionTypeCode(const StringRef &typeStr, default: break; } break; + case 'd': + switch (ck) { + case ClassS: + case ClassI: + typeCode += "f64"; + break; + case ClassW: + PrintFatalError("unhandled type!"); + default: + break; + } + break; default: PrintFatalError("unhandled type!"); } } +static char Insert_BHSD_Suffix(StringRef typestr){ + unsigned off = 0; + if(typestr[off++] == 'S'){ + while(typestr[off] == 'Q' || typestr[off] == 'H'|| + typestr[off] == 'P' || typestr[off] == 'U') + ++off; + switch (typestr[off]){ + default : break; + case 'c' : return 'b'; + case 's' : return 'h'; + case 'i' : + case 'f' : return 's'; + case 'l' : + case 'd' : return 'd'; + } + } + return 0; +} + +static bool endsWith_xN(std::string const &name) { + if (name.length() > 3) { + if (name.compare(name.length() - 3, 3, "_x2") == 0 || + name.compare(name.length() - 3, 3, "_x3") == 0 || + name.compare(name.length() - 3, 3, "_x4") == 0) + return true; + } + return false; +} + /// MangleName - Append a type or width suffix to a base neon function name, -/// and insert a 'q' in the appropriate location if the operation works on -/// 128b rather than 64b. E.g. turn "vst2_lane" into "vst2q_lane_f32", etc. +/// and insert a 'q' in the appropriate location if type string starts with 'Q'. +/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc. +/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used. 
static std::string MangleName(const std::string &name, StringRef typestr, ClassKind ck) { - if (name == "vcvt_f32_f16") + if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" || + name == "vcvt_f64_f32") return name; bool quad = false; @@ -668,7 +945,11 @@ static std::string MangleName(const std::string &name, StringRef typestr, std::string s = name; if (typeCode.size() > 0) { - s += "_" + typeCode; + // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN. + if (endsWith_xN(s)) + s.insert(s.length() - 3, "_" + typeCode); + else + s += "_" + typeCode; } if (ck == ClassB) @@ -676,9 +957,14 @@ static std::string MangleName(const std::string &name, StringRef typestr, // Insert a 'q' before the first '_' character so that it ends up before // _lane or _n on vector-scalar operations. - if (quad) { + if (typestr.find("Q") != StringRef::npos) { + size_t pos = s.find('_'); + s = s.insert(pos, "q"); + } + char ins = Insert_BHSD_Suffix(typestr); + if(ins){ size_t pos = s.find('_'); - s = s.insert(pos, "q"); + s = s.insert(pos, &ins, 1); } return s; @@ -770,9 +1056,7 @@ GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef, // a dup/lane instruction. 
if (IsLDSTOne) { if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") { - RegisterSuffix += ", :" + OutTypeCode; - } else if (OutTypeCode == "64") { - RegisterSuffix += ", :64"; + RegisterSuffix += ":" + OutTypeCode; } } @@ -828,6 +1112,7 @@ static void NormalizeProtoForRegisterPatternCreation(const std::string &Name, switch (Proto[i]) { case 'u': case 'f': + case 'F': case 'd': case 's': case 'x': @@ -840,6 +1125,7 @@ static void NormalizeProtoForRegisterPatternCreation(const std::string &Name, NormedProto += 'q'; break; case 'g': + case 'j': case 'h': case 'e': NormedProto += 'd'; @@ -1158,7 +1444,8 @@ static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { } // Generate the string "(argtype a, argtype b, ...)" -static std::string GenArgs(const std::string &proto, StringRef typestr) { +static std::string GenArgs(const std::string &proto, StringRef typestr, + const std::string &name) { bool define = UseMacro(proto); char arg = 'a'; @@ -1176,6 +1463,9 @@ static std::string GenArgs(const std::string &proto, StringRef typestr) { s += TypeString(proto[i], typestr) + " __"; } s.push_back(arg); + //To avoid argument being multiple defined, add extra number for renaming. + if (name == "vcopy_lane" || name == "vcopy_laneq") + s.push_back('1'); if ((i + 1) < e) s += ", "; } @@ -1186,7 +1476,8 @@ static std::string GenArgs(const std::string &proto, StringRef typestr) { // Macro arguments are not type-checked like inline function arguments, so // assign them to local temporaries to get the right type checking. 
-static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { +static std::string GenMacroLocals(const std::string &proto, StringRef typestr, + const std::string &name ) { char arg = 'a'; std::string s; bool generatedLocal = false; @@ -1197,11 +1488,18 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { if (MacroArgUsedDirectly(proto, i)) continue; generatedLocal = true; + bool extranumber = false; + if (name == "vcopy_lane" || name == "vcopy_laneq") + extranumber = true; s += TypeString(proto[i], typestr) + " __"; s.push_back(arg); + if(extranumber) + s.push_back('1'); s += " = ("; s.push_back(arg); + if(extranumber) + s.push_back('1'); s += "); "; } @@ -1211,13 +1509,60 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr) { } // Use the vmovl builtin to sign-extend or zero-extend a vector. -static std::string Extend(StringRef typestr, const std::string &a) { +static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { + std::string s, high; + high = h ? "_high" : ""; + s = MangleName("vmovl" + high, typestr, ClassS); + s += "(" + a + ")"; + return s; +} + +// Get the high 64-bit part of a vector +static std::string GetHigh(const std::string &a, StringRef typestr) { std::string s; - s = MangleName("vmovl", typestr, ClassS); + s = MangleName("vget_high", typestr, ClassS); s += "(" + a + ")"; return s; } +// Gen operation with two operands and get high 64-bit for both of two operands. +static std::string Gen2OpWith2High(StringRef typestr, + const std::string &op, + const std::string &a, + const std::string &b) { + std::string s; + std::string Op1 = GetHigh(a, typestr); + std::string Op2 = GetHigh(b, typestr); + s = MangleName(op, typestr, ClassS); + s += "(" + Op1 + ", " + Op2 + ");"; + return s; +} + +// Gen operation with three operands and get high 64-bit of the latter +// two operands. 
+static std::string Gen3OpWith2High(StringRef typestr, + const std::string &op, + const std::string &a, + const std::string &b, + const std::string &c) { + std::string s; + std::string Op1 = GetHigh(b, typestr); + std::string Op2 = GetHigh(c, typestr); + s = MangleName(op, typestr, ClassS); + s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; + return s; +} + +// Gen combine operation by putting a on low 64-bit, and b on high 64-bit. +static std::string GenCombine(std::string typestr, + const std::string &a, + const std::string &b) { + std::string s; + s = MangleName("vcombine", typestr, ClassS); + s += "(" + a + ", " + b + ")"; + return s; +} + static std::string Duplicate(unsigned nElts, StringRef typestr, const std::string &a) { std::string s; @@ -1242,6 +1587,15 @@ static std::string SplatLane(unsigned nElts, const std::string &vec, return s; } +static std::string RemoveHigh(const std::string &name) { + std::string s = name; + std::size_t found = s.find("_high_"); + if (found == std::string::npos) + PrintFatalError("name should contain \"_high_\" for high intrinsics"); + s.replace(found, 5, ""); + return s; +} + static unsigned GetNumElements(StringRef typestr, bool &quad) { quad = false; bool dummy = false; @@ -1254,6 +1608,9 @@ static unsigned GetNumElements(StringRef typestr, bool &quad) { case 'l': nElts = 1; break; case 'h': nElts = 4; break; case 'f': nElts = 2; break; + case 'd': + nElts = 1; + break; default: PrintFatalError("unhandled type!"); } @@ -1262,8 +1619,8 @@ static unsigned GetNumElements(StringRef typestr, bool &quad) { } // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 
-static std::string GenOpString(OpKind op, const std::string &proto, - StringRef typestr) { +static std::string GenOpString(const std::string &name, OpKind op, + const std::string &proto, StringRef typestr) { bool quad; unsigned nElts = GetNumElements(typestr, quad); bool define = UseMacro(proto); @@ -1281,31 +1638,59 @@ static std::string GenOpString(OpKind op, const std::string &proto, case OpAddl: s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";"; break; + case OpAddlHi: + s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";"; + break; case OpAddw: s += "__a + " + Extend(typestr, "__b") + ";"; break; + case OpAddwHi: + s += "__a + " + Extend(typestr, "__b", 1) + ";"; + break; case OpSub: s += "__a - __b;"; break; case OpSubl: s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";"; break; + case OpSublHi: + s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";"; + break; case OpSubw: s += "__a - " + Extend(typestr, "__b") + ";"; break; + case OpSubwHi: + s += "__a - " + Extend(typestr, "__b", 1) + ";"; + break; case OpMulN: s += "__a * " + Duplicate(nElts, typestr, "__b") + ";"; break; case OpMulLane: s += "__a * " + SplatLane(nElts, "__b", "__c") + ";"; break; + case OpMulXLane: + s += MangleName("vmulx", typestr, ClassS) + "(__a, " + + SplatLane(nElts, "__b", "__c") + ");"; + break; case OpMul: s += "__a * __b;"; break; + case OpFMlaN: + s += MangleName("vfma", typestr, ClassS); + s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");"; + break; + case OpFMlsN: + s += MangleName("vfms", typestr, ClassS); + s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");"; + break; case OpMullLane: s += MangleName("vmull", typestr, ClassS) + "(__a, " + SplatLane(nElts, "__b", "__c") + ");"; break; + case OpMullHiLane: + s += MangleName("vmull", typestr, ClassS) + "(" + + GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; + break; case OpMlaN: s += "__a + (__b * " + 
Duplicate(nElts, typestr, "__c") + ");"; break; @@ -1323,15 +1708,45 @@ static std::string GenOpString(OpKind op, const std::string &proto, s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + SplatLane(nElts, "__c", "__d") + ");"; break; + case OpMlalHiLane: + s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" + + GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; + break; case OpMlal: s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; break; + case OpMullHi: + s += Gen2OpWith2High(typestr, "vmull", "__a", "__b"); + break; + case OpMullHiN: + s += MangleName("vmull_n", typestr, ClassS); + s += "(" + GetHigh("__a", typestr) + ", __b);"; + return s; + case OpMlalHi: + s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c"); + break; + case OpMlalHiN: + s += MangleName("vmlal_n", typestr, ClassS); + s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; + return s; case OpMlsN: s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");"; break; case OpMlsLane: s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");"; break; + case OpFMSLane: + s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; + s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; + s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; + s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; + break; + case OpFMSLaneQ: + s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; + s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; + s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; + s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; + break; case OpMls: s += "__a - (__b * __c);"; break; @@ -1343,21 +1758,44 @@ static std::string GenOpString(OpKind op, const std::string &proto, s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + SplatLane(nElts, "__c", "__d") + ");"; break; + case OpMlslHiLane: + s += "__a - " + MangleName("vmull", 
typestr, ClassS) + "(" + + GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; + break; case OpMlsl: s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; break; + case OpMlslHi: + s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c"); + break; + case OpMlslHiN: + s += MangleName("vmlsl_n", typestr, ClassS); + s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; + break; case OpQDMullLane: s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + SplatLane(nElts, "__b", "__c") + ");"; break; + case OpQDMullHiLane: + s += MangleName("vqdmull", typestr, ClassS) + "(" + + GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; + break; case OpQDMlalLane: s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " + SplatLane(nElts, "__c", "__d") + ");"; break; + case OpQDMlalHiLane: + s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " + + GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; + break; case OpQDMlslLane: s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " + SplatLane(nElts, "__c", "__d") + ");"; break; + case OpQDMlslHiLane: + s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " + + GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; + break; case OpQDMulhLane: s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + SplatLane(nElts, "__b", "__c") + ");"; @@ -1410,12 +1848,17 @@ static std::string GenOpString(OpKind op, const std::string &proto, s += ", (int64x1_t)__b, 0, 1);"; break; case OpHi: - s += "(" + ts + - ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);"; + // nElts is for the result vector, so the source is twice that number. 
+ s += "__builtin_shufflevector(__a, __a"; + for (unsigned i = nElts; i < nElts * 2; ++i) + s += ", " + utostr(i); + s+= ");"; break; case OpLo: - s += "(" + ts + - ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);"; + s += "__builtin_shufflevector(__a, __a"; + for (unsigned i = 0; i < nElts; ++i) + s += ", " + utostr(i); + s+= ");"; break; case OpDup: s += Duplicate(nElts, typestr, "__a") + ";"; @@ -1455,6 +1898,94 @@ static std::string GenOpString(OpKind op, const std::string &proto, s += ");"; break; } + case OpXtnHi: { + s = TypeString(proto[1], typestr) + " __a1 = " + + MangleName("vmovn", typestr, ClassS) + "(__b);\n " + + "return __builtin_shufflevector(__a, __a1"; + for (unsigned i = 0; i < nElts * 4; ++i) + s += ", " + utostr(i); + s += ");"; + break; + } + case OpSqxtunHi: { + s = TypeString(proto[1], typestr) + " __a1 = " + + MangleName("vqmovun", typestr, ClassS) + "(__b);\n " + + "return __builtin_shufflevector(__a, __a1"; + for (unsigned i = 0; i < nElts * 4; ++i) + s += ", " + utostr(i); + s += ");"; + break; + } + case OpQxtnHi: { + s = TypeString(proto[1], typestr) + " __a1 = " + + MangleName("vqmovn", typestr, ClassS) + "(__b);\n " + + "return __builtin_shufflevector(__a, __a1"; + for (unsigned i = 0; i < nElts * 4; ++i) + s += ", " + utostr(i); + s += ");"; + break; + } + case OpFcvtnHi: { + std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16"; + s = TypeString(proto[1], typestr) + " __a1 = " + + MangleName(FName, typestr, ClassS) + "(__b);\n " + + "return __builtin_shufflevector(__a, __a1"; + for (unsigned i = 0; i < nElts * 4; ++i) + s += ", " + utostr(i); + s += ");"; + break; + } + case OpFcvtlHi: { + std::string FName = (nElts == 2) ? 
"vcvt_f64" : "vcvt_f32"; + s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) + + ";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);"; + break; + } + case OpFcvtxnHi: { + s = TypeString(proto[1], typestr) + " __a1 = " + + MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " + + "return __builtin_shufflevector(__a, __a1"; + for (unsigned i = 0; i < nElts * 4; ++i) + s += ", " + utostr(i); + s += ");"; + break; + } + case OpUzp1: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = 0; i < nElts; i++) + s += ", " + utostr(2*i); + s += ");"; + break; + case OpUzp2: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = 0; i < nElts; i++) + s += ", " + utostr(2*i+1); + s += ");"; + break; + case OpZip1: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = 0; i < (nElts/2); i++) + s += ", " + utostr(i) + ", " + utostr(i+nElts); + s += ");"; + break; + case OpZip2: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = nElts/2; i < nElts; i++) + s += ", " + utostr(i) + ", " + utostr(i+nElts); + s += ");"; + break; + case OpTrn1: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = 0; i < (nElts/2); i++) + s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts); + s += ");"; + break; + case OpTrn2: + s += "__builtin_shufflevector(__a, __b"; + for (unsigned i = 0; i < (nElts/2); i++) + s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts); + s += ");"; + break; case OpAbdl: { std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)"; if (typestr[0] != 'U') { @@ -1468,23 +1999,247 @@ static std::string GenOpString(OpKind op, const std::string &proto, } break; } + case OpAbdlHi: + s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b"); + break; + case OpAddhnHi: { + std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)"; + s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn); + s += ";"; + break; + } + case OpRAddhnHi: { + std::string raddhn = MangleName("vraddhn", typestr, 
ClassS) + "(__b, __c)"; + s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn); + s += ";"; + break; + } + case OpSubhnHi: { + std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)"; + s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn); + s += ";"; + break; + } + case OpRSubhnHi: { + std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)"; + s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn); + s += ";"; + break; + } case OpAba: s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);"; break; - case OpAbal: { - s += "__a + "; - std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)"; - if (typestr[0] != 'U') { - // vabd results are always unsigned and must be zero-extended. - std::string utype = "U" + typestr.str(); - s += "(" + TypeString(proto[0], typestr) + ")"; - abd = "(" + TypeString('d', utype) + ")" + abd; - s += Extend(utype, abd) + ";"; + case OpAbal: + s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);"; + break; + case OpAbalHi: + s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c"); + break; + case OpQDMullHi: + s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b"); + break; + case OpQDMullHiN: + s += MangleName("vqdmull_n", typestr, ClassS); + s += "(" + GetHigh("__a", typestr) + ", __b);"; + return s; + case OpQDMlalHi: + s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c"); + break; + case OpQDMlalHiN: + s += MangleName("vqdmlal_n", typestr, ClassS); + s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; + return s; + case OpQDMlslHi: + s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c"); + break; + case OpQDMlslHiN: + s += MangleName("vqdmlsl_n", typestr, ClassS); + s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; + return s; + case OpDiv: + s += "__a / __b;"; + break; + case OpMovlHi: { + s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " + + MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s; + s += "(" + 
ts + ")" + MangleName("vshll_n", typestr, ClassS); + s += "(__a1, 0);"; + break; + } + case OpLongHi: { + // Another local variable __a1 is needed for calling a Macro, + // or using __a will have naming conflict when Macro expanding. + s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " + + MangleName("vget_high", typestr, ClassS) + "(__a); \\\n"; + s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) + + "(__a1, __b);"; + break; + } + case OpNarrowHi: { + s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " + + MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));"; + break; + } + case OpCopyLane: { + s += TypeString('s', typestr) + " __c2 = " + + MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " + + MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);"; + break; + } + case OpCopyQLane: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode + + "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);"; + break; + } + case OpCopyLaneQ: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode + + "(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);"; + break; + } + case OpScalarMulLane: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode + + "(__b, __c);\\\n __a * __d1;"; + break; + } + case OpScalarMulLaneQ: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode + + "(__b, __c);\\\n __a * __d1;"; + break; + } + case OpScalarMulXLane: { + bool dummy = false; + char type = ClassifyType(typestr, dummy, dummy, dummy); + if (type == 'f') type = 's'; + std::string typeCode = ""; + 
InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode + + "(__b, __c);\\\n vmulx" + type + "_" + + typeCode + "(__a, __d1);"; + break; + } + case OpScalarMulXLaneQ: { + bool dummy = false; + char type = ClassifyType(typestr, dummy, dummy, dummy); + if (type == 'f') type = 's'; + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + + typeCode + "(__b, __c);\\\n vmulx" + type + + "_" + typeCode + "(__a, __d1);"; + break; + } + + case OpScalarVMulXLane: { + bool dummy = false; + char type = ClassifyType(typestr, dummy, dummy, dummy); + if (type == 'f') type = 's'; + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += TypeString('s', typestr) + " __d1 = vget_lane_" + + typeCode + "(__a, 0);\\\n" + + " " + TypeString('s', typestr) + " __e1 = vget_lane_" + + typeCode + "(__b, __c);\\\n" + + " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" + + typeCode + "(__d1, __e1);\\\n" + + " " + TypeString('d', typestr) + " __g1;\\\n" + + " vset_lane_" + typeCode + "(__f1, __g1, __c);"; + break; + } + + case OpScalarVMulXLaneQ: { + bool dummy = false; + char type = ClassifyType(typestr, dummy, dummy, dummy); + if (type == 'f') type = 's'; + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += TypeString('s', typestr) + " __d1 = vget_lane_" + + typeCode + "(__a, 0);\\\n" + + " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" + + typeCode + "(__b, __c);\\\n" + + " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" + + typeCode + "(__d1, __e1);\\\n" + + " " + TypeString('d', typestr) + " __g1;\\\n" + + " vset_lane_" + typeCode + "(__f1, __g1, 0);"; + break; + } + case OpScalarQDMullLane: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + + 
"vget_lane_" + typeCode + "(b, __c));"; + break; + } + case OpScalarQDMullLaneQ: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + + "vgetq_lane_" + typeCode + "(b, __c));"; + break; + } + case OpScalarQDMulHiLane: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + + "vget_lane_" + typeCode + "(__b, __c));"; + break; + } + case OpScalarQDMulHiLaneQ: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + + "vgetq_lane_" + typeCode + "(__b, __c));"; + break; + } + case OpScalarQRDMulHiLane: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + + "vget_lane_" + typeCode + "(__b, __c));"; + break; + } + case OpScalarQRDMulHiLaneQ: { + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + + "vgetq_lane_" + typeCode + "(__b, __c));"; + break; + } + case OpScalarGetLane:{ + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + if (quad) { + s += "int16x8_t __a1 = vreinterpretq_s16_f16(__a);\\\n"; + s += " vgetq_lane_s16(__a1, __b);"; } else { - s += Extend(typestr, abd) + ";"; + s += "int16x4_t __a1 = vreinterpret_s16_f16(__a);\\\n"; + s += " vget_lane_s16(__a1, __b);"; } break; } + case OpScalarSetLane:{ + std::string typeCode = ""; + InstructionTypeCode(typestr, ClassS, quad, typeCode); + s += "int16_t __a1 = (int16_t)__a;\\\n"; + if (quad) { + s += " int16x8_t __b1 = vreinterpretq_s16_f16(b);\\\n"; + s += " int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c);\\\n"; + s += " vreinterpretq_f16_s16(__b2);"; + } else { + s += " int16x4_t __b1 = vreinterpret_s16_f16(b);\\\n"; + s += " 
int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c);\\\n"; + s += " vreinterpret_f16_s16(__b2);"; + } + break; + } + default: PrintFatalError("unknown OpKind!"); } @@ -1494,7 +2249,7 @@ static std::string GenOpString(OpKind op, const std::string &proto, static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { unsigned mod = proto[0]; - if (mod == 'v' || mod == 'f') + if (mod == 'v' || mod == 'f' || mod == 'F') mod = proto[1]; bool quad = false; @@ -1522,7 +2277,7 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { ET = NeonTypeFlags::Int32; break; case 'l': - ET = NeonTypeFlags::Int64; + ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64; break; case 'h': ET = NeonTypeFlags::Float16; @@ -1530,6 +2285,9 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { case 'f': ET = NeonTypeFlags::Float32; break; + case 'd': + ET = NeonTypeFlags::Float64; + break; default: PrintFatalError("unhandled type!"); } @@ -1537,6 +2295,19 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { return Flags.getFlags(); } +// We don't check 'a' in this function, because for builtin function the +// argument matching to 'a' uses a vector type splatted from a scalar type. +static bool ProtoHasScalar(const std::string proto) +{ + return (proto.find('s') != std::string::npos + || proto.find('z') != std::string::npos + || proto.find('r') != std::string::npos + || proto.find('b') != std::string::npos + || proto.find('$') != std::string::npos + || proto.find('y') != std::string::npos + || proto.find('o') != std::string::npos); +} + // Generate the definition for this intrinsic, e.g. 
__builtin_neon_cls(a) static std::string GenBuiltin(const std::string &name, const std::string &proto, StringRef typestr, ClassKind ck) { @@ -1544,14 +2315,14 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto, // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit // sret-like argument. - bool sret = (proto[0] >= '2' && proto[0] <= '4'); + bool sret = IsMultiVecProto(proto[0]); bool define = UseMacro(proto); // Check if the prototype has a scalar operand with the type of the vector // elements. If not, bitcasting the args will take care of arg checking. // The actual signedness etc. will be taken care of with special enums. - if (proto.find('s') == std::string::npos) + if (!ProtoHasScalar(proto)) ck = ClassB; if (proto[0] != 'v') { @@ -1604,12 +2375,19 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto, // Handle multiple-vector values specially, emitting each subvector as an // argument to the __builtin. + unsigned NumOfVec = 0; if (proto[i] >= '2' && proto[i] <= '4') { + NumOfVec = proto[i] - '0'; + } else if (proto[i] >= 'B' && proto[i] <= 'D') { + NumOfVec = proto[i] - 'A' + 1; + } + + if (NumOfVec > 0) { // Check if an explicit cast is needed. if (argType != 'c' || argPoly || argUsgn) args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args; - for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) { + for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) { s += args + ".val[" + utostr(vi) + "]"; if ((vi + 1) < ve) s += ", "; @@ -1662,7 +2440,7 @@ static std::string GenBuiltinDef(const std::string &name, // If all types are the same size, bitcasting the args will take care // of arg checking. The actual signedness etc. will be taken care of with // special enums. 
- if (proto.find('s') == std::string::npos) + if (!ProtoHasScalar(proto)) ck = ClassB; s += MangleName(name, typestr, ck); @@ -1706,12 +2484,12 @@ static std::string GenIntrinsic(const std::string &name, s += mangledName; // Function arguments - s += GenArgs(proto, inTypeStr); + s += GenArgs(proto, inTypeStr, name); // Definition. if (define) { s += " __extension__ ({ \\\n "; - s += GenMacroLocals(proto, inTypeStr); + s += GenMacroLocals(proto, inTypeStr, name); } else if (kind == OpUnavailable) { s += " __attribute__((unavailable));\n"; return s; @@ -1719,7 +2497,7 @@ static std::string GenIntrinsic(const std::string &name, s += " {\n "; if (kind != OpNone) - s += GenOpString(kind, proto, outTypeStr); + s += GenOpString(name, kind, proto, outTypeStr); else s += GenBuiltin(name, proto, outTypeStr, classKind); if (define) @@ -1773,7 +2551,7 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "#ifndef __ARM_NEON_H\n"; OS << "#define __ARM_NEON_H\n\n"; - OS << "#ifndef __ARM_NEON__\n"; + OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n"; OS << "#error \"NEON support not enabled\"\n"; OS << "#endif\n\n"; @@ -1781,19 +2559,50 @@ void NeonEmitter::run(raw_ostream &OS) { // Emit NEON-specific scalar typedefs. OS << "typedef float float32_t;\n"; + OS << "typedef __fp16 float16_t;\n"; + + OS << "#ifdef __aarch64__\n"; + OS << "typedef double float64_t;\n"; + OS << "#endif\n\n"; + + // For now, signedness of polynomial types depends on target + OS << "#ifdef __aarch64__\n"; + OS << "typedef uint8_t poly8_t;\n"; + OS << "typedef uint16_t poly16_t;\n"; + OS << "typedef uint64_t poly64_t;\n"; + OS << "#else\n"; OS << "typedef int8_t poly8_t;\n"; OS << "typedef int16_t poly16_t;\n"; - OS << "typedef uint16_t float16_t;\n"; + OS << "#endif\n"; // Emit Neon vector typedefs. 
- std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs"); + std::string TypedefTypes( + "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); SmallVector<StringRef, 24> TDTypeVec; ParseTypes(0, TypedefTypes, TDTypeVec); // Emit vector typedefs. + bool isA64 = false; + bool preinsert; + bool postinsert; for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { bool dummy, quad = false, poly = false; - (void) ClassifyType(TDTypeVec[i], quad, poly, dummy); + char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); + preinsert = false; + postinsert = false; + + if (type == 'd' || (type == 'l' && poly)) { + preinsert = isA64? false: true; + isA64 = true; + } else { + postinsert = isA64? true: false; + isA64 = false; + } + if (postinsert) + OS << "#endif\n"; + if (preinsert) + OS << "#ifdef __aarch64__\n"; + if (poly) OS << "typedef __attribute__((neon_polyvector_type("; else @@ -1806,50 +2615,130 @@ void NeonEmitter::run(raw_ostream &OS) { OS << TypeString('s', TDTypeVec[i]); OS << " " << TypeString('d', TDTypeVec[i]) << ";\n"; + } + postinsert = isA64? true: false; + if (postinsert) + OS << "#endif\n"; OS << "\n"; // Emit struct typedefs. + isA64 = false; for (unsigned vi = 2; vi != 5; ++vi) { for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { + bool dummy, quad = false, poly = false; + char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); + preinsert = false; + postinsert = false; + + if (type == 'd' || (type == 'l' && poly)) { + preinsert = isA64? false: true; + isA64 = true; + } else { + postinsert = isA64? true: false; + isA64 = false; + } + if (postinsert) + OS << "#endif\n"; + if (preinsert) + OS << "#ifdef __aarch64__\n"; + std::string ts = TypeString('d', TDTypeVec[i]); std::string vs = TypeString('0' + vi, TDTypeVec[i]); OS << "typedef struct " << vs << " {\n"; OS << " " << ts << " val"; OS << "[" << utostr(vi) << "]"; OS << ";\n} "; - OS << vs << ";\n\n"; + OS << vs << ";\n"; + OS << "\n"; } } + postinsert = isA64? 
true: false; + if (postinsert) + OS << "#endif\n"; + OS << "\n"; OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n"; std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); + StringMap<ClassKind> EmittedMap; + // Emit vmovl, vmull and vabd intrinsics first so they can be used by other // intrinsics. (Some of the saturating multiply instructions are also // used to implement the corresponding "_lane" variants, but tablegen // sorts the records into alphabetical order so that the "_lane" variants // come after the intrinsics they use.) - emitIntrinsic(OS, Records.getDef("VMOVL")); - emitIntrinsic(OS, Records.getDef("VMULL")); - emitIntrinsic(OS, Records.getDef("VABD")); - + emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap); + emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap); + emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap); + emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap); + + // ARM intrinsics must be emitted before AArch64 intrinsics to ensure + // common intrinsics appear only once in the output stream. + // The check for uniquiness is done in emitIntrinsic. + // Emit ARM intrinsics. for (unsigned i = 0, e = RV.size(); i != e; ++i) { Record *R = RV[i]; - if (R->getName() != "VMOVL" && - R->getName() != "VMULL" && + + // Skip AArch64 intrinsics; they will be emitted at the end. + bool isA64 = R->getValueAsBit("isA64"); + if (isA64) + continue; + + if (R->getName() != "VMOVL" && R->getName() != "VMULL" && R->getName() != "VABD") - emitIntrinsic(OS, R); + emitIntrinsic(OS, R, EmittedMap); + } + + // Emit AArch64-specific intrinsics. + OS << "#ifdef __aarch64__\n"; + + emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap); + emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap); + emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap); + + for (unsigned i = 0, e = RV.size(); i != e; ++i) { + Record *R = RV[i]; + + // Skip ARM intrinsics already included above. 
+ bool isA64 = R->getValueAsBit("isA64"); + if (!isA64) + continue; + + // Skip crypto temporarily, and will emit them all together at the end. + bool isCrypto = R->getValueAsBit("isCrypto"); + if (isCrypto) + continue; + + emitIntrinsic(OS, R, EmittedMap); + } + + OS << "#ifdef __ARM_FEATURE_CRYPTO\n"; + + for (unsigned i = 0, e = RV.size(); i != e; ++i) { + Record *R = RV[i]; + + // Skip crypto temporarily, and will emit them all together at the end. + bool isCrypto = R->getValueAsBit("isCrypto"); + if (!isCrypto) + continue; + + emitIntrinsic(OS, R, EmittedMap); } + + OS << "#endif\n\n"; + + OS << "#endif\n\n"; OS << "#undef __ai\n\n"; OS << "#endif /* __ARM_NEON_H */\n"; } /// emitIntrinsic - Write out the arm_neon.h header file definitions for the -/// intrinsics specified by record R. -void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) { +/// intrinsics specified by record R checking for intrinsic uniqueness. +void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R, + StringMap<ClassKind> &EmittedMap) { std::string name = R->getValueAsString("Name"); std::string Proto = R->getValueAsString("Prototype"); std::string Types = R->getValueAsString("Types"); @@ -1876,12 +2765,20 @@ void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) { (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); if (srcti == ti || inQuad != outQuad) continue; - OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], - OpCast, ClassS); + std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], + OpCast, ClassS); + if (EmittedMap.count(s)) + continue; + EmittedMap[s] = ClassS; + OS << s; } } else { - OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], - kind, classKind); + std::string s = + GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind); + if (EmittedMap.count(s)) + continue; + EmittedMap[s] = classKind; + OS << s; } } OS << "\n"; @@ -1902,6 +2799,7 @@ static unsigned RangeFromType(const char mod, StringRef typestr) { 
case 'f': case 'i': return (2 << (int)quad) - 1; + case 'd': case 'l': return (1 << (int)quad) - 1; default: @@ -1909,56 +2807,198 @@ static unsigned RangeFromType(const char mod, StringRef typestr) { } } -/// runHeader - Emit a file with sections defining: -/// 1. the NEON section of BuiltinsARM.def. -/// 2. the SemaChecking code for the type overload checking. -/// 3. the SemaChecking code for validation of intrinsic immediate arguments. -void NeonEmitter::runHeader(raw_ostream &OS) { - std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); +static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) { + // base type to get the type string for. + bool dummy = false; + char type = ClassifyType(typestr, dummy, dummy, dummy); + type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy); + switch (type) { + case 'c': + return 7; + case 'h': + case 's': + return 15; + case 'f': + case 'i': + return 31; + case 'd': + case 'l': + return 63; + default: + PrintFatalError("unhandled type!"); + } +} + +/// Generate the ARM and AArch64 intrinsic range checking code for +/// shift/lane immediates, checking for unique declarations. +void +NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, + StringMap<ClassKind> &A64IntrinsicMap, + bool isA64RangeCheck) { + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); StringMap<OpKind> EmittedMap; - // Generate BuiltinsARM.def for NEON - OS << "#ifdef GET_NEON_BUILTINS\n"; + // Generate the intrinsic range checking code for shift/lane immediates. 
+ if (isA64RangeCheck) + OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n"; + else + OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; + for (unsigned i = 0, e = RV.size(); i != e; ++i) { Record *R = RV[i]; + OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; if (k != OpNone) continue; + std::string name = R->getValueAsString("Name"); std::string Proto = R->getValueAsString("Prototype"); + std::string Types = R->getValueAsString("Types"); + std::string Rename = name + "@" + Proto; // Functions with 'a' (the splat code) in the type prototype should not get // their own builtin as they use the non-splat variant. if (Proto.find('a') != std::string::npos) continue; - std::string Types = R->getValueAsString("Types"); + // Functions which do not have an immediate do not need to have range + // checking code emitted. + size_t immPos = Proto.find('i'); + if (immPos == std::string::npos) + continue; + SmallVector<StringRef, 16> TypeVec; ParseTypes(R, Types, TypeVec); if (R->getSuperClasses().size() < 2) PrintFatalError(R->getLoc(), "Builtin has no class kind"); - std::string name = R->getValueAsString("Name"); ClassKind ck = ClassMap[R->getSuperClasses()[1]]; + if (!ProtoHasScalar(Proto)) + ck = ClassB; + + // Do not include AArch64 range checks if not generating code for AArch64. + bool isA64 = R->getValueAsBit("isA64"); + if (!isA64RangeCheck && isA64) + continue; + + // Include ARM range checks in AArch64 but only if ARM intrinsics are not + // redefined by AArch64 to handle new types. + if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { + ClassKind &A64CK = A64IntrinsicMap[Rename]; + if (A64CK == ck && ck != ClassNone) + continue; + } for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { - // Generate the BuiltinsARM.def declaration for this builtin, ensuring - // that each unique BUILTIN() macro appears only once in the output - // stream. 
- std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck); - if (EmittedMap.count(bd)) + std::string namestr, shiftstr, rangestr; + + if (R->getValueAsBit("isVCVT_N")) { + // VCVT between floating- and fixed-point values takes an immediate + // in the range [1, 32] for f32, or [1, 64] for f64. + ck = ClassB; + if (name.find("32") != std::string::npos) + rangestr = "l = 1; u = 31"; // upper bound = l + u + else if (name.find("64") != std::string::npos) + rangestr = "l = 1; u = 63"; + else + PrintFatalError(R->getLoc(), + "Fixed point convert name should contains \"32\" or \"64\""); + + } else if (R->getValueAsBit("isScalarShift")) { + // Right shifts have an 'r' in the name, left shifts do not. Convert + // instructions have the same bounds and right shifts. + if (name.find('r') != std::string::npos || + name.find("cvt") != std::string::npos) + rangestr = "l = 1; "; + + unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]); + // Narrow shift has half the upper bound + if (R->getValueAsBit("isScalarNarrowShift")) + upBound /= 2; + + rangestr += "u = " + utostr(upBound); + } else if (R->getValueAsBit("isShift")) { + // Builtins which are overloaded by type will need to have their upper + // bound computed at Sema time based on the type constant. + shiftstr = ", true"; + + // Right shifts have an 'r' in the name, left shifts do not. + if (name.find('r') != std::string::npos) + rangestr = "l = 1; "; + + rangestr += "u = RFT(TV" + shiftstr + ")"; + } else { + // The immediate generally refers to a lane in the preceding argument. + assert(immPos > 0 && "unexpected immediate operand"); + rangestr = + "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti])); + } + // Make sure cases appear only once by uniquing them in a string map. 
+ namestr = MangleName(name, TypeVec[ti], ck); + if (EmittedMap.count(namestr)) continue; + EmittedMap[namestr] = OpNone; - EmittedMap[bd] = OpNone; - OS << bd << "\n"; + // Calculate the index of the immediate that should be range checked. + unsigned immidx = 0; + + // Builtins that return a struct of multiple vectors have an extra + // leading arg for the struct return. + if (IsMultiVecProto(Proto[0])) + ++immidx; + + // Add one to the index for each argument until we reach the immediate + // to be checked. Structs of vectors are passed as multiple arguments. + for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { + switch (Proto[ii]) { + default: + immidx += 1; + break; + case '2': + case 'B': + immidx += 2; + break; + case '3': + case 'C': + immidx += 3; + break; + case '4': + case 'D': + immidx += 4; + break; + case 'i': + ie = ii + 1; + break; + } + } + if (isA64RangeCheck) + OS << "case AArch64::BI__builtin_neon_"; + else + OS << "case ARM::BI__builtin_neon_"; + OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; " + << rangestr << "; break;\n"; } } OS << "#endif\n\n"; +} + +/// Generate the ARM and AArch64 overloaded type checking code for +/// SemaChecking.cpp, checking for unique builtin declarations. 
+void +NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, + StringMap<ClassKind> &A64IntrinsicMap, + bool isA64TypeCheck) { + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); + StringMap<OpKind> EmittedMap; // Generate the overloaded type checking code for SemaChecking.cpp - OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; + if (isA64TypeCheck) + OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n"; + else + OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; + for (unsigned i = 0, e = RV.size(); i != e; ++i) { Record *R = RV[i]; OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; @@ -1968,7 +3008,8 @@ void NeonEmitter::runHeader(raw_ostream &OS) { std::string Proto = R->getValueAsString("Prototype"); std::string Types = R->getValueAsString("Types"); std::string name = R->getValueAsString("Name"); - + std::string Rename = name + "@" + Proto; + // Functions with 'a' (the splat code) in the type prototype should not get // their own builtin as they use the non-splat variant. if (Proto.find('a') != std::string::npos) @@ -1976,7 +3017,7 @@ void NeonEmitter::runHeader(raw_ostream &OS) { // Functions which have a scalar argument cannot be overloaded, no need to // check them if we are emitting the type checking code. - if (Proto.find('s') != std::string::npos) + if (ProtoHasScalar(Proto)) continue; SmallVector<StringRef, 16> TypeVec; @@ -1985,6 +3026,21 @@ void NeonEmitter::runHeader(raw_ostream &OS) { if (R->getSuperClasses().size() < 2) PrintFatalError(R->getLoc(), "Builtin has no class kind"); + // Do not include AArch64 type checks if not generating code for AArch64. + bool isA64 = R->getValueAsBit("isA64"); + if (!isA64TypeCheck && isA64) + continue; + + // Include ARM type check in AArch64 but only if ARM intrinsics + // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr + // redefined in AArch64 to handle an additional 2 x f64 type. 
+ ClassKind ck = ClassMap[R->getSuperClasses()[1]]; + if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) { + ClassKind &A64CK = A64IntrinsicMap[Rename]; + if (A64CK == ck && ck != ClassNone) + continue; + } + int si = -1, qi = -1; uint64_t mask = 0, qmask = 0; for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { @@ -2017,7 +3073,7 @@ void NeonEmitter::runHeader(raw_ostream &OS) { } } // For sret builtins, adjust the pointer argument index. - if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4')) + if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0])) PtrArgNum += 1; // Omit type checking for the pointer arguments of vld1_lane, vld1_dup, @@ -2032,9 +3088,12 @@ void NeonEmitter::runHeader(raw_ostream &OS) { } if (mask) { - OS << "case ARM::BI__builtin_neon_" - << MangleName(name, TypeVec[si], ClassB) - << ": mask = " << "0x" << utohexstr(mask) << "ULL"; + if (isA64TypeCheck) + OS << "case AArch64::BI__builtin_neon_"; + else + OS << "case ARM::BI__builtin_neon_"; + OS << MangleName(name, TypeVec[si], ClassB) << ": mask = " + << "0x" << utohexstr(mask) << "ULL"; if (PtrArgNum >= 0) OS << "; PtrArgNum = " << PtrArgNum; if (HasConstPtr) @@ -2042,9 +3101,12 @@ void NeonEmitter::runHeader(raw_ostream &OS) { OS << "; break;\n"; } if (qmask) { - OS << "case ARM::BI__builtin_neon_" - << MangleName(name, TypeVec[qi], ClassB) - << ": mask = " << "0x" << utohexstr(qmask) << "ULL"; + if (isA64TypeCheck) + OS << "case AArch64::BI__builtin_neon_"; + else + OS << "case ARM::BI__builtin_neon_"; + OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = " + << "0x" << utohexstr(qmask) << "ULL"; if (PtrArgNum >= 0) OS << "; PtrArgNum = " << PtrArgNum; if (HasConstPtr) @@ -2053,31 +3115,38 @@ void NeonEmitter::runHeader(raw_ostream &OS) { } } OS << "#endif\n\n"; +} + +/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def +/// declaration of builtins, checking for unique builtin declarations. 
+void NeonEmitter::genBuiltinsDef(raw_ostream &OS, + StringMap<ClassKind> &A64IntrinsicMap, + bool isA64GenBuiltinDef) { + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); + StringMap<OpKind> EmittedMap; + + // Generate BuiltinsARM.def and BuiltinsAArch64.def + if (isA64GenBuiltinDef) + OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n"; + else + OS << "#ifdef GET_NEON_BUILTINS\n"; - // Generate the intrinsic range checking code for shift/lane immediates. - OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; for (unsigned i = 0, e = RV.size(); i != e; ++i) { Record *R = RV[i]; - OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; if (k != OpNone) continue; - std::string name = R->getValueAsString("Name"); std::string Proto = R->getValueAsString("Prototype"); - std::string Types = R->getValueAsString("Types"); + std::string name = R->getValueAsString("Name"); + std::string Rename = name + "@" + Proto; // Functions with 'a' (the splat code) in the type prototype should not get // their own builtin as they use the non-splat variant. if (Proto.find('a') != std::string::npos) continue; - // Functions which do not have an immediate do not need to have range - // checking code emitted. - size_t immPos = Proto.find('i'); - if (immPos == std::string::npos) - continue; - + std::string Types = R->getValueAsString("Types"); SmallVector<StringRef, 16> TypeVec; ParseTypes(R, Types, TypeVec); @@ -2086,70 +3155,92 @@ void NeonEmitter::runHeader(raw_ostream &OS) { ClassKind ck = ClassMap[R->getSuperClasses()[1]]; - for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { - std::string namestr, shiftstr, rangestr; - - if (R->getValueAsBit("isVCVT_N")) { - // VCVT between floating- and fixed-point values takes an immediate - // in the range 1 to 32. 
- ck = ClassB; - rangestr = "l = 1; u = 31"; // upper bound = l + u - } else if (Proto.find('s') == std::string::npos) { - // Builtins which are overloaded by type will need to have their upper - // bound computed at Sema time based on the type constant. - ck = ClassB; - if (R->getValueAsBit("isShift")) { - shiftstr = ", true"; + // Do not include AArch64 BUILTIN() macros if not generating + // code for AArch64 + bool isA64 = R->getValueAsBit("isA64"); + if (!isA64GenBuiltinDef && isA64) + continue; - // Right shifts have an 'r' in the name, left shifts do not. - if (name.find('r') != std::string::npos) - rangestr = "l = 1; "; - } - rangestr += "u = RFT(TV" + shiftstr + ")"; - } else { - // The immediate generally refers to a lane in the preceding argument. - assert(immPos > 0 && "unexpected immediate operand"); - rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti])); - } - // Make sure cases appear only once by uniquing them in a string map. - namestr = MangleName(name, TypeVec[ti], ck); - if (EmittedMap.count(namestr)) + // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics + // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr + // redefined in AArch64 to handle an additional 2 x f64 type. + if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) { + ClassKind &A64CK = A64IntrinsicMap[Rename]; + if (A64CK == ck && ck != ClassNone) continue; - EmittedMap[namestr] = OpNone; - - // Calculate the index of the immediate that should be range checked. - unsigned immidx = 0; + } - // Builtins that return a struct of multiple vectors have an extra - // leading arg for the struct return. - if (Proto[0] >= '2' && Proto[0] <= '4') - ++immidx; + for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { + // Generate the declaration for this builtin, ensuring + // that each unique BUILTIN() macro appears only once in the output + // stream. 
+ std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck); + if (EmittedMap.count(bd)) + continue; - // Add one to the index for each argument until we reach the immediate - // to be checked. Structs of vectors are passed as multiple arguments. - for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { - switch (Proto[ii]) { - default: immidx += 1; break; - case '2': immidx += 2; break; - case '3': immidx += 3; break; - case '4': immidx += 4; break; - case 'i': ie = ii + 1; break; - } - } - OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck) - << ": i = " << immidx << "; " << rangestr << "; break;\n"; + EmittedMap[bd] = OpNone; + OS << bd << "\n"; } } OS << "#endif\n\n"; } +/// runHeader - Emit a file with sections defining: +/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. +/// 2. the SemaChecking code for the type overload checking. +/// 3. the SemaChecking code for validation of intrinsic immediate arguments. +void NeonEmitter::runHeader(raw_ostream &OS) { + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); + + // build a map of AArch64 intriniscs to be used in uniqueness checks. 
+ StringMap<ClassKind> A64IntrinsicMap; + for (unsigned i = 0, e = RV.size(); i != e; ++i) { + Record *R = RV[i]; + + bool isA64 = R->getValueAsBit("isA64"); + if (!isA64) + continue; + + ClassKind CK = ClassNone; + if (R->getSuperClasses().size() >= 2) + CK = ClassMap[R->getSuperClasses()[1]]; + + std::string Name = R->getValueAsString("Name"); + std::string Proto = R->getValueAsString("Prototype"); + std::string Rename = Name + "@" + Proto; + if (A64IntrinsicMap.count(Rename)) + continue; + A64IntrinsicMap[Rename] = CK; + } + + // Generate BuiltinsARM.def for ARM + genBuiltinsDef(OS, A64IntrinsicMap, false); + + // Generate BuiltinsAArch64.def for AArch64 + genBuiltinsDef(OS, A64IntrinsicMap, true); + + // Generate ARM overloaded type checking code for SemaChecking.cpp + genOverloadTypeCheckCode(OS, A64IntrinsicMap, false); + + // Generate AArch64 overloaded type checking code for SemaChecking.cpp + genOverloadTypeCheckCode(OS, A64IntrinsicMap, true); + + // Generate ARM range checking code for shift/lane immediates. + genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false); + + // Generate the AArch64 range checking code for shift/lane immediates. + genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true); +} + /// GenTest - Write out a test for the intrinsic specified by the name and /// type strings, including the embedded patterns for FileCheck to match. 
static std::string GenTest(const std::string &name, const std::string &proto, StringRef outTypeStr, StringRef inTypeStr, bool isShift, bool isHiddenLOp, - ClassKind ck, const std::string &InstName) { + ClassKind ck, const std::string &InstName, + bool isA64, + std::string & testFuncProto) { assert(!proto.empty() && ""); std::string s; @@ -2164,12 +3255,17 @@ static std::string GenTest(const std::string &name, mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); } + // todo: GenerateChecksForIntrinsic does not generate CHECK + // for aarch64 instructions yet std::vector<std::string> FileCheckPatterns; - GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, - isHiddenLOp, FileCheckPatterns); + if (!isA64) { + GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, + isHiddenLOp, FileCheckPatterns); + s+= "// CHECK_ARM: test_" + mangledName + "\n"; + } + s += "// CHECK_AARCH64: test_" + mangledName + "\n"; // Emit the FileCheck patterns. - s += "// CHECK: test_" + mangledName + "\n"; // If for any reason we do not want to emit a check, mangledInst // will be the empty string. if (FileCheckPatterns.size()) { @@ -2177,23 +3273,27 @@ static std::string GenTest(const std::string &name, e = FileCheckPatterns.end(); i != e; ++i) { - s += "// CHECK: " + *i + "\n"; + s += "// CHECK_ARM: " + *i + "\n"; } } // Emit the start of the test function. - s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; + + testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; char arg = 'a'; std::string comma; for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { // Do not create arguments for values that must be immediate constants. 
if (proto[i] == 'i') continue; - s += comma + TypeString(proto[i], inTypeStr) + " "; - s.push_back(arg); + testFuncProto += comma + TypeString(proto[i], inTypeStr) + " "; + testFuncProto.push_back(arg); comma = ", "; } - s += ") {\n "; + testFuncProto += ")"; + + s+= testFuncProto; + s+= " {\n "; if (proto[0] != 'v') s += "return "; @@ -2217,18 +3317,14 @@ static std::string GenTest(const std::string &name, return s; } -/// runTests - Write out a complete set of tests for all of the Neon -/// intrinsics. -void NeonEmitter::runTests(raw_ostream &OS) { - OS << - "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\\\n" - "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" - "// RUN: | FileCheck %s\n" - "\n" - "#include <arm_neon.h>\n" - "\n"; +/// Write out all intrinsic tests for the specified target, checking +/// for intrinsic test uniqueness. +void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, + bool isA64GenTest) { + if (isA64GenTest) + OS << "#ifdef __aarch64__\n"; - std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); for (unsigned i = 0, e = RV.size(); i != e; ++i) { Record *R = RV[i]; std::string name = R->getValueAsString("Name"); @@ -2237,6 +3333,12 @@ void NeonEmitter::runTests(raw_ostream &OS) { bool isShift = R->getValueAsBit("isShift"); std::string InstName = R->getValueAsString("InstName"); bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); + bool isA64 = R->getValueAsBit("isA64"); + + // do not include AArch64 intrinsic test if not generating + // code for AArch64 + if (!isA64GenTest && isA64) + continue; SmallVector<StringRef, 16> TypeVec; ParseTypes(R, Types, TypeVec); @@ -2256,16 +3358,56 @@ void NeonEmitter::runTests(raw_ostream &OS) { (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); if (srcti == ti || inQuad != outQuad) continue; - OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], - 
isShift, isHiddenLOp, ck, InstName); + std::string testFuncProto; + std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], + isShift, isHiddenLOp, ck, InstName, isA64, + testFuncProto); + if (EmittedMap.count(testFuncProto)) + continue; + EmittedMap[testFuncProto] = kind; + OS << s << "\n"; } } else { - OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti], - isShift, isHiddenLOp, ck, InstName); + std::string testFuncProto; + std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, + isHiddenLOp, ck, InstName, isA64, testFuncProto); + if (EmittedMap.count(testFuncProto)) + continue; + EmittedMap[testFuncProto] = kind; + OS << s << "\n"; } } - OS << "\n"; } + + if (isA64GenTest) + OS << "#endif\n"; +} +/// runTests - Write out a complete set of tests for all of the Neon +/// intrinsics. +void NeonEmitter::runTests(raw_ostream &OS) { + OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " + "apcs-gnu\\\n" + "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" + "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" + "\n" + "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" + "// RUN: -target-feature +neon -ffreestanding -S -o - %s \\\n" + "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" + "\n" + "// REQUIRES: long_tests\n" + "\n" + "#include <arm_neon.h>\n" + "\n"; + + // ARM tests must be emitted before AArch64 tests to ensure + // tests for intrinsics that are common to ARM and AArch64 + // appear only once in the output stream. + // The check for uniqueness is done in genTargetTest. + StringMap<OpKind> EmittedMap; + + genTargetTest(OS, EmittedMap, false); + + genTargetTest(OS, EmittedMap, true); } namespace clang { |