summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp')
-rw-r--r--contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp1508
1 files changed, 1325 insertions, 183 deletions
diff --git a/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp b/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp
index 34b955e..b0939c9 100644
--- a/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp
+++ b/contrib/llvm/tools/clang/utils/TableGen/NeonEmitter.cpp
@@ -40,31 +40,58 @@ enum OpKind {
OpUnavailable,
OpAdd,
OpAddl,
+ OpAddlHi,
OpAddw,
+ OpAddwHi,
OpSub,
OpSubl,
+ OpSublHi,
OpSubw,
+ OpSubwHi,
OpMul,
OpMla,
OpMlal,
+ OpMullHi,
+ OpMullHiN,
+ OpMlalHi,
+ OpMlalHiN,
OpMls,
OpMlsl,
+ OpMlslHi,
+ OpMlslHiN,
OpMulN,
OpMlaN,
OpMlsN,
+ OpFMlaN,
+ OpFMlsN,
OpMlalN,
OpMlslN,
OpMulLane,
+ OpMulXLane,
OpMullLane,
+ OpMullHiLane,
OpMlaLane,
OpMlsLane,
OpMlalLane,
+ OpMlalHiLane,
OpMlslLane,
+ OpMlslHiLane,
OpQDMullLane,
+ OpQDMullHiLane,
OpQDMlalLane,
+ OpQDMlalHiLane,
OpQDMlslLane,
+ OpQDMlslHiLane,
OpQDMulhLane,
OpQRDMulhLane,
+ OpFMSLane,
+ OpFMSLaneQ,
+ OpTrn1,
+ OpZip1,
+ OpUzp1,
+ OpTrn2,
+ OpZip2,
+ OpUzp2,
OpEq,
OpGe,
OpLe,
@@ -87,10 +114,49 @@ enum OpKind {
OpRev16,
OpRev32,
OpRev64,
+ OpXtnHi,
+ OpSqxtunHi,
+ OpQxtnHi,
+ OpFcvtnHi,
+ OpFcvtlHi,
+ OpFcvtxnHi,
OpReinterpret,
+ OpAddhnHi,
+ OpRAddhnHi,
+ OpSubhnHi,
+ OpRSubhnHi,
OpAbdl,
+ OpAbdlHi,
OpAba,
- OpAbal
+ OpAbal,
+ OpAbalHi,
+ OpQDMullHi,
+ OpQDMullHiN,
+ OpQDMlalHi,
+ OpQDMlalHiN,
+ OpQDMlslHi,
+ OpQDMlslHiN,
+ OpDiv,
+ OpLongHi,
+ OpNarrowHi,
+ OpMovlHi,
+ OpCopyLane,
+ OpCopyQLane,
+ OpCopyLaneQ,
+ OpScalarMulLane,
+ OpScalarMulLaneQ,
+ OpScalarMulXLane,
+ OpScalarMulXLaneQ,
+ OpScalarVMulXLane,
+ OpScalarVMulXLaneQ,
+ OpScalarQDMullLane,
+ OpScalarQDMullLaneQ,
+ OpScalarQDMulHiLane,
+ OpScalarQDMulHiLaneQ,
+ OpScalarQRDMulHiLane,
+ OpScalarQRDMulHiLaneQ,
+ OpScalarGetLane,
+ OpScalarSetLane
};
enum ClassKind {
@@ -126,8 +192,10 @@ public:
Int64,
Poly8,
Poly16,
+ Poly64,
Float16,
- Float32
+ Float32,
+ Float64
};
NeonTypeFlags(unsigned F) : Flags(F) {}
@@ -154,31 +222,58 @@ public:
OpMap["OP_UNAVAILABLE"] = OpUnavailable;
OpMap["OP_ADD"] = OpAdd;
OpMap["OP_ADDL"] = OpAddl;
+ OpMap["OP_ADDLHi"] = OpAddlHi;
OpMap["OP_ADDW"] = OpAddw;
+ OpMap["OP_ADDWHi"] = OpAddwHi;
OpMap["OP_SUB"] = OpSub;
OpMap["OP_SUBL"] = OpSubl;
+ OpMap["OP_SUBLHi"] = OpSublHi;
OpMap["OP_SUBW"] = OpSubw;
+ OpMap["OP_SUBWHi"] = OpSubwHi;
OpMap["OP_MUL"] = OpMul;
OpMap["OP_MLA"] = OpMla;
OpMap["OP_MLAL"] = OpMlal;
+ OpMap["OP_MULLHi"] = OpMullHi;
+ OpMap["OP_MULLHi_N"] = OpMullHiN;
+ OpMap["OP_MLALHi"] = OpMlalHi;
+ OpMap["OP_MLALHi_N"] = OpMlalHiN;
OpMap["OP_MLS"] = OpMls;
OpMap["OP_MLSL"] = OpMlsl;
+ OpMap["OP_MLSLHi"] = OpMlslHi;
+ OpMap["OP_MLSLHi_N"] = OpMlslHiN;
OpMap["OP_MUL_N"] = OpMulN;
OpMap["OP_MLA_N"] = OpMlaN;
OpMap["OP_MLS_N"] = OpMlsN;
+ OpMap["OP_FMLA_N"] = OpFMlaN;
+ OpMap["OP_FMLS_N"] = OpFMlsN;
OpMap["OP_MLAL_N"] = OpMlalN;
OpMap["OP_MLSL_N"] = OpMlslN;
OpMap["OP_MUL_LN"]= OpMulLane;
+ OpMap["OP_MULX_LN"]= OpMulXLane;
OpMap["OP_MULL_LN"] = OpMullLane;
+ OpMap["OP_MULLHi_LN"] = OpMullHiLane;
OpMap["OP_MLA_LN"]= OpMlaLane;
OpMap["OP_MLS_LN"]= OpMlsLane;
OpMap["OP_MLAL_LN"] = OpMlalLane;
+ OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
OpMap["OP_MLSL_LN"] = OpMlslLane;
+ OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
OpMap["OP_QDMULL_LN"] = OpQDMullLane;
+ OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
+ OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
+ OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
+ OpMap["OP_FMS_LN"] = OpFMSLane;
+ OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
+ OpMap["OP_TRN1"] = OpTrn1;
+ OpMap["OP_ZIP1"] = OpZip1;
+ OpMap["OP_UZP1"] = OpUzp1;
+ OpMap["OP_TRN2"] = OpTrn2;
+ OpMap["OP_ZIP2"] = OpZip2;
+ OpMap["OP_UZP2"] = OpUzp2;
OpMap["OP_EQ"] = OpEq;
OpMap["OP_GE"] = OpGe;
OpMap["OP_LE"] = OpLe;
@@ -201,10 +296,49 @@ public:
OpMap["OP_REV16"] = OpRev16;
OpMap["OP_REV32"] = OpRev32;
OpMap["OP_REV64"] = OpRev64;
+ OpMap["OP_XTN"] = OpXtnHi;
+ OpMap["OP_SQXTUN"] = OpSqxtunHi;
+ OpMap["OP_QXTN"] = OpQxtnHi;
+ OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
+ OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
+ OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
OpMap["OP_REINT"] = OpReinterpret;
+ OpMap["OP_ADDHNHi"] = OpAddhnHi;
+ OpMap["OP_RADDHNHi"] = OpRAddhnHi;
+ OpMap["OP_SUBHNHi"] = OpSubhnHi;
+ OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
OpMap["OP_ABDL"] = OpAbdl;
+ OpMap["OP_ABDLHi"] = OpAbdlHi;
OpMap["OP_ABA"] = OpAba;
OpMap["OP_ABAL"] = OpAbal;
+ OpMap["OP_ABALHi"] = OpAbalHi;
+ OpMap["OP_QDMULLHi"] = OpQDMullHi;
+ OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
+ OpMap["OP_QDMLALHi"] = OpQDMlalHi;
+ OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
+ OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
+ OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
+ OpMap["OP_DIV"] = OpDiv;
+ OpMap["OP_LONG_HI"] = OpLongHi;
+ OpMap["OP_NARROW_HI"] = OpNarrowHi;
+ OpMap["OP_MOVL_HI"] = OpMovlHi;
+ OpMap["OP_COPY_LN"] = OpCopyLane;
+ OpMap["OP_COPYQ_LN"] = OpCopyQLane;
+ OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
+ OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
+ OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
+ OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
+ OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
+ OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
+ OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
+ OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
+ OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
+ OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
+ OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
+ OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
+ OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
+ OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
+ OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;
Record *SI = R.getClass("SInst");
Record *II = R.getClass("IInst");
@@ -235,7 +369,18 @@ public:
void runTests(raw_ostream &o);
private:
- void emitIntrinsic(raw_ostream &OS, Record *R);
+ void emitIntrinsic(raw_ostream &OS, Record *R,
+ StringMap<ClassKind> &EmittedMap);
+ void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64GenBuiltinDef);
+ void genOverloadTypeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64TypeCheck);
+ void genIntrinsicRangeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64RangeCheck);
+ void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
+ bool isA64TestGen);
};
} // end anonymous namespace
@@ -249,7 +394,8 @@ static void ParseTypes(Record *r, std::string &s,
int len = 0;
for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
- if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U')
+ if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
+ || data[len] == 'H' || data[len] == 'S')
continue;
switch (data[len]) {
@@ -259,6 +405,7 @@ static void ParseTypes(Record *r, std::string &s,
case 'l':
case 'h':
case 'f':
+ case 'd':
break;
default:
PrintFatalError(r->getLoc(),
@@ -282,6 +429,8 @@ static char Widen(const char t) {
return 'l';
case 'h':
return 'f';
+ case 'f':
+ return 'd';
default:
PrintFatalError("unhandled type in widen!");
}
@@ -299,18 +448,46 @@ static char Narrow(const char t) {
return 'i';
case 'f':
return 'h';
+ case 'd':
+ return 'f';
default:
PrintFatalError("unhandled type in narrow!");
}
}
+static std::string GetNarrowTypestr(StringRef ty)
+{
+ std::string s;
+ for (size_t i = 0, end = ty.size(); i < end; i++) {
+ switch (ty[i]) {
+ case 's':
+ s += 'c';
+ break;
+ case 'i':
+ s += 's';
+ break;
+ case 'l':
+ s += 'i';
+ break;
+ default:
+ s += ty[i];
+ break;
+ }
+ }
+
+ return s;
+}
+
/// For a particular StringRef, return the base type code, and whether it has
/// the quad-vector, polynomial, or unsigned modifiers set.
static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
unsigned off = 0;
-
+ // ignore scalar.
+ if (ty[off] == 'S') {
+ ++off;
+ }
// remember quad.
- if (ty[off] == 'Q') {
+ if (ty[off] == 'Q' || ty[off] == 'H') {
quad = true;
++off;
}
@@ -342,27 +519,52 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
usgn = true;
}
break;
+ case 'b':
+ scal = true;
case 'u':
usgn = true;
poly = false;
if (type == 'f')
type = 'i';
+ if (type == 'd')
+ type = 'l';
break;
+ case '$':
+ scal = true;
case 'x':
usgn = false;
poly = false;
if (type == 'f')
type = 'i';
+ if (type == 'd')
+ type = 'l';
break;
+ case 'o':
+ scal = true;
+ type = 'd';
+ usgn = false;
+ break;
+ case 'y':
+ scal = true;
case 'f':
if (type == 'h')
quad = true;
type = 'f';
usgn = false;
break;
+ case 'F':
+ type = 'd';
+ usgn = false;
+ break;
case 'g':
quad = false;
break;
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'j':
+ quad = true;
+ break;
case 'w':
type = Widen(type);
quad = true;
@@ -379,6 +581,14 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
scal = true;
usgn = true;
break;
+ case 'z':
+ type = Narrow(type);
+ scal = true;
+ break;
+ case 'r':
+ type = Widen(type);
+ scal = true;
+ break;
case 's':
case 'a':
scal = true;
@@ -397,16 +607,28 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
if (type == 'h')
quad = false;
break;
+ case 'q':
+ type = Narrow(type);
+ quad = true;
+ break;
case 'e':
type = Narrow(type);
usgn = true;
break;
+ case 'm':
+ type = Narrow(type);
+ quad = false;
+ break;
default:
break;
}
return type;
}
+static bool IsMultiVecProto(const char p) {
+ return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
+}
+
/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type. QUc -> uint8x8_t.
static std::string TypeString(const char mod, StringRef typestr) {
@@ -453,7 +675,7 @@ static std::string TypeString(const char mod, StringRef typestr) {
s += quad ? "x4" : "x2";
break;
case 'l':
- s += "int64";
+ s += (poly && !usgn)? "poly64" : "int64";
if (scal)
break;
s += quad ? "x2" : "x1";
@@ -470,15 +692,22 @@ static std::string TypeString(const char mod, StringRef typestr) {
break;
s += quad ? "x4" : "x2";
break;
+ case 'd':
+ s += "float64";
+ if (scal)
+ break;
+ s += quad ? "x2" : "x1";
+ break;
+
default:
PrintFatalError("unhandled type!");
}
- if (mod == '2')
+ if (mod == '2' || mod == 'B')
s += "x2";
- if (mod == '3')
+ if (mod == '3' || mod == 'C')
s += "x3";
- if (mod == '4')
+ if (mod == '4' || mod == 'D')
s += "x4";
// Append _t, finishing the type string typedef type.
@@ -527,7 +756,8 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr,
type = 's';
usgn = true;
}
- usgn = usgn | poly | ((ck == ClassI || ck == ClassW) && scal && type != 'f');
+ usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
+ scal && type != 'f' && type != 'd');
if (scal) {
SmallString<128> s;
@@ -554,10 +784,12 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr,
// returning structs of 2, 3, or 4 vectors which are returned in a sret-like
// fashion, storing them to a pointer arg.
if (ret) {
- if (mod >= '2' && mod <= '4')
+ if (IsMultiVecProto(mod))
return "vv*"; // void result with void* first argument
if (mod == 'f' || (ck != ClassB && type == 'f'))
return quad ? "V4f" : "V2f";
+ if (mod == 'F' || (ck != ClassB && type == 'd'))
+ return quad ? "V2d" : "V1d";
if (ck != ClassB && type == 's')
return quad ? "V8s" : "V4s";
if (ck != ClassB && type == 'i')
@@ -569,15 +801,17 @@ static std::string BuiltinTypeString(const char mod, StringRef typestr,
}
// Non-return array types are passed as individual vectors.
- if (mod == '2')
+ if (mod == '2' || mod == 'B')
return quad ? "V16ScV16Sc" : "V8ScV8Sc";
- if (mod == '3')
+ if (mod == '3' || mod == 'C')
return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
- if (mod == '4')
+ if (mod == '4' || mod == 'D')
return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
if (mod == 'f' || (ck != ClassB && type == 'f'))
return quad ? "V4f" : "V2f";
+ if (mod == 'F' || (ck != ClassB && type == 'd'))
+ return quad ? "V2d" : "V1d";
if (ck != ClassB && type == 's')
return quad ? "V8s" : "V4s";
if (ck != ClassB && type == 'i')
@@ -625,7 +859,7 @@ static void InstructionTypeCode(const StringRef &typeStr,
break;
case 'l':
switch (ck) {
- case ClassS: typeCode = usgn ? "u64" : "s64"; break;
+ case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
case ClassI: typeCode = "i64"; break;
case ClassW: typeCode = "64"; break;
default: break;
@@ -647,17 +881,60 @@ static void InstructionTypeCode(const StringRef &typeStr,
default: break;
}
break;
+ case 'd':
+ switch (ck) {
+ case ClassS:
+ case ClassI:
+ typeCode += "f64";
+ break;
+ case ClassW:
+ PrintFatalError("unhandled type!");
+ default:
+ break;
+ }
+ break;
default:
PrintFatalError("unhandled type!");
}
}
+static char Insert_BHSD_Suffix(StringRef typestr){
+ unsigned off = 0;
+ if(typestr[off++] == 'S'){
+ while(typestr[off] == 'Q' || typestr[off] == 'H'||
+ typestr[off] == 'P' || typestr[off] == 'U')
+ ++off;
+ switch (typestr[off]){
+ default : break;
+ case 'c' : return 'b';
+ case 's' : return 'h';
+ case 'i' :
+ case 'f' : return 's';
+ case 'l' :
+ case 'd' : return 'd';
+ }
+ }
+ return 0;
+}
+
+static bool endsWith_xN(std::string const &name) {
+ if (name.length() > 3) {
+ if (name.compare(name.length() - 3, 3, "_x2") == 0 ||
+ name.compare(name.length() - 3, 3, "_x3") == 0 ||
+ name.compare(name.length() - 3, 3, "_x4") == 0)
+ return true;
+ }
+ return false;
+}
+
/// MangleName - Append a type or width suffix to a base neon function name,
-/// and insert a 'q' in the appropriate location if the operation works on
-/// 128b rather than 64b. E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
+/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
+/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
+/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
static std::string MangleName(const std::string &name, StringRef typestr,
ClassKind ck) {
- if (name == "vcvt_f32_f16")
+ if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
+ name == "vcvt_f64_f32")
return name;
bool quad = false;
@@ -668,7 +945,11 @@ static std::string MangleName(const std::string &name, StringRef typestr,
std::string s = name;
if (typeCode.size() > 0) {
- s += "_" + typeCode;
+ // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
+ if (endsWith_xN(s))
+ s.insert(s.length() - 3, "_" + typeCode);
+ else
+ s += "_" + typeCode;
}
if (ck == ClassB)
@@ -676,9 +957,14 @@ static std::string MangleName(const std::string &name, StringRef typestr,
// Insert a 'q' before the first '_' character so that it ends up before
// _lane or _n on vector-scalar operations.
- if (quad) {
+ if (typestr.find("Q") != StringRef::npos) {
+ size_t pos = s.find('_');
+ s = s.insert(pos, "q");
+ }
+ char ins = Insert_BHSD_Suffix(typestr);
+ if(ins){
size_t pos = s.find('_');
- s = s.insert(pos, "q");
+ s = s.insert(pos, &ins, 1);
}
return s;
@@ -770,9 +1056,7 @@ GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
// a dup/lane instruction.
if (IsLDSTOne) {
if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
- RegisterSuffix += ", :" + OutTypeCode;
- } else if (OutTypeCode == "64") {
- RegisterSuffix += ", :64";
+ RegisterSuffix += ":" + OutTypeCode;
}
}
@@ -828,6 +1112,7 @@ static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
switch (Proto[i]) {
case 'u':
case 'f':
+ case 'F':
case 'd':
case 's':
case 'x':
@@ -840,6 +1125,7 @@ static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
NormedProto += 'q';
break;
case 'g':
+ case 'j':
case 'h':
case 'e':
NormedProto += 'd';
@@ -1158,7 +1444,8 @@ static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
}
// Generate the string "(argtype a, argtype b, ...)"
-static std::string GenArgs(const std::string &proto, StringRef typestr) {
+static std::string GenArgs(const std::string &proto, StringRef typestr,
+ const std::string &name) {
bool define = UseMacro(proto);
char arg = 'a';
@@ -1176,6 +1463,9 @@ static std::string GenArgs(const std::string &proto, StringRef typestr) {
s += TypeString(proto[i], typestr) + " __";
}
s.push_back(arg);
+ //To avoid argument being multiple defined, add extra number for renaming.
+ if (name == "vcopy_lane" || name == "vcopy_laneq")
+ s.push_back('1');
if ((i + 1) < e)
s += ", ";
}
@@ -1186,7 +1476,8 @@ static std::string GenArgs(const std::string &proto, StringRef typestr) {
// Macro arguments are not type-checked like inline function arguments, so
// assign them to local temporaries to get the right type checking.
-static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
+static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
+ const std::string &name ) {
char arg = 'a';
std::string s;
bool generatedLocal = false;
@@ -1197,11 +1488,18 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
if (MacroArgUsedDirectly(proto, i))
continue;
generatedLocal = true;
+ bool extranumber = false;
+ if (name == "vcopy_lane" || name == "vcopy_laneq")
+ extranumber = true;
s += TypeString(proto[i], typestr) + " __";
s.push_back(arg);
+ if(extranumber)
+ s.push_back('1');
s += " = (";
s.push_back(arg);
+ if(extranumber)
+ s.push_back('1');
s += "); ";
}
@@ -1211,13 +1509,60 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr) {
}
// Use the vmovl builtin to sign-extend or zero-extend a vector.
-static std::string Extend(StringRef typestr, const std::string &a) {
+static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
+ std::string s, high;
+ high = h ? "_high" : "";
+ s = MangleName("vmovl" + high, typestr, ClassS);
+ s += "(" + a + ")";
+ return s;
+}
+
+// Get the high 64-bit part of a vector
+static std::string GetHigh(const std::string &a, StringRef typestr) {
std::string s;
- s = MangleName("vmovl", typestr, ClassS);
+ s = MangleName("vget_high", typestr, ClassS);
s += "(" + a + ")";
return s;
}
+// Gen operation with two operands and get high 64-bit for both of two operands.
+static std::string Gen2OpWith2High(StringRef typestr,
+ const std::string &op,
+ const std::string &a,
+ const std::string &b) {
+ std::string s;
+ std::string Op1 = GetHigh(a, typestr);
+ std::string Op2 = GetHigh(b, typestr);
+ s = MangleName(op, typestr, ClassS);
+ s += "(" + Op1 + ", " + Op2 + ");";
+ return s;
+}
+
+// Gen operation with three operands and get high 64-bit of the latter
+// two operands.
+static std::string Gen3OpWith2High(StringRef typestr,
+ const std::string &op,
+ const std::string &a,
+ const std::string &b,
+ const std::string &c) {
+ std::string s;
+ std::string Op1 = GetHigh(b, typestr);
+ std::string Op2 = GetHigh(c, typestr);
+ s = MangleName(op, typestr, ClassS);
+ s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
+ return s;
+}
+
+// Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
+static std::string GenCombine(std::string typestr,
+ const std::string &a,
+ const std::string &b) {
+ std::string s;
+ s = MangleName("vcombine", typestr, ClassS);
+ s += "(" + a + ", " + b + ")";
+ return s;
+}
+
static std::string Duplicate(unsigned nElts, StringRef typestr,
const std::string &a) {
std::string s;
@@ -1242,6 +1587,15 @@ static std::string SplatLane(unsigned nElts, const std::string &vec,
return s;
}
+static std::string RemoveHigh(const std::string &name) {
+ std::string s = name;
+ std::size_t found = s.find("_high_");
+ if (found == std::string::npos)
+ PrintFatalError("name should contain \"_high_\" for high intrinsics");
+ s.replace(found, 5, "");
+ return s;
+}
+
static unsigned GetNumElements(StringRef typestr, bool &quad) {
quad = false;
bool dummy = false;
@@ -1254,6 +1608,9 @@ static unsigned GetNumElements(StringRef typestr, bool &quad) {
case 'l': nElts = 1; break;
case 'h': nElts = 4; break;
case 'f': nElts = 2; break;
+ case 'd':
+ nElts = 1;
+ break;
default:
PrintFatalError("unhandled type!");
}
@@ -1262,8 +1619,8 @@ static unsigned GetNumElements(StringRef typestr, bool &quad) {
}
// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
-static std::string GenOpString(OpKind op, const std::string &proto,
- StringRef typestr) {
+static std::string GenOpString(const std::string &name, OpKind op,
+ const std::string &proto, StringRef typestr) {
bool quad;
unsigned nElts = GetNumElements(typestr, quad);
bool define = UseMacro(proto);
@@ -1281,31 +1638,59 @@ static std::string GenOpString(OpKind op, const std::string &proto,
case OpAddl:
s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
break;
+ case OpAddlHi:
+ s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
+ break;
case OpAddw:
s += "__a + " + Extend(typestr, "__b") + ";";
break;
+ case OpAddwHi:
+ s += "__a + " + Extend(typestr, "__b", 1) + ";";
+ break;
case OpSub:
s += "__a - __b;";
break;
case OpSubl:
s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
break;
+ case OpSublHi:
+ s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
+ break;
case OpSubw:
s += "__a - " + Extend(typestr, "__b") + ";";
break;
+ case OpSubwHi:
+ s += "__a - " + Extend(typestr, "__b", 1) + ";";
+ break;
case OpMulN:
s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
break;
case OpMulLane:
s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
break;
+ case OpMulXLane:
+ s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
+ SplatLane(nElts, "__b", "__c") + ");";
+ break;
case OpMul:
s += "__a * __b;";
break;
+ case OpFMlaN:
+ s += MangleName("vfma", typestr, ClassS);
+ s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
+ break;
+ case OpFMlsN:
+ s += MangleName("vfms", typestr, ClassS);
+ s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
+ break;
case OpMullLane:
s += MangleName("vmull", typestr, ClassS) + "(__a, " +
SplatLane(nElts, "__b", "__c") + ");";
break;
+ case OpMullHiLane:
+ s += MangleName("vmull", typestr, ClassS) + "(" +
+ GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
+ break;
case OpMlaN:
s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
break;
@@ -1323,15 +1708,45 @@ static std::string GenOpString(OpKind op, const std::string &proto,
s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
SplatLane(nElts, "__c", "__d") + ");";
break;
+ case OpMlalHiLane:
+ s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
+ GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
+ break;
case OpMlal:
s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
break;
+ case OpMullHi:
+ s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
+ break;
+ case OpMullHiN:
+ s += MangleName("vmull_n", typestr, ClassS);
+ s += "(" + GetHigh("__a", typestr) + ", __b);";
+ return s;
+ case OpMlalHi:
+ s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
+ break;
+ case OpMlalHiN:
+ s += MangleName("vmlal_n", typestr, ClassS);
+ s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+ return s;
case OpMlsN:
s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
break;
case OpMlsLane:
s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
break;
+ case OpFMSLane:
+ s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
+ s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
+ s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
+ s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
+ break;
+ case OpFMSLaneQ:
+ s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
+ s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
+ s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
+ s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
+ break;
case OpMls:
s += "__a - (__b * __c);";
break;
@@ -1343,21 +1758,44 @@ static std::string GenOpString(OpKind op, const std::string &proto,
s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
SplatLane(nElts, "__c", "__d") + ");";
break;
+ case OpMlslHiLane:
+ s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
+ GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
+ break;
case OpMlsl:
s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
break;
+ case OpMlslHi:
+ s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
+ break;
+ case OpMlslHiN:
+ s += MangleName("vmlsl_n", typestr, ClassS);
+ s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+ break;
case OpQDMullLane:
s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
SplatLane(nElts, "__b", "__c") + ");";
break;
+ case OpQDMullHiLane:
+ s += MangleName("vqdmull", typestr, ClassS) + "(" +
+ GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
+ break;
case OpQDMlalLane:
s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
SplatLane(nElts, "__c", "__d") + ");";
break;
+ case OpQDMlalHiLane:
+ s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
+ GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
+ break;
case OpQDMlslLane:
s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
SplatLane(nElts, "__c", "__d") + ");";
break;
+ case OpQDMlslHiLane:
+ s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
+ GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
+ break;
case OpQDMulhLane:
s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
SplatLane(nElts, "__b", "__c") + ");";
@@ -1410,12 +1848,17 @@ static std::string GenOpString(OpKind op, const std::string &proto,
s += ", (int64x1_t)__b, 0, 1);";
break;
case OpHi:
- s += "(" + ts +
- ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1);";
+ // nElts is for the result vector, so the source is twice that number.
+ s += "__builtin_shufflevector(__a, __a";
+ for (unsigned i = nElts; i < nElts * 2; ++i)
+ s += ", " + utostr(i);
+ s+= ");";
break;
case OpLo:
- s += "(" + ts +
- ")__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0);";
+ s += "__builtin_shufflevector(__a, __a";
+ for (unsigned i = 0; i < nElts; ++i)
+ s += ", " + utostr(i);
+ s+= ");";
break;
case OpDup:
s += Duplicate(nElts, typestr, "__a") + ";";
@@ -1455,6 +1898,94 @@ static std::string GenOpString(OpKind op, const std::string &proto,
s += ");";
break;
}
+ case OpXtnHi: {
+ s = TypeString(proto[1], typestr) + " __a1 = " +
+ MangleName("vmovn", typestr, ClassS) + "(__b);\n " +
+ "return __builtin_shufflevector(__a, __a1";
+ for (unsigned i = 0; i < nElts * 4; ++i)
+ s += ", " + utostr(i);
+ s += ");";
+ break;
+ }
+ case OpSqxtunHi: {
+ s = TypeString(proto[1], typestr) + " __a1 = " +
+ MangleName("vqmovun", typestr, ClassS) + "(__b);\n " +
+ "return __builtin_shufflevector(__a, __a1";
+ for (unsigned i = 0; i < nElts * 4; ++i)
+ s += ", " + utostr(i);
+ s += ");";
+ break;
+ }
+ case OpQxtnHi: {
+ s = TypeString(proto[1], typestr) + " __a1 = " +
+ MangleName("vqmovn", typestr, ClassS) + "(__b);\n " +
+ "return __builtin_shufflevector(__a, __a1";
+ for (unsigned i = 0; i < nElts * 4; ++i)
+ s += ", " + utostr(i);
+ s += ");";
+ break;
+ }
+ case OpFcvtnHi: {
+ std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
+ s = TypeString(proto[1], typestr) + " __a1 = " +
+ MangleName(FName, typestr, ClassS) + "(__b);\n " +
+ "return __builtin_shufflevector(__a, __a1";
+ for (unsigned i = 0; i < nElts * 4; ++i)
+ s += ", " + utostr(i);
+ s += ");";
+ break;
+ }
+ case OpFcvtlHi: {
+ std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
+ s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
+ ";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);";
+ break;
+ }
+ case OpFcvtxnHi: {
+ s = TypeString(proto[1], typestr) + " __a1 = " +
+ MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " +
+ "return __builtin_shufflevector(__a, __a1";
+ for (unsigned i = 0; i < nElts * 4; ++i)
+ s += ", " + utostr(i);
+ s += ");";
+ break;
+ }
+ case OpUzp1:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = 0; i < nElts; i++)
+ s += ", " + utostr(2*i);
+ s += ");";
+ break;
+ case OpUzp2:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = 0; i < nElts; i++)
+ s += ", " + utostr(2*i+1);
+ s += ");";
+ break;
+ case OpZip1:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = 0; i < (nElts/2); i++)
+ s += ", " + utostr(i) + ", " + utostr(i+nElts);
+ s += ");";
+ break;
+ case OpZip2:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = nElts/2; i < nElts; i++)
+ s += ", " + utostr(i) + ", " + utostr(i+nElts);
+ s += ");";
+ break;
+ case OpTrn1:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = 0; i < (nElts/2); i++)
+ s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
+ s += ");";
+ break;
+ case OpTrn2:
+ s += "__builtin_shufflevector(__a, __b";
+ for (unsigned i = 0; i < (nElts/2); i++)
+ s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
+ s += ");";
+ break;
case OpAbdl: {
std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
if (typestr[0] != 'U') {
@@ -1468,23 +1999,247 @@ static std::string GenOpString(OpKind op, const std::string &proto,
}
break;
}
+ case OpAbdlHi:
+ s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
+ break;
+ case OpAddhnHi: {
+ std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
+ s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
+ s += ";";
+ break;
+ }
+ case OpRAddhnHi: {
+ std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
+ s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
+ s += ";";
+ break;
+ }
+ case OpSubhnHi: {
+ std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
+ s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
+ s += ";";
+ break;
+ }
+ case OpRSubhnHi: {
+ std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
+ s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
+ s += ";";
+ break;
+ }
case OpAba:
s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
break;
- case OpAbal: {
- s += "__a + ";
- std::string abd = MangleName("vabd", typestr, ClassS) + "(__b, __c)";
- if (typestr[0] != 'U') {
- // vabd results are always unsigned and must be zero-extended.
- std::string utype = "U" + typestr.str();
- s += "(" + TypeString(proto[0], typestr) + ")";
- abd = "(" + TypeString('d', utype) + ")" + abd;
- s += Extend(utype, abd) + ";";
+ case OpAbal:
+ s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
+ break;
+ case OpAbalHi:
+ s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
+ break;
+ case OpQDMullHi:
+ s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
+ break;
+ case OpQDMullHiN:
+ s += MangleName("vqdmull_n", typestr, ClassS);
+ s += "(" + GetHigh("__a", typestr) + ", __b);";
+ return s;
+ case OpQDMlalHi:
+ s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
+ break;
+ case OpQDMlalHiN:
+ s += MangleName("vqdmlal_n", typestr, ClassS);
+ s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+ return s;
+ case OpQDMlslHi:
+ s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
+ break;
+ case OpQDMlslHiN:
+ s += MangleName("vqdmlsl_n", typestr, ClassS);
+ s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+ return s;
+ case OpDiv:
+ s += "__a / __b;";
+ break;
+ case OpMovlHi: {
+ s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
+ MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s;
+ s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
+ s += "(__a1, 0);";
+ break;
+ }
+ case OpLongHi: {
+ // Another local variable __a1 is needed for calling a Macro,
+ // or using __a will have naming conflict when Macro expanding.
+ s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
+ MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
+ s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
+ "(__a1, __b);";
+ break;
+ }
+ case OpNarrowHi: {
+ s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
+ MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
+ break;
+ }
+ case OpCopyLane: {
+ s += TypeString('s', typestr) + " __c2 = " +
+ MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " +
+ MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
+ break;
+ }
+ case OpCopyQLane: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
+ "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
+ break;
+ }
+ case OpCopyLaneQ: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
+ "(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);";
+ break;
+ }
+ case OpScalarMulLane: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
+ "(__b, __c);\\\n __a * __d1;";
+ break;
+ }
+ case OpScalarMulLaneQ: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
+ "(__b, __c);\\\n __a * __d1;";
+ break;
+ }
+ case OpScalarMulXLane: {
+ bool dummy = false;
+ char type = ClassifyType(typestr, dummy, dummy, dummy);
+ if (type == 'f') type = 's';
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
+ "(__b, __c);\\\n vmulx" + type + "_" +
+ typeCode + "(__a, __d1);";
+ break;
+ }
+ case OpScalarMulXLaneQ: {
+ bool dummy = false;
+ char type = ClassifyType(typestr, dummy, dummy, dummy);
+ if (type == 'f') type = 's';
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
+ typeCode + "(__b, __c);\\\n vmulx" + type +
+ "_" + typeCode + "(__a, __d1);";
+ break;
+ }
+
+ case OpScalarVMulXLane: {
+ bool dummy = false;
+ char type = ClassifyType(typestr, dummy, dummy, dummy);
+ if (type == 'f') type = 's';
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vget_lane_" +
+ typeCode + "(__a, 0);\\\n" +
+ " " + TypeString('s', typestr) + " __e1 = vget_lane_" +
+ typeCode + "(__b, __c);\\\n" +
+ " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
+ typeCode + "(__d1, __e1);\\\n" +
+ " " + TypeString('d', typestr) + " __g1;\\\n" +
+ " vset_lane_" + typeCode + "(__f1, __g1, __c);";
+ break;
+ }
+
+ case OpScalarVMulXLaneQ: {
+ bool dummy = false;
+ char type = ClassifyType(typestr, dummy, dummy, dummy);
+ if (type == 'f') type = 's';
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vget_lane_" +
+ typeCode + "(__a, 0);\\\n" +
+ " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
+ typeCode + "(__b, __c);\\\n" +
+ " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
+ typeCode + "(__d1, __e1);\\\n" +
+ " " + TypeString('d', typestr) + " __g1;\\\n" +
+ " vset_lane_" + typeCode + "(__f1, __g1, 0);";
+ break;
+ }
+ case OpScalarQDMullLane: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
+ "vget_lane_" + typeCode + "(b, __c));";
+ break;
+ }
+ case OpScalarQDMullLaneQ: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
+ "vgetq_lane_" + typeCode + "(b, __c));";
+ break;
+ }
+ case OpScalarQDMulHiLane: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
+ "vget_lane_" + typeCode + "(__b, __c));";
+ break;
+ }
+ case OpScalarQDMulHiLaneQ: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
+ "vgetq_lane_" + typeCode + "(__b, __c));";
+ break;
+ }
+ case OpScalarQRDMulHiLane: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
+ "vget_lane_" + typeCode + "(__b, __c));";
+ break;
+ }
+ case OpScalarQRDMulHiLaneQ: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
+ "vgetq_lane_" + typeCode + "(__b, __c));";
+ break;
+ }
+ case OpScalarGetLane:{
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ if (quad) {
+ s += "int16x8_t __a1 = vreinterpretq_s16_f16(__a);\\\n";
+ s += " vgetq_lane_s16(__a1, __b);";
} else {
- s += Extend(typestr, abd) + ";";
+ s += "int16x4_t __a1 = vreinterpret_s16_f16(__a);\\\n";
+ s += " vget_lane_s16(__a1, __b);";
}
break;
}
+ case OpScalarSetLane:{
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += "int16_t __a1 = (int16_t)__a;\\\n";
+ if (quad) {
+ s += " int16x8_t __b1 = vreinterpretq_s16_f16(b);\\\n";
+ s += " int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c);\\\n";
+ s += " vreinterpretq_f16_s16(__b2);";
+ } else {
+ s += " int16x4_t __b1 = vreinterpret_s16_f16(b);\\\n";
+ s += " int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c);\\\n";
+ s += " vreinterpret_f16_s16(__b2);";
+ }
+ break;
+ }
+
default:
PrintFatalError("unknown OpKind!");
}
@@ -1494,7 +2249,7 @@ static std::string GenOpString(OpKind op, const std::string &proto,
static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
unsigned mod = proto[0];
- if (mod == 'v' || mod == 'f')
+ if (mod == 'v' || mod == 'f' || mod == 'F')
mod = proto[1];
bool quad = false;
@@ -1522,7 +2277,7 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
ET = NeonTypeFlags::Int32;
break;
case 'l':
- ET = NeonTypeFlags::Int64;
+ ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
break;
case 'h':
ET = NeonTypeFlags::Float16;
@@ -1530,6 +2285,9 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
case 'f':
ET = NeonTypeFlags::Float32;
break;
+ case 'd':
+ ET = NeonTypeFlags::Float64;
+ break;
default:
PrintFatalError("unhandled type!");
}
@@ -1537,6 +2295,19 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
return Flags.getFlags();
}
+// We don't check 'a' in this function, because for builtin function the
+// argument matching to 'a' uses a vector type splatted from a scalar type.
+static bool ProtoHasScalar(const std::string proto)
+{
+ return (proto.find('s') != std::string::npos
+ || proto.find('z') != std::string::npos
+ || proto.find('r') != std::string::npos
+ || proto.find('b') != std::string::npos
+ || proto.find('$') != std::string::npos
+ || proto.find('y') != std::string::npos
+ || proto.find('o') != std::string::npos);
+}
+
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
static std::string GenBuiltin(const std::string &name, const std::string &proto,
StringRef typestr, ClassKind ck) {
@@ -1544,14 +2315,14 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto,
// If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
// sret-like argument.
- bool sret = (proto[0] >= '2' && proto[0] <= '4');
+ bool sret = IsMultiVecProto(proto[0]);
bool define = UseMacro(proto);
// Check if the prototype has a scalar operand with the type of the vector
// elements. If not, bitcasting the args will take care of arg checking.
// The actual signedness etc. will be taken care of with special enums.
- if (proto.find('s') == std::string::npos)
+ if (!ProtoHasScalar(proto))
ck = ClassB;
if (proto[0] != 'v') {
@@ -1604,12 +2375,19 @@ static std::string GenBuiltin(const std::string &name, const std::string &proto,
// Handle multiple-vector values specially, emitting each subvector as an
// argument to the __builtin.
+ unsigned NumOfVec = 0;
if (proto[i] >= '2' && proto[i] <= '4') {
+ NumOfVec = proto[i] - '0';
+ } else if (proto[i] >= 'B' && proto[i] <= 'D') {
+ NumOfVec = proto[i] - 'A' + 1;
+ }
+
+ if (NumOfVec > 0) {
// Check if an explicit cast is needed.
if (argType != 'c' || argPoly || argUsgn)
args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
- for (unsigned vi = 0, ve = proto[i] - '0'; vi != ve; ++vi) {
+ for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
s += args + ".val[" + utostr(vi) + "]";
if ((vi + 1) < ve)
s += ", ";
@@ -1662,7 +2440,7 @@ static std::string GenBuiltinDef(const std::string &name,
// If all types are the same size, bitcasting the args will take care
// of arg checking. The actual signedness etc. will be taken care of with
// special enums.
- if (proto.find('s') == std::string::npos)
+ if (!ProtoHasScalar(proto))
ck = ClassB;
s += MangleName(name, typestr, ck);
@@ -1706,12 +2484,12 @@ static std::string GenIntrinsic(const std::string &name,
s += mangledName;
// Function arguments
- s += GenArgs(proto, inTypeStr);
+ s += GenArgs(proto, inTypeStr, name);
// Definition.
if (define) {
s += " __extension__ ({ \\\n ";
- s += GenMacroLocals(proto, inTypeStr);
+ s += GenMacroLocals(proto, inTypeStr, name);
} else if (kind == OpUnavailable) {
s += " __attribute__((unavailable));\n";
return s;
@@ -1719,7 +2497,7 @@ static std::string GenIntrinsic(const std::string &name,
s += " {\n ";
if (kind != OpNone)
- s += GenOpString(kind, proto, outTypeStr);
+ s += GenOpString(name, kind, proto, outTypeStr);
else
s += GenBuiltin(name, proto, outTypeStr, classKind);
if (define)
@@ -1773,7 +2551,7 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << "#ifndef __ARM_NEON_H\n";
OS << "#define __ARM_NEON_H\n\n";
- OS << "#ifndef __ARM_NEON__\n";
+ OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n";
OS << "#error \"NEON support not enabled\"\n";
OS << "#endif\n\n";
@@ -1781,19 +2559,50 @@ void NeonEmitter::run(raw_ostream &OS) {
// Emit NEON-specific scalar typedefs.
OS << "typedef float float32_t;\n";
+ OS << "typedef __fp16 float16_t;\n";
+
+ OS << "#ifdef __aarch64__\n";
+ OS << "typedef double float64_t;\n";
+ OS << "#endif\n\n";
+
+ // For now, signedness of polynomial types depends on target
+ OS << "#ifdef __aarch64__\n";
+ OS << "typedef uint8_t poly8_t;\n";
+ OS << "typedef uint16_t poly16_t;\n";
+ OS << "typedef uint64_t poly64_t;\n";
+ OS << "#else\n";
OS << "typedef int8_t poly8_t;\n";
OS << "typedef int16_t poly16_t;\n";
- OS << "typedef uint16_t float16_t;\n";
+ OS << "#endif\n";
// Emit Neon vector typedefs.
- std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
+ std::string TypedefTypes(
+ "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
SmallVector<StringRef, 24> TDTypeVec;
ParseTypes(0, TypedefTypes, TDTypeVec);
// Emit vector typedefs.
+ bool isA64 = false;
+ bool preinsert;
+ bool postinsert;
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
bool dummy, quad = false, poly = false;
- (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
+ char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
+ preinsert = false;
+ postinsert = false;
+
+ if (type == 'd' || (type == 'l' && poly)) {
+ preinsert = isA64? false: true;
+ isA64 = true;
+ } else {
+ postinsert = isA64? true: false;
+ isA64 = false;
+ }
+ if (postinsert)
+ OS << "#endif\n";
+ if (preinsert)
+ OS << "#ifdef __aarch64__\n";
+
if (poly)
OS << "typedef __attribute__((neon_polyvector_type(";
else
@@ -1806,50 +2615,130 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << TypeString('s', TDTypeVec[i]);
OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
+
}
+ postinsert = isA64? true: false;
+ if (postinsert)
+ OS << "#endif\n";
OS << "\n";
// Emit struct typedefs.
+ isA64 = false;
for (unsigned vi = 2; vi != 5; ++vi) {
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
+ bool dummy, quad = false, poly = false;
+ char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
+ preinsert = false;
+ postinsert = false;
+
+ if (type == 'd' || (type == 'l' && poly)) {
+ preinsert = isA64? false: true;
+ isA64 = true;
+ } else {
+ postinsert = isA64? true: false;
+ isA64 = false;
+ }
+ if (postinsert)
+ OS << "#endif\n";
+ if (preinsert)
+ OS << "#ifdef __aarch64__\n";
+
std::string ts = TypeString('d', TDTypeVec[i]);
std::string vs = TypeString('0' + vi, TDTypeVec[i]);
OS << "typedef struct " << vs << " {\n";
OS << " " << ts << " val";
OS << "[" << utostr(vi) << "]";
OS << ";\n} ";
- OS << vs << ";\n\n";
+ OS << vs << ";\n";
+ OS << "\n";
}
}
+ postinsert = isA64? true: false;
+ if (postinsert)
+ OS << "#endif\n";
+ OS << "\n";
OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
+ StringMap<ClassKind> EmittedMap;
+
// Emit vmovl, vmull and vabd intrinsics first so they can be used by other
// intrinsics. (Some of the saturating multiply instructions are also
// used to implement the corresponding "_lane" variants, but tablegen
// sorts the records into alphabetical order so that the "_lane" variants
// come after the intrinsics they use.)
- emitIntrinsic(OS, Records.getDef("VMOVL"));
- emitIntrinsic(OS, Records.getDef("VMULL"));
- emitIntrinsic(OS, Records.getDef("VABD"));
-
+ emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
+
+ // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
+ // common intrinsics appear only once in the output stream.
+ // The check for uniquiness is done in emitIntrinsic.
+ // Emit ARM intrinsics.
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
- if (R->getName() != "VMOVL" &&
- R->getName() != "VMULL" &&
+
+ // Skip AArch64 intrinsics; they will be emitted at the end.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (isA64)
+ continue;
+
+ if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
R->getName() != "VABD")
- emitIntrinsic(OS, R);
+ emitIntrinsic(OS, R, EmittedMap);
+ }
+
+ // Emit AArch64-specific intrinsics.
+ OS << "#ifdef __aarch64__\n";
+
+ emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
+
+ for (unsigned i = 0, e = RV.size(); i != e; ++i) {
+ Record *R = RV[i];
+
+ // Skip ARM intrinsics already included above.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64)
+ continue;
+
+ // Skip crypto temporarily, and will emit them all together at the end.
+ bool isCrypto = R->getValueAsBit("isCrypto");
+ if (isCrypto)
+ continue;
+
+ emitIntrinsic(OS, R, EmittedMap);
+ }
+
+ OS << "#ifdef __ARM_FEATURE_CRYPTO\n";
+
+ for (unsigned i = 0, e = RV.size(); i != e; ++i) {
+ Record *R = RV[i];
+
+ // Skip crypto temporarily, and will emit them all together at the end.
+ bool isCrypto = R->getValueAsBit("isCrypto");
+ if (!isCrypto)
+ continue;
+
+ emitIntrinsic(OS, R, EmittedMap);
}
+
+ OS << "#endif\n\n";
+
+ OS << "#endif\n\n";
OS << "#undef __ai\n\n";
OS << "#endif /* __ARM_NEON_H */\n";
}
/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
-/// intrinsics specified by record R.
-void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
+/// intrinsics specified by record R checking for intrinsic uniqueness.
+void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
+ StringMap<ClassKind> &EmittedMap) {
std::string name = R->getValueAsString("Name");
std::string Proto = R->getValueAsString("Prototype");
std::string Types = R->getValueAsString("Types");
@@ -1876,12 +2765,20 @@ void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
(void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
if (srcti == ti || inQuad != outQuad)
continue;
- OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
- OpCast, ClassS);
+ std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
+ OpCast, ClassS);
+ if (EmittedMap.count(s))
+ continue;
+ EmittedMap[s] = ClassS;
+ OS << s;
}
} else {
- OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
- kind, classKind);
+ std::string s =
+ GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
+ if (EmittedMap.count(s))
+ continue;
+ EmittedMap[s] = classKind;
+ OS << s;
}
}
OS << "\n";
@@ -1902,6 +2799,7 @@ static unsigned RangeFromType(const char mod, StringRef typestr) {
case 'f':
case 'i':
return (2 << (int)quad) - 1;
+ case 'd':
case 'l':
return (1 << (int)quad) - 1;
default:
@@ -1909,56 +2807,198 @@ static unsigned RangeFromType(const char mod, StringRef typestr) {
}
}
-/// runHeader - Emit a file with sections defining:
-/// 1. the NEON section of BuiltinsARM.def.
-/// 2. the SemaChecking code for the type overload checking.
-/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
-void NeonEmitter::runHeader(raw_ostream &OS) {
- std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
+static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
+ // base type to get the type string for.
+ bool dummy = false;
+ char type = ClassifyType(typestr, dummy, dummy, dummy);
+ type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
+ switch (type) {
+ case 'c':
+ return 7;
+ case 'h':
+ case 's':
+ return 15;
+ case 'f':
+ case 'i':
+ return 31;
+ case 'd':
+ case 'l':
+ return 63;
+ default:
+ PrintFatalError("unhandled type!");
+ }
+}
+
+/// Generate the ARM and AArch64 intrinsic range checking code for
+/// shift/lane immediates, checking for unique declarations.
+void
+NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64RangeCheck) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
StringMap<OpKind> EmittedMap;
- // Generate BuiltinsARM.def for NEON
- OS << "#ifdef GET_NEON_BUILTINS\n";
+ // Generate the intrinsic range checking code for shift/lane immediates.
+ if (isA64RangeCheck)
+ OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
+ else
+ OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
+
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
if (k != OpNone)
continue;
+ std::string name = R->getValueAsString("Name");
std::string Proto = R->getValueAsString("Prototype");
+ std::string Types = R->getValueAsString("Types");
+ std::string Rename = name + "@" + Proto;
// Functions with 'a' (the splat code) in the type prototype should not get
// their own builtin as they use the non-splat variant.
if (Proto.find('a') != std::string::npos)
continue;
- std::string Types = R->getValueAsString("Types");
+ // Functions which do not have an immediate do not need to have range
+ // checking code emitted.
+ size_t immPos = Proto.find('i');
+ if (immPos == std::string::npos)
+ continue;
+
SmallVector<StringRef, 16> TypeVec;
ParseTypes(R, Types, TypeVec);
if (R->getSuperClasses().size() < 2)
PrintFatalError(R->getLoc(), "Builtin has no class kind");
- std::string name = R->getValueAsString("Name");
ClassKind ck = ClassMap[R->getSuperClasses()[1]];
+ if (!ProtoHasScalar(Proto))
+ ck = ClassB;
+
+ // Do not include AArch64 range checks if not generating code for AArch64.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64RangeCheck && isA64)
+ continue;
+
+ // Include ARM range checks in AArch64 but only if ARM intrinsics are not
+ // redefined by AArch64 to handle new types.
+ if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
+ ClassKind &A64CK = A64IntrinsicMap[Rename];
+ if (A64CK == ck && ck != ClassNone)
+ continue;
+ }
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- // Generate the BuiltinsARM.def declaration for this builtin, ensuring
- // that each unique BUILTIN() macro appears only once in the output
- // stream.
- std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
- if (EmittedMap.count(bd))
+ std::string namestr, shiftstr, rangestr;
+
+ if (R->getValueAsBit("isVCVT_N")) {
+ // VCVT between floating- and fixed-point values takes an immediate
+ // in the range [1, 32] for f32, or [1, 64] for f64.
+ ck = ClassB;
+ if (name.find("32") != std::string::npos)
+ rangestr = "l = 1; u = 31"; // upper bound = l + u
+ else if (name.find("64") != std::string::npos)
+ rangestr = "l = 1; u = 63";
+ else
+ PrintFatalError(R->getLoc(),
+ "Fixed point convert name should contains \"32\" or \"64\"");
+
+ } else if (R->getValueAsBit("isScalarShift")) {
+ // Right shifts have an 'r' in the name, left shifts do not. Convert
+ // instructions have the same bounds and right shifts.
+ if (name.find('r') != std::string::npos ||
+ name.find("cvt") != std::string::npos)
+ rangestr = "l = 1; ";
+
+ unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
+ // Narrow shift has half the upper bound
+ if (R->getValueAsBit("isScalarNarrowShift"))
+ upBound /= 2;
+
+ rangestr += "u = " + utostr(upBound);
+ } else if (R->getValueAsBit("isShift")) {
+ // Builtins which are overloaded by type will need to have their upper
+ // bound computed at Sema time based on the type constant.
+ shiftstr = ", true";
+
+ // Right shifts have an 'r' in the name, left shifts do not.
+ if (name.find('r') != std::string::npos)
+ rangestr = "l = 1; ";
+
+ rangestr += "u = RFT(TV" + shiftstr + ")";
+ } else {
+ // The immediate generally refers to a lane in the preceding argument.
+ assert(immPos > 0 && "unexpected immediate operand");
+ rangestr =
+ "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
+ }
+ // Make sure cases appear only once by uniquing them in a string map.
+ namestr = MangleName(name, TypeVec[ti], ck);
+ if (EmittedMap.count(namestr))
continue;
+ EmittedMap[namestr] = OpNone;
- EmittedMap[bd] = OpNone;
- OS << bd << "\n";
+ // Calculate the index of the immediate that should be range checked.
+ unsigned immidx = 0;
+
+ // Builtins that return a struct of multiple vectors have an extra
+ // leading arg for the struct return.
+ if (IsMultiVecProto(Proto[0]))
+ ++immidx;
+
+ // Add one to the index for each argument until we reach the immediate
+ // to be checked. Structs of vectors are passed as multiple arguments.
+ for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
+ switch (Proto[ii]) {
+ default:
+ immidx += 1;
+ break;
+ case '2':
+ case 'B':
+ immidx += 2;
+ break;
+ case '3':
+ case 'C':
+ immidx += 3;
+ break;
+ case '4':
+ case 'D':
+ immidx += 4;
+ break;
+ case 'i':
+ ie = ii + 1;
+ break;
+ }
+ }
+ if (isA64RangeCheck)
+ OS << "case AArch64::BI__builtin_neon_";
+ else
+ OS << "case ARM::BI__builtin_neon_";
+ OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
+ << rangestr << "; break;\n";
}
}
OS << "#endif\n\n";
+}
+
+/// Generate the ARM and AArch64 overloaded type checking code for
+/// SemaChecking.cpp, checking for unique builtin declarations.
+void
+NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64TypeCheck) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ StringMap<OpKind> EmittedMap;
// Generate the overloaded type checking code for SemaChecking.cpp
- OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
+ if (isA64TypeCheck)
+ OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
+ else
+ OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
+
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
@@ -1968,7 +3008,8 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
std::string Proto = R->getValueAsString("Prototype");
std::string Types = R->getValueAsString("Types");
std::string name = R->getValueAsString("Name");
-
+ std::string Rename = name + "@" + Proto;
+
// Functions with 'a' (the splat code) in the type prototype should not get
// their own builtin as they use the non-splat variant.
if (Proto.find('a') != std::string::npos)
@@ -1976,7 +3017,7 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
// Functions which have a scalar argument cannot be overloaded, no need to
// check them if we are emitting the type checking code.
- if (Proto.find('s') != std::string::npos)
+ if (ProtoHasScalar(Proto))
continue;
SmallVector<StringRef, 16> TypeVec;
@@ -1985,6 +3026,21 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
if (R->getSuperClasses().size() < 2)
PrintFatalError(R->getLoc(), "Builtin has no class kind");
+ // Do not include AArch64 type checks if not generating code for AArch64.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64TypeCheck && isA64)
+ continue;
+
+ // Include ARM type check in AArch64 but only if ARM intrinsics
+ // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
+ // redefined in AArch64 to handle an additional 2 x f64 type.
+ ClassKind ck = ClassMap[R->getSuperClasses()[1]];
+ if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
+ ClassKind &A64CK = A64IntrinsicMap[Rename];
+ if (A64CK == ck && ck != ClassNone)
+ continue;
+ }
+
int si = -1, qi = -1;
uint64_t mask = 0, qmask = 0;
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
@@ -2017,7 +3073,7 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
}
}
// For sret builtins, adjust the pointer argument index.
- if (PtrArgNum >= 0 && (Proto[0] >= '2' && Proto[0] <= '4'))
+ if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
PtrArgNum += 1;
// Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
@@ -2032,9 +3088,12 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
}
if (mask) {
- OS << "case ARM::BI__builtin_neon_"
- << MangleName(name, TypeVec[si], ClassB)
- << ": mask = " << "0x" << utohexstr(mask) << "ULL";
+ if (isA64TypeCheck)
+ OS << "case AArch64::BI__builtin_neon_";
+ else
+ OS << "case ARM::BI__builtin_neon_";
+ OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
+ << "0x" << utohexstr(mask) << "ULL";
if (PtrArgNum >= 0)
OS << "; PtrArgNum = " << PtrArgNum;
if (HasConstPtr)
@@ -2042,9 +3101,12 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
OS << "; break;\n";
}
if (qmask) {
- OS << "case ARM::BI__builtin_neon_"
- << MangleName(name, TypeVec[qi], ClassB)
- << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
+ if (isA64TypeCheck)
+ OS << "case AArch64::BI__builtin_neon_";
+ else
+ OS << "case ARM::BI__builtin_neon_";
+ OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
+ << "0x" << utohexstr(qmask) << "ULL";
if (PtrArgNum >= 0)
OS << "; PtrArgNum = " << PtrArgNum;
if (HasConstPtr)
@@ -2053,31 +3115,38 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
}
}
OS << "#endif\n\n";
+}
+
+/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
+/// declaration of builtins, checking for unique builtin declarations.
+void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64GenBuiltinDef) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ StringMap<OpKind> EmittedMap;
+
+ // Generate BuiltinsARM.def and BuiltinsAArch64.def
+ if (isA64GenBuiltinDef)
+ OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
+ else
+ OS << "#ifdef GET_NEON_BUILTINS\n";
- // Generate the intrinsic range checking code for shift/lane immediates.
- OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
-
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
if (k != OpNone)
continue;
- std::string name = R->getValueAsString("Name");
std::string Proto = R->getValueAsString("Prototype");
- std::string Types = R->getValueAsString("Types");
+ std::string name = R->getValueAsString("Name");
+ std::string Rename = name + "@" + Proto;
// Functions with 'a' (the splat code) in the type prototype should not get
// their own builtin as they use the non-splat variant.
if (Proto.find('a') != std::string::npos)
continue;
- // Functions which do not have an immediate do not need to have range
- // checking code emitted.
- size_t immPos = Proto.find('i');
- if (immPos == std::string::npos)
- continue;
-
+ std::string Types = R->getValueAsString("Types");
SmallVector<StringRef, 16> TypeVec;
ParseTypes(R, Types, TypeVec);
@@ -2086,70 +3155,92 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
ClassKind ck = ClassMap[R->getSuperClasses()[1]];
- for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- std::string namestr, shiftstr, rangestr;
-
- if (R->getValueAsBit("isVCVT_N")) {
- // VCVT between floating- and fixed-point values takes an immediate
- // in the range 1 to 32.
- ck = ClassB;
- rangestr = "l = 1; u = 31"; // upper bound = l + u
- } else if (Proto.find('s') == std::string::npos) {
- // Builtins which are overloaded by type will need to have their upper
- // bound computed at Sema time based on the type constant.
- ck = ClassB;
- if (R->getValueAsBit("isShift")) {
- shiftstr = ", true";
+ // Do not include AArch64 BUILTIN() macros if not generating
+ // code for AArch64
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64GenBuiltinDef && isA64)
+ continue;
- // Right shifts have an 'r' in the name, left shifts do not.
- if (name.find('r') != std::string::npos)
- rangestr = "l = 1; ";
- }
- rangestr += "u = RFT(TV" + shiftstr + ")";
- } else {
- // The immediate generally refers to a lane in the preceding argument.
- assert(immPos > 0 && "unexpected immediate operand");
- rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
- }
- // Make sure cases appear only once by uniquing them in a string map.
- namestr = MangleName(name, TypeVec[ti], ck);
- if (EmittedMap.count(namestr))
+ // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
+ // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
+ // redefined in AArch64 to handle an additional 2 x f64 type.
+ if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
+ ClassKind &A64CK = A64IntrinsicMap[Rename];
+ if (A64CK == ck && ck != ClassNone)
continue;
- EmittedMap[namestr] = OpNone;
-
- // Calculate the index of the immediate that should be range checked.
- unsigned immidx = 0;
+ }
- // Builtins that return a struct of multiple vectors have an extra
- // leading arg for the struct return.
- if (Proto[0] >= '2' && Proto[0] <= '4')
- ++immidx;
+ for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
+ // Generate the declaration for this builtin, ensuring
+ // that each unique BUILTIN() macro appears only once in the output
+ // stream.
+ std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
+ if (EmittedMap.count(bd))
+ continue;
- // Add one to the index for each argument until we reach the immediate
- // to be checked. Structs of vectors are passed as multiple arguments.
- for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
- switch (Proto[ii]) {
- default: immidx += 1; break;
- case '2': immidx += 2; break;
- case '3': immidx += 3; break;
- case '4': immidx += 4; break;
- case 'i': ie = ii + 1; break;
- }
- }
- OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
- << ": i = " << immidx << "; " << rangestr << "; break;\n";
+ EmittedMap[bd] = OpNone;
+ OS << bd << "\n";
}
}
OS << "#endif\n\n";
}
+/// runHeader - Emit a file with sections defining:
+/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
+/// 2. the SemaChecking code for the type overload checking.
+/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
+void NeonEmitter::runHeader(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+
+ // build a map of AArch64 intriniscs to be used in uniqueness checks.
+ StringMap<ClassKind> A64IntrinsicMap;
+ for (unsigned i = 0, e = RV.size(); i != e; ++i) {
+ Record *R = RV[i];
+
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64)
+ continue;
+
+ ClassKind CK = ClassNone;
+ if (R->getSuperClasses().size() >= 2)
+ CK = ClassMap[R->getSuperClasses()[1]];
+
+ std::string Name = R->getValueAsString("Name");
+ std::string Proto = R->getValueAsString("Prototype");
+ std::string Rename = Name + "@" + Proto;
+ if (A64IntrinsicMap.count(Rename))
+ continue;
+ A64IntrinsicMap[Rename] = CK;
+ }
+
+ // Generate BuiltinsARM.def for ARM
+ genBuiltinsDef(OS, A64IntrinsicMap, false);
+
+ // Generate BuiltinsAArch64.def for AArch64
+ genBuiltinsDef(OS, A64IntrinsicMap, true);
+
+ // Generate ARM overloaded type checking code for SemaChecking.cpp
+ genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
+
+ // Generate AArch64 overloaded type checking code for SemaChecking.cpp
+ genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
+
+ // Generate ARM range checking code for shift/lane immediates.
+ genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
+
+ // Generate the AArch64 range checking code for shift/lane immediates.
+ genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
+}
+
/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
static std::string GenTest(const std::string &name,
const std::string &proto,
StringRef outTypeStr, StringRef inTypeStr,
bool isShift, bool isHiddenLOp,
- ClassKind ck, const std::string &InstName) {
+ ClassKind ck, const std::string &InstName,
+ bool isA64,
+ std::string & testFuncProto) {
assert(!proto.empty() && "");
std::string s;
@@ -2164,12 +3255,17 @@ static std::string GenTest(const std::string &name,
mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
}
+ // todo: GenerateChecksForIntrinsic does not generate CHECK
+ // for aarch64 instructions yet
std::vector<std::string> FileCheckPatterns;
- GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
- isHiddenLOp, FileCheckPatterns);
+ if (!isA64) {
+ GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
+ isHiddenLOp, FileCheckPatterns);
+ s+= "// CHECK_ARM: test_" + mangledName + "\n";
+ }
+ s += "// CHECK_AARCH64: test_" + mangledName + "\n";
// Emit the FileCheck patterns.
- s += "// CHECK: test_" + mangledName + "\n";
// If for any reason we do not want to emit a check, mangledInst
// will be the empty string.
if (FileCheckPatterns.size()) {
@@ -2177,23 +3273,27 @@ static std::string GenTest(const std::string &name,
e = FileCheckPatterns.end();
i != e;
++i) {
- s += "// CHECK: " + *i + "\n";
+ s += "// CHECK_ARM: " + *i + "\n";
}
}
// Emit the start of the test function.
- s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
+
+ testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
char arg = 'a';
std::string comma;
for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
// Do not create arguments for values that must be immediate constants.
if (proto[i] == 'i')
continue;
- s += comma + TypeString(proto[i], inTypeStr) + " ";
- s.push_back(arg);
+ testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
+ testFuncProto.push_back(arg);
comma = ", ";
}
- s += ") {\n ";
+ testFuncProto += ")";
+
+ s+= testFuncProto;
+ s+= " {\n ";
if (proto[0] != 'v')
s += "return ";
@@ -2217,18 +3317,14 @@ static std::string GenTest(const std::string &name,
return s;
}
-/// runTests - Write out a complete set of tests for all of the Neon
-/// intrinsics.
-void NeonEmitter::runTests(raw_ostream &OS) {
- OS <<
- "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\\\n"
- "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
- "// RUN: | FileCheck %s\n"
- "\n"
- "#include <arm_neon.h>\n"
- "\n";
+/// Write out all intrinsic tests for the specified target, checking
+/// for intrinsic test uniqueness.
+void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
+ bool isA64GenTest) {
+ if (isA64GenTest)
+ OS << "#ifdef __aarch64__\n";
- std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
std::string name = R->getValueAsString("Name");
@@ -2237,6 +3333,12 @@ void NeonEmitter::runTests(raw_ostream &OS) {
bool isShift = R->getValueAsBit("isShift");
std::string InstName = R->getValueAsString("InstName");
bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
+ bool isA64 = R->getValueAsBit("isA64");
+
+ // do not include AArch64 intrinsic test if not generating
+ // code for AArch64
+ if (!isA64GenTest && isA64)
+ continue;
SmallVector<StringRef, 16> TypeVec;
ParseTypes(R, Types, TypeVec);
@@ -2256,16 +3358,56 @@ void NeonEmitter::runTests(raw_ostream &OS) {
(void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
if (srcti == ti || inQuad != outQuad)
continue;
- OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
- isShift, isHiddenLOp, ck, InstName);
+ std::string testFuncProto;
+ std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
+ isShift, isHiddenLOp, ck, InstName, isA64,
+ testFuncProto);
+ if (EmittedMap.count(testFuncProto))
+ continue;
+ EmittedMap[testFuncProto] = kind;
+ OS << s << "\n";
}
} else {
- OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti],
- isShift, isHiddenLOp, ck, InstName);
+ std::string testFuncProto;
+ std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
+ isHiddenLOp, ck, InstName, isA64, testFuncProto);
+ if (EmittedMap.count(testFuncProto))
+ continue;
+ EmittedMap[testFuncProto] = kind;
+ OS << s << "\n";
}
}
- OS << "\n";
}
+
+ if (isA64GenTest)
+ OS << "#endif\n";
+}
+/// runTests - Write out a complete set of tests for all of the Neon
+/// intrinsics.
+void NeonEmitter::runTests(raw_ostream &OS) {
+ OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
+ "apcs-gnu\\\n"
+ "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
+ "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n"
+ "\n"
+ "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
+ "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n"
+ "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n"
+ "\n"
+ "// REQUIRES: long_tests\n"
+ "\n"
+ "#include <arm_neon.h>\n"
+ "\n";
+
+ // ARM tests must be emitted before AArch64 tests to ensure
+ // tests for intrinsics that are common to ARM and AArch64
+ // appear only once in the output stream.
+ // The check for uniqueness is done in genTargetTest.
+ StringMap<OpKind> EmittedMap;
+
+ genTargetTest(OS, EmittedMap, false);
+
+ genTargetTest(OS, EmittedMap, true);
}
namespace clang {
OpenPOWER on IntegriCloud