From fc84956ac8b7cd244ef30e7a4d4d38a58dec5904 Mon Sep 17 00:00:00 2001 From: dim Date: Thu, 16 Sep 2010 21:14:28 +0000 Subject: Make vendor/clang/dist exactly the same as upstream's r108428. Some files and directories were already added/removed in the upstream repository, but were not added/removed here, when the previous snapshot was imported. Note: I did not import the file test/Lexer/conflict-marker.c, because it contains merge conflict markers on purpose, which upsets our pre-commit hooks. Approved by: rpaulo (mentor) --- lib/Headers/arm_neon.h | 537 ------------------------------------------------ lib/Headers/arm_neon.td | 341 ------------------------------ 2 files changed, 878 deletions(-) delete mode 100644 lib/Headers/arm_neon.h delete mode 100644 lib/Headers/arm_neon.td (limited to 'lib/Headers') diff --git a/lib/Headers/arm_neon.h b/lib/Headers/arm_neon.h deleted file mode 100644 index 4508a27..0000000 --- a/lib/Headers/arm_neon.h +++ /dev/null @@ -1,537 +0,0 @@ -/*===---- arm_neon.h - NEON intrinsics --------------------------------------=== - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - *===-----------------------------------------------------------------------=== - */ - -#ifndef __ARM_NEON_H -#define __ARM_NEON_H - -#ifndef __ARM_NEON__ -#error "NEON support not enabled" -#endif - -// NEON document appears to be specified in terms of stdint types. -#include - -// Define some NEON-specific scalar types for floats and polynomials. -typedef float float32_t; -typedef uint8_t poly8_t; - -// FIXME: probably need a 'poly' attribute or something for correct codegen to -// disambiguate from uint16_t. -typedef uint16_t poly16_t; - -typedef __attribute__(( __vector_size__(8) )) int8_t __neon_int8x8_t; -typedef __attribute__(( __vector_size__(16) )) int8_t __neon_int8x16_t; -typedef __attribute__(( __vector_size__(8) )) int16_t __neon_int16x4_t; -typedef __attribute__(( __vector_size__(16) )) int16_t __neon_int16x8_t; -typedef __attribute__(( __vector_size__(8) )) int32_t __neon_int32x2_t; -typedef __attribute__(( __vector_size__(16) )) int32_t __neon_int32x4_t; -typedef __attribute__(( __vector_size__(8) )) int64_t __neon_int64x1_t; -typedef __attribute__(( __vector_size__(16) )) int64_t __neon_int64x2_t; -typedef __attribute__(( __vector_size__(8) )) uint8_t __neon_uint8x8_t; -typedef __attribute__(( __vector_size__(16) )) uint8_t __neon_uint8x16_t; -typedef __attribute__(( __vector_size__(8) )) uint16_t __neon_uint16x4_t; -typedef __attribute__(( __vector_size__(16) )) uint16_t __neon_uint16x8_t; -typedef __attribute__(( __vector_size__(8) )) uint32_t __neon_uint32x2_t; -typedef __attribute__(( __vector_size__(16) )) uint32_t __neon_uint32x4_t; -typedef __attribute__(( __vector_size__(8) )) uint64_t __neon_uint64x1_t; -typedef __attribute__(( __vector_size__(16) )) uint64_t __neon_uint64x2_t; -typedef __attribute__(( __vector_size__(8) )) uint16_t __neon_float16x4_t; -typedef __attribute__(( __vector_size__(16) )) uint16_t __neon_float16x8_t; -typedef __attribute__(( __vector_size__(8) )) float32_t __neon_float32x2_t; -typedef __attribute__(( __vector_size__(16) )) float32_t __neon_float32x4_t; -typedef __attribute__(( __vector_size__(8) )) poly8_t __neon_poly8x8_t; -typedef __attribute__(( __vector_size__(16) )) poly8_t __neon_poly8x16_t; -typedef __attribute__(( __vector_size__(8) )) poly16_t __neon_poly16x4_t; -typedef __attribute__(( __vector_size__(16) )) poly16_t __neon_poly16x8_t; - -typedef struct __int8x8_t { - __neon_int8x8_t val; -} int8x8_t; - -typedef struct __int8x16_t { - __neon_int8x16_t val; -} int8x16_t; - -typedef struct __int16x4_t { - __neon_int16x4_t val; -} int16x4_t; - -typedef struct __int16x8_t { - __neon_int16x8_t val; -} int16x8_t; - -typedef struct __int32x2_t { - __neon_int32x2_t val; -} int32x2_t; - -typedef struct __int32x4_t { - __neon_int32x4_t val; -} int32x4_t; - -typedef struct __int64x1_t { - __neon_int64x1_t val; -} int64x1_t; - -typedef struct __int64x2_t { - __neon_int64x2_t val; -} int64x2_t; - -typedef struct __uint8x8_t { - __neon_uint8x8_t val; -} uint8x8_t; - -typedef struct __uint8x16_t { - __neon_uint8x16_t val; -} uint8x16_t; - -typedef struct __uint16x4_t { - __neon_uint16x4_t val; -} uint16x4_t; - -typedef struct __uint16x8_t { - __neon_uint16x8_t val; -} uint16x8_t; - -typedef struct __uint32x2_t { - __neon_uint32x2_t val; -} uint32x2_t; - -typedef struct __uint32x4_t { - __neon_uint32x4_t val; -} uint32x4_t; - -typedef struct __uint64x1_t { - __neon_uint64x1_t val; -} uint64x1_t; - -typedef struct __uint64x2_t { - __neon_uint64x2_t val; -} uint64x2_t; - -typedef struct __float16x4_t { - __neon_float16x4_t val; -} float16x4_t; - -typedef struct __float16x8_t { - __neon_float16x8_t val; -} float16x8_t; - -typedef struct __float32x2_t { - __neon_float32x2_t val; -} float32x2_t; - -typedef struct __float32x4_t { - __neon_float32x4_t val; -} float32x4_t; - -typedef struct __poly8x8_t { - __neon_poly8x8_t val; -} poly8x8_t; - -typedef struct __poly8x16_t { - __neon_poly8x16_t val; -} poly8x16_t; - -typedef struct __poly16x4_t { - __neon_poly16x4_t val; -} poly16x4_t; - -typedef struct __poly16x8_t { - __neon_poly16x8_t val; -} poly16x8_t; - -// FIXME: write tool to stamp out the structure-of-array types, possibly gen this whole file. - -// Intrinsics, per ARM document DUI0348B -#define __ai static __attribute__((__always_inline__)) - -#define INTTYPES_WIDE(op, builtin) \ - __ai int16x8_t op##_s8(int16x8_t a, int8x8_t b) { return (int16x8_t){ builtin(a.val, b.val) }; } \ - __ai int32x4_t op##_s16(int32x4_t a, int16x4_t b) { return (int32x4_t){ builtin(a.val, b.val) }; } \ - __ai int64x2_t op##_s32(int64x2_t a, int32x2_t b) { return (int64x2_t){ builtin(a.val, b.val) }; } \ - __ai uint16x8_t op##_u8(uint16x8_t a, uint8x8_t b) { return (uint16x8_t){ builtin(a.val, b.val) }; } \ - __ai uint32x4_t op##_u16(uint32x4_t a, uint16x4_t b) { return (uint32x4_t){ builtin(a.val, b.val) }; } \ - __ai uint64x2_t op##_u32(uint64x2_t a, uint32x2_t b) { return (uint64x2_t){ builtin(a.val, b.val) }; } - -#define INTTYPES_WIDENING(op, builtin) \ - __ai int16x8_t op##_s8(int8x8_t a, int8x8_t b) { return (int16x8_t){ builtin(a.val, b.val) }; } \ - __ai int32x4_t op##_s16(int16x4_t a, int16x4_t b) { return (int32x4_t){ builtin(a.val, b.val) }; } \ - __ai int64x2_t op##_s32(int32x2_t a, int32x2_t b) { return (int64x2_t){ builtin(a.val, b.val) }; } \ - __ai uint16x8_t op##_u8(uint8x8_t a, uint8x8_t b) { return (uint16x8_t){ builtin(a.val, b.val) }; } \ - __ai uint32x4_t op##_u16(uint16x4_t a, uint16x4_t b) { return (uint32x4_t){ builtin(a.val, b.val) }; } \ - __ai uint64x2_t op##_u32(uint32x2_t a, uint32x2_t b) { return (uint64x2_t){ builtin(a.val, b.val) }; } - -#define INTTYPES_WIDENING_MUL(op, builtin) \ - __ai int16x8_t op##_s8(int16x8_t a, int8x8_t b, int8x8_t c) { return (int16x8_t){ builtin(a.val, b.val, c.val) }; } \ - __ai int32x4_t op##_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return (int32x4_t){ builtin(a.val, b.val, c.val) }; } \ - __ai int64x2_t op##_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return (int64x2_t){ builtin(a.val, b.val, c.val) }; } \ - __ai uint16x8_t op##_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { return (uint16x8_t){ builtin(a.val, b.val, c.val) }; } \ - __ai uint32x4_t op##_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { return (uint32x4_t){ builtin(a.val, b.val, c.val) }; } \ - __ai uint64x2_t op##_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { return (uint64x2_t){ builtin(a.val, b.val, c.val) }; } - -#define INTTYPES_NARROWING(op, builtin) \ - __ai int8x8_t op##_s16(int16x8_t a, int16x8_t b) { return (int8x8_t){ builtin(a.val, b.val) }; } \ - __ai int16x4_t op##_s32(int32x4_t a, int32x4_t b) { return (int16x4_t){ builtin(a.val, b.val) }; } \ - __ai int32x2_t op##_s64(int64x2_t a, int64x2_t b) { return (int32x2_t){ builtin(a.val, b.val) }; } \ - __ai uint8x8_t op##_u16(uint16x8_t a, uint16x8_t b) { return (uint8x8_t){ builtin(a.val, b.val) }; } \ - __ai uint16x4_t op##_u32(uint32x4_t a, uint32x4_t b) { return (uint16x4_t){ builtin(a.val, b.val) }; } \ - __ai uint32x2_t op##_u64(uint64x2_t a, uint64x2_t b) { return (uint32x2_t){ builtin(a.val, b.val) }; } - -#define INTTYPES_ADD_32(op, builtin) \ - __ai int8x8_t op##_s8(int8x8_t a, int8x8_t b) { return (int8x8_t){ builtin(a.val, b.val) }; } \ - __ai int16x4_t op##_s16(int16x4_t a, int16x4_t b) { return (int16x4_t){ builtin(a.val, b.val) }; } \ - __ai int32x2_t op##_s32(int32x2_t a, int32x2_t b) { return (int32x2_t){ builtin(a.val, b.val) }; } \ - __ai uint8x8_t op##_u8(uint8x8_t a, uint8x8_t b) { return (uint8x8_t){ builtin(a.val, b.val) }; } \ - __ai uint16x4_t op##_u16(uint16x4_t a, uint16x4_t b) { return (uint16x4_t){ builtin(a.val, b.val) }; } \ - __ai uint32x2_t op##_u32(uint32x2_t a, uint32x2_t b) { return (uint32x2_t){ builtin(a.val, b.val) }; } \ - __ai int8x16_t op##q_s8(int8x16_t a, int8x16_t b) { return (int8x16_t){ builtin(a.val, b.val) }; } \ - __ai int16x8_t op##q_s16(int16x8_t a, int16x8_t b) { return (int16x8_t){ builtin(a.val, b.val) }; } \ - __ai int32x4_t op##q_s32(int32x4_t a, int32x4_t b) { return (int32x4_t){ builtin(a.val, b.val) }; } \ - __ai uint8x16_t op##q_u8(uint8x16_t a, uint8x16_t b) { return (uint8x16_t){ builtin(a.val, b.val) }; } \ - __ai uint16x8_t op##q_u16(uint16x8_t a, uint16x8_t b) { return (uint16x8_t){ builtin(a.val, b.val) }; } \ - __ai uint32x4_t op##q_u32(uint32x4_t a, uint32x4_t b) { return (uint32x4_t){ builtin(a.val, b.val) }; } - -#define INTTYPES_ADD_64(op, builtin) \ - __ai int64x1_t op##_s64(int64x1_t a, int64x1_t b) { return (int64x1_t){ builtin(a.val, b.val) }; } \ - __ai uint64x1_t op##_u64(uint64x1_t a, uint64x1_t b) { return (uint64x1_t){ builtin(a.val, b.val) }; } \ - __ai int64x2_t op##q_s64(int64x2_t a, int64x2_t b) { return (int64x2_t){ builtin(a.val, b.val) }; } \ - __ai uint64x2_t op##q_u64(uint64x2_t a, uint64x2_t b) { return (uint64x2_t){ builtin(a.val, b.val) }; } - -#define FLOATTYPES_CMP(op, builtin) \ - __ai uint32x2_t op##_f32(float32x2_t a, float32x2_t b) { return (uint32x2_t){ builtin(a.val, b.val) }; } \ - __ai uint32x4_t op##q_f32(float32x4_t a, float32x4_t b) { return (uint32x4_t){ builtin(a.val, b.val) }; } - -#define INT_FLOAT_CMP_OP(op, cc) \ - __ai uint8x8_t op##_s8(int8x8_t a, int8x8_t b) { return (uint8x8_t){(__neon_uint8x8_t)(a.val cc b.val)}; } \ - __ai uint16x4_t op##_s16(int16x4_t a, int16x4_t b) { return (uint16x4_t){(__neon_uint16x4_t)(a.val cc b.val)}; } \ - __ai uint32x2_t op##_s32(int32x2_t a, int32x2_t b) { return (uint32x2_t){(__neon_uint32x2_t)(a.val cc b.val)}; } \ - __ai uint32x2_t op##_f32(float32x2_t a, float32x2_t b) { return (uint32x2_t){(__neon_uint32x2_t)(a.val cc b.val)}; } \ - __ai uint8x8_t op##_u8(uint8x8_t a, uint8x8_t b) { return (uint8x8_t){a.val cc b.val}; } \ - __ai uint16x4_t op##_u16(uint16x4_t a, uint16x4_t b) { return (uint16x4_t){a.val cc b.val}; } \ - __ai uint32x2_t op##_u32(uint32x2_t a, uint32x2_t b) { return (uint32x2_t){a.val cc b.val}; } \ - __ai uint8x16_t op##q_s8(int8x16_t a, int8x16_t b) { return (uint8x16_t){(__neon_uint8x16_t)(a.val cc b.val)}; } \ - __ai uint16x8_t op##q_s16(int16x8_t a, int16x8_t b) { return (uint16x8_t){(__neon_uint16x8_t)(a.val cc b.val)}; } \ - __ai uint32x4_t op##q_s32(int32x4_t a, int32x4_t b) { return (uint32x4_t){(__neon_uint32x4_t)(a.val cc b.val)}; } \ - __ai uint32x4_t op##q_f32(float32x4_t a, float32x4_t b) { return (uint32x4_t){(__neon_uint32x4_t)(a.val cc b.val)}; } \ - __ai uint8x16_t op##q_u8(uint8x16_t a, uint8x16_t b) { return (uint8x16_t){a.val cc b.val}; } \ - __ai uint16x8_t op##q_u16(uint16x8_t a, uint16x8_t b) { return (uint16x8_t){a.val cc b.val}; } \ - __ai uint32x4_t op##q_u32(uint32x4_t a, uint32x4_t b) { return (uint32x4_t){a.val cc b.val}; } - -#define INT_UNARY(op, builtin) \ - __ai int8x8_t op##_s8(int8x8_t a) { return (int8x8_t){ builtin(a.val) }; } \ - __ai int16x4_t op##_s16(int16x4_t a) { return (int16x4_t){ builtin(a.val) }; } \ - __ai int32x2_t op##_s32(int32x2_t a) { return (int32x2_t){ builtin(a.val) }; } \ - __ai int8x16_t op##q_s8(int8x16_t a) { return (int8x16_t){ builtin(a.val) }; } \ - __ai int16x8_t op##q_s16(int16x8_t a) { return (int16x8_t){ builtin(a.val) }; } \ - __ai int32x4_t op##q_s32(int32x4_t a) { return (int32x4_t){ builtin(a.val) }; } - -#define FP_UNARY(op, builtin) \ - __ai float32x2_t op##_f32(float32x2_t a) { return (float32x2_t){ builtin(a.val) }; } \ - __ai float32x4_t op##q_f32(float32x4_t a) { return (float32x4_t){ builtin(a.val) }; } - -#define FP_BINARY(op, builtin) \ - __ai float32x2_t op##_f32(float32x2_t a, float32x2_t b) { return (float32x2_t){ builtin(a.val, b.val) }; } \ - __ai float32x4_t op##q_f32(float32x4_t a, float32x4_t b) { return (float32x4_t){ builtin(a.val, b.val) }; } - -#define INT_FP_PAIRWISE_ADD(op, builtin) \ - __ai int8x8_t op##_s8(int8x8_t a, int8x8_t b) { return (int8x8_t){ builtin(a.val, b.val) }; } \ - __ai int16x4_t op##_s16(int16x4_t a, int16x4_t b) { return (int16x4_t){ builtin(a.val, b.val) }; } \ - __ai int32x2_t op##_s32(int32x2_t a, int32x2_t b) { return (int32x2_t){ builtin(a.val, b.val) }; } \ - __ai uint8x8_t op##_u8(uint8x8_t a, uint8x8_t b) { return (uint8x8_t){ builtin(a.val, b.val) }; } \ - __ai uint16x4_t op##_u16(uint16x4_t a, uint16x4_t b) { return (uint16x4_t){ builtin(a.val, b.val) }; } \ - __ai uint32x2_t op##_u32(uint32x2_t a, uint32x2_t b) { return (uint32x2_t){ builtin(a.val, b.val) }; } \ - __ai float32x2_t op##_f32(float32x2_t a, float32x2_t b) { return (float32x2_t){ builtin(a.val, b.val) }; } - -#define INT_LOGICAL_OP(op, lop) \ - __ai int8x8_t op##_s8(int8x8_t a, int8x8_t b) { return (int8x8_t){ a.val lop b.val }; } \ - __ai int16x4_t op##_s16(int16x4_t a, int16x4_t b) { return (int16x4_t){ a.val lop b.val }; } \ - __ai int32x2_t op##_s32(int32x2_t a, int32x2_t b) { return (int32x2_t){ a.val lop b.val }; } \ - __ai int64x1_t op##_s64(int64x1_t a, int64x1_t b) { return (int64x1_t){ a.val lop b.val }; } \ - __ai uint8x8_t op##_u8(uint8x8_t a, uint8x8_t b) { return (uint8x8_t){ a.val lop b.val }; } \ - __ai uint16x4_t op##_u16(uint16x4_t a, uint16x4_t b) { return (uint16x4_t){ a.val lop b.val }; } \ - __ai uint32x2_t op##_u32(uint32x2_t a, uint32x2_t b) { return (uint32x2_t){ a.val lop b.val }; } \ - __ai uint64x1_t op##_u64(uint64x1_t a, uint64x1_t b) { return (uint64x1_t){ a.val lop b.val }; } \ - __ai int8x16_t op##q_s8(int8x16_t a, int8x16_t b) { return (int8x16_t){ a.val lop b.val }; } \ - __ai int16x8_t op##q_s16(int16x8_t a, int16x8_t b) { return (int16x8_t){ a.val lop b.val }; } \ - __ai int32x4_t op##q_s32(int32x4_t a, int32x4_t b) { return (int32x4_t){ a.val lop b.val }; } \ - __ai int64x2_t op##q_s64(int64x2_t a, int64x2_t b) { return (int64x2_t){ a.val lop b.val }; } \ - __ai uint8x16_t op##q_u8(uint8x16_t a, uint8x16_t b) { return (uint8x16_t){ a.val lop b.val }; } \ - __ai uint16x8_t op##q_u16(uint16x8_t a, uint16x8_t b) { return (uint16x8_t){ a.val lop b.val }; } \ - __ai uint32x4_t op##q_u32(uint32x4_t a, uint32x4_t b) { return (uint32x4_t){ a.val lop b.val }; } \ - __ai uint64x2_t op##q_u64(uint64x2_t a, uint64x2_t b) { return (uint64x2_t){ a.val lop b.val }; } - -// vector add -__ai int8x8_t vadd_s8(int8x8_t a, int8x8_t b) { return (int8x8_t){a.val + b.val}; } -__ai int16x4_t vadd_s16(int16x4_t a, int16x4_t b) { return (int16x4_t){a.val + b.val}; } -__ai int32x2_t vadd_s32(int32x2_t a, int32x2_t b) { return (int32x2_t){a.val + b.val}; } -__ai int64x1_t vadd_s64(int64x1_t a, int64x1_t b) { return (int64x1_t){a.val + b.val}; } -__ai float32x2_t vadd_f32(float32x2_t a, float32x2_t b) { return (float32x2_t){a.val + b.val}; } -__ai uint8x8_t vadd_u8(uint8x8_t a, uint8x8_t b) { return (uint8x8_t){a.val + b.val}; } -__ai uint16x4_t vadd_u16(uint16x4_t a, uint16x4_t b) { return (uint16x4_t){a.val + b.val}; } -__ai uint32x2_t vadd_u32(uint32x2_t a, uint32x2_t b) { return (uint32x2_t){a.val + b.val}; } -__ai uint64x1_t vadd_u64(uint64x1_t a, uint64x1_t b) { return (uint64x1_t){a.val + b.val}; } -__ai int8x16_t vaddq_s8(int8x16_t a, int8x16_t b) { return (int8x16_t){a.val + b.val}; } -__ai int16x8_t vaddq_s16(int16x8_t a, int16x8_t b) { return (int16x8_t){a.val + b.val}; } -__ai int32x4_t vaddq_s32(int32x4_t a, int32x4_t b) { return (int32x4_t){a.val + b.val}; } -__ai int64x2_t vaddq_s64(int64x2_t a, int64x2_t b) { return (int64x2_t){a.val + b.val}; } -__ai float32x4_t vaddq_f32(float32x4_t a, float32x4_t b) { return (float32x4_t){a.val + b.val}; } -__ai uint8x16_t vaddq_u8(uint8x16_t a, uint8x16_t b) { return (uint8x16_t){a.val + b.val}; } -__ai uint16x8_t vaddq_u16(uint16x8_t a, uint16x8_t b) { return (uint16x8_t){a.val + b.val}; } -__ai uint32x4_t vaddq_u32(uint32x4_t a, uint32x4_t b) { return (uint32x4_t){a.val + b.val}; } -__ai uint64x2_t vaddq_u64(uint64x2_t a, uint64x2_t b) { return (uint64x2_t){a.val + b.val}; } - -// vector long add -INTTYPES_WIDENING(vaddl, __builtin_neon_vaddl) - -// vector wide add -INTTYPES_WIDE(vaddw, __builtin_neon_vaddw) - -// halving add -// rounding halving add -INTTYPES_ADD_32(vhadd, __builtin_neon_vhadd) -INTTYPES_ADD_32(vrhadd, __builtin_neon_vrhadd) - -// saturating add -INTTYPES_ADD_32(vqadd, __builtin_neon_vqadd) -INTTYPES_ADD_64(vqadd, __builtin_neon_vqadd) - -// add high half -// rounding add high half -INTTYPES_NARROWING(vaddhn, __builtin_neon_vaddhn) -INTTYPES_NARROWING(vraddhn, __builtin_neon_vraddhn) - -// multiply -// mul-poly - -// multiple accumulate -// multiple subtract - -// multiple accumulate long -// multiple subtract long -INTTYPES_WIDENING_MUL(vmlal, __builtin_neon_vmlal) -INTTYPES_WIDENING_MUL(vmlsl, __builtin_neon_vmlsl) - -// saturating doubling multiply high -// saturating rounding doubling multiply high - -// saturating doubling multiply accumulate long -// saturating doubling multiply subtract long - -// long multiply -// long multiply-poly -INTTYPES_WIDENING(vmull, __builtin_neon_vmull) -__ai poly16x8_t vmull_p8(poly8x8_t a, poly8x8_t b) { return (poly16x8_t){ __builtin_neon_vmull(a.val, b.val) }; } - -// saturating doubling long multiply - -// subtract - -// long subtract -INTTYPES_WIDENING(vsubl, __builtin_neon_vsubl) - -// wide subtract -INTTYPES_WIDE(vsubw, __builtin_neon_vsubw) - -// saturating subtract -INTTYPES_ADD_32(vqsub, __builtin_neon_vqsub) -INTTYPES_ADD_64(vqsub, __builtin_neon_vqsub) - -// halving subtract -INTTYPES_ADD_32(vhsub, __builtin_neon_vhsub) - -// subtract high half -// rounding subtract high half -INTTYPES_NARROWING(vsubhn, __builtin_neon_vsubhn) -INTTYPES_NARROWING(vrsubhn, __builtin_neon_vrsubhn) - -// compare eq -// compare ge -// compare le -// compare gt -// compare lt -INT_FLOAT_CMP_OP(vceq, ==) -INT_FLOAT_CMP_OP(vcge, >=) -INT_FLOAT_CMP_OP(vcle, <=) -INT_FLOAT_CMP_OP(vcgt, >) -INT_FLOAT_CMP_OP(vclt, <) - -// compare eq-poly - -// compare abs ge -// compare abs le -// compare abs gt -// compare abs lt -FLOATTYPES_CMP(vcage, __builtin_neon_vcage) -FLOATTYPES_CMP(vcale, __builtin_neon_vcale) -FLOATTYPES_CMP(vcagt, __builtin_neon_vcagt) -FLOATTYPES_CMP(vcalt, __builtin_neon_vcalt) - -// test bits - -// abs diff -INTTYPES_ADD_32(vabd, __builtin_neon_vabd) -FP_BINARY(vabd, __builtin_neon_vabd) - -// abs diff long -INTTYPES_WIDENING(vabdl, __builtin_neon_vabdl) - -// abs diff accumulate -// abs diff accumulate long - -// max -// min -INTTYPES_ADD_32(vmax, __builtin_neon_vmax) -FP_BINARY(vmax, __builtin_neon_vmax) -INTTYPES_ADD_32(vmin, __builtin_neon_vmin) -FP_BINARY(vmin, __builtin_neon_vmin) - -// pairwise add -// pairwise max -// pairwise min -INT_FP_PAIRWISE_ADD(vpadd, __builtin_neon_vpadd) -INT_FP_PAIRWISE_ADD(vpmax, __builtin_neon_vpmax) -INT_FP_PAIRWISE_ADD(vpmin, __builtin_neon_vpmin) - -// long pairwise add -// long pairwise add accumulate - -// recip -// recip sqrt -FP_BINARY(vrecps, __builtin_neon_vrecps) -FP_BINARY(vrsqrts, __builtin_neon_vrsqrts) - -// shl by vec -// saturating shl by vec -// rounding shl by vec -// saturating rounding shl by vec - -// shr by constant -// shl by constant -// rounding shr by constant -// shr by constant and accumulate -// rounding shr by constant and accumulate -// saturating shl by constant -// s->u saturating shl by constant -// narrowing saturating shr by constant -// s->u narrowing saturating shr by constant -// s->u rounding narrowing saturating shr by constant -// narrowing saturating shr by constant -// rounding narrowing shr by constant -// rounding narrowing saturating shr by constant -// widening shl by constant - -// shr and insert -// shl and insert - -// loads and stores, single vector -// loads and stores, lane -// loads, dupe - -// loads and stores, arrays - -// vget,vgetq lane -// vset, vsetq lane - -// vcreate -// vdup, vdupq -// vmov, vmovq -// vdup_lane, vdupq_lane -// vcombine -// vget_high, vget_low - -// vcvt {u,s} <-> f, f <-> f16 -// narrow -// long move (unpack) -// saturating narrow -// saturating narrow s->u - -// table lookup -// extended table lookup - -// mla with scalar -// widening mla with scalar -// widening saturating doubling mla with scalar -// mls with scalar -// widening mls with scalar -// widening saturating doubling mls with scalar -// mul by scalar -// long mul with scalar -// long mul by scalar -// saturating doubling long mul with scalar -// saturating doubling long mul by scalar -// saturating doubling mul high with scalar -// saturating doubling mul high by scalar -// saturating rounding doubling mul high with scalar -// saturating rounding doubling mul high by scalar -// mla with scalar -// widening mla with sclar -// widening saturating doubling mla with scalar -// mls with scalar -// widening mls with scalar -// widening saturating doubling mls with scalar - -// extract - -// endian swap (vrev) - -// negate - -// abs -// saturating abs -// saturating negate -// count leading signs -INT_UNARY(vabs, __builtin_neon_vabs) -FP_UNARY(vabs, __builtin_neon_vabs) -INT_UNARY(vqabs, __builtin_neon_vqabs) -INT_UNARY(vqneg, __builtin_neon_vqneg) -INT_UNARY(vcls, __builtin_neon_vcls) - -// count leading zeroes -// popcount - -// recip_est -// recip_sqrt_est - -// not-poly -// not - -// and -// or -// xor -// andn -// orn -INT_LOGICAL_OP(vand, &) -INT_LOGICAL_OP(vorr, |) -INT_LOGICAL_OP(veor, ^) -INT_LOGICAL_OP(vbic, &~) -INT_LOGICAL_OP(vorn, |~) - -// bitselect - -// transpose elts -// interleave elts -// deinterleave elts - -// vreinterpret - -#endif /* __ARM_NEON_H */ diff --git a/lib/Headers/arm_neon.td b/lib/Headers/arm_neon.td deleted file mode 100644 index 7ffbfb4..0000000 --- a/lib/Headers/arm_neon.td +++ /dev/null @@ -1,341 +0,0 @@ -//===--- arm_neon.td - ARM NEON compiler interface ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the TableGen definitions from which the ARM NEON header -// file will be generated. See ARM document DUI0348B. -// -//===----------------------------------------------------------------------===// - -class Op; - -def OP_NONE : Op; -def OP_ADD : Op; -def OP_SUB : Op; -def OP_MUL : Op; -def OP_MLA : Op; -def OP_MLS : Op; -def OP_MUL_N : Op; -def OP_MLA_N : Op; -def OP_MLS_N : Op; -def OP_EQ : Op; -def OP_GE : Op; -def OP_LE : Op; -def OP_GT : Op; -def OP_LT : Op; -def OP_NEG : Op; -def OP_NOT : Op; -def OP_AND : Op; -def OP_OR : Op; -def OP_XOR : Op; -def OP_ANDN : Op; -def OP_ORN : Op; -def OP_CAST : Op; -def OP_HI : Op; -def OP_LO : Op; -def OP_CONC : Op; -def OP_DUP : Op; -def OP_SEL : Op; -def OP_REV64 : Op; -def OP_REV32 : Op; -def OP_REV16 : Op; - -class Inst { - string Prototype = p; - string Types = t; - Op Operand = o; - bit isShift = 0; -} - -// Used to generate Builtins.def -class SInst : Inst {} -class IInst : Inst {} -class WInst : Inst {} - -// prototype: return (arg, arg, ...) -// v: void -// t: best-fit integer (int/poly args) -// x: signed integer (int/float args) -// u: unsigned integer (int/float args) -// f: float (int args) -// d: default -// w: double width elements, same num elts -// n: double width elements, half num elts -// h: half width elements, double num elts -// e: half width elements, double num elts, unsigned -// i: constant int -// l: constant uint64 -// s: scalar of element type -// a: scalar of element type (splat to vector type) -// k: default elt width, double num elts -// #: array of default vectors -// p: pointer type -// c: const pointer type - -// sizes: -// c: char -// s: short -// i: int -// l: long -// f: float -// h: half-float - -// size modifiers: -// U: unsigned -// Q: 128b -// P: polynomial - -//////////////////////////////////////////////////////////////////////////////// -// E.3.1 Addition -def VADD : Inst<"ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_ADD>; -def VADDL : SInst<"wdd", "csiUcUsUi">; -def VADDW : SInst<"wwd", "csiUcUsUi">; -def VHADD : SInst<"ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VRHADD : SInst<"ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VQADD : SInst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VADDHN : IInst<"dww", "csiUcUsUi">; -def VRADDHN : IInst<"dww", "csiUcUsUi">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.2 Multiplication -def VMUL : Inst<"ddd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_MUL>; -def VMLA : Inst<"dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLA>; -def VMLAL : SInst<"wwdd", "csiUcUsUi">; -def VMLS : Inst<"dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>; -def VMLSL : SInst<"wwdd", "csiUcUsUi">; -def VQDMULH : SInst<"ddd", "siQsQi">; -def VQRDMULH : SInst<"ddd", "siQsQi">; -def VQDMLAL : SInst<"wwdd", "si">; -def VQDMLSL : SInst<"wwdd", "si">; -def VMULL : SInst<"wdd", "csiUcUsUiPc">; -def VQDMULL : SInst<"wdd", "si">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.3 Subtraction -def VSUB : Inst<"ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_SUB>; -def VSUBL : SInst<"wdd", "csiUcUsUi">; -def VSUBW : SInst<"wwd", "csiUcUsUi">; -def VQSUB : SInst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VHSUB : SInst<"ddd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VSUBHN : IInst<"dww", "csiUcUsUi">; -def VRSUBHN : IInst<"dww", "csiUcUsUi">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.4 Comparison -def VCEQ : Inst<"udd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>; -def VCGE : Inst<"udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>; -def VCLE : Inst<"udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; -def VCGT : Inst<"udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; -def VCLT : Inst<"udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; -def VCAGE : IInst<"udd", "fQf">; -def VCALE : IInst<"udd", "fQf">; -def VCAGT : IInst<"udd", "fQf">; -def VCALT : IInst<"udd", "fQf">; -def VTST : WInst<"udd", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.5 Absolute Difference -def VABD : SInst<"ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; -def VABDL : SInst<"wdd", "csiUcUsUi">; -def VABA : SInst<"dddd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VABAL : SInst<"wwdd", "csiUcUsUi">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.6 Max/Min -def VMAX : SInst<"ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; -def VMIN : SInst<"ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.7 Pairdise Addition -def VPADD : IInst<"ddd", "csiUcUsUif">; -def VPADDL : SInst<"nd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VPADAL : SInst<"nnd", "csiUcUsUiQcQsQiQUcQUsQUi">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.8-9 Folding Max/Min -def VPMAX : SInst<"ddd", "csiUcUsUif">; -def VPMIN : SInst<"ddd", "csiUcUsUif">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.10 Reciprocal/Sqrt -def VRECPS : IInst<"ddd", "fQf">; -def VRSQRTS : IInst<"ddd", "fQf">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.11 Shifts by signed variable -def VSHL : SInst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHL : SInst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VRSHL : SInst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQRSHL : SInst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.12 Shifts by constant -let isShift = 1 in { -def VSHR_N : SInst<"ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VSHL_N : IInst<"ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VRSHR_N : SInst<"ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VSRA_N : SInst<"dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VRSRA_N : SInst<"dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHL_N : SInst<"ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHLU_N : SInst<"udi", "csilQcQsQiQl">; -def VSHRN_N : IInst<"hki", "silUsUiUl">; -def VQSHRUN_N : SInst<"eki", "sil">; -def VQRSHRUN_N : SInst<"eki", "sil">; -def VQSHRN_N : SInst<"hki", "silUsUiUl">; -def VRSHRN_N : IInst<"hki", "silUsUiUl">; -def VQRSHRN_N : SInst<"hki", "silUsUiUl">; -def VSHLL_N : SInst<"wdi", "csiUcUsUi">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.13 Shifts with insert -def VSRI_N : WInst<"dddi", "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">; -def VSLI_N : WInst<"dddi", "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">; -} - -//////////////////////////////////////////////////////////////////////////////// -// E.3.14 Loads and stores of a single vector -def VLD1 : WInst<"dc", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">; -def VLD1_LANE : WInst<"dci", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">; -def VLD1_DUP : WInst<"dc", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">; -def VST1 : WInst<"vpd", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">; -def VST1_LANE : WInst<"vpdi", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.15 Loads and stores of an N-element structure -def VLD2 : WInst<"2c", "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPs">; -def VLD3 : WInst<"3c", "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPs">; -def VLD4 : WInst<"4c", "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPs">; -def VLD2_DUP : WInst<"2c", "UcUsUiUlcsilhfPcPs">; -def VLD3_DUP : WInst<"3c", "UcUsUiUlcsilhfPcPs">; -def VLD4_DUP : WInst<"4c", "UcUsUiUlcsilhfPcPs">; -def VLD2_LANE : WInst<"2ci", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">; -def VLD3_LANE : WInst<"3ci", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">; -def VLD4_LANE : WInst<"4ci", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">; -def VST2 : WInst<"vp2", "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPs">; -def VST3 : WInst<"vp3", "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPs">; -def VST4 : WInst<"vp4", "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPs">; -def VST2_LANE : WInst<"vp2i", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">; -def VST3_LANE : WInst<"vp3i", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">; -def VST4_LANE : WInst<"vp4i", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.16 Extract lanes from a vector -def VGET_LANE : IInst<"sdi", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.17 Set lanes within a vector -def VSET_LANE : IInst<"dsdi", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.18 Initialize a vector from bit pattern -def VCREATE: Inst<"dl", "csihfUcUsUiUlPcPsl", OP_CAST>; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.19 Set all lanes to same value -def VDUP_N : Inst<"ds", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP>; -def VMOV_N : Inst<"ds", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP>; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.20 Combining vectors -def VCOMBINE : Inst<"kdd", "csilhfUcUsUiUlPcPs", OP_CONC>; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.21 Splitting vectors -def VGET_HIGH : Inst<"dk", "csilhfUcUsUiUlPcPs", OP_HI>; -def VGET_LOW : Inst<"dk", "csilhfUcUsUiUlPcPs", OP_LO>; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.22 Converting vectors -def VCVT_S32 : SInst<"xd", "fQf">; -def VCVT_U32 : SInst<"ud", "fQf">; -def VCVT_F16 : SInst<"hk", "f">; -def VCVT_N_S32 : SInst<"xdi", "fQf">; -def VCVT_N_U32 : SInst<"udi", "fQf">; -def VCVT_F32 : SInst<"fd", "iUiQiQUi">; -def VCVT_F32_F16 : SInst<"kh", "f">; -def VCVT_N_F32 : SInst<"fdi", "iUiQiQUi">; -def VMOVN : IInst<"hk", "silUsUiUl">; -def VMOVL : SInst<"wd", "csiUcUsUi">; -def VQMOVN : SInst<"hk", "silUsUiUl">; -def VQMOVUN : SInst<"ek", "sil">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.23-24 Table lookup, Extended table lookup -def VTBL1 : WInst<"ddt", "UccPc">; -def VTBL2 : WInst<"d2t", "UccPc">; -def VTBL3 : WInst<"d3t", "UccPc">; -def VTBL4 : WInst<"d4t", "UccPc">; -def VTBX1 : WInst<"dddt", "UccPc">; -def VTBX2 : WInst<"dd2t", "UccPc">; -def VTBX3 : WInst<"dd3t", "UccPc">; -def VTBX4 : WInst<"dd4t", "UccPc">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.25 Operations with a scalar value -def VMLA_LANE : IInst<"ddddi", "siUsUifQsQiQUsQUiQf">; -def VMLAL_LANE : SInst<"wwddi", "siUsUi">; -def VQDMLAL_LANE : SInst<"wwddi", "si">; -def VMLS_LANE : IInst<"ddddi", "siUsUifQsQiQUsQUiQf">; -def VMLSL_LANE : SInst<"wwddi", "siUsUi">; -def VQDMLSL_LANE : SInst<"wwddi", "si">; -def VMUL_N : Inst<"dds", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; -def VMULL_N : SInst<"wda", "siUsUi">; -def VMULL_LANE : SInst<"wddi", "siUsUi">; -def VQDMULL_N : SInst<"wda", "si">; -def VQDMULL_LANE : SInst<"wddi", "si">; -def VQDMULH_N : SInst<"dda", "siQsQi">; -def VQDMULH_LANE : SInst<"dddi", "siQsQi">; -def VQRDMULH_N : SInst<"dda", "siQsQi">; -def VQRDMULH_LANE : SInst<"dddi", "siQsQi">; -def VMLA_N : Inst<"ddda", "siUsUifQsQiQUsQUiQf", OP_MLA_N>; -def VMLAL_N : SInst<"wwda", "siUsUi">; -def VQDMLAL_N : SInst<"wwda", "si">; -def VMLS_N : Inst<"ddds", "siUsUifQsQiQUsQUiQf", OP_MLS_N>; -def VMLSL_N : SInst<"wwda", "siUsUi">; -def VQDMLSL_N : SInst<"wwda", "si">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.26 Vector Extract -def VEXT : WInst<"dddi", "cUcPcsUsPsiUilUlQcQUcQPcQsQUsQPsQiQUiQlQUl">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.27 Reverse vector elements (sdap endianness) -def VREV64 : Inst<"dd", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf", OP_REV64>; -def VREV32 : Inst<"dd", "csUcUsPcQcQsQUcQUsQPc", OP_REV32>; -def VREV16 : Inst<"dd", "cUcPcQcQUcQPc", OP_REV16>; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.28 Other single operand arithmetic -def VABS : SInst<"dd", "csifQcQsQiQf">; -def VQABS : SInst<"dd", "csiQcQsQi">; -def VNEG : Inst<"dd", "csifQcQsQiQf", OP_NEG>; -def VQNEG : SInst<"dd", "csiQcQsQi">; -def VCLS : SInst<"dd", "csiQcQsQi">; -def VCLZ : IInst<"dd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VCNT : WInst<"dd", "UccPcQUcQcQPc">; -def VRECPE : SInst<"dd", "fUiQfQUi">; -def VRSQRTE : SInst<"dd", "fUiQfQUi">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.29 Logical operations -def VMVN : Inst<"dd", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc", OP_NOT>; -def VAND : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_AND>; -def VORR : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; -def VEOR : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; -def VBIC : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; -def VORN : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; -def VBSL : Inst<"dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs", OP_SEL>; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.30 Transposition operations -def VTRN: WInst<"2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; -def VZIP: WInst<"2dd", "csUcUsfPcPsQcQsQiQUcQUsQUiQfQPcQPs">; -def VUZP: WInst<"2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; - -//////////////////////////////////////////////////////////////////////////////// -// E.3.31 Vector reinterpret cast operations -- cgit v1.1