diff options
Diffstat (limited to 'lib/Headers')
-rw-r--r-- | lib/Headers/CMakeLists.txt | 3 | ||||
-rw-r--r-- | lib/Headers/Makefile | 1 | ||||
-rw-r--r-- | lib/Headers/altivec.h | 4512 | ||||
-rw-r--r-- | lib/Headers/avxintrin.h | 1156 | ||||
-rw-r--r-- | lib/Headers/emmintrin.h | 9 | ||||
-rw-r--r-- | lib/Headers/immintrin.h | 59 | ||||
-rw-r--r-- | lib/Headers/mmintrin.h | 58 | ||||
-rw-r--r-- | lib/Headers/nmmintrin.h | 44 | ||||
-rw-r--r-- | lib/Headers/smmintrin.h | 12 | ||||
-rw-r--r-- | lib/Headers/stddef.h | 3 | ||||
-rw-r--r-- | lib/Headers/x86intrin.h | 31 | ||||
-rw-r--r-- | lib/Headers/xmmintrin.h | 51 |
12 files changed, 5781 insertions, 158 deletions
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt index 97a99d6..a1b5f50 100644 --- a/lib/Headers/CMakeLists.txt +++ b/lib/Headers/CMakeLists.txt @@ -1,12 +1,15 @@ set(files altivec.h + avxintrin.h emmintrin.h float.h + immintrin.h iso646.h limits.h mm_malloc.h mmintrin.h pmmintrin.h + smmintrin.h stdarg.h stdbool.h stddef.h diff --git a/lib/Headers/Makefile b/lib/Headers/Makefile index ebb8384..d75b1a2 100644 --- a/lib/Headers/Makefile +++ b/lib/Headers/Makefile @@ -38,6 +38,7 @@ all-local:: $(OBJHEADERS) PROJ_headers := $(DESTDIR)$(PROJ_prefix)/lib/clang/$(CLANG_VERSION)/include INSTHEADERS := $(addprefix $(PROJ_headers)/, $(HEADERS)) +INSTHEADERS += $(PROJ_headers)/arm_neon.h $(PROJ_headers): $(Verb) $(MKDIR) $@ diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h index d3d5ad9..89bd259 100644 --- a/lib/Headers/altivec.h +++ b/lib/Headers/altivec.h @@ -45,18 +45,30 @@ vec_perm(vector signed char a, vector signed char b, vector unsigned char c); static vector unsigned char __ATTRS_o_ai vec_perm(vector unsigned char a, vector unsigned char b, vector unsigned char c); +static vector bool char __ATTRS_o_ai +vec_perm(vector bool char a, vector bool char b, vector unsigned char c); + static vector short __ATTRS_o_ai vec_perm(vector short a, vector short b, vector unsigned char c); static vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short a, vector unsigned short b, vector unsigned char c); +static vector bool short __ATTRS_o_ai +vec_perm(vector bool short a, vector bool short b, vector unsigned char c); + +static vector pixel __ATTRS_o_ai +vec_perm(vector pixel a, vector pixel b, vector unsigned char c); + static vector int __ATTRS_o_ai vec_perm(vector int a, vector int b, vector unsigned char c); static vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int a, vector unsigned int b, vector unsigned char c); +static vector bool int __ATTRS_o_ai +vec_perm(vector bool int a, vector bool int b, vector unsigned char c); + 
static vector float __ATTRS_o_ai vec_perm(vector float a, vector float b, vector unsigned char c); @@ -123,36 +135,108 @@ vec_add(vector signed char a, vector signed char b) return a + b; } +static vector signed char __ATTRS_o_ai +vec_add(vector bool char a, vector signed char b) +{ + return (vector signed char)a + b; +} + +static vector signed char __ATTRS_o_ai +vec_add(vector signed char a, vector bool char b) +{ + return a + (vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_add(vector unsigned char a, vector unsigned char b) { return a + b; } +static vector unsigned char __ATTRS_o_ai +vec_add(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a + b; +} + +static vector unsigned char __ATTRS_o_ai +vec_add(vector unsigned char a, vector bool char b) +{ + return a + (vector unsigned char)b; +} + static vector short __ATTRS_o_ai vec_add(vector short a, vector short b) { return a + b; } +static vector short __ATTRS_o_ai +vec_add(vector bool short a, vector short b) +{ + return (vector short)a + b; +} + +static vector short __ATTRS_o_ai +vec_add(vector short a, vector bool short b) +{ + return a + (vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_add(vector unsigned short a, vector unsigned short b) { return a + b; } +static vector unsigned short __ATTRS_o_ai +vec_add(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a + b; +} + +static vector unsigned short __ATTRS_o_ai +vec_add(vector unsigned short a, vector bool short b) +{ + return a + (vector unsigned short)b; +} + static vector int __ATTRS_o_ai vec_add(vector int a, vector int b) { return a + b; } +static vector int __ATTRS_o_ai +vec_add(vector bool int a, vector int b) +{ + return (vector int)a + b; +} + +static vector int __ATTRS_o_ai +vec_add(vector int a, vector bool int b) +{ + return a + (vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_add(vector unsigned int a, vector unsigned int b) { return 
a + b; } +static vector unsigned int __ATTRS_o_ai +vec_add(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a + b; +} + +static vector unsigned int __ATTRS_o_ai +vec_add(vector unsigned int a, vector bool int b) +{ + return a + (vector unsigned int)b; +} + static vector float __ATTRS_o_ai vec_add(vector float a, vector float b) { @@ -169,12 +253,36 @@ vec_vaddubm(vector signed char a, vector signed char b) return a + b; } +static vector signed char __ATTRS_o_ai +vec_vaddubm(vector bool char a, vector signed char b) +{ + return (vector signed char)a + b; +} + +static vector signed char __ATTRS_o_ai +vec_vaddubm(vector signed char a, vector bool char b) +{ + return a + (vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_vaddubm(vector unsigned char a, vector unsigned char b) { return a + b; } +static vector unsigned char __ATTRS_o_ai +vec_vaddubm(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a + b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vaddubm(vector unsigned char a, vector bool char b) +{ + return a + (vector unsigned char)b; +} + /* vec_vadduhm */ #define __builtin_altivec_vadduhm vec_vadduhm @@ -185,12 +293,36 @@ vec_vadduhm(vector short a, vector short b) return a + b; } +static vector short __ATTRS_o_ai +vec_vadduhm(vector bool short a, vector short b) +{ + return (vector short)a + b; +} + +static vector short __ATTRS_o_ai +vec_vadduhm(vector short a, vector bool short b) +{ + return a + (vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_vadduhm(vector unsigned short a, vector unsigned short b) { return a + b; } +static vector unsigned short __ATTRS_o_ai +vec_vadduhm(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a + b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vadduhm(vector unsigned short a, vector bool short b) +{ + return a + (vector unsigned short)b; +} + /* vec_vadduwm */ #define 
__builtin_altivec_vadduwm vec_vadduwm @@ -201,12 +333,36 @@ vec_vadduwm(vector int a, vector int b) return a + b; } +static vector int __ATTRS_o_ai +vec_vadduwm(vector bool int a, vector int b) +{ + return (vector int)a + b; +} + +static vector int __ATTRS_o_ai +vec_vadduwm(vector int a, vector bool int b) +{ + return a + (vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_vadduwm(vector unsigned int a, vector unsigned int b) { return a + b; } +static vector unsigned int __ATTRS_o_ai +vec_vadduwm(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a + b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vadduwm(vector unsigned int a, vector bool int b) +{ + return a + (vector unsigned int)b; +} + /* vec_vaddfp */ #define __builtin_altivec_vaddfp vec_vaddfp @@ -241,84 +397,228 @@ vec_adds(vector signed char a, vector signed char b) return __builtin_altivec_vaddsbs(a, b); } +static vector signed char __ATTRS_o_ai +vec_adds(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vaddsbs((vector signed char)a, b); +} + +static vector signed char __ATTRS_o_ai +vec_adds(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vaddsbs(a, (vector signed char)b); +} + static vector unsigned char __ATTRS_o_ai vec_adds(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vaddubs(a, b); } +static vector unsigned char __ATTRS_o_ai +vec_adds(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vaddubs((vector unsigned char)a, b); +} + +static vector unsigned char __ATTRS_o_ai +vec_adds(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vaddubs(a, (vector unsigned char)b); +} + static vector short __ATTRS_o_ai vec_adds(vector short a, vector short b) { return __builtin_altivec_vaddshs(a, b); } +static vector short __ATTRS_o_ai +vec_adds(vector bool short a, vector short b) +{ + return __builtin_altivec_vaddshs((vector short)a, b); +} + +static 
vector short __ATTRS_o_ai +vec_adds(vector short a, vector bool short b) +{ + return __builtin_altivec_vaddshs(a, (vector short)b); +} + static vector unsigned short __ATTRS_o_ai vec_adds(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vadduhs(a, b); } +static vector unsigned short __ATTRS_o_ai +vec_adds(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vadduhs((vector unsigned short)a, b); +} + +static vector unsigned short __ATTRS_o_ai +vec_adds(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vadduhs(a, (vector unsigned short)b); +} + static vector int __ATTRS_o_ai vec_adds(vector int a, vector int b) { return __builtin_altivec_vaddsws(a, b); } +static vector int __ATTRS_o_ai +vec_adds(vector bool int a, vector int b) +{ + return __builtin_altivec_vaddsws((vector int)a, b); +} + +static vector int __ATTRS_o_ai +vec_adds(vector int a, vector bool int b) +{ + return __builtin_altivec_vaddsws(a, (vector int)b); +} + static vector unsigned int __ATTRS_o_ai vec_adds(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vadduws(a, b); } +static vector unsigned int __ATTRS_o_ai +vec_adds(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vadduws((vector unsigned int)a, b); +} + +static vector unsigned int __ATTRS_o_ai +vec_adds(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vadduws(a, (vector unsigned int)b); +} + /* vec_vaddsbs */ -static vector signed char __attribute__((__always_inline__)) +static vector signed char __ATTRS_o_ai vec_vaddsbs(vector signed char a, vector signed char b) { return __builtin_altivec_vaddsbs(a, b); } +static vector signed char __ATTRS_o_ai +vec_vaddsbs(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vaddsbs((vector signed char)a, b); +} + +static vector signed char __ATTRS_o_ai +vec_vaddsbs(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vaddsbs(a, 
(vector signed char)b); +} + /* vec_vaddubs */ -static vector unsigned char __attribute__((__always_inline__)) +static vector unsigned char __ATTRS_o_ai vec_vaddubs(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vaddubs(a, b); } +static vector unsigned char __ATTRS_o_ai +vec_vaddubs(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vaddubs((vector unsigned char)a, b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vaddubs(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vaddubs(a, (vector unsigned char)b); +} + /* vec_vaddshs */ -static vector short __attribute__((__always_inline__)) +static vector short __ATTRS_o_ai vec_vaddshs(vector short a, vector short b) { return __builtin_altivec_vaddshs(a, b); } +static vector short __ATTRS_o_ai +vec_vaddshs(vector bool short a, vector short b) +{ + return __builtin_altivec_vaddshs((vector short)a, b); +} + +static vector short __ATTRS_o_ai +vec_vaddshs(vector short a, vector bool short b) +{ + return __builtin_altivec_vaddshs(a, (vector short)b); +} + /* vec_vadduhs */ -static vector unsigned short __attribute__((__always_inline__)) +static vector unsigned short __ATTRS_o_ai vec_vadduhs(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vadduhs(a, b); } +static vector unsigned short __ATTRS_o_ai +vec_vadduhs(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vadduhs((vector unsigned short)a, b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vadduhs(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vadduhs(a, (vector unsigned short)b); +} + /* vec_vaddsws */ -static vector int __attribute__((__always_inline__)) +static vector int __ATTRS_o_ai vec_vaddsws(vector int a, vector int b) { return __builtin_altivec_vaddsws(a, b); } +static vector int __ATTRS_o_ai +vec_vaddsws(vector bool int a, vector int b) +{ + return __builtin_altivec_vaddsws((vector int)a, b); +} 
+ +static vector int __ATTRS_o_ai +vec_vaddsws(vector int a, vector bool int b) +{ + return __builtin_altivec_vaddsws(a, (vector int)b); +} + /* vec_vadduws */ -static vector unsigned int __attribute__((__always_inline__)) +static vector unsigned int __ATTRS_o_ai vec_vadduws(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vadduws(a, b); } +static vector unsigned int __ATTRS_o_ai +vec_vadduws(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vadduws((vector unsigned int)a, b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vadduws(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vadduws(a, (vector unsigned int)b); +} + /* vec_and */ #define __builtin_altivec_vand vec_and @@ -329,36 +629,126 @@ vec_and(vector signed char a, vector signed char b) return a & b; } +static vector signed char __ATTRS_o_ai +vec_and(vector bool char a, vector signed char b) +{ + return (vector signed char)a & b; +} + +static vector signed char __ATTRS_o_ai +vec_and(vector signed char a, vector bool char b) +{ + return a & (vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_and(vector unsigned char a, vector unsigned char b) { return a & b; } +static vector unsigned char __ATTRS_o_ai +vec_and(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a & b; +} + +static vector unsigned char __ATTRS_o_ai +vec_and(vector unsigned char a, vector bool char b) +{ + return a & (vector unsigned char)b; +} + +static vector bool char __ATTRS_o_ai +vec_and(vector bool char a, vector bool char b) +{ + return a & b; +} + static vector short __ATTRS_o_ai vec_and(vector short a, vector short b) { return a & b; } +static vector short __ATTRS_o_ai +vec_and(vector bool short a, vector short b) +{ + return (vector short)a & b; +} + +static vector short __ATTRS_o_ai +vec_and(vector short a, vector bool short b) +{ + return a & (vector short)b; +} + static vector unsigned short __ATTRS_o_ai 
vec_and(vector unsigned short a, vector unsigned short b) { return a & b; } +static vector unsigned short __ATTRS_o_ai +vec_and(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a & b; +} + +static vector unsigned short __ATTRS_o_ai +vec_and(vector unsigned short a, vector bool short b) +{ + return a & (vector unsigned short)b; +} + +static vector bool short __ATTRS_o_ai +vec_and(vector bool short a, vector bool short b) +{ + return a & b; +} + static vector int __ATTRS_o_ai vec_and(vector int a, vector int b) { return a & b; } +static vector int __ATTRS_o_ai +vec_and(vector bool int a, vector int b) +{ + return (vector int)a & b; +} + +static vector int __ATTRS_o_ai +vec_and(vector int a, vector bool int b) +{ + return a & (vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_and(vector unsigned int a, vector unsigned int b) { return a & b; } +static vector unsigned int __ATTRS_o_ai +vec_and(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a & b; +} + +static vector unsigned int __ATTRS_o_ai +vec_and(vector unsigned int a, vector bool int b) +{ + return a & (vector unsigned int)b; +} + +static vector bool int __ATTRS_o_ai +vec_and(vector bool int a, vector bool int b) +{ + return a & b; +} + static vector float __ATTRS_o_ai vec_and(vector float a, vector float b) { @@ -366,6 +756,20 @@ vec_and(vector float a, vector float b) return (vector float)res; } +static vector float __ATTRS_o_ai +vec_and(vector bool int a, vector float b) +{ + vector unsigned int res = (vector unsigned int)a & (vector unsigned int)b; + return (vector float)res; +} + +static vector float __ATTRS_o_ai +vec_and(vector float a, vector bool int b) +{ + vector unsigned int res = (vector unsigned int)a & (vector unsigned int)b; + return (vector float)res; +} + /* vec_vand */ static vector signed char __ATTRS_o_ai @@ -374,36 +778,126 @@ vec_vand(vector signed char a, vector signed char b) return a & b; } +static vector signed char 
__ATTRS_o_ai +vec_vand(vector bool char a, vector signed char b) +{ + return (vector signed char)a & b; +} + +static vector signed char __ATTRS_o_ai +vec_vand(vector signed char a, vector bool char b) +{ + return a & (vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_vand(vector unsigned char a, vector unsigned char b) { return a & b; } +static vector unsigned char __ATTRS_o_ai +vec_vand(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a & b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vand(vector unsigned char a, vector bool char b) +{ + return a & (vector unsigned char)b; +} + +static vector bool char __ATTRS_o_ai +vec_vand(vector bool char a, vector bool char b) +{ + return a & b; +} + static vector short __ATTRS_o_ai vec_vand(vector short a, vector short b) { return a & b; } +static vector short __ATTRS_o_ai +vec_vand(vector bool short a, vector short b) +{ + return (vector short)a & b; +} + +static vector short __ATTRS_o_ai +vec_vand(vector short a, vector bool short b) +{ + return a & (vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_vand(vector unsigned short a, vector unsigned short b) { return a & b; } +static vector unsigned short __ATTRS_o_ai +vec_vand(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a & b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vand(vector unsigned short a, vector bool short b) +{ + return a & (vector unsigned short)b; +} + +static vector bool short __ATTRS_o_ai +vec_vand(vector bool short a, vector bool short b) +{ + return a & b; +} + static vector int __ATTRS_o_ai vec_vand(vector int a, vector int b) { return a & b; } +static vector int __ATTRS_o_ai +vec_vand(vector bool int a, vector int b) +{ + return (vector int)a & b; +} + +static vector int __ATTRS_o_ai +vec_vand(vector int a, vector bool int b) +{ + return a & (vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_vand(vector unsigned int a, vector 
unsigned int b) { return a & b; } +static vector unsigned int __ATTRS_o_ai +vec_vand(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a & b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vand(vector unsigned int a, vector bool int b) +{ + return a & (vector unsigned int)b; +} + +static vector bool int __ATTRS_o_ai +vec_vand(vector bool int a, vector bool int b) +{ + return a & b; +} + static vector float __ATTRS_o_ai vec_vand(vector float a, vector float b) { @@ -411,6 +905,20 @@ vec_vand(vector float a, vector float b) return (vector float)res; } +static vector float __ATTRS_o_ai +vec_vand(vector bool int a, vector float b) +{ + vector unsigned int res = (vector unsigned int)a & (vector unsigned int)b; + return (vector float)res; +} + +static vector float __ATTRS_o_ai +vec_vand(vector float a, vector bool int b) +{ + vector unsigned int res = (vector unsigned int)a & (vector unsigned int)b; + return (vector float)res; +} + /* vec_andc */ #define __builtin_altivec_vandc vec_andc @@ -421,36 +929,126 @@ vec_andc(vector signed char a, vector signed char b) return a & ~b; } +static vector signed char __ATTRS_o_ai +vec_andc(vector bool char a, vector signed char b) +{ + return (vector signed char)a & ~b; +} + +static vector signed char __ATTRS_o_ai +vec_andc(vector signed char a, vector bool char b) +{ + return a & ~(vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_andc(vector unsigned char a, vector unsigned char b) { return a & ~b; } +static vector unsigned char __ATTRS_o_ai +vec_andc(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a & ~b; +} + +static vector unsigned char __ATTRS_o_ai +vec_andc(vector unsigned char a, vector bool char b) +{ + return a & ~(vector unsigned char)b; +} + +static vector bool char __ATTRS_o_ai +vec_andc(vector bool char a, vector bool char b) +{ + return a & ~b; +} + static vector short __ATTRS_o_ai vec_andc(vector short a, vector short b) { return a & ~b; 
} +static vector short __ATTRS_o_ai +vec_andc(vector bool short a, vector short b) +{ + return (vector short)a & ~b; +} + +static vector short __ATTRS_o_ai +vec_andc(vector short a, vector bool short b) +{ + return a & ~(vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_andc(vector unsigned short a, vector unsigned short b) { return a & ~b; } +static vector unsigned short __ATTRS_o_ai +vec_andc(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a & ~b; +} + +static vector unsigned short __ATTRS_o_ai +vec_andc(vector unsigned short a, vector bool short b) +{ + return a & ~(vector unsigned short)b; +} + +static vector bool short __ATTRS_o_ai +vec_andc(vector bool short a, vector bool short b) +{ + return a & ~b; +} + static vector int __ATTRS_o_ai vec_andc(vector int a, vector int b) { return a & ~b; } +static vector int __ATTRS_o_ai +vec_andc(vector bool int a, vector int b) +{ + return (vector int)a & ~b; +} + +static vector int __ATTRS_o_ai +vec_andc(vector int a, vector bool int b) +{ + return a & ~(vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_andc(vector unsigned int a, vector unsigned int b) { return a & ~b; } +static vector unsigned int __ATTRS_o_ai +vec_andc(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a & ~b; +} + +static vector unsigned int __ATTRS_o_ai +vec_andc(vector unsigned int a, vector bool int b) +{ + return a & ~(vector unsigned int)b; +} + +static vector bool int __ATTRS_o_ai +vec_andc(vector bool int a, vector bool int b) +{ + return a & ~b; +} + static vector float __ATTRS_o_ai vec_andc(vector float a, vector float b) { @@ -458,6 +1056,20 @@ vec_andc(vector float a, vector float b) return (vector float)res; } +static vector float __ATTRS_o_ai +vec_andc(vector bool int a, vector float b) +{ + vector unsigned int res = (vector unsigned int)a & ~(vector unsigned int)b; + return (vector float)res; +} + +static vector float __ATTRS_o_ai +vec_andc(vector 
float a, vector bool int b) +{ + vector unsigned int res = (vector unsigned int)a & ~(vector unsigned int)b; + return (vector float)res; +} + /* vec_vandc */ static vector signed char __ATTRS_o_ai @@ -466,36 +1078,126 @@ vec_vandc(vector signed char a, vector signed char b) return a & ~b; } +static vector signed char __ATTRS_o_ai +vec_vandc(vector bool char a, vector signed char b) +{ + return (vector signed char)a & ~b; +} + +static vector signed char __ATTRS_o_ai +vec_vandc(vector signed char a, vector bool char b) +{ + return a & ~(vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_vandc(vector unsigned char a, vector unsigned char b) { return a & ~b; } +static vector unsigned char __ATTRS_o_ai +vec_vandc(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a & ~b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vandc(vector unsigned char a, vector bool char b) +{ + return a & ~(vector unsigned char)b; +} + +static vector bool char __ATTRS_o_ai +vec_vandc(vector bool char a, vector bool char b) +{ + return a & ~b; +} + static vector short __ATTRS_o_ai vec_vandc(vector short a, vector short b) { return a & ~b; } +static vector short __ATTRS_o_ai +vec_vandc(vector bool short a, vector short b) +{ + return (vector short)a & ~b; +} + +static vector short __ATTRS_o_ai +vec_vandc(vector short a, vector bool short b) +{ + return a & ~(vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_vandc(vector unsigned short a, vector unsigned short b) { return a & ~b; } +static vector unsigned short __ATTRS_o_ai +vec_vandc(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a & ~b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vandc(vector unsigned short a, vector bool short b) +{ + return a & ~(vector unsigned short)b; +} + +static vector bool short __ATTRS_o_ai +vec_vandc(vector bool short a, vector bool short b) +{ + return a & ~b; +} + static vector int __ATTRS_o_ai 
vec_vandc(vector int a, vector int b) { return a & ~b; } +static vector int __ATTRS_o_ai +vec_vandc(vector bool int a, vector int b) +{ + return (vector int)a & ~b; +} + +static vector int __ATTRS_o_ai +vec_vandc(vector int a, vector bool int b) +{ + return a & ~(vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_vandc(vector unsigned int a, vector unsigned int b) { return a & ~b; } +static vector unsigned int __ATTRS_o_ai +vec_vandc(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a & ~b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vandc(vector unsigned int a, vector bool int b) +{ + return a & ~(vector unsigned int)b; +} + +static vector bool int __ATTRS_o_ai +vec_vandc(vector bool int a, vector bool int b) +{ + return a & ~b; +} + static vector float __ATTRS_o_ai vec_vandc(vector float a, vector float b) { @@ -503,6 +1205,20 @@ vec_vandc(vector float a, vector float b) return (vector float)res; } +static vector float __ATTRS_o_ai +vec_vandc(vector bool int a, vector float b) +{ + vector unsigned int res = (vector unsigned int)a & ~(vector unsigned int)b; + return (vector float)res; +} + +static vector float __ATTRS_o_ai +vec_vandc(vector float a, vector bool int b) +{ + vector unsigned int res = (vector unsigned int)a & ~(vector unsigned int)b; + return (vector float)res; +} + /* vec_avg */ static vector signed char __ATTRS_o_ai @@ -623,214 +1339,218 @@ vec_vcmpbfp(vector float a, vector float b) /* vec_cmpeq */ -static vector /*bool*/ char __ATTRS_o_ai +static vector bool char __ATTRS_o_ai vec_cmpeq(vector signed char a, vector signed char b) { - return __builtin_altivec_vcmpequb((vector char)a, (vector char)b); + return (vector bool char) + __builtin_altivec_vcmpequb((vector char)a, (vector char)b); } -static vector /*bool*/ char __ATTRS_o_ai +static vector bool char __ATTRS_o_ai vec_cmpeq(vector unsigned char a, vector unsigned char b) { - return __builtin_altivec_vcmpequb((vector char)a, (vector char)b); + return 
(vector bool char) + __builtin_altivec_vcmpequb((vector char)a, (vector char)b); } -static vector /*bool*/ short __ATTRS_o_ai +static vector bool short __ATTRS_o_ai vec_cmpeq(vector short a, vector short b) { - return __builtin_altivec_vcmpequh(a, b); + return (vector bool short)__builtin_altivec_vcmpequh(a, b); } -static vector /*bool*/ short __ATTRS_o_ai +static vector bool short __ATTRS_o_ai vec_cmpeq(vector unsigned short a, vector unsigned short b) { - return __builtin_altivec_vcmpequh((vector short)a, (vector short)b); + return (vector bool short) + __builtin_altivec_vcmpequh((vector short)a, (vector short)b); } -static vector /*bool*/ int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_cmpeq(vector int a, vector int b) { - return __builtin_altivec_vcmpequw(a, b); + return (vector bool int)__builtin_altivec_vcmpequw(a, b); } -static vector /*bool*/ int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_cmpeq(vector unsigned int a, vector unsigned int b) { - return __builtin_altivec_vcmpequw((vector int)a, (vector int)b); + return (vector bool int) + __builtin_altivec_vcmpequw((vector int)a, (vector int)b); } -static vector /*bool*/ int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_cmpeq(vector float a, vector float b) { - return __builtin_altivec_vcmpeqfp(a, b); + return (vector bool int)__builtin_altivec_vcmpeqfp(a, b); } /* vec_cmpge */ -static vector /*bool*/ int __attribute__((__always_inline__)) +static vector bool int __attribute__((__always_inline__)) vec_cmpge(vector float a, vector float b) { - return __builtin_altivec_vcmpgefp(a, b); + return (vector bool int)__builtin_altivec_vcmpgefp(a, b); } /* vec_vcmpgefp */ -static vector /*bool*/ int __attribute__((__always_inline__)) +static vector bool int __attribute__((__always_inline__)) vec_vcmpgefp(vector float a, vector float b) { - return __builtin_altivec_vcmpgefp(a, b); + return (vector bool int)__builtin_altivec_vcmpgefp(a, b); } /* vec_cmpgt */ -static vector /*bool*/ char 
__ATTRS_o_ai +static vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char a, vector signed char b) { - return __builtin_altivec_vcmpgtsb(a, b); + return (vector bool char)__builtin_altivec_vcmpgtsb(a, b); } -static vector /*bool*/ char __ATTRS_o_ai +static vector bool char __ATTRS_o_ai vec_cmpgt(vector unsigned char a, vector unsigned char b) { - return __builtin_altivec_vcmpgtub(a, b); + return (vector bool char)__builtin_altivec_vcmpgtub(a, b); } -static vector /*bool*/ short __ATTRS_o_ai +static vector bool short __ATTRS_o_ai vec_cmpgt(vector short a, vector short b) { - return __builtin_altivec_vcmpgtsh(a, b); + return (vector bool short)__builtin_altivec_vcmpgtsh(a, b); } -static vector /*bool*/ short __ATTRS_o_ai +static vector bool short __ATTRS_o_ai vec_cmpgt(vector unsigned short a, vector unsigned short b) { - return __builtin_altivec_vcmpgtuh(a, b); + return (vector bool short)__builtin_altivec_vcmpgtuh(a, b); } -static vector /*bool*/ int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_cmpgt(vector int a, vector int b) { - return __builtin_altivec_vcmpgtsw(a, b); + return (vector bool int)__builtin_altivec_vcmpgtsw(a, b); } -static vector /*bool*/ int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_cmpgt(vector unsigned int a, vector unsigned int b) { - return __builtin_altivec_vcmpgtuw(a, b); + return (vector bool int)__builtin_altivec_vcmpgtuw(a, b); } -static vector /*bool*/ int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_cmpgt(vector float a, vector float b) { - return __builtin_altivec_vcmpgtfp(a, b); + return (vector bool int)__builtin_altivec_vcmpgtfp(a, b); } /* vec_vcmpgtsb */ -static vector /*bool*/ char __attribute__((__always_inline__)) +static vector bool char __attribute__((__always_inline__)) vec_vcmpgtsb(vector signed char a, vector signed char b) { - return __builtin_altivec_vcmpgtsb(a, b); + return (vector bool char)__builtin_altivec_vcmpgtsb(a, b); } /* vec_vcmpgtub */ -static vector /*bool*/ char 
__attribute__((__always_inline__)) +static vector bool char __attribute__((__always_inline__)) vec_vcmpgtub(vector unsigned char a, vector unsigned char b) { - return __builtin_altivec_vcmpgtub(a, b); + return (vector bool char)__builtin_altivec_vcmpgtub(a, b); } /* vec_vcmpgtsh */ -static vector /*bool*/ short __attribute__((__always_inline__)) +static vector bool short __attribute__((__always_inline__)) vec_vcmpgtsh(vector short a, vector short b) { - return __builtin_altivec_vcmpgtsh(a, b); + return (vector bool short)__builtin_altivec_vcmpgtsh(a, b); } /* vec_vcmpgtuh */ -static vector /*bool*/ short __attribute__((__always_inline__)) +static vector bool short __attribute__((__always_inline__)) vec_vcmpgtuh(vector unsigned short a, vector unsigned short b) { - return __builtin_altivec_vcmpgtuh(a, b); + return (vector bool short)__builtin_altivec_vcmpgtuh(a, b); } /* vec_vcmpgtsw */ -static vector /*bool*/ int __attribute__((__always_inline__)) +static vector bool int __attribute__((__always_inline__)) vec_vcmpgtsw(vector int a, vector int b) { - return __builtin_altivec_vcmpgtsw(a, b); + return (vector bool int)__builtin_altivec_vcmpgtsw(a, b); } /* vec_vcmpgtuw */ -static vector /*bool*/ int __attribute__((__always_inline__)) +static vector bool int __attribute__((__always_inline__)) vec_vcmpgtuw(vector unsigned int a, vector unsigned int b) { - return __builtin_altivec_vcmpgtuw(a, b); + return (vector bool int)__builtin_altivec_vcmpgtuw(a, b); } /* vec_vcmpgtfp */ -static vector /*bool*/ int __attribute__((__always_inline__)) +static vector bool int __attribute__((__always_inline__)) vec_vcmpgtfp(vector float a, vector float b) { - return __builtin_altivec_vcmpgtfp(a, b); + return (vector bool int)__builtin_altivec_vcmpgtfp(a, b); } /* vec_cmple */ -static vector /*bool*/ int __attribute__((__always_inline__)) +static vector bool int __attribute__((__always_inline__)) vec_cmple(vector float a, vector float b) { - return __builtin_altivec_vcmpgefp(b, a); + 
return (vector bool int)__builtin_altivec_vcmpgefp(b, a); } /* vec_cmplt */ -static vector /*bool*/ char __ATTRS_o_ai +static vector bool char __ATTRS_o_ai vec_cmplt(vector signed char a, vector signed char b) { - return __builtin_altivec_vcmpgtsb(b, a); + return (vector bool char)__builtin_altivec_vcmpgtsb(b, a); } -static vector /*bool*/ char __ATTRS_o_ai +static vector bool char __ATTRS_o_ai vec_cmplt(vector unsigned char a, vector unsigned char b) { - return __builtin_altivec_vcmpgtub(b, a); + return (vector bool char)__builtin_altivec_vcmpgtub(b, a); } -static vector /*bool*/ short __ATTRS_o_ai +static vector bool short __ATTRS_o_ai vec_cmplt(vector short a, vector short b) { - return __builtin_altivec_vcmpgtsh(b, a); + return (vector bool short)__builtin_altivec_vcmpgtsh(b, a); } -static vector /*bool*/ short __ATTRS_o_ai +static vector bool short __ATTRS_o_ai vec_cmplt(vector unsigned short a, vector unsigned short b) { - return __builtin_altivec_vcmpgtuh(b, a); + return (vector bool short)__builtin_altivec_vcmpgtuh(b, a); } -static vector /*bool*/ int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_cmplt(vector int a, vector int b) { - return __builtin_altivec_vcmpgtsw(b, a); + return (vector bool int)__builtin_altivec_vcmpgtsw(b, a); } -static vector /*bool*/ int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_cmplt(vector unsigned int a, vector unsigned int b) { - return __builtin_altivec_vcmpgtuw(b, a); + return (vector bool int)__builtin_altivec_vcmpgtuw(b, a); } -static vector /*bool*/ int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_cmplt(vector float a, vector float b) { - return __builtin_altivec_vcmpgtfp(b, a); + return (vector bool int)__builtin_altivec_vcmpgtfp(b, a); } /* vec_ctf */ @@ -1001,6 +1721,12 @@ vec_ld(int a, unsigned char *b) return (vector unsigned char)__builtin_altivec_lvx(a, b); } +static vector bool char __ATTRS_o_ai +vec_ld(int a, vector bool char *b) +{ + return (vector bool char)__builtin_altivec_lvx(a, 
b); +} + static vector short __ATTRS_o_ai vec_ld(int a, vector short *b) { @@ -1025,6 +1751,18 @@ vec_ld(int a, unsigned short *b) return (vector unsigned short)__builtin_altivec_lvx(a, b); } +static vector bool short __ATTRS_o_ai +vec_ld(int a, vector bool short *b) +{ + return (vector bool short)__builtin_altivec_lvx(a, b); +} + +static vector pixel __ATTRS_o_ai +vec_ld(int a, vector pixel *b) +{ + return (vector pixel)__builtin_altivec_lvx(a, b); +} + static vector int __ATTRS_o_ai vec_ld(int a, vector int *b) { @@ -1049,6 +1787,12 @@ vec_ld(int a, unsigned int *b) return (vector unsigned int)__builtin_altivec_lvx(a, b); } +static vector bool int __ATTRS_o_ai +vec_ld(int a, vector bool int *b) +{ + return (vector bool int)__builtin_altivec_lvx(a, b); +} + static vector float __ATTRS_o_ai vec_ld(int a, vector float *b) { @@ -1087,6 +1831,12 @@ vec_lvx(int a, unsigned char *b) return (vector unsigned char)__builtin_altivec_lvx(a, b); } +static vector bool char __ATTRS_o_ai +vec_lvx(int a, vector bool char *b) +{ + return (vector bool char)__builtin_altivec_lvx(a, b); +} + static vector short __ATTRS_o_ai vec_lvx(int a, vector short *b) { @@ -1111,6 +1861,18 @@ vec_lvx(int a, unsigned short *b) return (vector unsigned short)__builtin_altivec_lvx(a, b); } +static vector bool short __ATTRS_o_ai +vec_lvx(int a, vector bool short *b) +{ + return (vector bool short)__builtin_altivec_lvx(a, b); +} + +static vector pixel __ATTRS_o_ai +vec_lvx(int a, vector pixel *b) +{ + return (vector pixel)__builtin_altivec_lvx(a, b); +} + static vector int __ATTRS_o_ai vec_lvx(int a, vector int *b) { @@ -1135,6 +1897,12 @@ vec_lvx(int a, unsigned int *b) return (vector unsigned int)__builtin_altivec_lvx(a, b); } +static vector bool int __ATTRS_o_ai +vec_lvx(int a, vector bool int *b) +{ + return (vector bool int)__builtin_altivec_lvx(a, b); +} + static vector float __ATTRS_o_ai vec_lvx(int a, vector float *b) { @@ -1265,6 +2033,12 @@ vec_ldl(int a, unsigned char *b) return (vector 
unsigned char)__builtin_altivec_lvxl(a, b); } +static vector bool char __ATTRS_o_ai +vec_ldl(int a, vector bool char *b) +{ + return (vector bool char)__builtin_altivec_lvxl(a, b); +} + static vector short __ATTRS_o_ai vec_ldl(int a, vector short *b) { @@ -1289,6 +2063,18 @@ vec_ldl(int a, unsigned short *b) return (vector unsigned short)__builtin_altivec_lvxl(a, b); } +static vector bool short __ATTRS_o_ai +vec_ldl(int a, vector bool short *b) +{ + return (vector bool short)__builtin_altivec_lvxl(a, b); +} + +static vector pixel __ATTRS_o_ai +vec_ldl(int a, vector pixel *b) +{ + return (vector pixel)__builtin_altivec_lvxl(a, b); +} + static vector int __ATTRS_o_ai vec_ldl(int a, vector int *b) { @@ -1313,6 +2099,12 @@ vec_ldl(int a, unsigned int *b) return (vector unsigned int)__builtin_altivec_lvxl(a, b); } +static vector bool int __ATTRS_o_ai +vec_ldl(int a, vector bool int *b) +{ + return (vector bool int)__builtin_altivec_lvxl(a, b); +} + static vector float __ATTRS_o_ai vec_ldl(int a, vector float *b) { @@ -1351,6 +2143,12 @@ vec_lvxl(int a, unsigned char *b) return (vector unsigned char)__builtin_altivec_lvxl(a, b); } +static vector bool char __ATTRS_o_ai +vec_lvxl(int a, vector bool char *b) +{ + return (vector bool char)__builtin_altivec_lvxl(a, b); +} + static vector short __ATTRS_o_ai vec_lvxl(int a, vector short *b) { @@ -1375,6 +2173,18 @@ vec_lvxl(int a, unsigned short *b) return (vector unsigned short)__builtin_altivec_lvxl(a, b); } +static vector bool short __ATTRS_o_ai +vec_lvxl(int a, vector bool short *b) +{ + return (vector bool short)__builtin_altivec_lvxl(a, b); +} + +static vector pixel __ATTRS_o_ai +vec_lvxl(int a, vector pixel *b) +{ + return (vector pixel)__builtin_altivec_lvxl(a, b); +} + static vector int __ATTRS_o_ai vec_lvxl(int a, vector int *b) { @@ -1399,6 +2209,12 @@ vec_lvxl(int a, unsigned int *b) return (vector unsigned int)__builtin_altivec_lvxl(a, b); } +static vector bool int __ATTRS_o_ai +vec_lvxl(int a, vector bool
int *b) +{ + return (vector bool int)__builtin_altivec_lvxl(a, b); +} + static vector float __ATTRS_o_ai vec_lvxl(int a, vector float *b) { @@ -1549,41 +2365,113 @@ vec_vmhaddshs(vector signed short a, vector signed short b, vector signed short /* vec_max */ static vector signed char __ATTRS_o_ai -vec_max(vector signed char a, vector signed char b) +vec_max(vector signed char a, vector signed char b) { return __builtin_altivec_vmaxsb(a, b); } +static vector signed char __ATTRS_o_ai +vec_max(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vmaxsb((vector signed char)a, b); +} + +static vector signed char __ATTRS_o_ai +vec_max(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vmaxsb(a, (vector signed char)b); +} + static vector unsigned char __ATTRS_o_ai -vec_max(vector unsigned char a, vector unsigned char b) +vec_max(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vmaxub(a, b); } +static vector unsigned char __ATTRS_o_ai +vec_max(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vmaxub((vector unsigned char)a, b); +} + +static vector unsigned char __ATTRS_o_ai +vec_max(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vmaxub(a, (vector unsigned char)b); +} + static vector short __ATTRS_o_ai vec_max(vector short a, vector short b) { return __builtin_altivec_vmaxsh(a, b); } +static vector short __ATTRS_o_ai +vec_max(vector bool short a, vector short b) +{ + return __builtin_altivec_vmaxsh((vector short)a, b); +} + +static vector short __ATTRS_o_ai +vec_max(vector short a, vector bool short b) +{ + return __builtin_altivec_vmaxsh(a, (vector short)b); +} + static vector unsigned short __ATTRS_o_ai vec_max(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vmaxuh(a, b); } +static vector unsigned short __ATTRS_o_ai +vec_max(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vmaxuh((vector unsigned 
short)a, b); +} + +static vector unsigned short __ATTRS_o_ai +vec_max(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vmaxuh(a, (vector unsigned short)b); +} + static vector int __ATTRS_o_ai vec_max(vector int a, vector int b) { return __builtin_altivec_vmaxsw(a, b); } +static vector int __ATTRS_o_ai +vec_max(vector bool int a, vector int b) +{ + return __builtin_altivec_vmaxsw((vector int)a, b); +} + +static vector int __ATTRS_o_ai +vec_max(vector int a, vector bool int b) +{ + return __builtin_altivec_vmaxsw(a, (vector int)b); +} + static vector unsigned int __ATTRS_o_ai vec_max(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vmaxuw(a, b); } +static vector unsigned int __ATTRS_o_ai +vec_max(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vmaxuw((vector unsigned int)a, b); +} + +static vector unsigned int __ATTRS_o_ai +vec_max(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vmaxuw(a, (vector unsigned int)b); +} + static vector float __ATTRS_o_ai vec_max(vector float a, vector float b) { @@ -1592,52 +2480,124 @@ vec_max(vector float a, vector float b) /* vec_vmaxsb */ -static vector signed char __attribute__((__always_inline__)) -vec_vmaxsb(vector signed char a, vector signed char b) +static vector signed char __ATTRS_o_ai +vec_vmaxsb(vector signed char a, vector signed char b) { return __builtin_altivec_vmaxsb(a, b); } +static vector signed char __ATTRS_o_ai +vec_vmaxsb(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vmaxsb((vector signed char)a, b); +} + +static vector signed char __ATTRS_o_ai +vec_vmaxsb(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vmaxsb(a, (vector signed char)b); +} + /* vec_vmaxub */ -static vector unsigned char __attribute__((__always_inline__)) -vec_vmaxub(vector unsigned char a, vector unsigned char b) +static vector unsigned char __ATTRS_o_ai +vec_vmaxub(vector unsigned char a, vector 
unsigned char b) { return __builtin_altivec_vmaxub(a, b); } +static vector unsigned char __ATTRS_o_ai +vec_vmaxub(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vmaxub((vector unsigned char)a, b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vmaxub(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vmaxub(a, (vector unsigned char)b); +} + /* vec_vmaxsh */ -static vector short __attribute__((__always_inline__)) +static vector short __ATTRS_o_ai vec_vmaxsh(vector short a, vector short b) { return __builtin_altivec_vmaxsh(a, b); } +static vector short __ATTRS_o_ai +vec_vmaxsh(vector bool short a, vector short b) +{ + return __builtin_altivec_vmaxsh((vector short)a, b); +} + +static vector short __ATTRS_o_ai +vec_vmaxsh(vector short a, vector bool short b) +{ + return __builtin_altivec_vmaxsh(a, (vector short)b); +} + /* vec_vmaxuh */ -static vector unsigned short __attribute__((__always_inline__)) +static vector unsigned short __ATTRS_o_ai vec_vmaxuh(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vmaxuh(a, b); } +static vector unsigned short __ATTRS_o_ai +vec_vmaxuh(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vmaxuh((vector unsigned short)a, b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vmaxuh(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vmaxuh(a, (vector unsigned short)b); +} + /* vec_vmaxsw */ -static vector int __attribute__((__always_inline__)) +static vector int __ATTRS_o_ai vec_vmaxsw(vector int a, vector int b) { return __builtin_altivec_vmaxsw(a, b); } +static vector int __ATTRS_o_ai +vec_vmaxsw(vector bool int a, vector int b) +{ + return __builtin_altivec_vmaxsw((vector int)a, b); +} + +static vector int __ATTRS_o_ai +vec_vmaxsw(vector int a, vector bool int b) +{ + return __builtin_altivec_vmaxsw(a, (vector int)b); +} + /* vec_vmaxuw */ -static vector unsigned int __attribute__((__always_inline__)) 
+static vector unsigned int __ATTRS_o_ai vec_vmaxuw(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vmaxuw(a, b); } +static vector unsigned int __ATTRS_o_ai +vec_vmaxuw(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vmaxuw((vector unsigned int)a, b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vmaxuw(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vmaxuw(a, (vector unsigned int)b); +} + /* vec_vmaxfp */ static vector float __attribute__((__always_inline__)) @@ -1664,6 +2624,14 @@ vec_mergeh(vector unsigned char a, vector unsigned char b) 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); } +static vector bool char __ATTRS_o_ai +vec_mergeh(vector bool char a, vector bool char b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); +} + static vector short __ATTRS_o_ai vec_mergeh(vector short a, vector short b) { @@ -1680,6 +2648,22 @@ vec_mergeh(vector unsigned short a, vector unsigned short b) 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); } +static vector bool short __ATTRS_o_ai +vec_mergeh(vector bool short a, vector bool short b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + +static vector pixel __ATTRS_o_ai +vec_mergeh(vector pixel a, vector pixel b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + static vector int __ATTRS_o_ai vec_mergeh(vector int a, vector int b) { @@ -1696,6 +2680,14 @@ vec_mergeh(vector unsigned int a, vector unsigned int b) 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); } +static vector bool int __ATTRS_o_ai +vec_mergeh(vector bool int a, vector bool int b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 
0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); +} + static vector float __ATTRS_o_ai vec_mergeh(vector float a, vector float b) { @@ -1724,6 +2716,14 @@ vec_vmrghb(vector unsigned char a, vector unsigned char b) 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); } +static vector bool char __ATTRS_o_ai +vec_vmrghb(vector bool char a, vector bool char b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); +} + /* vec_vmrghh */ #define __builtin_altivec_vmrghh vec_vmrghh @@ -1744,6 +2744,22 @@ vec_vmrghh(vector unsigned short a, vector unsigned short b) 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); } +static vector bool short __ATTRS_o_ai +vec_vmrghh(vector bool short a, vector bool short b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + +static vector pixel __ATTRS_o_ai +vec_vmrghh(vector pixel a, vector pixel b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + /* vec_vmrghw */ #define __builtin_altivec_vmrghw vec_vmrghw @@ -1764,6 +2780,14 @@ vec_vmrghw(vector unsigned int a, vector unsigned int b) 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); } +static vector bool int __ATTRS_o_ai +vec_vmrghw(vector bool int a, vector bool int b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); +} + static vector float __ATTRS_o_ai vec_vmrghw(vector float a, vector float b) { @@ -1790,6 +2814,14 @@ vec_mergel(vector unsigned char a, vector unsigned char b) 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); } +static vector bool char __ATTRS_o_ai +vec_mergel(vector bool char a, vector bool char b) +{ + return vec_perm(a, b, (vector 
unsigned char) + (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); +} + static vector short __ATTRS_o_ai vec_mergel(vector short a, vector short b) { @@ -1806,6 +2838,22 @@ vec_mergel(vector unsigned short a, vector unsigned short b) 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); } +static vector bool short __ATTRS_o_ai +vec_mergel(vector bool short a, vector bool short b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + +static vector pixel __ATTRS_o_ai +vec_mergel(vector pixel a, vector pixel b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + static vector int __ATTRS_o_ai vec_mergel(vector int a, vector int b) { @@ -1822,6 +2870,14 @@ vec_mergel(vector unsigned int a, vector unsigned int b) 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); } +static vector bool int __ATTRS_o_ai +vec_mergel(vector bool int a, vector bool int b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + static vector float __ATTRS_o_ai vec_mergel(vector float a, vector float b) { @@ -1850,6 +2906,14 @@ vec_vmrglb(vector unsigned char a, vector unsigned char b) 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); } +static vector bool char __ATTRS_o_ai +vec_vmrglb(vector bool char a, vector bool char b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); +} + /* vec_vmrglh */ #define __builtin_altivec_vmrglh vec_vmrglh @@ -1870,6 +2934,22 @@ vec_vmrglh(vector unsigned short a, vector unsigned short b) 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); } +static vector bool short __ATTRS_o_ai +vec_vmrglh(vector bool short 
a, vector bool short b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + +static vector pixel __ATTRS_o_ai +vec_vmrglh(vector pixel a, vector pixel b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + /* vec_vmrglw */ #define __builtin_altivec_vmrglw vec_vmrglw @@ -1890,6 +2970,14 @@ vec_vmrglw(vector unsigned int a, vector unsigned int b) 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); } +static vector bool int __ATTRS_o_ai +vec_vmrglw(vector bool int a, vector bool int b) +{ + return vec_perm(a, b, (vector unsigned char) + (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + static vector float __ATTRS_o_ai vec_vmrglw(vector float a, vector float b) { @@ -1909,41 +2997,113 @@ vec_mfvscr(void) /* vec_min */ static vector signed char __ATTRS_o_ai -vec_min(vector signed char a, vector signed char b) +vec_min(vector signed char a, vector signed char b) { return __builtin_altivec_vminsb(a, b); } +static vector signed char __ATTRS_o_ai +vec_min(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vminsb((vector signed char)a, b); +} + +static vector signed char __ATTRS_o_ai +vec_min(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vminsb(a, (vector signed char)b); +} + static vector unsigned char __ATTRS_o_ai -vec_min(vector unsigned char a, vector unsigned char b) +vec_min(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vminub(a, b); } +static vector unsigned char __ATTRS_o_ai +vec_min(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vminub((vector unsigned char)a, b); +} + +static vector unsigned char __ATTRS_o_ai +vec_min(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vminub(a, 
(vector unsigned char)b); +} + static vector short __ATTRS_o_ai vec_min(vector short a, vector short b) { return __builtin_altivec_vminsh(a, b); } +static vector short __ATTRS_o_ai +vec_min(vector bool short a, vector short b) +{ + return __builtin_altivec_vminsh((vector short)a, b); +} + +static vector short __ATTRS_o_ai +vec_min(vector short a, vector bool short b) +{ + return __builtin_altivec_vminsh(a, (vector short)b); +} + static vector unsigned short __ATTRS_o_ai vec_min(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vminuh(a, b); } +static vector unsigned short __ATTRS_o_ai +vec_min(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vminuh((vector unsigned short)a, b); +} + +static vector unsigned short __ATTRS_o_ai +vec_min(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vminuh(a, (vector unsigned short)b); +} + static vector int __ATTRS_o_ai vec_min(vector int a, vector int b) { return __builtin_altivec_vminsw(a, b); } +static vector int __ATTRS_o_ai +vec_min(vector bool int a, vector int b) +{ + return __builtin_altivec_vminsw((vector int)a, b); +} + +static vector int __ATTRS_o_ai +vec_min(vector int a, vector bool int b) +{ + return __builtin_altivec_vminsw(a, (vector int)b); +} + static vector unsigned int __ATTRS_o_ai vec_min(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vminuw(a, b); } +static vector unsigned int __ATTRS_o_ai +vec_min(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vminuw((vector unsigned int)a, b); +} + +static vector unsigned int __ATTRS_o_ai +vec_min(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vminuw(a, (vector unsigned int)b); +} + static vector float __ATTRS_o_ai vec_min(vector float a, vector float b) { @@ -1952,52 +3112,124 @@ vec_min(vector float a, vector float b) /* vec_vminsb */ -static vector signed char __attribute__((__always_inline__)) -vec_vminsb(vector 
signed char a, vector signed char b) +static vector signed char __ATTRS_o_ai +vec_vminsb(vector signed char a, vector signed char b) { return __builtin_altivec_vminsb(a, b); } +static vector signed char __ATTRS_o_ai +vec_vminsb(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vminsb((vector signed char)a, b); +} + +static vector signed char __ATTRS_o_ai +vec_vminsb(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vminsb(a, (vector signed char)b); +} + /* vec_vminub */ -static vector unsigned char __attribute__((__always_inline__)) -vec_vminub(vector unsigned char a, vector unsigned char b) +static vector unsigned char __ATTRS_o_ai +vec_vminub(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vminub(a, b); } +static vector unsigned char __ATTRS_o_ai +vec_vminub(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vminub((vector unsigned char)a, b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vminub(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vminub(a, (vector unsigned char)b); +} + /* vec_vminsh */ -static vector short __attribute__((__always_inline__)) +static vector short __ATTRS_o_ai vec_vminsh(vector short a, vector short b) { return __builtin_altivec_vminsh(a, b); } +static vector short __ATTRS_o_ai +vec_vminsh(vector bool short a, vector short b) +{ + return __builtin_altivec_vminsh((vector short)a, b); +} + +static vector short __ATTRS_o_ai +vec_vminsh(vector short a, vector bool short b) +{ + return __builtin_altivec_vminsh(a, (vector short)b); +} + /* vec_vminuh */ -static vector unsigned short __attribute__((__always_inline__)) +static vector unsigned short __ATTRS_o_ai vec_vminuh(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vminuh(a, b); } +static vector unsigned short __ATTRS_o_ai +vec_vminuh(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vminuh((vector unsigned 
short)a, b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vminuh(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vminuh(a, (vector unsigned short)b); +} + /* vec_vminsw */ -static vector int __attribute__((__always_inline__)) +static vector int __ATTRS_o_ai vec_vminsw(vector int a, vector int b) { return __builtin_altivec_vminsw(a, b); } +static vector int __ATTRS_o_ai +vec_vminsw(vector bool int a, vector int b) +{ + return __builtin_altivec_vminsw((vector int)a, b); +} + +static vector int __ATTRS_o_ai +vec_vminsw(vector int a, vector bool int b) +{ + return __builtin_altivec_vminsw(a, (vector int)b); +} + /* vec_vminuw */ -static vector unsigned int __attribute__((__always_inline__)) +static vector unsigned int __ATTRS_o_ai vec_vminuw(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vminuw(a, b); } +static vector unsigned int __ATTRS_o_ai +vec_vminuw(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vminuw((vector unsigned int)a, b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vminuw(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vminuw(a, (vector unsigned int)b); +} + /* vec_vminfp */ static vector float __attribute__((__always_inline__)) @@ -2179,6 +3411,12 @@ vec_mtvscr(vector unsigned char a) } static void __ATTRS_o_ai +vec_mtvscr(vector bool char a) +{ + __builtin_altivec_mtvscr((vector int)a); +} + +static void __ATTRS_o_ai vec_mtvscr(vector short a) { __builtin_altivec_mtvscr((vector int)a); @@ -2191,6 +3429,18 @@ vec_mtvscr(vector unsigned short a) } static void __ATTRS_o_ai +vec_mtvscr(vector bool short a) +{ + __builtin_altivec_mtvscr((vector int)a); +} + +static void __ATTRS_o_ai +vec_mtvscr(vector pixel a) +{ + __builtin_altivec_mtvscr((vector int)a); +} + +static void __ATTRS_o_ai vec_mtvscr(vector int a) { __builtin_altivec_mtvscr((vector int)a); @@ -2203,6 +3453,12 @@ vec_mtvscr(vector unsigned int a) } static void __ATTRS_o_ai 
+vec_mtvscr(vector bool int a) +{ + __builtin_altivec_mtvscr((vector int)a); +} + +static void __ATTRS_o_ai vec_mtvscr(vector float a) { __builtin_altivec_mtvscr((vector int)a); @@ -2356,6 +3612,12 @@ vec_nor(vector unsigned char a, vector unsigned char b) return ~(a | b); } +static vector bool char __ATTRS_o_ai +vec_nor(vector bool char a, vector bool char b) +{ + return ~(a | b); +} + static vector short __ATTRS_o_ai vec_nor(vector short a, vector short b) { @@ -2368,6 +3630,12 @@ vec_nor(vector unsigned short a, vector unsigned short b) return ~(a | b); } +static vector bool short __ATTRS_o_ai +vec_nor(vector bool short a, vector bool short b) +{ + return ~(a | b); +} + static vector int __ATTRS_o_ai vec_nor(vector int a, vector int b) { @@ -2380,6 +3648,12 @@ vec_nor(vector unsigned int a, vector unsigned int b) return ~(a | b); } +static vector bool int __ATTRS_o_ai +vec_nor(vector bool int a, vector bool int b) +{ + return ~(a | b); +} + static vector float __ATTRS_o_ai vec_nor(vector float a, vector float b) { @@ -2401,6 +3675,12 @@ vec_vnor(vector unsigned char a, vector unsigned char b) return ~(a | b); } +static vector bool char __ATTRS_o_ai +vec_vnor(vector bool char a, vector bool char b) +{ + return ~(a | b); +} + static vector short __ATTRS_o_ai vec_vnor(vector short a, vector short b) { @@ -2413,6 +3693,12 @@ vec_vnor(vector unsigned short a, vector unsigned short b) return ~(a | b); } +static vector bool short __ATTRS_o_ai +vec_vnor(vector bool short a, vector bool short b) +{ + return ~(a | b); +} + static vector int __ATTRS_o_ai vec_vnor(vector int a, vector int b) { @@ -2425,6 +3711,12 @@ vec_vnor(vector unsigned int a, vector unsigned int b) return ~(a | b); } +static vector bool int __ATTRS_o_ai +vec_vnor(vector bool int a, vector bool int b) +{ + return ~(a | b); +} + static vector float __ATTRS_o_ai vec_vnor(vector float a, vector float b) { @@ -2442,36 +3734,126 @@ vec_or(vector signed char a, vector signed char b) return a | b; } +static 
vector signed char __ATTRS_o_ai +vec_or(vector bool char a, vector signed char b) +{ + return (vector signed char)a | b; +} + +static vector signed char __ATTRS_o_ai +vec_or(vector signed char a, vector bool char b) +{ + return a | (vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_or(vector unsigned char a, vector unsigned char b) { return a | b; } +static vector unsigned char __ATTRS_o_ai +vec_or(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a | b; +} + +static vector unsigned char __ATTRS_o_ai +vec_or(vector unsigned char a, vector bool char b) +{ + return a | (vector unsigned char)b; +} + +static vector bool char __ATTRS_o_ai +vec_or(vector bool char a, vector bool char b) +{ + return a | b; +} + static vector short __ATTRS_o_ai vec_or(vector short a, vector short b) { return a | b; } +static vector short __ATTRS_o_ai +vec_or(vector bool short a, vector short b) +{ + return (vector short)a | b; +} + +static vector short __ATTRS_o_ai +vec_or(vector short a, vector bool short b) +{ + return a | (vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_or(vector unsigned short a, vector unsigned short b) { return a | b; } +static vector unsigned short __ATTRS_o_ai +vec_or(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a | b; +} + +static vector unsigned short __ATTRS_o_ai +vec_or(vector unsigned short a, vector bool short b) +{ + return a | (vector unsigned short)b; +} + +static vector bool short __ATTRS_o_ai +vec_or(vector bool short a, vector bool short b) +{ + return a | b; +} + static vector int __ATTRS_o_ai vec_or(vector int a, vector int b) { return a | b; } +static vector int __ATTRS_o_ai +vec_or(vector bool int a, vector int b) +{ + return (vector int)a | b; +} + +static vector int __ATTRS_o_ai +vec_or(vector int a, vector bool int b) +{ + return a | (vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_or(vector unsigned int a, vector unsigned int 
b) { return a | b; } +static vector unsigned int __ATTRS_o_ai +vec_or(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a | b; +} + +static vector unsigned int __ATTRS_o_ai +vec_or(vector unsigned int a, vector bool int b) +{ + return a | (vector unsigned int)b; +} + +static vector bool int __ATTRS_o_ai +vec_or(vector bool int a, vector bool int b) +{ + return a | b; +} + static vector float __ATTRS_o_ai vec_or(vector float a, vector float b) { @@ -2479,6 +3861,20 @@ vec_or(vector float a, vector float b) return (vector float)res; } +static vector float __ATTRS_o_ai +vec_or(vector bool int a, vector float b) +{ + vector unsigned int res = (vector unsigned int)a | (vector unsigned int)b; + return (vector float)res; +} + +static vector float __ATTRS_o_ai +vec_or(vector float a, vector bool int b) +{ + vector unsigned int res = (vector unsigned int)a | (vector unsigned int)b; + return (vector float)res; +} + /* vec_vor */ static vector signed char __ATTRS_o_ai @@ -2487,36 +3883,126 @@ vec_vor(vector signed char a, vector signed char b) return a | b; } +static vector signed char __ATTRS_o_ai +vec_vor(vector bool char a, vector signed char b) +{ + return (vector signed char)a | b; +} + +static vector signed char __ATTRS_o_ai +vec_vor(vector signed char a, vector bool char b) +{ + return a | (vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_vor(vector unsigned char a, vector unsigned char b) { return a | b; } +static vector unsigned char __ATTRS_o_ai +vec_vor(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a | b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vor(vector unsigned char a, vector bool char b) +{ + return a | (vector unsigned char)b; +} + +static vector bool char __ATTRS_o_ai +vec_vor(vector bool char a, vector bool char b) +{ + return a | b; +} + static vector short __ATTRS_o_ai vec_vor(vector short a, vector short b) { return a | b; } +static vector short __ATTRS_o_ai 
+vec_vor(vector bool short a, vector short b) +{ + return (vector short)a | b; +} + +static vector short __ATTRS_o_ai +vec_vor(vector short a, vector bool short b) +{ + return a | (vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_vor(vector unsigned short a, vector unsigned short b) { return a | b; } +static vector unsigned short __ATTRS_o_ai +vec_vor(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a | b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vor(vector unsigned short a, vector bool short b) +{ + return a | (vector unsigned short)b; +} + +static vector bool short __ATTRS_o_ai +vec_vor(vector bool short a, vector bool short b) +{ + return a | b; +} + static vector int __ATTRS_o_ai vec_vor(vector int a, vector int b) { return a | b; } +static vector int __ATTRS_o_ai +vec_vor(vector bool int a, vector int b) +{ + return (vector int)a | b; +} + +static vector int __ATTRS_o_ai +vec_vor(vector int a, vector bool int b) +{ + return a | (vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_vor(vector unsigned int a, vector unsigned int b) { return a | b; } +static vector unsigned int __ATTRS_o_ai +vec_vor(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a | b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vor(vector unsigned int a, vector bool int b) +{ + return a | (vector unsigned int)b; +} + +static vector bool int __ATTRS_o_ai +vec_vor(vector bool int a, vector bool int b) +{ + return a | b; +} + static vector float __ATTRS_o_ai vec_vor(vector float a, vector float b) { @@ -2524,6 +4010,20 @@ vec_vor(vector float a, vector float b) return (vector float)res; } +static vector float __ATTRS_o_ai +vec_vor(vector bool int a, vector float b) +{ + vector unsigned int res = (vector unsigned int)a | (vector unsigned int)b; + return (vector float)res; +} + +static vector float __ATTRS_o_ai +vec_vor(vector float a, vector bool int b) +{ + vector unsigned int res = (vector 
unsigned int)a | (vector unsigned int)b; + return (vector float)res; +} + /* vec_pack */ static vector signed char __ATTRS_o_ai @@ -2542,6 +4042,14 @@ vec_pack(vector unsigned short a, vector unsigned short b) 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); } +static vector bool char __ATTRS_o_ai +vec_pack(vector bool short a, vector bool short b) +{ + return (vector bool char)vec_perm(a, b, (vector unsigned char) + (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); +} + static vector short __ATTRS_o_ai vec_pack(vector int a, vector int b) { @@ -2558,6 +4066,14 @@ vec_pack(vector unsigned int a, vector unsigned int b) 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); } +static vector bool short __ATTRS_o_ai +vec_pack(vector bool int a, vector bool int b) +{ + return (vector bool short)vec_perm(a, b, (vector unsigned char) + (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); +} + /* vec_vpkuhum */ #define __builtin_altivec_vpkuhum vec_vpkuhum @@ -2578,6 +4094,14 @@ vec_vpkuhum(vector unsigned short a, vector unsigned short b) 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); } +static vector bool char __ATTRS_o_ai +vec_vpkuhum(vector bool short a, vector bool short b) +{ + return (vector bool char)vec_perm(a, b, (vector unsigned char) + (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); +} + /* vec_vpkuwum */ #define __builtin_altivec_vpkuwum vec_vpkuwum @@ -2598,6 +4122,14 @@ vec_vpkuwum(vector unsigned int a, vector unsigned int b) 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); } +static vector bool short __ATTRS_o_ai +vec_vpkuwum(vector bool int a, vector bool int b) +{ + return (vector bool short)vec_perm(a, b, (vector unsigned char) + (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); +} + /* vec_packpx */ static vector pixel __attribute__((__always_inline__)) 
@@ -2740,6 +4272,12 @@ vec_perm(vector unsigned char a, vector unsigned char b, vector unsigned char c) return (vector unsigned char)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); } +vector bool char __ATTRS_o_ai +vec_perm(vector bool char a, vector bool char b, vector unsigned char c) +{ + return (vector bool char)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); +} + vector short __ATTRS_o_ai vec_perm(vector short a, vector short b, vector unsigned char c) { @@ -2752,6 +4290,18 @@ vec_perm(vector unsigned short a, vector unsigned short b, vector unsigned char return (vector unsigned short)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); } +vector bool short __ATTRS_o_ai +vec_perm(vector bool short a, vector bool short b, vector unsigned char c) +{ + return (vector bool short)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); +} + +vector pixel __ATTRS_o_ai +vec_perm(vector pixel a, vector pixel b, vector unsigned char c) +{ + return (vector pixel)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); +} + vector int __ATTRS_o_ai vec_perm(vector int a, vector int b, vector unsigned char c) { @@ -2764,6 +4314,12 @@ vec_perm(vector unsigned int a, vector unsigned int b, vector unsigned char c) return (vector unsigned int)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); } +vector bool int __ATTRS_o_ai +vec_perm(vector bool int a, vector bool int b, vector unsigned char c) +{ + return (vector bool int)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); +} + vector float __ATTRS_o_ai vec_perm(vector float a, vector float b, vector unsigned char c) { @@ -2784,6 +4340,12 @@ vec_vperm(vector unsigned char a, vector unsigned char b, vector unsigned char c return (vector unsigned char)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); } +vector bool char __ATTRS_o_ai +vec_vperm(vector bool char a, vector bool char b, vector unsigned char c) +{ + return (vector bool 
char)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); +} + vector short __ATTRS_o_ai vec_vperm(vector short a, vector short b, vector unsigned char c) { @@ -2796,6 +4358,18 @@ vec_vperm(vector unsigned short a, vector unsigned short b, vector unsigned char return (vector unsigned short)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); } +vector bool short __ATTRS_o_ai +vec_vperm(vector bool short a, vector bool short b, vector unsigned char c) +{ + return (vector bool short)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); +} + +vector pixel __ATTRS_o_ai +vec_vperm(vector pixel a, vector pixel b, vector unsigned char c) +{ + return (vector pixel)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); +} + vector int __ATTRS_o_ai vec_vperm(vector int a, vector int b, vector unsigned char c) { @@ -2808,6 +4382,12 @@ vec_vperm(vector unsigned int a, vector unsigned int b, vector unsigned char c) return (vector unsigned int)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); } +vector bool int __ATTRS_o_ai +vec_vperm(vector bool int a, vector bool int b, vector unsigned char c) +{ + return (vector bool int)__builtin_altivec_vperm_4si((vector int)a, (vector int)b, c); +} + vector float __ATTRS_o_ai vec_vperm(vector float a, vector float b, vector unsigned char c) { @@ -2952,36 +4532,108 @@ vec_sel(vector signed char a, vector signed char b, vector unsigned char c) return (a & ~(vector signed char)c) | (b & (vector signed char)c); } +static vector signed char __ATTRS_o_ai +vec_sel(vector signed char a, vector signed char b, vector bool char c) +{ + return (a & ~(vector signed char)c) | (b & (vector signed char)c); +} + static vector unsigned char __ATTRS_o_ai vec_sel(vector unsigned char a, vector unsigned char b, vector unsigned char c) { return (a & ~c) | (b & c); } +static vector unsigned char __ATTRS_o_ai +vec_sel(vector unsigned char a, vector unsigned char b, vector bool char c) +{ + return (a & ~(vector unsigned 
char)c) | (b & (vector unsigned char)c); +} + +static vector bool char __ATTRS_o_ai +vec_sel(vector bool char a, vector bool char b, vector unsigned char c) +{ + return (a & ~(vector bool char)c) | (b & (vector bool char)c); +} + +static vector bool char __ATTRS_o_ai +vec_sel(vector bool char a, vector bool char b, vector bool char c) +{ + return (a & ~c) | (b & c); +} + static vector short __ATTRS_o_ai vec_sel(vector short a, vector short b, vector unsigned short c) { return (a & ~(vector short)c) | (b & (vector short)c); } +static vector short __ATTRS_o_ai +vec_sel(vector short a, vector short b, vector bool short c) +{ + return (a & ~(vector short)c) | (b & (vector short)c); +} + static vector unsigned short __ATTRS_o_ai vec_sel(vector unsigned short a, vector unsigned short b, vector unsigned short c) { return (a & ~c) | (b & c); } +static vector unsigned short __ATTRS_o_ai +vec_sel(vector unsigned short a, vector unsigned short b, vector bool short c) +{ + return (a & ~(vector unsigned short)c) | (b & (vector unsigned short)c); +} + +static vector bool short __ATTRS_o_ai +vec_sel(vector bool short a, vector bool short b, vector unsigned short c) +{ + return (a & ~(vector bool short)c) | (b & (vector bool short)c); +} + +static vector bool short __ATTRS_o_ai +vec_sel(vector bool short a, vector bool short b, vector bool short c) +{ + return (a & ~c) | (b & c); +} + static vector int __ATTRS_o_ai vec_sel(vector int a, vector int b, vector unsigned int c) { return (a & ~(vector int)c) | (b & (vector int)c); } +static vector int __ATTRS_o_ai +vec_sel(vector int a, vector int b, vector bool int c) +{ + return (a & ~(vector int)c) | (b & (vector int)c); +} + static vector unsigned int __ATTRS_o_ai vec_sel(vector unsigned int a, vector unsigned int b, vector unsigned int c) { return (a & ~c) | (b & c); } +static vector unsigned int __ATTRS_o_ai +vec_sel(vector unsigned int a, vector unsigned int b, vector bool int c) +{ + return (a & ~(vector unsigned int)c) | (b & 
(vector unsigned int)c); +} + +static vector bool int __ATTRS_o_ai +vec_sel(vector bool int a, vector bool int b, vector unsigned int c) +{ + return (a & ~(vector bool int)c) | (b & (vector bool int)c); +} + +static vector bool int __ATTRS_o_ai +vec_sel(vector bool int a, vector bool int b, vector bool int c) +{ + return (a & ~c) | (b & c); +} + static vector float __ATTRS_o_ai vec_sel(vector float a, vector float b, vector unsigned int c) { @@ -2989,6 +4641,13 @@ vec_sel(vector float a, vector float b, vector unsigned int c) return (vector float)res; } +static vector float __ATTRS_o_ai +vec_sel(vector float a, vector float b, vector bool int c) +{ + vector int res = ((vector int)a & ~(vector int)c) | ((vector int)b & (vector int)c); + return (vector float)res; +} + /* vec_vsel */ static vector signed char __ATTRS_o_ai @@ -2997,36 +4656,108 @@ vec_vsel(vector signed char a, vector signed char b, vector unsigned char c) return (a & ~(vector signed char)c) | (b & (vector signed char)c); } +static vector signed char __ATTRS_o_ai +vec_vsel(vector signed char a, vector signed char b, vector bool char c) +{ + return (a & ~(vector signed char)c) | (b & (vector signed char)c); +} + static vector unsigned char __ATTRS_o_ai vec_vsel(vector unsigned char a, vector unsigned char b, vector unsigned char c) { return (a & ~c) | (b & c); } +static vector unsigned char __ATTRS_o_ai +vec_vsel(vector unsigned char a, vector unsigned char b, vector bool char c) +{ + return (a & ~(vector unsigned char)c) | (b & (vector unsigned char)c); +} + +static vector bool char __ATTRS_o_ai +vec_vsel(vector bool char a, vector bool char b, vector unsigned char c) +{ + return (a & ~(vector bool char)c) | (b & (vector bool char)c); +} + +static vector bool char __ATTRS_o_ai +vec_vsel(vector bool char a, vector bool char b, vector bool char c) +{ + return (a & ~c) | (b & c); +} + static vector short __ATTRS_o_ai vec_vsel(vector short a, vector short b, vector unsigned short c) { return (a & ~(vector 
short)c) | (b & (vector short)c); } +static vector short __ATTRS_o_ai +vec_vsel(vector short a, vector short b, vector bool short c) +{ + return (a & ~(vector short)c) | (b & (vector short)c); +} + static vector unsigned short __ATTRS_o_ai vec_vsel(vector unsigned short a, vector unsigned short b, vector unsigned short c) { return (a & ~c) | (b & c); } +static vector unsigned short __ATTRS_o_ai +vec_vsel(vector unsigned short a, vector unsigned short b, vector bool short c) +{ + return (a & ~(vector unsigned short)c) | (b & (vector unsigned short)c); +} + +static vector bool short __ATTRS_o_ai +vec_vsel(vector bool short a, vector bool short b, vector unsigned short c) +{ + return (a & ~(vector bool short)c) | (b & (vector bool short)c); +} + +static vector bool short __ATTRS_o_ai +vec_vsel(vector bool short a, vector bool short b, vector bool short c) +{ + return (a & ~c) | (b & c); +} + static vector int __ATTRS_o_ai vec_vsel(vector int a, vector int b, vector unsigned int c) { return (a & ~(vector int)c) | (b & (vector int)c); } +static vector int __ATTRS_o_ai +vec_vsel(vector int a, vector int b, vector bool int c) +{ + return (a & ~(vector int)c) | (b & (vector int)c); +} + static vector unsigned int __ATTRS_o_ai vec_vsel(vector unsigned int a, vector unsigned int b, vector unsigned int c) { return (a & ~c) | (b & c); } +static vector unsigned int __ATTRS_o_ai +vec_vsel(vector unsigned int a, vector unsigned int b, vector bool int c) +{ + return (a & ~(vector unsigned int)c) | (b & (vector unsigned int)c); +} + +static vector bool int __ATTRS_o_ai +vec_vsel(vector bool int a, vector bool int b, vector unsigned int c) +{ + return (a & ~(vector bool int)c) | (b & (vector bool int)c); +} + +static vector bool int __ATTRS_o_ai +vec_vsel(vector bool int a, vector bool int b, vector bool int c) +{ + return (a & ~c) | (b & c); +} + static vector float __ATTRS_o_ai vec_vsel(vector float a, vector float b, vector unsigned int c) { @@ -3034,6 +4765,13 @@ vec_vsel(vector 
float a, vector float b, vector unsigned int c) return (vector float)res; } +static vector float __ATTRS_o_ai +vec_vsel(vector float a, vector float b, vector bool int c) +{ + vector int res = ((vector int)a & ~(vector int)c) | ((vector int)b & (vector int)c); + return (vector float)res; +} + /* vec_sl */ static vector signed char __ATTRS_o_ai @@ -3127,7 +4865,7 @@ vec_vslw(vector unsigned int a, vector unsigned int b) static vector signed char __ATTRS_o_ai vec_sld(vector signed char a, vector signed char b, unsigned char c) { - return (vector signed char)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3135,7 +4873,7 @@ vec_sld(vector signed char a, vector signed char b, unsigned char c) static vector unsigned char __ATTRS_o_ai vec_sld(vector unsigned char a, vector unsigned char b, unsigned char c) { - return (vector unsigned char)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3143,7 +4881,7 @@ vec_sld(vector unsigned char a, vector unsigned char b, unsigned char c) static vector short __ATTRS_o_ai vec_sld(vector short a, vector short b, unsigned char c) { - return (vector short)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3151,7 +4889,15 @@ vec_sld(vector short a, vector short b, unsigned char c) static vector unsigned short __ATTRS_o_ai vec_sld(vector unsigned short a, vector unsigned short b, unsigned char c) { - return (vector unsigned short)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) + (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, + c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); +} + +static vector pixel __ATTRS_o_ai +vec_sld(vector pixel 
a, vector pixel b, unsigned char c) +{ + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3167,7 +4913,7 @@ vec_sld(vector int a, vector int b, unsigned char c) static vector unsigned int __ATTRS_o_ai vec_sld(vector unsigned int a, vector unsigned int b, unsigned char c) { - return (vector unsigned int)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3175,7 +4921,7 @@ vec_sld(vector unsigned int a, vector unsigned int b, unsigned char c) static vector float __ATTRS_o_ai vec_sld(vector float a, vector float b, unsigned char c) { - return (vector float)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3185,7 +4931,7 @@ vec_sld(vector float a, vector float b, unsigned char c) static vector signed char __ATTRS_o_ai vec_vsldoi(vector signed char a, vector signed char b, unsigned char c) { - return (vector signed char)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3193,7 +4939,7 @@ vec_vsldoi(vector signed char a, vector signed char b, unsigned char c) static vector unsigned char __ATTRS_o_ai vec_vsldoi(vector unsigned char a, vector unsigned char b, unsigned char c) { - return (vector unsigned char)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3201,7 +4947,7 @@ vec_vsldoi(vector unsigned char a, vector unsigned char b, unsigned char c) static vector short __ATTRS_o_ai vec_vsldoi(vector short a, vector short b, unsigned char c) { - return (vector short)vec_perm(a, b, 
(vector unsigned char) + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3209,7 +4955,15 @@ vec_vsldoi(vector short a, vector short b, unsigned char c) static vector unsigned short __ATTRS_o_ai vec_vsldoi(vector unsigned short a, vector unsigned short b, unsigned char c) { - return (vector unsigned short)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) + (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, + c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); +} + +static vector pixel __ATTRS_o_ai +vec_vsldoi(vector pixel a, vector pixel b, unsigned char c) +{ + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3225,7 +4979,7 @@ vec_vsldoi(vector int a, vector int b, unsigned char c) static vector unsigned int __ATTRS_o_ai vec_vsldoi(vector unsigned int a, vector unsigned int b, unsigned char c) { - return (vector unsigned int)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3233,7 +4987,7 @@ vec_vsldoi(vector unsigned int a, vector unsigned int b, unsigned char c) static vector float __ATTRS_o_ai vec_vsldoi(vector float a, vector float b, unsigned char c) { - return (vector float)vec_perm(a, b, (vector unsigned char) + return vec_perm(a, b, (vector unsigned char) (c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, c+8, c+9, c+10, c+11, c+12, c+13, c+14, c+15)); } @@ -3276,6 +5030,24 @@ vec_sll(vector unsigned char a, vector unsigned int b) return (vector unsigned char)__builtin_altivec_vsl((vector int)a, (vector int)b); } +static vector bool char __ATTRS_o_ai +vec_sll(vector bool char a, vector unsigned char b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool char __ATTRS_o_ai +vec_sll(vector bool 
char a, vector unsigned short b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool char __ATTRS_o_ai +vec_sll(vector bool char a, vector unsigned int b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + static vector short __ATTRS_o_ai vec_sll(vector short a, vector unsigned char b) { @@ -3312,6 +5084,42 @@ vec_sll(vector unsigned short a, vector unsigned int b) return (vector unsigned short)__builtin_altivec_vsl((vector int)a, (vector int)b); } +static vector bool short __ATTRS_o_ai +vec_sll(vector bool short a, vector unsigned char b) +{ + return (vector bool short)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool short __ATTRS_o_ai +vec_sll(vector bool short a, vector unsigned short b) +{ + return (vector bool short)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool short __ATTRS_o_ai +vec_sll(vector bool short a, vector unsigned int b) +{ + return (vector bool short)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_sll(vector pixel a, vector unsigned char b) +{ + return (vector pixel)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_sll(vector pixel a, vector unsigned short b) +{ + return (vector pixel)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_sll(vector pixel a, vector unsigned int b) +{ + return (vector pixel)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + static vector int __ATTRS_o_ai vec_sll(vector int a, vector unsigned char b) { @@ -3348,6 +5156,24 @@ vec_sll(vector unsigned int a, vector unsigned int b) return (vector unsigned int)__builtin_altivec_vsl((vector int)a, (vector int)b); } +static vector bool int __ATTRS_o_ai +vec_sll(vector bool int a, vector unsigned char b) +{ + return (vector bool int)__builtin_altivec_vsl((vector int)a, (vector 
int)b); +} + +static vector bool int __ATTRS_o_ai +vec_sll(vector bool int a, vector unsigned short b) +{ + return (vector bool int)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool int __ATTRS_o_ai +vec_sll(vector bool int a, vector unsigned int b) +{ + return (vector bool int)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + /* vec_vsl */ static vector signed char __ATTRS_o_ai @@ -3386,6 +5212,24 @@ vec_vsl(vector unsigned char a, vector unsigned int b) return (vector unsigned char)__builtin_altivec_vsl((vector int)a, (vector int)b); } +static vector bool char __ATTRS_o_ai +vec_vsl(vector bool char a, vector unsigned char b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool char __ATTRS_o_ai +vec_vsl(vector bool char a, vector unsigned short b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool char __ATTRS_o_ai +vec_vsl(vector bool char a, vector unsigned int b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + static vector short __ATTRS_o_ai vec_vsl(vector short a, vector unsigned char b) { @@ -3422,6 +5266,42 @@ vec_vsl(vector unsigned short a, vector unsigned int b) return (vector unsigned short)__builtin_altivec_vsl((vector int)a, (vector int)b); } +static vector bool short __ATTRS_o_ai +vec_vsl(vector bool short a, vector unsigned char b) +{ + return (vector bool short)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool short __ATTRS_o_ai +vec_vsl(vector bool short a, vector unsigned short b) +{ + return (vector bool short)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool short __ATTRS_o_ai +vec_vsl(vector bool short a, vector unsigned int b) +{ + return (vector bool short)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_vsl(vector pixel a, vector unsigned char b) +{ + 
return (vector pixel)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_vsl(vector pixel a, vector unsigned short b) +{ + return (vector pixel)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_vsl(vector pixel a, vector unsigned int b) +{ + return (vector pixel)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + static vector int __ATTRS_o_ai vec_vsl(vector int a, vector unsigned char b) { @@ -3458,6 +5338,24 @@ vec_vsl(vector unsigned int a, vector unsigned int b) return (vector unsigned int)__builtin_altivec_vsl((vector int)a, (vector int)b); } +static vector bool int __ATTRS_o_ai +vec_vsl(vector bool int a, vector unsigned char b) +{ + return (vector bool int)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool int __ATTRS_o_ai +vec_vsl(vector bool int a, vector unsigned short b) +{ + return (vector bool int)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + +static vector bool int __ATTRS_o_ai +vec_vsl(vector bool int a, vector unsigned int b) +{ + return (vector bool int)__builtin_altivec_vsl((vector int)a, (vector int)b); +} + /* vec_slo */ static vector signed char __ATTRS_o_ai @@ -3508,6 +5406,18 @@ vec_slo(vector unsigned short a, vector unsigned char b) return (vector unsigned short)__builtin_altivec_vslo((vector int)a, (vector int)b); } +static vector pixel __ATTRS_o_ai +vec_slo(vector pixel a, vector signed char b) +{ + return (vector pixel)__builtin_altivec_vslo((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_slo(vector pixel a, vector unsigned char b) +{ + return (vector pixel)__builtin_altivec_vslo((vector int)a, (vector int)b); +} + static vector int __ATTRS_o_ai vec_slo(vector int a, vector signed char b) { @@ -3594,6 +5504,18 @@ vec_vslo(vector unsigned short a, vector unsigned char b) return (vector unsigned short)__builtin_altivec_vslo((vector int)a, (vector int)b); } +static vector pixel 
__ATTRS_o_ai +vec_vslo(vector pixel a, vector signed char b) +{ + return (vector pixel)__builtin_altivec_vslo((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_vslo(vector pixel a, vector unsigned char b) +{ + return (vector pixel)__builtin_altivec_vslo((vector int)a, (vector int)b); +} + static vector int __ATTRS_o_ai vec_vslo(vector int a, vector signed char b) { @@ -3635,20 +5557,26 @@ vec_vslo(vector float a, vector unsigned char b) static vector signed char __ATTRS_o_ai vec_splat(vector signed char a, unsigned char b) { - return (vector signed char)vec_perm(a, a, (vector unsigned char)(b)); + return vec_perm(a, a, (vector unsigned char)(b)); } static vector unsigned char __ATTRS_o_ai vec_splat(vector unsigned char a, unsigned char b) { - return (vector unsigned char)vec_perm(a, a, (vector unsigned char)(b)); + return vec_perm(a, a, (vector unsigned char)(b)); +} + +static vector bool char __ATTRS_o_ai +vec_splat(vector bool char a, unsigned char b) +{ + return vec_perm(a, a, (vector unsigned char)(b)); } static vector short __ATTRS_o_ai vec_splat(vector short a, unsigned char b) { b *= 2; - return (vector short)vec_perm(a, a, (vector unsigned char) + return vec_perm(a, a, (vector unsigned char) (b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1)); } @@ -3656,7 +5584,23 @@ static vector unsigned short __ATTRS_o_ai vec_splat(vector unsigned short a, unsigned char b) { b *= 2; - return (vector unsigned short)vec_perm(a, a, (vector unsigned char) + return vec_perm(a, a, (vector unsigned char) + (b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1)); +} + +static vector bool short __ATTRS_o_ai +vec_splat(vector bool short a, unsigned char b) +{ + b *= 2; + return vec_perm(a, a, (vector unsigned char) + (b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1)); +} + +static vector pixel __ATTRS_o_ai +vec_splat(vector pixel a, unsigned char b) +{ + b *= 2; + return vec_perm(a, a, (vector unsigned char) (b, b+1, 
b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1)); } @@ -3672,7 +5616,15 @@ static vector unsigned int __ATTRS_o_ai vec_splat(vector unsigned int a, unsigned char b) { b *= 4; - return (vector unsigned int)vec_perm(a, a, (vector unsigned char) + return vec_perm(a, a, (vector unsigned char) + (b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3)); +} + +static vector bool int __ATTRS_o_ai +vec_splat(vector bool int a, unsigned char b) +{ + b *= 4; + return vec_perm(a, a, (vector unsigned char) (b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3)); } @@ -3680,7 +5632,7 @@ static vector float __ATTRS_o_ai vec_splat(vector float a, unsigned char b) { b *= 4; - return (vector float)vec_perm(a, a, (vector unsigned char) + return vec_perm(a, a, (vector unsigned char) (b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3)); } @@ -3691,13 +5643,19 @@ vec_splat(vector float a, unsigned char b) static vector signed char __ATTRS_o_ai vec_vspltb(vector signed char a, unsigned char b) { - return (vector signed char)vec_perm(a, a, (vector unsigned char)(b)); + return vec_perm(a, a, (vector unsigned char)(b)); } static vector unsigned char __ATTRS_o_ai vec_vspltb(vector unsigned char a, unsigned char b) { - return (vector unsigned char)vec_perm(a, a, (vector unsigned char)(b)); + return vec_perm(a, a, (vector unsigned char)(b)); +} + +static vector bool char __ATTRS_o_ai +vec_vspltb(vector bool char a, unsigned char b) +{ + return vec_perm(a, a, (vector unsigned char)(b)); } /* vec_vsplth */ @@ -3708,7 +5666,7 @@ static vector short __ATTRS_o_ai vec_vsplth(vector short a, unsigned char b) { b *= 2; - return (vector short)vec_perm(a, a, (vector unsigned char) + return vec_perm(a, a, (vector unsigned char) (b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1)); } @@ -3716,7 +5674,23 @@ static vector unsigned short __ATTRS_o_ai vec_vsplth(vector unsigned short a, unsigned char b) { b *= 2; - return (vector 
unsigned short)vec_perm(a, a, (vector unsigned char) + return vec_perm(a, a, (vector unsigned char) + (b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1)); +} + +static vector bool short __ATTRS_o_ai +vec_vsplth(vector bool short a, unsigned char b) +{ + b *= 2; + return vec_perm(a, a, (vector unsigned char) + (b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1)); +} + +static vector pixel __ATTRS_o_ai +vec_vsplth(vector pixel a, unsigned char b) +{ + b *= 2; + return vec_perm(a, a, (vector unsigned char) (b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1, b, b+1)); } @@ -3728,7 +5702,7 @@ static vector int __ATTRS_o_ai vec_vspltw(vector int a, unsigned char b) { b *= 4; - return (vector int)vec_perm(a, a, (vector unsigned char) + return vec_perm(a, a, (vector unsigned char) (b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3)); } @@ -3736,7 +5710,15 @@ static vector unsigned int __ATTRS_o_ai vec_vspltw(vector unsigned int a, unsigned char b) { b *= 4; - return (vector unsigned int)vec_perm(a, a, (vector unsigned char) + return vec_perm(a, a, (vector unsigned char) + (b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3)); +} + +static vector bool int __ATTRS_o_ai +vec_vspltw(vector bool int a, unsigned char b) +{ + b *= 4; + return vec_perm(a, a, (vector unsigned char) (b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3)); } @@ -3744,7 +5726,7 @@ static vector float __ATTRS_o_ai vec_vspltw(vector float a, unsigned char b) { b *= 4; - return (vector float)vec_perm(a, a, (vector unsigned char) + return vec_perm(a, a, (vector unsigned char) (b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3, b, b+1, b+2, b+3)); } @@ -4039,6 +6021,24 @@ vec_srl(vector unsigned char a, vector unsigned int b) return (vector unsigned char)__builtin_altivec_vsr((vector int)a, (vector int)b); } +static vector bool char __ATTRS_o_ai +vec_srl(vector bool char a, vector unsigned char b) +{ + return 
(vector bool char)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool char __ATTRS_o_ai +vec_srl(vector bool char a, vector unsigned short b) +{ + return (vector bool char)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool char __ATTRS_o_ai +vec_srl(vector bool char a, vector unsigned int b) +{ + return (vector bool char)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + static vector short __ATTRS_o_ai vec_srl(vector short a, vector unsigned char b) { @@ -4075,6 +6075,42 @@ vec_srl(vector unsigned short a, vector unsigned int b) return (vector unsigned short)__builtin_altivec_vsr((vector int)a, (vector int)b); } +static vector bool short __ATTRS_o_ai +vec_srl(vector bool short a, vector unsigned char b) +{ + return (vector bool short)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool short __ATTRS_o_ai +vec_srl(vector bool short a, vector unsigned short b) +{ + return (vector bool short)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool short __ATTRS_o_ai +vec_srl(vector bool short a, vector unsigned int b) +{ + return (vector bool short)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_srl(vector pixel a, vector unsigned char b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_srl(vector pixel a, vector unsigned short b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_srl(vector pixel a, vector unsigned int b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + static vector int __ATTRS_o_ai vec_srl(vector int a, vector unsigned char b) { @@ -4111,6 +6147,24 @@ vec_srl(vector unsigned int a, vector unsigned int b) return (vector unsigned int)__builtin_altivec_vsr((vector int)a, (vector int)b); } +static vector bool int 
__ATTRS_o_ai +vec_srl(vector bool int a, vector unsigned char b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool int __ATTRS_o_ai +vec_srl(vector bool int a, vector unsigned short b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool int __ATTRS_o_ai +vec_srl(vector bool int a, vector unsigned int b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + /* vec_vsr */ static vector signed char __ATTRS_o_ai @@ -4149,6 +6203,24 @@ vec_vsr(vector unsigned char a, vector unsigned int b) return (vector unsigned char)__builtin_altivec_vsr((vector int)a, (vector int)b); } +static vector bool char __ATTRS_o_ai +vec_vsr(vector bool char a, vector unsigned char b) +{ + return (vector bool char)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool char __ATTRS_o_ai +vec_vsr(vector bool char a, vector unsigned short b) +{ + return (vector bool char)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool char __ATTRS_o_ai +vec_vsr(vector bool char a, vector unsigned int b) +{ + return (vector bool char)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + static vector short __ATTRS_o_ai vec_vsr(vector short a, vector unsigned char b) { @@ -4185,6 +6257,42 @@ vec_vsr(vector unsigned short a, vector unsigned int b) return (vector unsigned short)__builtin_altivec_vsr((vector int)a, (vector int)b); } +static vector bool short __ATTRS_o_ai +vec_vsr(vector bool short a, vector unsigned char b) +{ + return (vector bool short)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool short __ATTRS_o_ai +vec_vsr(vector bool short a, vector unsigned short b) +{ + return (vector bool short)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool short __ATTRS_o_ai +vec_vsr(vector bool short a, vector unsigned int b) +{ + return (vector bool 
short)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_vsr(vector pixel a, vector unsigned char b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_vsr(vector pixel a, vector unsigned short b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_vsr(vector pixel a, vector unsigned int b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + static vector int __ATTRS_o_ai vec_vsr(vector int a, vector unsigned char b) { @@ -4221,6 +6329,24 @@ vec_vsr(vector unsigned int a, vector unsigned int b) return (vector unsigned int)__builtin_altivec_vsr((vector int)a, (vector int)b); } +static vector bool int __ATTRS_o_ai +vec_vsr(vector bool int a, vector unsigned char b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool int __ATTRS_o_ai +vec_vsr(vector bool int a, vector unsigned short b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + +static vector bool int __ATTRS_o_ai +vec_vsr(vector bool int a, vector unsigned int b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)a, (vector int)b); +} + /* vec_sro */ static vector signed char __ATTRS_o_ai @@ -4271,6 +6397,18 @@ vec_sro(vector unsigned short a, vector unsigned char b) return (vector unsigned short)__builtin_altivec_vsro((vector int)a, (vector int)b); } +static vector pixel __ATTRS_o_ai +vec_sro(vector pixel a, vector signed char b) +{ + return (vector pixel)__builtin_altivec_vsro((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_sro(vector pixel a, vector unsigned char b) +{ + return (vector pixel)__builtin_altivec_vsro((vector int)a, (vector int)b); +} + static vector int __ATTRS_o_ai vec_sro(vector int a, vector signed char b) { @@ -4357,6 +6495,18 @@ 
vec_vsro(vector unsigned short a, vector unsigned char b) return (vector unsigned short)__builtin_altivec_vsro((vector int)a, (vector int)b); } +static vector pixel __ATTRS_o_ai +vec_vsro(vector pixel a, vector signed char b) +{ + return (vector pixel)__builtin_altivec_vsro((vector int)a, (vector int)b); +} + +static vector pixel __ATTRS_o_ai +vec_vsro(vector pixel a, vector unsigned char b) +{ + return (vector pixel)__builtin_altivec_vsro((vector int)a, (vector int)b); +} + static vector int __ATTRS_o_ai vec_vsro(vector int a, vector signed char b) { @@ -4420,6 +6570,24 @@ vec_st(vector unsigned char a, int b, unsigned char *c) } static void __ATTRS_o_ai +vec_st(vector bool char a, int b, signed char *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_st(vector bool char a, int b, unsigned char *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_st(vector bool char a, int b, vector bool char *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_st(vector short a, int b, vector short *c) { __builtin_altivec_stvx((vector int)a, b, c); @@ -4444,6 +6612,42 @@ vec_st(vector unsigned short a, int b, unsigned short *c) } static void __ATTRS_o_ai +vec_st(vector bool short a, int b, short *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_st(vector bool short a, int b, unsigned short *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_st(vector bool short a, int b, vector bool short *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_st(vector pixel a, int b, short *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_st(vector pixel a, int b, unsigned short *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_st(vector pixel a, int b, vector pixel *c) +{ + 
__builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_st(vector int a, int b, vector int *c) { __builtin_altivec_stvx(a, b, c); @@ -4468,6 +6672,24 @@ vec_st(vector unsigned int a, int b, unsigned int *c) } static void __ATTRS_o_ai +vec_st(vector bool int a, int b, int *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_st(vector bool int a, int b, unsigned int *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_st(vector bool int a, int b, vector bool int *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_st(vector float a, int b, vector float *c) { __builtin_altivec_stvx((vector int)a, b, c); @@ -4506,6 +6728,24 @@ vec_stvx(vector unsigned char a, int b, unsigned char *c) } static void __ATTRS_o_ai +vec_stvx(vector bool char a, int b, signed char *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool char a, int b, unsigned char *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool char a, int b, vector bool char *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_stvx(vector short a, int b, vector short *c) { __builtin_altivec_stvx((vector int)a, b, c); @@ -4530,6 +6770,42 @@ vec_stvx(vector unsigned short a, int b, unsigned short *c) } static void __ATTRS_o_ai +vec_stvx(vector bool short a, int b, short *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool short a, int b, unsigned short *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool short a, int b, vector bool short *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvx(vector pixel a, int b, short *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai 
+vec_stvx(vector pixel a, int b, unsigned short *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvx(vector pixel a, int b, vector pixel *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_stvx(vector int a, int b, vector int *c) { __builtin_altivec_stvx(a, b, c); @@ -4554,6 +6830,24 @@ vec_stvx(vector unsigned int a, int b, unsigned int *c) } static void __ATTRS_o_ai +vec_stvx(vector bool int a, int b, int *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool int a, int b, unsigned int *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool int a, int b, vector bool int *c) +{ + __builtin_altivec_stvx((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_stvx(vector float a, int b, vector float *c) { __builtin_altivec_stvx((vector int)a, b, c); @@ -4580,6 +6874,18 @@ vec_ste(vector unsigned char a, int b, unsigned char *c) } static void __ATTRS_o_ai +vec_ste(vector bool char a, int b, signed char *c) +{ + __builtin_altivec_stvebx((vector char)a, b, c); +} + +static void __ATTRS_o_ai +vec_ste(vector bool char a, int b, unsigned char *c) +{ + __builtin_altivec_stvebx((vector char)a, b, c); +} + +static void __ATTRS_o_ai vec_ste(vector short a, int b, short *c) { __builtin_altivec_stvehx(a, b, c); @@ -4592,6 +6898,30 @@ vec_ste(vector unsigned short a, int b, unsigned short *c) } static void __ATTRS_o_ai +vec_ste(vector bool short a, int b, short *c) +{ + __builtin_altivec_stvehx((vector short)a, b, c); +} + +static void __ATTRS_o_ai +vec_ste(vector bool short a, int b, unsigned short *c) +{ + __builtin_altivec_stvehx((vector short)a, b, c); +} + +static void __ATTRS_o_ai +vec_ste(vector pixel a, int b, short *c) +{ + __builtin_altivec_stvehx((vector short)a, b, c); +} + +static void __ATTRS_o_ai +vec_ste(vector pixel a, int b, unsigned short *c) +{ + __builtin_altivec_stvehx((vector 
short)a, b, c); +} + +static void __ATTRS_o_ai vec_ste(vector int a, int b, int *c) { __builtin_altivec_stvewx(a, b, c); @@ -4604,6 +6934,18 @@ vec_ste(vector unsigned int a, int b, unsigned int *c) } static void __ATTRS_o_ai +vec_ste(vector bool int a, int b, int *c) +{ + __builtin_altivec_stvewx((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_ste(vector bool int a, int b, unsigned int *c) +{ + __builtin_altivec_stvewx((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_ste(vector float a, int b, float *c) { __builtin_altivec_stvewx((vector int)a, b, c); @@ -4623,6 +6965,18 @@ vec_stvebx(vector unsigned char a, int b, unsigned char *c) __builtin_altivec_stvebx((vector char)a, b, c); } +static void __ATTRS_o_ai +vec_stvebx(vector bool char a, int b, signed char *c) +{ + __builtin_altivec_stvebx((vector char)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvebx(vector bool char a, int b, unsigned char *c) +{ + __builtin_altivec_stvebx((vector char)a, b, c); +} + /* vec_stvehx */ static void __ATTRS_o_ai @@ -4637,6 +6991,30 @@ vec_stvehx(vector unsigned short a, int b, unsigned short *c) __builtin_altivec_stvehx((vector short)a, b, c); } +static void __ATTRS_o_ai +vec_stvehx(vector bool short a, int b, short *c) +{ + __builtin_altivec_stvehx((vector short)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvehx(vector bool short a, int b, unsigned short *c) +{ + __builtin_altivec_stvehx((vector short)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvehx(vector pixel a, int b, short *c) +{ + __builtin_altivec_stvehx((vector short)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvehx(vector pixel a, int b, unsigned short *c) +{ + __builtin_altivec_stvehx((vector short)a, b, c); +} + /* vec_stvewx */ static void __ATTRS_o_ai @@ -4652,6 +7030,18 @@ vec_stvewx(vector unsigned int a, int b, unsigned int *c) } static void __ATTRS_o_ai +vec_stvewx(vector bool int a, int b, int *c) +{ + __builtin_altivec_stvewx((vector int)a, b, c); +} + +static void __ATTRS_o_ai 
+vec_stvewx(vector bool int a, int b, unsigned int *c) +{ + __builtin_altivec_stvewx((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_stvewx(vector float a, int b, float *c) { __builtin_altivec_stvewx((vector int)a, b, c); @@ -4684,6 +7074,24 @@ vec_stl(vector unsigned char a, int b, unsigned char *c) } static void __ATTRS_o_ai +vec_stl(vector bool char a, int b, signed char *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool char a, int b, unsigned char *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool char a, int b, vector bool char *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_stl(vector short a, int b, vector short *c) { __builtin_altivec_stvxl((vector int)a, b, c); @@ -4708,6 +7116,42 @@ vec_stl(vector unsigned short a, int b, unsigned short *c) } static void __ATTRS_o_ai +vec_stl(vector bool short a, int b, short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool short a, int b, unsigned short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool short a, int b, vector bool short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stl(vector pixel a, int b, short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stl(vector pixel a, int b, unsigned short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stl(vector pixel a, int b, vector pixel *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_stl(vector int a, int b, vector int *c) { __builtin_altivec_stvxl(a, b, c); @@ -4732,6 +7176,24 @@ vec_stl(vector unsigned int a, int b, unsigned int *c) } static void __ATTRS_o_ai +vec_stl(vector bool int a, int b, int *c) +{ + 
__builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool int a, int b, unsigned int *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool int a, int b, vector bool int *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_stl(vector float a, int b, vector float *c) { __builtin_altivec_stvxl((vector int)a, b, c); @@ -4770,6 +7232,24 @@ vec_stvxl(vector unsigned char a, int b, unsigned char *c) } static void __ATTRS_o_ai +vec_stvxl(vector bool char a, int b, signed char *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool char a, int b, unsigned char *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool char a, int b, vector bool char *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_stvxl(vector short a, int b, vector short *c) { __builtin_altivec_stvxl((vector int)a, b, c); @@ -4794,6 +7274,42 @@ vec_stvxl(vector unsigned short a, int b, unsigned short *c) } static void __ATTRS_o_ai +vec_stvxl(vector bool short a, int b, short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool short a, int b, unsigned short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool short a, int b, vector bool short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector pixel a, int b, short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector pixel a, int b, unsigned short *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector pixel a, int b, vector pixel *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai 
vec_stvxl(vector int a, int b, vector int *c) { __builtin_altivec_stvxl(a, b, c); @@ -4818,6 +7334,24 @@ vec_stvxl(vector unsigned int a, int b, unsigned int *c) } static void __ATTRS_o_ai +vec_stvxl(vector bool int a, int b, int *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool int a, int b, unsigned int *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool int a, int b, vector bool int *c) +{ + __builtin_altivec_stvxl((vector int)a, b, c); +} + +static void __ATTRS_o_ai vec_stvxl(vector float a, int b, vector float *c) { __builtin_altivec_stvxl((vector int)a, b, c); @@ -4837,36 +7371,108 @@ vec_sub(vector signed char a, vector signed char b) return a - b; } +static vector signed char __ATTRS_o_ai +vec_sub(vector bool char a, vector signed char b) +{ + return (vector signed char)a - b; +} + +static vector signed char __ATTRS_o_ai +vec_sub(vector signed char a, vector bool char b) +{ + return a - (vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_sub(vector unsigned char a, vector unsigned char b) { return a - b; } +static vector unsigned char __ATTRS_o_ai +vec_sub(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a - b; +} + +static vector unsigned char __ATTRS_o_ai +vec_sub(vector unsigned char a, vector bool char b) +{ + return a - (vector unsigned char)b; +} + static vector short __ATTRS_o_ai vec_sub(vector short a, vector short b) { return a - b; } +static vector short __ATTRS_o_ai +vec_sub(vector bool short a, vector short b) +{ + return (vector short)a - b; +} + +static vector short __ATTRS_o_ai +vec_sub(vector short a, vector bool short b) +{ + return a - (vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_sub(vector unsigned short a, vector unsigned short b) { return a - b; } +static vector unsigned short __ATTRS_o_ai +vec_sub(vector bool short a, vector unsigned short b) +{ + 
return (vector unsigned short)a - b; +} + +static vector unsigned short __ATTRS_o_ai +vec_sub(vector unsigned short a, vector bool short b) +{ + return a - (vector unsigned short)b; +} + static vector int __ATTRS_o_ai vec_sub(vector int a, vector int b) { return a - b; } +static vector int __ATTRS_o_ai +vec_sub(vector bool int a, vector int b) +{ + return (vector int)a - b; +} + +static vector int __ATTRS_o_ai +vec_sub(vector int a, vector bool int b) +{ + return a - (vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_sub(vector unsigned int a, vector unsigned int b) { return a - b; } +static vector unsigned int __ATTRS_o_ai +vec_sub(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a - b; +} + +static vector unsigned int __ATTRS_o_ai +vec_sub(vector unsigned int a, vector bool int b) +{ + return a - (vector unsigned int)b; +} + static vector float __ATTRS_o_ai vec_sub(vector float a, vector float b) { @@ -4883,12 +7489,36 @@ vec_vsububm(vector signed char a, vector signed char b) return a - b; } +static vector signed char __ATTRS_o_ai +vec_vsububm(vector bool char a, vector signed char b) +{ + return (vector signed char)a - b; +} + +static vector signed char __ATTRS_o_ai +vec_vsububm(vector signed char a, vector bool char b) +{ + return a - (vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_vsububm(vector unsigned char a, vector unsigned char b) { return a - b; } +static vector unsigned char __ATTRS_o_ai +vec_vsububm(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a - b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vsububm(vector unsigned char a, vector bool char b) +{ + return a - (vector unsigned char)b; +} + /* vec_vsubuhm */ #define __builtin_altivec_vsubuhm vec_vsubuhm @@ -4899,12 +7529,36 @@ vec_vsubuhm(vector short a, vector short b) return a - b; } +static vector short __ATTRS_o_ai +vec_vsubuhm(vector bool short a, vector short b) +{ + return (vector short)a - 
b; +} + +static vector short __ATTRS_o_ai +vec_vsubuhm(vector short a, vector bool short b) +{ + return a - (vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_vsubuhm(vector unsigned short a, vector unsigned short b) { return a - b; } +static vector unsigned short __ATTRS_o_ai +vec_vsubuhm(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a - b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vsubuhm(vector unsigned short a, vector bool short b) +{ + return a - (vector unsigned short)b; +} + /* vec_vsubuwm */ #define __builtin_altivec_vsubuwm vec_vsubuwm @@ -4915,12 +7569,36 @@ vec_vsubuwm(vector int a, vector int b) return a - b; } +static vector int __ATTRS_o_ai +vec_vsubuwm(vector bool int a, vector int b) +{ + return (vector int)a - b; +} + +static vector int __ATTRS_o_ai +vec_vsubuwm(vector int a, vector bool int b) +{ + return a - (vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector unsigned int a, vector unsigned int b) { return a - b; } +static vector unsigned int __ATTRS_o_ai +vec_vsubuwm(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a - b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vsubuwm(vector unsigned int a, vector bool int b) +{ + return a - (vector unsigned int)b; +} + /* vec_vsubfp */ #define __builtin_altivec_vsubfp vec_vsubfp @@ -4955,84 +7633,228 @@ vec_subs(vector signed char a, vector signed char b) return __builtin_altivec_vsubsbs(a, b); } +static vector signed char __ATTRS_o_ai +vec_subs(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vsubsbs((vector signed char)a, b); +} + +static vector signed char __ATTRS_o_ai +vec_subs(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vsubsbs(a, (vector signed char)b); +} + static vector unsigned char __ATTRS_o_ai vec_subs(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vsububs(a, b); } +static vector unsigned char 
__ATTRS_o_ai +vec_subs(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vsububs((vector unsigned char)a, b); +} + +static vector unsigned char __ATTRS_o_ai +vec_subs(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vsububs(a, (vector unsigned char)b); +} + static vector short __ATTRS_o_ai vec_subs(vector short a, vector short b) { return __builtin_altivec_vsubshs(a, b); } +static vector short __ATTRS_o_ai +vec_subs(vector bool short a, vector short b) +{ + return __builtin_altivec_vsubshs((vector short)a, b); +} + +static vector short __ATTRS_o_ai +vec_subs(vector short a, vector bool short b) +{ + return __builtin_altivec_vsubshs(a, (vector short)b); +} + static vector unsigned short __ATTRS_o_ai vec_subs(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vsubuhs(a, b); } +static vector unsigned short __ATTRS_o_ai +vec_subs(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vsubuhs((vector unsigned short)a, b); +} + +static vector unsigned short __ATTRS_o_ai +vec_subs(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vsubuhs(a, (vector unsigned short)b); +} + static vector int __ATTRS_o_ai vec_subs(vector int a, vector int b) { return __builtin_altivec_vsubsws(a, b); } +static vector int __ATTRS_o_ai +vec_subs(vector bool int a, vector int b) +{ + return __builtin_altivec_vsubsws((vector int)a, b); +} + +static vector int __ATTRS_o_ai +vec_subs(vector int a, vector bool int b) +{ + return __builtin_altivec_vsubsws(a, (vector int)b); +} + static vector unsigned int __ATTRS_o_ai vec_subs(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vsubuws(a, b); } +static vector unsigned int __ATTRS_o_ai +vec_subs(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vsubuws((vector unsigned int)a, b); +} + +static vector unsigned int __ATTRS_o_ai +vec_subs(vector unsigned int a, vector bool int b) +{ + return 
__builtin_altivec_vsubuws(a, (vector unsigned int)b); +} + /* vec_vsubsbs */ -static vector signed char __attribute__((__always_inline__)) +static vector signed char __ATTRS_o_ai vec_vsubsbs(vector signed char a, vector signed char b) { return __builtin_altivec_vsubsbs(a, b); } +static vector signed char __ATTRS_o_ai +vec_vsubsbs(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vsubsbs((vector signed char)a, b); +} + +static vector signed char __ATTRS_o_ai +vec_vsubsbs(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vsubsbs(a, (vector signed char)b); +} + /* vec_vsububs */ -static vector unsigned char __attribute__((__always_inline__)) +static vector unsigned char __ATTRS_o_ai vec_vsububs(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vsububs(a, b); } +static vector unsigned char __ATTRS_o_ai +vec_vsububs(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vsububs((vector unsigned char)a, b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsububs(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vsububs(a, (vector unsigned char)b); +} + /* vec_vsubshs */ -static vector short __attribute__((__always_inline__)) +static vector short __ATTRS_o_ai vec_vsubshs(vector short a, vector short b) { return __builtin_altivec_vsubshs(a, b); } +static vector short __ATTRS_o_ai +vec_vsubshs(vector bool short a, vector short b) +{ + return __builtin_altivec_vsubshs((vector short)a, b); +} + +static vector short __ATTRS_o_ai +vec_vsubshs(vector short a, vector bool short b) +{ + return __builtin_altivec_vsubshs(a, (vector short)b); +} + /* vec_vsubuhs */ -static vector unsigned short __attribute__((__always_inline__)) +static vector unsigned short __ATTRS_o_ai vec_vsubuhs(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vsubuhs(a, b); } +static vector unsigned short __ATTRS_o_ai +vec_vsubuhs(vector bool short a, vector unsigned 
short b) +{ + return __builtin_altivec_vsubuhs((vector unsigned short)a, b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsubuhs(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vsubuhs(a, (vector unsigned short)b); +} + /* vec_vsubsws */ -static vector int __attribute__((__always_inline__)) +static vector int __ATTRS_o_ai vec_vsubsws(vector int a, vector int b) { return __builtin_altivec_vsubsws(a, b); } +static vector int __ATTRS_o_ai +vec_vsubsws(vector bool int a, vector int b) +{ + return __builtin_altivec_vsubsws((vector int)a, b); +} + +static vector int __ATTRS_o_ai +vec_vsubsws(vector int a, vector bool int b) +{ + return __builtin_altivec_vsubsws(a, (vector int)b); +} + /* vec_vsubuws */ -static vector unsigned int __attribute__((__always_inline__)) +static vector unsigned int __ATTRS_o_ai vec_vsubuws(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vsubuws(a, b); } +static vector unsigned int __ATTRS_o_ai +vec_vsubuws(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vsubuws((vector unsigned int)a, b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsubuws(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vsubuws(a, (vector unsigned int)b); +} + /* vec_sum4s */ static vector int __ATTRS_o_ai @@ -5133,28 +7955,64 @@ vec_unpackh(vector signed char a) return __builtin_altivec_vupkhsb((vector char)a); } +static vector bool short __ATTRS_o_ai +vec_unpackh(vector bool char a) +{ + return (vector bool short)__builtin_altivec_vupkhsb((vector char)a); +} + static vector int __ATTRS_o_ai vec_unpackh(vector short a) { return __builtin_altivec_vupkhsh(a); } +static vector bool int __ATTRS_o_ai +vec_unpackh(vector bool short a) +{ + return (vector bool int)__builtin_altivec_vupkhsh((vector short)a); +} + +static vector unsigned int __ATTRS_o_ai +vec_unpackh(vector pixel a) +{ + return (vector unsigned int)__builtin_altivec_vupkhsh((vector short)a); +} + /* 
vec_vupkhsb */ -static vector short __attribute__((__always_inline__)) +static vector short __ATTRS_o_ai vec_vupkhsb(vector signed char a) { return __builtin_altivec_vupkhsb((vector char)a); } +static vector bool short __ATTRS_o_ai +vec_vupkhsb(vector bool char a) +{ + return (vector bool short)__builtin_altivec_vupkhsb((vector char)a); +} + /* vec_vupkhsh */ -static vector int __attribute__((__always_inline__)) +static vector int __ATTRS_o_ai vec_vupkhsh(vector short a) { return __builtin_altivec_vupkhsh(a); } +static vector bool int __ATTRS_o_ai +vec_vupkhsh(vector bool short a) +{ + return (vector bool int)__builtin_altivec_vupkhsh((vector short)a); +} + +static vector unsigned int __ATTRS_o_ai +vec_vupkhsh(vector pixel a) +{ + return (vector unsigned int)__builtin_altivec_vupkhsh((vector short)a); +} + /* vec_unpackl */ static vector short __ATTRS_o_ai @@ -5163,28 +8021,64 @@ vec_unpackl(vector signed char a) return __builtin_altivec_vupklsb((vector char)a); } +static vector bool short __ATTRS_o_ai +vec_unpackl(vector bool char a) +{ + return (vector bool short)__builtin_altivec_vupklsb((vector char)a); +} + static vector int __ATTRS_o_ai vec_unpackl(vector short a) { return __builtin_altivec_vupklsh(a); } +static vector bool int __ATTRS_o_ai +vec_unpackl(vector bool short a) +{ + return (vector bool int)__builtin_altivec_vupklsh((vector short)a); +} + +static vector unsigned int __ATTRS_o_ai +vec_unpackl(vector pixel a) +{ + return (vector unsigned int)__builtin_altivec_vupklsh((vector short)a); +} + /* vec_vupklsb */ -static vector short __attribute__((__always_inline__)) +static vector short __ATTRS_o_ai vec_vupklsb(vector signed char a) { return __builtin_altivec_vupklsb((vector char)a); } +static vector bool short __ATTRS_o_ai +vec_vupklsb(vector bool char a) +{ + return (vector bool short)__builtin_altivec_vupklsb((vector char)a); +} + /* vec_vupklsh */ -static vector int __attribute__((__always_inline__)) +static vector int __ATTRS_o_ai 
vec_vupklsh(vector short a) { return __builtin_altivec_vupklsh(a); } +static vector bool int __ATTRS_o_ai +vec_vupklsh(vector bool short a) +{ + return (vector bool int)__builtin_altivec_vupklsh((vector short)a); +} + +static vector unsigned int __ATTRS_o_ai +vec_vupklsh(vector pixel a) +{ + return (vector unsigned int)__builtin_altivec_vupklsh((vector short)a); +} + /* vec_xor */ #define __builtin_altivec_vxor vec_xor @@ -5195,36 +8089,126 @@ vec_xor(vector signed char a, vector signed char b) return a ^ b; } +static vector signed char __ATTRS_o_ai +vec_xor(vector bool char a, vector signed char b) +{ + return (vector signed char)a ^ b; +} + +static vector signed char __ATTRS_o_ai +vec_xor(vector signed char a, vector bool char b) +{ + return a ^ (vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char a, vector unsigned char b) { return a ^ b; } +static vector unsigned char __ATTRS_o_ai +vec_xor(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a ^ b; +} + +static vector unsigned char __ATTRS_o_ai +vec_xor(vector unsigned char a, vector bool char b) +{ + return a ^ (vector unsigned char)b; +} + +static vector bool char __ATTRS_o_ai +vec_xor(vector bool char a, vector bool char b) +{ + return a ^ b; +} + static vector short __ATTRS_o_ai vec_xor(vector short a, vector short b) { return a ^ b; } +static vector short __ATTRS_o_ai +vec_xor(vector bool short a, vector short b) +{ + return (vector short)a ^ b; +} + +static vector short __ATTRS_o_ai +vec_xor(vector short a, vector bool short b) +{ + return a ^ (vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_xor(vector unsigned short a, vector unsigned short b) { return a ^ b; } +static vector unsigned short __ATTRS_o_ai +vec_xor(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a ^ b; +} + +static vector unsigned short __ATTRS_o_ai +vec_xor(vector unsigned short a, vector bool short b) +{ + return a ^ 
(vector unsigned short)b; +} + +static vector bool short __ATTRS_o_ai +vec_xor(vector bool short a, vector bool short b) +{ + return a ^ b; +} + static vector int __ATTRS_o_ai vec_xor(vector int a, vector int b) { return a ^ b; } +static vector int __ATTRS_o_ai +vec_xor(vector bool int a, vector int b) +{ + return (vector int)a ^ b; +} + +static vector int __ATTRS_o_ai +vec_xor(vector int a, vector bool int b) +{ + return a ^ (vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_xor(vector unsigned int a, vector unsigned int b) { return a ^ b; } +static vector unsigned int __ATTRS_o_ai +vec_xor(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a ^ b; +} + +static vector unsigned int __ATTRS_o_ai +vec_xor(vector unsigned int a, vector bool int b) +{ + return a ^ (vector unsigned int)b; +} + +static vector bool int __ATTRS_o_ai +vec_xor(vector bool int a, vector bool int b) +{ + return a ^ b; +} + static vector float __ATTRS_o_ai vec_xor(vector float a, vector float b) { @@ -5232,6 +8216,20 @@ vec_xor(vector float a, vector float b) return (vector float)res; } +static vector float __ATTRS_o_ai +vec_xor(vector bool int a, vector float b) +{ + vector unsigned int res = (vector unsigned int)a ^ (vector unsigned int)b; + return (vector float)res; +} + +static vector float __ATTRS_o_ai +vec_xor(vector float a, vector bool int b) +{ + vector unsigned int res = (vector unsigned int)a ^ (vector unsigned int)b; + return (vector float)res; +} + /* vec_vxor */ static vector signed char __ATTRS_o_ai @@ -5240,36 +8238,126 @@ vec_vxor(vector signed char a, vector signed char b) return a ^ b; } +static vector signed char __ATTRS_o_ai +vec_vxor(vector bool char a, vector signed char b) +{ + return (vector signed char)a ^ b; +} + +static vector signed char __ATTRS_o_ai +vec_vxor(vector signed char a, vector bool char b) +{ + return a ^ (vector signed char)b; +} + static vector unsigned char __ATTRS_o_ai vec_vxor(vector unsigned char a, vector 
unsigned char b) { return a ^ b; } +static vector unsigned char __ATTRS_o_ai +vec_vxor(vector bool char a, vector unsigned char b) +{ + return (vector unsigned char)a ^ b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vxor(vector unsigned char a, vector bool char b) +{ + return a ^ (vector unsigned char)b; +} + +static vector bool char __ATTRS_o_ai +vec_vxor(vector bool char a, vector bool char b) +{ + return a ^ b; +} + static vector short __ATTRS_o_ai vec_vxor(vector short a, vector short b) { return a ^ b; } +static vector short __ATTRS_o_ai +vec_vxor(vector bool short a, vector short b) +{ + return (vector short)a ^ b; +} + +static vector short __ATTRS_o_ai +vec_vxor(vector short a, vector bool short b) +{ + return a ^ (vector short)b; +} + static vector unsigned short __ATTRS_o_ai vec_vxor(vector unsigned short a, vector unsigned short b) { return a ^ b; } +static vector unsigned short __ATTRS_o_ai +vec_vxor(vector bool short a, vector unsigned short b) +{ + return (vector unsigned short)a ^ b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vxor(vector unsigned short a, vector bool short b) +{ + return a ^ (vector unsigned short)b; +} + +static vector bool short __ATTRS_o_ai +vec_vxor(vector bool short a, vector bool short b) +{ + return a ^ b; +} + static vector int __ATTRS_o_ai vec_vxor(vector int a, vector int b) { return a ^ b; } +static vector int __ATTRS_o_ai +vec_vxor(vector bool int a, vector int b) +{ + return (vector int)a ^ b; +} + +static vector int __ATTRS_o_ai +vec_vxor(vector int a, vector bool int b) +{ + return a ^ (vector int)b; +} + static vector unsigned int __ATTRS_o_ai vec_vxor(vector unsigned int a, vector unsigned int b) { return a ^ b; } +static vector unsigned int __ATTRS_o_ai +vec_vxor(vector bool int a, vector unsigned int b) +{ + return (vector unsigned int)a ^ b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vxor(vector unsigned int a, vector bool int b) +{ + return a ^ (vector unsigned int)b; +} + +static vector 
bool int __ATTRS_o_ai +vec_vxor(vector bool int a, vector bool int b) +{ + return a ^ b; +} + static vector float __ATTRS_o_ai vec_vxor(vector float a, vector float b) { @@ -5277,6 +8365,20 @@ vec_vxor(vector float a, vector float b) return (vector float)res; } +static vector float __ATTRS_o_ai +vec_vxor(vector bool int a, vector float b) +{ + vector unsigned int res = (vector unsigned int)a ^ (vector unsigned int)b; + return (vector float)res; +} + +static vector float __ATTRS_o_ai +vec_vxor(vector float a, vector bool int b) +{ + vector unsigned int res = (vector unsigned int)a ^ (vector unsigned int)b; + return (vector float)res; +} + /* ------------------------------ predicates ------------------------------------ */ /* vec_all_eq */ @@ -5288,36 +8390,132 @@ vec_all_eq(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_all_eq(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai vec_all_eq(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)a, (vector char)b); } static int __ATTRS_o_ai +vec_all_eq(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai vec_all_eq(vector short a, vector short b) { return __builtin_altivec_vcmpequh_p(__CR6_LT, a, b); } static int __ATTRS_o_ai 
+vec_all_eq(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT, a, (vector short)b); +} + +static int __ATTRS_o_ai vec_all_eq(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)a, (vector short)b); } static int __ATTRS_o_ai +vec_all_eq(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector pixel a, vector pixel b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai vec_all_eq(vector int a, vector int b) { return __builtin_altivec_vcmpequw_p(__CR6_LT, a, b); } static int __ATTRS_o_ai +vec_all_eq(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, a, (vector int)b); +} + +static int __ATTRS_o_ai vec_all_eq(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)a, (vector int)b); } static int __ATTRS_o_ai +vec_all_eq(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool int a, vector unsigned int b) +{ + return 
__builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai vec_all_eq(vector float a, vector float b) { return __builtin_altivec_vcmpeqfp_p(__CR6_LT, a, b); @@ -5332,36 +8530,132 @@ vec_all_ge(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_all_ge(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, (vector signed char)b, a); +} + +static int __ATTRS_o_ai vec_all_ge(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ, b, a); } static int __ATTRS_o_ai +vec_all_ge(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)b, a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)b, + (vector unsigned char)a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, b, (vector unsigned char)a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)b, + (vector unsigned char)a); +} + +static int __ATTRS_o_ai vec_all_ge(vector short a, vector short b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, b, a); } static int __ATTRS_o_ai +vec_all_ge(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, (vector short)b, a); +} + +static int __ATTRS_o_ai vec_all_ge(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, b, a); } static int __ATTRS_o_ai +vec_all_ge(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, 
(vector unsigned short)b, a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)b, + (vector unsigned short)a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, b, (vector unsigned short)a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)b, + (vector unsigned short)a); +} + +static int __ATTRS_o_ai vec_all_ge(vector int a, vector int b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, b, a); } static int __ATTRS_o_ai +vec_all_ge(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, (vector int)b, a); +} + +static int __ATTRS_o_ai vec_all_ge(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, b, a); } static int __ATTRS_o_ai +vec_all_ge(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)b, a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)b, + (vector unsigned int)a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, b, (vector unsigned int)a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)b, + (vector unsigned int)a); +} + +static int __ATTRS_o_ai vec_all_ge(vector float a, vector float b) { return __builtin_altivec_vcmpgefp_p(__CR6_LT, a, b); @@ -5376,36 +8670,132 @@ vec_all_gt(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_all_gt(vector signed char a, vector bool char b) +{ + return 
__builtin_altivec_vcmpgtsb_p(__CR6_LT, a, (vector signed char)b); +} + +static int __ATTRS_o_ai vec_all_gt(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT, a, b); } static int __ATTRS_o_ai +vec_all_gt(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, a, (vector unsigned char)b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)a, + (vector unsigned char)b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)a, b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)a, + (vector unsigned char)b); +} + +static int __ATTRS_o_ai vec_all_gt(vector short a, vector short b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT, a, b); } static int __ATTRS_o_ai +vec_all_gt(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT, a, (vector short)b); +} + +static int __ATTRS_o_ai vec_all_gt(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT, a, b); } static int __ATTRS_o_ai +vec_all_gt(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, a, (vector unsigned short)b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)a, + (vector unsigned short)b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)a, b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector 
unsigned short)a, + (vector unsigned short)b); +} + +static int __ATTRS_o_ai vec_all_gt(vector int a, vector int b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT, a, b); } static int __ATTRS_o_ai +vec_all_gt(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT, a, (vector int)b); +} + +static int __ATTRS_o_ai vec_all_gt(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT, a, b); } static int __ATTRS_o_ai +vec_all_gt(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, a, (vector unsigned int)b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)a, + (vector unsigned int)b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)a, b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)a, + (vector unsigned int)b); +} + +static int __ATTRS_o_ai vec_all_gt(vector float a, vector float b) { return __builtin_altivec_vcmpgtfp_p(__CR6_LT, a, b); @@ -5428,36 +8818,132 @@ vec_all_le(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_all_le(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, a, (vector signed char)b); +} + +static int __ATTRS_o_ai vec_all_le(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ, a, b); } static int __ATTRS_o_ai +vec_all_le(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, a, (vector unsigned char)b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)a, + (vector unsigned char)b); +} + 
+static int __ATTRS_o_ai +vec_all_le(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)a, b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)a, + (vector unsigned char)b); +} + +static int __ATTRS_o_ai vec_all_le(vector short a, vector short b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, a, b); } static int __ATTRS_o_ai +vec_all_le(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, a, (vector short)b); +} + +static int __ATTRS_o_ai vec_all_le(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, a, b); } static int __ATTRS_o_ai +vec_all_le(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, a, (vector unsigned short)b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)a, + (vector unsigned short)b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)a, b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)a, + (vector unsigned short)b); +} + +static int __ATTRS_o_ai vec_all_le(vector int a, vector int b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, a, b); } static int __ATTRS_o_ai +vec_all_le(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, a, (vector int)b); +} + +static int __ATTRS_o_ai vec_all_le(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, a, b); } static int __ATTRS_o_ai +vec_all_le(vector unsigned int a, vector bool int b) +{ + return 
__builtin_altivec_vcmpgtuw_p(__CR6_EQ, a, (vector unsigned int)b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)a, + (vector unsigned int)b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)a, b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)a, + (vector unsigned int)b); +} + +static int __ATTRS_o_ai vec_all_le(vector float a, vector float b) { return __builtin_altivec_vcmpgefp_p(__CR6_LT, b, a); @@ -5472,36 +8958,132 @@ vec_all_lt(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_all_lt(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT, (vector signed char)b, a); +} + +static int __ATTRS_o_ai vec_all_lt(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT, b, a); } static int __ATTRS_o_ai +vec_all_lt(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)b, a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)b, + (vector unsigned char)a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, b, (vector unsigned char)a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)b, + (vector unsigned char)a); +} + +static int __ATTRS_o_ai vec_all_lt(vector short a, vector short b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT, b, a); } static int __ATTRS_o_ai +vec_all_lt(vector short a, vector bool 
short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT, (vector short)b, a); +} + +static int __ATTRS_o_ai vec_all_lt(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT, b, a); } static int __ATTRS_o_ai +vec_all_lt(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)b, a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)b, + (vector unsigned short)a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, b, (vector unsigned short)a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)b, + (vector unsigned short)a); +} + +static int __ATTRS_o_ai vec_all_lt(vector int a, vector int b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT, b, a); } static int __ATTRS_o_ai +vec_all_lt(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT, (vector int)b, a); +} + +static int __ATTRS_o_ai vec_all_lt(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT, b, a); } static int __ATTRS_o_ai +vec_all_lt(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)b, a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)b, + (vector unsigned int)a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, b, (vector unsigned int)a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)b, + 
(vector unsigned int)a); +} + +static int __ATTRS_o_ai vec_all_lt(vector float a, vector float b) { return __builtin_altivec_vcmpgtfp_p(__CR6_LT, b, a); @@ -5524,36 +9106,132 @@ vec_all_ne(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_all_ne(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai vec_all_ne(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)a, (vector char)b); } static int __ATTRS_o_ai +vec_all_ne(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai vec_all_ne(vector short a, vector short b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ, a, b); } static int __ATTRS_o_ai +vec_all_ne(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ, a, (vector short)b); +} + +static int __ATTRS_o_ai vec_all_ne(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)a, (vector short)b); } static int __ATTRS_o_ai +vec_all_ne(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector 
short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector pixel a, vector pixel b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai vec_all_ne(vector int a, vector int b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ, a, b); } static int __ATTRS_o_ai +vec_all_ne(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, a, (vector int)b); +} + +static int __ATTRS_o_ai vec_all_ne(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)a, (vector int)b); } static int __ATTRS_o_ai +vec_all_ne(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai vec_all_ne(vector float a, vector float b) { return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, a, b); @@ -5608,36 +9286,132 @@ vec_any_eq(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_any_eq(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)a, (vector char)b); +} + +static 
int __ATTRS_o_ai vec_any_eq(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)a, (vector char)b); } static int __ATTRS_o_ai +vec_any_eq(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai vec_any_eq(vector short a, vector short b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, a, b); } static int __ATTRS_o_ai +vec_any_eq(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, a, (vector short)b); +} + +static int __ATTRS_o_ai vec_any_eq(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)a, (vector short)b); } static int __ATTRS_o_ai +vec_any_eq(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)a, (vector 
short)b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector pixel a, vector pixel b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai vec_any_eq(vector int a, vector int b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, a, b); } static int __ATTRS_o_ai +vec_any_eq(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, a, (vector int)b); +} + +static int __ATTRS_o_ai vec_any_eq(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)a, (vector int)b); } static int __ATTRS_o_ai +vec_any_eq(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai vec_any_eq(vector float a, vector float b) { return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, a, b); @@ -5652,36 +9426,133 @@ vec_any_ge(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_any_ge(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, (vector signed char)b, a); +} + +static int __ATTRS_o_ai vec_any_ge(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, b, a); } static int __ATTRS_o_ai +vec_any_ge(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)b, a); +} + +static int __ATTRS_o_ai 
+vec_any_ge(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)b, + (vector unsigned char)a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, b, (vector unsigned char)a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)b, + (vector unsigned char)a); +} + +static int __ATTRS_o_ai vec_any_ge(vector short a, vector short b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, b, a); } static int __ATTRS_o_ai +vec_any_ge(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, (vector short)b, a); +} + +static int __ATTRS_o_ai vec_any_ge(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, b, a); } static int __ATTRS_o_ai +vec_any_ge(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)b, a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)b, + (vector unsigned short)a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool short a, vector unsigned short b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, b, (vector unsigned short)a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)b, + (vector unsigned short)a); +} + +static int __ATTRS_o_ai vec_any_ge(vector int a, vector int b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, b, a); } static int __ATTRS_o_ai +vec_any_ge(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, (vector int)b, a); +} + +static int __ATTRS_o_ai 
vec_any_ge(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, b, a); } static int __ATTRS_o_ai +vec_any_ge(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)b, a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)b, + (vector unsigned int)a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, b, (vector unsigned int)a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)b, + (vector unsigned int)a); +} + +static int __ATTRS_o_ai vec_any_ge(vector float a, vector float b) { return __builtin_altivec_vcmpgefp_p(__CR6_EQ_REV, a, b); @@ -5696,36 +9567,135 @@ vec_any_gt(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_any_gt(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, a, (vector signed char)b); +} + +static int __ATTRS_o_ai vec_any_gt(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, a, b); } static int __ATTRS_o_ai +vec_any_gt(vector unsigned char a, vector bool char b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, a, (vector unsigned char)b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)a, + (vector unsigned char)b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool char a, vector unsigned char b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)a, b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool char a, vector bool char b) +{ + return 
__builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)a, + (vector unsigned char)b); +} + +static int __ATTRS_o_ai vec_any_gt(vector short a, vector short b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, a, b); } static int __ATTRS_o_ai +vec_any_gt(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, a, (vector short)b); +} + +static int __ATTRS_o_ai vec_any_gt(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, a, b); } static int __ATTRS_o_ai +vec_any_gt(vector unsigned short a, vector bool short b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, a, (vector unsigned short)b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)a, + (vector unsigned short)b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)a, b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)a, + (vector unsigned short)b); +} + +static int __ATTRS_o_ai vec_any_gt(vector int a, vector int b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, a, b); } static int __ATTRS_o_ai +vec_any_gt(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, a, (vector int)b); +} + +static int __ATTRS_o_ai vec_any_gt(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, a, b); } static int __ATTRS_o_ai +vec_any_gt(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, a, (vector unsigned int)b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)a, + 
(vector unsigned int)b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)a, b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)a, + (vector unsigned int)b); +} + +static int __ATTRS_o_ai vec_any_gt(vector float a, vector float b) { return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, a, b); @@ -5740,36 +9710,136 @@ vec_any_le(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_any_le(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, a, (vector signed char)b); +} + +static int __ATTRS_o_ai vec_any_le(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, a, b); } static int __ATTRS_o_ai +vec_any_le(vector unsigned char a, vector bool char b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, a, (vector unsigned char)b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)a, + (vector unsigned char)b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool char a, vector unsigned char b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)a, b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)a, + (vector unsigned char)b); +} + +static int __ATTRS_o_ai vec_any_le(vector short a, vector short b) { return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, a, b); } static int __ATTRS_o_ai +vec_any_le(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, a, (vector short)b); +} + +static int __ATTRS_o_ai vec_any_le(vector unsigned short a, vector unsigned short b) { 
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, a, b); } static int __ATTRS_o_ai +vec_any_le(vector unsigned short a, vector bool short b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, a, (vector unsigned short)b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)a, + (vector unsigned short)b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool short a, vector unsigned short b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)a, b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)a, + (vector unsigned short)b); +} + +static int __ATTRS_o_ai vec_any_le(vector int a, vector int b) { return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, a, b); } static int __ATTRS_o_ai +vec_any_le(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, a, (vector int)b); +} + +static int __ATTRS_o_ai vec_any_le(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, a, b); } static int __ATTRS_o_ai +vec_any_le(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, a, (vector unsigned int)b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)a, + (vector unsigned int)b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)a, b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)a, + (vector unsigned int)b); +} + +static int __ATTRS_o_ai vec_any_le(vector float a, vector float b) { return 
__builtin_altivec_vcmpgefp_p(__CR6_EQ_REV, b, a); @@ -5784,36 +9854,136 @@ vec_any_lt(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_any_lt(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, (vector signed char)b, a); +} + +static int __ATTRS_o_ai vec_any_lt(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, b, a); } static int __ATTRS_o_ai +vec_any_lt(vector unsigned char a, vector bool char b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)b, a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)b, + (vector unsigned char)a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool char a, vector unsigned char b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, b, (vector unsigned char)a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)b, + (vector unsigned char)a); +} + +static int __ATTRS_o_ai vec_any_lt(vector short a, vector short b) { return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, b, a); } static int __ATTRS_o_ai +vec_any_lt(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, (vector short)b, a); +} + +static int __ATTRS_o_ai vec_any_lt(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, b, a); } static int __ATTRS_o_ai +vec_any_lt(vector unsigned short a, vector bool short b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)b, a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)b, + (vector unsigned short)a); +} + +static int __ATTRS_o_ai 
+vec_any_lt(vector bool short a, vector unsigned short b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, b, (vector unsigned short)a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)b, + (vector unsigned short)a); +} + +static int __ATTRS_o_ai vec_any_lt(vector int a, vector int b) { return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, b, a); } static int __ATTRS_o_ai +vec_any_lt(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, (vector int)b, a); +} + +static int __ATTRS_o_ai vec_any_lt(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, b, a); } static int __ATTRS_o_ai +vec_any_lt(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)b, a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)b, + (vector unsigned int)a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, b, (vector unsigned int)a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)b, + (vector unsigned int)a); +} + +static int __ATTRS_o_ai vec_any_lt(vector float a, vector float b) { return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, b, a); @@ -5836,36 +10006,132 @@ vec_any_ne(vector signed char a, vector signed char b) } static int __ATTRS_o_ai +vec_any_ne(vector signed char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai vec_any_ne(vector unsigned char a, vector unsigned char b) { return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)a, 
(vector char)b); } static int __ATTRS_o_ai +vec_any_ne(vector unsigned char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool char a, vector signed char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool char a, vector unsigned char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool char a, vector bool char b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)a, (vector char)b); +} + +static int __ATTRS_o_ai vec_any_ne(vector short a, vector short b) { return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, a, b); } static int __ATTRS_o_ai +vec_any_ne(vector short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, a, (vector short)b); +} + +static int __ATTRS_o_ai vec_any_ne(vector unsigned short a, vector unsigned short b) { return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)a, (vector short)b); } static int __ATTRS_o_ai +vec_any_ne(vector unsigned short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool short a, vector short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool short a, vector unsigned short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool short a, vector bool short b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)a, (vector short)b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector pixel a, vector pixel b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector 
short)a, (vector short)b); +} + +static int __ATTRS_o_ai vec_any_ne(vector int a, vector int b) { return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, a, b); } static int __ATTRS_o_ai +vec_any_ne(vector int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, a, (vector int)b); +} + +static int __ATTRS_o_ai vec_any_ne(vector unsigned int a, vector unsigned int b) { return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)a, (vector int)b); } static int __ATTRS_o_ai +vec_any_ne(vector unsigned int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool int a, vector int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool int a, vector unsigned int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool int a, vector bool int b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)a, (vector int)b); +} + +static int __ATTRS_o_ai vec_any_ne(vector float a, vector float b) { return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, a, b); diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h new file mode 100644 index 0000000..884d31c --- /dev/null +++ b/lib/Headers/avxintrin.h @@ -0,0 +1,1156 @@ +/*===---- avxintrin.h - AVX intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <avxintrin.h> directly; include <immintrin.h> instead." +#endif + +typedef double __v4df __attribute__ ((__vector_size__ (32))); +typedef float __v8sf __attribute__ ((__vector_size__ (32))); +typedef long long __v4di __attribute__ ((__vector_size__ (32))); +typedef int __v8si __attribute__ ((__vector_size__ (32))); +typedef short __v16hi __attribute__ ((__vector_size__ (32))); +typedef char __v32qi __attribute__ ((__vector_size__ (32))); + +typedef float __m256 __attribute__ ((__vector_size__ (32))); +typedef double __m256d __attribute__((__vector_size__(32))); +typedef long long __m256i __attribute__((__vector_size__(32))); + +/* Arithmetic */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_add_pd(__m256d a, __m256d b) +{ + return a+b; +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_add_ps(__m256 a, __m256 b) +{ + return a+b; +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_sub_pd(__m256d a, __m256d b) +{ + return a-b; +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_sub_ps(__m256 a, __m256 b) +{ + return a-b; +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) 
+_mm256_addsub_pd(__m256d a, __m256d b) +{ + return (__m256d)__builtin_ia32_addsubpd256((__v4df)a, (__v4df)b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_addsub_ps(__m256 a, __m256 b) +{ + return (__m256)__builtin_ia32_addsubps256((__v8sf)a, (__v8sf)b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_div_pd(__m256d a, __m256d b) +{ + return a / b; +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_div_ps(__m256 a, __m256 b) +{ + return a / b; +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_max_pd(__m256d a, __m256d b) +{ + return (__m256d)__builtin_ia32_maxpd256((__v4df)a, (__v4df)b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_max_ps(__m256 a, __m256 b) +{ + return (__m256)__builtin_ia32_maxps256((__v8sf)a, (__v8sf)b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_min_pd(__m256d a, __m256d b) +{ + return (__m256d)__builtin_ia32_minpd256((__v4df)a, (__v4df)b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_min_ps(__m256 a, __m256 b) +{ + return (__m256)__builtin_ia32_minps256((__v8sf)a, (__v8sf)b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_mul_pd(__m256d a, __m256d b) +{ + return a * b; +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_mul_ps(__m256 a, __m256 b) +{ + return a * b; +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_sqrt_pd(__m256d a) +{ + return (__m256d)__builtin_ia32_sqrtpd256((__v4df)a); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_sqrt_ps(__m256 a) +{ + return (__m256)__builtin_ia32_sqrtps256((__v8sf)a); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_rsqrt_ps(__m256 a) +{ + return 
(__m256)__builtin_ia32_rsqrtps256((__v8sf)a); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_rcp_ps(__m256 a) +{ + return (__m256)__builtin_ia32_rcpps256((__v8sf)a); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_round_pd(__m256d v, const int m) +{ + return (__m256d)__builtin_ia32_roundpd256((__v4df)v, m); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_round_ps(__m256 v, const int m) +{ + return (__m256)__builtin_ia32_roundps256((__v8sf)v, m); +} + +#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL) +#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR) +#define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL) +#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) + +/* Logical */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_and_pd(__m256d a, __m256d b) +{ + return (__m256d)((__v4di)a & (__v4di)b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_and_ps(__m256 a, __m256 b) +{ + return (__m256)((__v8si)a & (__v8si)b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_andnot_pd(__m256d a, __m256d b) +{ + return (__m256d)(~(__v4di)a & (__v4di)b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_andnot_ps(__m256 a, __m256 b) +{ + return (__m256)(~(__v8si)a & (__v8si)b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_or_pd(__m256d a, __m256d b) +{ + return (__m256d)((__v4di)a | (__v4di)b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_or_ps(__m256 a, __m256 b) +{ + return (__m256)((__v8si)a | (__v8si)b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_xor_pd(__m256d a, __m256d b) +{ + return (__m256d)((__v4di)a ^ (__v4di)b); +} + +static __inline __m256 
__attribute__((__always_inline__, __nodebug__)) +_mm256_xor_ps(__m256 a, __m256 b) +{ + return (__m256)((__v8si)a ^ (__v8si)b); +} + +/* Horizontal arithmetic */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_hadd_pd(__m256d a, __m256d b) +{ + return (__m256d)__builtin_ia32_haddpd256((__v4df)a, (__v4df)b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_hadd_ps(__m256 a, __m256 b) +{ + return (__m256)__builtin_ia32_haddps256((__v8sf)a, (__v8sf)b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_hsub_pd(__m256d a, __m256d b) +{ + return (__m256d)__builtin_ia32_hsubpd256((__v4df)a, (__v4df)b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_hsub_ps(__m256 a, __m256 b) +{ + return (__m256)__builtin_ia32_hsubps256((__v8sf)a, (__v8sf)b); +} + +/* Vector permutations */ +static __inline __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_permutevar_pd(__m128d a, __m128i c) +{ + return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)a, (__v2di)c); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_permutevar_pd(__m256d a, __m256i c) +{ + return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)a, (__v4di)c); +} + +static __inline __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_permutevar_ps(__m128 a, __m128i c) +{ + return (__m128)__builtin_ia32_vpermilvarps((__v4sf)a, (__v4si)c); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_permutevar_ps(__m256 a, __m256i c) +{ + return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)a, + (__v8si)c); +} + +static __inline __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_permute_pd(__m128d a, const int c) +{ + return (__m128d)__builtin_ia32_vpermilpd((__v2df)a, c); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_permute_pd(__m256d a, const int c) +{ + return 
(__m256d)__builtin_ia32_vpermilpd256((__v4df)a, c); +} + +static __inline __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_permute_ps(__m128 a, const int c) +{ + return (__m128)__builtin_ia32_vpermilps((__v4sf)a, c); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_permute_ps(__m256 a, const int c) +{ + return (__m256)__builtin_ia32_vpermilps256((__v8sf)a, c); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_permute2f128_pd(__m256d a, __m256d b, const int c) +{ + return (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)a, (__v4df)b, c); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_permute2f128_ps(__m256 a, __m256 b, const int c) +{ + return (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)a, (__v8sf)b, c); +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_permute2f128_si256(__m256i a, __m256i b, const int c) +{ + return (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)a, (__v8si)b, c); +} + +/* Vector Blend */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_blend_pd(__m256d a, __m256d b, const int c) +{ + return (__m256d)__builtin_ia32_blendpd256((__v4df)a, (__v4df)b, c); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_blend_ps(__m256 a, __m256 b, const int c) +{ + return (__m256)__builtin_ia32_blendps256((__v8sf)a, (__v8sf)b, c); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_blendv_pd(__m256d a, __m256d b, __m256d c) +{ + return (__m256d)__builtin_ia32_blendvpd256((__v4df)a, (__v4df)b, (__v4df)c); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_blendv_ps(__m256 a, __m256 b, __m256 c) +{ + return (__m256)__builtin_ia32_blendvps256((__v8sf)a, (__v8sf)b, (__v8sf)c); +} + +/* Vector Dot Product */ +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) 
+_mm256_dp_ps(__m256 a, __m256 b, const int c) +{ + return (__m256)__builtin_ia32_dpps256((__v8sf)a, (__v8sf)b, c); +} + +/* Vector shuffle */ +#define _mm256_shuffle_ps(a, b, mask) \ + (__builtin_shufflevector((__v8sf)(a), (__v8sf)(b), \ + (mask) & 0x3, ((mask) & 0xc) >> 2, \ + (((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \ + ((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \ + (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12)) + +#define _mm256_shuffle_pd(a, b, mask) \ + (__builtin_shufflevector((__v4df)(a), (__v4df)(b), \ + (mask) & 0x1, \ + (((mask) & 0x2) >> 1) + 4, \ + (((mask) & 0x4) >> 2) + 2, \ + (((mask) & 0x8) >> 3) + 6)) + +/* Compare */ +#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ +#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ +#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */ +#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */ +#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */ +#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */ +#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */ +#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */ +#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ +#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord, signaling) */ +#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ +#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */ +#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */ +#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */ +#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */ +#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */ +#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */ +#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ +#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */ +#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */ 
+#define _CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling) */
+#define _CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling) */
+#define _CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unordered, non-signaling) */
+#define _CMP_ORD_S    0x17 /* Ordered (signaling) */
+#define _CMP_EQ_US    0x18 /* Equal (unordered, signaling) */
+#define _CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */
+#define _CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling) */
+#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
+#define _CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling) */
+#define _CMP_GE_OQ    0x1d /* Greater-than-or-equal (ordered, non-signaling) */
+#define _CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling) */
+#define _CMP_TRUE_US  0x1f /* True (unordered, signaling) */
+/* Compare wrappers: each casts a and b to the builtin's vector type and passes c through unchanged. */
+static __inline __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_cmp_pd(__m128d a, __m128d b, const int c)
+{ /* c: presumably one of the _CMP_* predicate codes -- TODO confirm against the builtin's contract */
+  return (__m128d)__builtin_ia32_cmppd((__v2df)a, (__v2df)b, c);
+}
+
+static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_cmp_ps(__m128 a, __m128 b, const int c)
+{ /* 4 x float form of _mm_cmp_pd */
+  return (__m128)__builtin_ia32_cmpps((__v4sf)a, (__v4sf)b, c);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_cmp_pd(__m256d a, __m256d b, const int c)
+{ /* 256-bit, 4 x double form */
+  return (__m256d)__builtin_ia32_cmppd256((__v4df)a, (__v4df)b, c);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmp_ps(__m256 a, __m256 b, const int c)
+{ /* 256-bit, 8 x float form */
+  return (__m256)__builtin_ia32_cmpps256((__v8sf)a, (__v8sf)b, c);
+}
+
+static __inline __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_cmp_sd(__m128d a, __m128d b, const int c)
+{ /* NOTE(review): the sd suffix suggests a scalar (lowest-element) compare -- confirm */
+  return (__m128d)__builtin_ia32_cmpsd((__v2df)a, (__v2df)b, c);
+}
+
+static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_cmp_ss(__m128 a, __m128 b, const int c)
+{ /* NOTE(review): the ss suffix suggests a scalar (lowest-element) compare -- confirm */
+  return (__m128)__builtin_ia32_cmpss((__v4sf)a, (__v4sf)b, c);
+}
+
+/* Vector extract */
+static __inline __m128d __attribute__((__always_inline__, __nodebug__)) +_mm256_extractf128_pd(__m256d a, const int o) +{ + return (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)a, o); +} + +static __inline __m128 __attribute__((__always_inline__, __nodebug__)) +_mm256_extractf128_ps(__m256 a, const int o) +{ + return (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)a, o); +} + +static __inline __m128i __attribute__((__always_inline__, __nodebug__)) +_mm256_extractf128_si256(__m256i a, const int o) +{ + return (__m128i)__builtin_ia32_vextractf128_si256((__v8si)a, o); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_extract_epi32(__m256i a, int const imm) +{ + __v8si b = (__v8si)a; + return b[imm]; +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_extract_epi16(__m256i a, int const imm) +{ + __v16hi b = (__v16hi)a; + return b[imm]; +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_extract_epi8(__m256i a, int const imm) +{ + __v32qi b = (__v32qi)a; + return b[imm]; +} + +#ifdef __x86_64__ +static __inline long long __attribute__((__always_inline__, __nodebug__)) +_mm256_extract_epi64(__m256i a, const int imm) +{ + __v4di b = (__v4di)a; + return b[imm]; +} +#endif + +/* Vector insert */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_insertf128_pd(__m256d a, __m128d b, const int o) +{ + return (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)a, (__v2df)b, o); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_insertf128_ps(__m256 a, __m128 b, const int o) +{ + return (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)a, (__v4sf)b, o); +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_insertf128_si256(__m256i a, __m128i b, const int o) +{ + return (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)a, (__v4si)b, o); +} + +static __inline __m256i 
__attribute__((__always_inline__, __nodebug__)) +_mm256_insert_epi32(__m256i a, int b, int const imm) +{ + __v8si c = (__v8si)a; + c[imm & 7] = b; + return (__m256i)c; +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_insert_epi16(__m256i a, int b, int const imm) +{ + __v16hi c = (__v16hi)a; + c[imm & 15] = b; + return (__m256i)c; +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_insert_epi8(__m256i a, int b, int const imm) +{ + __v32qi c = (__v32qi)a; + c[imm & 31] = b; + return (__m256i)c; +} + +#ifdef __x86_64__ +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_insert_epi64(__m256i a, int b, int const imm) +{ + __v4di c = (__v4di)a; + c[imm & 3] = b; + return (__m256i)c; +} +#endif + +/* Conversion */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepi32_pd(__m128i a) +{ + return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) a); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepi32_ps(__m256i a) +{ + return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) a); +} + +static __inline __m128 __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtpd_ps(__m256d a) +{ + return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) a); +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtps_epi32(__m256 a) +{ + return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) a); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtps_pd(__m128 a) +{ + return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) a); +} + +static __inline __m128i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvttpd_epi32(__m256d a) +{ + return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) a); +} + +static __inline __m128i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtpd_epi32(__m256d a) +{ + return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) a); +} + 
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_cvttps_epi32(__m256 a)
+{ /* NOTE(review): the cvtt prefix suggests conversion with truncation -- confirm */
+  return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) a);
+}
+
+/* Vector replicate: shuffles of a with itself that duplicate selected elements */
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_movehdup_ps(__m256 a)
+{ /* result = { a[1], a[1], a[3], a[3], a[5], a[5], a[7], a[7] } */
+  return __builtin_shufflevector(a, a, 1, 1, 3, 3, 5, 5, 7, 7);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_moveldup_ps(__m256 a)
+{ /* result = { a[0], a[0], a[2], a[2], a[4], a[4], a[6], a[6] } */
+  return __builtin_shufflevector(a, a, 0, 0, 2, 2, 4, 4, 6, 6);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_movedup_pd(__m256d a)
+{ /* result = { a[0], a[0], a[2], a[2] } */
+  return __builtin_shufflevector(a, a, 0, 0, 2, 2);
+}
+
+/* Unpack and Interleave: shuffle indices alternate between a and b (b's elements follow a's in the numbering) */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_unpackhi_pd(__m256d a, __m256d b)
+{ /* indices 1, 5, 3, 7: the +2 terms repeat the first pair two elements further on */
+  return __builtin_shufflevector(a, b, 1, 5, 1+2, 5+2);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_unpacklo_pd(__m256d a, __m256d b)
+{ /* indices 0, 4, 2, 6 */
+  return __builtin_shufflevector(a, b, 0, 4, 0+2, 4+2);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_unpackhi_ps(__m256 a, __m256 b)
+{ /* indices 2, 10, 3, 11, 6, 14, 7, 15 */
+  return __builtin_shufflevector(a, b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_unpacklo_ps(__m256 a, __m256 b)
+{ /* indices 0, 8, 1, 9, 4, 12, 5, 13 */
+  return __builtin_shufflevector(a, b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
+}
+
+/* Bit Test: each wrapper returns the int produced by the matching vtest or ptest builtin (z / c / nzc variants) */
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm_testz_pd(__m128d a, __m128d b)
+{
+  return __builtin_ia32_vtestzpd((__v2df)a, (__v2df)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm_testc_pd(__m128d a, __m128d b)
+{
+  return __builtin_ia32_vtestcpd((__v2df)a, (__v2df)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm_testnzc_pd(__m128d a, __m128d b)
+{
+  return __builtin_ia32_vtestnzcpd((__v2df)a, (__v2df)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm_testz_ps(__m128 a, __m128 b)
+{
+  return __builtin_ia32_vtestzps((__v4sf)a, (__v4sf)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm_testc_ps(__m128 a, __m128 b)
+{
+  return __builtin_ia32_vtestcps((__v4sf)a, (__v4sf)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm_testnzc_ps(__m128 a, __m128 b)
+{
+  return __builtin_ia32_vtestnzcps((__v4sf)a, (__v4sf)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_testz_pd(__m256d a, __m256d b)
+{
+  return __builtin_ia32_vtestzpd256((__v4df)a, (__v4df)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_testc_pd(__m256d a, __m256d b)
+{
+  return __builtin_ia32_vtestcpd256((__v4df)a, (__v4df)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_testnzc_pd(__m256d a, __m256d b)
+{
+  return __builtin_ia32_vtestnzcpd256((__v4df)a, (__v4df)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_testz_ps(__m256 a, __m256 b)
+{
+  return __builtin_ia32_vtestzps256((__v8sf)a, (__v8sf)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_testc_ps(__m256 a, __m256 b)
+{
+  return __builtin_ia32_vtestcps256((__v8sf)a, (__v8sf)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_testnzc_ps(__m256 a, __m256 b)
+{
+  return __builtin_ia32_vtestnzcps256((__v8sf)a, (__v8sf)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_testz_si256(__m256i a, __m256i b)
+{ /* integer operands are viewed as 4 x 64-bit for the ptest builtin */
+  return __builtin_ia32_ptestz256((__v4di)a, (__v4di)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_testc_si256(__m256i a, __m256i b)
+{
+  return __builtin_ia32_ptestc256((__v4di)a, (__v4di)b);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_testnzc_si256(__m256i a, __m256i b)
+{ /* integer operands are viewed as 4 x 64-bit for the ptest builtin */
+  return __builtin_ia32_ptestnzc256((__v4di)a, (__v4di)b);
+}
+
+/* Vector extract sign mask: returns the int produced by the movmsk builtin */
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_movemask_pd(__m256d a)
+{
+  return __builtin_ia32_movmskpd256((__v4df)a);
+}
+
+static __inline int __attribute__((__always_inline__, __nodebug__))
+_mm256_movemask_ps(__m256 a)
+{
+  return __builtin_ia32_movmskps256((__v8sf)a);
+}
+
+/* Vector zero: argument-less wrappers around the vzeroall / vzeroupper builtins */
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_zeroall(void)
+{
+  __builtin_ia32_vzeroall();
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_zeroupper(void)
+{
+  __builtin_ia32_vzeroupper();
+}
+
+/* Vector load with broadcast: the pointer is handed to the vbroadcast* builtin unchanged */
+static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_broadcast_ss(float const *a)
+{ /* presumably replicates *a into every element of the result -- confirm */
+  return (__m128)__builtin_ia32_vbroadcastss(a);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_broadcast_sd(double const *a)
+{
+  return (__m256d)__builtin_ia32_vbroadcastsd256(a);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_broadcast_ss(float const *a)
+{
+  return (__m256)__builtin_ia32_vbroadcastss256(a);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_broadcast_pd(__m128d const *a)
+{ /* source is a whole 128-bit vector rather than a scalar */
+  return (__m256d)__builtin_ia32_vbroadcastf128_pd256(a);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_broadcast_ps(__m128 const *a)
+{ /* source is a whole 128-bit vector rather than a scalar */
+  return (__m256)__builtin_ia32_vbroadcastf128_ps256(a);
+}
+
+/* SIMD load ops */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_load_pd(double const *p)
+{ /* plain dereference as __m256d (the cast drops const); NOTE(review): presumably needs 32-byte alignment -- confirm */
+  return *(__m256d *)p;
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_load_ps(float const *p)
+{ /* plain dereference as __m256 (the cast drops const); same alignment caveat as _mm256_load_pd */
+  return *(__m256 *)p;
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_loadu_pd(double const *p)
+{ /* goes through the loadu builtin instead of a plain dereference */
+  return (__m256d)__builtin_ia32_loadupd256(p);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_loadu_ps(float const *p)
+{ /* goes through the loadu builtin instead of a plain dereference */
+  return (__m256)__builtin_ia32_loadups256(p);
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_load_si256(__m256i const *p)
+{ /* plain dereference; NOTE(review): presumably needs 32-byte alignment -- confirm */
+  return *p;
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_loadu_si256(__m256i const *p)
+{ /* the builtin takes a char pointer, hence the cast */
+  return (__m256i)__builtin_ia32_loaddqu256((char const *)p);
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_lddqu_si256(__m256i const *p)
+{ /* same shape as _mm256_loadu_si256 but uses the lddqu builtin */
+  return (__m256i)__builtin_ia32_lddqu256((char const *)p);
+}
+
+/* SIMD store ops: store_* assign through the pointer, storeu_* call the storeu builtins */
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_store_pd(double *p, __m256d a)
+{ /* plain vector assignment; NOTE(review): presumably needs 32-byte alignment -- confirm */
+  *(__m256d *)p = a;
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_store_ps(float *p, __m256 a)
+{ /* plain vector assignment; same alignment caveat as _mm256_store_pd */
+  *(__m256 *)p = a;
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_storeu_pd(double *p, __m256d a)
+{
+  __builtin_ia32_storeupd256(p, (__v4df)a);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_storeu_ps(float *p, __m256 a)
+{
+  __builtin_ia32_storeups256(p, (__v8sf)a);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_store_si256(__m256i *p, __m256i a)
+{ /* plain vector assignment */
+  *p = a;
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_storeu_si256(__m256i *p, __m256i a)
+{ /* the builtin takes a char pointer and a 32 x char vector, hence the casts */
+  __builtin_ia32_storedqu256((char *)p, (__v32qi)a);
+}
+
+/* Conditional load ops: p is reinterpreted as a vector pointer and m is passed as the mask operand */
+static __inline __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_maskload_pd(double const *p, __m128d m)
+{
+  return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)p, (__v2df)m);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_maskload_pd(double const *p, __m256d m)
+{
+  return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)p, (__v4df)m);
+}
+
+static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_maskload_ps(float const *p, __m128 m)
+{
+  return (__m128)__builtin_ia32_maskloadps((const __v4sf *)p, (__v4sf)m);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_maskload_ps(float const *p, __m256 m)
+{
+  return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)p, (__v8sf)m);
+}
+
+/* Conditional store ops: argument order is destination p, mask m, data a */
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_maskstore_ps(float *p, __m256 m, __m256 a)
+{
+  __builtin_ia32_maskstoreps256((__v8sf *)p, (__v8sf)m, (__v8sf)a);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm_maskstore_pd(double *p, __m128d m, __m128d a)
+{
+  __builtin_ia32_maskstorepd((__v2df *)p, (__v2df)m, (__v2df)a);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_maskstore_pd(double *p, __m256d m, __m256d a)
+{
+  __builtin_ia32_maskstorepd256((__v4df *)p, (__v4df)m, (__v4df)a);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm_maskstore_ps(float *p, __m128 m, __m128 a)
+{
+  __builtin_ia32_maskstoreps((__v4sf *)p, (__v4sf)m, (__v4sf)a);
+}
+
+/* Cacheability support ops: wrappers around the movnt* builtins (presumably non-temporal stores -- confirm) */
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_stream_si256(__m256i *a, __m256i b)
+{ /* here a is the destination and b the value */
+  __builtin_ia32_movntdq256((__v4di *)a, (__v4di)b);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_stream_pd(double *a, __m256d b)
+{ /* here a is the destination and b the value */
+  __builtin_ia32_movntpd256(a, (__v4df)b);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_stream_ps(float *p, __m256 a)
+{ /* here p is the destination and a the value */
+  __builtin_ia32_movntps256(p, (__v8sf)a);
+}
+
+/* Create vectors: the LAST argument becomes element 0, so arguments read highest element first */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_set_pd(double a, double b, double c, double d)
+{
+  return (__m256d){ d, c, b, a };
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_set_ps(float a, float b, float c, float d,
+              float e, float f, float g, float h)
+{ /* h becomes element 0, a becomes element 7 */
+  return (__m256){ h, g, f, e, d, c, b, a };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set_epi32(int i0, int i1, int i2, int i3,
+                 int i4, int i5, int i6, int i7)
+{ /* i7 becomes element 0; the iN names follow argument position, not element position */
+  return (__m256i)(__v8si){ i7, i6, i5, i4, i3, i2, i1, i0 };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set_epi16(short w15, short w14, short w13, short w12,
+                 short w11, short w10, short w09, short w08,
+                 short w07, short w06, short w05, short w04,
+                 short w03, short w02, short w01, short w00)
+{ /* here wNN is the element index: w00 becomes element 0 */
+  return (__m256i)(__v16hi){ w00, w01, w02, w03, w04, w05, w06, w07,
+                             w08, w09, w10, w11, w12, w13, w14, w15 };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set_epi8(char b31, char b30, char b29, char b28,
+                char b27, char b26, char b25, char b24,
+                char b23, char b22, char b21, char b20,
+                char b19, char b18, char b17, char b16,
+                char b15, char b14, char b13, char b12,
+                char b11, char b10, char b09, char b08,
+                char b07, char b06, char b05, char b04,
+                char b03, char b02, char b01, char b00)
+{ /* here bNN is the element index: b00 becomes element 0 */
+  return (__m256i)(__v32qi){
+    b00, b01, b02, b03, b04, b05, b06, b07,
+    b08, b09, b10, b11, b12, b13, b14, b15,
+    b16, b17, b18, b19, b20, b21, b22, b23,
+    b24, b25, b26, b27, b28, b29, b30, b31
+  };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set_epi64x(long long a, long long b, long long c, long long d)
+{ /* d becomes element 0, a becomes element 3 */
+  return (__m256i)(__v4di){ d, c, b, a };
+}
+
+/* Create vectors with elements in reverse order: the FIRST argument becomes element 0 */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_pd(double a, double b, double c, double d)
+{
+  return (__m256d){ a, b, c, d };
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_ps(float a, float b, float c, float d,
+               float e, float f, float g, float h)
+{ /* arguments are stored in the order given: a becomes element 0 */
+  return (__m256){ a, b, c, d, e, f, g, h };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_epi32(int i0, int i1, int i2, int i3,
+                  int i4, int i5, int i6, int i7)
+{ /* i0 becomes element 0 */
+  return (__m256i)(__v8si){ i0, i1, i2, i3, i4, i5, i6, i7 };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_epi16(short w15, short w14, short w13, short w12,
+                  short w11, short w10, short w09, short w08,
+                  short w07, short w06, short w05, short w04,
+                  short w03, short w02, short w01, short w00)
+{ /* the first argument (named w15) becomes element 0; the names do not denote element positions here */
+  return (__m256i)(__v16hi){ w15, w14, w13, w12, w11, w10, w09, w08,
+                             w07, w06, w05, w04, w03, w02, w01, w00 };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_epi8(char b31, char b30, char b29, char b28,
+                 char b27, char b26, char b25, char b24,
+                 char b23, char b22, char b21, char b20,
+                 char b19, char b18, char b17, char b16,
+                 char b15, char b14, char b13, char b12,
+                 char b11, char b10, char b09, char b08,
+                 char b07, char b06, char b05, char b04,
+                 char b03, char b02, char b01, char b00)
+{ /* the first argument (named b31) becomes element 0; the names do not denote element positions here */
+  return (__m256i)(__v32qi){
+    b31, b30, b29, b28, b27, b26, b25, b24,
+    b23, b22, b21, b20, b19, b18, b17, b16,
+    b15, b14, b13, b12, b11, b10, b09, b08,
+    b07, b06, b05, b04, b03, b02, b01, b00 };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_epi64x(long long a, long long b, long long c, long long d)
+{ /* a becomes element 0 */
+  return (__m256i)(__v4di){ a, b, c, d };
+}
+
+/* Create vectors with repeated elements: every element is a copy of the single argument */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_pd(double w)
+{
+  return (__m256d){ w, w, w, w };
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_ps(float w)
+{
+  return (__m256){ w, w, w, w, w, w, w, w };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_epi32(int i)
+{
+  return (__m256i)(__v8si){ i, i, i, i, i, i, i, i };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_epi16(short w)
+{
+  return (__m256i)(__v16hi){ w, w, w, w, w, w, w, w, w, w, w, w, w, w, w, w };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_epi8(char b)
+{
+  return (__m256i)(__v32qi){ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b,
+                             b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_epi64x(long long q)
+{
+  return (__m256i)(__v4di){ q, q, q, q };
+}
+
+/* Create zeroed vectors: all-zero vector literals */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_setzero_pd(void)
+{
+  return (__m256d){ 0, 0, 0, 0 };
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_setzero_ps(void)
+{
+  return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setzero_si256(void)
+{
+  return (__m256i){ 0LL, 0LL, 0LL, 0LL };
+}
+
+/* Cast between vector types: same-width casts are plain vector casts; width changes use a shuffle */
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_castpd_ps(__m256d in)
+{
+  return (__m256)in;
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_castpd_si256(__m256d in)
+{
+  return (__m256i)in;
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_castps_pd(__m256 in)
+{
+  return (__m256d)in;
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_castps_si256(__m256 in)
+{
+  return (__m256i)in;
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_castsi256_ps(__m256i in)
+{
+  return (__m256)in;
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_castsi256_pd(__m256i in)
+{
+  return (__m256d)in;
+}
+
+static __inline __m128d __attribute__((__always_inline__, __nodebug__))
+_mm256_castpd256_pd128(__m256d in)
+{ /* narrowing: keeps elements 0 and 1 of in */
+  return __builtin_shufflevector(in, in, 0, 1);
+}
+
+static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+_mm256_castps256_ps128(__m256 in)
+{ /* narrowing: keeps elements 0..3 of in */
+  return __builtin_shufflevector(in, in, 0, 1, 2, 3);
+}
+
+static __inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm256_castsi256_si128(__m256i in)
+{ /* narrowing: keeps elements 0 and 1 of in */
+  return __builtin_shufflevector(in, in, 0, 1);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_castpd128_pd256(__m128d in)
+{ /* widening: elements 0-1 come from in; index 2 (first element of zero) fills the rest */
+  __m128d zero = _mm_setzero_pd();
+  return __builtin_shufflevector(in, zero, 0, 1, 2, 2);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_castps128_ps256(__m128 in)
+{ /* widening: elements 0-3 come from in; index 4 (first element of zero) fills the rest */
+  __m128 zero = _mm_setzero_ps();
+  return __builtin_shufflevector(in, zero, 0, 1, 2, 3, 4, 4, 4, 4);
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_castsi128_si256(__m128i in)
+{ /* widening: elements 0-1 come from in; index 2 (first element of zero) fills the rest */
+  __m128i zero = _mm_setzero_si128();
+  return __builtin_shufflevector(in, zero, 0, 1, 2, 2);
+}
diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h
index f297f36..e5dfe26 100644
--- a/lib/Headers/emmintrin.h
+++ b/lib/Headers/emmintrin.h
@@ -1,4 +1,4 @@
-/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
+/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@
  *
  *===-----------------------------------------------------------------------===
  */
-
+
 #ifndef __EMMINTRIN_H
 #define __EMMINTRIN_H
 
@@ -33,6 +33,9 @@ typedef double __m128d __attribute__((__vector_size__(16)));
 typedef long long __m128i __attribute__((__vector_size__(16)));
 
+/* Type defines.
*/ +typedef double __v2df __attribute__ ((__vector_size__ (16))); +typedef long long __v2di __attribute__ ((__vector_size__ (16))); typedef short __v8hi __attribute__((__vector_size__(16))); typedef char __v16qi __attribute__((__vector_size__(16))); @@ -1194,7 +1197,7 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__)) _mm_extract_epi16(__m128i a, int imm) { __v8hi b = (__v8hi)a; - return b[imm]; + return (unsigned short)b[imm]; } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h new file mode 100644 index 0000000..a19deaa --- /dev/null +++ b/lib/Headers/immintrin.h @@ -0,0 +1,59 @@ +/*===---- immintrin.h - Intel intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#define __IMMINTRIN_H + +#ifdef __MMX__ +#include <mmintrin.h> +#endif + +#ifdef __SSE__ +#include <xmmintrin.h> +#endif + +#ifdef __SSE2__ +#include <emmintrin.h> +#endif + +#ifdef __SSE3__ +#include <pmmintrin.h> +#endif + +#ifdef __SSSE3__ +#include <tmmintrin.h> +#endif + +#if defined (__SSE4_2__) || defined (__SSE4_1__) +#include <smmintrin.h> +#endif + +#if defined (__AES__) || defined (__PCLMUL__) +#include <wmmintrin.h> +#endif + +#ifdef __AVX__ +#include <avxintrin.h> +#endif + +#endif /* __IMMINTRIN_H */ diff --git a/lib/Headers/mmintrin.h b/lib/Headers/mmintrin.h index 401d8a7..bad9e1c 100644 --- a/lib/Headers/mmintrin.h +++ b/lib/Headers/mmintrin.h @@ -443,6 +443,64 @@ _mm_setr_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, return (__m64)(__v8qi){ __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0 }; } + +/* Aliases for compatibility. */ +#define _m_empty _mm_empty +#define _m_from_int _mm_cvtsi32_si64 +#define _m_to_int _mm_cvtsi64_si32 +#define _m_packsswb _mm_packs_pi16 +#define _m_packssdw _mm_packs_pi32 +#define _m_packuswb _mm_packs_pu16 +#define _m_punpckhbw _mm_unpackhi_pi8 +#define _m_punpckhwd _mm_unpackhi_pi16 +#define _m_punpckhdq _mm_unpackhi_pi32 +#define _m_punpcklbw _mm_unpacklo_pi8 +#define _m_punpcklwd _mm_unpacklo_pi16 +#define _m_punpckldq _mm_unpacklo_pi32 +#define _m_paddb _mm_add_pi8 +#define _m_paddw _mm_add_pi16 +#define _m_paddd _mm_add_pi32 +#define _m_paddsb _mm_adds_pi8 +#define _m_paddsw _mm_adds_pi16 +#define _m_paddusb _mm_adds_pu8 +#define _m_paddusw _mm_adds_pu16 +#define _m_psubb _mm_sub_pi8 +#define _m_psubw _mm_sub_pi16 +#define _m_psubd _mm_sub_pi32 +#define _m_psubsb _mm_subs_pi8 +#define _m_psubsw _mm_subs_pi16 +#define _m_psubusb _mm_subs_pu8 +#define _m_psubusw _mm_subs_pu16 +#define _m_pmaddwd _mm_madd_pi16 +#define _m_pmulhw _mm_mulhi_pi16 +#define _m_pmullw _mm_mullo_pi16 
+#define _m_psllw _mm_sll_pi16 +#define _m_psllwi _mm_slli_pi16 +#define _m_pslld _mm_sll_pi32 +#define _m_pslldi _mm_slli_pi32 +#define _m_psllq _mm_sll_si64 +#define _m_psllqi _mm_slli_si64 +#define _m_psraw _mm_sra_pi16 +#define _m_psrawi _mm_srai_pi16 +#define _m_psrad _mm_sra_pi32 +#define _m_psradi _mm_srai_pi32 +#define _m_psrlw _mm_srl_pi16 +#define _m_psrlwi _mm_srli_pi16 +#define _m_psrld _mm_srl_pi32 +#define _m_psrldi _mm_srli_pi32 +#define _m_psrlq _mm_srl_si64 +#define _m_psrlqi _mm_srli_si64 +#define _m_pand _mm_and_si64 +#define _m_pandn _mm_andnot_si64 +#define _m_por _mm_or_si64 +#define _m_pxor _mm_xor_si64 +#define _m_pcmpeqb _mm_cmpeq_pi8 +#define _m_pcmpeqw _mm_cmpeq_pi16 +#define _m_pcmpeqd _mm_cmpeq_pi32 +#define _m_pcmpgtb _mm_cmpgt_pi8 +#define _m_pcmpgtw _mm_cmpgt_pi16 +#define _m_pcmpgtd _mm_cmpgt_pi32 + #endif /* __MMX__ */ #endif /* __MMINTRIN_H */ diff --git a/lib/Headers/nmmintrin.h b/lib/Headers/nmmintrin.h index cc213ce..f12622d 100644 --- a/lib/Headers/nmmintrin.h +++ b/lib/Headers/nmmintrin.h @@ -1,25 +1,25 @@ -/*===---- nmmintrin.h - SSE intrinsics -------------------------------------=== -* -* Permission is hereby granted, free of charge, to any person obtaining a copy -* of this software and associated documentation files (the "Software"), to deal -* in the Software without restriction, including without limitation the rights -* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -* THE SOFTWARE. -* -*===-----------------------------------------------------------------------=== -*/ +/*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ #ifndef _NMMINTRIN_H #define _NMMINTRIN_H diff --git a/lib/Headers/smmintrin.h b/lib/Headers/smmintrin.h index 4b0d9e7..2b8b321 100644 --- a/lib/Headers/smmintrin.h +++ b/lib/Headers/smmintrin.h @@ -30,10 +30,6 @@ #include <tmmintrin.h> -/* Type defines. 
*/ -typedef double __v2df __attribute__ ((__vector_size__ (16))); -typedef long long __v2di __attribute__ ((__vector_size__ (16))); - /* SSE4 Rounding macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 #define _MM_FROUND_TO_NEG_INF 0x01 @@ -213,11 +209,13 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) __a;})) #endif /* __x86_64__ */ -/* Extract int from packed integer array at index. */ +/* Extract int from packed integer array at index. This returns the element + * as a zero extended value, so it is unsigned. + */ #define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ - __a[N];})) + (unsigned char)__a[N];})) #define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ - __a[N];})) + (unsigned)__a[N];})) #ifdef __x86_64__ #define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ __a[N];})) diff --git a/lib/Headers/stddef.h b/lib/Headers/stddef.h index b1d0d52..84ec1a7 100644 --- a/lib/Headers/stddef.h +++ b/lib/Headers/stddef.h @@ -34,12 +34,13 @@ typedef __typeof__(sizeof(int)) size_t; #ifndef __cplusplus #ifndef _WCHAR_T #define _WCHAR_T -typedef __typeof__(*L"") wchar_t; +typedef __WCHAR_TYPE__ wchar_t; #endif #endif #undef NULL #ifdef __cplusplus +#undef __null // VC++ hack. 
#define NULL __null #else #define NULL ((void*)0) diff --git a/lib/Headers/x86intrin.h b/lib/Headers/x86intrin.h new file mode 100644 index 0000000..e5e7a6a --- /dev/null +++ b/lib/Headers/x86intrin.h @@ -0,0 +1,31 @@ +/*===---- x86intrin.h - X86 intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#define __X86INTRIN_H + +#include <immintrin.h> + +// FIXME: SSE4A, 3dNOW, FMA4, XOP, LWP, ABM, POPCNT + +#endif /* __X86INTRIN_H */ diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h index 75e06b5..8363b45 100644 --- a/lib/Headers/xmmintrin.h +++ b/lib/Headers/xmmintrin.h @@ -416,6 +416,12 @@ _mm_cvtps_pi32(__m128 a) return (__m64)__builtin_ia32_cvtps2pi(a); } +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvt_ps2pi(__m128 a) +{ + return _mm_cvtps_pi32(a); +} + static __inline__ int __attribute__((__always_inline__, __nodebug__)) _mm_cvttss_si32(__m128 a) { @@ -440,6 +446,12 @@ _mm_cvttps_pi32(__m128 a) return (__m64)__builtin_ia32_cvttps2pi(a); } +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtt_ps2pi(__m128 a) +{ + return _mm_cvttps_pi32(a); +} + static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cvtsi32_ss(__m128 a, int b) { @@ -447,6 +459,12 @@ _mm_cvtsi32_ss(__m128 a, int b) return a; } +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvt_si2ss(__m128 a, int b) +{ + return _mm_cvtsi32_ss(a, b); +} + #ifdef __x86_64__ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) @@ -464,6 +482,12 @@ _mm_cvtpi32_ps(__m128 a, __m64 b) return __builtin_ia32_cvtpi2ps(a, (__v2si)b); } +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvt_pi2ps(__m128 a, __m64 b) +{ + return _mm_cvtpi32_ps(a, b); +} + static __inline__ float __attribute__((__always_inline__, __nodebug__)) _mm_cvtss_f32(__m128 a) { @@ -590,6 +614,12 @@ _mm_store1_ps(float *p, __m128 a) } static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store_ps1(float *p, __m128 a) +{ + return _mm_store1_ps(p, a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) _mm_store_ps(float 
*p, __m128 a) { *(__m128 *)p = a; @@ -602,9 +632,9 @@ _mm_storer_ps(float *p, __m128 a) _mm_store_ps(p, a); } -#define _MM_HINT_T0 1 +#define _MM_HINT_T0 3 #define _MM_HINT_T1 2 -#define _MM_HINT_T2 3 +#define _MM_HINT_T2 1 #define _MM_HINT_NTA 0 /* FIXME: We have to #define this because "sel" must be a constant integer, and @@ -908,6 +938,23 @@ do { \ (row3) = _mm_movehl_ps(tmp3, tmp1); \ } while (0) +/* Aliases for compatibility. */ +#define _m_pextrw _mm_extract_pi16 +#define _m_pinsrw _mm_insert_pi16 +#define _m_pmaxsw _mm_max_pi16 +#define _m_pmaxub _mm_max_pu8 +#define _m_pminsw _mm_min_pi16 +#define _m_pminub _mm_min_pu8 +#define _m_pmovmskb _mm_movemask_pi8 +#define _m_pmulhuw _mm_mulhi_pu16 +#define _m_pshufw _mm_shuffle_pi16 +#define _m_maskmovq _mm_maskmove_si64 +#define _m_pavgb _mm_avg_pu8 +#define _m_pavgw _mm_avg_pu16 +#define _m_psadbw _mm_sad_pu8 +#define _m_ _mm_ +#define _m_ _mm_ + /* Ugly hack for backwards-compatibility (compatible with gcc) */ #ifdef __SSE2__ #include <emmintrin.h> |