test/CodeGen/avx-builtins.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <immintrin.h>

//
// Test LLVM IR codegen of AVX builtins and of the SSE4.2 string-compare
// builtins
//

// _mm256_loadu_ps through an untyped pointer: the expected IR is a plain
// <8 x float> load with "align 1", i.e. no alignment is assumed for p.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
__m256 test__mm256_loadu_ps(void* p) {
  // CHECK-LABEL: @test__mm256_loadu_ps
  // CHECK: load <8 x float>, <8 x float>* %{{.*}}, align 1
  return _mm256_loadu_ps(p);
}

// _mm256_loadu_pd through an untyped pointer: the expected IR is a plain
// <4 x double> load with "align 1", i.e. no alignment is assumed for p.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
__m256d test__mm256_loadu_pd(void* p) {
  // CHECK-LABEL: @test__mm256_loadu_pd
  // CHECK: load <4 x double>, <4 x double>* %{{.*}}, align 1
  return _mm256_loadu_pd(p);
}

// _mm256_loadu_si256 through an untyped pointer: the expected IR is a plain
// <4 x i64> load with "align 1", i.e. no alignment is assumed for p.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
__m256i test__mm256_loadu_si256(void* p) {
  // CHECK-LABEL: @test__mm256_loadu_si256
  // CHECK: load <4 x i64>, <4 x i64>* %{{.+}}, align 1
  return _mm256_loadu_si256(p);
}

// _mm_cmpestrm with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpestrm128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
__m128i test_mm_cmpestrm(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrm
  // CHECK: @llvm.x86.sse42.pcmpestrm128
  return _mm_cmpestrm(A, LA, B, LB, 7);
}

// _mm_cmpestri with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpestri128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpestri(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestri
  // CHECK: @llvm.x86.sse42.pcmpestri128
  return _mm_cmpestri(A, LA, B, LB, 7);
}

// _mm_cmpestra with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpestria128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpestra(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestra
  // CHECK: @llvm.x86.sse42.pcmpestria128
  return _mm_cmpestra(A, LA, B, LB, 7);
}

// _mm_cmpestrc with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpestric128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpestrc(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrc
  // CHECK: @llvm.x86.sse42.pcmpestric128
  return _mm_cmpestrc(A, LA, B, LB, 7);
}

// _mm_cmpestro with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpestrio128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpestro(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestro
  // CHECK: @llvm.x86.sse42.pcmpestrio128
  return _mm_cmpestro(A, LA, B, LB, 7);
}

// _mm_cmpestrs with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpestris128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpestrs(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrs
  // CHECK: @llvm.x86.sse42.pcmpestris128
  return _mm_cmpestrs(A, LA, B, LB, 7);
}

// _mm_cmpestrz with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpestriz128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpestrz(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: @test_mm_cmpestrz
  // CHECK: @llvm.x86.sse42.pcmpestriz128
  return _mm_cmpestrz(A, LA, B, LB, 7);
}

// _mm_cmpistrm with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpistrm128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
__m128i test_mm_cmpistrm(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrm
  // CHECK: @llvm.x86.sse42.pcmpistrm128
  return _mm_cmpistrm(A, B, 7);
}

// _mm_cmpistri with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpistri128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpistri(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistri
  // CHECK: @llvm.x86.sse42.pcmpistri128
  return _mm_cmpistri(A, B, 7);
}

// _mm_cmpistra with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpistria128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpistra(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistra
  // CHECK: @llvm.x86.sse42.pcmpistria128
  return _mm_cmpistra(A, B, 7);
}

// _mm_cmpistrc with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpistric128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpistrc(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrc
  // CHECK: @llvm.x86.sse42.pcmpistric128
  return _mm_cmpistrc(A, B, 7);
}

// _mm_cmpistro with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpistrio128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpistro(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistro
  // CHECK: @llvm.x86.sse42.pcmpistrio128
  return _mm_cmpistro(A, B, 7);
}

// _mm_cmpistrs with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpistris128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpistrs(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrs
  // CHECK: @llvm.x86.sse42.pcmpistris128
  return _mm_cmpistrs(A, B, 7);
}

// _mm_cmpistrz with immediate 7: the expected IR references the
// @llvm.x86.sse42.pcmpistriz128 intrinsic.
// The label line anchors the pattern to this function's IR, following the
// convention of the later tests in this file.
int test_mm_cmpistrz(__m128i A, __m128i B) {
  // CHECK-LABEL: @test_mm_cmpistrz
  // CHECK: @llvm.x86.sse42.pcmpistriz128
  return _mm_cmpistrz(A, B, 7);
}

// _mm256_extract_epi32 with index 8, which equals the number of i32 lanes in
// the <8 x i32> vector named by the pattern. The expected IR first masks the
// index with 7 and then uses that masked value as the extractelement index.
// NOTE(review): presumably the out-of-range index is deliberate, to exercise
// the masking -- confirm against the header implementation.
int test_extract_epi32(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi32
  // CHECK: [[SHIFT1:%[^ ]+]] = and i32 %{{.*}}, 7
  // CHECK: extractelement <8 x i32> %{{.*}}, i32 [[SHIFT1]]
  return _mm256_extract_epi32(__a, 8);
}

// _mm256_extract_epi16 with index 16, which equals the number of i16 lanes in
// the <16 x i16> vector named by the pattern. The expected IR first masks the
// index with 15 and then uses that masked value as the extractelement index.
// NOTE(review): presumably the out-of-range index is deliberate, to exercise
// the masking -- confirm against the header implementation.
int test_extract_epi16(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi16
  // CHECK: [[SHIFT2:%[^ ]+]] = and i32 %{{.*}}, 15
  // CHECK: extractelement <16 x i16> %{{.*}}, i32 [[SHIFT2]]
  return _mm256_extract_epi16(__a, 16);
}

// _mm256_extract_epi8 with index 32, which equals the number of i8 lanes in
// the <32 x i8> vector named by the pattern. The expected IR first masks the
// index with 31 and then uses that masked value as the extractelement index.
// NOTE(review): presumably the out-of-range index is deliberate, to exercise
// the masking -- confirm against the header implementation.
int test_extract_epi8(__m256i __a) {
  // CHECK-LABEL: @test_extract_epi8
  // CHECK: [[SHIFT3:%[^ ]+]] = and i32 %{{.*}}, 31
  // CHECK: extractelement <32 x i8> %{{.*}}, i32 [[SHIFT3]]
  return _mm256_extract_epi8(__a, 32);
}

// _mm256_blend_pd with immediate 0x35: the expected IR is a shufflevector on
// two <4 x double> operands with mask <4, 1, 6, 3>, i.e. result lanes 0 and 2
// come from the second shufflevector operand and lanes 1 and 3 from the first.
// That lane selection is consistent with only the low four bits (0x5) of the
// immediate being used.
__m256d test_256_blend_pd(__m256d __a, __m256d __b) {
  // CHECK-LABEL: @test_256_blend_pd
  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  return _mm256_blend_pd(__a, __b, 0x35);
}

// _mm256_blend_ps with immediate 0x35: the expected IR is a shufflevector on
// two <8 x float> operands with mask <8, 1, 10, 3, 12, 13, 6, 7>, i.e. result
// lanes 0, 2, 4 and 5 (the set bits of 0x35) come from the second
// shufflevector operand and the remaining lanes from the first.
__m256 test_256_blend_ps(__m256 __a, __m256 __b) {
  // CHECK-LABEL: @test_256_blend_ps
  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
  return _mm256_blend_ps(__a, __b, 0x35);
}

// _mm256_insert_epi8(__a, 42, 3): the expected IR contains an insertelement on
// a <32 x i8> vector. The vector, value and index operands are matched
// loosely, so only the element type and lane count are pinned down.
__m256i test_256_insert_epi8(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi8
  // CHECK: insertelement <32 x i8> {{.*}}, i8 {{.*}}, i32 {{.*}}
  return _mm256_insert_epi8(__a, 42, 3);
}

// _mm256_insert_epi16(__a, 42, 3): the expected IR contains an insertelement
// on a <16 x i16> vector. The vector, value and index operands are matched
// loosely, so only the element type and lane count are pinned down.
__m256i test_256_insert_epi16(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi16
  // CHECK: insertelement <16 x i16> {{.*}}, i16 {{.*}}, i32 {{.*}}
  return _mm256_insert_epi16(__a, 42, 3);
}

// _mm256_insert_epi32(__a, 42, 3): the expected IR contains an insertelement
// on an <8 x i32> vector. The vector, value and index operands are matched
// loosely, so only the element type and lane count are pinned down.
__m256i test_256_insert_epi32(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi32
  // CHECK: insertelement <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}}
  return _mm256_insert_epi32(__a, 42, 3);
}

// _mm256_insert_epi64(__a, 42, 3): the expected IR contains an insertelement
// on a <4 x i64> vector. The vector, value and index operands are matched
// loosely, so only the element type and lane count are pinned down.
__m256i test_256_insert_epi64(__m256i __a) {
  // CHECK-LABEL: @test_256_insert_epi64
  // CHECK: insertelement <4 x i64> {{.*}}, i64 {{.*}}, i32 {{.*}}
  return _mm256_insert_epi64(__a, 42, 3);
}

// _mm256_undefined_ps: the expected IR returns an undef <8 x float> directly.
// The parameter list is spelled (void) so the definition is a real prototype
// rather than an old-style empty-parentheses declarator.
__m256 test_mm256_undefined_ps(void) {
  // CHECK-LABEL: @test_mm256_undefined_ps
  // CHECK: ret <8 x float> undef
  return _mm256_undefined_ps();
}

// _mm256_undefined_pd: the expected IR returns an undef <4 x double> directly.
// The parameter list is spelled (void) so the definition is a real prototype
// rather than an old-style empty-parentheses declarator.
__m256d test_mm256_undefined_pd(void) {
  // CHECK-LABEL: @test_mm256_undefined_pd
  // CHECK: ret <4 x double> undef
  return _mm256_undefined_pd();
}

// _mm256_undefined_si256: the expected IR returns an undef <4 x i64> directly.
// The parameter list is spelled (void) so the definition is a real prototype
// rather than an old-style empty-parentheses declarator.
__m256i test_mm256_undefined_si256(void) {
  // CHECK-LABEL: @test_mm256_undefined_si256
  // CHECK: ret <4 x i64> undef
  return _mm256_undefined_si256();
}