meta/recipes-devtools/gcc/gcc-4.6.0/gcc-4_6-branch-backports/0138-PR-tree-optimization-48616.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242

From 45a8b16e2be6b6ff5d37d43e86a6e2fce5cfb79d Mon Sep 17 00:00:00 2001
From: jakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Mon, 18 Apr 2011 07:38:11 +0000
Subject: [PATCH 138/200] 	PR tree-optimization/48616
 	* tree-vect-stmts.c (vectorizable_shift): If SLP, determine
 	whether the shift is by scalar or vector based on whether all SLP
 	scalar stmts have the same rhs.

	* gcc.dg/pr48616.c: New test.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-4_6-branch@172640 138bc75d-0d04-0410-961f-82ee72b054a4

index cfba894..ddb0a36 100644
new file mode 100644
index 0000000..8c8ec2c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr48616.c
@@ -0,0 +1,134 @@
+/* PR tree-optimization/48616 */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+extern void abort (void);
+int a[4] __attribute__((aligned (32)));
+int b[4] __attribute__((aligned (32)));
+int c[4] __attribute__((aligned (32)));
+int d[4] __attribute__((aligned (32)));
+int e[4] __attribute__((aligned (32)));
+
+__attribute__((noinline, noclone))
+int
+foo (int x)
+{
+  asm ("" : "+r" (x));
+  return x;
+}
+
+__attribute__((noinline, noclone))
+void
+fn1 (int i)
+{
+  a[0] = b[0] << c[0];
+  a[1] = b[1] << c[1];
+  a[2] = b[2] << c[2];
+  a[3] = b[3] << c[3];
+  if (i)
+    {
+      d[0] = e[0] >> c[0];
+      d[1] = e[1] >> c[1];
+      d[2] = e[2] >> c[2];
+      d[3] = e[3] >> c[3];
+    }
+}
+
+__attribute__((noinline, noclone))
+void
+fn2 (int i)
+{
+  a[0] = b[0] << 1;
+  a[1] = b[1] << 2;
+  a[2] = b[2] << 3;
+  a[3] = b[3] << 4;
+  if (i)
+    {
+      d[0] = e[0] >> 1;
+      d[1] = e[1] >> 2;
+      d[2] = e[2] >> 3;
+      d[3] = e[3] >> 4;
+    }
+}
+
+__attribute__((noinline, noclone))
+void
+fn3 (int i, int j)
+{
+  int x = foo (j);
+  a[0] = b[0] << x;
+  a[1] = b[1] << x;
+  a[2] = b[2] << x;
+  a[3] = b[3] << x;
+  if (i)
+    {
+      d[0] = e[0] >> x;
+      d[1] = e[1] >> x;
+      d[2] = e[2] >> x;
+      d[3] = e[3] >> x;
+    }
+}
+
+__attribute__((noinline, noclone))
+void
+fn4 (int i)
+{
+  a[0] = b[0] << 1;
+  a[1] = b[1] << 1;
+  a[2] = b[2] << 1;
+  a[3] = b[3] << 1;
+  if (i)
+    {
+      d[0] = e[0] >> 1;
+      d[1] = e[1] >> 1;
+      d[2] = e[2] >> 1;
+      d[3] = e[3] >> 1;
+    }
+}
+
+int
+main ()
+{
+  int i;
+  int *t;
+  for (i = 0; i < 4; i++)
+    {
+      b[i] = 32;
+      c[i] = i + 1;
+      e[i] = 32;
+    }
+  asm volatile ("" : : "r" (b) : "memory");
+  asm volatile ("" : : "r" (c) : "memory");
+  asm volatile ("" : "=r" (t) : "0" (d) : "memory");
+  fn1 (t != 0);
+  for (i = 0; i < 4; i++)
+    {
+      if (a[i] != (32 << (i + 1)) || d[i] != (32 >> (i + 1)))
+	abort ();
+      a[i] = 0;
+      d[i] = 0;
+    }
+  fn2 (t != 0);
+  for (i = 0; i < 4; i++)
+    {
+      if (a[i] != (32 << (i + 1)) || d[i] != (32 >> (i + 1)))
+	abort ();
+      a[i] = 0;
+      d[i] = 0;
+    }
+  fn3 (t != 0, t != 0);
+  for (i = 0; i < 4; i++)
+    {
+      if (a[i] != (32 << 1) || d[i] != (32 >> 1))
+	abort ();
+      a[i] = 0;
+      d[i] = 0;
+    }
+  fn4 (t != 0);
+  for (i = 0; i < 4; i++)
+    {
+      if (a[i] != (32 << 1) || d[i] != (32 >> 1))
+	abort ();
+    }
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index b347925..5685cde 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2077,7 +2077,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
   tree vop0, vop1;
   unsigned int k;
-  bool scalar_shift_arg = false;
+  bool scalar_shift_arg = true;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   int vf;
 
@@ -2159,8 +2159,34 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
   /* Determine whether the shift amount is a vector, or scalar.  If the
      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
 
+  if (dt[1] == vect_internal_def && !slp_node)
+    scalar_shift_arg = false;
+  else if (dt[1] == vect_constant_def
+	   || dt[1] == vect_external_def
+	   || dt[1] == vect_internal_def)
+    {
+      /* In SLP, need to check whether the shift count is the same,
+	 in loops if it is a constant or invariant, it is always
+	 a scalar shift.  */
+      if (slp_node)
+	{
+	  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
+	  gimple slpstmt;
+
+	  FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
+	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
+	      scalar_shift_arg = false;
+	}
+    }
+  else
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "operand mode requires invariant argument.");
+      return false;
+    }
+
   /* Vector shifted by vector.  */
-  if (dt[1] == vect_internal_def)
+  if (!scalar_shift_arg)
     {
       optab = optab_for_tree_code (code, vectype, optab_vector);
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -2168,13 +2194,12 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
     }
   /* See if the machine has a vector shifted by scalar insn and if not
      then see if it has a vector shifted by vector insn.  */
-  else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
+  else
     {
       optab = optab_for_tree_code (code, vectype, optab_scalar);
       if (optab
           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
         {
-          scalar_shift_arg = true;
           if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "vector/scalar shift/rotate found.");
         }
@@ -2185,6 +2210,8 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                && (optab_handler (optab, TYPE_MODE (vectype))
                       != CODE_FOR_nothing))
             {
+	      scalar_shift_arg = false;
+
               if (vect_print_dump_info (REPORT_DETAILS))
                 fprintf (vect_dump, "vector/vector shift/rotate found.");
 
@@ -2197,12 +2224,6 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
             }
         }
     }
-  else
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "operand mode requires invariant argument.");
-      return false;
-    }
 
   /* Supportable by target?  */
   if (!optab)
-- 
1.7.0.4