1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package other;
18 
19 /**
20  * Tests for dot product idiom vectorization: byte case.
21  */
22 public class TestByte {
23 
24   public static final int ARRAY_SIZE = 1024;
25 
26   /// CHECK-START: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (before)
27   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
28   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
29   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
30   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
31   /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
32   /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
33   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                               loop:<<Loop>>      outer_loop:none
34   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
35   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
36 
37   /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (after)
38   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
39   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
40   /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
41   /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
42   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
43   /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
44   /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
45   /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
46   /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8   loop:<<Loop>>      outer_loop:none
47   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
48   //
49   /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
50   /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
testDotProdSimple(byte[] a, byte[] b)51   public static final int testDotProdSimple(byte[] a, byte[] b) {
52     int s = 1;
53     for (int i = 0; i < b.length; i++) {
54       int temp = a[i] * b[i];
55       s += temp;
56     }
57     return s - 1;
58   }
59 
60   /// CHECK-START: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (before)
61   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
62   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
63   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
64   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
65   /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
66   /// CHECK-DAG: <<AddC1:i\d+>>   Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
67   /// CHECK-DAG: <<TypeC1:b\d+>>  TypeConversion [<<AddC1>>]                            loop:<<Loop>>      outer_loop:none
68   /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
69   /// CHECK-DAG: <<AddC2:i\d+>>   Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
70   /// CHECK-DAG: <<TypeC2:b\d+>>  TypeConversion [<<AddC2>>]                            loop:<<Loop>>      outer_loop:none
71   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
72   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
73   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
74 
75   /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (after)
76   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
77   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
78   /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
79   /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
80   /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
81   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
82   /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
83   /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
84   /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
85   /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
86   /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
87   /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8   loop:<<Loop>>      outer_loop:none
88   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
89   //
90   /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
91   /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
testDotProdComplex(byte[] a, byte[] b)92   public static final int testDotProdComplex(byte[] a, byte[] b) {
93     int s = 1;
94     for (int i = 0; i < b.length; i++) {
95       int temp = ((byte)(a[i] + 1)) * ((byte)(b[i] + 1));
96       s += temp;
97     }
98     return s - 1;
99   }
100 
101   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (before)
102   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
103   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
104   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
105   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
106   /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
107   /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
108   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                               loop:<<Loop>>      outer_loop:none
109   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
110   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
111 
112   /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (after)
113   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
114   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
115   /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
116   /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
117   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
118   /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
119   /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
120   /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
121   /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint8  loop:<<Loop>>      outer_loop:none
122   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
123   //
124   /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
125   /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
testDotProdSimpleUnsigned(byte[] a, byte[] b)126   public static final int testDotProdSimpleUnsigned(byte[] a, byte[] b) {
127     int s = 1;
128     for (int i = 0; i < b.length; i++) {
129       int temp = (a[i] & 0xff) * (b[i] & 0xff);
130       s += temp;
131     }
132     return s - 1;
133   }
134 
135   /// CHECK-START: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (before)
136   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
137   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
138   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
139   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
140   /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
141   /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
142   /// CHECK-DAG: <<TypeC1:a\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none
143   /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
144   /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
145   /// CHECK-DAG: <<TypeC2:a\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none
146   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
147   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
148   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
149 
150   /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (after)
151   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
152   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
153   /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
154   /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
155   /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
156   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
157   /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
158   /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
159   /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
160   /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
161   /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
162   /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8  loop:<<Loop>>      outer_loop:none
163   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
164   //
165   /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
166   /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
testDotProdComplexUnsigned(byte[] a, byte[] b)167   public static final int testDotProdComplexUnsigned(byte[] a, byte[] b) {
168     int s = 1;
169     for (int i = 0; i < b.length; i++) {
170       int temp = (((a[i] & 0xff) + 1) & 0xff) * (((b[i] & 0xff) + 1) & 0xff);
171       s += temp;
172     }
173     return s - 1;
174   }
175 
176   /// CHECK-START: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (before)
177   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
178   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
179   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
180   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
181   /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
182   /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
183   /// CHECK-DAG: <<TypeC1:b\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none
184   /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
185   /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
186   /// CHECK-DAG: <<TypeC2:b\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none
187   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
188   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
189   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
190 
191   /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (after)
192   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
193   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
194   /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
195   /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
196   /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
197   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
198   /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
199   /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
200   /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
201   /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
202   /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
203   /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8   loop:<<Loop>>      outer_loop:none
204   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
205   //
206   /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
207   /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
testDotProdComplexUnsignedCastedToSigned(byte[] a, byte[] b)208   public static final int testDotProdComplexUnsignedCastedToSigned(byte[] a, byte[] b) {
209     int s = 1;
210     for (int i = 0; i < b.length; i++) {
211       int temp = ((byte)((a[i] & 0xff) + 1)) * ((byte)((b[i] & 0xff) + 1));
212       s += temp;
213     }
214     return s - 1;
215   }
216 
217   /// CHECK-START: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (before)
218   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
219   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
220   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
221   /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
222   /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
223   /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
224   /// CHECK-DAG: <<TypeC1:a\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none
225   /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
226   /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
227   /// CHECK-DAG: <<TypeC2:a\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none
228   /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
229   /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
230   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
231 
232   /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (after)
233   /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
234   /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
235   /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
236   /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
237   /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
238   /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
239   /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
240   /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
241   /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
242   /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
243   /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
244   /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8  loop:<<Loop>>      outer_loop:none
245   /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
246   //
247   /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
248   /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
testDotProdComplexSignedCastedToUnsigned(byte[] a, byte[] b)249   public static final int testDotProdComplexSignedCastedToUnsigned(byte[] a, byte[] b) {
250     int s = 1;
251     for (int i = 0; i < b.length; i++) {
252       int temp = ((a[i] + 1) & 0xff) * ((b[i] + 1) & 0xff);
253       s += temp;
254     }
255     return s - 1;
256   }
257 
258   /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSignedWidening(byte[], byte[]) loop_optimization (after)
259   /// CHECK-DAG:                  VecDotProd type:Int8
testDotProdSignedWidening(byte[] a, byte[] b)260   public static final int testDotProdSignedWidening(byte[] a, byte[] b) {
261     int s = 1;
262     for (int i = 0; i < b.length; i++) {
263       int temp = ((short)(a[i])) * ((short)(b[i]));
264       s += temp;
265     }
266     return s - 1;
267   }
268 
269   /// CHECK-START-{ARM64}: int other.TestByte.testDotProdParamSigned(int, byte[]) loop_optimization (after)
270   /// CHECK-DAG:                  VecDotProd type:Int8
testDotProdParamSigned(int x, byte[] b)271   public static final int testDotProdParamSigned(int x, byte[] b) {
272     int s = 1;
273     for (int i = 0; i < b.length; i++) {
274       int temp = (byte)(x) * b[i];
275       s += temp;
276     }
277     return s - 1;
278   }
279 
280   /// CHECK-START-{ARM64}: int other.TestByte.testDotProdParamUnsigned(int, byte[]) loop_optimization (after)
281   /// CHECK-DAG:                  VecDotProd type:Uint8
testDotProdParamUnsigned(int x, byte[] b)282   public static final int testDotProdParamUnsigned(int x, byte[] b) {
283     int s = 1;
284     for (int i = 0; i < b.length; i++) {
285       int temp = (x & 0xff) * (b[i] & 0xff);
286       s += temp;
287     }
288     return s - 1;
289   }
290 
291   // No DOTPROD cases.
292 
293   /// CHECK-START: int other.TestByte.testDotProdIntParam(int, byte[]) loop_optimization (after)
294   /// CHECK-NOT:                  VecDotProd
testDotProdIntParam(int x, byte[] b)295   public static final int testDotProdIntParam(int x, byte[] b) {
296     int s = 1;
297     for (int i = 0; i < b.length; i++) {
298       int temp = b[i] * (x);
299       s += temp;
300     }
301     return s - 1;
302   }
303 
304   /// CHECK-START: int other.TestByte.testDotProdSignedToChar(byte[], byte[]) loop_optimization (after)
305   /// CHECK-NOT:                  VecDotProd
testDotProdSignedToChar(byte[] a, byte[] b)306   public static final int testDotProdSignedToChar(byte[] a, byte[] b) {
307     int s = 1;
308     for (int i = 0; i < b.length; i++) {
309       int temp = ((char)(a[i])) * ((char)(b[i]));
310       s += temp;
311     }
312     return s - 1;
313   }
314 
315   // Cases when result of Mul is type-converted are not supported.
316 
317   /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToSignedByte(byte[], byte[]) loop_optimization (after)
318   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleCastedToSignedByte(byte[] a, byte[] b)319   public static final int testDotProdSimpleCastedToSignedByte(byte[] a, byte[] b) {
320     int s = 1;
321     for (int i = 0; i < b.length; i++) {
322       byte temp = (byte)(a[i] * b[i]);
323       s += temp;
324     }
325     return s - 1;
326   }
327 
328   /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToUnsignedByte(byte[], byte[]) loop_optimization (after)
329   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleCastedToUnsignedByte(byte[] a, byte[] b)330   public static final int testDotProdSimpleCastedToUnsignedByte(byte[] a, byte[] b) {
331     int s = 1;
332     for (int i = 0; i < b.length; i++) {
333       s += (a[i] * b[i]) & 0xff;
334     }
335     return s - 1;
336   }
337 
338   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToSignedByte(byte[], byte[]) loop_optimization (after)
339   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleUnsignedCastedToSignedByte(byte[] a, byte[] b)340   public static final int testDotProdSimpleUnsignedCastedToSignedByte(byte[] a, byte[] b) {
341     int s = 1;
342     for (int i = 0; i < b.length; i++) {
343       byte temp = (byte)((a[i] & 0xff) * (b[i] & 0xff));
344       s += temp;
345     }
346     return s - 1;
347   }
348 
349   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToUnsignedByte(byte[], byte[]) loop_optimization (after)
350   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleUnsignedCastedToUnsignedByte(byte[] a, byte[] b)351   public static final int testDotProdSimpleUnsignedCastedToUnsignedByte(byte[] a, byte[] b) {
352     int s = 1;
353     for (int i = 0; i < b.length; i++) {
354       s += ((a[i] & 0xff) * (b[i] & 0xff)) & 0xff;
355     }
356     return s - 1;
357   }
358 
359   /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToShort(byte[], byte[]) loop_optimization (after)
360   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleCastedToShort(byte[] a, byte[] b)361   public static final int testDotProdSimpleCastedToShort(byte[] a, byte[] b) {
362     int s = 1;
363     for (int i = 0; i < b.length; i++) {
364       short temp = (short)(a[i] * b[i]);
365       s += temp;
366     }
367     return s - 1;
368   }
369 
370   /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToChar(byte[], byte[]) loop_optimization (after)
371   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleCastedToChar(byte[] a, byte[] b)372   public static final int testDotProdSimpleCastedToChar(byte[] a, byte[] b) {
373     int s = 1;
374     for (int i = 0; i < b.length; i++) {
375       char temp = (char)(a[i] * b[i]);
376       s += temp;
377     }
378     return s - 1;
379   }
380 
381   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToShort(byte[], byte[]) loop_optimization (after)
382   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleUnsignedCastedToShort(byte[] a, byte[] b)383   public static final int testDotProdSimpleUnsignedCastedToShort(byte[] a, byte[] b) {
384     int s = 1;
385     for (int i = 0; i < b.length; i++) {
386       short temp = (short)((a[i] & 0xff) * (b[i] & 0xff));
387       s += temp;
388     }
389     return s - 1;
390   }
391 
392   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToChar(byte[], byte[]) loop_optimization (after)
393   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleUnsignedCastedToChar(byte[] a, byte[] b)394   public static final int testDotProdSimpleUnsignedCastedToChar(byte[] a, byte[] b) {
395     int s = 1;
396     for (int i = 0; i < b.length; i++) {
397       char temp = (char)((a[i] & 0xff) * (b[i] & 0xff));
398       s += temp;
399     }
400     return s - 1;
401   }
402 
403   /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToLong(byte[], byte[]) loop_optimization (after)
404   /// CHECK-NOT:                  VecDotProd
testDotProdSimpleUnsignedCastedToLong(byte[] a, byte[] b)405   public static final int testDotProdSimpleUnsignedCastedToLong(byte[] a, byte[] b) {
406     int s = 1;
407     for (int i = 0; i < b.length; i++) {
408       long temp = (long)((a[i] & 0xff) * (b[i] & 0xff));
409       s += temp;
410     }
411     return s - 1;
412   }
413 
414   /// CHECK-START: int other.TestByte.testDotProdUnsignedSigned(byte[], byte[]) loop_optimization (after)
415   /// CHECK-NOT:                  VecDotProd
testDotProdUnsignedSigned(byte[] a, byte[] b)416   public static final int testDotProdUnsignedSigned(byte[] a, byte[] b) {
417     int s = 1;
418     for (int i = 0; i < b.length; i++) {
419       int temp = (a[i] & 0xff) * b[i];
420       s += temp;
421     }
422     return s - 1;
423   }
424 
expectEquals(int expected, int result)425   private static void expectEquals(int expected, int result) {
426     if (expected != result) {
427       throw new Error("Expected: " + expected + ", found: " + result);
428     }
429   }
430 
testDotProd(byte[] b1, byte[] b2, int[] results)431   private static void testDotProd(byte[] b1, byte[] b2, int[] results) {
432     expectEquals(results[0], testDotProdSimple(b1, b2));
433     expectEquals(results[1], testDotProdComplex(b1, b2));
434     expectEquals(results[2], testDotProdSimpleUnsigned(b1, b2));
435     expectEquals(results[3], testDotProdComplexUnsigned(b1, b2));
436     expectEquals(results[4], testDotProdComplexUnsignedCastedToSigned(b1, b2));
437     expectEquals(results[5], testDotProdComplexSignedCastedToUnsigned(b1, b2));
438     expectEquals(results[6], testDotProdSignedWidening(b1, b2));
439     expectEquals(results[7], testDotProdParamSigned(-128, b2));
440     expectEquals(results[8], testDotProdParamUnsigned(-128, b2));
441     expectEquals(results[9], testDotProdIntParam(-128, b2));
442     expectEquals(results[10], testDotProdSignedToChar(b1, b2));
443     expectEquals(results[11], testDotProdSimpleCastedToSignedByte(b1, b2));
444     expectEquals(results[12], testDotProdSimpleCastedToUnsignedByte(b1, b2));
445     expectEquals(results[13], testDotProdSimpleUnsignedCastedToSignedByte(b1, b2));
446     expectEquals(results[14], testDotProdSimpleUnsignedCastedToUnsignedByte(b1, b2));
447     expectEquals(results[15], testDotProdSimpleCastedToShort(b1, b2));
448     expectEquals(results[16], testDotProdSimpleCastedToChar(b1, b2));
449     expectEquals(results[17], testDotProdSimpleUnsignedCastedToShort(b1, b2));
450     expectEquals(results[18], testDotProdSimpleUnsignedCastedToChar(b1, b2));
451     expectEquals(results[19], testDotProdSimpleUnsignedCastedToLong(b1, b2));
452     expectEquals(results[20], testDotProdUnsignedSigned(b1, b2));
453   }
454 
run()455   public static void run() {
456     byte[] b1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
457     byte[] b2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
458     int[] results_1 = { 64516, 65548, 64516, 65548, 65548, 65548, 64516, -65024, 65024, -65024,
459                         64516, 4, 4, 4, 4, 64516, 64516, 64516, 64516, 64516, 64516 };
460     testDotProd(b1_1, b2_1, results_1);
461 
462     byte[] b1_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
463     byte[] b2_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
464     int[] results_2 = { 80645, 81931, 80645, 81931, 81931, 81931, 80645, -81280, 81280, -81280,
465                         80645, 5, 5, 5, 5, 80645, 80645, 80645, 80645, 80645, 80645 };
466     testDotProd(b1_2, b2_2, results_2);
467 
468     byte[] b1_3 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
469     byte[] b2_3 = {  127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  127,  127,  127,  127 };
470     int[] results_3 = { -81280, 81291, 81280, 82571, 81291, 82571, -81280, -81280, 81280, -81280,
471                         41534080, -640, 640, -640, 640, -81280, 246400, 81280, 81280, 81280, 81280 };
472     testDotProd(b1_3, b2_3, results_3);
473 
474     byte[] b1_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
475     byte[] b2_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
476     int[] results_4 = { 81920, 80656, 81920, 83216, 80656, 83216, 81920, 81920, 81920, 81920,
477                        -83804160, 0, 0, 0, 0, 81920, 81920, 81920, 81920, 81920, -81920 };
478     testDotProd(b1_4, b2_4, results_4);
479   }
480 
main(String[] args)481   public static void main(String[] args) {
482     run();
483   }
484 }
485