Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
misc.cpp
Go to the documentation of this file.
1#include <stdio.h>
2#include <string.h>
3#include <string>
4#include <xbyak/xbyak.h>
5#include <cybozu/inttype.hpp>
6#include <cybozu/test.hpp>
7
8using namespace Xbyak;
9
// NOTE(review): this listing omits source lines 10, 13, 19 and 20 (presumably the
// CYBOZU_TEST_AUTO header, the constructor signature and further checks) - confirm
// against the original file before relying on this excerpt.
// Visible part: after setSize(4095) followed by a single db(1), getSize() is
// expected to report 4096.
11{
12 struct Code : Xbyak::CodeGenerator {
14 {
15 setSize(4095);
16 db(1);
17 size_t size = getSize();
18 CYBOZU_TEST_EQUAL(size, 4096u);
21 }
22 } code;
23}
24
// Checks operand comparison (== / !=) using the predefined operands from Xbyak::util:
// a register equals itself, ecx differs from xmm0, identical address expressions
// compare equal, and addresses that differ in frame (dword vs ptr) or in
// displacement (+3) compare unequal.
25CYBOZU_TEST_AUTO(compOperand)
26{
27 using namespace Xbyak::util;
28 CYBOZU_TEST_ASSERT(eax == eax);
29 CYBOZU_TEST_ASSERT(ecx != xmm0);
30 CYBOZU_TEST_ASSERT(ptr[eax] == ptr[eax]);
31 CYBOZU_TEST_ASSERT(dword[eax] != ptr[eax]);
32 CYBOZU_TEST_ASSERT(ptr[eax] != ptr[eax+3]);
33}
34
// NOTE(review): the test header (source line 35) and the bodies of both branches
// of the if/else below (source lines 76 and 78) are missing from this listing.
// Table-driven check: each entry pairs a 64-bit value v with an operand width in
// bits and a flag saying whether an error is expected for that combination.
36{
37 struct Code : Xbyak::CodeGenerator {
38 Code()
39 {
40 const struct {
41 uint64_t v;
42 int bit;
43 bool error;
44 } tbl[] = {
// 8-bit operand
45 { uint64_t(-1), 8, false },
46 { 0x12, 8, false },
47 { 0x80, 8, false },
48 { 0xff, 8, false },
49 { 0x100, 8, true },
50
// 16-bit operand
51 { 1, 16, false },
52 { uint64_t(-1), 16, false },
53 { 0x7fff, 16, false },
54 { 0xffff, 16, false },
55 { 0x10000, 16, true },
56
// 32-bit operand
57 { uint64_t(-1), 32, false },
58 { 0x7fffffff, 32, false },
59 { uint64_t(-0x7fffffff), 32, false },
60 { 0xffffffff, 32, false },
61 { 0x100000000ull, 32, true },
62
// 64-bit operand entries are only compiled when XBYAK64 is defined
63#ifdef XBYAK64
64 { uint64_t(-1), 64, false },
65 { 0x7fffffff, 64, false },
66 { 0xffffffffffffffffull, 64, false },
67 { 0x80000000, 64, true },
68 { 0xffffffff, 64, true },
69#endif
70 };
71 for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
72 const int bit = tbl[i].bit;
73 const uint64_t v = tbl[i].v;
// Select the address frame that matches the entry's operand width.
74 const Xbyak::AddressFrame& af = bit == 8 ? byte : bit == 16 ? word : bit == 32 ? dword : qword;
// NOTE(review): branch bodies are not shown; presumably an instruction using
// af and v is expected to throw when error is true and not to throw
// otherwise - TODO confirm against the original file.
75 if (tbl[i].error) {
77 } else {
79 }
80 }
81 }
82 } code;
83}
84
// NOTE(review): the test header (source line 85) and source line 102 are missing
// from this listing.
// Exercises align(): after emitting a varying number of filler bytes, align(16)
// must leave the current code address on a 16-byte boundary.
86{
87 struct Code : Xbyak::CodeGenerator {
88 Code()
89 {
90 const size_t alignSize = 16;
// Emit 0..19 filler bytes, then align and verify the resulting address.
91 for (int padding = 0; padding < 20; padding++) {
92 for (int i = 0; i < padding; i++) {
93 db(1);
94 }
95 align(alignSize);
96 CYBOZU_TEST_EQUAL(size_t(getCurr()) % alignSize, 0u);
97 }
98 align(alignSize);
// Remember the already-aligned position before calling align() once more.
99 const uint8 *p = getCurr();
100 // do nothing if aligned
101 align(alignSize);
// NOTE(review): the omitted line 102 presumably compares getCurr() with p - confirm.
103 }
104 } c;
105}
106
107#ifdef XBYAK64
// Assembles the v4fmadd*/v4fnmadd*/vp4dpwssd* instructions listed in Code() and
// checks that both the generated size and every generated byte match tbl.
108CYBOZU_TEST_AUTO(vfmaddps)
109{
110 struct Code : Xbyak::CodeGenerator {
111 Code()
112 {
113 v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);
114 v4fmaddss(xmm15, xmm8, ptr [rax + 64]);
115 v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);
116 v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);
117 vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);
118 vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
119 }
120 } c;
// Expected machine code; six rows for the six instructions above, in emission order.
121 const uint8_t tbl[] = {
122 0x62, 0xf2, 0x3f, 0x48, 0x9a, 0x4a, 0x04,
123 0x62, 0x72, 0x3f, 0x08, 0x9b, 0x78, 0x04,
124 0x62, 0xf2, 0x6f, 0x4d, 0xaa, 0x69, 0x08,
125 0x62, 0x62, 0x6f, 0x08, 0xab, 0x7c, 0x24, 0x08,
126 0x62, 0xe2, 0x77, 0xcf, 0x52, 0x78, 0x04,
127 0x62, 0x72, 0x67, 0x4c, 0x53, 0x54, 0x84, 0x04,
128 };
129 const size_t n = sizeof(tbl) / sizeof(tbl[0]);
130 CYBOZU_TEST_EQUAL(c.getSize(), n);
131 CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
132}
// NOTE(review): the test header (source line 133) is missing from this listing.
// Assembles vaesdec/vaesdeclast/vaesenc/vaesenclast with xmm, ymm and zmm operands
// and compares the generated size and bytes with tbl.
134{
135 struct Code : Xbyak::CodeGenerator {
136 Code()
137 {
138 vaesdec(xmm20, xmm30, ptr [rcx + 64]);
139 vaesdec(ymm1, ymm2, ptr [rcx + 64]);
140 vaesdec(zmm1, zmm2, ptr [rcx + 64]);
141
142 vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
143 vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
144 vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
145
146 vaesenc(xmm20, xmm30, ptr [rcx + 64]);
147 vaesenc(ymm1, ymm2, ptr [rcx + 64]);
148 vaesenc(zmm1, zmm2, ptr [rcx + 64]);
149
150 vaesenclast(xmm20, xmm30, ptr [rax + 64]);
151 vaesenclast(ymm20, ymm30, ptr [rax + 64]);
152 vaesenclast(zmm20, zmm30, ptr [rax + 64]);
153 }
154 } c;
// Expected bytes, grouped like the instructions above. Rows starting with 0xC4
// end in the raw displacement 0x40; rows starting with 0x62 end in 0x04/0x02/0x01,
// which is consistent with EVEX disp8*N compression of +64 for 16/32/64-byte vectors.
155 const uint8_t tbl[] = {
156 0x62, 0xE2, 0x0D, 0x00, 0xDE, 0x61, 0x04,
157 0xC4, 0xE2, 0x6D, 0xDE, 0x49, 0x40,
158 0x62, 0xF2, 0x6D, 0x48, 0xDE, 0x49, 0x01,
159
160 0x62, 0xE2, 0x0D, 0x00, 0xDF, 0x60, 0x04,
161 0x62, 0xE2, 0x0D, 0x20, 0xDF, 0x60, 0x02,
162 0x62, 0xE2, 0x0D, 0x40, 0xDF, 0x60, 0x01,
163
164 0x62, 0xE2, 0x0D, 0x00, 0xDC, 0x61, 0x04,
165 0xC4, 0xE2, 0x6D, 0xDC, 0x49, 0x40,
166 0x62, 0xF2, 0x6D, 0x48, 0xDC, 0x49, 0x01,
167
168 0x62, 0xE2, 0x0D, 0x00, 0xDD, 0x60, 0x04,
169 0x62, 0xE2, 0x0D, 0x20, 0xDD, 0x60, 0x02,
170 0x62, 0xE2, 0x0D, 0x40, 0xDD, 0x60, 0x01,
171 };
172 const size_t n = sizeof(tbl) / sizeof(tbl[0]);
173 CYBOZU_TEST_EQUAL(c.getSize(), n);
174 CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
175}
// NOTE(review): the test header (source line 176) is missing from this listing.
// Assembles vpclmulqdq with immediate 3 for xmm/ymm/zmm destinations, once with
// low registers (xmm2..zmm2) and once with register 20, and compares the
// generated size and bytes with tbl.
177{
178 struct Code : Xbyak::CodeGenerator {
179 Code()
180 {
181 vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
182 vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
183 vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
184
185 vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
186 vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
187 vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
188 }
189 } c;
// Expected bytes, one row per instruction above; the trailing 0x03 in every row
// is the immediate operand.
190 const uint8_t tbl[] = {
191 0xc4, 0xe3, 0x61, 0x44, 0x50, 0x40, 0x03,
192 0xc4, 0xe3, 0x65, 0x44, 0x50, 0x40, 0x03,
193 0x62, 0xf3, 0x65, 0x48, 0x44, 0x50, 0x01, 0x03,
194 0x62, 0xe3, 0x65, 0x08, 0x44, 0x60, 0x04, 0x03,
195 0x62, 0xe3, 0x65, 0x28, 0x44, 0x60, 0x02, 0x03,
196 0x62, 0xe3, 0x65, 0x48, 0x44, 0x60, 0x01, 0x03,
197 };
198 const size_t n = sizeof(tbl) / sizeof(tbl[0]);
199 CYBOZU_TEST_EQUAL(c.getSize(), n);
200 CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
201}
// Assembles vcompressb and vcompressw with memory destinations and with masked
// register destinations (with and without T_z) for xmm/ymm/zmm, then compares
// the generated size and bytes with tbl.
202CYBOZU_TEST_AUTO(vcompressb_w)
203{
204 struct Code : Xbyak::CodeGenerator {
205 Code()
206 {
207 vcompressb(ptr[rax + 64], xmm1);
208 vcompressb(xmm30 | k5, xmm1);
209 vcompressb(ptr[rax + 64], ymm1);
210 vcompressb(ymm30 | k3 |T_z, ymm1);
211 vcompressb(ptr[rax + 64], zmm1);
212 vcompressb(zmm30 | k2 |T_z, zmm1);
213
214 vcompressw(ptr[rax + 64], xmm1);
215 vcompressw(xmm30 | k5, xmm1);
216 vcompressw(ptr[rax + 64], ymm1);
217 vcompressw(ymm30 | k3 |T_z, ymm1);
218 vcompressw(ptr[rax + 64], zmm1);
219 vcompressw(zmm30 | k2 |T_z, zmm1);
220 }
221 } c;
// Expected bytes, one row per instruction above. Memory-form rows end in the
// displacement byte: 0x40 in the vcompressb group and 0x20 in the vcompressw group.
222 const uint8_t tbl[] = {
223 0x62, 0xf2, 0x7d, 0x08, 0x63, 0x48, 0x40,
224 0x62, 0x92, 0x7d, 0x0d, 0x63, 0xce,
225 0x62, 0xf2, 0x7d, 0x28, 0x63, 0x48, 0x40,
226 0x62, 0x92, 0x7d, 0xab, 0x63, 0xce,
227 0x62, 0xf2, 0x7d, 0x48, 0x63, 0x48, 0x40,
228 0x62, 0x92, 0x7d, 0xca, 0x63, 0xce,
229
230 0x62, 0xf2, 0xfd, 0x08, 0x63, 0x48, 0x20,
231 0x62, 0x92, 0xfd, 0x0d, 0x63, 0xce,
232 0x62, 0xf2, 0xfd, 0x28, 0x63, 0x48, 0x20,
233 0x62, 0x92, 0xfd, 0xab, 0x63, 0xce,
234 0x62, 0xf2, 0xfd, 0x48, 0x63, 0x48, 0x20,
235 0x62, 0x92, 0xfd, 0xca, 0x63, 0xce,
236 };
237 const size_t n = sizeof(tbl) / sizeof(tbl[0]);
238 CYBOZU_TEST_EQUAL(c.getSize(), n);
239 CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
240}
// NOTE(review): the test header (source line 241) is missing from this listing.
// Assembles the vpshld{w,d,q} (immediate 5) and vpshldv{w,d,q} instructions with
// a k3 zeroing mask for xmm/ymm/zmm, then compares the generated size and bytes
// with tbl.
242{
243 struct Code : Xbyak::CodeGenerator {
244 Code()
245 {
246 vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
247 vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
248 vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
249
250 vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
251 vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
252 vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
253
254 vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
255 vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
256 vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
257
258 vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
259 vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
260 vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
261
262 vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
263 vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
264 vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
265
266 vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
267 vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
268 vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
269 }
270 } c;
// Expected bytes; groups follow the instruction groups above in the same order.
// The first three groups carry the trailing immediate 0x05, the last three do not.
271 const uint8_t tbl[] = {
272 0x62, 0xf3, 0xed, 0x8b, 0x70, 0x68, 0x04, 0x05,
273 0x62, 0xf3, 0xed, 0xab, 0x70, 0x68, 0x02, 0x05,
274 0x62, 0xf3, 0xed, 0xcb, 0x70, 0x68, 0x01, 0x05,
275
276 0x62, 0xf3, 0x6d, 0x8b, 0x71, 0x68, 0x04, 0x05,
277 0x62, 0xf3, 0x6d, 0xab, 0x71, 0x68, 0x02, 0x05,
278 0x62, 0xf3, 0x6d, 0xcb, 0x71, 0x68, 0x01, 0x05,
279
280 0x62, 0xf3, 0xed, 0x8b, 0x71, 0x68, 0x04, 0x05,
281 0x62, 0xf3, 0xed, 0xab, 0x71, 0x68, 0x02, 0x05,
282 0x62, 0xf3, 0xed, 0xcb, 0x71, 0x68, 0x01, 0x05,
283
284 0x62, 0xf2, 0xed, 0x8b, 0x70, 0x68, 0x04,
285 0x62, 0xf2, 0xed, 0xab, 0x70, 0x68, 0x02,
286 0x62, 0xf2, 0xed, 0xcb, 0x70, 0x68, 0x01,
287
288 0x62, 0xf2, 0x6d, 0x8b, 0x71, 0x68, 0x04,
289 0x62, 0xf2, 0x6d, 0xab, 0x71, 0x68, 0x02,
290 0x62, 0xf2, 0x6d, 0xcb, 0x71, 0x68, 0x01,
291
292 0x62, 0xf2, 0xed, 0x8b, 0x71, 0x68, 0x04,
293 0x62, 0xf2, 0xed, 0xab, 0x71, 0x68, 0x02,
294 0x62, 0xf2, 0xed, 0xcb, 0x71, 0x68, 0x01,
295 };
296 const size_t n = sizeof(tbl) / sizeof(tbl[0]);
297 CYBOZU_TEST_EQUAL(c.getSize(), n);
298 CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
299}
// NOTE(review): the test header (source line 300) is missing from this listing.
// Assembles the vpshrd{w,d,q} (immediate 5) and vpshrdv{w,d,q} instructions with
// a k3 zeroing mask for xmm/ymm/zmm, first with plain memory operands (ptr) and
// then, for the d/q variants, with ptr_b memory operands, and compares the
// generated size and bytes with tbl.
301{
302 struct Code : Xbyak::CodeGenerator {
303 Code()
304 {
305 vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
306 vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
307 vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
308
309 vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
310 vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
311 vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
312
313 vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
314 vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
315 vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
316
317 vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
318 vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
319 vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
320
321 vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
322 vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
323 vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
324
325 vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
326 vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
327 vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
328
// Same d/q instructions again, now with ptr_b memory operands.
329 vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
330 vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
331 vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
332
333 vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
334 vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
335 vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
336
337 vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
338 vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
339 vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
340
341 vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
342 vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
343 vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
344 }
345 } c;
// Expected bytes; groups follow the instruction groups above in the same order.
// In the ptr groups the displacement byte varies with vector width (0x04/0x02/0x01);
// in the ptr_b groups it is 0x10 for the d variants and 0x08 for the q variants,
// regardless of vector width.
346 const uint8_t tbl[] = {
347 0x62, 0xf3, 0xed, 0x8b, 0x72, 0x68, 0x04, 0x05,
348 0x62, 0xf3, 0xed, 0xab, 0x72, 0x68, 0x02, 0x05,
349 0x62, 0xf3, 0xed, 0xcb, 0x72, 0x68, 0x01, 0x05,
350
351 0x62, 0xf3, 0x6d, 0x8b, 0x73, 0x68, 0x04, 0x05,
352 0x62, 0xf3, 0x6d, 0xab, 0x73, 0x68, 0x02, 0x05,
353 0x62, 0xf3, 0x6d, 0xcb, 0x73, 0x68, 0x01, 0x05,
354
355 0x62, 0xf3, 0xed, 0x8b, 0x73, 0x68, 0x04, 0x05,
356 0x62, 0xf3, 0xed, 0xab, 0x73, 0x68, 0x02, 0x05,
357 0x62, 0xf3, 0xed, 0xcb, 0x73, 0x68, 0x01, 0x05,
358
359 0x62, 0xf2, 0xed, 0x8b, 0x72, 0x68, 0x04,
360 0x62, 0xf2, 0xed, 0xab, 0x72, 0x68, 0x02,
361 0x62, 0xf2, 0xed, 0xcb, 0x72, 0x68, 0x01,
362
363 0x62, 0xf2, 0x6d, 0x8b, 0x73, 0x68, 0x04,
364 0x62, 0xf2, 0x6d, 0xab, 0x73, 0x68, 0x02,
365 0x62, 0xf2, 0x6d, 0xcb, 0x73, 0x68, 0x01,
366
367 0x62, 0xf2, 0xed, 0x8b, 0x73, 0x68, 0x04,
368 0x62, 0xf2, 0xed, 0xab, 0x73, 0x68, 0x02,
369 0x62, 0xf2, 0xed, 0xcb, 0x73, 0x68, 0x01,
370
371 0x62, 0xf3, 0x6d, 0x9b, 0x73, 0x68, 0x10, 0x05,
372 0x62, 0xf3, 0x6d, 0xbb, 0x73, 0x68, 0x10, 0x05,
373 0x62, 0xf3, 0x6d, 0xdb, 0x73, 0x68, 0x10, 0x05,
374
375 0x62, 0xf3, 0xed, 0x9b, 0x73, 0x68, 0x08, 0x05,
376 0x62, 0xf3, 0xed, 0xbb, 0x73, 0x68, 0x08, 0x05,
377 0x62, 0xf3, 0xed, 0xdb, 0x73, 0x68, 0x08, 0x05,
378
379 0x62, 0xf2, 0x6d, 0x9b, 0x73, 0x68, 0x10,
380 0x62, 0xf2, 0x6d, 0xbb, 0x73, 0x68, 0x10,
381 0x62, 0xf2, 0x6d, 0xdb, 0x73, 0x68, 0x10,
382
383 0x62, 0xf2, 0xed, 0x9b, 0x73, 0x68, 0x08,
384 0x62, 0xf2, 0xed, 0xbb, 0x73, 0x68, 0x08,
385 0x62, 0xf2, 0xed, 0xdb, 0x73, 0x68, 0x08,
386 };
387 const size_t n = sizeof(tbl) / sizeof(tbl[0]);
388 CYBOZU_TEST_EQUAL(c.getSize(), n);
389 CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
390}
// Assembles vpopcnt{b,w,d,q} with a k3 zeroing mask for xmm/ymm/zmm, using plain
// memory operands (ptr) for all four and additionally ptr_b operands for the
// d and q variants, then compares the generated size and bytes with tbl.
391CYBOZU_TEST_AUTO(vpopcnt)
392{
393 struct Code : Xbyak::CodeGenerator {
394 Code()
395 {
396 vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
397 vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
398 vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
399
400 vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
401 vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
402 vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
403
404 vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
405 vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
406 vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
407
408 vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
409 vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
410 vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
411
412 vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
413 vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
414 vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
415
416 vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
417 vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
418 vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
419 }
420 } c;
// Expected bytes; groups follow the instruction groups above in the same order.
421 const uint8_t tbl[] = {
422 0x62, 0xf2, 0x7d, 0x8b, 0x54, 0x68, 0x04,
423 0x62, 0xf2, 0x7d, 0xab, 0x54, 0x68, 0x02,
424 0x62, 0xf2, 0x7d, 0xcb, 0x54, 0x68, 0x01,
425
426 0x62, 0xf2, 0xfd, 0x8b, 0x54, 0x68, 0x04,
427 0x62, 0xf2, 0xfd, 0xab, 0x54, 0x68, 0x02,
428 0x62, 0xf2, 0xfd, 0xcb, 0x54, 0x68, 0x01,
429
430 0x62, 0xf2, 0x7d, 0x8b, 0x55, 0x68, 0x04,
431 0x62, 0xf2, 0x7d, 0xab, 0x55, 0x68, 0x02,
432 0x62, 0xf2, 0x7d, 0xcb, 0x55, 0x68, 0x01,
433
434 0x62, 0xf2, 0x7d, 0x9b, 0x55, 0x68, 0x10,
435 0x62, 0xf2, 0x7d, 0xbb, 0x55, 0x68, 0x10,
436 0x62, 0xf2, 0x7d, 0xdb, 0x55, 0x68, 0x10,
437
438 0x62, 0xf2, 0xfd, 0x8b, 0x55, 0x68, 0x04,
439 0x62, 0xf2, 0xfd, 0xab, 0x55, 0x68, 0x02,
440 0x62, 0xf2, 0xfd, 0xcb, 0x55, 0x68, 0x01,
441
442 0x62, 0xf2, 0xfd, 0x9b, 0x55, 0x68, 0x08,
443 0x62, 0xf2, 0xfd, 0xbb, 0x55, 0x68, 0x08,
444 0x62, 0xf2, 0xfd, 0xdb, 0x55, 0x68, 0x08,
445 };
446 const size_t n = sizeof(tbl) / sizeof(tbl[0]);
447 CYBOZU_TEST_EQUAL(c.getSize(), n);
448 CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
449}
// Assembles vpdpbusd, vpdpbusds, vpdpwssd and vpdpwssds with a k3 zeroing mask
// for xmm/ymm/zmm, each once with a plain memory operand (ptr) and once with a
// ptr_b operand, then compares the generated size and bytes with tbl.
450CYBOZU_TEST_AUTO(vpdpbus)
451{
452 struct Code : Xbyak::CodeGenerator {
453 Code()
454 {
455 vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
456 vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
457 vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
458
459 vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
460 vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
461 vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
462
463 vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
464 vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
465 vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
466
467 vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
468 vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
469 vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
470
471 vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
472 vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
473 vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
474
475 vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
476 vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
477 vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
478
479 vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
480 vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
481 vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
482
483 vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
484 vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
485 vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
486 }
487 } c;
// Expected bytes; groups follow the instruction groups above in the same order
// (the fifth byte steps 0x50, 0x51, 0x52, 0x53 across the four instructions).
488 const uint8_t tbl[] = {
489 0x62, 0xf2, 0x5d, 0x83, 0x50, 0x68, 0x04,
490 0x62, 0xf2, 0x5d, 0xa3, 0x50, 0x68, 0x02,
491 0x62, 0xf2, 0x5d, 0xc3, 0x50, 0x68, 0x01,
492
493 0x62, 0xf2, 0x5d, 0x93, 0x50, 0x68, 0x10,
494 0x62, 0xf2, 0x5d, 0xb3, 0x50, 0x68, 0x10,
495 0x62, 0xf2, 0x5d, 0xd3, 0x50, 0x68, 0x10,
496
497 0x62, 0xf2, 0x5d, 0x83, 0x51, 0x68, 0x04,
498 0x62, 0xf2, 0x5d, 0xa3, 0x51, 0x68, 0x02,
499 0x62, 0xf2, 0x5d, 0xc3, 0x51, 0x68, 0x01,
500
501 0x62, 0xf2, 0x5d, 0x93, 0x51, 0x68, 0x10,
502 0x62, 0xf2, 0x5d, 0xb3, 0x51, 0x68, 0x10,
503 0x62, 0xf2, 0x5d, 0xd3, 0x51, 0x68, 0x10,
504
505 0x62, 0xf2, 0x5d, 0x83, 0x52, 0x68, 0x04,
506 0x62, 0xf2, 0x5d, 0xa3, 0x52, 0x68, 0x02,
507 0x62, 0xf2, 0x5d, 0xc3, 0x52, 0x68, 0x01,
508
509 0x62, 0xf2, 0x5d, 0x93, 0x52, 0x68, 0x10,
510 0x62, 0xf2, 0x5d, 0xb3, 0x52, 0x68, 0x10,
511 0x62, 0xf2, 0x5d, 0xd3, 0x52, 0x68, 0x10,
512
513 0x62, 0xf2, 0x5d, 0x83, 0x53, 0x68, 0x04,
514 0x62, 0xf2, 0x5d, 0xa3, 0x53, 0x68, 0x02,
515 0x62, 0xf2, 0x5d, 0xc3, 0x53, 0x68, 0x01,
516
517 0x62, 0xf2, 0x5d, 0x93, 0x53, 0x68, 0x10,
518 0x62, 0xf2, 0x5d, 0xb3, 0x53, 0x68, 0x10,
519 0x62, 0xf2, 0x5d, 0xd3, 0x53, 0x68, 0x10,
520 };
521 const size_t n = sizeof(tbl) / sizeof(tbl[0]);
522 CYBOZU_TEST_EQUAL(c.getSize(), n);
523 CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
524}
// Assembles vpexpandb/vpexpandw (register and memory sources, k3 zeroing mask)
// and vpshufbitqmb (opmask destination k1 combined with k2) for xmm/ymm/zmm,
// then compares the generated size and bytes with tbl.
525CYBOZU_TEST_AUTO(vexpand_vpshufbitqmb)
526{
527 struct Code : Xbyak::CodeGenerator {
528 Code()
529 {
530 vpexpandb(xmm5|k3|T_z, xmm30);
531 vpexpandb(ymm5|k3|T_z, ymm30);
532 vpexpandb(zmm5|k3|T_z, zmm30);
533 vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
534 vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
535 vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
536
537 vpexpandw(xmm5|k3|T_z, xmm30);
538 vpexpandw(ymm5|k3|T_z, ymm30);
539 vpexpandw(zmm5|k3|T_z, zmm30);
540 vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
541 vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
542 vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
543
544 vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
545 vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
546 vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
547 }
548 } c;
// Expected bytes; groups follow the instruction groups above in the same order.
// Register-source rows are six bytes long; memory-source rows add a displacement byte.
549 const uint8_t tbl[] = {
550 0x62, 0x92, 0x7d, 0x8b, 0x62, 0xee,
551 0x62, 0x92, 0x7d, 0xab, 0x62, 0xee,
552 0x62, 0x92, 0x7d, 0xcb, 0x62, 0xee,
553 0x62, 0xf2, 0x7d, 0x8b, 0x62, 0x68, 0x40,
554 0x62, 0xf2, 0x7d, 0xab, 0x62, 0x68, 0x40,
555 0x62, 0xf2, 0x7d, 0xcb, 0x62, 0x68, 0x40,
556
557 0x62, 0x92, 0xfd, 0x8b, 0x62, 0xee,
558 0x62, 0x92, 0xfd, 0xab, 0x62, 0xee,
559 0x62, 0x92, 0xfd, 0xcb, 0x62, 0xee,
560 0x62, 0xf2, 0xfd, 0x8b, 0x62, 0x68, 0x20,
561 0x62, 0xf2, 0xfd, 0xab, 0x62, 0x68, 0x20,
562 0x62, 0xf2, 0xfd, 0xcb, 0x62, 0x68, 0x20,
563
564 0x62, 0xf2, 0x6d, 0x0a, 0x8f, 0x48, 0x04,
565 0x62, 0xf2, 0x6d, 0x2a, 0x8f, 0x48, 0x02,
566 0x62, 0xf2, 0x6d, 0x4a, 0x8f, 0x48, 0x01,
567 };
568 const size_t n = sizeof(tbl) / sizeof(tbl[0]);
569 CYBOZU_TEST_EQUAL(c.getSize(), n);
570 CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
571}
// NOTE(review): the test header (source line 572) and source lines 577-578,
// 581-582, 597-598, 601-602, 617-618 and 621-622 are missing from this listing.
// Assembles the gf2p8affineinvqb / gf2p8affineqb / gf2p8mulb families (legacy
// form plus v-prefixed forms for xmm/ymm/zmm, with and without a k1 zeroing mask,
// and with ptr_b operands for the two affine families), then compares the
// generated size and bytes with tbl.
573{
574 struct Code : Xbyak::CodeGenerator {
575 Code()
576 {
// --- gf2p8affineinvqb family ---
579 gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
580
583 vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
584 vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
585
586 vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
587 vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
588 vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
589
590 vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
591 vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
592 vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
593
594 vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
595 vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
596 vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
// --- gf2p8affineqb family ---
599 gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
600
603 vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
604 vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
605
606 vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
607 vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
608 vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
609
610 vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
611 vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
612 vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
613
614 vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
615 vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
616 vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
// --- gf2p8mulb family (no immediate operand) ---
619 gf2p8mulb(xmm1, ptr [rax + 0x40]);
620
623 vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
624 vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
625
626 vgf2p8mulb(xmm30, xmm31, xmm4);
627 vgf2p8mulb(ymm30, ymm31, ymm4);
628 vgf2p8mulb(zmm30, zmm31, zmm4);
629
630 vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
631 vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
632 vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
633 }
634 } c;
// Expected bytes in three groups of 15, 15 and 12 rows, while only 12, 12 and 9
// instructions are visible above. NOTE(review): the extra rows (those whose byte
// after the opcode is 0xca) presumably belong to register-operand forms on the
// source lines this listing omits - confirm against the original file.
635 const uint8_t tbl[] = {
636 0x66, 0x0f, 0x3a, 0xcf, 0xca, 0x03,
637 0x66, 0x0f, 0x3a, 0xcf, 0x48, 0x40, 0x03,
638 0xc4, 0xe3, 0xd1, 0xcf, 0xca, 0x03,
639 0xc4, 0xe3, 0xd5, 0xcf, 0xca, 0x03,
640 0xc4, 0xe3, 0xd1, 0xcf, 0x48, 0x40, 0x03,
641 0xc4, 0xe3, 0xd5, 0xcf, 0x48, 0x40, 0x03,
642 0x62, 0x63, 0x85, 0x00, 0xcf, 0xf4, 0x05,
643 0x62, 0x63, 0x85, 0x20, 0xcf, 0xf4, 0x05,
644 0x62, 0x63, 0x85, 0x40, 0xcf, 0xf4, 0x05,
645 0x62, 0x63, 0xd5, 0x89, 0xcf, 0x70, 0x04, 0x05,
646 0x62, 0x63, 0xd5, 0xa9, 0xcf, 0x70, 0x02, 0x05,
647 0x62, 0x63, 0xd5, 0xc9, 0xcf, 0x70, 0x01, 0x05,
648 0x62, 0x63, 0xd5, 0x99, 0xcf, 0x70, 0x08, 0x05,
649 0x62, 0x63, 0xd5, 0xb9, 0xcf, 0x70, 0x08, 0x05,
650 0x62, 0x63, 0xd5, 0xd9, 0xcf, 0x70, 0x08, 0x05,
651
652 0x66, 0x0f, 0x3a, 0xce, 0xca, 0x03,
653 0x66, 0x0f, 0x3a, 0xce, 0x48, 0x40, 0x03,
654 0xc4, 0xe3, 0xd1, 0xce, 0xca, 0x03,
655 0xc4, 0xe3, 0xd5, 0xce, 0xca, 0x03,
656 0xc4, 0xe3, 0xd1, 0xce, 0x48, 0x40, 0x03,
657 0xc4, 0xe3, 0xd5, 0xce, 0x48, 0x40, 0x03,
658 0x62, 0x63, 0x85, 0x00, 0xce, 0xf4, 0x05,
659 0x62, 0x63, 0x85, 0x20, 0xce, 0xf4, 0x05,
660 0x62, 0x63, 0x85, 0x40, 0xce, 0xf4, 0x05,
661 0x62, 0x63, 0xd5, 0x89, 0xce, 0x70, 0x04, 0x05,
662 0x62, 0x63, 0xd5, 0xa9, 0xce, 0x70, 0x02, 0x05,
663 0x62, 0x63, 0xd5, 0xc9, 0xce, 0x70, 0x01, 0x05,
664 0x62, 0x63, 0xd5, 0x99, 0xce, 0x70, 0x08, 0x05,
665 0x62, 0x63, 0xd5, 0xb9, 0xce, 0x70, 0x08, 0x05,
666 0x62, 0x63, 0xd5, 0xd9, 0xce, 0x70, 0x08, 0x05,
667
668 0x66, 0x0f, 0x38, 0xcf, 0xca,
669 0x66, 0x0f, 0x38, 0xcf, 0x48, 0x40,
670 0xc4, 0xe2, 0x51, 0xcf, 0xca,
671 0xc4, 0xe2, 0x55, 0xcf, 0xca,
672 0xc4, 0xe2, 0x51, 0xcf, 0x48, 0x40,
673 0xc4, 0xe2, 0x55, 0xcf, 0x48, 0x40,
674 0x62, 0x62, 0x05, 0x00, 0xcf, 0xf4,
675 0x62, 0x62, 0x05, 0x20, 0xcf, 0xf4,
676 0x62, 0x62, 0x05, 0x40, 0xcf, 0xf4,
677 0x62, 0x62, 0x55, 0x89, 0xcf, 0x70, 0x04,
678 0x62, 0x62, 0x55, 0xa9, 0xcf, 0x70, 0x02,
679 0x62, 0x62, 0x55, 0xc9, 0xcf, 0x70, 0x01,
680 };
681 const size_t n = sizeof(tbl) / sizeof(tbl[0]);
682 CYBOZU_TEST_EQUAL(c.getSize(), n);
683 CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
684}
685#endif
#define CYBOZU_NUM_OF_ARRAY(x)
Definition inttype.hpp:58
#define CYBOZU_TEST_ASSERT(x)
Definition test.hpp:192
#define CYBOZU_TEST_EXCEPTION(statement, Exception)
Definition test.hpp:285
#define CYBOZU_TEST_EQUAL_ARRAY(x, y, n)
Definition test.hpp:235
#define CYBOZU_TEST_NO_EXCEPTION(statement)
Definition test.hpp:310
#define CYBOZU_TEST_EQUAL(x, y)
Definition test.hpp:199
#define CYBOZU_TEST_AUTO(name)
Definition test.hpp:322
const mie::Vuint & p
Definition bn.cpp:27
void setSize(size_t size)
Definition xbyak.h:912
size_t getSize() const
Definition xbyak.h:911
const uint8 * getCurr() const
Definition xbyak.h:908
void v4fnmaddss(const Xmm &x1, const Xmm &x2, const Address &addr)
Definition xbyak.h:1653
void vaesenclast(const Xmm &xmm, const Operand &op1, const Operand &op2=Operand())
Definition xbyak.h:769
void vpshldq(const Xmm &x1, const Xmm &x2, const Operand &op, uint8 imm)
Definition xbyak.h:1878
void vpopcntb(const Xmm &x, const Operand &op)
Definition xbyak.h:1859
const Opmask k3
Definition xbyak.h:2093
const Opmask k7
Definition xbyak.h:2093
const AddressFrame ptr_b
Definition xbyak.h:2091
void vaesdec(const Xmm &xmm, const Operand &op1, const Operand &op2=Operand())
Definition xbyak.h:766
void vpshrdvq(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1886
void vpshldvd(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1879
void vgf2p8affineinvqb(const Xmm &x1, const Xmm &x2, const Operand &op, uint8 imm)
Definition xbyak.h:1009
void vpshldw(const Xmm &x1, const Xmm &x2, const Operand &op, uint8 imm)
Definition xbyak.h:1882
void vpclmulqdq(const Xmm &x1, const Xmm &x2, const Operand &op, uint8 imm)
Definition xbyak.h:1110
void vpshrdvd(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1885
void vpopcntw(const Xmm &x, const Operand &op)
Definition xbyak.h:1862
void gf2p8affineinvqb(const Xmm &xmm, const Operand &op, int imm)
Definition xbyak.h:298
void vcompressw(const Operand &op, const Xmm &x)
Definition xbyak.h:1675
void vp4dpwssds(const Zmm &z1, const Zmm &z2, const Address &addr)
Definition xbyak.h:1762
void vpshufbitqmb(const Opmask &k, const Xmm &x, const Operand &op)
Definition xbyak.h:1889
void align(size_t x=16, bool useMultiByteNop=true)
Definition xbyak.h:2475
void vpshldvw(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1881
void vpdpbusd(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1797
void gf2p8affineqb(const Xmm &xmm, const Operand &op, int imm)
Definition xbyak.h:299
const Reg32 eax
Definition xbyak.h:2087
const AddressFrame dword
Definition xbyak.h:2090
void vpshrdw(const Xmm &x1, const Xmm &x2, const Operand &op, uint8 imm)
Definition xbyak.h:1888
void vgf2p8affineqb(const Xmm &x1, const Xmm &x2, const Operand &op, uint8 imm)
Definition xbyak.h:1010
const AddressFrame byte
Definition xbyak.h:2090
void v4fmaddps(const Zmm &z1, const Zmm &z2, const Address &addr)
Definition xbyak.h:1650
void vpshrdd(const Xmm &x1, const Xmm &x2, const Operand &op, uint8 imm)
Definition xbyak.h:1883
void vaesdeclast(const Xmm &xmm, const Operand &op1, const Operand &op2=Operand())
Definition xbyak.h:767
const Opmask k5
Definition xbyak.h:2093
void vpshldvq(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1880
void vpshrdvw(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1887
void db(int code)
Definition xbyak.h:882
void v4fmaddss(const Xmm &x1, const Xmm &x2, const Address &addr)
Definition xbyak.h:1651
void vpopcntq(const Xmm &x, const Operand &op)
Definition xbyak.h:1861
const EvexModifierZero T_z
Definition xbyak.h:2096
void vpdpwssd(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1799
const Opmask k4
Definition xbyak.h:2093
const AddressFrame qword
Definition xbyak.h:2090
void v4fnmaddps(const Zmm &z1, const Zmm &z2, const Address &addr)
Definition xbyak.h:1652
void gf2p8mulb(const Xmm &xmm, const Operand &op)
Definition xbyak.h:300
const AddressFrame word
Definition xbyak.h:2090
void mov(const Operand &reg1, const Operand &reg2)
Definition xbyak.h:2210
const Opmask k2
Definition xbyak.h:2093
void vaesenc(const Xmm &xmm, const Operand &op1, const Operand &op2=Operand())
Definition xbyak.h:768
void vgf2p8mulb(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1011
void vpdpbusds(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1798
void vcompressb(const Operand &op, const Xmm &x)
Definition xbyak.h:1672
const Opmask k1
Definition xbyak.h:2093
void vpopcntd(const Xmm &x, const Operand &op)
Definition xbyak.h:1860
void vpshrdq(const Xmm &x1, const Xmm &x2, const Operand &op, uint8 imm)
Definition xbyak.h:1884
void vpexpandw(const Xmm &x, const Operand &op)
Definition xbyak.h:1818
void vpshldd(const Xmm &x1, const Xmm &x2, const Operand &op, uint8 imm)
Definition xbyak.h:1877
void vpdpwssds(const Xmm &x1, const Xmm &x2, const Operand &op)
Definition xbyak.h:1800
void vp4dpwssd(const Zmm &z1, const Zmm &z2, const Address &addr)
Definition xbyak.h:1761
void vpexpandb(const Xmm &x, const Operand &op)
Definition xbyak.h:1815
const AddressFrame ptr
Definition xbyak.h:2090
const struct Ptn tbl[]
Definition xbyak.h:104
unsigned char uint8
unsigned char uint8_t
Definition stdint.h:124
unsigned __int64 uint64_t
Definition stdint.h:136
Definition bench.cpp:18
Code()
Definition memfunc.cpp:19
int type definition and macros Copyright (C) 2008 Cybozu Labs, Inc., all rights reserved.
unit test class
Xbyak ; JIT assembler for x86(IA32)/x64 by C++.
void shrd(const Operand &op, const Reg &reg, const Reg8 &_cl)
void shld(const Operand &op, const Reg &reg, const Reg8 &_cl)
void vpclmulqdq(const Xmm &x1, const Xmm &x2, const Operand &op, uint8 imm)
int bit
Definition yubihsm.h:566