Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
make_nm.cpp
Go to the documentation of this file.
1#include <stdio.h>
2#define XBYAK_NO_OP_NAMES
3#include "xbyak/xbyak.h"
5#include <stdlib.h>
6#include <string.h>
7#include "cybozu/inttype.hpp"
8#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
9
10using namespace Xbyak;
11
// Number of bit positions scanned by the put() loops. Every operand-kind flag
// below is either a single bit or an OR of such bits in a 64-bit mask.
12const int bitEnd = 64;
13
14const uint64 MMX = 1ULL << 0;
15const uint64 _XMM = 1ULL << 1;
16const uint64 _MEM = 1ULL << 2;
17const uint64 _REG32 = 1ULL << 3;
18const uint64 EAX = 1ULL << 4;
19const uint64 IMM32 = 1ULL << 5;
20const uint64 IMM8 = 1ULL << 6;
21const uint64 _REG8 = 1ULL << 7;
22const uint64 _REG16 = 1ULL << 8;
23const uint64 NEG8 = 1ULL << 9;
24const uint64 IMM16 = 1ULL << 10;
25const uint64 NEG16 = 1ULL << 11;
26const uint64 AX = 1ULL << 12;
27const uint64 AL = 1ULL << 13;
28const uint64 IMM_1 = 1ULL << 14;
29const uint64 MEM8 = 1ULL << 15;
30const uint64 MEM16 = 1ULL << 16;
31const uint64 MEM32 = 1ULL << 17;
32const uint64 ONE = 1ULL << 19;
33const uint64 CL = 1ULL << 20;
34const uint64 MEM_ONLY_DISP = 1ULL << 21;
35const uint64 NEG32 = 1ULL << 23;
36const uint64 _YMM = 1ULL << 24;
37const uint64 VM32X_32 = 1ULL << 39;
38const uint64 VM32X_64 = 1ULL << 40;
39const uint64 VM32Y_32 = 1ULL << 41;
40const uint64 VM32Y_64 = 1ULL << 42;
// Flags for 64-bit-only operand kinds. Without XBYAK64 they are defined as 0,
// so OR-ing them into a mask selects nothing.
41#ifdef XBYAK64
42const uint64 _MEMe = 1ULL << 25;
43const uint64 REG32_2 = 1ULL << 26; // r8d, ...
44const uint64 REG16_2 = 1ULL << 27; // r8w, ...
45const uint64 REG8_2 = 1ULL << 28; // r8b, ...
46const uint64 REG8_3 = 1ULL << 29; // spl, ...
47const uint64 _REG64 = 1ULL << 30; // rax, ...
48const uint64 _REG64_2 = 1ULL << 31; // r8, ...
49const uint64 RAX = 1ULL << 32;
50const uint64 _XMM2 = 1ULL << 33;
51const uint64 _YMM2 = 1ULL << 34;
54#else
55const uint64 _MEMe = 0;
56const uint64 REG32_2 = 0;
57const uint64 REG16_2 = 0;
58const uint64 REG8_2 = 0;
59const uint64 REG8_3 = 0;
60const uint64 _REG64 = 0;
61const uint64 _REG64_2 = 0;
62const uint64 RAX = 0;
63const uint64 _XMM2 = 0;
64const uint64 _YMM2 = 0;
67#endif
// NOTE(review): the listing numbers skip 52-53, 65-66, 68-72 and 78 around here.
// Names used later in this file (REG8, REG16, REG32, REG64, REG32e, IMM) are not
// defined in the visible lines; presumably they were on the skipped lines -
// confirm against the upstream file.
73const uint64 MEM = _MEM | _MEMe;
74const uint64 MEM64 = 1ULL << 35;
75const uint64 ST0 = 1ULL << 36;
76const uint64 STi = 1ULL << 37;
77const uint64 IMM_2 = 1ULL << 38;
79const uint64 XMM = _XMM | _XMM2;
80const uint64 YMM = _YMM | _YMM2;
81const uint64 K = 1ULL << 43;
82const uint64 _ZMM = 1ULL << 44;
83const uint64 _ZMM2 = 1ULL << 45;
84#ifdef XBYAK64
85const uint64 ZMM = _ZMM | _ZMM2;
86const uint64 _YMM3 = 1ULL << 46;
87#else
88const uint64 ZMM = _ZMM;
89const uint64 _YMM3 = 0;
90#endif
91const uint64 K2 = 1ULL << 47;
92const uint64 ZMM_SAE = 1ULL << 48;
93const uint64 ZMM_ER = 1ULL << 49;
// _XMM3 has no 32-bit fallback definition: it only exists when XBYAK64 is defined.
94#ifdef XBYAK64
95const uint64 _XMM3 = 1ULL << 50;
96#endif
97const uint64 XMM_SAE = 1ULL << 51;
98#ifdef XBYAK64
99const uint64 XMM_KZ = 1ULL << 52;
100const uint64 YMM_KZ = 1ULL << 53;
101const uint64 ZMM_KZ = 1ULL << 54;
102#else
103const uint64 XMM_KZ = 0;
104const uint64 YMM_KZ = 0;
105const uint64 ZMM_KZ = 0;
106#endif
107const uint64 MEM_K = 1ULL << 55;
108const uint64 M_1to2 = 1ULL << 56;
109const uint64 M_1to4 = 1ULL << 57;
110const uint64 M_1to8 = 1ULL << 58;
111const uint64 M_1to16 = 1ULL << 59;
112const uint64 XMM_ER = 1ULL << 60;
113const uint64 M_xword = 1ULL << 61;
114const uint64 M_yword = 1ULL << 62;
// Bits 18 and 22 were left unused by the earlier definitions and are taken here.
115const uint64 MY_1to4 = 1ULL << 18;
116const uint64 BNDREG = 1ULL << 22;
117
// Top bit (bitEnd - 1): the default value of put()'s operand parameters; put()
// prints no operand for a mask that has this bit set.
118const uint64 NOPARA = 1ULL << (bitEnd - 1);
119
120class Test {
// Copy constructor and copy assignment are declared here without a body.
// NOTE(review): presumably this makes Test non-copyable - confirm that no
// definition exists elsewhere.
121 Test(const Test&);
122 void operator=(const Test&);
// Output dialect switch: the put()/get() members print Xbyak C++ call syntax
// when true and assembler syntax when false.
123 const bool isXbyak_;
// NOTE(review): not referenced anywhere in the visible part of this file.
124 int funcNum_;
125 /*
126 and_, or_, xor_, not_ => and, or, xor, not
127 */
128 std::string removeUnderScore(std::string s) const
129 {
130 if (!isXbyak_ && s[s.size() - 1] == '_') s.resize(s.size() - 1);
131 return s;
132 }
133
134 // check all op1, op2, op3
135 void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const
136 {
137 for (int i = 0; i < bitEnd; i++) {
138 if ((op1 & (1ULL << i)) == 0) continue;
139 for (int j = 0; j < bitEnd; j++) {
140 if ((op2 & (1ULL << j)) == 0) continue;
141 for (int k = 0; k < bitEnd; k++) {
142 if ((op3 & (1ULL << k)) == 0) continue;
143 for (int s = 0; s < bitEnd; s++) {
144 if ((op4 & (1ULL << s)) == 0) continue;
145 printf("%s ", nm.c_str());
146 if (isXbyak_) printf("(");
147 if (!(op1 & NOPARA)) printf("%s", get(1ULL << i));
148 if (!(op2 & NOPARA)) printf(", %s", get(1ULL << j));
149 if (!(op3 & NOPARA)) printf(", %s", get(1ULL << k));
150 if (!(op4 & NOPARA)) printf(", %s", get(1ULL << s));
151 if (isXbyak_) printf("); dump();");
152 printf("\n");
153 }
154 }
155 }
156 }
157 }
158 void put(const char *nm, uint64 op, const char *xbyak, const char *nasm) const
159 {
160 for (int i = 0; i < bitEnd; i++) {
161 if ((op & (1ULL << i)) == 0) continue;
162 printf("%s ", nm);
163 if (isXbyak_) printf("(");
164 if (!(op & NOPARA)) printf("%s", get(1ULL << i));
165 printf(", %s", isXbyak_ ? xbyak : nasm);
166 if (isXbyak_) printf("); dump();");
167 printf("\n");
168 }
169 }
170 void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64 op = NOPARA) const
171 {
172 if (nasm == 0) nasm = xbyak;
173 for (int i = 0; i < bitEnd; i++) {
174 if ((op & (1ULL << i)) == 0) continue;
175 printf("%s ", nm);
176 if (isXbyak_) printf("(");
177 printf("%s ", isXbyak_ ? xbyak : nasm);
178 if (!(op & NOPARA)) printf(", %s", get(1ULL << i));
179 if (isXbyak_) printf("); dump();");
180 printf("\n");
181 }
182 }
// Returns the operand text for a single operand-kind flag.
// Register kinds pick an entry from a small table using an index derived from
// rand(); memory and immediate kinds return fixed strings, many of which
// differ between Xbyak mode (isXbyak_) and assembler mode.
// Returns 0 (a null pointer) when no case matches the given type.
183 const char *get(uint64 type) const
184 {
// Index in 0..7; rand() is called exactly once per get() call.
185 int idx = (rand() / 31) & 7;
186 if (type == ST0) {
187 return "st0";
188 }
189 if (type == STi) {
190 return "st2";
191 }
192 switch (type) {
193 case MMX:
194 {
195 static const char MmxTbl[][4] = {
196 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
197 };
198 return MmxTbl[idx];
199 }
200 case _XMM:
201 {
202 static const char tbl[][6] = {
203 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
204 };
205 return tbl[idx];
206 }
207 case _YMM:
208 {
209 static const char tbl[][6] = {
210 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7"
211 };
212 return tbl[idx];
213 }
214 case _ZMM:
215 {
216 static const char tbl[][6] = {
217 "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7"
218 };
219 return tbl[idx];
220 }
221#ifdef XBYAK64
222 case _XMM2:
223 {
224 static const char tbl[][6] = {
225 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
226 };
227 return tbl[idx];
228 }
229 case _XMM3:
230 {
231 static const char tbl[][6] = {
232 "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23"
233 };
234 return tbl[idx];
235 }
236 case _YMM2:
237 {
238 static const char tbl[][6] = {
239 "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
240 };
241 return tbl[idx];
242 }
243 case _YMM3:
244 {
245 static const char tbl[][6] = {
246 "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23",
247 };
248 return tbl[idx];
249 }
250 case _ZMM2:
251 {
252 static const char tbl[][6] = {
253 "zmm8", "zmm9", "zmm10", "zmm11", "zmm28", "zmm29", "zmm30", "zmm31",
254 };
255 return tbl[idx];
256 }
257#endif
258 case _MEM:
259 {
// Always the same address; the varied-displacement variant below is disabled.
260 return isXbyak_ ? "ptr[eax+ecx+3]" : "[eax+ecx+3]"; // QQQ : disp8N
261/*
262 idx %= 5;
263 switch (idx) {
264 case 0: return isXbyak_ ? "ptr[eax+ecx]" : "[eax+ecx]";
265 case 1: return isXbyak_ ? "ptr[eax+ecx+1]" : "[eax+ecx+1]";
266 case 2: return isXbyak_ ? "ptr[eax+ecx+16]" : "[eax+ecx+16]";
267 case 3: return isXbyak_ ? "ptr[eax+ecx+32]" : "[eax+ecx+32]";
268 case 4: return isXbyak_ ? "ptr[eax+ecx+48]" : "[eax+ecx+48]";
269 }
270*/
271 }
// Not guarded by XBYAK64: in a 32-bit build _MEMe is 0, so this is `case 0`.
272 case _MEMe:
273 {
// ccc stays 1 (always the rdx+r15 form) unless USE_YASM is defined, in which
// case it is incremented on every call and the two forms alternate.
274 static int ccc = 1;
275#ifdef USE_YASM
276 ccc++;
277#endif
278 if (ccc & 1) {
279 return isXbyak_ ? "ptr[rdx+r15+0x12]" : "[rdx+r15+0x12]";
280 } else {
281 return isXbyak_ ? "ptr[rip - 0x13456+1-3]" : "[rip - 0x13456+1-3]";
282 }
283 }
284 case MEM8:
285 return "byte [eax+edx]";
286 case MEM16:
287 return "word [esi]";
288 case MEM32:
289 return "dword [ebp*2]";
290 case MEM64:
291 return "qword [eax+ecx*8]";
292 case MEM_ONLY_DISP:
293 return isXbyak_ ? "ptr[(void*)0x123]" : "[0x123]";
294 case _REG16: // not ax
295 {
296 static const char Reg16Tbl[][4] = {
297 "ax", "cx", "dx", "bx", "sp", "bp", "si", "di"
298 };
// (idx % 7) + 1 is in 1..7, so entry 0 of the table is never returned.
299 return Reg16Tbl[(idx % 7) + 1];
300 }
301 case _REG8: // not al
302 {
303 static const char Reg8Tbl[][4] = {
304#ifdef XBYAK64 // QQQ
305 "al", "cl", "dl", "bl", "al", "cl", "dl", "bl"
306#else
307 "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"
308#endif
309 };
// NOTE(review): in the XBYAK64 table "al" also sits at index 4, which
// (idx % 7) + 1 can select, so "not al" does not hold there - confirm intent.
310 return Reg8Tbl[(idx % 7) + 1];
311 }
312 case _REG32: // not eax
313 {
314 static const char Reg32Tbl[][4] = {
315 "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"
316 };
317 return Reg32Tbl[(idx % 7) + 1];
318 }
319#ifdef XBYAK64
320 case _REG64: // not rax
321 {
322 static const char Reg64Tbl[][4] = {
323 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"
324 };
325 return Reg64Tbl[(idx % 7) + 1];
326 }
327 case _REG64_2:
328 {
329 static const char Reg64_2Tbl[][4] = {
330 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
331 };
332 return Reg64_2Tbl[idx];
333 }
334 case REG32_2:
335 {
336 static const char Reg32eTbl[][5] = {
337 "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"
338 };
339 return Reg32eTbl[idx];
340 }
341 case REG16_2:
342 {
343 static const char Reg16eTbl[][5] = {
344 "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
345 };
346 return Reg16eTbl[idx];
347 }
348 case REG8_2:
349 {
350 static const char Reg8_2Tbl[][5] = {
351 "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"
352 };
353 return Reg8_2Tbl[idx];
354 }
355 case REG8_3:
356 {
357 static const char Reg8_3Tbl[][5] = {
358 "spl", "bpl", "sil", "dil", "spl", "bpl", "sil", "dil"
359 };
360 return Reg8_3Tbl[idx];
361 }
362 case RAX:
363 return "rax";
364#endif
365 case EAX:
366 return "eax";
367 case AX:
368 return "ax";
369 case AL:
370 return "al";
371 case CL:
372 return "cl";
373 case ONE:
374 return "1";
// Immediates: assembler mode adds an explicit size keyword, Xbyak mode does not.
375 case IMM32:
376 return isXbyak_ ? "12345678" : "dword 12345678";
377 case IMM16:
378 return isXbyak_ ? "1000" : "word 1000";
379 case IMM8:
380 return isXbyak_ ? "4" : "byte 4";
381 case NEG8:
382 return isXbyak_ ? "-30" : "byte -30";
383 case NEG16:
384 return isXbyak_ ? "-1000" : "word -1000";
385 case NEG32:
386 return isXbyak_ ? "-100000" : "dword -100000";
387 case IMM_1:
388 return "4";
389 case IMM_2:
390 return isXbyak_ ? "0xda" : "0xda";
391 case VM32X_32:
392 return isXbyak_ ? "ptr [ebp+4+xmm1*8]" : "[ebp+4+xmm1*8]";
393 case VM32X_64:
394 return isXbyak_ ? "ptr [12345+xmm13*2]" : "[12345+xmm13*2]";
395 case VM32Y_32:
396 return isXbyak_ ? "ptr [ymm4]" : "[ymm4]";
397 case VM32Y_64:
398 return isXbyak_ ? "ptr [12345+ymm13*2+r13]" : "[12345+ymm13*2+r13]";
// Broadcast memory operands: the Xbyak text is the same for every width, the
// assembler text carries the {1toN} decoration.
399 case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}";
400 case M_1to4: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to4}";
401 case M_1to8: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to8}";
402 case M_1to16: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to16}";
403
404 case M_xword: return isXbyak_ ? "ptr [eax+33]" : "oword [eax+33]";
405 case M_yword: return isXbyak_ ? "yword [eax+33]" : "yword [eax+33]";
406 case MY_1to4: return isXbyak_ ? "yword_b [eax+32]" : "[eax+32]{1to4}";
407 case K:
408 {
409 static const char kTbl[][5] = {
410 "k1", "k2", "k3", "k4", "k5", "k6", "k7",
411 };
// Seven entries, so the index is reduced modulo 7.
412 return kTbl[idx % 7];
413 }
414 case K2:
415 return isXbyak_ ? "k3 | k5" : "k3{k5}";
416 case BNDREG:
417 {
418 static const char tbl[][5] = {
419 "bnd0", "bnd1", "bnd2", "bnd3",
420 };
421 return tbl[idx % 4];
422 }
423#ifdef XBYAK64
424 case XMM_SAE:
425 return isXbyak_ ? "xmm25 | T_sae" : "xmm25, {sae}";
426 case ZMM_SAE:
427 return isXbyak_ ? "zmm25 | T_sae" : "zmm25, {sae}";
428 case XMM_ER:
429 return isXbyak_ ? "xmm4 | T_rd_sae" : "xmm4, {rd-sae}";
430 case ZMM_ER:
431 return isXbyak_ ? "zmm20 | T_rd_sae" : "zmm20, {rd-sae}";
432 case XMM_KZ:
433 return isXbyak_ ? "xmm5 | k5" : "xmm5{k5}";
434 case YMM_KZ:
435 return isXbyak_ ? "ymm2 |k3|T_z" : "ymm2{k3}{z}";
436 case ZMM_KZ:
437 return isXbyak_ ? "zmm7|k1" : "zmm7{k1}";
438 case MEM_K:
439 return isXbyak_ ? "ptr [rax] | k1" : "[rax]{k1}";
440#else
441 case XMM_SAE:
442 return isXbyak_ ? "xmm5 | T_sae" : "xmm5, {sae}";
443 case ZMM_SAE:
444 return isXbyak_ ? "zmm5 | T_sae" : "zmm5, {sae}";
// NOTE(review): this is the non-XBYAK64 branch yet it names xmm30, while the
// XBYAK64 branch above uses xmm4 for XMM_ER - confirm this is intended.
445 case XMM_ER:
446 return isXbyak_ ? "xmm30 | T_rd_sae" : "xmm30, {rd-sae}";
447 case ZMM_ER:
448 return isXbyak_ ? "zmm2 | T_rd_sae" : "zmm2, {rd-sae}";
449 case MEM_K:
450 return isXbyak_ ? "ptr [eax] | k1" : "[eax]{k1}";
451#endif
452 }
// No case matched: callers receive a null pointer.
453 return 0;
454 }
455 void putSIMPLE() const
456 {
457 const char tbl[][20] = {
458#ifdef XBYAK64
459 "cdqe",
460 "cqo",
461#else
462 "aaa",
463 "aad",
464 "aam",
465 "aas",
466 "daa",
467 "das",
468 "popad",
469 "popfd",
470 "pusha",
471 "pushad",
472 "pushfd",
473 "popa",
474#endif
475
476 "cbw",
477 "cdq",
478 "clc",
479 "cld",
480 "cli",
481 "cmc",
482
483 "cpuid",
484 "cwd",
485 "cwde",
486
487 "lahf",
488// "lock",
489 "nop",
490
491 "sahf",
492 "stc",
493 "std",
494 "sti",
495
496 "emms",
497 "pause",
498 "sfence",
499 "lfence",
500 "mfence",
501 "monitor",
502 "mwait",
503
504 "rdmsr",
505 "rdpmc",
506 "rdtsc",
507 "rdtscp",
508 "ud2",
509 "wait",
510 "fwait",
511 "wbinvd",
512 "wrmsr",
513 "xlatb",
514
515 "popf",
516 "pushf",
517 "stac",
518
519 "xgetbv",
520 "vzeroall",
521 "vzeroupper",
522
523 "f2xm1",
524 "fabs",
525 "faddp",
526 "fchs",
527 "fcom",
528 "fcomp",
529 "fcompp",
530 "fcos",
531 "fdecstp",
532 "fdivp",
533 "fdivrp",
534 "fincstp",
535 "finit",
536 "fninit",
537 "fld1",
538 "fldl2t",
539 "fldl2e",
540 "fldpi",
541 "fldlg2",
542 "fldln2",
543 "fldz",
544 "fmulp",
545 "fnop",
546 "fpatan",
547 "fprem",
548 "fprem1",
549 "fptan",
550 "frndint",
551 "fscale",
552 "fsin",
553 "fsincos",
554 "fsqrt",
555 "fsubp",
556 "fsubrp",
557 "ftst",
558 "fucom",
559 "fucomp",
560 "fucompp",
561 "fxam",
562 "fxch",
563 "fxtract",
564 "fyl2x",
565 "fyl2xp1",
566 };
567 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
568 put(tbl[i]);
569 }
570
571 put("bswap", REG32e);
572 put("lea", REG32e|REG16, MEM);
573 put("fldcw", MEM);
574 put("fstcw", MEM);
575 }
576 void putJmp() const
577 {
578#ifdef XBYAK64
579 put("jmp", REG64);
580 put("call", REG64);
581#else
582 put("jmp", REG32);
583 put("call", REG16|REG32);
584#endif
585 put("jmp", MEM);
586 put("jmp", MEM);
587 put("jmp", MEM);
588 put("call", MEM|MEM_ONLY_DISP);
589#ifndef USE_YASM
590 // call(ptr [getCode() + 5]); means to construct the opecode of "call"
591 // after calling getCode().
592 // Its behavior is same as NASM(MASM). YASM makes different opecode.
593 put("call", "getCode() + 5", "$ + 5");
594#endif
595
596#ifdef XBYAK64
597 put("jmp", "ptr[(void*)0x12345678]", "[0x12345678]");
598 put("call", "ptr[(void*)0x12345678]", "[0x12345678]");
599#ifdef USE_YASM
600 put("jmp", "ptr[rip + 0x12345678]", "[rip+0x12345678]");
601 put("call", "ptr[rip + 0x12345678]", "[rip+0x12345678]");
602 put("call", "ptr[rip -23]", "[rip-23]");
603 put("call", "ptr[rip -23+56]", "[rip-23+56]");
604#else
605 // bug of yasm?
606 if (isXbyak_) {
607 puts("{ Label label0;");
608 puts("L(label0);");
609 puts("pshufb (xmm14, ptr [rip+label0]); dump();");
610 puts("}");
611 } else {
612 puts("label0:");
613 puts("pshufb xmm14, [rel label0]");
614 }
615#endif
616#endif
617 }
618 void putMMX1() const
619 {
620 // emms etc
621 put("ldmxcsr", MEM);
622 put("movmskps", REG32e, XMM);
623 put("movmskpd", REG32e, XMM);
624 put("stmxcsr", MEM);
625 put("maskmovq", MMX, MMX);
626 put("movntps", MEM, XMM);
627 put("movntq", MEM, MMX);
628 put("prefetcht0", MEM);
629 put("prefetcht1", MEM);
630 put("prefetcht2", MEM);
631 put("prefetchnta", MEM);
632 put("prefetchwt1", MEM);
633 put("prefetchw", MEM);
634
635 // SSE2 misc
636 put("maskmovdqu", XMM, XMM);
637 put("movntpd", MEM, XMM);
638 put("movntdq", MEM, XMM);
639 put("movnti", MEM, REG32); // QQQ:REG32e?
640
641 put("movhlps", XMM, XMM);
642 put("movlhps", XMM, XMM);
643
644 // movd for MMX, XMM
645 put("movd", MEM|MEM32|REG32, MMX|XMM);
646 put("movd", MMX|XMM, MEM|REG32|MEM32);
647
648 // movq for MMX
649 put("movq", MMX, MMX|MEM);
650 put("movq", MEM, MMX);
651 // movq for XMM
652 put("movq", XMM, XMM|MEM);
653 put("movq", MEM, XMM);
654 put("movq", XMM|MMX, "qword[eax]", "qword[eax]");
655 put("movq", XMM|MMX, "ptr[eax]", "qword[eax]");
656 put("movq", "qword[eax]", "qword[eax]", XMM|MMX);
657 put("movq", "ptr[eax]", "qword[eax]", XMM|MMX);
658#ifdef XBYAK64
659 put("movq", REG64, XMM|MMX);
660 put("movq", XMM|MMX, REG64);
661#endif
662
663 // SSE3 int
664 put("lddqu", XMM, MEM);
665 }
666 void putMMX2() const
667 {
668 static const char nmTbl[][16] = {
669 // MMX
670 "packssdw",
671 "packsswb",
672 "packuswb",
673 "pand",
674 "pandn",
675 "pmaddwd",
676 "pmulhuw",
677 "pmulhw",
678 "pmullw",
679 "por",
680 "punpckhbw",
681 "punpckhwd",
682 "punpckhdq",
683 "punpcklbw",
684 "punpcklwd",
685 "punpckldq",
686 "pxor",
687 "paddb",
688 "paddw",
689 "paddd",
690 "paddsb",
691 "paddsw",
692 "paddusb",
693 "paddusw",
694 "pcmpeqb",
695 "pcmpeqw",
696 "pcmpeqd",
697 "pcmpgtb",
698 "pcmpgtw",
699 "pcmpgtd",
700 "psllw",
701 "pslld",
702 "psllq",
703 "psraw",
704 "psrad",
705 "psrlw",
706 "psrld",
707 "psrlq",
708 "psubb",
709 "psubw",
710 "psubd",
711 "psubsb",
712 "psubsw",
713 "psubusb",
714 "psubusw",
715 // MMX2
716 "pavgb",
717 "pavgw",
718 "pmaxsw",
719 "pmaxub",
720 "pminsw",
721 "pminub",
722 "psadbw",
723 //
724 "paddq",
725 "pmuludq",
726 "psubq",
727 };
728 for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) {
729 put(nmTbl[i], MMX, MMX|MEM);
730 put(nmTbl[i], XMM, XMM|MEM);
731 }
732 }
733 void putMMX3() const
734 {
735 static const char nmTbl[][16] = {
736 "psllw",
737 "pslld",
738 "psllq",
739 "psraw",
740 "psrad",
741 "psrlw",
742 "psrld",
743 "psrlq",
744 };
745 for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) {
746 put(nmTbl[i], MMX|XMM, IMM);
747 }
748 put("pslldq", XMM, IMM);
749 put("psrldq", XMM, IMM);
750 put("pmovmskb", REG32, MMX|XMM); // QQQ
751 put("pextrw", REG32, MMX|XMM, IMM); // QQQ
752 put("pinsrw", MMX|XMM, REG32|MEM, IMM); // QQQ
753 }
754 void putMMX4() const
755 {
756 put("pshufw", MMX, MMX|MEM, IMM);
757 put("pshuflw", XMM, XMM|MEM, IMM);
758 put("pshufhw", XMM, XMM|MEM, IMM);
759 put("pshufd", XMM, XMM|MEM, IMM);
760 }
761 void putMMX5() const
762 {
763 static const char nmTbl[][16] = {
764 "movdqa",
765 "movdqu",
766 "movaps",
767 "movss",
768 "movups",
769 "movapd",
770 "movsd",
771 "movupd",
772 };
773 for (size_t i = 0; i < NUM_OF_ARRAY(nmTbl); i++) {
774 put(nmTbl[i], XMM, XMM|MEM);
775 put(nmTbl[i], MEM, XMM);
776 }
777 put("movq2dq", XMM, MMX);
778 put("movdq2q", MMX, XMM);
779 }
780
781 void putXMM1() const
782 {
783 enum {
784 PS = 1 << 0,
785 SS = 1 << 1,
786 PD = 1 << 2,
787 SD = 1 << 3
788 };
789 const struct {
790 uint8 code;
791 const char *name;
792 } sufTbl[] = {
793 { 0, "ps" },
794 { 0xF3, "ss" },
795 { 0x66, "pd" },
796 { 0xF2, "sd" },
797 };
798 static const struct XmmTbl1 {
799 uint8 code;
800 int mode;
801 const char *name;
802 bool hasImm;
803 } xmmTbl1[] = {
804 { B01011000, PS|SS|PD|SD, "add", false },
805 { B01010101, PS|PD , "andn", false },
806 { B01010100, PS|PD , "and", false },
807 { B11000010, PS|SS|PD|SD, "cmp", true },
808 { B01011110, PS|SS|PD|SD, "div", false },
809 { B01011111, PS|SS|PD|SD, "max", false },
810 { B01011101, PS|SS|PD|SD, "min", false },
811 { B01011001, PS|SS|PD|SD, "mul", false },
812 { B01010110, PS|PD , "or", false },
813 { B01010011, PS|SS , "rcp", false },
814 { B01010010, PS|SS , "rsqrt", false },
815 { B11000110, PS|PD , "shuf", true },
816 { B01010001, PS|SS|PD|SD, "sqrt", false },
817 { B01011100, PS|SS|PD|SD, "sub", false },
818 { B00010101, PS|PD , "unpckh", false },
819 { B00010100, PS|PD , "unpckl", false },
820 { B01010111, PS|PD , "xor", false },
821 //
822 };
823 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl1); i++) {
824 const XmmTbl1 *p = &xmmTbl1[i];
825 for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) {
826 if (!(p->mode & (1 << j))) continue;
827 char buf[16];
828 sprintf(buf, "%s%s", p->name, sufTbl[j].name);
829 if (p->hasImm) {
830 put(buf, XMM, XMM|MEM, IMM);
831 } else {
832 put(buf, XMM, XMM|MEM);
833 }
834 }
835 }
836 }
837 void putXMM2() const
838 {
839 // (XMM, XMM|MEM)
840 static const char tbl[][16] = {
841 "punpckhqdq",
842 "punpcklqdq",
843
844 "comiss",
845 "ucomiss",
846 "comisd",
847 "ucomisd",
848
849 "cvtpd2ps",
850 "cvtps2pd",
851 "cvtsd2ss",
852 "cvtss2sd",
853 "cvtpd2dq",
854 "cvttpd2dq",
855 "cvtdq2pd",
856 "cvtps2dq",
857 "cvttps2dq",
858 "cvtdq2ps",
859
860 "addsubpd",
861 "addsubps",
862 "haddpd",
863 "haddps",
864 "hsubpd",
865 "hsubps",
866 "movddup",
867 "movshdup",
868 "movsldup",
869 };
870 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
871 put(tbl[i], XMM, XMM|MEM);
872 }
873 }
874 void putXMM3() const
875 {
876 static const struct Tbl {
877 const char *name;
878 uint64 op1;
879 uint64 op2;
880 } tbl[] = {
881 { "cvtpi2ps", XMM, MMX|MEM },
882 { "cvtps2pi", MMX, XMM|MEM },
883 { "cvtsi2ss", XMM, REG32|MEM },
884 { "cvtss2si", REG32, XMM|MEM },
885 { "cvttps2pi", MMX, XMM|MEM },
886 { "cvttss2si", REG32, XMM|MEM },
887 { "cvtpi2pd", XMM, MMX|MEM },
888 { "cvtpd2pi", MMX, XMM|MEM },
889 { "cvtsi2sd", XMM, REG32|MEM },
890 { "cvtsd2si", REG32, XMM|MEM },
891 { "cvttpd2pi", MMX, XMM|MEM },
892 { "cvttsd2si", REG32, XMM|MEM },
893 };
894 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
895 const Tbl *p = &tbl[i];
896 put(p->name, p->op1, p->op2);
897 }
898 }
899 void putXMM4() const
900 {
901 static const char tbl[][16] = {
902 "movhps",
903 "movlps",
904 "movhpd",
905 "movlpd",
906 };
907 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
908 const char *p = tbl[i];
909 put(p, XMM, MEM);
910 put(p, MEM, XMM);
911 }
912 }
// Emits cmov<cc> for every condition-code suffix in tbl, with 16/32/64-bit
// register destinations and register-or-memory sources.
913 void putCmov() const
914 {
// Condition-code suffixes; the longest is 3 characters, which fits [4].
915 const char tbl[][4] = {
916 "o",
917 "no",
918 "b",
919 "c",
920 "nae",
921 "nb",
922 "nc",
923 "ae",
924 "e",
925 "z",
926 "ne",
927 "nz",
928 "be",
929 "na",
930 "nbe",
931 "a",
932 "s",
933 "ns",
934 "p",
935 "pe",
936 "np",
937 "po",
938 "l",
939 "nge",
940 "nl",
941 "ge",
942 "le",
943 "ng",
944 "nle",
945 "g",
946 };
947 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
948 char buf[16];
949 sprintf(buf, "cmov%s", tbl[i]);
950 put(buf, REG16, REG16|MEM);
951 put(buf, REG32, REG32|MEM);
952 put(buf, REG64, REG64|MEM);
// NOTE(review): the "set<cc>" name is formatted here but never passed to put()
// in the visible code. Listing line 954 is absent, so the emitting call was
// presumably lost in extraction - confirm against the upstream file.
953 sprintf(buf, "set%s", tbl[i]);
955 }
956 }
// Emits the two-operand integer ALU instructions in register, memory and
// immediate forms, followed by adcx/adox.
// NOTE(review): listing lines 978, 981-982 and 988-990 are absent from this
// block, so some put() calls may be missing - confirm against the upstream file.
957 void putReg1() const
958 {
959 // (REG, REG|MEM)
960 {
961 static const char tbl[][16] = {
962 "adc",
963 "add",
964 "and_",
965 "cmp",
966 "or_",
967 "sbb",
968 "sub",
969 "xor_",
970 };
971 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
// removeUnderScore() strips the trailing '_' for assembler output; s keeps the
// storage alive while p is in use.
972 const std::string s = removeUnderScore(tbl[i]);
973 const char *p = s.c_str();
974 put(p, REG32, REG32|MEM);
975 put(p, REG64, REG64|MEM);
976 put(p, REG16, REG16|MEM);
977 put(p, REG8|REG8_3, REG8|MEM);
979
980 put(p, MEM8, IMM8|NEG8);
983
984 put(p, REG64|RAX, IMM8|NEG8);
// Literal immediates passed as text: same spelling for both dialects.
985 put(p, REG64|RAX, "0x12345678", "0x12345678");
986 put(p, REG64|RAX, "192", "192");
987 put(p, REG64|RAX, "0x1234", "0x1234");
991 }
992 }
993 {
994 const char tbl[][8] = {
995 "adcx",
996 "adox",
997 };
998 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
999 const char *p = tbl[i];
1000 put(p, REG32, REG32|MEM);
1001 put(p, REG64, REG64|MEM);
1002 }
1003 }
1004 }
1005 void putBt() const
1006 {
1007 static const char tbl[][16] = {
1008 "bt",
1009 "bts",
1010 "btr",
1011 "btc",
1012 };
1013 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1014 const char *p = tbl[i];
1015 put(p, MEM|REG16, REG16);
1016 put(p, MEM|REG32, REG32);
1017 put(p, MEM|REG64, REG64);
1018 put(p, MEM16|REG16, IMM);
1019 }
1020 }
// Emits the one-operand integer instructions with sized memory operands, then
// the two-operand forms of imul.
// NOTE(review): listing lines 1036 and 1043-1045 are absent from this block,
// so some put() calls may be missing - confirm against the upstream file.
1021 void putRorM() const
1022 {
1023 static const char tbl[][16] = {
1024 "inc",
1025 "dec",
1026 "div",
1027 "idiv",
1028 "imul",
1029 "mul",
1030 "neg",
1031 "not_",
1032 };
1033 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
// "not_" becomes "not" for assembler output; s owns the text that p points to.
1034 const std::string s = removeUnderScore(tbl[i]);
1035 const char *p = s.c_str();
1037 put(p, MEM32|MEM16|MEM8);
1038 }
1039 const char *p = "imul";
1040 put(p, REG16, REG16|MEM16);
1041 put(p, REG32, REG32|MEM32);
1042 put(p, REG64, REG64|MEM);
1046 }
1047 void putPushPop() const
1048 {
1049 /*
1050 QQQ:
1051 push byte 2
1052 push dword 2
1053 reduce 4-byte stack
1054 push word 2
1055 reduce 2-byte stack, so I can't support it
1056 */
1057 const char *p = "push";
1058 put(p, REG16);
1059 put(p, IMM8); // IMM16 decrease -2 from esp
1060 put(p, MEM16);
1061
1062 put("pop", REG16|MEM16);
1063#ifdef XBYAK64
1064 put("push", REG64);
1065 put("pop", REG64);
1066#else
1067 put("push", REG32|IMM32|MEM32);
1068 put("pop", REG32|MEM32);
1069#endif
1070 }
// Emits "test" with a register-or-memory first operand and a register second
// operand for 32-, 64- and 16-bit widths.
// NOTE(review): listing lines 1077-1078 are absent from this block, so further
// put() calls may be missing - confirm against the upstream file.
1071 void putTest() const
1072 {
1073 const char *p = "test";
1074 put(p, REG32|MEM, REG32);
1075 put(p, REG64|MEM, REG64);
1076 put(p, REG16|MEM, REG16);
1079 }
1080 void putMov64() const
1081 {
1082 const struct {
1083 const char *a;
1084 const char *b;
1085 } tbl[] = {
1086 { "0", "0" },
1087 { "0x123", "0x123" },
1088 { "0x12345678", "0x12345678" },
1089 { "0x7fffffff", "0x7fffffff" },
1090 { "0xffffffff", "0xffffffff" },
1091 { "0x80000000", "0x80000000" },
1092 { "2147483648U", "2147483648" },
1093 { "0x80000001", "0x80000001" },
1094 { "0xffffffffffffffff", "0xffffffffffffffff" },
1095 { "-1", "-1" },
1096 { "0xffffffff80000000", "0xffffffff80000000" },
1097 { "0xffffffff80000001", "0xffffffff80000001" },
1098 { "0xffffffff12345678", "0xffffffff12345678" },
1099 };
1100 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1101 put("mov", REG64, tbl[i].a, tbl[i].b);
1102 }
1103 }
1104 // only nasm
1105 void putMovImm64() const
1106 {
1107 put("mov", REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
1108 put("mov", REG64, "0x12345678", "0x12345678");
1109 put("mov", REG64, "0xffffffff12345678LL", "0xffffffff12345678");
1110 put("mov", REG32e|REG16|REG8|RAX|EAX|AX|AL, IMM);
1111 }
// Emits assorted instructions: ret, mov variants, movbe, movsx/movzx,
// cmpxchg8b/16b, xadd/cmpxchg and xchg.
// NOTE(review): listing lines 1119-1123 and 1148-1149 are absent from this
// block, so some put() calls may be missing - confirm against the upstream file.
1112 void putEtc() const
1113 {
1114 {
1115 const char *p = "ret";
1116 put(p);
1117 put(p, IMM);
// p is reused for "mov" from here to the end of this scope.
1118 p = "mov";
1124 put(p, MEM32|MEM16|MEM8, IMM);
1125 put(p, REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
1126 put("movbe", REG16|REG32e, MEM);
1127 put("movbe", MEM, REG16|REG32e);
1128#ifdef XBYAK64
// Accumulator <-> 64-bit absolute address, in both directions.
1129 put(p, RAX|EAX|AX|AL, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]");
1130 put(p, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]", RAX|EAX|AX|AL);
// Whole operand list passed as one text; the same text is used for both dialects.
1131 put(p, "qword [rax], 0");
1132 put(p, "qword [rax], 0x12");
1133 put(p, "qword [rax], 0x1234");
1134 put(p, "qword [rax], 0x12345678");
1135// put(p, "qword [rax], 0x123456789ab");
1136 put(p, "qword [rax], 1000000");
1137 put(p, "rdx, qword [rax]");
1138#endif
1139 put("mov", EAX, "ptr [eax + ecx * 0]", "[eax + ecx * 0]"); // ignore scale = 0
1140 }
1141 {
1142 const char tbl[][8] = {
1143 "movsx",
1144 "movzx",
1145 };
1146 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1147 const char *p = tbl[i];
1150 put(p, REG16, REG8|MEM8);
1151 }
1152 }
1153#ifdef XBYAK64
1154 put("movsxd", REG64, REG32|MEM32);
1155#endif
1156 put("cmpxchg8b", MEM);
1157#ifdef XBYAK64
1158 put("cmpxchg16b", MEM);
1159#endif
1160 {
// "cmpxchg" is 7 characters and exactly fills [8] with its terminator.
1161 const char tbl[][8] = {
1162 "xadd",
1163 "cmpxchg"
1164 };
1165 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1166 const char *p = tbl[i];
1167 put(p, REG8|MEM, REG8);
1168 put(p, REG16|MEM, REG16);
1169 put(p, REG32|MEM, REG32);
1170 put(p, REG64|MEM, REG64);
1171 }
1172 }
1173
1174 put("xchg", AL|REG8, AL|REG8|MEM);
1175 put("xchg", MEM, AL|REG8);
1176 put("xchg", AX|REG16, AX|REG16|MEM);
1177 put("xchg", MEM, AX|REG16);
1178 put("xchg", EAX|REG32, EAX|REG32|MEM);
1179 put("xchg", MEM, EAX|REG32);
1180 put("xchg", REG64, REG64|MEM);
1181 }
// Iterates over the shift/rotate mnemonics.
// NOTE(review): as visible here the loop body only assigns p and emits
// nothing. Listing line 1197 is absent, so the put() call was presumably lost
// in extraction - confirm against the upstream file.
1182 void putShift() const
1183 {
1184 const char tbl[][8] = {
1185 "rcl",
1186 "rcr",
1187 "rol",
1188 "ror",
1189 "sar",
1190 "shl",
1191 "shr",
1192
1193 "sal",
1194 };
1195 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1196 const char *p = tbl[i];
1198 }
1199 }
1200 void putShxd() const
1201 {
1202 const char tbl[][8] = {
1203 "shld",
1204 "shrd",
1205 };
1206 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1207 const char *p = tbl[i];
1208 put(p, REG64|MEM, REG64, IMM|CL);
1209 put(p, REG32|MEM, REG32, IMM|CL);
1210 put(p, REG16|MEM, REG16, IMM|CL);
1211 }
1212 }
1213 void putBs() const
1214 {
1215 const char tbl[][8] = {
1216 "bsr",
1217 "bsf",
1218 "lzcnt",
1219 "tzcnt",
1220 "popcnt",
1221 };
1222 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1223 const char *p = tbl[i];
1224 put(p, REG64, REG64|MEM);
1225 put(p, REG32, REG32|MEM);
1226 put(p, REG16, REG16|MEM);
1227 }
1228 }
1229 void putSSSE3() const
1230 {
1231 const char tbl[][16] = {
1232 "pshufb",
1233 "phaddw",
1234 "phaddd",
1235 "phaddsw",
1236 "pmaddubsw",
1237 "phsubw",
1238 "phsubd",
1239 "phsubsw",
1240 "psignb",
1241 "psignw",
1242 "psignd",
1243 "pmulhrsw",
1244 "pabsb",
1245 "pabsw",
1246 "pabsd",
1247 };
1248 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1249 const char *p = tbl[i];
1250 put(p, XMM, XMM|MEM);
1251 put(p, MMX, MMX|MEM);
1252 }
1253 put("palignr", XMM, XMM|MEM, IMM8);
1254 put("palignr", MMX, MMX|MEM, IMM8);
1255 }
1256 void putSSE4_1() const
1257 {
1258 const char tbl[][16] = {
1259 "blendvpd",
1260 "blendvps",
1261 "packusdw",
1262 "pblendvb",
1263 "pcmpeqq",
1264 "ptest",
1265 "pmovsxbw",
1266 "pmovsxbd",
1267 "pmovsxbq",
1268 "pmovsxwd",
1269 "pmovsxwq",
1270 "pmovsxdq",
1271 "pmovzxbw",
1272 "pmovzxbd",
1273 "pmovzxbq",
1274 "pmovzxwd",
1275 "pmovzxwq",
1276 "pmovzxdq",
1277 "pminsb",
1278 "pminsd",
1279 "pminuw",
1280 "pminud",
1281 "pmaxsb",
1282 "pmaxsd",
1283 "pmaxuw",
1284 "pmaxud",
1285 "pmuldq",
1286 "pmulld",
1287 "phminposuw",
1288 "pcmpgtq",
1289 "aesdec",
1290 "aesdeclast",
1291 "aesenc",
1292 "aesenclast",
1293 "aesimc",
1294 };
1295 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1296 const char *p = tbl[i];
1297 put(p, XMM, XMM|MEM);
1298 }
1299 }
1300 void putSSE4_2() const
1301 {
1302 {
1303 const char tbl[][16] = {
1304 "blendpd",
1305 "blendps",
1306 "dppd",
1307 "dpps",
1308 "mpsadbw",
1309 "pblendw",
1310 "roundps",
1311 "roundpd",
1312 "roundss",
1313 "roundsd",
1314 "pcmpestrm",
1315 "pcmpestri",
1316 "pcmpistrm",
1317 "pcmpistri",
1318 "pclmulqdq",
1319 "aeskeygenassist",
1320 };
1321 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1322 const char *p = tbl[i];
1323 put(p, XMM, XMM|MEM, IMM);
1324 }
1325 }
1326 {
1327 const char tbl[][16] = {
1328 "pclmullqlqdq",
1329 "pclmulhqlqdq",
1330// "pclmullqhdq", // QQQ : not supported by nasm/yasm
1331// "pclmulhqhdq",
1332 };
1333 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1334 const char *p = tbl[i];
1335 put(p, XMM, XMM|MEM);
1336 }
1337 }
1338 put("extractps", REG32e|MEM, XMM, IMM);
1339 put("pextrw", REG32e|MEM, XMM, IMM); // pextrw for REG32 is for MMX2
1340 put("pextrb", REG32e|MEM, XMM, IMM);
1341 put("pextrd", REG32|MEM, XMM, IMM);
1342
1343 put("insertps", XMM, XMM|MEM, IMM);
1344 put("pinsrb", XMM, REG32|MEM, IMM);
1345 put("pinsrd", XMM, REG32|MEM, IMM);
1346 put("movntdqa", XMM, MEM);
1347 put("crc32", REG32, REG8|REG16|REG32|MEM8|MEM16|MEM32);
1348 put("crc32", REG64, REG64|REG8|MEM8);
1349#ifdef XBYAK64
1350 put("pextrq", REG64|MEM, XMM, IMM);
1351 put("pinsrq", XMM, REG64|MEM, IMM);
1352#endif
1353 }
1354 void putSHA() const
1355 {
1356 put("sha1rnds4", XMM, XMM|MEM, IMM);
1357 put("sha1nexte", XMM, XMM|MEM);
1358 put("sha1msg1", XMM, XMM|MEM);
1359 put("sha1msg2", XMM, XMM|MEM);
1360 put("sha256rnds2", XMM, XMM|MEM);
1361 put("sha256msg1", XMM, XMM|MEM);
1362 put("sha256msg2", XMM, XMM|MEM);
1363 }
1364 void putMPX() const
1365 {
1366#ifdef XBYAK64
1367 const uint64 reg = REG64;
1368#else
1369 const uint64 reg = REG32;
1370#endif
1371 put("bndcl", BNDREG, reg|MEM);
1372 put("bndcu", BNDREG, reg|MEM);
1373 put("bndcn", BNDREG, reg|MEM);
1374 put("bndldx", BNDREG, MEM);
1375 put("bndmk", BNDREG, MEM);
1376 put("bndmov", BNDREG, BNDREG|MEM);
1377 put("bndstx", MEM, BNDREG);
1378 put("bndstx", "ptr [eax]", "[eax]", BNDREG);
1379 put("bndstx", "ptr [eax+5]", "[eax+5]", BNDREG);
1380 put("bndstx", "ptr [eax+500]", "[eax+500]", BNDREG);
1381 put("bndstx", "ptr [eax+ecx]", "[eax+ecx]", BNDREG);
1382 put("bndstx", "ptr [ecx+eax]", "[ecx+eax]", BNDREG);
1383 put("bndstx", "ptr [eax+esp]", "[eax+esp]", BNDREG);
1384 put("bndstx", "ptr [esp+eax]", "[esp+eax]", BNDREG);
1385 put("bndstx", "ptr [eax+ecx*2]", "[eax+ecx*2]", BNDREG);
1386 put("bndstx", "ptr [ecx+ecx]", "[ecx+ecx]", BNDREG);
1387 put("bndstx", "ptr [ecx*2]", "[ecx*2]", BNDREG);
1388 put("bndstx", "ptr [eax+ecx*2+500]", "[eax+ecx*2+500]", BNDREG);
1389#ifdef XBYAK64
1390 put("bndstx", "ptr [rax+rcx*2]", "[rax+rcx*2]", BNDREG);
1391 put("bndstx", "ptr [r9*2]", "[r9*2]", BNDREG);
1392 put("bndstx", "ptr [r9*2+r15]", "[r9*2+r15]", BNDREG);
1393#endif
1394 }
1395 void putFpuMem16_32() const
1396 {
1397 const char tbl[][8] = {
1398 "fiadd",
1399 "fidiv",
1400 "fidivr",
1401 "ficom",
1402 "ficomp",
1403 "fimul",
1404 "fist",
1405 "fisub",
1406 "fisubr",
1407 };
1408 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1409 const char *p = tbl[i];
1410 put(p, MEM16|MEM32);
1411 }
1412 }
1413 void putFpuMem32_64() const
1414 {
1415 const char tbl[][8] = {
1416 "fadd",
1417 "fcom",
1418 "fcomp",
1419 "fdiv",
1420 "fdivr",
1421 "fld",
1422 "fmul",
1423 "fst",
1424 "fstp",
1425 "fsub",
1426 "fsubr",
1427 };
1428 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1429 const char *p = tbl[i];
1430 put(p, MEM32|MEM64);
1431 }
1432 }
1433 void putFpuMem16_32_64() const
1434 {
1435 const char tbl[][8] = {
1436 "fild",
1437 "fistp",
1438 "fisttp",
1439 };
1440 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1441 const char *p = tbl[i];
1442 put(p, MEM16|MEM32|MEM64);
1443 }
1444 }
1445 void putFpuFpu() const
1446 {
1447 const struct Tbl {
1448 const char *name;
1449 int mode; /* 1:only (st0, sti), 2: only (sti, st0), 3: both */
1450 } tbl[] = {
1451 { "fadd", 3 },
1452 { "faddp", 2 },
1453 { "fcmovb", 1 },
1454 { "fcmove", 1 },
1455 { "fcmovbe", 1 },
1456 { "fcmovu", 1 },
1457 { "fcmovnb", 1 },
1458 { "fcmovne", 1 },
1459 { "fcmovnbe", 1 },
1460 { "fcmovnu", 1 },
1461 { "fcomi", 1 },
1462 { "fcomip", 1 },
1463 { "fucomi", 1 },
1464 { "fucomip", 1 },
1465 { "fdiv", 3 },
1466 { "fdivp", 2 },
1467 { "fdivr", 3 },
1468 { "fdivrp", 2 },
1469 { "fmul", 3 },
1470 { "fmulp", 2 },
1471 { "fsub", 3 },
1472 { "fsubp", 2 },
1473 { "fsubr", 3 },
1474 { "fsubrp", 2 },
1475 };
1476 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1477 const Tbl *p = &tbl[i];
1478 if (p->mode & 1) put(p->name, ST0, STi);
1479 if (p->mode & 2) put(p->name, STi, ST0);
1480 if (p->mode) put(p->name, STi);
1481 }
1482 }
1483 void putFpu() const
1484 {
1485 const char tbl[][16] = {
1486 "fcom",
1487 "fcomp",
1488 "ffree",
1489 "fld",
1490 "fst",
1491 "fstp",
1492 "fucom",
1493 "fucomp",
1494 "fxch",
1495 };
1496 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1497 put(tbl[i], STi);
1498 }
1499 }
1500 void putAVX1()
1501 {
1502 const struct Tbl {
1503 const char *name;
1504 bool only_pd_ps;
1505 } tbl[] = {
1506 { "add", false },
1507 { "sub", false },
1508 { "mul", false },
1509 { "div", false },
1510 { "max", false },
1511 { "min", false },
1512 { "and", true },
1513 { "andn", true },
1514 { "or", true },
1515 { "xor", true },
1516
1517 { "addsub", true },
1518 { "hadd", true },
1519 { "hsub", true },
1520 };
1521 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1522 const struct Suf {
1523 const char *suf;
1524 bool supportYMM;
1525 } suf[] = {
1526 { "pd", true },
1527 { "ps", true },
1528 { "sd", false },
1529 { "ss", false },
1530 };
1531 for (size_t j = 0; j < NUM_OF_ARRAY(suf); j++) {
1532 if (tbl[i].only_pd_ps && j == 2) break;
1533 std::string name = std::string("v") + tbl[i].name + suf[j].suf;
1534 const char *p = name.c_str();
1535 put(p, XMM, XMM | MEM);
1536 put(p, XMM, XMM, XMM | MEM);
1537 if (!suf[j].supportYMM) continue;
1538 put(p, YMM, YMM | MEM);
1539 put(p, YMM, YMM, YMM | MEM);
1540 }
1541 }
1542 }
1543 void putAVX_X_X_XM_omit()
1544 {
1545 const struct Tbl {
1546 const char *name;
1547 bool supportYMM;
1548 } tbl[] = {
1549 { "vaesenc", false },
1550 { "vaesenclast", false },
1551 { "vaesdec", false },
1552 { "vaesdeclast", false },
1553 { "vcvtsd2ss", false },
1554 { "vcvtss2sd", false },
1555 { "vpacksswb", true },
1556 { "vpackssdw", true },
1557 { "vpackuswb", true },
1558 { "vpackusdw", true },
1559
1560 { "vpaddb", true },
1561 { "vpaddw", true },
1562 { "vpaddd", true },
1563 { "vpaddq", true },
1564
1565 { "vpaddsb", true },
1566 { "vpaddsw", true },
1567
1568 { "vpaddusb", true },
1569 { "vpaddusw", true },
1570
1571 { "vpand", true },
1572 { "vpandn", true },
1573 { "vpavgb", true },
1574 { "vpavgw", true },
1575
1576 { "vpcmpeqb", true },
1577 { "vpcmpeqw", true },
1578 { "vpcmpeqd", true },
1579 { "vpcmpeqq", true },
1580
1581 { "vpcmpgtb", true },
1582 { "vpcmpgtw", true },
1583 { "vpcmpgtd", true },
1584 { "vpcmpgtq", true },
1585
1586 { "vphaddw", true },
1587 { "vphaddd", true },
1588 { "vphaddsw", true },
1589
1590 { "vphsubw", true },
1591 { "vphsubd", true },
1592 { "vphsubsw", true },
1593 { "vpmaddwd", true },
1594 { "vpmaddubsw", true },
1595
1596 { "vpmaxsb", true },
1597 { "vpmaxsw", true },
1598 { "vpmaxsd", true },
1599
1600 { "vpmaxub", true },
1601 { "vpmaxuw", true },
1602 { "vpmaxud", true },
1603
1604 { "vpminsb", true },
1605 { "vpminsw", true },
1606 { "vpminsd", true },
1607
1608 { "vpminub", true },
1609 { "vpminuw", true },
1610 { "vpminud", true },
1611
1612 { "vpmulhuw", true },
1613 { "vpmulhrsw", true },
1614 { "vpmulhw", true },
1615 { "vpmullw", true },
1616 { "vpmulld", true },
1617
1618 { "vpmuludq", true },
1619 { "vpmuldq", true },
1620
1621 { "vpor", true },
1622 { "vpsadbw", true },
1623
1624 { "vpsignb", true },
1625 { "vpsignw", true },
1626 { "vpsignd", true },
1627
1628 { "vpsllw", false },
1629 { "vpslld", false },
1630 { "vpsllq", false },
1631
1632 { "vpsraw", false },
1633 { "vpsrad", false },
1634 { "vpsrlw", false },
1635 { "vpsrld", false },
1636 { "vpsrlq", false },
1637
1638 { "vpsubb", true },
1639 { "vpsubw", true },
1640 { "vpsubd", true },
1641 { "vpsubq", true },
1642
1643 { "vpsubsb", true },
1644 { "vpsubsw", true },
1645
1646 { "vpsubusb", true },
1647 { "vpsubusw", true },
1648
1649 { "vpunpckhbw", true },
1650 { "vpunpckhwd", true },
1651 { "vpunpckhdq", true },
1652 { "vpunpckhqdq", true },
1653
1654 { "vpunpcklbw", true },
1655 { "vpunpcklwd", true },
1656 { "vpunpckldq", true },
1657 { "vpunpcklqdq", true },
1658
1659 { "vpxor", true },
1660 { "vsqrtsd", false },
1661 { "vsqrtss", false },
1662
1663 { "vunpckhpd", true },
1664 { "vunpckhps", true },
1665 { "vunpcklpd", true },
1666 { "vunpcklps", true },
1667 };
1668 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1669 const Tbl *p = &tbl[i];
1670 put(p->name, XMM, XMM | MEM);
1671 put(p->name, XMM, XMM, XMM | MEM);
1672 if (!p->supportYMM) continue;
1673 put(p->name, YMM, YMM | MEM);
1674 put(p->name, YMM, YMM, YMM | MEM);
1675 }
1676 }
1677 void putAVX_X_X_XM_IMM()
1678 {
1679 const struct Tbl {
1680 const char *name;
1681 bool supportYMM;
1682 } tbl[] = {
1683 { "vblendpd", true },
1684 { "vblendps", true },
1685 { "vdppd", false },
1686 { "vdpps", true },
1687 { "vmpsadbw", true },
1688 { "vpblendw", true },
1689 { "vpblendd", true },
1690 { "vroundsd", false },
1691 { "vroundss", false },
1692 { "vpclmulqdq", false },
1693 { "vcmppd", true },
1694 { "vcmpps", true },
1695 { "vcmpsd", false },
1696 { "vcmpss", false },
1697 { "vinsertps", false },
1698 { "vpalignr", true },
1699 { "vshufpd", true },
1700 { "vshufps", true },
1701 };
1702 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1703 const Tbl *p = &tbl[i];
1704 put(p->name, XMM, XMM, XMM | MEM, IMM);
1705 put(p->name, XMM, XMM | MEM, IMM);
1706 if (!p->supportYMM) continue;
1707 put(p->name, YMM, YMM, YMM | MEM, IMM);
1708 put(p->name, YMM, YMM | MEM, IMM);
1709 }
1710 }
1711 void putAVX_X_XM_IMM()
1712 {
1713 const struct Tbl {
1714 const char *name;
1715 bool supportYMM;
1716 } tbl[] = {
1717 { "vroundpd", true },
1718 { "vroundps", true },
1719 { "vpcmpestri", false },
1720 { "vpcmpestrm", false },
1721 { "vpcmpistri", false },
1722 { "vpcmpistrm", false },
1723 { "vpermilpd", true },
1724 { "vpermilps", true },
1725 { "vaeskeygenassist", false },
1726 { "vpshufd", true },
1727 { "vpshufhw", true },
1728 { "vpshuflw", true },
1729 };
1730 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1731 const Tbl *p = &tbl[i];
1732 put(p->name, XMM, XMM | MEM, IMM);
1733 if (!p->supportYMM) continue;
1734 put(p->name, YMM, YMM | MEM, IMM);
1735 }
1736 }
1737 void putAVX_X_X_XM()
1738 {
1739 const struct Tbl {
1740 const char *name;
1741 bool supportYMM;
1742 } tbl[] = {
1743 { "vpermilpd", true },
1744 { "vpermilps", true },
1745 { "vpshufb", true },
1746
1747 { "vpsllvd", true },
1748 { "vpsllvq", true },
1749 { "vpsravd", true },
1750 { "vpsrlvd", true },
1751 { "vpsrlvq", true },
1752 };
1753 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1754 const Tbl *p = &tbl[i];
1755 put(p->name, XMM, XMM, XMM | MEM);
1756 if (!p->supportYMM) continue;
1757 put(p->name, YMM, YMM, YMM | MEM);
1758 }
1759 }
1760 void putAVX_X_XM()
1761 {
1762 const struct Tbl {
1763 const char *name;
1764 bool supportYMM;
1765 } tbl[] = {
1766 { "vaesimc", false },
1767 { "vtestps", true },
1768 { "vtestpd", true },
1769 { "vcomisd", false },
1770 { "vcomiss", false },
1771 { "vcvtdq2ps", true },
1772 { "vcvtps2dq", true },
1773 { "vcvttps2dq", true },
1774 { "vmovapd", true },
1775 { "vmovaps", true },
1776 { "vmovddup", true },
1777 { "vmovdqa", true },
1778 { "vmovdqu", true },
1779 { "vmovupd", true },
1780 { "vmovups", true },
1781
1782 { "vpabsb", true },
1783 { "vpabsw", true },
1784 { "vpabsd", true },
1785 { "vphminposuw", false },
1786
1787 { "vpmovsxbw", false },
1788 { "vpmovsxbd", false },
1789 { "vpmovsxbq", false },
1790 { "vpmovsxwd", false },
1791 { "vpmovsxwq", false },
1792 { "vpmovsxdq", false },
1793
1794 { "vpmovzxbw", false },
1795 { "vpmovzxbd", false },
1796 { "vpmovzxbq", false },
1797 { "vpmovzxwd", false },
1798 { "vpmovzxwq", false },
1799 { "vpmovzxdq", false },
1800
1801 { "vptest", true },
1802 { "vrcpps", true },
1803 { "vrcpss", false },
1804
1805 { "vrsqrtps", true },
1806 { "vrsqrtss", false },
1807
1808 { "vsqrtpd", true },
1809 { "vsqrtps", true },
1810 { "vucomisd", false },
1811 { "vucomiss", false },
1812 };
1813 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1814 const Tbl *p = &tbl[i];
1815 put(p->name, XMM, XMM | MEM);
1816 if (!p->supportYMM) continue;
1817 put(p->name, YMM, YMM | MEM);
1818 }
1819 }
1820 void putAVX_Y_XM()
1821 {
1822 const char *tbl[] = {
1823 "vpmovsxbw",
1824 "vpmovsxbd",
1825 "vpmovsxbq",
1826 "vpmovsxwd",
1827 "vpmovsxwq",
1828 "vpmovsxdq",
1829 "vpmovzxbw",
1830 "vpmovzxbd",
1831 "vpmovzxbq",
1832 "vpmovzxwd",
1833 "vpmovzxwq",
1834 "vpmovzxdq",
1835 };
1836 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1837 const char *name = tbl[i];
1838 put(name, YMM, XMM);
1839 }
1840 }
1841 void putAVX_M_X()
1842 {
1843 const struct Tbl {
1844 const char *name;
1845 bool supportYMM;
1846 } tbl[] = {
1847 { "vmovapd", true },
1848 { "vmovaps", true },
1849 { "vmovdqa", true },
1850 { "vmovdqu", true },
1851 { "vmovupd", true },
1852 { "vmovups", true },
1853 };
1854 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1855 const Tbl *p = &tbl[i];
1856 put(p->name, MEM, XMM);
1857 if (!p->supportYMM) continue;
1858 put(p->name, MEM, YMM);
1859 }
1860 }
1861 void putAVX_X_X_IMM_omit()
1862 {
1863 const struct Tbl {
1864 const char *name;
1865 bool support_Y_Y_X;
1866 } tbl[] = {
1867 { "vpslldq", false },
1868 { "vpsrldq", false },
1869 { "vpsllw", true },
1870 { "vpslld", true },
1871 { "vpsllq", true },
1872 { "vpsraw", true },
1873 { "vpsrad", true },
1874 { "vpsrlw", true },
1875 { "vpsrld", true },
1876 { "vpsrlq", true },
1877 };
1878 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1879 const Tbl& p = tbl[i];
1880 put(p.name, XMM, XMM, IMM);
1881 put(p.name, YMM, YMM, IMM);
1882 put(p.name, YMM, IMM);
1883 put(p.name, _ZMM, _ZMM, IMM8);
1884#ifdef XBYAK64
1885 put(p.name, _XMM3, _XMM3, IMM8);
1886 put(p.name, _YMM3, _YMM3, IMM8);
1887#endif
1888 if (p.support_Y_Y_X) {
1889 put(p.name, YMM, YMM, XMM);
1890 }
1891 }
1892 }
1893 void putFMA()
1894 {
1895 const struct Tbl {
1896 const char *name;
1897 bool supportYMM;
1898 } tbl[] = {
1899 { "vfmadd", true },
1900 { "vfmadd", false },
1901 { "vfmaddsub", true },
1902 { "vfmsubadd", true },
1903 { "vfmsub", true },
1904 { "vfmsub", false },
1905 { "vfnmadd", true },
1906 { "vfnmadd", false },
1907 { "vfnmsub", true },
1908 { "vfnmsub", false },
1909 };
1910 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1911 const Tbl& p = tbl[i];
1912 const struct Ord {
1913 const char *name;
1914 } ord[] = {
1915 { "132" },
1916 { "213" },
1917 { "231" },
1918 };
1919 for (size_t j = 0; j < NUM_OF_ARRAY(ord); j++) {
1920 const char sufTbl[][2][8] = {
1921 { "pd", "ps" },
1922 { "sd", "ss" },
1923 };
1924 for (size_t k = 0; k < 2; k++) {
1925 const std::string suf = sufTbl[p.supportYMM ? 0 : 1][k];
1926 std::string name = std::string(p.name) + ord[j].name + suf;
1927 const char *q = name.c_str();
1928 put(q, XMM, XMM, XMM | MEM);
1929 if (!p.supportYMM) continue;
1930 put(q, YMM, YMM, YMM | MEM);
1931 }
1932 }
1933 }
1934 }
1935 void putAVX2()
1936 {
1937 put("vextractps", REG32 | MEM, XMM, IMM);
1938 put("vldmxcsr", MEM);
1939 put("vstmxcsr", MEM);
1940 put("vmaskmovdqu", XMM, XMM);
1941
1942 put("vmovd", XMM, REG32 | MEM);
1943 put("vmovd", REG32 | MEM, XMM);
1944
1945 put("vmovq", XMM, XMM | MEM);
1946 put("vmovq", MEM, XMM);
1947
1948 put("vmovhlps", XMM, XMM);
1949 put("vmovhlps", XMM, XMM, XMM);
1950 put("vmovlhps", XMM, XMM);
1951 put("vmovlhps", XMM, XMM, XMM);
1952
1953 {
1954 const char tbl[][16] = {
1955 "vmovhpd",
1956 "vmovhps",
1957 "vmovlpd",
1958 "vmovlps",
1959 };
1960 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1961 put(tbl[i], XMM, XMM, MEM);
1962 put(tbl[i], XMM, MEM);
1963 put(tbl[i], MEM, XMM);
1964 }
1965 }
1966 put("vmovmskpd", REG32e, XMM | YMM);
1967 put("vmovmskps", REG32e, XMM | YMM);
1968
1969 put("vmovntdq", MEM, XMM | YMM);
1970 put("vmovntpd", MEM, XMM | YMM);
1971 put("vmovntdqa", XMM | YMM, MEM);
1972
1973 {
1974 const char tbl[][8] = { "vmovsd", "vmovss" };
1975 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1976 put(tbl[i], XMM, XMM, XMM);
1977 put(tbl[i], XMM, XMM | MEM);
1978 put(tbl[i], MEM, XMM);
1979 }
1980 }
1981 put("vpextrb", REG32e|MEM, XMM, IMM);
1982 put("vpextrd", REG32|MEM, XMM, IMM);
1983
1984 for (int i = 0; i < 3; i++) {
1985 const char tbl[][8] = { "vpinsrb", "vpinsrw", "vpinsrd" };
1986 put(tbl[i], XMM, XMM, REG32|MEM, IMM);
1987 put(tbl[i], XMM, REG32|MEM, IMM);
1988 }
1989
1990 put("vpmovmskb", REG32e, XMM|YMM);
1991
1992 {
1993 const struct Tbl {
1994 const char *name;
1995 bool supportYMM;
1996 } tbl[] = {
1997 { "vblendvpd", true },
1998 { "vblendvps", true },
1999 { "vpblendvb", true },
2000 };
2001 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2002 const Tbl& p = tbl[i];
2003 put(p.name, XMM, XMM, XMM | MEM, XMM);
2004 put(p.name, XMM, XMM | MEM, XMM);
2005 if (!p.supportYMM) continue;
2006 put(p.name, YMM, YMM, YMM | MEM, YMM);
2007 put(p.name, YMM, YMM | MEM, YMM);
2008 }
2009 }
2010 // cvt
2011 {
2012 put("vcvtss2si", REG32e, XMM | MEM);
2013 put("vcvttss2si", REG32e, XMM | MEM);
2014 put("vcvtsd2si", REG32e, XMM | MEM);
2015 put("vcvttsd2si", REG32e, XMM | MEM);
2016
2017 put("vcvtsi2ss", XMM, XMM, REG32e | MEM);
2018 put("vcvtsi2ss", XMM, REG32e | MEM);
2019
2020 put("vcvtsi2sd", XMM, XMM, REG32e | MEM);
2021 put("vcvtsi2sd", XMM, REG32e | MEM);
2022#ifdef XBYAK64
2023 put("vcvtsi2sd", XMM, XMM, MEM64);
2024 put("vcvtsi2sd", XMM, MEM64);
2025#endif
2026
2027 put("vcvtps2pd", XMM | YMM, XMM | MEM);
2028 put("vcvtdq2pd", XMM | YMM, XMM | MEM);
2029
2030 put("vcvtpd2ps", XMM, XMM | YMM | MEM);
2031 put("vcvtpd2dq", XMM, XMM | YMM | MEM);
2032 put("vcvttpd2dq", XMM, XMM | YMM | MEM);
2033
2034 put("vcvtph2ps", XMM | YMM, XMM | MEM);
2035 put("vcvtps2ph", XMM | MEM, XMM | YMM, IMM8);
2036 }
2037#ifdef XBYAK64
2038 put("vmovq", XMM, REG64);
2039 put("vmovq", REG64, XMM);
2040
2041 put("vpextrq", REG64|MEM, XMM, IMM);
2042
2043 put("vpinsrq", XMM, XMM, REG64|MEM, IMM);
2044 put("vpinsrq", XMM, REG64|MEM, IMM);
2045
2046#endif
2047 }
2048 void putFMA2()
2049 {
2050#ifdef USE_YASM
2051 put("vextractf128", XMM | MEM, YMM, IMM);
2052 put("vextracti128", XMM | MEM, YMM, IMM);
2053 put("vmaskmovps", MEM, YMM, YMM);
2054 put("vmaskmovpd", MEM, YMM, YMM);
2055 put("vlddqu", XMM | YMM, MEM);
2056
2057 put("vmovshdup", XMM, XMM | MEM);
2058 put("vmovshdup", YMM, YMM | MEM);
2059 put("vmovsldup", XMM, XMM | MEM);
2060 put("vmovsldup", YMM, YMM | MEM);
2061
2062 // QQQ:nasm is wrong
2063 put("vpcmpeqq", XMM, XMM | MEM);
2064 put("vpcmpeqq", XMM, XMM, XMM | MEM);
2065 put("vpcmpgtq", XMM, XMM | MEM);
2066 put("vpcmpgtq", XMM, XMM, XMM | MEM);
2067
2068 put("vmovntps", MEM, XMM | YMM); // nasm error
2069#else
2070 put("vmaskmovps", XMM, XMM, MEM);
2071 put("vmaskmovps", YMM, YMM, MEM);
2072
2073 put("vmaskmovpd", YMM, YMM, MEM);
2074 put("vmaskmovpd", XMM, XMM, MEM);
2075
2076 put("vmaskmovps", MEM, XMM, XMM);
2077 put("vmaskmovpd", MEM, XMM, XMM);
2078#endif
2079 }
2080 void putCmp()
2081 {
2082 const char pred[32][16] = {
2083 "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord",
2084 "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt",
2085 "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s",
2086 "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us"
2087 };
2088 const char suf[][4] = { "pd", "ps", "sd", "ss" };
2089 for (int i = 0; i < 4; i++) {
2090 for (int j = 0; j < 32; j++) {
2091 if (j < 8) {
2092 put((std::string("cmp") + pred[j] + suf[i]).c_str(), XMM, XMM | MEM);
2093 }
2094 std::string str = std::string("vcmp") + pred[j] + suf[i];
2095 const char *p = str.c_str();
2096 put(p, XMM, XMM | MEM);
2097 put(p, XMM, XMM, XMM | MEM);
2098 if (i >= 2) continue;
2099 put(p, YMM, YMM | MEM);
2100 put(p, YMM, YMM, YMM | MEM);
2101 }
2102 }
2103 }
2104 void putRip()
2105 {
2106 const char tbl[][2][64] = {
2107 { "mov(byte [rip - 10], 3);dump();", "mov byte [rip - 10], 3" },
2108 { "mov(word [rip - 10], 3);dump();", "mov word [rip - 10], 3" },
2109 { "mov(dword[rip - 10], 3);dump();", "mov dword [rip - 10], 3" },
2110 { "mov(qword [rip - 10], 3);dump();", "mov qword [rip - 10], 3" },
2111 { "mov(ptr [rip - 10], al);dump();", "mov byte [rip - 10], al" },
2112 { "mov(ptr [rip - 10], ax);dump();", "mov word [rip - 10], ax" },
2113 { "mov(ptr [rip - 10], eax);dump();", "mov dword [rip - 10], eax" },
2114 { "mov(ptr [rip - 10], rax);dump();", "mov qword [rip - 10], rax" },
2115 };
2116 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2117 puts(tbl[i][isXbyak_ ? 0 : 1]);
2118 }
2119 }
2120public:
2121 Test(bool isXbyak)
2122 : isXbyak_(isXbyak)
2123 , funcNum_(1)
2124 {
2125 if (!isXbyak_) return;
2126 printf("%s",
2127 " void gen0()\n"
2128 " {\n");
2129 }
2130 /*
2131 gcc and vc give up to compile this source,
2132 so I split functions.
2133 */
2135 {
2136 if (!isXbyak_) return;
2137 printf(
2138 " }\n"
2139 " void gen%d()\n"
2140 " {\n", funcNum_++);
2141 }
2143 {
2144 if (!isXbyak_) return;
2145 printf("%s",
2146 " }\n"
2147 " void gen()\n"
2148 " {\n");
2149 for (int i = 0; i < funcNum_; i++) {
2150 printf(
2151 " gen%d();\n", i);
2152 }
2153 printf(
2154 " }\n");
2155 }
2157 {
2158 const char *tbl[] = {
2159 "andn",
2160 "mulx",
2161 "pdep",
2162 "pext",
2163 };
2164 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2165 const char *name = tbl[i];
2166 put(name, REG32, REG32, REG32 | MEM);
2167#ifdef XBYAK64
2168 put(name, REG64, REG64, REG64 | MEM);
2169#endif
2170 }
2171 }
2173 {
2174 const char *tbl[] = {
2175 "bextr",
2176 "bzhi",
2177 "sarx",
2178 "shlx",
2179 "shrx",
2180 };
2181 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2182 const char *name = tbl[i];
2183 put(name, REG32, REG32 | MEM, REG32);
2184#ifdef XBYAK64
2185 put(name, REG64, REG64 | MEM, REG64);
2186#endif
2187 }
2188 }
2190 {
2191 const char *tbl[] = {
2192 "blsi",
2193 "blsmsk",
2194 "blsr",
2195 };
2196 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2197 const char *name = tbl[i];
2198 put(name, REG32, REG32 | MEM);
2199#ifdef XBYAK64
2200 put(name, REG64, REG64 | MEM);
2201#endif
2202 }
2203 }
2205 {
2206 put("rdrand", REG16 | REG32e);
2207 put("rdseed", REG16 | REG32e);
2208 put("rorx", REG32, REG32 | MEM, IMM8);
2209#ifdef XBYAK64
2210 put("rorx", REG64, REG64 | MEM, IMM8);
2211#endif
2212 }
2214 {
2215 const int y_vx_y = 0;
2216 const int y_vy_y = 1;
2217 const int x_vy_x = 2;
2218 const struct Tbl {
2219 const char *name;
2220 int mode;
2221 } tbl[] = {
2222 { "vgatherdpd", y_vx_y },
2223 { "vgatherqpd", y_vy_y },
2224 { "vgatherdps", y_vy_y },
2225 { "vgatherqps", x_vy_x },
2226 { "vpgatherdd", y_vy_y },
2227 { "vpgatherqd", x_vy_x },
2228 { "vpgatherdq", y_vx_y },
2229 { "vpgatherqq", y_vy_y },
2230 };
2231 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2232 const Tbl& p = tbl[i];
2233 const char *name = p.name;
2234 put(name, XMM, VM32X, XMM);
2235 switch (p.mode) {
2236 case y_vx_y:
2237 put(name, YMM, VM32X, YMM);
2238 break;
2239 case y_vy_y:
2240 put(name, YMM, VM32Y, YMM);
2241 break;
2242 case x_vy_x:
2243 put(name, XMM, VM32Y, XMM);
2244 break;
2245 default:
2246 printf("ERR mode=%d\n", p.mode);
2247 exit(1);
2248 }
2249 }
2250 }
2251 void putGath(const std::string& vsib)
2252 {
2253 std::string x = "xmm1, ";
2254 std::string a = std::string("[") + vsib + "], xmm3";
2255 put("vgatherdpd", (x + "ptr" + a).c_str(), (x + a).c_str());
2256 }
2257
2259 {
2260 const char *xmmTbl[] = {
2261 "xmm2",
2262 "xmm4",
2263 "xmm2*1",
2264 "xmm2*4",
2265 };
2266 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl); i++) {
2267 std::string s = xmmTbl[i];
2268 putGath(s);
2269 putGath(s + "+3");
2270 putGath(s + "+eax");
2271 putGath("3+" + s);
2272 putGath("eax+" + s);
2273 }
2274 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl); i++) {
2275 int ord[] = { 0, 1, 2 };
2276 do {
2277 std::string s;
2278 for (int j = 0; j < 3; j++) {
2279 if (j > 0) s += '+';
2280 switch (ord[j]) {
2281 case 0: s += xmmTbl[i]; break;
2282 case 1: s += "123"; break;
2283 case 2: s += "ebp"; break;
2284 }
2285 }
2286 putGath(s);
2287 } while (std::next_permutation(ord, ord + 3));
2288 }
2289 }
2290 void putSeg()
2291 {
2292 {
2293 const char *segTbl[] = {
2294 "es",
2295 "cs",
2296 "ss",
2297 "ds",
2298 "fs",
2299 "gs",
2300 };
2301 for (size_t i = 0; i < NUM_OF_ARRAY(segTbl); i++) {
2302 const char *seg = segTbl[i];
2303 const char *op1Tbl[] = {
2304 "ax",
2305 "edx",
2306 (isXbyak_ ? "ptr [eax]" : "[eax]"),
2307#ifdef XBYAK64
2308 "r9",
2309#endif
2310 };
2311 for (size_t j = 0; j < NUM_OF_ARRAY(op1Tbl); j++) {
2312 const char *op1 = op1Tbl[j];
2313 if (isXbyak_) {
2314 printf("mov(%s, %s); dump();\n", op1, seg);
2315 printf("mov(%s, %s); dump();\n", seg, op1);
2316 } else {
2317 printf("mov %s, %s\n", op1, seg);
2318 printf("mov %s, %s\n", seg, op1);
2319 }
2320 }
2321 }
2322 }
2323 {
2324 const char *segTbl[] = {
2325#ifdef XBYAK32
2326 "es",
2327 "ss",
2328 "ds",
2329#endif
2330 "fs",
2331 "gs",
2332 };
2333 for (size_t i = 0; i < NUM_OF_ARRAY(segTbl); i++) {
2334 const char *seg = segTbl[i];
2335 if (isXbyak_) {
2336 printf("push(%s); dump();\n", seg);
2337 printf("pop(%s); dump();\n", seg);
2338 } else {
2339 printf("push %s\n", seg);
2340 printf("pop %s\n", seg);
2341 }
2342 }
2343 }
2344 }
2345 void put()
2346 {
2347#ifdef USE_AVX512
2348 putAVX512();
2349#else
2350
2351#ifdef USE_AVX
2352
2353 separateFunc();
2354 putFMA2();
2355
2356#ifdef USE_YASM
2357 putGprR_R_RM();
2358 putGprR_RM_R();
2359 putGprR_RM();
2361 putGather();
2362 putGatherAll();
2363#else
2364 putAVX1();
2365 separateFunc();
2366 putAVX2();
2367 putAVX_X_X_XM_omit();
2368 separateFunc();
2369 putAVX_X_X_XM_IMM();
2370 separateFunc();
2371 putAVX_X_XM_IMM();
2372 separateFunc();
2373 putAVX_X_X_XM();
2374 separateFunc();
2375 putAVX_X_XM();
2376 separateFunc();
2377 putAVX_M_X();
2378 putAVX_X_X_IMM_omit();
2379 separateFunc();
2380 putAVX_Y_XM();
2381 separateFunc();
2382 putFMA();
2383 putSHA();
2384#endif
2385
2386#else // USE_AVX
2387
2388 putJmp();
2389
2390#ifdef USE_YASM
2391
2392 putSSSE3();
2393 putSSE4_1();
2394 separateFunc();
2395 putSSE4_2();
2396 putSeg(); // same behavior as yasm for mov rax, cx
2397#else
2398 putSIMPLE();
2399 putReg1();
2400 putBt();
2401 putRorM();
2402 separateFunc();
2403 putPushPop();
2404 putTest();
2405 separateFunc();
2406 putEtc();
2407 putShift();
2408 putShxd();
2409
2410 separateFunc();
2411
2412 putBs();
2413 putMMX1();
2414 putMMX2();
2415 separateFunc();
2416 putMMX3();
2417 putMMX4();
2418 putMMX5();
2419 separateFunc();
2420 putXMM1();
2421 putXMM2();
2422 putXMM3();
2423 putXMM4();
2424 separateFunc();
2425 putCmov();
2426 putFpuMem16_32();
2427 putFpuMem32_64();
2428 separateFunc();
2429 putFpuMem16_32_64();
2430 put("clflush", MEM); // current nasm is ok
2431 putFpu();
2432 putFpuFpu();
2433 putCmp();
2434 putMPX();
2435#endif
2436
2437#ifdef XBYAK64
2438
2439#ifdef USE_YASM
2440 putRip();
2441#else
2442 putMov64();
2443 putMovImm64();
2444#endif
2445
2446#endif // XBYAK64
2447
2448#endif // USE_AVX
2449
2450#endif // USE_AVX512
2451 }
2452#ifdef USE_AVX512
2453 void putOpmask()
2454 {
2455 {
2456 const char *tbl[] = {
2457 "kadd",
2458 "kand",
2459 "kandn",
2460 "kor",
2461 "kxnor",
2462 "kxor",
2463 };
2464 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2465 std::string name = tbl[i];
2466 put(name + "b", K, K, K);
2467 put(name + "w", K, K, K);
2468 put(name + "q", K, K, K);
2469 put(name + "d", K, K, K);
2470 }
2471 put("kunpckbw", K, K, K);
2472 put("kunpckwd", K, K, K);
2473 put("kunpckdq", K, K, K);
2474 }
2475 {
2476 const char *tbl[] = {
2477 "knot",
2478 "kortest",
2479 "ktest",
2480 };
2481 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2482 std::string name = tbl[i];
2483 put(name + "b", K, K);
2484 put(name + "w", K, K);
2485 put(name + "q", K, K);
2486 put(name + "d", K, K);
2487 }
2488 }
2489 {
2490 const char *tbl[] = {
2491 "kshiftl",
2492 "kshiftr",
2493 };
2494 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2495 std::string name = tbl[i];
2496 put(name + "b", K, K, IMM8);
2497 put(name + "w", K, K, IMM8);
2498 put(name + "q", K, K, IMM8);
2499 put(name + "d", K, K, IMM8);
2500 }
2501 }
2502 put("kmovw", K, K | MEM | REG32);
2503 put("kmovq", K, K | MEM);
2504 put("kmovb", K, K | MEM | REG32);
2505 put("kmovd", K, K | MEM | REG32);
2506
2507 put("kmovw", MEM | REG32, K);
2508 put("kmovq", MEM, K);
2509 put("kmovb", MEM | REG32, K);
2510 put("kmovd", MEM | REG32, K);
2511#ifdef XBYAK64
2512 put("kmovq", K, REG64);
2513 put("kmovq", REG64, K);
2514#endif
2515 }
2516 void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx = 0, bool z = false, int sae = 0)
2517 {
2518 std::string modifier;
2519 char pk[16] = "";
2520 const char *pz = "";
2521 const char *saeTblXbyak[] = { "", "|T_rn_sae", "|T_rd_sae", "|T_ru_sae", "|T_rz_sae" };
2522 const char *saeTblNASM[] = { "", ",{rn-sae}", ",{rd-sae}", ",{ru-sae}", ",{rz-sae}" };
2523 if (isXbyak_) {
2524 if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "|k%d", kIdx);
2525 if (z) pz = "|T_z";
2526 printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
2527 } else {
2528 if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
2529 if (z) pz = "{z}";
2530 printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
2531 }
2532 }
2533 void putCombi()
2534 {
2535 const char *xTbl[] = {
2536 "xmm2",
2537#ifdef XBYAK64
2538 "xmm8", "xmm31"
2539#else
2540 "xmm5", "xmm6"
2541#endif
2542 };
2543 const char *yTbl[] = {
2544 "ymm0",
2545#ifdef XBYAK64
2546 "ymm15", "ymm31"
2547#else
2548 "ymm4", "ymm2"
2549#endif
2550 };
2551 const char *zTbl[] = {
2552 "zmm1",
2553#ifdef XBYAK64
2554 "zmm9", "zmm30"
2555#else
2556 "zmm3", "zmm7"
2557#endif
2558 };
2559 const size_t N = NUM_OF_ARRAY(zTbl);
2560 for (size_t i = 0; i < N; i++) {
2561 for (size_t j = 0; j < N; j++) {
2562 separateFunc();
2563 for (size_t k = 0; k < N; k++) {
2564#ifdef XBYAK64
2565 for (int kIdx = 0; kIdx < 8; kIdx++) {
2566 for (int z = 0; z < 2; z++) {
2567 put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx, z == 1);
2568 put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx, z == 1);
2569 for (int sae = 0; sae < 5; sae++) {
2570 put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1, sae);
2571 }
2572 }
2573 }
2574#else
2575 put_vaddpd(xTbl[i], xTbl[j], xTbl[k]);
2576 put_vaddpd(yTbl[i], yTbl[j], yTbl[k]);
2577 for (int sae = 0; sae < 5; sae++) {
2578 put_vaddpd(zTbl[i], zTbl[j], zTbl[k], sae);
2579 }
2580#endif
2581 }
2582 }
2583 }
2584 put("vaddpd", XMM, XMM, _MEM);
2585 put("vaddpd", YMM, YMM, _MEM);
2586 put("vaddpd", ZMM, ZMM, _MEM);
2587 }
2588 void putCmpK()
2589 {
2590 {
2591 const struct Tbl {
2592 const char *name;
2593 bool supportYMM;
2594 } tbl[] = {
2595 { "vcmppd", true },
2596 { "vcmpps", true },
2597 { "vcmpsd", false },
2598 { "vcmpss", false },
2599 };
2600 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2601 const Tbl *p = &tbl[i];
2602 put(p->name, K, _XMM, _XMM | MEM, IMM);
2603 if (!p->supportYMM) continue;
2604 put(p->name, K, _YMM, _YMM | MEM, IMM);
2605 put(p->name, K, _ZMM, _ZMM | MEM, IMM);
2606 }
2607 }
2608 put("vcmppd", K2, ZMM, ZMM_SAE, IMM);
2609#ifdef XBYAK64
2610 {
2611 const struct Tbl {
2612 const char *name;
2613 } tbl[] = {
2614 { "vcomisd" },
2615 { "vcomiss" },
2616 { "vucomisd" },
2617 { "vucomiss" },
2618 };
2619 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2620 const Tbl *p = &tbl[i];
2621 put(p->name, XMM | _XMM3, XMM_SAE | XMM | MEM);
2622 }
2623 }
2624 put("vcomiss", _XMM3, XMM | MEM);
2625 put("vcomiss", XMM, XMM_SAE);
2626#endif
2627 }
2628 void putBroadcastSub(int idx, int disp)
2629 {
2630#ifdef XBYAK64
2631 const char *a = "rax";
2632#else
2633 const char *a = "eax";
2634#endif
2635 if (isXbyak_) {
2636 printf("vaddpd(zmm%d, zmm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
2637 printf("vaddpd(ymm%d, ymm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
2638 printf("vaddpd(xmm%d, xmm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
2639 } else {
2640 printf("vaddpd zmm%d, zmm1, [%s+%d]{1to8}\n", idx, a, disp);
2641 printf("vaddpd ymm%d, ymm1, [%s+%d]{1to4}\n", idx, a, disp);
2642 printf("vaddpd xmm%d, xmm1, [%s+%d]{1to2}\n", idx, a, disp);
2643 }
2644 }
2645 void putBroadcast()
2646 {
2647 for (int i = 0; i < 9; i++) {
2648 putBroadcastSub(0, i);
2649#ifdef XBYAK64
2650 putBroadcastSub(10, i);
2651 putBroadcastSub(20, i);
2652#endif
2653 }
2654 put("vpbroadcastb", XMM_KZ | ZMM_KZ, REG8);
2655 put("vpbroadcastw", XMM_KZ | ZMM_KZ, REG16);
2656 put("vpbroadcastd", XMM_KZ | ZMM_KZ, REG32);
2657#ifdef XBYAK64
2658 put("vpbroadcastq", XMM_KZ | ZMM_KZ, REG64);
2659#endif
2660 {
2661 const char *tbl[] = {
2662 "vpbroadcastb",
2663 "vpbroadcastw",
2664 "vpbroadcastd",
2665 "vpbroadcastq",
2666 };
2667 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2668 put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM);
2669 }
2670 }
2671 put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM);
2672 put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM);
2673 put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM);
2674 put("vbroadcasti32x8", ZMM_KZ, _MEM);
2675 put("vbroadcasti64x4", ZMM_KZ, _MEM);
2676 }
2677 void putAVX512_M_X()
2678 {
2679 const char *tbl[] = {
2680 "vmovapd",
2681 "vmovaps",
2682 "vmovupd",
2683 "vmovups",
2684 };
2685 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2686 const char *name = tbl[i];
2687 put(name, MEM|MEM_K, ZMM|XMM|YMM);
2688 put(name, ZMM, MEM);
2689 }
2690 }
/*
	AVX-512 move, align and half-move forms, mostly using the _XMM3 / _YMM3
	operand classes. The whole body is compiled only when XBYAK64 is defined;
	otherwise the function does nothing.
*/
void put_vmov()
{
#ifdef XBYAK64
	put("vmovd", _XMM3, MEM|REG32);
	put("vmovd", MEM|REG32, _XMM3);
	put("vmovq", _XMM3, MEM|REG64|XMM);
	put("vmovq", MEM|REG64|XMM, _XMM3);
	put("vmovhlps", _XMM3, _XMM3, _XMM3);
	put("vmovlhps", _XMM3, _XMM3, _XMM3);
	put("vmovntdqa", _XMM3|_YMM3|ZMM, MEM);
	{
		const char *const ntStores[] = { "vmovntdq", "vmovntpd", "vmovntps" };
		for (size_t n = 0; n < NUM_OF_ARRAY(ntStores); n++) {
			put(ntStores[n], MEM, _XMM3 | _YMM3 | ZMM);
		}
	}

	// scalar moves with masking
	{
		const char *const scalarMov[] = { "vmovsd", "vmovss" };
		for (size_t n = 0; n < NUM_OF_ARRAY(scalarMov); n++) {
			put(scalarMov[n], XMM_KZ, _XMM3, _XMM3);
			put(scalarMov[n], XMM_KZ, MEM);
			put(scalarMov[n], MEM_K, XMM);
		}
	}

	put("vmovshdup", _ZMM, _ZMM);
	put("vmovsldup", _ZMM, _ZMM);


	{
		const char *const alignOps[] = { "valignd", "valignq" };
		for (size_t n = 0; n < NUM_OF_ARRAY(alignOps); n++) {
			const char *m = alignOps[n];
			put(m, XMM_KZ, _XMM, _XMM | MEM, IMM);
			put(m, _YMM3, _YMM3, _YMM3, IMM);
			put(m, _ZMM, _ZMM, _ZMM, IMM);
		}
	}
	{
		const char *const halfMov[] = { "vmovhpd", "vmovhps", "vmovlpd", "vmovlps" };
		for (size_t n = 0; n < NUM_OF_ARRAY(halfMov); n++) {
			put(halfMov[n], _XMM3, _XMM3, MEM);
			put(halfMov[n], MEM, _XMM3);
		}
	}
#endif
}
2742 void put512_X_XM()
2743 {
2744 const struct Tbl {
2745 const char *name;
2746 bool M_X;
2747 } tbl[] = {
2748 { "vmovddup", false },
2749 { "vmovdqa32", true },
2750 { "vmovdqa64", true },
2751 { "vmovdqu8", true },
2752 { "vmovdqu16", true },
2753 { "vmovdqu32", true },
2754 { "vmovdqu64", true },
2755 { "vpabsb", false },
2756 { "vpabsw", false },
2757 { "vpabsd", false },
2758 { "vpabsq", false },
2759 };
2760 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2761 const Tbl& p = tbl[i];
2762 put(p.name, _XMM|XMM_KZ, _XMM|MEM);
2763 put(p.name, _YMM|YMM_KZ, _YMM|MEM);
2764 put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
2765 if (!p.M_X) continue;
2766 put(p.name, MEM, _XMM);
2767 put(p.name, MEM, _YMM);
2768 put(p.name, MEM, _ZMM);
2769 }
2770 put("vsqrtpd", XMM_KZ, M_1to2);
2771 put("vsqrtpd", YMM_KZ, M_1to4);
2772 put("vsqrtpd", ZMM_KZ, M_1to8);
2773 put("vsqrtpd", ZMM_KZ, ZMM_ER);
2774
2775 put("vsqrtps", XMM_KZ, M_1to4);
2776 put("vsqrtps", YMM_KZ, M_1to8);
2777 put("vsqrtps", ZMM_KZ, M_1to16);
2778 put("vsqrtps", ZMM_KZ, ZMM_ER);
2779
2780 put("vpabsd", ZMM_KZ, M_1to16);
2781 put("vpabsq", ZMM_KZ, M_1to8);
2782
2783 put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, _XMM | _MEM);
2784 put("vbroadcastf32x4", YMM_KZ | ZMM_KZ, _MEM);
2785
2786 put("vbroadcastf64x2", YMM_KZ | ZMM_KZ, _MEM);
2787 put("vbroadcastf64x4", ZMM_KZ, _MEM);
2788 }
2789 void put512_X_X_XM()
2790 {
2791 const struct Tbl {
2792 const char *name;
2793 uint64_t mem;
2794 } tbl[] = {
2795 { "vsqrtsd", MEM },
2796 { "vsqrtss", MEM },
2797 { "vunpckhpd", M_1to2 },
2798 { "vunpckhps", M_1to4 },
2799 { "vunpcklpd", M_1to2 },
2800 { "vunpcklps", M_1to4 },
2801 };
2802 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2803 const Tbl& p = tbl[i];
2804 put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
2805 }
2806 }
// Emits one put() call per table row, passing the mnemonic and three
// operand masks in order. Rows whose third column is IMM8 describe a
// "destination, source, immediate" form rather than three vector operands.
// The whole body is compiled only when XBYAK64 is defined.
void put512_X3()
{
#ifdef XBYAK64
    const struct Row {
        const char *mnemonic;
        uint64_t op1;
        uint64_t op2;
        uint64_t op3;
    } rows[] = {
        // pack
        { "vpacksswb", XMM_KZ, _XMM, _XMM | _MEM },
        { "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM },
        { "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM },

        { "vpackssdw", XMM_KZ, _XMM, _XMM | M_1to4 },
        { "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 },
        { "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 },

        { "vpackusdw", XMM_KZ, _XMM, _XMM | M_1to4 },
        { "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 },
        { "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 },

        { "vpackuswb", XMM_KZ, _XMM, _XMM | _MEM },
        { "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM },
        { "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM },

        // add
        { "vpaddb", XMM_KZ, _XMM, _XMM | _MEM },
        { "vpaddw", XMM_KZ, _XMM, _XMM | _MEM },
        { "vpaddd", XMM_KZ, _XMM, _XMM | M_1to4 },
        { "vpaddq", ZMM_KZ, _ZMM, M_1to8 },

        { "vpaddsb", XMM_KZ, _XMM, _XMM | _MEM },
        { "vpaddsb", ZMM_KZ, _ZMM, _ZMM | _MEM },

        { "vpaddsw", XMM_KZ, _XMM, _XMM | _MEM },
        { "vpaddsw", ZMM_KZ, _ZMM, _ZMM | _MEM },

        { "vpaddusb", XMM_KZ, _XMM, _XMM | MEM },
        { "vpaddusb", ZMM_KZ, _ZMM, _ZMM | MEM },

        { "vpaddusw", XMM_KZ, _XMM, _XMM | MEM },
        { "vpaddusw", ZMM_KZ, _ZMM, _ZMM | MEM },

        // subtract
        { "vpsubb", XMM_KZ, _XMM, _XMM | _MEM },
        { "vpsubw", XMM_KZ, _XMM, _XMM | _MEM },
        { "vpsubd", XMM_KZ, _XMM, _XMM | M_1to4 },
        { "vpsubq", ZMM_KZ, _ZMM, M_1to8 },

        { "vpsubsb", XMM_KZ, _XMM, _XMM | _MEM },
        { "vpsubsb", ZMM_KZ, _ZMM, _ZMM | _MEM },

        { "vpsubsw", XMM_KZ, _XMM, _XMM | _MEM },
        { "vpsubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },

        { "vpsubusb", XMM_KZ, _XMM, _XMM | MEM },
        { "vpsubusb", ZMM_KZ, _ZMM, _ZMM | MEM },

        { "vpsubusw", XMM_KZ, _XMM, _XMM | MEM },
        { "vpsubusw", ZMM_KZ, _ZMM, _ZMM | MEM },

        // and / andn / average
        { "vpandd", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
        { "vpandq", ZMM_KZ, _ZMM, _ZMM | M_1to8 },

        { "vpandnd", ZMM_KZ, _ZMM, _ZMM | M_1to16 },
        { "vpandnq", ZMM_KZ, _ZMM, _ZMM | M_1to8 },

        { "vpavgb", ZMM_KZ, _ZMM, _ZMM },
        { "vpavgw", ZMM_KZ, _ZMM, _ZMM },

        // compares with a K2 destination
        { "vpcmpeqb", K2, _ZMM, _ZMM | _MEM },
        { "vpcmpeqw", K2, _ZMM, _ZMM | _MEM },
        { "vpcmpeqd", K2, _ZMM, _ZMM | M_1to16 },
        { "vpcmpeqq", K2, _ZMM, _ZMM | M_1to8 },

        { "vpcmpgtb", K2, _ZMM, _ZMM | _MEM },
        { "vpcmpgtw", K2, _ZMM, _ZMM | _MEM },
        { "vpcmpgtd", K2, _ZMM, _ZMM | M_1to16 },
        { "vpcmpgtq", K2, _ZMM, _ZMM | M_1to8 },

        // multiply-add
        { "vpmaddubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
        { "vpmaddwd", ZMM_KZ, _ZMM, _ZMM | _MEM },

        // min / max
        { "vpmaxsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
        { "vpmaxsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
        { "vpmaxsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
        { "vpmaxsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },

        { "vpmaxub", ZMM_KZ, _ZMM, _ZMM | _MEM },
        { "vpmaxuw", ZMM_KZ, _ZMM, _ZMM | _MEM },
        { "vpmaxud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
        { "vpmaxuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },

        { "vpminsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
        { "vpminsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
        { "vpminsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
        { "vpminsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },

        { "vpminub", ZMM_KZ, _ZMM, _ZMM | _MEM },
        { "vpminuw", ZMM_KZ, _ZMM, _ZMM | _MEM },
        { "vpminud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
        { "vpminuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },

        // shifts by immediate
        { "vpslldq", _XMM3, _XMM3 | _MEM, IMM8 },
        { "vpslldq", _YMM3, _YMM3 | _MEM, IMM8 },
        { "vpslldq", _ZMM, _ZMM | _MEM, IMM8 },

        { "vpsrldq", _XMM3, _XMM3 | _MEM, IMM8 },
        { "vpsrldq", _YMM3, _YMM3 | _MEM, IMM8 },
        { "vpsrldq", _ZMM, _ZMM | _MEM, IMM8 },

        { "vpsraw", XMM_KZ, _XMM, IMM8 },
        { "vpsraw", ZMM_KZ, _ZMM, IMM8 },

        { "vpsrad", XMM_KZ, _XMM | M_1to4, IMM8 },
        { "vpsrad", ZMM_KZ, _ZMM | M_1to16, IMM8 },

        { "vpsraq", XMM, XMM, IMM8 },
        { "vpsraq", XMM_KZ, _XMM | M_1to2, IMM8 },
        { "vpsraq", ZMM_KZ, _ZMM | M_1to8, IMM8 },

        { "vpsllw", _XMM3, _XMM3 | _MEM, IMM8 },
        { "vpslld", _XMM3, _XMM3 | _MEM | M_1to4, IMM8 },
        { "vpsllq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },

        { "vpsrlw", XMM_KZ, _XMM, IMM8 },
        { "vpsrlw", ZMM_KZ, _ZMM, IMM8 },

        { "vpsrld", XMM_KZ, _XMM | M_1to4, IMM8 },
        { "vpsrld", ZMM_KZ, _ZMM | M_1to16, IMM8 },

        { "vpsrlq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
        { "vpsrlq", _ZMM, _ZMM | _MEM | M_1to8, IMM8 },

        // variable shifts
        { "vpsravw", XMM_KZ | _XMM, _XMM, _XMM },
        { "vpsravw", _ZMM, _ZMM, _MEM },

        { "vpsravd", XMM_KZ | _XMM, _XMM, _XMM },
        { "vpsravd", _ZMM, _ZMM, M_1to16 },

        { "vpsravq", XMM_KZ | _XMM, _XMM, _XMM },
        { "vpsravq", _ZMM, _ZMM, M_1to8 },

        { "vpsllvw", XMM_KZ | _XMM, _XMM, _XMM },
        { "vpsllvw", _ZMM, _ZMM, _MEM },

        { "vpsllvd", XMM_KZ | _XMM, _XMM, _XMM },
        { "vpsllvd", _ZMM, _ZMM, M_1to16 },

        { "vpsllvq", XMM_KZ | _XMM, _XMM, _XMM },
        { "vpsllvq", _ZMM, _ZMM, M_1to8 },

        { "vpsrlvw", XMM_KZ | _XMM, _XMM, _XMM },
        { "vpsrlvw", _ZMM, _ZMM, _MEM },

        { "vpsrlvd", XMM_KZ | _XMM, _XMM, _XMM },
        { "vpsrlvd", _ZMM, _ZMM, M_1to16 },

        { "vpsrlvq", XMM_KZ | _XMM, _XMM, _XMM },
        { "vpsrlvq", _ZMM, _ZMM, M_1to8 },

        // shuffles
        { "vpshufb", _XMM | XMM_KZ, _XMM, _XMM },
        { "vpshufb", ZMM_KZ, _ZMM, _MEM },

        { "vpshufhw", _XMM | XMM_KZ, _XMM, IMM8 },
        { "vpshufhw", ZMM_KZ, _MEM, IMM8 },

        { "vpshuflw", _XMM | XMM_KZ, _XMM, IMM8 },
        { "vpshuflw", ZMM_KZ, _MEM, IMM8 },

        { "vpshufd", _XMM | XMM_KZ, _XMM | M_1to4, IMM8 },
        { "vpshufd", _ZMM | ZMM_KZ, _ZMM | M_1to16, IMM8 },

        // or / xor
        { "vpord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 },
        { "vpord", _ZMM | ZMM_KZ, _ZMM, M_1to16 },

        { "vporq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 },
        { "vporq", _ZMM | ZMM_KZ, _ZMM, M_1to8 },

        { "vpxord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 },
        { "vpxord", _ZMM | ZMM_KZ, _ZMM, M_1to16 },

        { "vpxorq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 },
        { "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 },

        // sum of absolute differences / multiplies
        { "vpsadbw", _XMM3, _XMM, _XMM },
        { "vpsadbw", _ZMM, _ZMM, _MEM },

        { "vpmuldq", _XMM3, _XMM, _XMM | M_1to2 },
        { "vpmuldq", ZMM_KZ, _ZMM, M_1to8 },

        { "vpmulhrsw", _XMM3, _XMM, _XMM },
        { "vpmulhrsw", ZMM_KZ, _ZMM, _MEM },

        { "vpmulhuw", _XMM3, _XMM, _XMM },
        { "vpmulhuw", ZMM_KZ, _ZMM, _MEM },

        { "vpmulhw", _XMM3, _XMM, _XMM },
        { "vpmulhw", ZMM_KZ, _ZMM, _MEM },

        { "vpmullw", _XMM3, _XMM, _XMM },
        { "vpmullw", ZMM_KZ, _ZMM, _MEM },

        { "vpmulld", _XMM3, _XMM, M_1to4 },
        { "vpmulld", ZMM_KZ, _ZMM, M_1to16 },

        { "vpmullq", _XMM3, _XMM, M_1to2 },
        { "vpmullq", ZMM_KZ, _ZMM, M_1to8 },

        { "vpmuludq", _XMM3, _XMM, M_1to2 },
        { "vpmuludq", ZMM_KZ, _ZMM, M_1to8 },

        // unpack
        { "vpunpckhbw", _XMM3, _XMM, _XMM },
        { "vpunpckhbw", _ZMM, _ZMM, _MEM },

        { "vpunpckhwd", _XMM3, _XMM, _XMM },
        { "vpunpckhwd", _ZMM, _ZMM, _MEM },

        { "vpunpckhdq", _XMM3, _XMM, M_1to4 },
        { "vpunpckhdq", _ZMM, _ZMM, M_1to16 },

        { "vpunpckhqdq", _XMM3, _XMM, M_1to2 },
        { "vpunpckhqdq", _ZMM, _ZMM, M_1to8 },

        { "vpunpcklbw", _XMM3, _XMM, _XMM },
        { "vpunpcklbw", _ZMM, _ZMM, _MEM },

        { "vpunpcklwd", _XMM3, _XMM, _XMM },
        { "vpunpcklwd", _ZMM, _ZMM, _MEM },

        { "vpunpckldq", _XMM3, _XMM, M_1to4 },
        { "vpunpckldq", _ZMM, _ZMM, M_1to16 },

        { "vpunpcklqdq", _XMM3, _XMM, M_1to2 },
        { "vpunpcklqdq", _ZMM, _ZMM, M_1to8 },

        // extract
        { "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
        { "vextractf64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
        { "vextractf32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
        { "vextractf64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },

        { "vextracti32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
        { "vextracti64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
        { "vextracti32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
        { "vextracti64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },

        { "vextractps", REG32 | _MEM, _XMM3, IMM8 },

        // permute
        { "vpermb", XMM_KZ, _XMM, _XMM },
        { "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM },

        { "vpermw", XMM_KZ, _XMM, _XMM },
        { "vpermw", ZMM_KZ, _ZMM, _ZMM | _MEM },

        { "vpermd", YMM_KZ, _YMM, _YMM | M_1to8 },
        { "vpermd", ZMM_KZ, _ZMM, _ZMM | M_1to16 },

        { "vpermilpd", XMM_KZ, _XMM, _XMM | M_1to2 },
        { "vpermilpd", ZMM_KZ, _ZMM, M_1to8 },
        { "vpermilpd", XMM_KZ, M_1to2, IMM8 },
        { "vpermilpd", ZMM_KZ, M_1to8, IMM8 },

        { "vpermilps", XMM_KZ, _XMM, _XMM | M_1to4 },
        { "vpermilps", ZMM_KZ, _ZMM, M_1to16 },
        { "vpermilps", XMM_KZ, M_1to4, IMM8 },
        { "vpermilps", ZMM_KZ, M_1to16, IMM8 },

        { "vpermpd", YMM_KZ, _YMM | M_1to4, IMM8 },
        { "vpermpd", ZMM_KZ, _ZMM | M_1to8, IMM8 },
        { "vpermpd", YMM_KZ, _YMM, M_1to4 },
        { "vpermpd", ZMM_KZ, _ZMM, M_1to8 },

        { "vpermps", YMM_KZ, _YMM, M_1to8 },
        { "vpermps", ZMM_KZ, _ZMM, M_1to16 },

        { "vpermq", YMM_KZ, _YMM | M_1to4, IMM8 },
        { "vpermq", ZMM_KZ, _ZMM | M_1to8, IMM8 },
        { "vpermq", YMM_KZ, _YMM, M_1to4 },
        { "vpermq", ZMM_KZ, _ZMM, M_1to8 },
    };
    // Rows are emitted strictly in table order.
    const Row *const last = rows + NUM_OF_ARRAY(rows);
    for (const Row *r = rows; r != last; ++r) {
        put(r->mnemonic, r->op1, r->op2, r->op3);
    }
#endif
}
3091 void put512_X3_I()
3092 {
3093 const struct Tbl {
3094 const char *name;
3095 uint64_t x1;
3096 uint64_t x2;
3097 uint64_t xm;
3098 } tbl[] = {
3099#ifdef XBYAK64
3100 { "vinsertps", _XMM, _XMM, _XMM3 },
3101
3102 { "vshufpd", XMM_KZ, _XMM, M_1to2 },
3103 { "vshufpd", ZMM_KZ, _ZMM, M_1to8 },
3104
3105 { "vshufps", XMM_KZ, _XMM, M_1to4 },
3106 { "vshufps", ZMM_KZ, _ZMM, M_1to16 },
3107
3108 { "vinsertf32x4", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3109 { "vinsertf32x4", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3110
3111 { "vinsertf64x2", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3112 { "vinsertf64x2", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3113
3114 { "vinsertf32x8", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3115 { "vinsertf64x4", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3116
3117 { "vinserti32x4", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3118 { "vinserti32x4", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3119
3120 { "vinserti64x2", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
3121 { "vinserti64x2", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
3122
3123 { "vinserti32x8", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3124 { "vinserti64x4", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
3125#endif
3126 { "vpalignr", ZMM_KZ, _ZMM, _ZMM },
3127 };
3128 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3129 const Tbl& p = tbl[i];
3130 put(p.name, p.x1, p.x2, p.xm, IMM8);
3131 }
3132#ifdef XBYAK64
3133 put("vpextrb", _REG64, _XMM3, IMM8);
3134 put("vpextrw", _REG64|MEM, _XMM3, IMM8);
3135 put("vpextrd", _REG32, _XMM3, IMM8);
3136 put("vpextrq", _REG64, _XMM3, IMM8);
3137 put("vpinsrb", _XMM3, _XMM3, _REG32, IMM8);
3138 put("vpinsrw", _XMM3, _XMM3, _REG32, IMM8);
3139 put("vpinsrd", _XMM3, _XMM3, _REG32, IMM8);
3140 put("vpinsrq", _XMM3, _XMM3, _REG64, IMM8);
3141#endif
3142 }
3143 void put512_FMA()
3144 {
3145 const struct Tbl {
3146 const char *name;
3147 bool supportYMM;
3148 } tbl[] = {
3149 { "vfmadd", true },
3150 { "vfmadd", false },
3151 { "vfmaddsub", true },
3152 { "vfmsubadd", true },
3153 { "vfmsub", true },
3154 { "vfmsub", false },
3155 { "vfnmadd", true },
3156 { "vfnmadd", false },
3157 { "vfnmsub", true },
3158 { "vfnmsub", false },
3159 };
3160 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3161 const Tbl& p = tbl[i];
3162 const struct Ord {
3163 const char *name;
3164 } ord[] = {
3165 { "132" },
3166 { "213" },
3167 { "231" },
3168 };
3169 for (size_t j = 0; j < NUM_OF_ARRAY(ord); j++) {
3170 const char sufTbl[][2][8] = {
3171 { "pd", "ps" },
3172 { "sd", "ss" },
3173 };
3174 for (size_t k = 0; k < 2; k++) {
3175 const std::string suf = sufTbl[p.supportYMM ? 0 : 1][k];
3176 uint64_t mem = 0;
3177 if (suf == "pd") {
3178 mem = M_1to2;
3179 } else if (suf == "ps") {
3180 mem = M_1to4;
3181 } else {
3182 mem = XMM_ER;
3183 }
3184 std::string name = std::string(p.name) + ord[j].name + suf;
3185 const char *q = name.c_str();
3186 put(q, XMM_KZ, _XMM, mem);
3187 if (!p.supportYMM) continue;
3188 if (suf == "pd") {
3189 mem = M_1to8;
3190 } else if (suf == "ps") {
3191 mem = M_1to16;
3192 } else {
3193 mem = XMM_ER;
3194 }
3195 put(q, _ZMM, _ZMM, mem);
3196 }
3197 }
3198 }
3199 }
3200 void put512_Y_XM()
3201 {
3202 const char *tbl[] = {
3203 "vpmovsxbw",
3204 "vpmovsxbd",
3205 "vpmovsxbq",
3206 "vpmovsxwd",
3207 "vpmovsxwq",
3208 "vpmovsxdq",
3209 "vpmovzxbw",
3210 "vpmovzxbd",
3211 "vpmovzxbq",
3212 "vpmovzxwd",
3213 "vpmovzxwq",
3214 "vpmovzxdq",
3215 };
3216 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
3217 const char *name = tbl[i];
3218 put(name, XMM_KZ, _XMM);
3219 put(name, _ZMM, _MEM);
3220 }
3221 }
// Emits patterns for the basic arithmetic/logical stems combined with
// the pd/ps/sd/ss suffixes. Logical stems (vand, vandn, vor, vxor) are
// limited to pd/ps. Packed suffixes get an XMM pattern with a broadcast
// memory operand and a ZMM pattern; scalar suffixes get a single XMM
// pattern whose last operand mask is 0.
// The whole body is compiled only when XBYAK64 is defined.
void put512_AVX1()
{
#ifdef XBYAK64
    const struct Op {
        const char *stem;
        bool packedOnly; // true: only the pd/ps suffixes are generated
    } ops[] = {
        { "vadd", false },
        { "vsub", false },
        { "vmul", false },
        { "vdiv", false },
        { "vmax", false },
        { "vmin", false },
        { "vand", true },
        { "vandn", true },
        { "vor", true },
        { "vxor", true },
    };
    const struct Suffix {
        const char *text;
        uint64_t xmmMem; // last operand of the XMM pattern
        uint64_t zmmMem; // last operand of the ZMM pattern (unused when !hasZmm)
        bool hasZmm;
    } suffixes[] = {
        { "pd", M_1to2, M_1to8, true },
        { "ps", M_1to4, M_1to16, true },
        { "sd", 0, 0, false },
        { "ss", 0, 0, false },
    };
    // The packed suffixes occupy the first two slots of the table above.
    const size_t packedNum = 2;

    for (size_t i = 0; i != NUM_OF_ARRAY(ops); ++i) {
        const size_t sufNum = ops[i].packedOnly ? packedNum : NUM_OF_ARRAY(suffixes);
        for (size_t j = 0; j != sufNum; ++j) {
            const Suffix& suf = suffixes[j];
            const std::string mnemonic = std::string(ops[i].stem) + suf.text;
            const char *const nm = mnemonic.c_str();
            put(nm, _XMM3 | XMM_KZ, _XMM, suf.xmmMem);
            if (suf.hasZmm) {
                put(nm, _ZMM, _ZMM, suf.zmmMem);
            }
        }
    }
#endif
}
// Emits the vcvtdq2pd / vcvtdq2ps / vcvtpd2dq conversion patterns, one
// put() call per table row in table order.
// The whole body is compiled only when XBYAK64 is defined.
void put512_cvt()
{
#ifdef XBYAK64
    const struct Conv {
        const char *mnemonic;
        uint64 dst;
        uint64 src;
    } convs[] = {
        { "vcvtdq2pd", XMM_KZ, _XMM | M_1to2 },
        { "vcvtdq2pd", YMM_KZ, _XMM | M_1to4 },
        { "vcvtdq2pd", ZMM_KZ, _YMM | M_1to8 },

        { "vcvtdq2ps", XMM_KZ, _XMM | M_1to4 },
        { "vcvtdq2ps", YMM_KZ, _YMM | M_1to8 },
        { "vcvtdq2ps", ZMM_KZ, _ZMM | M_1to16 },

        { "vcvtpd2dq", XMM_KZ, _XMM | _YMM | M_1to2 },
        { "vcvtpd2dq", YMM_KZ, _ZMM | ZMM_ER | M_1to8 },
    };
    const Conv *const last = convs + NUM_OF_ARRAY(convs);
    for (const Conv *c = convs; c != last; ++c) {
        put(c->mnemonic, c->dst, c->src);
    }
#endif
}
// Reduced pattern set that putAVX512() emits when MIN_TEST is defined:
// two vcvtpd2dq patterns, one with an xmm/xword/1to2 source and one with
// a ymm/yword/1to4 source. Emits nothing unless XBYAK64 is defined.
void putMin()
{
#ifdef XBYAK64
    const uint64 dst = _XMM | _XMM3;
    put("vcvtpd2dq", dst, _XMM | M_xword | M_1to2);
    put("vcvtpd2dq", dst, _YMM | M_yword | MY_1to4);
#endif
}
3296 void putAVX512()
3297 {
3298#ifdef MIN_TEST
3299 putMin();
3300#else
3301 putOpmask();
3302 separateFunc();
3303 putCombi();
3304 separateFunc();
3305 putCmpK();
3306 separateFunc();
3307 putBroadcast();
3308 separateFunc();
3309 putAVX512_M_X();
3310 separateFunc();
3311 put_vmov();
3312 separateFunc();
3313 put512_X_XM();
3314 separateFunc();
3315 put512_X_X_XM();
3316 separateFunc();
3317 put512_X3();
3318 separateFunc();
3319 put512_X3_I();
3320 separateFunc();
3321 put512_FMA();
3322 separateFunc();
3323 put512_Y_XM();
3324 separateFunc();
3325 put512_AVX1();
3326 separateFunc();
3327 put512_cvt();
3328#endif
3329 }
3330#endif
3331};
3332
3333int main(int argc, char *[])
3334{
3335 Test test(argc > 1);
3336 test.put();
3337}
#define CYBOZU_SNPRINTF(x, len,...)
Definition inttype.hpp:64
#define NUM_OF_ARRAY(x)
Definition bench.cpp:12
const mie::Vuint & p
Definition bn.cpp:27
std::string name
void putBroadcastSub(int idx, int disp)
Definition make_512.cpp:636
Test(bool isXbyak)
Definition make_nm.cpp:2121
void putGprR_RM()
Definition make_nm.cpp:2189
void put512_AVX1()
void putCombi()
Definition make_512.cpp:541
void putBroadcast()
Definition make_512.cpp:653
~Test()
Definition make_nm.cpp:2142
void putGath(const std::string &vsib)
Definition make_nm.cpp:2251
void put512_X3()
Definition make_512.cpp:885
void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx=0, bool z=false, int sae=0)
Definition make_512.cpp:524
void putMin()
void putAVX512()
void put512_cvt()
void putGprR_RM_R()
Definition make_nm.cpp:2172
void put512_X_XM()
Definition make_512.cpp:819
void put512_X3_I()
void putAVX512_M_X()
Definition make_512.cpp:750
void putGather()
Definition make_nm.cpp:2213
void putSeg()
Definition make_nm.cpp:2290
void separateFunc()
Definition make_nm.cpp:2134
void put_vmov()
Definition make_512.cpp:768
void putCmpK()
Definition make_512.cpp:596
void putGprOtherwise()
Definition make_nm.cpp:2204
void putAVX1()
void put()
Definition make_512.cpp:457
void put512_X_X_XM()
Definition make_512.cpp:867
void putGatherAll()
Definition make_nm.cpp:2258
void put512_FMA()
void put512_Y_XM()
void putOpmask()
Definition make_512.cpp:461
void putGprR_R_RM()
Definition make_nm.cpp:2156
const struct Ptn tbl[]
const uint64 VM32X_64
Definition make_512.cpp:36
const uint64 VM32Y_64
Definition make_512.cpp:38
const uint64 REG32
Definition make_512.cpp:66
const uint64 REG16_2
Definition make_512.cpp:55
const uint64 XMM_SAE
Definition make_512.cpp:94
const uint64 NOPARA
Definition make_512.cpp:114
const uint64 XMM
Definition make_512.cpp:76
const uint64 YMM
Definition make_512.cpp:77
const uint64 M_xword
Definition make_512.cpp:110
const uint64 XMM_KZ
Definition make_512.cpp:100
const uint64 REG32e
Definition make_512.cpp:68
const uint64 _MEMe
Definition make_512.cpp:53
const uint64 MEM_ONLY_DISP
Definition make_512.cpp:32
const uint64 ZMM
Definition make_512.cpp:85
const uint64 IMM8
Definition make_512.cpp:18
const uint64 MEM
Definition make_512.cpp:70
const uint64 EAX
Definition make_512.cpp:16
const uint64 _REG64
Definition make_512.cpp:58
const uint64 M_yword
Definition make_512.cpp:111
const uint64 K
Definition make_512.cpp:78
const uint64 IMM_2
Definition make_512.cpp:74
const uint64 M_1to8
Definition make_512.cpp:107
const uint64 _REG64_2
Definition make_512.cpp:59
const uint64 _MEM
Definition make_512.cpp:14
const uint64 _XMM2
Definition make_512.cpp:60
const uint64 REG32_2
Definition make_512.cpp:54
const uint64 IMM
Definition make_512.cpp:75
const uint64 MEM64
Definition make_512.cpp:71
const uint64 MEM8
Definition make_512.cpp:27
const uint64 _ZMM
Definition make_512.cpp:79
const uint64 VM32Y_32
Definition make_512.cpp:37
const uint64 AL
Definition make_512.cpp:25
const uint64 REG8_3
Definition make_512.cpp:57
const uint64 REG64
Definition make_512.cpp:65
const uint64 M_1to2
Definition make_512.cpp:105
const uint64 K2
Definition make_512.cpp:88
const uint64 ZMM_KZ
Definition make_512.cpp:102
const uint64 _REG8
Definition make_512.cpp:19
const uint64 REG16
Definition make_512.cpp:67
const uint64 VM32X
Definition make_512.cpp:62
const uint64 ZMM_ER
Definition make_512.cpp:90
const uint64 REG8
Definition make_512.cpp:69
const uint64 VM32X_32
Definition make_512.cpp:35
const uint64 AX
Definition make_512.cpp:24
const uint64 MY_1to4
Definition make_512.cpp:112
const uint64 _REG16
Definition make_512.cpp:20
const uint64 REG8_2
Definition make_512.cpp:56
const uint64 _ZMM2
Definition make_512.cpp:80
const uint64 VM32Y
Definition make_512.cpp:63
const uint64 _YMM2
Definition make_512.cpp:61
const uint64 _YMM
Definition make_512.cpp:34
const uint64 M_1to16
Definition make_512.cpp:108
const uint64 IMM32
Definition make_512.cpp:17
const uint64 _XMM
Definition make_512.cpp:13
const uint64 MEM32
Definition make_512.cpp:29
const uint64 IMM_1
Definition make_512.cpp:26
const uint64 _REG32
Definition make_512.cpp:15
const uint64 YMM_KZ
Definition make_512.cpp:101
const uint64 MEM_K
Definition make_512.cpp:104
const uint64 _YMM3
Definition make_512.cpp:86
const uint64 XMM_ER
Definition make_512.cpp:109
const int bitEnd
Definition make_512.cpp:10
const uint64 ZMM_SAE
Definition make_512.cpp:89
const uint64 M_1to4
Definition make_512.cpp:106
const uint64 MEM16
Definition make_512.cpp:28
const uint64 VM32X_64
Definition make_nm.cpp:38
const uint64 VM32Y_64
Definition make_nm.cpp:40
const uint64 REG32
Definition make_nm.cpp:69
const uint64 REG16_2
Definition make_nm.cpp:57
const uint64 XMM_SAE
Definition make_nm.cpp:97
const uint64 NOPARA
Definition make_nm.cpp:118
const uint64 XMM
Definition make_nm.cpp:79
const uint64 YMM
Definition make_nm.cpp:80
const uint64 M_xword
Definition make_nm.cpp:113
const uint64 ONE
Definition make_nm.cpp:32
const uint64 XMM_KZ
Definition make_nm.cpp:103
const uint64 REG32e
Definition make_nm.cpp:71
const uint64 _MEMe
Definition make_nm.cpp:55
const uint64 MEM_ONLY_DISP
Definition make_nm.cpp:34
const uint64 NEG8
Definition make_nm.cpp:23
const uint64 ZMM
Definition make_nm.cpp:88
const uint64 STi
Definition make_nm.cpp:76
const uint64 IMM8
Definition make_nm.cpp:20
const uint64 MEM
Definition make_nm.cpp:73
const uint64 EAX
Definition make_nm.cpp:18
const uint64 _REG64
Definition make_nm.cpp:60
const uint64 M_yword
Definition make_nm.cpp:114
const uint64 K
Definition make_nm.cpp:81
const uint64 IMM_2
Definition make_nm.cpp:77
const uint64 ST0
Definition make_nm.cpp:75
const uint64 M_1to8
Definition make_nm.cpp:110
const uint64 _REG64_2
Definition make_nm.cpp:61
const uint64 _MEM
Definition make_nm.cpp:16
const uint64 _XMM2
Definition make_nm.cpp:63
const uint64 REG32_2
Definition make_nm.cpp:56
const uint64 IMM
Definition make_nm.cpp:78
const uint64 MEM64
Definition make_nm.cpp:74
const uint64 MEM8
Definition make_nm.cpp:29
const uint64 _ZMM
Definition make_nm.cpp:82
const uint64 VM32Y_32
Definition make_nm.cpp:39
const uint64 BNDREG
Definition make_nm.cpp:116
const uint64 AL
Definition make_nm.cpp:27
const uint64 REG8_3
Definition make_nm.cpp:59
const uint64 REG64
Definition make_nm.cpp:68
const uint64 M_1to2
Definition make_nm.cpp:108
const uint64 K2
Definition make_nm.cpp:91
const uint64 ZMM_KZ
Definition make_nm.cpp:105
const uint64 _REG8
Definition make_nm.cpp:21
const uint64 REG16
Definition make_nm.cpp:70
const uint64 VM32X
Definition make_nm.cpp:65
const uint64 NEG16
Definition make_nm.cpp:25
const uint64 ZMM_ER
Definition make_nm.cpp:93
const uint64 REG8
Definition make_nm.cpp:72
const uint64 VM32X_32
Definition make_nm.cpp:37
const uint64 AX
Definition make_nm.cpp:26
const uint64 MY_1to4
Definition make_nm.cpp:115
const uint64 _REG16
Definition make_nm.cpp:22
const uint64 REG8_2
Definition make_nm.cpp:58
const uint64 IMM16
Definition make_nm.cpp:24
const uint64 _ZMM2
Definition make_nm.cpp:83
const uint64 VM32Y
Definition make_nm.cpp:66
const uint64 _YMM2
Definition make_nm.cpp:64
const uint64 CL
Definition make_nm.cpp:33
const uint64 _YMM
Definition make_nm.cpp:36
const uint64 M_1to16
Definition make_nm.cpp:111
const uint64 IMM32
Definition make_nm.cpp:19
const uint64 RAX
Definition make_nm.cpp:62
const uint64 _XMM
Definition make_nm.cpp:15
const uint64 NEG32
Definition make_nm.cpp:35
const uint64 MMX
Definition make_nm.cpp:14
const uint64 MEM32
Definition make_nm.cpp:31
const uint64 IMM_1
Definition make_nm.cpp:28
const uint64 _REG32
Definition make_nm.cpp:17
const uint64 YMM_KZ
Definition make_nm.cpp:104
const uint64 MEM_K
Definition make_nm.cpp:107
const uint64 _YMM3
Definition make_nm.cpp:89
const uint64 XMM_ER
Definition make_nm.cpp:112
const int bitEnd
Definition make_nm.cpp:12
const uint64 ZMM_SAE
Definition make_nm.cpp:92
const uint64 M_1to4
Definition make_nm.cpp:109
const uint64 MEM16
Definition make_nm.cpp:30
LOGGING_API void printf(Category category, const char *format,...)
Definition Logging.cpp:30
Definition xbyak.h:104
unsigned char uint8
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1181
Xbyak::uint64 uint64
Definition quantize.cpp:51
const int N
Definition quantize.cpp:54
unsigned __int64 uint64_t
Definition stdint.h:136
int type definition and macros Copyright (C) 2008 Cybozu Labs, Inc., all rights reserved.
Xbyak ; JIT assembler for x86(IA32)/x64 by C++.
@ B01010100
@ B01010101
@ B01011111
@ B01011000
@ B01010001
@ B01010010
@ B00010100
@ B00010101
@ B01010011
@ B01011100
@ B01011001
@ B01011110
@ B11000010
@ B01010111
@ B01010110
@ B11000110
@ B01011101
char * s
uint16_t j
uint8_t buf[2048]