2#define XBYAK_NO_OP_NAMES
8#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
95const uint64 _XMM3 = 1ULL << 50;
122 void operator=(
const Test&);
128 std::string removeUnderScore(std::string
s)
const
130 if (!isXbyak_ &&
s[
s.size() - 1] ==
'_')
s.resize(
s.size() - 1);
137 for (
int i = 0; i <
bitEnd; i++) {
138 if ((op1 & (1ULL << i)) == 0)
continue;
140 if ((op2 & (1ULL <<
j)) == 0)
continue;
141 for (
int k = 0; k <
bitEnd; k++) {
142 if ((op3 & (1ULL << k)) == 0)
continue;
144 if ((op4 & (1ULL <<
s)) == 0)
continue;
145 printf(
"%s ", nm.c_str());
146 if (isXbyak_) printf(
"(");
147 if (!(op1 &
NOPARA)) printf(
"%s", get(1ULL << i));
148 if (!(op2 &
NOPARA)) printf(
", %s", get(1ULL <<
j));
149 if (!(op3 &
NOPARA)) printf(
", %s", get(1ULL << k));
150 if (!(op4 &
NOPARA)) printf(
", %s", get(1ULL <<
s));
151 if (isXbyak_) printf(
"); dump();");
158 void put(
const char *nm,
uint64 op,
const char *xbyak,
const char *nasm)
const
160 for (
int i = 0; i <
bitEnd; i++) {
161 if ((
op & (1ULL << i)) == 0)
continue;
163 if (isXbyak_) printf(
"(");
164 if (!(
op &
NOPARA)) printf(
"%s", get(1ULL << i));
165 printf(
", %s", isXbyak_ ? xbyak : nasm);
166 if (isXbyak_) printf(
"); dump();");
170 void put(
const char *nm,
const char *xbyak,
const char *nasm = 0,
uint64 op =
NOPARA)
const
172 if (nasm == 0) nasm = xbyak;
173 for (
int i = 0; i <
bitEnd; i++) {
174 if ((
op & (1ULL << i)) == 0)
continue;
176 if (isXbyak_) printf(
"(");
177 printf(
"%s ", isXbyak_ ? xbyak : nasm);
178 if (!(
op &
NOPARA)) printf(
", %s", get(1ULL << i));
179 if (isXbyak_) printf(
"); dump();");
183 const char *get(
uint64 type)
const
185 int idx = (rand() / 31) & 7;
195 static const char MmxTbl[][4] = {
196 "mm0",
"mm1",
"mm2",
"mm3",
"mm4",
"mm5",
"mm6",
"mm7"
202 static const char tbl[][6] = {
203 "xmm0",
"xmm1",
"xmm2",
"xmm3",
"xmm4",
"xmm5",
"xmm6",
"xmm7",
209 static const char tbl[][6] = {
210 "ymm0",
"ymm1",
"ymm2",
"ymm3",
"ymm4",
"ymm5",
"ymm6",
"ymm7"
216 static const char tbl[][6] = {
217 "zmm0",
"zmm1",
"zmm2",
"zmm3",
"zmm4",
"zmm5",
"zmm6",
"zmm7"
224 static const char tbl[][6] = {
225 "xmm8",
"xmm9",
"xmm10",
"xmm11",
"xmm12",
"xmm13",
"xmm14",
"xmm15"
231 static const char tbl[][6] = {
232 "xmm16",
"xmm17",
"xmm18",
"xmm19",
"xmm20",
"xmm21",
"xmm22",
"xmm23"
238 static const char tbl[][6] = {
239 "ymm8",
"ymm9",
"ymm10",
"ymm11",
"ymm12",
"ymm13",
"ymm14",
"ymm15",
245 static const char tbl[][6] = {
246 "ymm16",
"ymm17",
"ymm18",
"ymm19",
"ymm20",
"ymm21",
"ymm22",
"ymm23",
252 static const char tbl[][6] = {
253 "zmm8",
"zmm9",
"zmm10",
"zmm11",
"zmm28",
"zmm29",
"zmm30",
"zmm31",
260 return isXbyak_ ?
"ptr[eax+ecx+3]" :
"[eax+ecx+3]";
279 return isXbyak_ ?
"ptr[rdx+r15+0x12]" :
"[rdx+r15+0x12]";
281 return isXbyak_ ?
"ptr[rip - 0x13456+1-3]" :
"[rip - 0x13456+1-3]";
285 return "byte [eax+edx]";
289 return "dword [ebp*2]";
291 return "qword [eax+ecx*8]";
293 return isXbyak_ ?
"ptr[(void*)0x123]" :
"[0x123]";
296 static const char Reg16Tbl[][4] = {
297 "ax",
"cx",
"dx",
"bx",
"sp",
"bp",
"si",
"di"
299 return Reg16Tbl[(idx % 7) + 1];
303 static const char Reg8Tbl[][4] = {
305 "al",
"cl",
"dl",
"bl",
"al",
"cl",
"dl",
"bl"
307 "al",
"cl",
"dl",
"bl",
"ah",
"ch",
"dh",
"bh"
310 return Reg8Tbl[(idx % 7) + 1];
314 static const char Reg32Tbl[][4] = {
315 "eax",
"ecx",
"edx",
"ebx",
"esp",
"ebp",
"esi",
"edi"
317 return Reg32Tbl[(idx % 7) + 1];
322 static const char Reg64Tbl[][4] = {
323 "rax",
"rcx",
"rdx",
"rbx",
"rsp",
"rbp",
"rsi",
"rdi"
325 return Reg64Tbl[(idx % 7) + 1];
329 static const char Reg64_2Tbl[][4] = {
330 "r8",
"r9",
"r10",
"r11",
"r12",
"r13",
"r14",
"r15"
332 return Reg64_2Tbl[idx];
336 static const char Reg32eTbl[][5] = {
337 "r8d",
"r9d",
"r10d",
"r11d",
"r12d",
"r13d",
"r14d",
"r15d"
339 return Reg32eTbl[idx];
343 static const char Reg16eTbl[][5] = {
344 "r8w",
"r9w",
"r10w",
"r11w",
"r12w",
"r13w",
"r14w",
"r15w"
346 return Reg16eTbl[idx];
350 static const char Reg8_2Tbl[][5] = {
351 "r8b",
"r9b",
"r10b",
"r11b",
"r12b",
"r13b",
"r14b",
"r15b"
353 return Reg8_2Tbl[idx];
357 static const char Reg8_3Tbl[][5] = {
358 "spl",
"bpl",
"sil",
"dil",
"spl",
"bpl",
"sil",
"dil"
360 return Reg8_3Tbl[idx];
376 return isXbyak_ ?
"12345678" :
"dword 12345678";
378 return isXbyak_ ?
"1000" :
"word 1000";
380 return isXbyak_ ?
"4" :
"byte 4";
382 return isXbyak_ ?
"-30" :
"byte -30";
384 return isXbyak_ ?
"-1000" :
"word -1000";
386 return isXbyak_ ?
"-100000" :
"dword -100000";
390 return isXbyak_ ?
"0xda" :
"0xda";
392 return isXbyak_ ?
"ptr [ebp+4+xmm1*8]" :
"[ebp+4+xmm1*8]";
394 return isXbyak_ ?
"ptr [12345+xmm13*2]" :
"[12345+xmm13*2]";
396 return isXbyak_ ?
"ptr [ymm4]" :
"[ymm4]";
398 return isXbyak_ ?
"ptr [12345+ymm13*2+r13]" :
"[12345+ymm13*2+r13]";
399 case M_1to2:
return isXbyak_ ?
"ptr_b [eax+32]" :
"[eax+32]{1to2}";
400 case M_1to4:
return isXbyak_ ?
"ptr_b [eax+32]" :
"[eax+32]{1to4}";
401 case M_1to8:
return isXbyak_ ?
"ptr_b [eax+32]" :
"[eax+32]{1to8}";
402 case M_1to16:
return isXbyak_ ?
"ptr_b [eax+32]" :
"[eax+32]{1to16}";
404 case M_xword:
return isXbyak_ ?
"ptr [eax+33]" :
"oword [eax+33]";
405 case M_yword:
return isXbyak_ ?
"yword [eax+33]" :
"yword [eax+33]";
406 case MY_1to4:
return isXbyak_ ?
"yword_b [eax+32]" :
"[eax+32]{1to4}";
409 static const char kTbl[][5] = {
410 "k1",
"k2",
"k3",
"k4",
"k5",
"k6",
"k7",
412 return kTbl[idx % 7];
415 return isXbyak_ ?
"k3 | k5" :
"k3{k5}";
418 static const char tbl[][5] = {
419 "bnd0",
"bnd1",
"bnd2",
"bnd3",
425 return isXbyak_ ?
"xmm25 | T_sae" :
"xmm25, {sae}";
427 return isXbyak_ ?
"zmm25 | T_sae" :
"zmm25, {sae}";
429 return isXbyak_ ?
"xmm4 | T_rd_sae" :
"xmm4, {rd-sae}";
431 return isXbyak_ ?
"zmm20 | T_rd_sae" :
"zmm20, {rd-sae}";
433 return isXbyak_ ?
"xmm5 | k5" :
"xmm5{k5}";
435 return isXbyak_ ?
"ymm2 |k3|T_z" :
"ymm2{k3}{z}";
437 return isXbyak_ ?
"zmm7|k1" :
"zmm7{k1}";
439 return isXbyak_ ?
"ptr [rax] | k1" :
"[rax]{k1}";
442 return isXbyak_ ?
"xmm5 | T_sae" :
"xmm5, {sae}";
444 return isXbyak_ ?
"zmm5 | T_sae" :
"zmm5, {sae}";
446 return isXbyak_ ?
"xmm30 | T_rd_sae" :
"xmm30, {rd-sae}";
448 return isXbyak_ ?
"zmm2 | T_rd_sae" :
"zmm2, {rd-sae}";
450 return isXbyak_ ?
"ptr [eax] | k1" :
"[eax]{k1}";
455 void putSIMPLE()
const
457 const char tbl[][20] = {
593 put(
"call",
"getCode() + 5",
"$ + 5");
597 put(
"jmp",
"ptr[(void*)0x12345678]",
"[0x12345678]");
598 put(
"call",
"ptr[(void*)0x12345678]",
"[0x12345678]");
600 put(
"jmp",
"ptr[rip + 0x12345678]",
"[rip+0x12345678]");
601 put(
"call",
"ptr[rip + 0x12345678]",
"[rip+0x12345678]");
602 put(
"call",
"ptr[rip -23]",
"[rip-23]");
603 put(
"call",
"ptr[rip -23+56]",
"[rip-23+56]");
607 puts(
"{ Label label0;");
609 puts(
"pshufb (xmm14, ptr [rip+label0]); dump();");
613 puts(
"pshufb xmm14, [rel label0]");
654 put(
"movq",
XMM|
MMX,
"qword[eax]",
"qword[eax]");
655 put(
"movq",
XMM|
MMX,
"ptr[eax]",
"qword[eax]");
656 put(
"movq",
"qword[eax]",
"qword[eax]",
XMM|
MMX);
657 put(
"movq",
"ptr[eax]",
"qword[eax]",
XMM|
MMX);
668 static const char nmTbl[][16] = {
735 static const char nmTbl[][16] = {
763 static const char nmTbl[][16] = {
798 static const struct XmmTbl1 {
804 {
B01011000, PS|SS|PD|SD,
"add",
false },
808 {
B01011110, PS|SS|PD|SD,
"div",
false },
809 {
B01011111, PS|SS|PD|SD,
"max",
false },
810 {
B01011101, PS|SS|PD|SD,
"min",
false },
811 {
B01011001, PS|SS|PD|SD,
"mul",
false },
816 {
B01010001, PS|SS|PD|SD,
"sqrt",
false },
817 {
B01011100, PS|SS|PD|SD,
"sub",
false },
824 const XmmTbl1 *
p = &xmmTbl1[i];
826 if (!(
p->mode & (1 <<
j)))
continue;
828 sprintf(
buf,
"%s%s",
p->name, sufTbl[
j].name);
840 static const char tbl[][16] = {
876 static const struct Tbl {
895 const Tbl *
p = &
tbl[i];
896 put(
p->name,
p->op1,
p->op2);
901 static const char tbl[][16] = {
908 const char *
p =
tbl[i];
915 const char tbl[][4] = {
949 sprintf(
buf,
"cmov%s",
tbl[i]);
953 sprintf(
buf,
"set%s",
tbl[i]);
961 static const char tbl[][16] = {
972 const std::string
s = removeUnderScore(
tbl[i]);
973 const char *
p =
s.c_str();
994 const char tbl[][8] = {
999 const char *
p =
tbl[i];
1007 static const char tbl[][16] = {
1014 const char *
p =
tbl[i];
1021 void putRorM()
const
1023 static const char tbl[][16] = {
1034 const std::string
s = removeUnderScore(
tbl[i]);
1035 const char *
p =
s.c_str();
1039 const char *
p =
"imul";
1047 void putPushPop()
const
1057 const char *
p =
"push";
1071 void putTest()
const
1073 const char *
p =
"test";
1080 void putMov64()
const
1087 {
"0x123",
"0x123" },
1088 {
"0x12345678",
"0x12345678" },
1089 {
"0x7fffffff",
"0x7fffffff" },
1090 {
"0xffffffff",
"0xffffffff" },
1091 {
"0x80000000",
"0x80000000" },
1092 {
"2147483648U",
"2147483648" },
1093 {
"0x80000001",
"0x80000001" },
1094 {
"0xffffffffffffffff",
"0xffffffffffffffff" },
1096 {
"0xffffffff80000000",
"0xffffffff80000000" },
1097 {
"0xffffffff80000001",
"0xffffffff80000001" },
1098 {
"0xffffffff12345678",
"0xffffffff12345678" },
1105 void putMovImm64()
const
1107 put(
"mov",
REG64,
"0x1234567890abcdefLL",
"0x1234567890abcdef");
1108 put(
"mov",
REG64,
"0x12345678",
"0x12345678");
1109 put(
"mov",
REG64,
"0xffffffff12345678LL",
"0xffffffff12345678");
1115 const char *
p =
"ret";
1125 put(
p,
REG64,
"0x1234567890abcdefLL",
"0x1234567890abcdef");
1129 put(
p,
RAX|
EAX|
AX|
AL,
"ptr [0x1234567890abcdefLL]",
"[qword 0x1234567890abcdef]");
1130 put(
p,
"ptr [0x1234567890abcdefLL]",
"[qword 0x1234567890abcdef]",
RAX|
EAX|
AX|
AL);
1131 put(
p,
"qword [rax], 0");
1132 put(
p,
"qword [rax], 0x12");
1133 put(
p,
"qword [rax], 0x1234");
1134 put(
p,
"qword [rax], 0x12345678");
1136 put(
p,
"qword [rax], 1000000");
1137 put(
p,
"rdx, qword [rax]");
1139 put(
"mov",
EAX,
"ptr [eax + ecx * 0]",
"[eax + ecx * 0]");
1142 const char tbl[][8] = {
1147 const char *
p =
tbl[i];
1161 const char tbl[][8] = {
1166 const char *
p =
tbl[i];
1182 void putShift()
const
1184 const char tbl[][8] = {
1196 const char *
p =
tbl[i];
1200 void putShxd()
const
1202 const char tbl[][8] = {
1207 const char *
p =
tbl[i];
1215 const char tbl[][8] = {
1223 const char *
p =
tbl[i];
1229 void putSSSE3()
const
1231 const char tbl[][16] = {
1249 const char *
p =
tbl[i];
1256 void putSSE4_1()
const
1258 const char tbl[][16] = {
1296 const char *
p =
tbl[i];
1300 void putSSE4_2()
const
1303 const char tbl[][16] = {
1322 const char *
p =
tbl[i];
1327 const char tbl[][16] = {
1334 const char *
p =
tbl[i];
1378 put(
"bndstx",
"ptr [eax]",
"[eax]",
BNDREG);
1379 put(
"bndstx",
"ptr [eax+5]",
"[eax+5]",
BNDREG);
1380 put(
"bndstx",
"ptr [eax+500]",
"[eax+500]",
BNDREG);
1381 put(
"bndstx",
"ptr [eax+ecx]",
"[eax+ecx]",
BNDREG);
1382 put(
"bndstx",
"ptr [ecx+eax]",
"[ecx+eax]",
BNDREG);
1383 put(
"bndstx",
"ptr [eax+esp]",
"[eax+esp]",
BNDREG);
1384 put(
"bndstx",
"ptr [esp+eax]",
"[esp+eax]",
BNDREG);
1385 put(
"bndstx",
"ptr [eax+ecx*2]",
"[eax+ecx*2]",
BNDREG);
1386 put(
"bndstx",
"ptr [ecx+ecx]",
"[ecx+ecx]",
BNDREG);
1387 put(
"bndstx",
"ptr [ecx*2]",
"[ecx*2]",
BNDREG);
1388 put(
"bndstx",
"ptr [eax+ecx*2+500]",
"[eax+ecx*2+500]",
BNDREG);
1390 put(
"bndstx",
"ptr [rax+rcx*2]",
"[rax+rcx*2]",
BNDREG);
1391 put(
"bndstx",
"ptr [r9*2]",
"[r9*2]",
BNDREG);
1392 put(
"bndstx",
"ptr [r9*2+r15]",
"[r9*2+r15]",
BNDREG);
1395 void putFpuMem16_32()
const
1397 const char tbl[][8] = {
1409 const char *
p =
tbl[i];
1413 void putFpuMem32_64()
const
1415 const char tbl[][8] = {
1429 const char *
p =
tbl[i];
1433 void putFpuMem16_32_64()
const
1435 const char tbl[][8] = {
1441 const char *
p =
tbl[i];
1445 void putFpuFpu()
const
1477 const Tbl *
p = &
tbl[i];
1485 const char tbl[][16] = {
1532 if (
tbl[i].only_pd_ps &&
j == 2)
break;
1533 std::string
name = std::string(
"v") +
tbl[i].name + suf[
j].suf;
1534 const char *
p =
name.c_str();
1537 if (!suf[
j].supportYMM)
continue;
1543 void putAVX_X_X_XM_omit()
1549 {
"vaesenc",
false },
1550 {
"vaesenclast",
false },
1551 {
"vaesdec",
false },
1552 {
"vaesdeclast",
false },
1553 {
"vcvtsd2ss",
false },
1554 {
"vcvtss2sd",
false },
1555 {
"vpacksswb",
true },
1556 {
"vpackssdw",
true },
1557 {
"vpackuswb",
true },
1558 {
"vpackusdw",
true },
1565 {
"vpaddsb",
true },
1566 {
"vpaddsw",
true },
1568 {
"vpaddusb",
true },
1569 {
"vpaddusw",
true },
1576 {
"vpcmpeqb",
true },
1577 {
"vpcmpeqw",
true },
1578 {
"vpcmpeqd",
true },
1579 {
"vpcmpeqq",
true },
1581 {
"vpcmpgtb",
true },
1582 {
"vpcmpgtw",
true },
1583 {
"vpcmpgtd",
true },
1584 {
"vpcmpgtq",
true },
1586 {
"vphaddw",
true },
1587 {
"vphaddd",
true },
1588 {
"vphaddsw",
true },
1590 {
"vphsubw",
true },
1591 {
"vphsubd",
true },
1592 {
"vphsubsw",
true },
1593 {
"vpmaddwd",
true },
1594 {
"vpmaddubsw",
true },
1596 {
"vpmaxsb",
true },
1597 {
"vpmaxsw",
true },
1598 {
"vpmaxsd",
true },
1600 {
"vpmaxub",
true },
1601 {
"vpmaxuw",
true },
1602 {
"vpmaxud",
true },
1604 {
"vpminsb",
true },
1605 {
"vpminsw",
true },
1606 {
"vpminsd",
true },
1608 {
"vpminub",
true },
1609 {
"vpminuw",
true },
1610 {
"vpminud",
true },
1612 {
"vpmulhuw",
true },
1613 {
"vpmulhrsw",
true },
1614 {
"vpmulhw",
true },
1615 {
"vpmullw",
true },
1616 {
"vpmulld",
true },
1618 {
"vpmuludq",
true },
1619 {
"vpmuldq",
true },
1622 {
"vpsadbw",
true },
1624 {
"vpsignb",
true },
1625 {
"vpsignw",
true },
1626 {
"vpsignd",
true },
1628 {
"vpsllw",
false },
1629 {
"vpslld",
false },
1630 {
"vpsllq",
false },
1632 {
"vpsraw",
false },
1633 {
"vpsrad",
false },
1634 {
"vpsrlw",
false },
1635 {
"vpsrld",
false },
1636 {
"vpsrlq",
false },
1643 {
"vpsubsb",
true },
1644 {
"vpsubsw",
true },
1646 {
"vpsubusb",
true },
1647 {
"vpsubusw",
true },
1649 {
"vpunpckhbw",
true },
1650 {
"vpunpckhwd",
true },
1651 {
"vpunpckhdq",
true },
1652 {
"vpunpckhqdq",
true },
1654 {
"vpunpcklbw",
true },
1655 {
"vpunpcklwd",
true },
1656 {
"vpunpckldq",
true },
1657 {
"vpunpcklqdq",
true },
1660 {
"vsqrtsd",
false },
1661 {
"vsqrtss",
false },
1663 {
"vunpckhpd",
true },
1664 {
"vunpckhps",
true },
1665 {
"vunpcklpd",
true },
1666 {
"vunpcklps",
true },
1669 const Tbl *
p = &
tbl[i];
1672 if (!
p->supportYMM)
continue;
1677 void putAVX_X_X_XM_IMM()
1683 {
"vblendpd",
true },
1684 {
"vblendps",
true },
1687 {
"vmpsadbw",
true },
1688 {
"vpblendw",
true },
1689 {
"vpblendd",
true },
1690 {
"vroundsd",
false },
1691 {
"vroundss",
false },
1692 {
"vpclmulqdq",
false },
1695 {
"vcmpsd",
false },
1696 {
"vcmpss",
false },
1697 {
"vinsertps",
false },
1698 {
"vpalignr",
true },
1699 {
"vshufpd",
true },
1700 {
"vshufps",
true },
1703 const Tbl *
p = &
tbl[i];
1706 if (!
p->supportYMM)
continue;
1711 void putAVX_X_XM_IMM()
1717 {
"vroundpd",
true },
1718 {
"vroundps",
true },
1719 {
"vpcmpestri",
false },
1720 {
"vpcmpestrm",
false },
1721 {
"vpcmpistri",
false },
1722 {
"vpcmpistrm",
false },
1723 {
"vpermilpd",
true },
1724 {
"vpermilps",
true },
1725 {
"vaeskeygenassist",
false },
1726 {
"vpshufd",
true },
1727 {
"vpshufhw",
true },
1728 {
"vpshuflw",
true },
1731 const Tbl *
p = &
tbl[i];
1733 if (!
p->supportYMM)
continue;
1737 void putAVX_X_X_XM()
1743 {
"vpermilpd",
true },
1744 {
"vpermilps",
true },
1745 {
"vpshufb",
true },
1747 {
"vpsllvd",
true },
1748 {
"vpsllvq",
true },
1749 {
"vpsravd",
true },
1750 {
"vpsrlvd",
true },
1751 {
"vpsrlvq",
true },
1754 const Tbl *
p = &
tbl[i];
1756 if (!
p->supportYMM)
continue;
1766 {
"vaesimc",
false },
1767 {
"vtestps",
true },
1768 {
"vtestpd",
true },
1769 {
"vcomisd",
false },
1770 {
"vcomiss",
false },
1771 {
"vcvtdq2ps",
true },
1772 {
"vcvtps2dq",
true },
1773 {
"vcvttps2dq",
true },
1774 {
"vmovapd",
true },
1775 {
"vmovaps",
true },
1776 {
"vmovddup",
true },
1777 {
"vmovdqa",
true },
1778 {
"vmovdqu",
true },
1779 {
"vmovupd",
true },
1780 {
"vmovups",
true },
1785 {
"vphminposuw",
false },
1787 {
"vpmovsxbw",
false },
1788 {
"vpmovsxbd",
false },
1789 {
"vpmovsxbq",
false },
1790 {
"vpmovsxwd",
false },
1791 {
"vpmovsxwq",
false },
1792 {
"vpmovsxdq",
false },
1794 {
"vpmovzxbw",
false },
1795 {
"vpmovzxbd",
false },
1796 {
"vpmovzxbq",
false },
1797 {
"vpmovzxwd",
false },
1798 {
"vpmovzxwq",
false },
1799 {
"vpmovzxdq",
false },
1803 {
"vrcpss",
false },
1805 {
"vrsqrtps",
true },
1806 {
"vrsqrtss",
false },
1808 {
"vsqrtpd",
true },
1809 {
"vsqrtps",
true },
1810 {
"vucomisd",
false },
1811 {
"vucomiss",
false },
1814 const Tbl *
p = &
tbl[i];
1816 if (!
p->supportYMM)
continue;
1822 const char *
tbl[] = {
1847 {
"vmovapd",
true },
1848 {
"vmovaps",
true },
1849 {
"vmovdqa",
true },
1850 {
"vmovdqu",
true },
1851 {
"vmovupd",
true },
1852 {
"vmovups",
true },
1855 const Tbl *
p = &
tbl[i];
1857 if (!
p->supportYMM)
continue;
1861 void putAVX_X_X_IMM_omit()
1867 {
"vpslldq",
false },
1868 {
"vpsrldq",
false },
1879 const Tbl&
p =
tbl[i];
1888 if (
p.support_Y_Y_X) {
1900 {
"vfmadd",
false },
1901 {
"vfmaddsub",
true },
1902 {
"vfmsubadd",
true },
1904 {
"vfmsub",
false },
1905 {
"vfnmadd",
true },
1906 {
"vfnmadd",
false },
1907 {
"vfnmsub",
true },
1908 {
"vfnmsub",
false },
1911 const Tbl&
p =
tbl[i];
1920 const char sufTbl[][2][8] = {
1924 for (
size_t k = 0; k < 2; k++) {
1925 const std::string suf = sufTbl[
p.supportYMM ? 0 : 1][k];
1926 std::string
name = std::string(
p.name) + ord[
j].name + suf;
1927 const char *q =
name.c_str();
1929 if (!
p.supportYMM)
continue;
1954 const char tbl[][16] = {
1974 const char tbl[][8] = {
"vmovsd",
"vmovss" };
1984 for (
int i = 0; i < 3; i++) {
1985 const char tbl[][8] = {
"vpinsrb",
"vpinsrw",
"vpinsrd" };
1997 {
"vblendvpd",
true },
1998 {
"vblendvps",
true },
1999 {
"vpblendvb",
true },
2002 const Tbl&
p =
tbl[i];
2005 if (!
p.supportYMM)
continue;
2082 const char pred[32][16] = {
2083 "eq",
"lt",
"le",
"unord",
"neq",
"nlt",
"nle",
"ord",
2084 "eq_uq",
"nge",
"ngt",
"false",
"neq_oq",
"ge",
"gt",
2085 "true",
"eq_os",
"lt_oq",
"le_oq",
"unord_s",
"neq_us",
"nlt_uq",
"nle_uq",
"ord_s",
2086 "eq_us",
"nge_uq",
"ngt_uq",
"false_os",
"neq_os",
"ge_oq",
"gt_oq",
"true_us"
2088 const char suf[][4] = {
"pd",
"ps",
"sd",
"ss" };
2089 for (
int i = 0; i < 4; i++) {
2090 for (
int j = 0;
j < 32;
j++) {
2092 put((std::string(
"cmp") + pred[
j] + suf[i]).c_str(),
XMM,
XMM |
MEM);
2094 std::string str = std::string(
"vcmp") + pred[
j] + suf[i];
2095 const char *
p = str.c_str();
2098 if (i >= 2)
continue;
2106 const char tbl[][2][64] = {
2107 {
"mov(byte [rip - 10], 3);dump();",
"mov byte [rip - 10], 3" },
2108 {
"mov(word [rip - 10], 3);dump();",
"mov word [rip - 10], 3" },
2109 {
"mov(dword[rip - 10], 3);dump();",
"mov dword [rip - 10], 3" },
2110 {
"mov(qword [rip - 10], 3);dump();",
"mov qword [rip - 10], 3" },
2111 {
"mov(ptr [rip - 10], al);dump();",
"mov byte [rip - 10], al" },
2112 {
"mov(ptr [rip - 10], ax);dump();",
"mov word [rip - 10], ax" },
2113 {
"mov(ptr [rip - 10], eax);dump();",
"mov dword [rip - 10], eax" },
2114 {
"mov(ptr [rip - 10], rax);dump();",
"mov qword [rip - 10], rax" },
2117 puts(
tbl[i][isXbyak_ ? 0 : 1]);
2125 if (!isXbyak_)
return;
2136 if (!isXbyak_)
return;
2140 " {\n", funcNum_++);
2144 if (!isXbyak_)
return;
2149 for (
int i = 0; i < funcNum_; i++) {
2158 const char *
tbl[] = {
2174 const char *
tbl[] = {
2191 const char *
tbl[] = {
2215 const int y_vx_y = 0;
2216 const int y_vy_y = 1;
2217 const int x_vy_x = 2;
2222 {
"vgatherdpd", y_vx_y },
2223 {
"vgatherqpd", y_vy_y },
2224 {
"vgatherdps", y_vy_y },
2225 {
"vgatherqps", x_vy_x },
2226 {
"vpgatherdd", y_vy_y },
2227 {
"vpgatherqd", x_vy_x },
2228 {
"vpgatherdq", y_vx_y },
2229 {
"vpgatherqq", y_vy_y },
2232 const Tbl&
p =
tbl[i];
2233 const char *
name =
p.name;
2246 printf(
"ERR mode=%d\n",
p.mode);
2253 std::string x =
"xmm1, ";
2254 std::string
a = std::string(
"[") + vsib +
"], xmm3";
2255 put(
"vgatherdpd", (x +
"ptr" +
a).c_str(), (x +
a).c_str());
2260 const char *xmmTbl[] = {
2267 std::string
s = xmmTbl[i];
2275 int ord[] = { 0, 1, 2 };
2278 for (
int j = 0;
j < 3;
j++) {
2279 if (
j > 0)
s +=
'+';
2281 case 0:
s += xmmTbl[i];
break;
2282 case 1:
s +=
"123";
break;
2283 case 2:
s +=
"ebp";
break;
2287 }
while (std::next_permutation(ord, ord + 3));
2293 const char *segTbl[] = {
2302 const char *seg = segTbl[i];
2303 const char *op1Tbl[] = {
2306 (isXbyak_ ?
"ptr [eax]" :
"[eax]"),
2312 const char *op1 = op1Tbl[
j];
2314 printf(
"mov(%s, %s); dump();\n", op1, seg);
2315 printf(
"mov(%s, %s); dump();\n", seg, op1);
2317 printf(
"mov %s, %s\n", op1, seg);
2318 printf(
"mov %s, %s\n", seg, op1);
2324 const char *segTbl[] = {
2334 const char *seg = segTbl[i];
2336 printf(
"push(%s); dump();\n", seg);
2337 printf(
"pop(%s); dump();\n", seg);
2339 printf(
"push %s\n", seg);
2340 printf(
"pop %s\n", seg);
2367 putAVX_X_X_XM_omit();
2369 putAVX_X_X_XM_IMM();
2378 putAVX_X_X_IMM_omit();
2429 putFpuMem16_32_64();
2456 const char *
tbl[] = {
2471 put(
"kunpckbw",
K,
K,
K);
2472 put(
"kunpckwd",
K,
K,
K);
2473 put(
"kunpckdq",
K,
K,
K);
2476 const char *
tbl[] = {
2490 const char *
tbl[] = {
2516 void put_vaddpd(
const char *r1,
const char *r2,
const char *r3,
int kIdx = 0,
bool z =
false,
int sae = 0)
2518 std::string modifier;
2520 const char *pz =
"";
2521 const char *saeTblXbyak[] = {
"",
"|T_rn_sae",
"|T_rd_sae",
"|T_ru_sae",
"|T_rz_sae" };
2522 const char *saeTblNASM[] = {
"",
",{rn-sae}",
",{rd-sae}",
",{ru-sae}",
",{rz-sae}" };
2526 printf(
"vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
2530 printf(
"vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
2535 const char *xTbl[] = {
2543 const char *yTbl[] = {
2551 const char *zTbl[] = {
2560 for (
size_t i = 0; i <
N; i++) {
2561 for (
size_t j = 0;
j <
N;
j++) {
2563 for (
size_t k = 0; k <
N; k++) {
2565 for (
int kIdx = 0; kIdx < 8; kIdx++) {
2566 for (
int z = 0; z < 2; z++) {
2567 put_vaddpd(xTbl[i], xTbl[
j], xTbl[k], kIdx, z == 1);
2568 put_vaddpd(yTbl[i], yTbl[
j], yTbl[k], kIdx, z == 1);
2569 for (
int sae = 0; sae < 5; sae++) {
2570 put_vaddpd(zTbl[i], zTbl[
j], zTbl[k], kIdx, z == 1, sae);
2577 for (
int sae = 0; sae < 5; sae++) {
2597 {
"vcmpsd",
false },
2598 {
"vcmpss",
false },
2601 const Tbl *
p = &
tbl[i];
2603 if (!
p->supportYMM)
continue;
2620 const Tbl *
p = &
tbl[i];
2631 const char *
a =
"rax";
2633 const char *
a =
"eax";
2636 printf(
"vaddpd(zmm%d, zmm1, ptr_b[%s+%d]);dump();\n", idx,
a, disp);
2637 printf(
"vaddpd(ymm%d, ymm1, ptr_b[%s+%d]);dump();\n", idx,
a, disp);
2638 printf(
"vaddpd(xmm%d, xmm1, ptr_b[%s+%d]);dump();\n", idx,
a, disp);
2640 printf(
"vaddpd zmm%d, zmm1, [%s+%d]{1to8}\n", idx,
a, disp);
2641 printf(
"vaddpd ymm%d, ymm1, [%s+%d]{1to4}\n", idx,
a, disp);
2642 printf(
"vaddpd xmm%d, xmm1, [%s+%d]{1to2}\n", idx,
a, disp);
2647 for (
int i = 0; i < 9; i++) {
2661 const char *
tbl[] = {
2679 const char *
tbl[] = {
2698 put(
"vmovhlps", _XMM3, _XMM3, _XMM3);
2699 put(
"vmovlhps", _XMM3, _XMM3, _XMM3);
2717 const char *
tbl[] = {
2729 const char tbl[][16] = {
2748 {
"vmovddup",
false },
2749 {
"vmovdqa32",
true },
2750 {
"vmovdqa64",
true },
2751 {
"vmovdqu8",
true },
2752 {
"vmovdqu16",
true },
2753 {
"vmovdqu32",
true },
2754 {
"vmovdqu64",
true },
2755 {
"vpabsb",
false },
2756 {
"vpabsw",
false },
2757 {
"vpabsd",
false },
2758 {
"vpabsq",
false },
2761 const Tbl&
p =
tbl[i];
2765 if (!
p.M_X)
continue;
2803 const Tbl&
p =
tbl[i];
2908 {
"vpslldq", _XMM3, _XMM3 |
_MEM,
IMM8 },
2912 {
"vpsrldq", _XMM3, _XMM3 |
_MEM,
IMM8 },
2926 {
"vpsllw", _XMM3, _XMM3 |
_MEM,
IMM8 },
2996 {
"vpmulhrsw", _XMM3,
_XMM,
_XMM },
3017 {
"vpunpckhbw", _XMM3,
_XMM,
_XMM },
3020 {
"vpunpckhwd", _XMM3,
_XMM,
_XMM },
3029 {
"vpunpcklbw", _XMM3,
_XMM,
_XMM },
3032 {
"vpunpcklwd", _XMM3,
_XMM,
_XMM },
3086 const Tbl&
p =
tbl[i];
3087 put(
p.name,
p.x1,
p.x2,
p.xm);
3100 {
"vinsertps",
_XMM,
_XMM, _XMM3 },
3129 const Tbl&
p =
tbl[i];
3150 {
"vfmadd",
false },
3151 {
"vfmaddsub",
true },
3152 {
"vfmsubadd",
true },
3154 {
"vfmsub",
false },
3155 {
"vfnmadd",
true },
3156 {
"vfnmadd",
false },
3157 {
"vfnmsub",
true },
3158 {
"vfnmsub",
false },
3161 const Tbl&
p =
tbl[i];
3170 const char sufTbl[][2][8] = {
3174 for (
size_t k = 0; k < 2; k++) {
3175 const std::string suf = sufTbl[
p.supportYMM ? 0 : 1][k];
3179 }
else if (suf ==
"ps") {
3184 std::string
name = std::string(
p.name) + ord[
j].name + suf;
3185 const char *q =
name.c_str();
3187 if (!
p.supportYMM)
continue;
3190 }
else if (suf ==
"ps") {
3202 const char *
tbl[] = {
3251 if (
tbl[i].only_pd_ps &&
j == 2)
break;
3252 std::string suf = sufTbl[
j].suf;
3253 std::string
name =
tbl[i].name + suf;
3254 const char *
p =
name.c_str();
3258 }
else if (suf ==
"ps") {
3262 if (!sufTbl[
j].supportYMM)
continue;
3266 }
else if (suf ==
"ps") {
3335 Test test(argc > 1);
#define CYBOZU_SNPRINTF(x, len,...)
void putBroadcastSub(int idx, int disp)
void putGath(const std::string &vsib)
void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx=0, bool z=false, int sae=0)
const uint64 MEM_ONLY_DISP
const uint64 MEM_ONLY_DISP
LOGGING_API void printf(Category category, const char *format,...)
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
unsigned __int64 uint64_t
int type definition and macros Copyright (C) 2008 Cybozu Labs, Inc., all rights reserved.
Xbyak ; JIT assembler for x86(IA32)/x64 by C++.