Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
Test Class Reference
Inheritance diagram for Test:
Collaboration diagram for Test:

Public Types

enum  { xx_yy_zz , xx_yx_zy , xx_xy_yz }
 

Public Member Functions

 Test (bool isXbyak)
 
void separateFunc ()
 
 ~Test ()
 
void put ()
 
void putOpmask ()
 
void put_vaddpd (const char *r1, const char *r2, const char *r3, int kIdx=0, bool z=false, int sae=0)
 
void putCombi ()
 
void putCmpK ()
 
void putBroadcastSub (int idx, int disp)
 
void putBroadcast ()
 
void putMisc1 ()
 
void putAVX512_M_X ()
 
void put_vmov ()
 
void put512_X_XM ()
 
void put512_X_X_XM ()
 
void put512_X3 ()
 
void put512_X3_I ()
 
void put512_FMA ()
 
void put512_Y_XM ()
 
void put512_AVX1 ()
 
void putAVX1 ()
 
void put512_cvt ()
 
void putGather ()
 
void putScatter ()
 
void putBlend ()
 
void putVpcmp ()
 
void putVtest ()
 
void putCompExp ()
 
void putPerm ()
 
void putShuff ()
 
void putMov ()
 
void putRot ()
 
void putMisc2 ()
 
void classSubMem (const char *nm, char x, bool broadcast, int size)
 
void putClassSub (const char *name, int size)
 
void putClass ()
 
void putMin ()
 
void putDisp8N ()
 
void putAVX512 ()
 
 Test (bool isXbyak)
 
void separateFunc ()
 
 ~Test ()
 
void putGprR_R_RM ()
 
void putGprR_RM_R ()
 
void putGprR_RM ()
 
void putGprOtherwise ()
 
void putGather ()
 
void putGath (const std::string &vsib)
 
void putGatherAll ()
 
void putSeg ()
 
void put ()
 
 Test (int n)
 
void threadEntry ()
 
void stopThread ()
 

Detailed Description

Definition at line 116 of file make_512.cpp.

Member Enumeration Documentation

◆ anonymous enum

anonymous enum
Enumerator
xx_yy_zz 
xx_yx_zy 
xx_xy_yz 

Definition at line 1550 of file make_512.cpp.

1550 {
1551 xx_yy_zz,
1552 xx_yx_zy,
1553 xx_xy_yz
1554 };
@ xx_yx_zy
@ xx_yy_zz
@ xx_xy_yz

Constructor & Destructor Documentation

◆ Test() [1/3]

Test::Test ( bool isXbyak)
inline

Definition at line 422 of file make_512.cpp.

423 : isXbyak_(isXbyak)
424 , funcNum_(1)
425 {
426 if (!isXbyak_) return;
427 printf("%s",
428 " void gen0()\n"
429 " {\n");
430 }
LOGGING_API void printf(Category category, const char *format,...)
Definition Logging.cpp:30

◆ ~Test() [1/2]

Test::~Test ( )
inline

Definition at line 443 of file make_512.cpp.

444 {
445 if (!isXbyak_) return;
446 printf("%s",
447 " }\n"
448 " void gen()\n"
449 " {\n");
450 for (int i = 0; i < funcNum_; i++) {
451 printf(
452 " gen%d();\n", i);
453 }
454 printf(
455 " }\n");
456 }

◆ Test() [2/3]

Test::Test ( bool isXbyak)
inline

Definition at line 2121 of file make_nm.cpp.

2122 : isXbyak_(isXbyak)
2123 , funcNum_(1)
2124 {
2125 if (!isXbyak_) return;
2126 printf("%s",
2127 " void gen0()\n"
2128 " {\n");
2129 }

◆ ~Test() [2/2]

Test::~Test ( )
inline

Definition at line 2142 of file make_nm.cpp.

2143 {
2144 if (!isXbyak_) return;
2145 printf("%s",
2146 " }\n"
2147 " void gen()\n"
2148 " {\n");
2149 for (int i = 0; i < funcNum_; i++) {
2150 printf(
2151 " gen%d();\n", i);
2152 }
2153 printf(
2154 " }\n");
2155 }

◆ Test() [3/3]

Test::Test ( int n)
inline

Definition at line 36 of file test_mmx.cpp.

37 : n_(n)
38 {
39 }

Member Function Documentation

◆ classSubMem()

void Test::classSubMem ( const char * nm,
char x,
bool broadcast,
int size )
inline

Definition at line 2053 of file make_512.cpp.

2054 {
2055 printf("%s ", nm);
2056 if (isXbyak_) {
2057 printf("(k5|k3, %cword%s [rax+64], 5);dump();\n", x, broadcast ? "_b" : "");
2058 } else {
2059 if (broadcast) {
2060 int d = x == 'x' ? 128 / size : x == 'y' ? 256 / size : 512 / size;
2061 printf("k5{k3}, [rax+64]{1to%d}, 5\n", d);
2062 } else {
2063 if (x == 'x') x = 'o'; // nasm
2064 printf("k5{k3}, %cword [rax+64], 5\n", x);
2065 }
2066 }
2067 }
CK_ULONG d
Here is the caller graph for this function:

◆ put() [1/2]

void Test::put ( )
inline

Definition at line 457 of file make_512.cpp.

458 {
459 putAVX512();
460 }
void putAVX512()
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put() [2/2]

void Test::put ( )
inline

Definition at line 2345 of file make_nm.cpp.

2346 {
2347#ifdef USE_AVX512
2348 putAVX512();
2349#else
2350
2351#ifdef USE_AVX
2352
2353 separateFunc();
2354 putFMA2();
2355
2356#ifdef USE_YASM
2357 putGprR_R_RM();
2358 putGprR_RM_R();
2359 putGprR_RM();
2360 putGprOtherwise();
2361 putGather();
2362 putGatherAll();
2363#else
2364 putAVX1();
2365 separateFunc();
2366 putAVX2();
2367 putAVX_X_X_XM_omit();
2368 separateFunc();
2369 putAVX_X_X_XM_IMM();
2370 separateFunc();
2371 putAVX_X_XM_IMM();
2372 separateFunc();
2373 putAVX_X_X_XM();
2374 separateFunc();
2375 putAVX_X_XM();
2376 separateFunc();
2377 putAVX_M_X();
2378 putAVX_X_X_IMM_omit();
2379 separateFunc();
2380 putAVX_Y_XM();
2381 separateFunc();
2382 putFMA();
2383 putSHA();
2384#endif
2385
2386#else // USE_AVX
2387
2388 putJmp();
2389
2390#ifdef USE_YASM
2391
2392 putSSSE3();
2393 putSSE4_1();
2394 separateFunc();
2395 putSSE4_2();
2396 putSeg(); // same behavior as yasm for mov rax, cx
2397#else
2398 putSIMPLE();
2399 putReg1();
2400 putBt();
2401 putRorM();
2402 separateFunc();
2403 putPushPop();
2404 putTest();
2405 separateFunc();
2406 putEtc();
2407 putShift();
2408 putShxd();
2409
2410 separateFunc();
2411
2412 putBs();
2413 putMMX1();
2414 putMMX2();
2415 separateFunc();
2416 putMMX3();
2417 putMMX4();
2418 putMMX5();
2419 separateFunc();
2420 putXMM1();
2421 putXMM2();
2422 putXMM3();
2423 putXMM4();
2424 separateFunc();
2425 putCmov();
2426 putFpuMem16_32();
2427 putFpuMem32_64();
2428 separateFunc();
2429 putFpuMem16_32_64();
2430 put("clflush", MEM); // current nasm is ok
2431 putFpu();
2432 putFpuFpu();
2433 putCmp();
2434 putMPX();
2435#endif
2436
2437#ifdef XBYAK64
2438
2439#ifdef USE_YASM
2440 putRip();
2441#else
2442 putMov64();
2443 putMovImm64();
2444#endif
2445
2446#endif // XBYAK64
2447
2448#endif // USE_AVX
2449
2450#endif // USE_AVX512
2451 }
void putGprR_RM()
Definition make_nm.cpp:2189
void putGprR_RM_R()
Definition make_nm.cpp:2172
void putGather()
void putSeg()
Definition make_nm.cpp:2290
void separateFunc()
Definition make_512.cpp:435
void putGprOtherwise()
Definition make_nm.cpp:2204
void putAVX1()
void put()
Definition make_512.cpp:457
void putGatherAll()
Definition make_nm.cpp:2258
void putGprR_R_RM()
Definition make_nm.cpp:2156
const uint64 MEM
Definition make_512.cpp:70
Here is the call graph for this function:

◆ put512_AVX1()

void Test::put512_AVX1 ( )
inline

Definition at line 1309 of file make_512.cpp.

1310 {
1311#ifdef XBYAK64
1312 const struct Tbl {
1313 std::string name;
1314 bool only_pd_ps;
1315 } tbl[] = {
1316 { "vadd", false },
1317 { "vsub", false },
1318 { "vmul", false },
1319 { "vdiv", false },
1320 { "vmax", false },
1321 { "vmin", false },
1322 { "vand", true },
1323 { "vandn", true },
1324 { "vor", true },
1325 { "vxor", true },
1326 };
1327 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1328 const struct Suf {
1329 const char *suf;
1330 bool supportYMM;
1331 } sufTbl[] = {
1332 { "pd", true },
1333 { "ps", true },
1334 { "sd", false },
1335 { "ss", false },
1336 };
1337 for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) {
1338 if (tbl[i].only_pd_ps && j == 2) break;
1339 std::string suf = sufTbl[j].suf;
1340 std::string name = tbl[i].name + suf;
1341 const char *p = name.c_str();
1342 uint64_t mem = 0;
1343 if (suf == "pd") {
1344 mem = M_1to2;
1345 } else if (suf == "ps") {
1346 mem = M_1to4;
1347 }
1348 put(p, _XMM3 | XMM_KZ, _XMM, mem | _MEM);
1349 if (!sufTbl[j].supportYMM) continue;
1350 mem = 0;
1351 if (suf == "pd") {
1352 mem = M_1to8;
1353 } else if (suf == "ps") {
1354 mem = M_1to16;
1355 }
1356 put(p, _ZMM, _ZMM, mem | _MEM);
1357 }
1358 }
1359#endif
1360 }
#define NUM_OF_ARRAY(x)
Definition bench.cpp:12
const mie::Vuint & p
Definition bn.cpp:27
std::string name
const struct Ptn tbl[]
const uint64 XMM_KZ
Definition make_512.cpp:100
const uint64 M_1to8
Definition make_512.cpp:107
const uint64 _MEM
Definition make_512.cpp:14
const uint64 _ZMM
Definition make_512.cpp:79
const uint64 M_1to2
Definition make_512.cpp:105
const uint64 M_1to16
Definition make_512.cpp:108
const uint64 _XMM
Definition make_512.cpp:13
const uint64 M_1to4
Definition make_512.cpp:106
unsigned __int64 uint64_t
Definition stdint.h:136
uint16_t j
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put512_cvt()

void Test::put512_cvt ( )
inline

Definition at line 1401 of file make_512.cpp.

1402 {
1403#ifdef XBYAK64
1404 put("vcvtdq2pd", XMM_KZ, _XMM | _MEM | M_1to2);
1405 put("vcvtdq2pd", YMM_KZ, _XMM | _MEM | M_1to4);
1406 put("vcvtdq2pd", ZMM_KZ, _YMM | _MEM | M_1to8);
1407
1408 put("vcvtdq2ps", XMM_KZ, _XMM | _MEM | M_1to4);
1409 put("vcvtdq2ps", YMM_KZ, _YMM | _MEM | M_1to8);
1410 put("vcvtdq2ps", ZMM_KZ, _ZMM | _MEM | M_1to16 | ZMM_ER);
1411
1412 put("vcvtpd2dq", XMM_KZ, _XMM | M_xword | M_1to2);
1413 put("vcvtpd2dq", XMM_KZ, _YMM | M_yword | MY_1to4);
1414 put("vcvtpd2dq", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
1415
1416 put("vcvtpd2ps", XMM_KZ, _XMM | M_xword | M_1to2);
1417 put("vcvtpd2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
1418 put("vcvtpd2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
1419
1420 put("vcvtpd2qq", XMM_KZ, _XMM | _MEM | M_1to2);
1421 put("vcvtpd2qq", YMM_KZ, _YMM | _MEM | M_1to4);
1422 put("vcvtpd2qq", ZMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_ER);
1423
1424 put("vcvtpd2udq", XMM_KZ, _XMM | M_xword | M_1to2);
1425 put("vcvtpd2udq", XMM_KZ, _YMM | M_yword | MY_1to4);
1426 put("vcvtpd2udq", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
1427
1428 put("vcvtpd2uqq", XMM_KZ, _XMM | _MEM | M_1to2);
1429 put("vcvtpd2uqq", YMM_KZ, _YMM | _MEM | M_1to4);
1430 put("vcvtpd2uqq", ZMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_ER);
1431
1432 put("vcvtph2ps", XMM_KZ, _XMM | _MEM);
1433 put("vcvtph2ps", YMM_KZ, _XMM | _MEM);
1434 put("vcvtph2ps", ZMM_KZ, _YMM | _MEM | YMM_SAE);
1435
1436 put("vcvtps2ph", XMM_KZ | _MEM, _XMM, IMM8);
1437 put("vcvtps2ph", XMM_KZ | _MEM, _YMM, IMM8);
1438 put("vcvtps2ph", YMM_KZ | _MEM, _ZMM, IMM8);
1439 put("vcvtps2ph", YMM_KZ, ZMM_SAE, IMM8);
1440
1441 put("vcvtps2dq", XMM_KZ, _XMM | _MEM | M_1to4);
1442 put("vcvtps2dq", YMM_KZ, _YMM | _MEM | M_1to8);
1443 put("vcvtps2dq", ZMM_KZ, _ZMM | _MEM | M_1to16 | ZMM_ER);
1444
1445 put("vcvtps2udq", XMM_KZ, _XMM | M_1to4);
1446 put("vcvtps2udq", YMM_KZ, _YMM | M_1to8);
1447 put("vcvtps2udq", ZMM_KZ, _ZMM | _MEM | M_1to16 | ZMM_ER);
1448
1449 put("vcvtps2qq", XMM_KZ, _XMM | _MEM | M_1to2);
1450 put("vcvtps2qq", YMM_KZ, _XMM | _MEM | M_1to4);
1451 put("vcvtps2qq", ZMM_KZ, _YMM | _MEM | M_1to8 | YMM_ER);
1452
1453 put("vcvtps2uqq", XMM_KZ, _XMM | _MEM | M_1to2);
1454 put("vcvtps2uqq", YMM_KZ, _XMM | _MEM | M_1to4);
1455 put("vcvtps2uqq", ZMM_KZ, _YMM | _MEM | M_1to8 | YMM_ER);
1456
1457 put("vcvtps2pd", XMM_KZ, _XMM | _MEM | M_1to2);
1458 put("vcvtps2pd", YMM_KZ, _XMM | _MEM | M_1to4);
1459 put("vcvtps2pd", ZMM_KZ, _YMM | _MEM | M_1to8 | YMM_SAE);
1460
1461 put("vcvtqq2pd", XMM_KZ, _XMM | _MEM | M_1to2);
1462 put("vcvtqq2pd", YMM_KZ, _YMM | _MEM | M_1to4);
1463 put("vcvtqq2pd", ZMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_ER);
1464
1465 put("vcvtqq2ps", XMM_KZ, _XMM | M_xword | M_1to2);
1466 put("vcvtqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
1467 put("vcvtqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
1468
1469 put("vcvtsd2si", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
1470
1471 put("vcvtsd2usi", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
1472
1473 put("vcvtsd2ss", XMM_KZ, _XMM3, _XMM3 | _MEM | XMM_ER);
1474
1475 put("vcvtsi2sd", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
1476 put("vcvtsi2sd", XMM, XMM_ER, REG64);
1477
1478 put("vcvtsi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
1479 put("vcvtsi2ss", XMM, XMM_ER, REG32 | REG64);
1480
1481 put("vcvtss2sd", XMM_KZ, _XMM3, _XMM3 | _MEM | XMM_SAE);
1482
1483 put("vcvtss2si", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
1484
1485 put("vcvtss2usi", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
1486
1487 put("vcvtpd2dq", XMM_KZ, _XMM | M_xword | M_1to2);
1488 put("vcvtpd2dq", XMM_KZ, _YMM | M_yword | MY_1to4);
1489 put("vcvtpd2dq", YMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_ER);
1490
1491 put("vcvttpd2qq", XMM_KZ, _XMM | _MEM | M_1to2);
1492 put("vcvttpd2qq", YMM_KZ, _YMM | _MEM | M_1to4);
1493 put("vcvttpd2qq", ZMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_SAE);
1494
1495 put("vcvttpd2udq", XMM_KZ, _XMM | M_xword | M_1to2);
1496 put("vcvttpd2udq", XMM_KZ, _YMM | M_yword | MY_1to4);
1497 put("vcvttpd2udq", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_SAE);
1498
1499 put("vcvttpd2uqq", XMM_KZ, _XMM | _MEM | M_1to2);
1500 put("vcvttpd2uqq", YMM_KZ, _YMM | _MEM | M_1to4);
1501 put("vcvttpd2uqq", ZMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_SAE);
1502
1503 put("vcvttps2dq", XMM_KZ, _XMM | _MEM | M_1to4);
1504 put("vcvttps2dq", YMM_KZ, _YMM | _MEM | M_1to8);
1505 put("vcvttps2dq", ZMM_KZ, _ZMM | _MEM | M_1to16 | ZMM_SAE);
1506
1507 put("vcvttps2udq", XMM_KZ, _XMM | M_1to4);
1508 put("vcvttps2udq", YMM_KZ, _YMM | M_1to8);
1509 put("vcvttps2udq", ZMM_KZ, _ZMM | _MEM | M_1to16 | ZMM_SAE);
1510
1511 put("vcvttps2qq", XMM_KZ, _XMM | _MEM | M_1to2);
1512 put("vcvttps2qq", YMM_KZ, _XMM | _MEM | M_1to4);
1513 put("vcvttps2qq", ZMM_KZ, _YMM | _MEM | M_1to8 | YMM_SAE);
1514
1515 put("vcvttps2uqq", XMM_KZ, _XMM | _MEM | M_1to2);
1516 put("vcvttps2uqq", YMM_KZ, _XMM | _MEM | M_1to4);
1517 put("vcvttps2uqq", ZMM_KZ, _YMM | _MEM | M_1to8 | YMM_SAE);
1518
1519 put("vcvttsd2si", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
1520
1521 put("vcvttsd2usi", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
1522
1523 put("vcvttss2si", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
1524
1525 put("vcvttss2usi", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
1526
1527 put("vcvtudq2pd", XMM_KZ, _XMM | _MEM | M_1to2);
1528 put("vcvtudq2pd", YMM_KZ, _XMM | _MEM | M_1to4);
1529 put("vcvtudq2pd", ZMM_KZ, _YMM | _MEM | M_1to8);
1530
1531 put("vcvtudq2ps", XMM_KZ, _XMM | _MEM | M_1to4);
1532 put("vcvtudq2ps", YMM_KZ, _YMM | _MEM | M_1to8);
1533 put("vcvtudq2ps", ZMM_KZ, _ZMM | _MEM | M_1to16 | ZMM_ER);
1534
1535 put("vcvtuqq2pd", XMM_KZ, _XMM | _MEM | M_1to2);
1536 put("vcvtuqq2pd", YMM_KZ, _YMM | _MEM | M_1to4);
1537 put("vcvtuqq2pd", ZMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_ER);
1538
1539 put("vcvtuqq2ps", XMM_KZ, _XMM | M_xword | M_1to2);
1540 put("vcvtuqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
1541 put("vcvtuqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
1542
1543 put("vcvtusi2sd", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
1544 put("vcvtusi2sd", XMM, XMM_ER, REG64);
1545
1546 put("vcvtusi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
1547 put("vcvtusi2ss", XMM, XMM_ER, REG32 | REG64);
1548#endif
1549 }
const uint64 REG32
Definition make_512.cpp:66
const uint64 XMM_SAE
Definition make_512.cpp:94
const uint64 XMM
Definition make_512.cpp:76
const uint64 M_xword
Definition make_512.cpp:110
const uint64 ZMM
Definition make_512.cpp:85
const uint64 IMM8
Definition make_512.cpp:18
const uint64 M_yword
Definition make_512.cpp:111
const uint64 YMM_SAE
Definition make_512.cpp:12
const uint64 MEM64
Definition make_512.cpp:71
const uint64 REG64
Definition make_512.cpp:65
const uint64 ZMM_KZ
Definition make_512.cpp:102
const uint64 ZMM_ER
Definition make_512.cpp:90
const uint64 MY_1to4
Definition make_512.cpp:112
const uint64 YMM_ER
Definition make_512.cpp:72
const uint64 _YMM
Definition make_512.cpp:34
const uint64 MEM32
Definition make_512.cpp:29
const uint64 YMM_KZ
Definition make_512.cpp:101
const uint64 XMM_ER
Definition make_512.cpp:109
const uint64 ZMM_SAE
Definition make_512.cpp:89
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put512_FMA()

void Test::put512_FMA ( )
inline

Definition at line 1221 of file make_512.cpp.

1222 {
1223 const struct Tbl {
1224 const char *name;
1225 bool supportYMM;
1226 } tbl[] = {
1227 { "vfmadd", true },
1228 { "vfmadd", false },
1229 { "vfmaddsub", true },
1230 { "vfmsubadd", true },
1231 { "vfmsub", true },
1232 { "vfmsub", false },
1233 { "vfnmadd", true },
1234 { "vfnmadd", false },
1235 { "vfnmsub", true },
1236 { "vfnmsub", false },
1237 };
1238 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1239 const Tbl& p = tbl[i];
1240 const struct Ord {
1241 const char *name;
1242 } ord[] = {
1243 { "132" },
1244 { "213" },
1245 { "231" },
1246 };
1247 for (size_t j = 0; j < NUM_OF_ARRAY(ord); j++) {
1248 const char sufTbl[][2][8] = {
1249 { "pd", "ps" },
1250 { "sd", "ss" },
1251 };
1252 for (size_t k = 0; k < 2; k++) {
1253 const std::string suf = sufTbl[p.supportYMM ? 0 : 1][k];
1254 uint64_t mem = 0;
1255 if (suf == "pd") {
1256 mem = M_1to2;
1257 } else if (suf == "ps") {
1258 mem = M_1to4;
1259 } else {
1260 mem = XMM_ER;
1261 }
1262 std::string name = std::string(p.name) + ord[j].name + suf;
1263 const char *q = name.c_str();
1264 put(q, XMM_KZ, _XMM, mem | _MEM);
1265 if (!p.supportYMM) continue;
1266 if (suf == "pd") {
1267 mem = M_1to8;
1268 } else if (suf == "ps") {
1269 mem = M_1to16;
1270 } else {
1271 mem = XMM_ER;
1272 }
1273 put(q, _ZMM, _ZMM, mem | _MEM);
1274 }
1275 }
1276 }
1277 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put512_X3()

void Test::put512_X3 ( )
inline

Definition at line 885 of file make_512.cpp.

886 {
887#ifdef XBYAK64
888 const struct Tbl {
889 const char *name;
890 uint64_t x1;
891 uint64_t x2;
892 uint64_t xm;
893 } tbl[] = {
894 { "vpacksswb", XMM_KZ, _XMM, _XMM | _MEM },
895 { "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM },
896 { "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
897
898 { "vpackssdw", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
899 { "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
900 { "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
901
902 { "vpackusdw", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
903 { "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
904 { "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
905
906 { "vpackuswb", XMM_KZ, _XMM, _XMM | _MEM },
907 { "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM },
908 { "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
909
910 { "vpaddb", XMM_KZ, _XMM, _XMM | _MEM },
911 { "vpaddw", XMM_KZ, _XMM, _XMM | _MEM },
912 { "vpaddd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
913 { "vpaddq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
914
915 { "vpaddsb", XMM_KZ, _XMM, _XMM | _MEM },
916 { "vpaddsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
917
918 { "vpaddsw", XMM_KZ, _XMM, _XMM | _MEM },
919 { "vpaddsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
920
921 { "vpaddusb", XMM_KZ, _XMM, _XMM | MEM },
922 { "vpaddusb", ZMM_KZ, _ZMM, _ZMM | MEM },
923
924 { "vpaddusw", XMM_KZ, _XMM, _XMM | MEM },
925 { "vpaddusw", ZMM_KZ, _ZMM, _ZMM | MEM },
926
927 { "vpsubb", XMM_KZ, _XMM, _XMM | _MEM },
928 { "vpsubw", XMM_KZ, _XMM, _XMM | _MEM },
929 { "vpsubd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
930 { "vpsubq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
931
932 { "vpsubsb", XMM_KZ, _XMM, _XMM | _MEM },
933 { "vpsubsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
934
935 { "vpsubsw", XMM_KZ, _XMM, _XMM | _MEM },
936 { "vpsubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
937
938 { "vpsubusb", XMM_KZ, _XMM, _XMM | MEM },
939 { "vpsubusb", ZMM_KZ, _ZMM, _ZMM | MEM },
940
941 { "vpsubusw", XMM_KZ, _XMM, _XMM | MEM },
942 { "vpsubusw", ZMM_KZ, _ZMM, _ZMM | MEM },
943
944 { "vpandd", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
945 { "vpandq", ZMM_KZ, _ZMM, _ZMM | M_1to8 | _MEM },
946
947 { "vpandnd", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
948 { "vpandnq", ZMM_KZ, _ZMM, _ZMM | M_1to8 | _MEM },
949
950 { "vpavgb", ZMM_KZ, _ZMM, _ZMM },
951 { "vpavgw", ZMM_KZ, _ZMM, _ZMM },
952
953 { "vpcmpeqb", K2, _ZMM, _ZMM | _MEM },
954 { "vpcmpeqw", K2, _ZMM, _ZMM | _MEM },
955 { "vpcmpeqd", K2, _ZMM, _ZMM | M_1to16 | _MEM },
956 { "vpcmpeqq", K2, _ZMM, _ZMM | M_1to8 | _MEM },
957
958 { "vpcmpgtb", K2, _ZMM, _ZMM | _MEM },
959 { "vpcmpgtw", K2, _ZMM, _ZMM | _MEM },
960 { "vpcmpgtd", K2, _ZMM, _ZMM | M_1to16 | _MEM },
961 { "vpcmpgtq", K2, _ZMM, _ZMM | M_1to8 | _MEM },
962
963 { "vpmaddubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
964 { "vpmaddwd", ZMM_KZ, _ZMM, _ZMM | _MEM },
965
966 { "vpmaxsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
967 { "vpmaxsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
968 { "vpmaxsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
969 { "vpmaxsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
970
971 { "vpmaxub", ZMM_KZ, _ZMM, _ZMM | _MEM },
972 { "vpmaxuw", ZMM_KZ, _ZMM, _ZMM | _MEM },
973 { "vpmaxud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
974 { "vpmaxuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
975
976 { "vpminsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
977 { "vpminsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
978 { "vpminsd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
979 { "vpminsq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
980
981 { "vpminub", ZMM_KZ, _ZMM, _ZMM | _MEM },
982 { "vpminuw", ZMM_KZ, _ZMM, _ZMM | _MEM },
983 { "vpminud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
984 { "vpminuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
985
986 { "vpslldq", _XMM3, _XMM3 | _MEM, IMM8 },
987 { "vpslldq", _YMM3, _YMM3 | _MEM, IMM8 },
988 { "vpslldq", _ZMM, _ZMM | _MEM, IMM8 },
989
990 { "vpsrldq", _XMM3, _XMM3 | _MEM, IMM8 },
991 { "vpsrldq", _YMM3, _YMM3 | _MEM, IMM8 },
992 { "vpsrldq", _ZMM, _ZMM | _MEM, IMM8 },
993
994 { "vpsraw", XMM_KZ, _XMM | _MEM, IMM8 },
995 { "vpsraw", ZMM_KZ, _ZMM | _MEM, IMM8 },
996
997 { "vpsrad", XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
998 { "vpsrad", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
999
1000 { "vpsraq", XMM, XMM, IMM8 },
1001 { "vpsraq", XMM_KZ, _XMM | M_1to2 | _MEM, IMM8 },
1002 { "vpsraq", ZMM_KZ, _ZMM | M_1to8 | _MEM, IMM8 },
1003
1004 { "vpsllw", _XMM3, _XMM3 | _MEM, IMM8 },
1005 { "vpslld", _XMM3, _XMM3 | _MEM | M_1to4, IMM8 },
1006 { "vpsllq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
1007
1008 { "vpsrlw", XMM_KZ, _XMM | _MEM, IMM8 },
1009 { "vpsrlw", ZMM_KZ, _ZMM | _MEM, IMM8 },
1010
1011 { "vpsrld", XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
1012 { "vpsrld", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
1013
1014 { "vpsrlq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
1015 { "vpsrlq", _ZMM, _ZMM | _MEM | M_1to8, IMM8 },
1016
1017 { "vpsravw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
1018 { "vpsravw", _ZMM, _ZMM, _MEM },
1019
1020 { "vpsravd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
1021 { "vpsravd", _ZMM, _ZMM, M_1to16 | _MEM },
1022
1023 { "vpsravq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
1024 { "vpsravq", _ZMM, _ZMM, M_1to8 | _MEM },
1025
1026 { "vpsllvw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
1027 { "vpsllvw", _ZMM, _ZMM, _MEM },
1028
1029 { "vpsllvd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
1030 { "vpsllvd", _ZMM, _ZMM, M_1to16 | _MEM },
1031
1032 { "vpsllvq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
1033 { "vpsllvq", _ZMM, _ZMM, M_1to8 | _MEM },
1034
1035 { "vpsrlvw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
1036 { "vpsrlvw", _ZMM, _ZMM, _MEM },
1037
1038 { "vpsrlvd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
1039 { "vpsrlvd", _ZMM, _ZMM, M_1to16 | _MEM },
1040
1041 { "vpsrlvq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
1042 { "vpsrlvq", _ZMM, _ZMM, M_1to8 | _MEM },
1043
1044 { "vpshufb", _XMM | XMM_KZ, _XMM, _XMM | _MEM },
1045 { "vpshufb", ZMM_KZ, _ZMM, _MEM },
1046
1047 { "vpshufhw", _XMM | XMM_KZ, _XMM | _MEM, IMM8 },
1048 { "vpshufhw", ZMM_KZ, _MEM, IMM8 },
1049
1050 { "vpshuflw", _XMM | XMM_KZ, _XMM | _MEM, IMM8 },
1051 { "vpshuflw", ZMM_KZ, _MEM, IMM8 },
1052
1053 { "vpshufd", _XMM | XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
1054 { "vpshufd", _ZMM | ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
1055
1056 { "vpord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
1057 { "vpord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM },
1058
1059 { "vporq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
1060 { "vporq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM },
1061
1062 { "vpxord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
1063 { "vpxord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM },
1064
1065 { "vpxorq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
1066 { "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM },
1067
1068 { "vpsadbw", _XMM3, _XMM, _XMM | _MEM },
1069 { "vpsadbw", _ZMM, _ZMM, _MEM },
1070
1071 { "vpmuldq", _XMM3, _XMM, _XMM | M_1to2 | _MEM },
1072 { "vpmuldq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
1073
1074 { "vpmulhrsw", _XMM3, _XMM, _XMM | _MEM },
1075 { "vpmulhrsw", ZMM_KZ, _ZMM, _MEM },
1076
1077 { "vpmulhuw", _XMM3, _XMM, _XMM | _MEM },
1078 { "vpmulhuw", ZMM_KZ, _ZMM, _MEM },
1079
1080 { "vpmulhw", _XMM3, _XMM, _XMM | _MEM },
1081 { "vpmulhw", ZMM_KZ, _ZMM, _MEM },
1082
1083 { "vpmullw", _XMM3, _XMM, _XMM | _MEM },
1084 { "vpmullw", ZMM_KZ, _ZMM, _MEM },
1085
1086 { "vpmulld", _XMM3, _XMM, M_1to4 | _MEM },
1087 { "vpmulld", ZMM_KZ, _ZMM, M_1to16 | _MEM },
1088
1089 { "vpmullq", _XMM3, _XMM, M_1to2 | _MEM },
1090 { "vpmullq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
1091
1092 { "vpmuludq", _XMM3, _XMM, M_1to2 | _MEM },
1093 { "vpmuludq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
1094
1095 { "vpunpckhbw", _XMM3, _XMM, _XMM | _MEM },
1096 { "vpunpckhbw", _ZMM, _ZMM, _MEM },
1097
1098 { "vpunpckhwd", _XMM3, _XMM, _XMM | _MEM },
1099 { "vpunpckhwd", _ZMM, _ZMM, _MEM },
1100
1101 { "vpunpckhdq", _XMM3, _XMM, M_1to4 | _MEM },
1102 { "vpunpckhdq", _ZMM, _ZMM, M_1to16 | _MEM },
1103
1104 { "vpunpckhqdq", _XMM3, _XMM, M_1to2 | _MEM },
1105 { "vpunpckhqdq", _ZMM, _ZMM, M_1to8 | _MEM },
1106
1107 { "vpunpcklbw", _XMM3, _XMM, _XMM | _MEM },
1108 { "vpunpcklbw", _ZMM, _ZMM, _MEM },
1109
1110 { "vpunpcklwd", _XMM3, _XMM, _XMM | _MEM },
1111 { "vpunpcklwd", _ZMM, _ZMM, _MEM },
1112
1113 { "vpunpckldq", _XMM3, _XMM, M_1to4 | _MEM },
1114 { "vpunpckldq", _ZMM, _ZMM, M_1to16 | _MEM },
1115
1116 { "vpunpcklqdq", _XMM3, _XMM, M_1to2 | _MEM },
1117 { "vpunpcklqdq", _ZMM, _ZMM, M_1to8 | _MEM },
1118
1119 { "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
1120 { "vextractf64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
1121 { "vextractf32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
1122 { "vextractf64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
1123
1124 { "vextracti32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
1125 { "vextracti64x2", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
1126 { "vextracti32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
1127 { "vextracti64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
1128
1129 { "vextractps", REG32 | _MEM, _XMM3, IMM8 },
1130
1131 { "vpermb", XMM_KZ, _XMM, _XMM | _MEM },
1132 { "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM },
1133
1134 { "vpermw", XMM_KZ, _XMM, _XMM | _MEM },
1135 { "vpermw", ZMM_KZ, _ZMM, _ZMM | _MEM },
1136
1137 { "vpermd", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
1138 { "vpermd", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
1139
1140 { "vpermilpd", XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
1141 { "vpermilpd", ZMM_KZ, _ZMM, M_1to8 | _MEM },
1142 { "vpermilpd", XMM_KZ, M_1to2 | _MEM, IMM8 },
1143 { "vpermilpd", ZMM_KZ, M_1to8 | _MEM, IMM8 },
1144
1145 { "vpermilps", XMM_KZ, _XMM, _XMM | _MEM | M_1to4 },
1146 { "vpermilps", ZMM_KZ, _ZMM, _MEM | M_1to16 },
1147 { "vpermilps", XMM_KZ, _MEM | M_1to4 | _MEM, IMM8 },
1148 { "vpermilps", ZMM_KZ, _MEM | M_1to16 | _MEM, IMM8 },
1149
1150 { "vpermpd", YMM_KZ, _YMM | M_1to4 | _MEM, IMM8 },
1151 { "vpermpd", ZMM_KZ, _ZMM | M_1to8 | _MEM, IMM8 },
1152 { "vpermpd", YMM_KZ, _YMM, M_1to4 | _MEM },
1153 { "vpermpd", ZMM_KZ, _ZMM, M_1to8 | _MEM },
1154
1155 { "vpermps", YMM_KZ, _YMM, M_1to8 | _MEM },
1156 { "vpermps", ZMM_KZ, _ZMM, M_1to16 | _MEM },
1157
1158 { "vpermq", YMM_KZ, _YMM | M_1to4 | _MEM, IMM8 },
1159 { "vpermq", ZMM_KZ, _ZMM | M_1to8 | _MEM, IMM8 },
1160 { "vpermq", YMM_KZ, _YMM, M_1to4 | _MEM },
1161 { "vpermq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
1162 };
1163 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1164 const Tbl& p = tbl[i];
1165 put(p.name, p.x1, p.x2, p.xm);
1166 }
1167#endif
1168 }
const uint64 K2
Definition make_512.cpp:88
const uint64 _YMM3
Definition make_512.cpp:86
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put512_X3_I()

void Test::put512_X3_I ( )
inline

Definition at line 1169 of file make_512.cpp.

1170 {
1171 const struct Tbl {
1172 const char *name;
1173 uint64_t x1;
1174 uint64_t x2;
1175 uint64_t xm;
1176 } tbl[] = {
1177#ifdef XBYAK64
1178 { "vinsertps", _XMM, _XMM, _XMM3 | _MEM },
1179
1180 { "vshufpd", XMM_KZ, _XMM, M_1to2 | _MEM },
1181 { "vshufpd", ZMM_KZ, _ZMM, M_1to8 | _MEM },
1182
1183 { "vshufps", XMM_KZ, _XMM, M_1to4 | _MEM },
1184 { "vshufps", ZMM_KZ, _ZMM, M_1to16 | _MEM },
1185
1186 { "vinsertf32x4", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
1187 { "vinsertf32x4", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
1188
1189 { "vinsertf64x2", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
1190 { "vinsertf64x2", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
1191
1192 { "vinsertf32x8", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
1193 { "vinsertf64x4", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
1194
1195 { "vinserti32x4", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
1196 { "vinserti32x4", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
1197
1198 { "vinserti64x2", _YMM | YMM_KZ, _YMM, _XMM | _MEM },
1199 { "vinserti64x2", _ZMM | ZMM_KZ, _ZMM, _XMM | _MEM },
1200
1201 { "vinserti32x8", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
1202 { "vinserti64x4", _ZMM | ZMM_KZ, _ZMM, _YMM | _MEM },
1203#endif
1204 { "vpalignr", ZMM_KZ, _ZMM, _ZMM | _MEM },
1205 };
1206 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1207 const Tbl& p = tbl[i];
1208 put(p.name, p.x1, p.x2, p.xm, IMM8);
1209 }
1210#ifdef XBYAK64
1211 put("vpextrb", _REG64 | _MEM, _XMM3, IMM8);
1212 put("vpextrw", _REG64 | _MEM, _XMM3, IMM8);
1213 put("vpextrd", _REG32 | _MEM, _XMM3, IMM8);
1214 put("vpextrq", _REG64 | _MEM, _XMM3, IMM8);
1215 put("vpinsrb", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
1216 put("vpinsrw", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
1217 put("vpinsrd", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
1218 put("vpinsrq", _XMM3, _XMM3, _REG64 | _MEM, IMM8);
1219#endif
1220 }
const uint64 _REG64
Definition make_512.cpp:58
const uint64 _REG32
Definition make_512.cpp:15
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put512_X_X_XM()

void Test::put512_X_X_XM ( )
inline

Definition at line 867 of file make_512.cpp.

868 {
869 const struct Tbl {
870 const char *name;
871 uint64_t mem;
872 } tbl[] = {
873 { "vsqrtsd", MEM },
874 { "vsqrtss", MEM },
875 { "vunpckhpd", M_1to2 },
876 { "vunpckhps", M_1to4 },
877 { "vunpcklpd", M_1to2 },
878 { "vunpcklps", M_1to4 },
879 };
880 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
881 const Tbl& p = tbl[i];
882 put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
883 }
884 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put512_X_XM()

void Test::put512_X_XM ( )
inline

Definition at line 819 of file make_512.cpp.

820 {
821 const struct Tbl {
822 const char *name;
823 bool M_X;
824 } tbl[] = {
825 { "vmovddup", false },
826 { "vmovdqa32", true },
827 { "vmovdqa64", true },
828 { "vmovdqu8", true },
829 { "vmovdqu16", true },
830 { "vmovdqu32", true },
831 { "vmovdqu64", true },
832 { "vpabsb", false },
833 { "vpabsw", false },
834 { "vpabsd", false },
835 { "vpabsq", false },
836 };
837 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
838 const Tbl& p = tbl[i];
839 put(p.name, _XMM|XMM_KZ, _XMM|MEM);
840 put(p.name, _YMM|YMM_KZ, _YMM|MEM);
841 put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
842 if (!p.M_X) continue;
843 put(p.name, MEM|MEM_K, _XMM);
844 put(p.name, MEM|MEM_K, _YMM);
845 put(p.name, MEM|MEM_K, _ZMM);
846 }
847 put("vsqrtpd", XMM_KZ, M_1to2 | _MEM);
848 put("vsqrtpd", YMM_KZ, M_1to4 | _MEM);
849 put("vsqrtpd", ZMM_KZ, M_1to8 | _MEM);
850 put("vsqrtpd", ZMM_KZ, ZMM_ER);
851
852 put("vsqrtps", XMM_KZ, M_1to4 | _MEM);
853 put("vsqrtps", YMM_KZ, M_1to8 | _MEM);
854 put("vsqrtps", ZMM_KZ, M_1to16 | _MEM);
855 put("vsqrtps", ZMM_KZ, ZMM_ER);
856
857 put("vpabsd", ZMM_KZ, M_1to16 | _MEM);
858 put("vpabsq", ZMM_KZ, M_1to8 | _MEM);
859
860 put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, _XMM | _MEM);
861 put("vbroadcastf32x4", YMM_KZ | ZMM_KZ, _MEM);
862
863 put("vbroadcastf64x2", YMM_KZ | ZMM_KZ, _MEM);
864 put("vbroadcastf64x4", ZMM_KZ, _MEM);
865 put("vbroadcastf32x8", ZMM_KZ, _MEM);
866 }
const uint64 MEM_K
Definition make_512.cpp:104
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put512_Y_XM()

void Test::put512_Y_XM ( )
inline

Definition at line 1278 of file make_512.cpp.

// put512_Y_XM: emits test cases for the vpmovsx* / vpmovzx* mnemonics listed in tbl.
// For each entry one put() call covers the XMM_KZ | YMM destinations; a second call
// covers the ZMM destination, whose source mask depends on all_xmm.
// put(...) and the mask constants are defined outside this listing.
1279 {
1280 const struct Tbl {
1281 const char *name;
1282 bool all_xmm; // 2nd param
1283 } tbl[] = {
1284 { "vpmovsxbw", false },
1285 { "vpmovsxbd", true },
1286 { "vpmovsxbq", true },
1287 { "vpmovsxwd", false },
1288 { "vpmovsxwq", true },
1289 { "vpmovsxdq", false },
1290
1291 { "vpmovzxbw", false },
1292 { "vpmovzxbd", true },
1293 { "vpmovzxbq", true },
1294 { "vpmovzxwd", false },
1295 { "vpmovzxwq", true },
1296 { "vpmovzxdq", false },
1297 };
1298 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1299 const Tbl& p = tbl[i];
1300 const char *name = p.name;
1301 put(name, XMM_KZ | YMM, _XMM | _MEM);
1302 if (p.all_xmm) { // ZMM destination with the _XMM | _MEM source mask
1303 put(name, ZMM, _XMM | _MEM);
1304 } else { // ZMM destination with the YMM | _MEM source mask
1305 put(name, ZMM, YMM | _MEM);
1306 }
1307 }
1308 }
const uint64 YMM
Definition make_512.cpp:77
Here is the call graph for this function:
Here is the caller graph for this function:

◆ put_vaddpd()

void Test::put_vaddpd ( const char * r1,
const char * r2,
const char * r3,
int kIdx = 0,
bool z = false,
int sae = 0 )
inline

Definition at line 524 of file make_512.cpp.

// put_vaddpd: prints one three-operand vaddpd test line to stdout.
//   r1, r2, r3 : operand strings, printed verbatim.
//   kIdx       : when non-zero, an opmask decoration is attached to r1
//                ("|k<N>" in the Xbyak form, "{k<N>}" in the assembler form).
//   z          : when true, a zeroing decoration follows ("|T_z" / "{z}").
//   sae        : index into the five-entry suffix tables below; it is used
//                unchecked, so callers must pass 0..4.
// The output syntax is selected by the member isXbyak_.
525 {
526 std::string modifier; // not referenced anywhere in this function
527 char pk[16] = "";
528 const char *pz = "";
529 const char *saeTblXbyak[] = { "", "|T_rn_sae", "|T_rd_sae", "|T_ru_sae", "|T_rz_sae" };
530 const char *saeTblNASM[] = { "", ",{rn-sae}", ",{rd-sae}", ",{ru-sae}", ",{rz-sae}" };
531 if (isXbyak_) { // emit a C++ call followed by "dump();"
532 if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "|k%d", kIdx);
533 if (z) pz = "|T_z";
534 printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]);
535 } else { // emit an assembler source line
536 if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx);
537 if (z) pz = "{z}";
538 printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]);
539 }
540 }
#define CYBOZU_SNPRINTF(x, len,...)
Definition inttype.hpp:64
Here is the caller graph for this function:

◆ put_vmov()

void Test::put_vmov ( )
inline

Definition at line 768 of file make_512.cpp.

// put_vmov: emits test cases for the vmov* family plus valignd/valignq.
// The whole body is compiled only when XBYAK64 is defined; otherwise the
// function does nothing. put(...) and the operand-class masks are defined
// outside this listing.
769 {
770#ifdef XBYAK64
771 put("vmovd", _XMM3, MEM|REG32);
772 put("vmovd", MEM|REG32, _XMM3);
773 put("vmovq", _XMM3, MEM|REG64|XMM);
774 put("vmovq", MEM|REG64|XMM, _XMM3);
775 put("vmovhlps", _XMM3, _XMM3, _XMM3);
776 put("vmovlhps", _XMM3, _XMM3, _XMM3);
777 put("vmovntdqa", _XMM3|_YMM3|ZMM, MEM);
778 put("vmovntdq", MEM, _XMM3 | _YMM3 | ZMM);
779 put("vmovntpd", MEM, _XMM3 | _YMM3 | ZMM);
780 put("vmovntps", MEM, _XMM3 | _YMM3 | ZMM);
781
782 put("vmovsd", XMM_KZ, _XMM3, _XMM3);
783 put("vmovsd", XMM_KZ, MEM);
784 put("vmovsd", MEM_K, XMM);
785 put("vmovss", XMM_KZ, _XMM3, _XMM3);
786 put("vmovss", XMM_KZ, MEM);
787 put("vmovss", MEM_K, XMM);
788
789 put("vmovshdup", _ZMM, _ZMM);
790 put("vmovsldup", _ZMM, _ZMM);
791
792
793 {
794 const char *tbl[] = {
795 "valignd",
796 "valignq",
797 };
798 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
799 const char *name = tbl[i];
800 put(name, XMM_KZ, _XMM, _XMM | MEM, IMM); // NOTE(review): this listing jumps from source line 800 to 802; line 801 is not shown -- check make_512.cpp
802 put(name, _ZMM, _ZMM, _ZMM | _MEM, IMM);
803 }
804 }
805 {
806 const char tbl[][16] = {
807 "vmovhpd",
808 "vmovhps",
809 "vmovlpd",
810 "vmovlps",
811 };
812 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
813 put(tbl[i], _XMM3, _XMM3, MEM); // three-operand form with a memory source
814 put(tbl[i], MEM, _XMM3); // two-operand form with a memory destination
815 }
816 }
817#endif
818 }
const uint64 IMM
Definition make_512.cpp:75
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putAVX1()

void Test::putAVX1 ( )
inline

Definition at line 1361 of file make_512.cpp.

// putAVX1: emits test cases for mnemonics built as "v" + <op> + <suffix>,
// e.g. "vaddpd". The ops come from tbl and the suffixes from suf.
// put(...) and the XMM/YMM/ZMM/MEM masks are defined outside this listing.
1362 {
1363 const struct Tbl {
1364 const char *name;
1365 bool only_pd_ps; // true: only the first two suffixes ("pd", "ps") are generated
1366 } tbl[] = {
1367 { "add", false },
1368 { "sub", false },
1369 { "mul", false },
1370 { "div", false },
1371 { "max", false },
1372 { "min", false },
1373 { "and", true },
1374 { "andn", true },
1375 { "or", true },
1376 { "xor", true },
1377 };
1378 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1379 const struct Suf {
1380 const char *suf;
1381 bool supportYMM; // false: only the XMM forms are emitted for this suffix
1382 } suf[] = {
1383 { "pd", true },
1384 { "ps", true },
1385 { "sd", false },
1386 { "ss", false },
1387 };
1388 for (size_t j = 0; j < NUM_OF_ARRAY(suf); j++) {
1389 if (tbl[i].only_pd_ps && j == 2) break; // index 2 is "sd": stop before the sd/ss suffixes
1390 std::string name = std::string("v") + tbl[i].name + suf[j].suf;
1391 const char *p = name.c_str(); // stays valid while 'name' is alive (this iteration)
1392 put(p, XMM, XMM | MEM);
1393 put(p, XMM, XMM, XMM | MEM);
1394 if (!suf[j].supportYMM) continue;
1395 put(p, YMM, YMM | MEM);
1396 put(p, YMM, YMM, YMM | MEM);
1397 put(p, ZMM, ZMM, ZMM | MEM);
1398 }
1399 }
1400 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putAVX512()

void Test::putAVX512 ( )
inline

Definition at line 2123 of file make_512.cpp.

// putAVX512: top-level driver that runs the individual emitters in a fixed order.
// When MIN_TEST is defined only putMin() runs. Otherwise every emitter below is
// called, with separateFunc() invoked between consecutive emitters.
// separateFunc() is defined outside this listing (presumably it starts a new
// generated function -- confirm against its definition).
2124 {
2125#ifdef MIN_TEST
2126 putMin();
2127#else
2128 putOpmask();
2129 separateFunc();
2130 putCombi();
2131 separateFunc();
2132 putCmpK();
2133 separateFunc();
2134 putBroadcast();
2135 separateFunc();
2136 putAVX512_M_X();
2137 separateFunc();
2138 put_vmov();
2139 separateFunc();
2140 put512_X_XM();
2141 separateFunc();
2142 put512_X_X_XM();
2143 separateFunc();
2144 put512_X3();
2145 separateFunc();
2146 put512_X3_I();
2147 separateFunc();
2148 put512_FMA();
2149 separateFunc();
2150 put512_Y_XM();
2151 separateFunc();
2152 put512_AVX1();
2153 separateFunc();
2154 put512_cvt();
2155 separateFunc();
2156 putMisc1();
2157 separateFunc();
2158 putGather();
2159 separateFunc();
2160 putBlend();
2161 separateFunc();
2162 putVpcmp();
2163 separateFunc();
2164 putVtest();
2165 separateFunc();
2166 putCompExp();
2167 separateFunc();
2168 putPerm();
2169 separateFunc();
2170 putShuff();
2171 separateFunc();
2172 putMisc2();
2173 separateFunc();
2174 putMov();
2175 separateFunc();
2176 putRot();
2177 separateFunc();
2178 putScatter();
2179 separateFunc();
2180 putClass(); // note: no separateFunc() between putClass() and putDisp8N()
2181 putDisp8N();
2182#endif
2183 }
void put512_AVX1()
void putMisc2()
void putCombi()
Definition make_512.cpp:541
void putBroadcast()
Definition make_512.cpp:653
void put512_X3()
Definition make_512.cpp:885
void putMin()
void putMisc1()
Definition make_512.cpp:685
void putMov()
void put512_cvt()
void putVtest()
void put512_X_XM()
Definition make_512.cpp:819
void put512_X3_I()
void putAVX512_M_X()
Definition make_512.cpp:750
void putBlend()
void putVpcmp()
void put_vmov()
Definition make_512.cpp:768
void putDisp8N()
void putPerm()
void putCmpK()
Definition make_512.cpp:596
void putShuff()
void put512_X_X_XM()
Definition make_512.cpp:867
void put512_FMA()
void putRot()
void put512_Y_XM()
void putOpmask()
Definition make_512.cpp:461
void putClass()
void putCompExp()
void putScatter()
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putAVX512_M_X()

void Test::putAVX512_M_X ( )
inline

Definition at line 750 of file make_512.cpp.

// putAVX512_M_X: emits store (MEM, reg) and load (reg, MEM) test cases for
// vmovapd / vmovaps / vmovupd / vmovups. The ZMM pair is always emitted; the
// _XMM3 pair only when XBYAK64 is defined.
// put(...) and the masks are defined outside this listing.
751 {
752 const char *tbl[] = {
753 "vmovapd",
754 "vmovaps",
755 "vmovupd",
756 "vmovups",
757 };
758 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
759 const char *name = tbl[i];
760 put(name, MEM, ZMM);
761 put(name, ZMM, MEM);
762#ifdef XBYAK64
763 put(name, MEM, _XMM3);
764 put(name, _XMM3, MEM);
765#endif
766 }
767 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putBlend()

void Test::putBlend ( )
inline

Definition at line 1632 of file make_512.cpp.

// putBlend: emits test cases for the mask-blend instructions
// vblendmpd / vblendmps / vpblendmb / vpblendmw / vpblendmd / vpblendmq at the
// three vector widths. The pd/q and ps/d groups additionally pass an M_1to*
// mask on the last operand; the b/w groups do not.
// put(...) and the operand-class masks are defined outside this listing.
//
// Fix: the second byte/word group previously repeated "vpblendmb" verbatim, so
// the byte variant was generated twice and the word variant never; it now
// emits "vpblendmw".
1633 {
1634 put("vblendmpd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
1635 put("vblendmpd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
1636 put("vblendmpd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
1637
1638 put("vblendmps", XMM_KZ, _XMM, _XMM | _MEM | M_1to4);
1639 put("vblendmps", YMM_KZ, _YMM, _YMM | _MEM | M_1to8);
1640 put("vblendmps", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16);
1641
1642 put("vpblendmb", XMM_KZ, _XMM, _XMM | _MEM);
1643 put("vpblendmb", YMM_KZ, _YMM, _YMM | _MEM);
1644 put("vpblendmb", ZMM_KZ, _ZMM, _ZMM | _MEM);
1645
1646 put("vpblendmw", XMM_KZ, _XMM, _XMM | _MEM);
1647 put("vpblendmw", YMM_KZ, _YMM, _YMM | _MEM);
1648 put("vpblendmw", ZMM_KZ, _ZMM, _ZMM | _MEM);
1649
1650 put("vpblendmd", XMM_KZ, _XMM, _XMM | _MEM | M_1to4);
1651 put("vpblendmd", YMM_KZ, _YMM, _YMM | _MEM | M_1to8);
1652 put("vpblendmd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16);
1653
1654 put("vpblendmq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
1655 put("vpblendmq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
1656 put("vpblendmq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
1657 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putBroadcast()

void Test::putBroadcast ( )
inline

Definition at line 653 of file make_512.cpp.

// putBroadcast: emits broadcast-related test cases.
//   1. putBroadcastSub() for displacements 0..8, with destination register
//      index 0 (and also 10 and 20 when XBYAK64 is defined).
//   2. vpbroadcast{b,w,d,q} with a general-purpose-register | memory source
//      (the q form only when XBYAK64 is defined).
//   3. The same four mnemonics with an _XMM | _MEM source.
//   4. The vbroadcasti* forms.
// put(...) and the operand-class masks are defined outside this listing.
654 {
655 for (int i = 0; i < 9; i++) {
656 putBroadcastSub(0, i);
657#ifdef XBYAK64
658 putBroadcastSub(10, i);
659 putBroadcastSub(20, i);
660#endif
661 }
662 put("vpbroadcastb", XMM_KZ | ZMM_KZ, REG8 | _MEM);
663 put("vpbroadcastw", XMM_KZ | ZMM_KZ, REG16 | _MEM);
664 put("vpbroadcastd", XMM_KZ | ZMM_KZ, REG32 | _MEM);
665#ifdef XBYAK64
666 put("vpbroadcastq", XMM_KZ | ZMM_KZ, REG64 | _MEM);
667#endif
668 {
669 const char *tbl[] = {
670 "vpbroadcastb",
671 "vpbroadcastw",
672 "vpbroadcastd",
673 "vpbroadcastq",
674 };
675 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
676 put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM);
677 }
678 }
679 put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM);
680 put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM);
681 put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM);
682 put("vbroadcasti32x8", ZMM_KZ, _MEM);
683 put("vbroadcasti64x4", ZMM_KZ, _MEM);
684 }
void putBroadcastSub(int idx, int disp)
Definition make_512.cpp:636
const uint64 REG16
Definition make_512.cpp:67
const uint64 REG8
Definition make_512.cpp:69
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putBroadcastSub()

void Test::putBroadcastSub ( int idx,
int disp )
inline

Definition at line 636 of file make_512.cpp.

// putBroadcastSub: prints three vaddpd test lines (zmm, ymm, xmm destination)
// whose last operand is a broadcast memory operand at [<base>+disp].
//   idx  : register number of the destination (zmm<idx> / ymm<idx> / xmm<idx>).
//   disp : displacement added to the base register.
// The base register is "rax" when XBYAK64 is defined, otherwise "eax".
// The output syntax is selected by the member isXbyak_.
637 {
638#ifdef XBYAK64
639 const char *a = "rax";
640#else
641 const char *a = "eax";
642#endif
643 if (isXbyak_) { // C++ call form: broadcast is spelled ptr_b[...]
644 printf("vaddpd(zmm%d, zmm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
645 printf("vaddpd(ymm%d, ymm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
646 printf("vaddpd(xmm%d, xmm1, ptr_b[%s+%d]);dump();\n", idx, a, disp);
647 } else { // assembler form: broadcast is spelled with an explicit {1toN} decoration
648 printf("vaddpd zmm%d, zmm1, [%s+%d]{1to8}\n", idx, a, disp);
649 printf("vaddpd ymm%d, ymm1, [%s+%d]{1to4}\n", idx, a, disp);
650 printf("vaddpd xmm%d, xmm1, [%s+%d]{1to2}\n", idx, a, disp);
651 }
652 }
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1181
Here is the caller graph for this function:

◆ putClass()

void Test::putClass ( )
inline

Definition at line 2077 of file make_512.cpp.

// putClass: emits test cases for the vfpclass* instructions. The packed forms
// go through putClassSub() with an element size of 64 (pd) or 32 (ps); the
// scalar forms are emitted directly. The body is compiled only when XBYAK64
// is defined. put(...) and the masks are defined outside this listing.
2078 {
2079#ifdef XBYAK64
2080 putClassSub("vfpclasspd", 64);
2081 putClassSub("vfpclassps", 32);
2082 put("vfpclasssd", K_K, _XMM | _MEM, IMM8);
2083 put("vfpclassss", K_K, _XMM | _MEM, IMM8);
2084#endif
2085 }
void putClassSub(const char *name, int size)
const uint64 K_K
Definition make_512.cpp:31
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putClassSub()

void Test::putClassSub ( const char * name,
int size )
inline

Definition at line 2068 of file make_512.cpp.

// putClassSub: emits the register-source forms of `name` via put(), then calls
// classSubMem() for the 'x', 'y' and 'z' variants twice: first with
// broadcast == true, then with broadcast == false.
//   name : mnemonic to emit.
//   size : forwarded unchanged to classSubMem().
// put(...) and classSubMem() are defined outside this listing.
2069 {
2070 put(name, K_K, _XMM | _YMM | _ZMM, IMM8);
2071 for (int i = 0; i < 2; i++) {
2072 classSubMem(name, 'x', i == 0, size); // i == 0 -> broadcast pass
2073 classSubMem(name, 'y', i == 0, size);
2074 classSubMem(name, 'z', i == 0, size);
2075 }
2076 }
void classSubMem(const char *nm, char x, bool broadcast, int size)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putCmpK()

void Test::putCmpK ( )
inline

Definition at line 596 of file make_512.cpp.

// putCmpK: emits test cases for the compare instructions.
//   - vcmp{pd,ps,sd,ss} writing to K, with an IMM8 last operand; the YMM and
//     ZMM forms are emitted only for entries with supportYMM set.
//   - one vcmppd case with a ZMM_SAE operand.
//   - v[u]comis{d,s} cases, compiled only when XBYAK64 is defined.
// put(...) and the operand-class masks are defined outside this listing.
597 {
598 {
599 const struct Tbl {
600 const char *name;
601 bool supportYMM; // false: only the XMM form is emitted
602 } tbl[] = {
603 { "vcmppd", true },
604 { "vcmpps", true },
605 { "vcmpsd", false },
606 { "vcmpss", false },
607 };
608 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
609 const Tbl *p = &tbl[i];
610 put(p->name, K, _XMM, _XMM | MEM, IMM8);
611 if (!p->supportYMM) continue;
612 put(p->name, K, _YMM, _YMM | MEM, IMM8);
613 put(p->name, K, _ZMM, _ZMM | MEM, IMM8);
614 }
615 }
616 put("vcmppd", K2, ZMM, ZMM_SAE, IMM);
617#ifdef XBYAK64
618 {
619 const struct Tbl {
620 const char *name;
621 } tbl[] = {
622 { "vcomisd" },
623 { "vcomiss" },
624 { "vucomisd" },
625 { "vucomiss" },
626 };
627 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
628 const Tbl *p = &tbl[i];
629 put(p->name, XMM | _XMM3, XMM_SAE | XMM | MEM);
630 }
631 }
632 put("vcomiss", _XMM3, XMM | MEM);
633 put("vcomiss", XMM, XMM_SAE);
634#endif
635 }
const uint64 K
Definition make_512.cpp:78
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putCombi()

void Test::putCombi ( )
inline

Definition at line 541 of file make_512.cpp.

// putCombi: emits vaddpd test lines for every (dst, src1, src2) combination
// drawn from three-entry register tables, one table per vector width. Which
// registers are in the tables depends on XBYAK64.
//   - With XBYAK64: each combination is also crossed with opmask index 0..7
//     and zeroing off/on; the zmm form additionally sweeps sae = 0..4.
//   - Without XBYAK64: no opmask or zeroing; the zmm form sweeps sae = 0..4.
// separateFunc() is called once per (i, j) pair. Three put() calls with a
// _MEM last operand follow the loops. put(...) and separateFunc() are defined
// outside this listing.
//
// Fix: the non-XBYAK64 branch called put_vaddpd(z, z, z, sae). The 4th
// parameter of put_vaddpd is kIdx, so the loop emitted opmask k1..k4 variants
// and never a rounding suffix. kIdx and z are now passed explicitly so that
// sae lands in the 6th parameter.
542 {
543 const char *xTbl[] = {
544 "xmm2",
545#ifdef XBYAK64
546 "xmm8", "xmm31"
547#else
548 "xmm5", "xmm6"
549#endif
550 };
551 const char *yTbl[] = {
552 "ymm0",
553#ifdef XBYAK64
554 "ymm15", "ymm31"
555#else
556 "ymm4", "ymm2"
557#endif
558 };
559 const char *zTbl[] = {
560 "zmm1",
561#ifdef XBYAK64
562 "zmm9", "zmm30"
563#else
564 "zmm3", "zmm7"
565#endif
566 };
567 const size_t N = NUM_OF_ARRAY(zTbl); // all three tables have the same length
568 for (size_t i = 0; i < N; i++) {
569 for (size_t j = 0; j < N; j++) {
570 separateFunc();
571 for (size_t k = 0; k < N; k++) {
572#ifdef XBYAK64
573 for (int kIdx = 0; kIdx < 8; kIdx++) {
574 for (int z = 0; z < 2; z++) {
575 put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx, z == 1);
576 put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx, z == 1);
577 for (int sae = 0; sae < 5; sae++) {
578 put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1, sae);
579 }
580 }
581 }
582#else
583 put_vaddpd(xTbl[i], xTbl[j], xTbl[k]);
584 put_vaddpd(yTbl[i], yTbl[j], yTbl[k]);
585 for (int sae = 0; sae < 5; sae++) {
586 put_vaddpd(zTbl[i], zTbl[j], zTbl[k], 0, false, sae); // kIdx = 0, z = false, then sae
587 }
588#endif
589 }
590 }
591 }
592 put("vaddpd", XMM, XMM, _MEM);
593 put("vaddpd", YMM, YMM, _MEM);
594 put("vaddpd", ZMM, ZMM, _MEM);
595 }
void put_vaddpd(const char *r1, const char *r2, const char *r3, int kIdx=0, bool z=false, int sae=0)
Definition make_512.cpp:524
const int N
Definition quantize.cpp:54
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putCompExp()

void Test::putCompExp ( )
inline

Definition at line 1711 of file make_512.cpp.

// putCompExp: emits test cases for the compress and expand instructions.
// Compress entries are emitted with a (vector-or-memory destination, vector
// source) operand pattern; expand entries with (vector destination,
// vector-or-memory source). Each is emitted at the XMM, YMM and ZMM widths.
// put(...) and the operand-class masks are defined outside this listing.
1712 {
1713 {
1714 const char *tbl[] = {
1715 "vcompresspd",
1716 "vcompressps",
1717 "vpcompressd",
1718 "vpcompressq",
1719 };
1720 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1721 const char *name = tbl[i];
1722 put(name, XMM_KZ | _MEM, _XMM);
1723 put(name, YMM_KZ | _MEM, _YMM);
1724 put(name, ZMM_KZ | _MEM, _ZMM);
1725 }
1726 }
1727 {
1728 const char *tbl[] = {
1729 "vexpandpd",
1730 "vexpandps",
1731 "vpexpandd",
1732 "vpexpandq",
1733 };
1734 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1735 const char *name = tbl[i];
1736 put(name, XMM_KZ, _XMM | _MEM);
1737 put(name, YMM_KZ, _YMM | _MEM);
1738 put(name, ZMM_KZ, _ZMM | _MEM);
1739 }
1740 }
1741 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putDisp8N()

void Test::putDisp8N ( )
inline

Definition at line 2092 of file make_512.cpp.

// putDisp8N: emits memory-operand test cases over a range of displacements.
//   1. vpbroadcastb xmm0, [eax+v] for displacements around the signed 8-bit
//      boundaries.
//   2. vaddps zmm0, zmm1, [eax+v]{1to16} for signed power-of-two displacements
//      from -1024 to 512, plus 0.
//   3. With XBYAK64 only: one vfmadd231ps case with a base+index address and
//      a negative displacement.
// Each case builds two operand strings, one in Xbyak syntax (xs) and one in
// assembler syntax (ns), and passes both to put(). put(...) is defined
// outside this listing.
//
// Fix: the second table read "-512 -256" with no comma, which is the single
// expression -768. The table therefore lacked both -512 and -256 and
// contained an unintended -768; the comma is restored.
2093 {
2094 {
2095 const int tbl[] = {
2096 -129, -128, -127, 0, 1, 64, 65, 127, 128
2097 };
2098 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2099 char xs[128], ns[128];
2100 int v = tbl[i];
2101 CYBOZU_SNPRINTF(xs, sizeof(xs), "xmm0, ptr[eax%+d]", v); // %+d always prints the sign
2102 CYBOZU_SNPRINTF(ns, sizeof(ns), "xmm0, [eax%+d]", v);
2103 put("vpbroadcastb", xs, ns);
2104 }
2105 }
2106 {
2107 const int tbl[] = {
2108 -1024, -512, -256, -128, -64, -32, -16, -8, -4, -2, -1,
2109 0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512
2110 };
2111 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2112 char xs[128], ns[128];
2113 int v = tbl[i];
2114 CYBOZU_SNPRINTF(xs, sizeof(xs), "zmm0, zmm1, ptr_b[eax%+d]", v);
2115 CYBOZU_SNPRINTF(ns, sizeof(ns), "zmm0, zmm1, [eax%+d]{1to16}", v);
2116 put("vaddps", xs, ns);
2117 }
2118 }
2119#ifdef XBYAK64
2120 put("vfmadd231ps", "zmm8, zmm31, ptr_b[r14+rbp-0x1e4]", "zmm8, zmm31, [r14+rbp-0x1e4]{1to16}");
2121#endif
2122 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putGath()

void Test::putGath ( const std::string & vsib)
inline

Definition at line 2251 of file make_nm.cpp.

// putGath: emits one vgatherdpd test case for the given VSIB address expression.
// Two operand strings are built, "xmm1, ptr[<vsib>], xmm3" and
// "xmm1, [<vsib>], xmm3", and both are passed to put() (defined outside this
// listing).
2252 {
2253 std::string x = "xmm1, ";
2254 std::string a = std::string("[") + vsib + "], xmm3";
2255 put("vgatherdpd", (x + "ptr" + a).c_str(), (x + a).c_str()); // the temporaries live until the call returns
2256 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putGather() [1/2]

void Test::putGather ( )
inline

Definition at line 1555 of file make_512.cpp.

// putGather (make_512.cpp): emits test cases for the gather instructions in
// tbl. Each entry's mode chooses which destination-register mask is paired
// with which VM32* vector-index mask in the three put() calls. The mode
// values are the class's anonymous enum { xx_yy_zz, xx_yx_zy, xx_xy_yz }.
// The body is compiled only when XBYAK64 is defined.
// put(...) and the masks are defined outside this listing.
1556 {
1557#ifdef XBYAK64
1558 const struct Tbl {
1559 const char *name;
1560 int mode;
1561 } tbl[] = {
1562 { "vpgatherdd", xx_yy_zz },
1563 { "vpgatherdq", xx_yx_zy },
1564 { "vpgatherqd", xx_xy_yz },
1565 { "vpgatherqq", xx_yy_zz },
1566 { "vgatherdps", xx_yy_zz },
1567 { "vgatherdpd", xx_yx_zy },
1568 { "vgatherqps", xx_xy_yz },
1569 { "vgatherqpd", xx_yy_zz },
1570 };
1571 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1572 const Tbl& p = tbl[i];
1573 switch (p.mode) {
1574 case xx_yy_zz: // destination and index masks have the same width
1575 put(p.name, XMM_K, VM32X);
1576 put(p.name, YMM_K, VM32Y);
1577 put(p.name, ZMM_K, VM32Z);
1578 break;
1579 case xx_yx_zy: // index mask one step narrower than the destination for YMM/ZMM
1580 put(p.name, XMM_K, VM32X);
1581 put(p.name, YMM_K, VM32X);
1582 put(p.name, ZMM_K, VM32Y);
1583 break;
1584 case xx_xy_yz: // index mask one step wider than the destination for the last two
1585 put(p.name, XMM_K, VM32X);
1586 put(p.name, XMM_K, VM32Y);
1587 put(p.name, YMM_K, VM32Z);
1588 break;
1589 }
1590 }
1591#endif
1592 }
const uint64 ZMM_K
Definition make_512.cpp:23
const uint64 VM32X
Definition make_512.cpp:62
const uint64 VM32Y
Definition make_512.cpp:63
const uint64 XMM_K
Definition make_512.cpp:21
const uint64 YMM_K
Definition make_512.cpp:22
const uint64 VM32Z
Definition make_512.cpp:30
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putGather() [2/2]

void Test::putGather ( )
inline

Definition at line 2213 of file make_nm.cpp.

// putGather (make_nm.cpp): emits three-operand gather test cases. For every
// entry the (XMM, VM32X, XMM) form is emitted first, then one extra form
// chosen by the entry's mode. An unrecognized mode prints an error and
// terminates the process with exit(1).
// put(...) and the masks are defined outside this listing.
2214 {
2215 const int y_vx_y = 0;
2216 const int y_vy_y = 1;
2217 const int x_vy_x = 2;
2218 const struct Tbl {
2219 const char *name;
2220 int mode; // one of y_vx_y / y_vy_y / x_vy_x above
2221 } tbl[] = {
2222 { "vgatherdpd", y_vx_y },
2223 { "vgatherqpd", y_vy_y },
2224 { "vgatherdps", y_vy_y },
2225 { "vgatherqps", x_vy_x },
2226 { "vpgatherdd", y_vy_y },
2227 { "vpgatherqd", x_vy_x },
2228 { "vpgatherdq", y_vx_y },
2229 { "vpgatherqq", y_vy_y },
2230 };
2231 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2232 const Tbl& p = tbl[i];
2233 const char *name = p.name;
2234 put(name, XMM, VM32X, XMM);
2235 switch (p.mode) {
2236 case y_vx_y:
2237 put(name, YMM, VM32X, YMM);
2238 break;
2239 case y_vy_y:
2240 put(name, YMM, VM32Y, YMM);
2241 break;
2242 case x_vy_x:
2243 put(name, XMM, VM32Y, XMM);
2244 break;
2245 default:
2246 printf("ERR mode=%d\n", p.mode);
2247 exit(1);
2248 }
2249 }
2250 }
Here is the call graph for this function:

◆ putGatherAll()

void Test::putGatherAll ( )
inline

Definition at line 2258 of file make_nm.cpp.

// putGatherAll: feeds putGath() a range of VSIB address spellings.
//   Loop 1: for each index expression s in xmmTbl, emits s, s+3, s+eax, 3+s
//           and eax+s.
//   Loop 2: for each index expression, emits every ordering of the three
//           terms {index, "123", "ebp"} joined with '+'.
2259 {
2260 const char *xmmTbl[] = {
2261 "xmm2",
2262 "xmm4",
2263 "xmm2*1",
2264 "xmm2*4",
2265 };
2266 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl); i++) {
2267 std::string s = xmmTbl[i];
2268 putGath(s);
2269 putGath(s + "+3");
2270 putGath(s + "+eax");
2271 putGath("3+" + s);
2272 putGath("eax+" + s);
2273 }
2274 for (size_t i = 0; i < NUM_OF_ARRAY(xmmTbl); i++) {
2275 int ord[] = { 0, 1, 2 }; // starts sorted, so the do/while below visits all 6 permutations
2276 do {
2277 std::string s;
2278 for (int j = 0; j < 3; j++) {
2279 if (j > 0) s += '+';
2280 switch (ord[j]) {
2281 case 0: s += xmmTbl[i]; break;
2282 case 1: s += "123"; break;
2283 case 2: s += "ebp"; break;
2284 }
2285 }
2286 putGath(s);
2287 } while (std::next_permutation(ord, ord + 3));
2288 }
2289 }
void putGath(const std::string &vsib)
Definition make_nm.cpp:2251
char * s
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putGprOtherwise()

void Test::putGprOtherwise ( )
inline

Definition at line 2204 of file make_nm.cpp.

// putGprOtherwise: emits test cases for rdrand, rdseed and rorx. The REG64
// form of rorx is emitted only when XBYAK64 is defined.
// put(...) and the operand-class masks are defined outside this listing.
2205 {
2206 put("rdrand", REG16 | REG32e);
2207 put("rdseed", REG16 | REG32e);
2208 put("rorx", REG32, REG32 | MEM, IMM8);
2209#ifdef XBYAK64
2210 put("rorx", REG64, REG64 | MEM, IMM8);
2211#endif
2212 }
const uint64 REG32e
Definition make_512.cpp:68
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putGprR_R_RM()

void Test::putGprR_R_RM ( )
inline

Definition at line 2156 of file make_nm.cpp.

// putGprR_R_RM: emits three-operand test cases with the operand pattern
// (reg, reg, reg-or-memory) for andn, mulx, pdep and pext. The REG64 form is
// emitted only when XBYAK64 is defined.
// put(...) and the operand-class masks are defined outside this listing.
2157 {
2158 const char *tbl[] = {
2159 "andn",
2160 "mulx",
2161 "pdep",
2162 "pext",
2163 };
2164 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2165 const char *name = tbl[i];
2166 put(name, REG32, REG32, REG32 | MEM);
2167#ifdef XBYAK64
2168 put(name, REG64, REG64, REG64 | MEM);
2169#endif
2170 }
2171 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putGprR_RM()

void Test::putGprR_RM ( )
inline

Definition at line 2189 of file make_nm.cpp.

// putGprR_RM: emits two-operand test cases with the operand pattern
// (reg, reg-or-memory) for blsi, blsmsk and blsr. The REG64 form is emitted
// only when XBYAK64 is defined.
// put(...) and the operand-class masks are defined outside this listing.
2190 {
2191 const char *tbl[] = {
2192 "blsi",
2193 "blsmsk",
2194 "blsr",
2195 };
2196 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2197 const char *name = tbl[i];
2198 put(name, REG32, REG32 | MEM);
2199#ifdef XBYAK64
2200 put(name, REG64, REG64 | MEM);
2201#endif
2202 }
2203 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putGprR_RM_R()

void Test::putGprR_RM_R ( )
inline

Definition at line 2172 of file make_nm.cpp.

// putGprR_RM_R: emits three-operand test cases with the operand pattern
// (reg, reg-or-memory, reg) for bextr, bzhi, sarx, shlx and shrx. The REG64
// form is emitted only when XBYAK64 is defined.
// put(...) and the operand-class masks are defined outside this listing.
2173 {
2174 const char *tbl[] = {
2175 "bextr",
2176 "bzhi",
2177 "sarx",
2178 "shlx",
2179 "shrx",
2180 };
2181 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
2182 const char *name = tbl[i];
2183 put(name, REG32, REG32 | MEM, REG32);
2184#ifdef XBYAK64
2185 put(name, REG64, REG64 | MEM, REG64);
2186#endif
2187 }
2188 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putMin()

void Test::putMin ( )
inline

Definition at line 2086 of file make_512.cpp.

// putMin: reduced test set that putAVX512() runs instead of the full suite
// when MIN_TEST is defined. It emits a single vextractf32x4 case, and only
// when XBYAK64 is defined.
2087 {
2088#ifdef XBYAK64
2089 put("vextractf32x4", XMM_KZ, _YMM, IMM8);
2090#endif
2091 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putMisc1()

void Test::putMisc1 ( )
inline

Definition at line 685 of file make_512.cpp.

// putMisc1: emits a mixed bag of test cases: vmaskmov*, vbroadcast* /
// vpbroadcast*, vinsert*128, vperm2*128, vpmaskmov*, vperm{d,ps,q,pd} and
// vpextrw. put(...) and the operand-class masks are defined outside this
// listing.
686 {
687 put("vmaskmovps", XMM, XMM, MEM);
688 put("vmaskmovps", YMM, YMM, MEM);
689
690 put("vmaskmovpd", YMM, YMM, MEM);
691 put("vmaskmovpd", XMM, XMM, MEM);
692
693 put("vmaskmovps", MEM, XMM, XMM); // memory-destination form; only the XMM variant is emitted here
694 put("vmaskmovpd", MEM, XMM, XMM);
695
696 put("vbroadcastf128", YMM, MEM);
697 put("vbroadcasti128", YMM, MEM);
698 put("vbroadcastsd", YMM|_YMM3, XMM|MEM);
699 put("vbroadcastsd", ZMM, XMM|MEM);
700 {
701 const char *tbl[] = {
702 "vbroadcastss",
703 "vpbroadcastb",
704 "vpbroadcastw",
705 "vpbroadcastd",
706 "vpbroadcastq",
707 };
708 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
709 put(tbl[i], XMM | YMM | ZMM, XMM|MEM);
710 }
711 }
712
713 put("vinsertf128", YMM, YMM, XMM | MEM, IMM8);
714 put("vinserti128", YMM, YMM, XMM | MEM, IMM8);
715 put("vperm2f128", YMM, YMM, YMM | MEM, IMM8);
716 put("vperm2i128", YMM, YMM, YMM | MEM, IMM8);
717
718 {
719 const char *tbl[] = {
720 "vpmaskmovd", "vpmaskmovq"
721 };
722 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
723 const char *name = tbl[i];
724 put(name, XMM, XMM, MEM);
725 put(name, YMM, YMM, MEM);
726 put(name, MEM, XMM, XMM);
727 put(name, MEM, YMM, YMM);
728 }
729 }
730 {
731 const char *tbl[] = {
732 "vpermd", "vpermps",
733 };
734 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
735 const char *name = tbl[i];
736 put(name, YMM, YMM, YMM | MEM); // three-operand form
737 }
738 }
739 {
740 const char *tbl[] = {
741 "vpermq", "vpermpd",
742 };
743 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
744 const char *name = tbl[i];
745 put(name, YMM, YMM | MEM, IMM8); // two operands plus an immediate
746 }
747 }
748 put("vpextrw", REG32e | MEM, XMM, IMM); // nasm is ok, yasm generate redundant code
749 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putMisc2()

void Test::putMisc2 ( )
inline

Definition at line 1871 of file make_512.cpp.

// putMisc2: emits test cases for a long list of instructions, grouped below
// by mnemonic. Most groups have one put() call per vector width (XMM, YMM,
// ZMM); the packed forms OR an M_1to* mask into the last vector operand, and
// some ZMM/scalar forms add a *_SAE or *_ER mask. The whole body is compiled
// only when XBYAK64 is defined.
// put(...) and the operand-class masks are defined outside this listing.
1872 {
1873#ifdef XBYAK64
1874 put("vpternlogd", XMM_KZ, _XMM, _XMM | _MEM | M_1to4, IMM8);
1875 put("vpternlogd", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
1876 put("vpternlogd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
1877
1878 put("vpternlogq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2, IMM8);
1879 put("vpternlogq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
1880 put("vpternlogq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
1881
1882 put("vgetexppd", XMM_KZ, _XMM | MEM | M_1to2);
1883 put("vgetexppd", YMM_KZ, _YMM | MEM | M_1to4);
1884 put("vgetexppd", ZMM_KZ, _ZMM | MEM | M_1to8 | ZMM_SAE);
1885
1886 put("vgetexpps", XMM_KZ, _XMM | MEM | M_1to4);
1887 put("vgetexpps", YMM_KZ, _YMM | MEM | M_1to8);
1888 put("vgetexpps", ZMM_KZ, _ZMM | MEM | M_1to16 | ZMM_SAE);
1889
1890 put("vgetexpsd", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE);
1891 put("vgetexpss", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE);
1892
1893 put("vgetmantpd", XMM_KZ, _XMM | _MEM | M_1to2, IMM8);
1894 put("vgetmantpd", YMM_KZ, _YMM | _MEM | M_1to4, IMM8);
1895 put("vgetmantpd", ZMM_KZ, _ZMM | _MEM | M_1to8, IMM8);
1896
1897 put("vgetmantps", XMM_KZ, _XMM | _MEM | M_1to4, IMM8);
1898 put("vgetmantps", YMM_KZ, _YMM | _MEM | M_1to8, IMM8);
1899 put("vgetmantps", ZMM_KZ, _ZMM | _MEM | M_1to16, IMM8);
1900
1901 put("vgetmantsd", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
1902 put("vgetmantss", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
1903
1904 put("vfixupimmpd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2, IMM8);
1905 put("vfixupimmpd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
1906 put("vfixupimmpd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
1907
1908 put("vfixupimmps", XMM_KZ, _XMM, _XMM | _MEM | M_1to4, IMM8);
1909 put("vfixupimmps", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
1910 put("vfixupimmps", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
1911
1912 put("vfixupimmsd", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
1913 put("vfixupimmss", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
1914
1915 put("vrcp14pd", XMM_KZ, _XMM | _MEM | M_1to2);
1916 put("vrcp14pd", YMM_KZ, _YMM | _MEM | M_1to4);
1917 put("vrcp14pd", ZMM_KZ, _ZMM | _MEM | M_1to8);
1918
1919 put("vrcp14ps", XMM_KZ, _XMM | _MEM | M_1to4);
1920 put("vrcp14ps", YMM_KZ, _YMM | _MEM | M_1to8);
1921 put("vrcp14ps", ZMM_KZ, _ZMM | _MEM | M_1to16);
1922
1923 put("vrcp14sd", XMM_KZ, _XMM, _XMM | _MEM);
1924
1925 put("vrcp14ss", XMM_KZ, _XMM, _XMM | _MEM);
1926
1927 put("vrsqrt14pd", XMM_KZ, _XMM | _MEM | M_1to2);
1928 put("vrsqrt14pd", YMM_KZ, _YMM | _MEM | M_1to4);
1929 put("vrsqrt14pd", ZMM_KZ, _ZMM | _MEM | M_1to8);
1930
1931 put("vrsqrt14ps", XMM_KZ, _XMM | _MEM | M_1to4);
1932 put("vrsqrt14ps", YMM_KZ, _YMM | _MEM | M_1to8);
1933 put("vrsqrt14ps", ZMM_KZ, _ZMM | _MEM | M_1to16);
1934
1935 put("vrsqrt14sd", XMM_KZ, _XMM, _XMM | _MEM);
1936
1937 put("vrsqrt14ss", XMM_KZ, _XMM, _XMM | _MEM);
1938
1939 put("vrndscalepd", XMM_KZ, _XMM | _MEM | M_1to2, IMM8);
1940 put("vrndscalepd", YMM_KZ, _YMM | _MEM | M_1to4, IMM8);
1941 put("vrndscalepd", ZMM_KZ, _ZMM | _MEM | M_1to8, IMM8);
1942
1943 put("vrndscaleps", XMM_KZ, _XMM | _MEM | M_1to4, IMM8);
1944 put("vrndscaleps", YMM_KZ, _YMM | _MEM | M_1to8, IMM8);
1945 put("vrndscaleps", ZMM_KZ, _ZMM | _MEM | M_1to16, IMM8);
1946
1947 put("vrndscalesd", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
1948
1949 put("vrndscaless", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
1950
1951 put("vscalefpd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
1952 put("vscalefpd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
1953 put("vscalefpd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 | ZMM_ER);
1954
1955 put("vscalefps", XMM_KZ, _XMM, _XMM | _MEM | M_1to4);
1956 put("vscalefps", YMM_KZ, _YMM, _YMM | _MEM | M_1to8);
1957 put("vscalefps", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 | ZMM_ER);
1958
1959 put("vscalefsd", XMM_KZ, _XMM, _XMM | _MEM | XMM_ER);
1960 put("vscalefss", XMM_KZ, _XMM, _XMM | _MEM | XMM_ER);
1961
1962 put("vdbpsadbw", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
1963 put("vdbpsadbw", YMM_KZ, _YMM, _YMM | _MEM, IMM8);
1964 put("vdbpsadbw", ZMM_KZ, _ZMM, _ZMM | _MEM, IMM8);
1965
1966 put("vpmultishiftqb", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
1967 put("vpmultishiftqb", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
1968 put("vpmultishiftqb", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
1969
1970 put("vpconflictd", XMM_KZ, _XMM | _MEM | M_1to4);
1971 put("vpconflictd", YMM_KZ, _YMM | _MEM | M_1to8);
1972 put("vpconflictd", ZMM_KZ, _ZMM | _MEM | M_1to16);
1973
1974 put("vpconflictq", XMM_KZ, _XMM | _MEM | M_1to2);
1975 put("vpconflictq", YMM_KZ, _YMM | _MEM | M_1to4);
1976 put("vpconflictq", ZMM_KZ, _ZMM | _MEM | M_1to8);
1977
1978 put("vplzcntd", XMM_KZ, _XMM | _MEM | M_1to4);
1979 put("vplzcntd", YMM_KZ, _YMM | _MEM | M_1to8);
1980 put("vplzcntd", ZMM_KZ, _ZMM | _MEM | M_1to16);
1981
1982 put("vplzcntq", XMM_KZ, _XMM | _MEM | M_1to2);
1983 put("vplzcntq", YMM_KZ, _YMM | _MEM | M_1to4);
1984 put("vplzcntq", ZMM_KZ, _ZMM | _MEM | M_1to8);
1985
1986 put("vpbroadcastmb2q", _XMM | _YMM | _ZMM, K);
1987 put("vpbroadcastmw2d", _XMM | _YMM | _ZMM, K);
1988
1989 put("vexp2pd", ZMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_SAE);
1990 put("vexp2ps", ZMM_KZ, _ZMM | _MEM | M_1to16 | ZMM_SAE);
1991
1992 put("vrcp28pd", ZMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_SAE);
1993 put("vrcp28ps", ZMM_KZ, _ZMM | _MEM | M_1to16 | ZMM_SAE);
1994 put("vrcp28sd", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE);
1995 put("vrcp28ss", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE);
1996
1997 put("vrsqrt28pd", ZMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_SAE);
1998 put("vrsqrt28ps", ZMM_KZ, _ZMM | _MEM | M_1to16 | ZMM_SAE);
1999 put("vrsqrt28sd", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE);
2000 put("vrsqrt28ss", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE);
2001
2002 put("vgatherpf0dps", VM32Z_K); // single-operand prefetch forms: only a masked vector-index operand
2003 put("vgatherpf0qps", VM32Z_K);
2004 put("vgatherpf0dpd", VM32Y_K);
2005 put("vgatherpf0qpd", VM32Z_K);
2006
2007 put("vgatherpf1dps", VM32Z_K);
2008 put("vgatherpf1qps", VM32Z_K);
2009 put("vgatherpf1dpd", VM32Y_K);
2010 put("vgatherpf1qpd", VM32Z_K);
2011
2012 put("vscatterpf0dps", VM32Z_K);
2013 put("vscatterpf0qps", VM32Z_K);
2014 put("vscatterpf0dpd", VM32Y_K);
2015 put("vscatterpf0qpd", VM32Z_K);
2016
2017 put("vscatterpf1dps", VM32Z_K);
2018 put("vscatterpf1qps", VM32Z_K);
2019 put("vscatterpf1dpd", VM32Y_K);
2020 put("vscatterpf1qpd", VM32Z_K);
2021
2022 put("vrangepd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2, IMM8);
2023 put("vrangepd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
2024 put("vrangepd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 | ZMM_SAE, IMM8);
2025
2026 put("vrangeps", XMM_KZ, _XMM, _XMM | _MEM | M_1to4, IMM8);
2027 put("vrangeps", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
2028 put("vrangeps", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 | ZMM_SAE, IMM8);
2029
2030 put("vrangesd", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
2031 put("vrangess", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
2032
2033 put("vreducepd", XMM_KZ, _XMM | _MEM | M_1to2, IMM8);
2034 put("vreducepd", YMM_KZ, _YMM | _MEM | M_1to4, IMM8);
2035 put("vreducepd", ZMM_KZ, _ZMM | _MEM | M_1to8 | ZMM_SAE, IMM8);
2036
2037 put("vreduceps", XMM_KZ, _XMM | _MEM | M_1to4, IMM8);
2038 put("vreduceps", YMM_KZ, _YMM | _MEM | M_1to8, IMM8);
2039 put("vreduceps", ZMM_KZ, _ZMM | _MEM | M_1to16 | ZMM_SAE, IMM8);
2040
2041 put("vreducesd", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
2042 put("vreducess", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
2043
2044 put("vpmadd52luq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
2045 put("vpmadd52luq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
2046 put("vpmadd52luq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
2047
2048 put("vpmadd52huq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
2049 put("vpmadd52huq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
2050 put("vpmadd52huq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
2051#endif
2052 }
const uint64 VM32Y_K
Definition make_512.cpp:73
const uint64 VM32Z_K
Definition make_512.cpp:39
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putMov()

void Test::putMov ( )
inline

Definition at line 1786 of file make_512.cpp.

1787 {
1788 put("vpmovm2b", _XMM | _YMM | _ZMM, K);
1789 put("vpmovm2w", _XMM | _YMM | _ZMM, K);
1790 put("vpmovm2d", _XMM | _YMM | _ZMM, K);
1791 put("vpmovm2q", _XMM | _YMM | _ZMM, K);
1792
1793 put("vpmovb2m", K, _XMM | _YMM | _ZMM);
1794 put("vpmovw2m", K, _XMM | _YMM | _ZMM);
1795 put("vpmovd2m", K, _XMM | _YMM | _ZMM);
1796 put("vpmovq2m", K, _XMM | _YMM | _ZMM);
1797
1798 put("vpmovqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
1799 put("vpmovsqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
1800 put("vpmovusqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
1801
1802 put("vpmovqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
1803 put("vpmovsqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
1804 put("vpmovusqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
1805
1806 put("vpmovqd", XMM_KZ | _MEM, _XMM | _YMM);
1807 put("vpmovqd", YMM_KZ | _MEM, _ZMM);
1808
1809 put("vpmovsqd", XMM_KZ | _MEM, _XMM | _YMM);
1810 put("vpmovsqd", YMM_KZ | _MEM, _ZMM);
1811
1812 put("vpmovusqd", XMM_KZ | _MEM, _XMM | _YMM);
1813 put("vpmovusqd", YMM_KZ | _MEM, _ZMM);
1814
1815 put("vpmovdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
1816 put("vpmovsdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
1817 put("vpmovusdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
1818
1819 put("vpmovdw", XMM_KZ | _MEM, _XMM | _YMM);
1820 put("vpmovdw", YMM_KZ | _MEM, _ZMM);
1821
1822 put("vpmovsdw", XMM_KZ | _MEM, _XMM | _YMM);
1823 put("vpmovsdw", YMM_KZ | _MEM, _ZMM);
1824
1825 put("vpmovusdw", XMM_KZ | _MEM, _XMM | _YMM);
1826 put("vpmovusdw", YMM_KZ | _MEM, _ZMM);
1827
1828 put("vpmovwb", XMM_KZ | _MEM, _XMM | _YMM);
1829 put("vpmovwb", YMM_KZ | _MEM, _ZMM);
1830
1831 put("vpmovswb", XMM_KZ | _MEM, _XMM | _YMM);
1832 put("vpmovswb", YMM_KZ | _MEM, _ZMM);
1833
1834 put("vpmovuswb", XMM_KZ | _MEM, _XMM | _YMM);
1835 put("vpmovuswb", YMM_KZ | _MEM, _ZMM);
1836 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putOpmask()

void Test::putOpmask ( )
inline

Definition at line 461 of file make_512.cpp.

462 {
463 {
464 const char *tbl[] = {
465 "kadd",
466 "kand",
467 "kandn",
468 "kor",
469 "kxnor",
470 "kxor",
471 };
472 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
473 std::string name = tbl[i];
474 put(name + "b", K, K, K);
475 put(name + "w", K, K, K);
476 put(name + "q", K, K, K);
477 put(name + "d", K, K, K);
478 }
479 put("kunpckbw", K, K, K);
480 put("kunpckwd", K, K, K);
481 put("kunpckdq", K, K, K);
482 }
483 {
484 const char *tbl[] = {
485 "knot",
486 "kortest",
487 "ktest",
488 };
489 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
490 std::string name = tbl[i];
491 put(name + "b", K, K);
492 put(name + "w", K, K);
493 put(name + "q", K, K);
494 put(name + "d", K, K);
495 }
496 }
497 {
498 const char *tbl[] = {
499 "kshiftl",
500 "kshiftr",
501 };
502 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
503 std::string name = tbl[i];
504 put(name + "b", K, K, IMM8);
505 put(name + "w", K, K, IMM8);
506 put(name + "q", K, K, IMM8);
507 put(name + "d", K, K, IMM8);
508 }
509 }
510 put("kmovw", K, K | MEM | REG32);
511 put("kmovq", K, K | MEM);
512 put("kmovb", K, K | MEM | REG32);
513 put("kmovd", K, K | MEM | REG32);
514
515 put("kmovw", MEM | REG32, K);
516 put("kmovq", MEM, K);
517 put("kmovb", MEM | REG32, K);
518 put("kmovd", MEM | REG32, K);
519#ifdef XBYAK64
520 put("kmovq", K, REG64);
521 put("kmovq", REG64, K);
522#endif
523 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putPerm()

void Test::putPerm ( )
inline

Definition at line 1742 of file make_512.cpp.

1743 {
1744 const uint64_t b0Tbl[] = { 0, 0, 0 };
1745 const uint64_t b4Tbl[] = { M_1to4, M_1to8, M_1to16 };
1746 const uint64_t b2Tbl[] = { M_1to2, M_1to4, M_1to8 };
1747 const struct Tbl {
1748 const char *name;
1749 uint64_t b;
1750 } tbl[] = {
1751 { "vpermt2b", 0 },
1752 { "vpermt2w", 0 },
1753 { "vpermt2d", M_1to4 },
1754 { "vpermt2q", M_1to2 },
1755 { "vpermt2ps", M_1to4 },
1756 { "vpermt2pd", M_1to2 },
1757
1758 { "vpermi2b", 0 },
1759 { "vpermi2w", 0 },
1760 { "vpermi2d", M_1to4 },
1761 { "vpermi2q", M_1to2 },
1762 { "vpermi2ps", M_1to4 },
1763 };
1764 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1765 const Tbl& p = tbl[i];
1766 const uint64_t *bTbl = p.b == 0 ? b0Tbl : p.b == M_1to4 ? b4Tbl : b2Tbl;
1767 put(p.name, XMM_KZ, _XMM, _XMM | _MEM | bTbl[0]);
1768 put(p.name, YMM_KZ, _YMM, _YMM | _MEM | bTbl[1]);
1769 put(p.name, ZMM_KZ, _ZMM, _ZMM | _MEM | bTbl[2]);
1770 }
1771 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putRot()

void Test::putRot ( )
inline

Definition at line 1837 of file make_512.cpp.

1838 {
1839 put("vprolvd", XMM_KZ, _XMM, _XMM | _MEM | M_1to4);
1840 put("vprolvd", YMM_KZ, _YMM, _YMM | _MEM | M_1to8);
1841 put("vprolvd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16);
1842
1843 put("vprolvq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
1844 put("vprolvq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
1845 put("vprolvq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
1846
1847 put("vprorvd", XMM_KZ, _XMM, _XMM | _MEM | M_1to4);
1848 put("vprorvd", YMM_KZ, _YMM, _YMM | _MEM | M_1to8);
1849 put("vprorvd", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16);
1850
1851 put("vprorvq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
1852 put("vprorvq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
1853 put("vprorvq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
1854
1855 put("vprold", XMM_KZ, _XMM | _MEM | M_1to4, IMM8);
1856 put("vprold", YMM_KZ, _YMM | _MEM | M_1to8, IMM8);
1857 put("vprold", ZMM_KZ, _ZMM | _MEM | M_1to16, IMM8);
1858
1859 put("vprolq", XMM_KZ, _XMM | _MEM | M_1to2, IMM8);
1860 put("vprolq", YMM_KZ, _YMM | _MEM | M_1to4, IMM8);
1861 put("vprolq", ZMM_KZ, _ZMM | _MEM | M_1to8, IMM8);
1862
1863 put("vprord", XMM_KZ, _XMM | _MEM | M_1to4, IMM8);
1864 put("vprord", YMM_KZ, _YMM | _MEM | M_1to8, IMM8);
1865 put("vprord", ZMM_KZ, _ZMM | _MEM | M_1to16, IMM8);
1866
1867 put("vprorq", XMM_KZ, _XMM | _MEM | M_1to2, IMM8);
1868 put("vprorq", YMM_KZ, _YMM | _MEM | M_1to4, IMM8);
1869 put("vprorq", ZMM_KZ, _ZMM | _MEM | M_1to8, IMM8);
1870 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putScatter()

void Test::putScatter ( )
inline

Definition at line 1593 of file make_512.cpp.

1594 {
1595#ifdef XBYAK64
1596 const struct Tbl {
1597 const char *name;
1598 int mode;
1599 } tbl[] = {
1600 { "vpscatterdd", xx_yy_zz },
1601 { "vpscatterdq", xx_xy_yz },
1602 { "vpscatterqd", xx_yx_zy },
1603 { "vpscatterqq", xx_yy_zz },
1604
1605 { "vscatterdps", xx_yy_zz },
1606 { "vscatterdpd", xx_xy_yz },
1607 { "vscatterqps", xx_yx_zy },
1608 { "vscatterqpd", xx_yy_zz },
1609 };
1610 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1611 const Tbl& p = tbl[i];
1612 switch (p.mode) {
1613 case xx_yy_zz:
1614 put(p.name, VM32X_K, _XMM);
1615 put(p.name, VM32Y_K, _YMM);
1616 put(p.name, VM32Z_K, _ZMM);
1617 break;
1618 case xx_yx_zy:
1619 put(p.name, VM32X_K, _XMM);
1620 put(p.name, VM32Y_K, _XMM);
1621 put(p.name, VM32Z_K, _YMM);
1622 break;
1623 case xx_xy_yz:
1624 put(p.name, VM32X_K, _XMM);
1625 put(p.name, VM32X_K, _YMM);
1626 put(p.name, VM32Y_K, _ZMM);
1627 break;
1628 }
1629 }
1630#endif
1631 }
const uint64 VM32X_K
Definition make_512.cpp:33
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putSeg()

void Test::putSeg ( )
inline

Definition at line 2290 of file make_nm.cpp.

2291 {
2292 {
2293 const char *segTbl[] = {
2294 "es",
2295 "cs",
2296 "ss",
2297 "ds",
2298 "fs",
2299 "gs",
2300 };
2301 for (size_t i = 0; i < NUM_OF_ARRAY(segTbl); i++) {
2302 const char *seg = segTbl[i];
2303 const char *op1Tbl[] = {
2304 "ax",
2305 "edx",
2306 (isXbyak_ ? "ptr [eax]" : "[eax]"),
2307#ifdef XBYAK64
2308 "r9",
2309#endif
2310 };
2311 for (size_t j = 0; j < NUM_OF_ARRAY(op1Tbl); j++) {
2312 const char *op1 = op1Tbl[j];
2313 if (isXbyak_) {
2314 printf("mov(%s, %s); dump();\n", op1, seg);
2315 printf("mov(%s, %s); dump();\n", seg, op1);
2316 } else {
2317 printf("mov %s, %s\n", op1, seg);
2318 printf("mov %s, %s\n", seg, op1);
2319 }
2320 }
2321 }
2322 }
2323 {
2324 const char *segTbl[] = {
2325#ifdef XBYAK32
2326 "es",
2327 "ss",
2328 "ds",
2329#endif
2330 "fs",
2331 "gs",
2332 };
2333 for (size_t i = 0; i < NUM_OF_ARRAY(segTbl); i++) {
2334 const char *seg = segTbl[i];
2335 if (isXbyak_) {
2336 printf("push(%s); dump();\n", seg);
2337 printf("pop(%s); dump();\n", seg);
2338 } else {
2339 printf("push %s\n", seg);
2340 printf("pop %s\n", seg);
2341 }
2342 }
2343 }
2344 }
Here is the caller graph for this function:

◆ putShuff()

void Test::putShuff ( )
inline

Definition at line 1772 of file make_512.cpp.

1773 {
1774 put("vshuff32x4", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
1775 put("vshuff32x4", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
1776
1777 put("vshuff64x2", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
1778 put("vshuff64x2", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
1779
1780 put("vshufi32x4", YMM_KZ, _YMM, _YMM | _MEM | M_1to8, IMM8);
1781 put("vshufi32x4", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16, IMM8);
1782
1783 put("vshufi64x2", YMM_KZ, _YMM, _YMM | _MEM | M_1to4, IMM8);
1784 put("vshufi64x2", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8, IMM8);
1785 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putVpcmp()

void Test::putVpcmp ( )
inline

Definition at line 1658 of file make_512.cpp.

1659 {
1660 const uint64_t b0Tbl[] = { 0, 0, 0 };
1661 const uint64_t b4Tbl[] = { M_1to4, M_1to8, M_1to16 };
1662 const uint64_t b2Tbl[] = { M_1to2, M_1to4, M_1to8 };
1663 const struct Tbl {
1664 const char *name;
1665 uint64_t b;
1666 } tbl[] = {
1667 { "vpcmpb", 0 },
1668 { "vpcmpub", 0 },
1669 { "vpcmpw", 0 },
1670 { "vpcmpuw", 0 },
1671 { "vpcmpd", M_1to4 },
1672 { "vpcmpud", M_1to4 },
1673 { "vpcmpq", M_1to2 },
1674 { "vpcmpuq", M_1to2 },
1675 };
1676 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1677 const Tbl& p = tbl[i];
1678 const uint64_t *bTbl = p.b == 0 ? b0Tbl : p.b == M_1to4 ? b4Tbl : b2Tbl;
1679 put(p.name, K_K, _XMM, _XMM | _MEM | bTbl[0], IMM8);
1680 put(p.name, K_K, _YMM, _YMM | _MEM | bTbl[1], IMM8);
1681 put(p.name, K_K, _ZMM, _ZMM | _MEM | bTbl[2], IMM8);
1682 }
1683 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ putVtest()

void Test::putVtest ( )
inline

Definition at line 1684 of file make_512.cpp.

1685 {
1686 const uint64_t b0Tbl[] = { 0, 0, 0 };
1687 const uint64_t b4Tbl[] = { M_1to4, M_1to8, M_1to16 };
1688 const uint64_t b2Tbl[] = { M_1to2, M_1to4, M_1to8 };
1689 const struct Tbl {
1690 const char *name;
1691 uint64_t b;
1692 } tbl[] = {
1693 { "vptestmb", 0 },
1694 { "vptestmw", 0 },
1695 { "vptestmd", M_1to4 },
1696 { "vptestmq", M_1to2 },
1697
1698 { "vptestnmb", 0 },
1699 { "vptestnmw", 0 },
1700 { "vptestnmd", M_1to4 },
1701 { "vptestnmq", M_1to2 },
1702 };
1703 for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
1704 const Tbl& p = tbl[i];
1705 const uint64_t *bTbl = p.b == 0 ? b0Tbl : p.b == M_1to4 ? b4Tbl : b2Tbl;
1706 put(p.name, K_K, _XMM, _XMM | _MEM | bTbl[0]);
1707 put(p.name, K_K, _YMM, _YMM | _MEM | bTbl[1]);
1708 put(p.name, K_K, _ZMM, _ZMM | _MEM | bTbl[2]);
1709 }
1710 }
Here is the call graph for this function:
Here is the caller graph for this function:

◆ separateFunc() [1/2]

void Test::separateFunc ( )
inline

Definition at line 435 of file make_512.cpp.

436 {
437 if (!isXbyak_) return;
438 printf(
439 " }\n"
440 " void gen%d()\n"
441 " {\n", funcNum_++);
442 }
Here is the caller graph for this function:

◆ separateFunc() [2/2]

void Test::separateFunc ( )
inline

Definition at line 2134 of file make_nm.cpp.

2135 {
2136 if (!isXbyak_) return;
2137 printf(
2138 " }\n"
2139 " void gen%d()\n"
2140 " {\n", funcNum_++);
2141 }

◆ stopThread()

void Test::stopThread ( )
inline

Definition at line 55 of file test_mmx.cpp.

55 { }

◆ threadEntry()

void Test::threadEntry ( )
inline

Definition at line 40 of file test_mmx.cpp.

41 {
42 printf("n=%d\n", n_);
43 WriteMMX w;
44 w.set()(n_);
45 ReadMMX r;
46 for (;;) {
47 int b = r.get()();
48 printf("b=%d\n", b);
49 if (b != n_) {
50 printf("mm0 has changed!\n");
51 }
52 MIE::MIE_Sleep(1000);
53 }
54 }
const mie::Vuint & r
Definition bn.cpp:28
void(*)(int x) set() const
Definition test_mmx.cpp:20

The documentation for this class was generated from the following files: