1#ifndef XBYAK_XBYAK_UTIL_H_
2#define XBYAK_XBYAK_UTIL_H_
13 #if (_MSC_VER < 1400) && defined(XBYAK32)
14 static inline __declspec(naked)
void __cpuid(
int[4],
int)
// Non-zero when the compiling GCC's (__GNUC__, __GNUC_MINOR__) pair is at least (major, minor).
// Both versions are packed as (major << 16) + minor so a single integer comparison suffices.
36 #define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
38 #if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
41 #if defined(__APPLE__) && defined(XBYAK32)
// 32-bit Apple variant: executes CPUID with EAX = eaxIn and returns EAX/EBX/ECX/EDX in a/b/c/d.
// EBX is pushed before and popped after CPUID so the compiler never sees it modified
// (presumably because EBX is reserved, e.g. as the PIC base, on this target - confirm).
// CPUID's EBX result is therefore copied into ESI and handed back through the "=S" operand.
// The copy has to read EBX; reading EBP would return whatever EBP holds, not CPUID data.
#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
// 32-bit Apple variant: executes CPUID with EAX = eaxIn and ECX = ecxIn and returns
// EAX/EBX/ECX/EDX in a/b/c/d.
// EBX is pushed before and popped after CPUID so the compiler never sees it modified
// (presumably because EBX is reserved, e.g. as the PIC base, on this target - confirm).
// CPUID's EBX result is therefore copied into ESI and handed back through the "=S" operand.
// The copy has to read EBX; reading EBP would return whatever EBP holds, not CPUID data.
#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
// Executes CPUID with EAX = eaxIn and stores the resulting EAX, EBX, ECX and EDX in a, b, c and d.
// The "0" input constraint ties eaxIn to the same register as output operand 0 (EAX).
45 #define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
// Executes CPUID with EAX = eaxIn and ECX = ecxIn and stores the resulting EAX, EBX, ECX and EDX
// in a, b, c and d. The "0" and "2" input constraints tie eaxIn / ecxIn to the registers of
// output operands 0 (EAX) and 2 (ECX).
46 #define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
51namespace Xbyak {
namespace util {
58 unsigned int get32bitAsBE(
const char *x)
const
60 return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
62 unsigned int mask(
int n)
const
71 model = (data[0] >> 4) & mask(4);
72 family = (data[0] >> 8) & mask(4);
74 extModel = (data[0] >> 16) & mask(4);
/*
	Extracts the bit field val[end:base], both bounds inclusive, right-aligned.
	@param val  word to read from
	@param base index of the lowest bit of the field (0..31)
	@param end  index of the highest bit of the field (base..31)
	@return the (end - base + 1)-bit field, shifted down to bit 0
	@note The callers pass inclusive CPUID field bounds (e.g. 14..25, 22..31), so the mask
	must be end + 1 - base bits wide; a mask of end - base bits would drop the top bit.
	A field that is 32 bits wide (or more, after unsigned wrap-around of a reversed range)
	returns val >> base unmasked, because shifting a 32-bit value by 32 is undefined.
*/
unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end)
{
	const unsigned int width = end + 1 - base;
	const unsigned int shifted = val >> base;
	if (width >= 32) return shifted;
	return shifted & ((1u << width) - 1);
}
91 void setCacheHierarchy()
93 if ((type_ &
tINTEL) == 0)
return;
94 const unsigned int NO_CACHE = 0;
95 const unsigned int DATA_CACHE = 1;
97 const unsigned int UNIFIED_CACHE = 3;
98 unsigned int smt_width = 0;
99 unsigned int n_cores = (
unsigned int) -1;
100 unsigned int data[4];
109 smt_width = data[1] & 0x7FFF;
111 n_cores = data[1] & 0x7FFF;
125 unsigned int cacheType = extractBit(data[0], 0, 4);
126 if (cacheType == NO_CACHE)
break;
127 if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
128 unsigned int nb_logical_cores = (std::min)(extractBit(data[0], 14, 25) + 1, n_cores);
130 (extractBit(data[1], 22, 31) + 1)
131 * (extractBit(data[1], 12, 21) + 1)
132 * (extractBit(data[1], 0, 11) + 1)
134 if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
135 assert(smt_width != 0);
172 static inline void getCpuid(
unsigned int eaxIn,
unsigned int data[4])
175 __cpuid(
reinterpret_cast<int*
>(data), eaxIn);
177 __cpuid(eaxIn, data[0], data[1], data[2], data[3]);
180 static inline void getCpuidEx(
unsigned int eaxIn,
unsigned int ecxIn,
unsigned int data[4])
183 __cpuidex(
reinterpret_cast<int*
>(data), eaxIn, ecxIn);
185 __cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
193 unsigned int eax,
edx;
196 __asm__
volatile(
".byte 0x0f, 0x01, 0xd0" :
"=a"(eax),
"=d"(
edx) :
"c"(0));
270 unsigned int data[4];
271 const unsigned int&
EAX = data[0];
272 const unsigned int& EBX = data[1];
273 const unsigned int& ECX = data[2];
274 const unsigned int& EDX = data[3];
276 const unsigned int maxNum =
EAX;
277 static const char intel[] =
"ntel";
278 static const char amd[] =
"cAMD";
279 if (ECX == get32bitAsBE(amd)) {
282 if (EDX & (1U << 31)) type_ |=
t3DN;
283 if (EDX & (1U << 15)) type_ |=
tCMOV;
284 if (EDX & (1U << 30)) type_ |=
tE3DN;
285 if (EDX & (1U << 22)) type_ |=
tMMX2;
286 if (EDX & (1U << 27)) type_ |=
tRDTSCP;
288 if (ECX == get32bitAsBE(intel)) {
291 if (EDX & (1U << 27)) type_ |=
tRDTSCP;
292 if (ECX & (1U << 5)) type_ |=
tLZCNT;
296 if (ECX & (1U << 0)) type_ |=
tSSE3;
297 if (ECX & (1U << 9)) type_ |=
tSSSE3;
298 if (ECX & (1U << 19)) type_ |=
tSSE41;
299 if (ECX & (1U << 20)) type_ |=
tSSE42;
300 if (ECX & (1U << 22)) type_ |=
tMOVBE;
301 if (ECX & (1U << 23)) type_ |=
tPOPCNT;
302 if (ECX & (1U << 25)) type_ |=
tAESNI;
304 if (ECX & (1U << 27)) type_ |=
tOSXSAVE;
305 if (ECX & (1U << 30)) type_ |=
tRDRAND;
306 if (ECX & (1U << 29)) type_ |=
tF16C;
308 if (EDX & (1U << 15)) type_ |=
tCMOV;
309 if (EDX & (1U << 23)) type_ |=
tMMX;
310 if (EDX & (1U << 25)) type_ |=
tMMX2 |
tSSE;
311 if (EDX & (1U << 26)) type_ |=
tSSE2;
317 if (ECX & (1U << 28)) type_ |=
tAVX;
318 if (ECX & (1U << 12)) type_ |=
tFMA;
319 if (((bv >> 5) & 7) == 7) {
321 if (EBX & (1U << 16)) type_ |=
tAVX512F;
323 if (EBX & (1U << 17)) type_ |=
tAVX512DQ;
325 if (EBX & (1U << 26)) type_ |=
tAVX512PF;
326 if (EBX & (1U << 27)) type_ |=
tAVX512ER;
327 if (EBX & (1U << 28)) type_ |=
tAVX512CD;
328 if (EBX & (1U << 30)) type_ |=
tAVX512BW;
329 if (EBX & (1U << 31)) type_ |=
tAVX512VL;
332 if (ECX & (1U << 8)) type_ |=
tGFNI;
333 if (ECX & (1U << 9)) type_ |=
tVAES;
346 if (type_ &
tAVX && (EBX & (1U << 5))) type_ |=
tAVX2;
347 if (EBX & (1U << 3)) type_ |=
tBMI1;
348 if (EBX & (1U << 8)) type_ |=
tBMI2;
350 if (EBX & (1U << 18)) type_ |=
tRDSEED;
351 if (EBX & (1U << 19)) type_ |=
tADX;
352 if (EBX & (1U << 20)) type_ |=
tSMAP;
353 if (EBX & (1U << 4)) type_ |=
tHLE;
354 if (EBX & (1U << 11)) type_ |=
tRTM;
355 if (EBX & (1U << 14)) type_ |=
tMPX;
356 if (EBX & (1U << 29)) type_ |=
tSHA;
364 printf(
"family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n",
370 return (type & type_) != 0;
381 unsigned int eax,
edx;
382 __asm__
volatile(
"rdtsc" :
"=a"(eax),
"=d"(
edx));
402 void clear() { count_ = 0; clock_ = 0; }
// Flag bits that StackFrame masks out of its tNum argument to decide whether
// rcx / rdx are requested; they occupy bit 6 and bit 7 respectively.
const int UseRCX = 0x40;
const int UseRDX = 0x80;
413 static const size_t maxTblNum = 10;
414 const Xbyak::Reg64 *tbl_[maxTblNum];
417 Pack() : tbl_(), n_(0) {}
418 Pack(
const Xbyak::Reg64 *
tbl,
size_t n) {
init(
tbl, n); }
419 Pack(
const Pack& rhs)
422 for (
size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
424 Pack& operator=(
const Pack& rhs)
427 for (
size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
430 Pack(
const Xbyak::Reg64& t0)
431 { n_ = 1; tbl_[0] = &t0; }
432 Pack(
const Xbyak::Reg64& t1,
const Xbyak::Reg64& t0)
433 { n_ = 2; tbl_[0] = &t0; tbl_[1] = &t1; }
434 Pack(
const Xbyak::Reg64& t2,
const Xbyak::Reg64& t1,
const Xbyak::Reg64& t0)
435 { n_ = 3; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; }
436 Pack(
const Xbyak::Reg64& t3,
const Xbyak::Reg64& t2,
const Xbyak::Reg64& t1,
const Xbyak::Reg64& t0)
437 { n_ = 4; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; }
438 Pack(
const Xbyak::Reg64& t4,
const Xbyak::Reg64& t3,
const Xbyak::Reg64& t2,
const Xbyak::Reg64& t1,
const Xbyak::Reg64& t0)
439 { n_ = 5; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; }
440 Pack(
const Xbyak::Reg64& t5,
const Xbyak::Reg64& t4,
const Xbyak::Reg64& t3,
const Xbyak::Reg64& t2,
const Xbyak::Reg64& t1,
const Xbyak::Reg64& t0)
441 { n_ = 6; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; }
442 Pack(
const Xbyak::Reg64& t6,
const Xbyak::Reg64& t5,
const Xbyak::Reg64& t4,
const Xbyak::Reg64& t3,
const Xbyak::Reg64& t2,
const Xbyak::Reg64& t1,
const Xbyak::Reg64& t0)
443 { n_ = 7; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; }
444 Pack(
const Xbyak::Reg64& t7,
const Xbyak::Reg64& t6,
const Xbyak::Reg64& t5,
const Xbyak::Reg64& t4,
const Xbyak::Reg64& t3,
const Xbyak::Reg64& t2,
const Xbyak::Reg64& t1,
const Xbyak::Reg64& t0)
445 { n_ = 8; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; }
446 Pack(
const Xbyak::Reg64& t8,
const Xbyak::Reg64& t7,
const Xbyak::Reg64& t6,
const Xbyak::Reg64& t5,
const Xbyak::Reg64& t4,
const Xbyak::Reg64& t3,
const Xbyak::Reg64& t2,
const Xbyak::Reg64& t1,
const Xbyak::Reg64& t0)
447 { n_ = 9; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; }
448 Pack(
const Xbyak::Reg64& t9,
const Xbyak::Reg64& t8,
const Xbyak::Reg64& t7,
const Xbyak::Reg64& t6,
const Xbyak::Reg64& t5,
const Xbyak::Reg64& t4,
const Xbyak::Reg64& t3,
const Xbyak::Reg64& t2,
const Xbyak::Reg64& t1,
const Xbyak::Reg64& t0)
449 { n_ = 10; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; tbl_[9] = &t9; }
450 Pack& append(
const Xbyak::Reg64& t)
452 if (n_ == maxTblNum) {
453 fprintf(stderr,
"ERR Pack::can't append\n");
454 throw Error(ERR_BAD_PARAMETER);
459 void init(
const Xbyak::Reg64 *
tbl,
size_t n)
462 fprintf(stderr,
"ERR Pack::init bad n=%d\n", (
int)n);
463 throw Error(ERR_BAD_PARAMETER);
466 for (
size_t i = 0; i < n; i++) {
470 const Xbyak::Reg64& operator[](
size_t n)
const
473 fprintf(stderr,
"ERR Pack bad n=%d\n", (
int)n);
474 throw Error(ERR_BAD_PARAMETER);
478 size_t size()
const {
return n_; }
482 Pack
sub(
size_t pos,
size_t num =
size_t(-1))
const
484 if (num ==
size_t(-1))
num = n_ - pos;
485 if (pos + num > n_) {
486 fprintf(stderr,
"ERR Pack::sub bad pos=%d, num=%d\n", (
int)pos, (
int)num);
487 throw Error(ERR_BAD_PARAMETER);
491 for (
size_t i = 0; i <
num; i++) {
492 pack.tbl_[i] = tbl_[pos + i];
498 for (
size_t i = 0; i < n_; i++) {
499 printf(
"%s ", tbl_[i]->toString());
507 static const int noSaveNum = 6;
508 static const int rcxPos = 0;
509 static const int rdxPos = 1;
511 static const int noSaveNum = 8;
512 static const int rcxPos = 3;
513 static const int rdxPos = 2;
523 Xbyak::Reg64 pTbl_[4];
524 Xbyak::Reg64 tTbl_[10];
527 StackFrame(
const StackFrame&);
528 void operator=(
const StackFrame&);
548 StackFrame(
Xbyak::CodeGenerator *code,
int pNum,
int tNum = 0,
int stackSizeByte = 0,
bool makeEpilog =
true)
551 , tNum_(tNum & ~(UseRCX | UseRDX))
552 , useRcx_((tNum & UseRCX) != 0)
553 , useRdx_((tNum & UseRDX) != 0)
556 , makeEpilog_(makeEpilog)
560 using namespace Xbyak;
561 if (pNum < 0 || pNum > 4)
throw Error(ERR_BAD_PNUM);
562 const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
563 if (allRegNum < pNum || allRegNum > 14)
throw Error(ERR_BAD_TNUM);
564 const Reg64& _rsp = code->rsp;
566 saveNum_ = (std::max)(0, allRegNum - noSaveNum);
567 const int *
tbl = getOrderTbl() + noSaveNum;
568 P_ = saveNum_ + (stackSizeByte + 7) / 8;
569 if (P_ > 0 && (P_ & 1) == 0) P_++;
571 if (P_ > 0) code->sub(_rsp, P_);
573 for (
int i = 0; i < (std::min)(saveNum_, 4); i++) {
574 code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(
tbl[i]));
576 for (
int i = 4; i < saveNum_; i++) {
577 code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(
tbl[i]));
580 for (
int i = 0; i < saveNum_; i++) {
581 code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(
tbl[i]));
585 for (
int i = 0; i < pNum; i++) {
586 pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
588 for (
int i = 0; i < tNum_; i++) {
589 tTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
591 if (useRcx_ && rcxPos < pNum) code_->
mov(code_->r10, code_->rcx);
592 if (useRdx_ && rdxPos < pNum) code_->
mov(code_->r11, code_->rdx);
593 p_.init(pTbl_, pNum);
594 t_.init(tTbl_, tNum_);
600 void close(
bool callRet =
true)
602 using namespace Xbyak;
603 const Reg64& _rsp = code_->rsp;
605 const int *
tbl = getOrderTbl() + noSaveNum;
607 for (
int i = 0; i < (std::min)(saveNum_, 4); i++) {
608 code_->
mov(Reg64(
tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
610 for (
int i = 4; i < saveNum_; i++) {
611 code_->
mov(Reg64(
tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
614 for (
int i = 0; i < saveNum_; i++) {
615 code_->
mov(Reg64(
tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
618 if (P_ > 0) code_->
add(_rsp, P_);
620 if (callRet) code_->
ret();
624 if (!makeEpilog_)
return;
627 }
catch (std::exception& e) {
628 printf(
"ERR:StackFrame %s\n", e.what());
631 printf(
"ERR:StackFrame otherwise\n");
636 const int *getOrderTbl()
const
638 using namespace Xbyak;
639 static const int tbl[] = {
641 Operand::RCX, Operand::RDX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, Operand::RDI, Operand::RSI,
643 Operand::RDI, Operand::RSI, Operand::RDX, Operand::RCX, Operand::R8, Operand::R9, Operand::R10, Operand::R11,
645 Operand::RBX, Operand::RBP, Operand::R12, Operand::R13, Operand::R14, Operand::R15
649 int getRegIdx(
int& pos)
const
652 using namespace Xbyak;
653 const int *
tbl = getOrderTbl();
656 if (
r == Operand::RCX) {
return Operand::R10; }
657 if (
r == Operand::R10) {
r =
tbl[pos++]; }
660 if (
r == Operand::RDX) {
return Operand::R11; }
661 if (
r == Operand::R11) {
return tbl[pos++]; }
void add(const Operand &op, uint32 imm)
void mov(const Operand &reg1, const Operand &reg2)
static const Type tAVX512PF
static const Type tRDRAND
static const unsigned int maxNumberCacheLevels
static const Type tPREFETCHW
unsigned int data_cache_size[maxNumberCacheLevels]
unsigned int cores_sharing_data_cache[maxNumberCacheLevels]
unsigned int getDataCacheLevels() const
static void getCpuid(unsigned int eaxIn, unsigned int data[4])
bool has(Type type) const
unsigned int getCoresSharingDataCache(unsigned int i) const
static const Type tAVX512_VBMI
static void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
static const Type tAVX512_VPOPCNTDQ
static const Type tAVX512_BITALG
static const Type tOSXSAVE
static const Type tPCLMULQDQ
static const Type tPREFETCHWT1
static const Type tAVX512CD
static const Type tVPCLMULQDQ
unsigned int data_cache_levels
static const Type tAVX512_4VNNIW
static const Type tAVX512_IFMA
static const Type tAVX512BW
static const Type tAVX512_VNNI
static const Type tAVX512ER
static uint64 getXfeature()
static const Type tAVX512IFMA
static const Type tRDSEED
static const Type tPOPCNT
static const Type tRDTSCP
static const Type tAVX512VBMI
unsigned int getDataCacheSize(unsigned int i) const
static const Type tENHANCED_REP
static const Type tAVX512DQ
static const Type tAVX512_VBMI2
static const Type tAVX512VL
static const Type tAVX512F
static const Type tAVX512_4FMAPS
void close(T *e, websocketpp::connection_hdl hdl)
LOGGING_API void printf(Category category, const char *format,...)
static const Reg32 esp(Operand::ESP)
static const Reg32 ecx(Operand::ECX)
static const Reg32 edx(Operand::EDX)
static const AddressFrame dword(32)
static const Reg32 ebx(Operand::EBX)
static const Reg32 esi(Operand::ESI)
void pack(instruction_stream *stream, uint32_t field)
Xbyak: a JIT assembler for x86 (IA-32) / x64, written in C++.
void sub(const Operand &op, uint32 imm)
#define __cpuid(eaxIn, a, b, c, d)
#define __cpuid_count(eaxIn, ecxIn, a, b, c, d)