Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
xbyak_util.h
Go to the documentation of this file.
1#ifndef XBYAK_XBYAK_UTIL_H_
2#define XBYAK_XBYAK_UTIL_H_
3
10#include "xbyak.h"
11
12#ifdef _MSC_VER
13 #if (_MSC_VER < 1400) && defined(XBYAK32)
14 static inline __declspec(naked) void __cpuid(int[4], int)
15 {
16 __asm {
17 push ebx
18 push esi
19 mov eax, dword ptr [esp + 4 * 2 + 8] // eaxIn
20 cpuid
21 mov esi, dword ptr [esp + 4 * 2 + 4] // data
22 mov dword ptr [esi], eax
23 mov dword ptr [esi + 4], ebx
24 mov dword ptr [esi + 8], ecx
25 mov dword ptr [esi + 12], edx
26 pop esi
27 pop ebx
28 ret
29 }
30 }
31 #else
32 #include <intrin.h> // for __cpuid
33 #endif
34#else
35 #ifndef __GNUC_PREREQ
36 #define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
37 #endif
38 #if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
39 #include <cpuid.h>
40 #else
41 #if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm'
// EBX cannot be named as an asm output operand in this configuration, so it is
// saved on the stack, cpuid is executed, the EBX result is handed back through
// ESI ("=S"), and the caller's EBX is restored.
// The copy must read %ebx (the cpuid result); copying %ebp would return the frame pointer.
#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
44 #else
45 #define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
46 #define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
47 #endif
48 #endif
49#endif
50
51namespace Xbyak { namespace util {
52
56class Cpu {
57 uint64 type_;
58 unsigned int get32bitAsBE(const char *x) const
59 {
60 return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
61 }
62 unsigned int mask(int n) const
63 {
64 return (1U << n) - 1;
65 }
66 void setFamily()
67 {
68 unsigned int data[4];
69 getCpuid(1, data);
70 stepping = data[0] & mask(4);
71 model = (data[0] >> 4) & mask(4);
72 family = (data[0] >> 8) & mask(4);
73 // type = (data[0] >> 12) & mask(2);
74 extModel = (data[0] >> 16) & mask(4);
75 extFamily = (data[0] >> 20) & mask(8);
76 if (family == 0x0f) {
78 } else {
80 }
81 if (family == 6 || family == 0x0f) {
82 displayModel = (extModel << 4) + model;
83 } else {
85 }
86 }
/*
	extract the bit field val[end:base], both bit positions inclusive
	the call sites pass the first and last bit of a CPUID field
	(e.g. 22, 31 or 0, 11), so the field is end + 1 - base bits wide;
	treating end as exclusive would drop the top bit of every field
	@param val [in] register value
	@param base [in] lowest bit of the field (0 <= base <= end)
	@param end [in] highest bit of the field (end <= 31)
	@return the field shifted down to bit 0
*/
unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end)
{
	const unsigned int width = end + 1 - base;
	const unsigned int shifted = val >> base;
	// a full 32-bit field needs no mask, and 1u << 32 would be undefined
	if (width >= 32) return shifted;
	return shifted & ((1u << width) - 1);
}
91 void setCacheHierarchy()
92 {
93 if ((type_ & tINTEL) == 0) return;
94 const unsigned int NO_CACHE = 0;
95 const unsigned int DATA_CACHE = 1;
96// const unsigned int INSTRUCTION_CACHE = 2;
97 const unsigned int UNIFIED_CACHE = 3;
98 unsigned int smt_width = 0;
99 unsigned int n_cores = (unsigned int) -1;
100 unsigned int data[4];
101
102 /*
103 if leaf 11 exists, we use it to get the number of smt cores and cores on socket
104 If x2APIC is supported, these are the only correct numbers.
105 */
106 getCpuidEx(0x0, 0, data);
107 if (data[0] >= 11) {
108 getCpuidEx(0xB, 0, data); // CPUID for SMT Level
109 smt_width = data[1] & 0x7FFF;
110 getCpuidEx(0xB, 1, data); // CPUID for CORE Level
111 n_cores = data[1] & 0x7FFF;
112 }
113
114 /*
115 Assumptions:
116 the first level of data cache is not shared (which is the
117 case for every existing architecture) and use this to
118 determine the SMT width for arch not supporting leaf 11.
119 when leaf 4 reports a number of core less than n_cores
120 on socket reported by leaf 11, then it is a correct number
121 of cores not an upperbound.
122 */
123 for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
124 getCpuidEx(0x4, i, data);
125 unsigned int cacheType = extractBit(data[0], 0, 4);
126 if (cacheType == NO_CACHE) break;
127 if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
128 unsigned int nb_logical_cores = (std::min)(extractBit(data[0], 14, 25) + 1, n_cores);
130 (extractBit(data[1], 22, 31) + 1)
131 * (extractBit(data[1], 12, 21) + 1)
132 * (extractBit(data[1], 0, 11) + 1)
133 * (data[2] + 1);
134 if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
135 assert(smt_width != 0);
136 cores_sharing_data_cache[data_cache_levels] = nb_logical_cores / smt_width;
138 }
139 }
140 }
141
142public:
143 int model;
148 int displayFamily; // family + extFamily
149 int displayModel; // model + extModel
150
151 // may I move these members into private?
152 static const unsigned int maxNumberCacheLevels = 10;
155 unsigned int data_cache_levels;
156
157 unsigned int getDataCacheLevels() const { return data_cache_levels; }
158 unsigned int getCoresSharingDataCache(unsigned int i) const
159 {
161 return cores_sharing_data_cache[i];
162 }
163 unsigned int getDataCacheSize(unsigned int i) const
164 {
166 return data_cache_size[i];
167 }
168
169 /*
170 data[] = { eax, ebx, ecx, edx }
171 */
172 static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
173 {
174#ifdef _MSC_VER
175 __cpuid(reinterpret_cast<int*>(data), eaxIn);
176#else
177 __cpuid(eaxIn, data[0], data[1], data[2], data[3]);
178#endif
179 }
180 static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
181 {
182#ifdef _MSC_VER
183 __cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
184#else
185 __cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
186#endif
187 }
188 static inline uint64 getXfeature()
189 {
190#ifdef _MSC_VER
191 return _xgetbv(0);
192#else
193 unsigned int eax, edx;
194 // xgetvb is not support on gcc 4.2
195// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
196 __asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
197 return ((uint64)edx << 32) | eax;
198#endif
199 }
200 typedef uint64 Type;
201
202 static const Type NONE = 0;
203 static const Type tMMX = 1 << 0;
204 static const Type tMMX2 = 1 << 1;
205 static const Type tCMOV = 1 << 2;
206 static const Type tSSE = 1 << 3;
207 static const Type tSSE2 = 1 << 4;
208 static const Type tSSE3 = 1 << 5;
209 static const Type tSSSE3 = 1 << 6;
210 static const Type tSSE41 = 1 << 7;
211 static const Type tSSE42 = 1 << 8;
212 static const Type tPOPCNT = 1 << 9;
213 static const Type tAESNI = 1 << 10;
214 static const Type tSSE5 = 1 << 11;
215 static const Type tOSXSAVE = 1 << 12;
216 static const Type tPCLMULQDQ = 1 << 13;
217 static const Type tAVX = 1 << 14;
218 static const Type tFMA = 1 << 15;
219
220 static const Type t3DN = 1 << 16;
221 static const Type tE3DN = 1 << 17;
222 static const Type tSSE4a = 1 << 18;
223 static const Type tRDTSCP = 1 << 19;
224 static const Type tAVX2 = 1 << 20;
225 static const Type tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt
226 static const Type tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
227 static const Type tLZCNT = 1 << 23;
228
229 static const Type tINTEL = 1 << 24;
230 static const Type tAMD = 1 << 25;
231
232 static const Type tENHANCED_REP = 1 << 26; // enhanced rep movsb/stosb
233 static const Type tRDRAND = 1 << 27;
234 static const Type tADX = 1 << 28; // adcx, adox
235 static const Type tRDSEED = 1 << 29; // rdseed
236 static const Type tSMAP = 1 << 30; // stac
237 static const Type tHLE = uint64(1) << 31; // xacquire, xrelease, xtest
238 static const Type tRTM = uint64(1) << 32; // xbegin, xend, xabort
239 static const Type tF16C = uint64(1) << 33; // vcvtph2ps, vcvtps2ph
240 static const Type tMOVBE = uint64(1) << 34; // mobve
241 static const Type tAVX512F = uint64(1) << 35;
242 static const Type tAVX512DQ = uint64(1) << 36;
243 static const Type tAVX512_IFMA = uint64(1) << 37;
245 static const Type tAVX512PF = uint64(1) << 38;
246 static const Type tAVX512ER = uint64(1) << 39;
247 static const Type tAVX512CD = uint64(1) << 40;
248 static const Type tAVX512BW = uint64(1) << 41;
249 static const Type tAVX512VL = uint64(1) << 42;
250 static const Type tAVX512_VBMI = uint64(1) << 43;
251 static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual
252 static const Type tAVX512_4VNNIW = uint64(1) << 44;
253 static const Type tAVX512_4FMAPS = uint64(1) << 45;
254 static const Type tPREFETCHWT1 = uint64(1) << 46;
255 static const Type tPREFETCHW = uint64(1) << 47;
256 static const Type tSHA = uint64(1) << 48;
257 static const Type tMPX = uint64(1) << 49;
258 static const Type tAVX512_VBMI2 = uint64(1) << 50;
259 static const Type tGFNI = uint64(1) << 51;
260 static const Type tVAES = uint64(1) << 52;
261 static const Type tVPCLMULQDQ = uint64(1) << 53;
262 static const Type tAVX512_VNNI = uint64(1) << 54;
263 static const Type tAVX512_BITALG = uint64(1) << 55;
264 static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56;
265
267 : type_(NONE)
269 {
270 unsigned int data[4];
271 const unsigned int& EAX = data[0];
272 const unsigned int& EBX = data[1];
273 const unsigned int& ECX = data[2];
274 const unsigned int& EDX = data[3];
275 getCpuid(0, data);
276 const unsigned int maxNum = EAX;
277 static const char intel[] = "ntel";
278 static const char amd[] = "cAMD";
279 if (ECX == get32bitAsBE(amd)) {
280 type_ |= tAMD;
281 getCpuid(0x80000001, data);
282 if (EDX & (1U << 31)) type_ |= t3DN;
283 if (EDX & (1U << 15)) type_ |= tCMOV;
284 if (EDX & (1U << 30)) type_ |= tE3DN;
285 if (EDX & (1U << 22)) type_ |= tMMX2;
286 if (EDX & (1U << 27)) type_ |= tRDTSCP;
287 }
288 if (ECX == get32bitAsBE(intel)) {
289 type_ |= tINTEL;
290 getCpuid(0x80000001, data);
291 if (EDX & (1U << 27)) type_ |= tRDTSCP;
292 if (ECX & (1U << 5)) type_ |= tLZCNT;
293 if (ECX & (1U << 8)) type_ |= tPREFETCHW;
294 }
295 getCpuid(1, data);
296 if (ECX & (1U << 0)) type_ |= tSSE3;
297 if (ECX & (1U << 9)) type_ |= tSSSE3;
298 if (ECX & (1U << 19)) type_ |= tSSE41;
299 if (ECX & (1U << 20)) type_ |= tSSE42;
300 if (ECX & (1U << 22)) type_ |= tMOVBE;
301 if (ECX & (1U << 23)) type_ |= tPOPCNT;
302 if (ECX & (1U << 25)) type_ |= tAESNI;
303 if (ECX & (1U << 1)) type_ |= tPCLMULQDQ;
304 if (ECX & (1U << 27)) type_ |= tOSXSAVE;
305 if (ECX & (1U << 30)) type_ |= tRDRAND;
306 if (ECX & (1U << 29)) type_ |= tF16C;
307
308 if (EDX & (1U << 15)) type_ |= tCMOV;
309 if (EDX & (1U << 23)) type_ |= tMMX;
310 if (EDX & (1U << 25)) type_ |= tMMX2 | tSSE;
311 if (EDX & (1U << 26)) type_ |= tSSE2;
312
313 if (type_ & tOSXSAVE) {
314 // check XFEATURE_ENABLED_MASK[2:1] = '11b'
315 uint64 bv = getXfeature();
316 if ((bv & 6) == 6) {
317 if (ECX & (1U << 28)) type_ |= tAVX;
318 if (ECX & (1U << 12)) type_ |= tFMA;
319 if (((bv >> 5) & 7) == 7) {
320 getCpuidEx(7, 0, data);
321 if (EBX & (1U << 16)) type_ |= tAVX512F;
322 if (type_ & tAVX512F) {
323 if (EBX & (1U << 17)) type_ |= tAVX512DQ;
324 if (EBX & (1U << 21)) type_ |= tAVX512_IFMA;
325 if (EBX & (1U << 26)) type_ |= tAVX512PF;
326 if (EBX & (1U << 27)) type_ |= tAVX512ER;
327 if (EBX & (1U << 28)) type_ |= tAVX512CD;
328 if (EBX & (1U << 30)) type_ |= tAVX512BW;
329 if (EBX & (1U << 31)) type_ |= tAVX512VL;
330 if (ECX & (1U << 1)) type_ |= tAVX512_VBMI;
331 if (ECX & (1U << 6)) type_ |= tAVX512_VBMI2;
332 if (ECX & (1U << 8)) type_ |= tGFNI;
333 if (ECX & (1U << 9)) type_ |= tVAES;
334 if (ECX & (1U << 10)) type_ |= tVPCLMULQDQ;
335 if (ECX & (1U << 11)) type_ |= tAVX512_VNNI;
336 if (ECX & (1U << 12)) type_ |= tAVX512_BITALG;
337 if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ;
338 if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
339 if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
340 }
341 }
342 }
343 }
344 if (maxNum >= 7) {
345 getCpuidEx(7, 0, data);
346 if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2;
347 if (EBX & (1U << 3)) type_ |= tBMI1;
348 if (EBX & (1U << 8)) type_ |= tBMI2;
349 if (EBX & (1U << 9)) type_ |= tENHANCED_REP;
350 if (EBX & (1U << 18)) type_ |= tRDSEED;
351 if (EBX & (1U << 19)) type_ |= tADX;
352 if (EBX & (1U << 20)) type_ |= tSMAP;
353 if (EBX & (1U << 4)) type_ |= tHLE;
354 if (EBX & (1U << 11)) type_ |= tRTM;
355 if (EBX & (1U << 14)) type_ |= tMPX;
356 if (EBX & (1U << 29)) type_ |= tSHA;
357 if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
358 }
359 setFamily();
360 setCacheHierarchy();
361 }
362 void putFamily() const
363 {
364 printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n",
366 printf("display:family=%X, model=%X\n", displayFamily, displayModel);
367 }
368 bool has(Type type) const
369 {
370 return (type & type_) != 0;
371 }
372};
373
374class Clock {
375public:
376 static inline uint64 getRdtsc()
377 {
378#ifdef _MSC_VER
379 return __rdtsc();
380#else
381 unsigned int eax, edx;
382 __asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
383 return ((uint64)edx << 32) | eax;
384#endif
385 }
387 : clock_(0)
388 , count_(0)
389 {
390 }
391 void begin()
392 {
393 clock_ -= getRdtsc();
394 }
395 void end()
396 {
397 clock_ += getRdtsc();
398 count_++;
399 }
400 int getCount() const { return count_; }
401 uint64 getClock() const { return clock_; }
402 void clear() { count_ = 0; clock_ = 0; }
403private:
404 uint64 clock_;
405 int count_;
406};
407
408#ifdef XBYAK64
409const int UseRCX = 1 << 6;
410const int UseRDX = 1 << 7;
411
412class Pack {
413 static const size_t maxTblNum = 10;
414 const Xbyak::Reg64 *tbl_[maxTblNum];
415 size_t n_;
416public:
417 Pack() : tbl_(), n_(0) {}
418 Pack(const Xbyak::Reg64 *tbl, size_t n) { init(tbl, n); }
419 Pack(const Pack& rhs)
420 : n_(rhs.n_)
421 {
422 for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
423 }
424 Pack& operator=(const Pack& rhs)
425 {
426 n_ = rhs.n_;
427 for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
428 return *this;
429 }
430 Pack(const Xbyak::Reg64& t0)
431 { n_ = 1; tbl_[0] = &t0; }
432 Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
433 { n_ = 2; tbl_[0] = &t0; tbl_[1] = &t1; }
434 Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
435 { n_ = 3; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; }
436 Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
437 { n_ = 4; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; }
438 Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
439 { n_ = 5; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; }
440 Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
441 { n_ = 6; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; }
442 Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
443 { n_ = 7; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; }
444 Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
445 { n_ = 8; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; }
446 Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
447 { n_ = 9; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; }
448 Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
449 { n_ = 10; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; tbl_[9] = &t9; }
450 Pack& append(const Xbyak::Reg64& t)
451 {
452 if (n_ == maxTblNum) {
453 fprintf(stderr, "ERR Pack::can't append\n");
454 throw Error(ERR_BAD_PARAMETER);
455 }
456 tbl_[n_++] = &t;
457 return *this;
458 }
459 void init(const Xbyak::Reg64 *tbl, size_t n)
460 {
461 if (n > maxTblNum) {
462 fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n);
463 throw Error(ERR_BAD_PARAMETER);
464 }
465 n_ = n;
466 for (size_t i = 0; i < n; i++) {
467 tbl_[i] = &tbl[i];
468 }
469 }
470 const Xbyak::Reg64& operator[](size_t n) const
471 {
472 if (n >= n_) {
473 fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
474 throw Error(ERR_BAD_PARAMETER);
475 }
476 return *tbl_[n];
477 }
478 size_t size() const { return n_; }
479 /*
480 get tbl[pos, pos + num)
481 */
482 Pack sub(size_t pos, size_t num = size_t(-1)) const
483 {
484 if (num == size_t(-1)) num = n_ - pos;
485 if (pos + num > n_) {
486 fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num);
487 throw Error(ERR_BAD_PARAMETER);
488 }
489 Pack pack;
490 pack.n_ = num;
491 for (size_t i = 0; i < num; i++) {
492 pack.tbl_[i] = tbl_[pos + i];
493 }
494 return pack;
495 }
496 void put() const
497 {
498 for (size_t i = 0; i < n_; i++) {
499 printf("%s ", tbl_[i]->toString());
500 }
501 printf("\n");
502 }
503};
504
505class StackFrame {
506#ifdef XBYAK64_WIN
507 static const int noSaveNum = 6;
508 static const int rcxPos = 0;
509 static const int rdxPos = 1;
510#else
511 static const int noSaveNum = 8;
512 static const int rcxPos = 3;
513 static const int rdxPos = 2;
514#endif
516 int pNum_;
517 int tNum_;
518 bool useRcx_;
519 bool useRdx_;
520 int saveNum_;
521 int P_;
522 bool makeEpilog_;
523 Xbyak::Reg64 pTbl_[4];
524 Xbyak::Reg64 tTbl_[10];
525 Pack p_;
526 Pack t_;
527 StackFrame(const StackFrame&);
528 void operator=(const StackFrame&);
529public:
530 const Pack& p;
531 const Pack& t;
532 /*
533 make stack frame
534 @param sf [in] this
535 @param pNum [in] num of function parameter(0 <= pNum <= 4)
536 @param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
537 @param stackSizeByte [in] local stack size
538 @param makeEpilog [in] automatically call close() if true
539
540 you can use
541 rax
542 gp0, ..., gp(pNum - 1)
543 gt0, ..., gt(tNum-1)
544 rcx if tNum & UseRCX
545 rdx if tNum & UseRDX
546 rsp[0..stackSizeByte - 1]
547 */
548 StackFrame(Xbyak::CodeGenerator *code, int pNum, int tNum = 0, int stackSizeByte = 0, bool makeEpilog = true)
549 : code_(code)
550 , pNum_(pNum)
551 , tNum_(tNum & ~(UseRCX | UseRDX))
552 , useRcx_((tNum & UseRCX) != 0)
553 , useRdx_((tNum & UseRDX) != 0)
554 , saveNum_(0)
555 , P_(0)
556 , makeEpilog_(makeEpilog)
557 , p(p_)
558 , t(t_)
559 {
560 using namespace Xbyak;
561 if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
562 const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
563 if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
564 const Reg64& _rsp = code->rsp;
565 const AddressFrame& _ptr = code->ptr;
566 saveNum_ = (std::max)(0, allRegNum - noSaveNum);
567 const int *tbl = getOrderTbl() + noSaveNum;
568 P_ = saveNum_ + (stackSizeByte + 7) / 8;
569 if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
570 P_ *= 8;
571 if (P_ > 0) code->sub(_rsp, P_);
572#ifdef XBYAK64_WIN
573 for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
574 code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
575 }
576 for (int i = 4; i < saveNum_; i++) {
577 code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
578 }
579#else
580 for (int i = 0; i < saveNum_; i++) {
581 code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
582 }
583#endif
584 int pos = 0;
585 for (int i = 0; i < pNum; i++) {
586 pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
587 }
588 for (int i = 0; i < tNum_; i++) {
589 tTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
590 }
591 if (useRcx_ && rcxPos < pNum) code_->mov(code_->r10, code_->rcx);
592 if (useRdx_ && rdxPos < pNum) code_->mov(code_->r11, code_->rdx);
593 p_.init(pTbl_, pNum);
594 t_.init(tTbl_, tNum_);
595 }
596 /*
597 make epilog manually
598 @param callRet [in] call ret() if true
599 */
600 void close(bool callRet = true)
601 {
602 using namespace Xbyak;
603 const Reg64& _rsp = code_->rsp;
604 const AddressFrame& _ptr = code_->ptr;
605 const int *tbl = getOrderTbl() + noSaveNum;
606#ifdef XBYAK64_WIN
607 for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
608 code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
609 }
610 for (int i = 4; i < saveNum_; i++) {
611 code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
612 }
613#else
614 for (int i = 0; i < saveNum_; i++) {
615 code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
616 }
617#endif
618 if (P_ > 0) code_->add(_rsp, P_);
619
620 if (callRet) code_->ret();
621 }
622 ~StackFrame()
623 {
624 if (!makeEpilog_) return;
625 try {
626 close();
627 } catch (std::exception& e) {
628 printf("ERR:StackFrame %s\n", e.what());
629 exit(1);
630 } catch (...) {
631 printf("ERR:StackFrame otherwise\n");
632 exit(1);
633 }
634 }
635private:
636 const int *getOrderTbl() const
637 {
638 using namespace Xbyak;
639 static const int tbl[] = {
640#ifdef XBYAK64_WIN
641 Operand::RCX, Operand::RDX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, Operand::RDI, Operand::RSI,
642#else
643 Operand::RDI, Operand::RSI, Operand::RDX, Operand::RCX, Operand::R8, Operand::R9, Operand::R10, Operand::R11,
644#endif
645 Operand::RBX, Operand::RBP, Operand::R12, Operand::R13, Operand::R14, Operand::R15
646 };
647 return &tbl[0];
648 }
649 int getRegIdx(int& pos) const
650 {
651 assert(pos < 14);
652 using namespace Xbyak;
653 const int *tbl = getOrderTbl();
654 int r = tbl[pos++];
655 if (useRcx_) {
656 if (r == Operand::RCX) { return Operand::R10; }
657 if (r == Operand::R10) { r = tbl[pos++]; }
658 }
659 if (useRdx_) {
660 if (r == Operand::RDX) { return Operand::R11; }
661 if (r == Operand::R11) { return tbl[pos++]; }
662 }
663 return r;
664 }
665};
666#endif
667
668} } // end of util
669#endif
const mie::Vuint & p
Definition bn.cpp:27
const mie::Vuint & r
Definition bn.cpp:28
Error
Definition calc.cpp:23
void add(const Operand &op, uint32 imm)
Definition xbyak.h:6
void ret(int imm=0)
Definition xbyak.h:667
void mov(const Operand &reg1, const Operand &reg2)
Definition xbyak.h:2210
const AddressFrame ptr
Definition xbyak.h:2090
int getCount() const
Definition xbyak_util.h:400
static uint64 getRdtsc()
Definition xbyak_util.h:376
uint64 getClock() const
Definition xbyak_util.h:401
static const Type tMOVBE
Definition xbyak_util.h:240
static const Type tAVX512PF
Definition xbyak_util.h:245
static const Type tRDRAND
Definition xbyak_util.h:233
static const unsigned int maxNumberCacheLevels
Definition xbyak_util.h:152
static const Type tMPX
Definition xbyak_util.h:257
static const Type tBMI1
Definition xbyak_util.h:225
static const Type tPREFETCHW
Definition xbyak_util.h:255
unsigned int data_cache_size[maxNumberCacheLevels]
Definition xbyak_util.h:153
unsigned int cores_sharing_data_cache[maxNumberCacheLevels]
Definition xbyak_util.h:154
static const Type tSSE41
Definition xbyak_util.h:210
static const Type NONE
Definition xbyak_util.h:202
unsigned int getDataCacheLevels() const
Definition xbyak_util.h:157
static const Type tFMA
Definition xbyak_util.h:218
static const Type tSSE4a
Definition xbyak_util.h:222
static const Type tLZCNT
Definition xbyak_util.h:227
static void getCpuid(unsigned int eaxIn, unsigned int data[4])
Definition xbyak_util.h:172
bool has(Type type) const
Definition xbyak_util.h:368
unsigned int getCoresSharingDataCache(unsigned int i) const
Definition xbyak_util.h:158
static const Type tAVX512_VBMI
Definition xbyak_util.h:250
static void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
Definition xbyak_util.h:180
static const Type tRTM
Definition xbyak_util.h:238
static const Type tAVX512_VPOPCNTDQ
Definition xbyak_util.h:264
static const Type tAVX512_BITALG
Definition xbyak_util.h:263
static const Type tOSXSAVE
Definition xbyak_util.h:215
static const Type tAVX
Definition xbyak_util.h:217
static const Type tPCLMULQDQ
Definition xbyak_util.h:216
static const Type tBMI2
Definition xbyak_util.h:226
static const Type tSSE5
Definition xbyak_util.h:214
static const Type tSMAP
Definition xbyak_util.h:236
static const Type tPREFETCHWT1
Definition xbyak_util.h:254
void putFamily() const
Definition xbyak_util.h:362
static const Type tADX
Definition xbyak_util.h:234
static const Type tAVX512CD
Definition xbyak_util.h:247
static const Type tMMX2
Definition xbyak_util.h:204
static const Type t3DN
Definition xbyak_util.h:220
static const Type tVPCLMULQDQ
Definition xbyak_util.h:261
unsigned int data_cache_levels
Definition xbyak_util.h:155
static const Type tAVX512_4VNNIW
Definition xbyak_util.h:252
static const Type tAVX512_IFMA
Definition xbyak_util.h:243
static const Type tHLE
Definition xbyak_util.h:237
static const Type tAVX512BW
Definition xbyak_util.h:248
static const Type tAVX512_VNNI
Definition xbyak_util.h:262
static const Type tCMOV
Definition xbyak_util.h:205
static const Type tAVX512ER
Definition xbyak_util.h:246
static const Type tAESNI
Definition xbyak_util.h:213
static const Type tSHA
Definition xbyak_util.h:256
static const Type tSSE42
Definition xbyak_util.h:211
static uint64 getXfeature()
Definition xbyak_util.h:188
static const Type tAVX512IFMA
Definition xbyak_util.h:244
static const Type tSSSE3
Definition xbyak_util.h:209
static const Type tAMD
Definition xbyak_util.h:230
static const Type tRDSEED
Definition xbyak_util.h:235
static const Type tSSE3
Definition xbyak_util.h:208
static const Type tPOPCNT
Definition xbyak_util.h:212
static const Type tE3DN
Definition xbyak_util.h:221
static const Type tSSE2
Definition xbyak_util.h:207
static const Type tMMX
Definition xbyak_util.h:203
static const Type tGFNI
Definition xbyak_util.h:259
static const Type tRDTSCP
Definition xbyak_util.h:223
static const Type tAVX512VBMI
Definition xbyak_util.h:251
unsigned int getDataCacheSize(unsigned int i) const
Definition xbyak_util.h:163
static const Type tENHANCED_REP
Definition xbyak_util.h:232
static const Type tVAES
Definition xbyak_util.h:260
static const Type tAVX512DQ
Definition xbyak_util.h:242
static const Type tAVX512_VBMI2
Definition xbyak_util.h:258
static const Type tSSE
Definition xbyak_util.h:206
static const Type tF16C
Definition xbyak_util.h:239
static const Type tAVX512VL
Definition xbyak_util.h:249
static const Type tAVX2
Definition xbyak_util.h:224
static const Type tINTEL
Definition xbyak_util.h:229
static const Type tAVX512F
Definition xbyak_util.h:241
static const Type tAVX512_4FMAPS
Definition xbyak_util.h:253
const struct Ptn tbl[]
void put()
Definition gen_code.cpp:234
void close(T *e, websocketpp::connection_hdl hdl)
void init()
Definition lib_test.cpp:3
const uint64 EAX
Definition make_512.cpp:16
LOGGING_API void printf(Category category, const char *format,...)
Definition Logging.cpp:30
static const Reg32 esp(Operand::ESP)
static const Reg32 ecx(Operand::ECX)
static const Reg32 edx(Operand::EDX)
static const AddressFrame dword(32)
static const Reg32 ebx(Operand::EBX)
static const Reg32 esi(Operand::ESI)
Definition xbyak.h:104
@ ERR_BAD_PARAMETER
Definition xbyak.h:150
uint64_t uint64
Definition xbyak.h:117
void pack(instruction_stream *stream, uint32_t field)
Xbyak ; JIT assembler for x86(IA32)/x64 by C++.
void cpuid()
void sub(const Operand &op, uint32 imm)
#define __cpuid(eaxIn, a, b, c, d)
Definition xbyak_util.h:45
#define __cpuid_count(eaxIn, ecxIn, a, b, c, d)
Definition xbyak_util.h:46
CK_RV ret