Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
zm.cpp
Go to the documentation of this file.
1#include "zm.h"
2#include <cstdio>
3
4using namespace mie;
5
6#ifdef MIE_USE_X64ASM
7 #define XBYAK_NO_OP_NAMES
8#include "xbyak/xbyak.h"
9using namespace Xbyak;
10#endif
16static inline bool in_addN(Unit *out, const Unit *x, const Unit *y, size_t n)
17{
18 assert(n > 0);
19 Unit c = 0;
20 for (size_t i = 0; i < n; i++) {
21 Unit xc = x[i] + c;
22 if (xc < c) {
23 // x[i] = Unit(-1) and c = 1
24 out[i] = y[i];
25 } else {
26 xc += y[i];
27 c = y[i] > xc ? 1 : 0;
28 out[i] = xc;
29 }
30 }
31 return c != 0;
32}
33
37static inline bool in_add(Unit *out, const Unit *x, size_t n, Unit y)
38{
39 assert(n > 0);
40 Unit xc = x[0] + y;
41 Unit c = y > xc ? 1 : 0;
42 out[0] = xc;
43 for (size_t i = 1; i < n; i++) {
44 Unit xc = x[i] + c;
45 if (xc < c) {
46 out[i] = 0;
47 } else {
48 out[i] = xc;
49 c = 0;
50 }
51 }
52 return c != 0;
53}
57static inline bool in_subN(Unit *out, const Unit *x, const Unit *y, size_t n)
58{
59 assert(n > 0);
60 Unit c = 0;
61 for (size_t i = 0; i < n; i++) {
62 Unit yc = y[i] + c;
63 if (yc < c) {
64 // y[i] = Unit(-1) and c = 1
65 out[i] = x[i];
66 } else {
67 c = x[i] < yc ? 1 : 0;
68 out[i] = x[i] - yc;
69 }
70 }
71 return c != 0;
72}
73
77static inline bool in_sub(Unit *out, const Unit *x, size_t n, Unit y)
78{
79 assert(n > 0);
80 Unit c = x[0] < y ? 1 : 0;
81 out[0] = x[0] - y;
82 for (size_t i = 1; i < n; i++) {
83 if (x[i] < c) {
84 out[i] = Unit(-1);
85 } else {
86 out[i] = x[i] - c;
87 c = 0;
88 }
89 }
90 return c != 0;
91}
92
93/*
94 [H:L] <= a * b
95 @return L
96*/
97static inline Unit mulUnit(Unit *H, Unit a, Unit b)
98{
99#ifdef MIE_USE_UNIT32
100#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
101 uint64_t t = __emulu(a, b);
102#else
103 uint64_t t = uint64_t(a) * b;
104#endif
105 uint32_t L;
106 split64(H, &L, t);
107 return L;
108#else
109#if defined(_WIN64) && !defined(__INTEL_COMPILER)
110 return _umul128(a, b, H);
111#else
112 typedef __attribute__((mode(TI))) unsigned int uint128;
113 uint128 t = uint128(a) * b;
114 *H = uint64_t(t >> 64);
115 return uint64_t(t);
116#endif
117#endif
118}
119
120/*
121 out[0..n + 1] = x[0..n] * y
122 @note accept out == x
123*/
124static inline void in_mul(Unit *out, const Unit *x, size_t n, Unit y)
125{
126 assert(n > 0);
127 Unit H = 0;
128 for (size_t i = 0; i < n; i++) {
129 Unit t = H;
130 Unit L = mulUnit(&H, x[i], y);
131 out[i] = t + L;
132 if (out[i] < t) {
133 H++;
134 }
135 }
136 out[n] = H;
137}
138
139/*
140 q = [H:L] / y
141 r = [H:L] % y
142 return q
143*/
144static inline Unit divUnit(Unit *r, Unit H, Unit L, Unit y)
145{
146#ifdef MIE_USE_UNIT32
147 uint64_t t = make64(H, L);
148 uint32_t q = uint32_t(t / y);
149 *r = Unit(t % y);
150 return q;
151#elif defined(_MSC_VER)
152 #pragma
153 fprintf(stderr, "not implemented divUnit\n");
154 exit(1);
155#else
156 typedef __attribute__((mode(TI))) unsigned int uint128;
157 uint128 t = (uint128(H) << 64) | L;
158 uint64_t q = uint64_t(t / y);
159 *r = Unit(t % y);
160 return q;
161#endif
162}
163
164/*
165 q = x[] / y
166 @retval r = x[] % y
167 @note accept q == x
168*/
169static inline Unit in_div(Unit *q, const Unit *x, size_t xn, Unit y)
170{
171 Unit r = 0;
172 for (int i = (int)xn - 1; i >= 0; i--) {
173 q[i] = divUnit(&r, r, x[i], y);
174 }
175 return r;
176}
177
178static inline Unit in_mod(const Unit *x, size_t xn, Unit y)
179{
180 Unit r = 0;
181 for (int i = (int)xn - 1; i >= 0; i--) {
182 divUnit(&r, r, x[i], y);
183 }
184 return r;
185}
186
187bool (*mie::local::PrimitiveFunction::addN)(Unit *out, const Unit *x, const Unit *y, size_t n) = &in_addN;
188bool (*mie::local::PrimitiveFunction::add1)(Unit *out, const Unit *x, size_t n, Unit y) = &in_add;
189bool (*mie::local::PrimitiveFunction::subN)(Unit *out, const Unit *x, const Unit *y, size_t n) = &in_subN;
190bool (*mie::local::PrimitiveFunction::sub1)(Unit *out, const Unit *x, size_t n, Unit y) = &in_sub;
191void (*mie::local::PrimitiveFunction::mul1)(Unit *out, const Unit *x, size_t n, Unit y) = &in_mul;
192Unit (*mie::local::PrimitiveFunction::div1)(Unit *q, const Unit *x, size_t n, Unit y) = &in_div;
193Unit (*mie::local::PrimitiveFunction::mod1)(const Unit *x, size_t n, Unit y) = &in_mod;
194
195#ifdef MIE_USE_X64ASM
/*
    Xbyak code generator that emits x64 routines for the multi-precision
    primitives. The constructor emits, in order and each separated by
    align(16): addN-style add, add1, subN-style sub, mul, div, mod.
    NOTE(review): nothing in the visible code records the entry address of each
    generated routine or installs it into PrimitiveFunction - presumably that
    happens in lines not shown here; confirm.
    NOTE(review): memory accesses use 64-bit registers and a stride of 8 bytes,
    so this presumably requires sizeof(Unit) == 8 - confirm.
*/
196class Code : public Xbyak::CodeGenerator {
 /*
     Emit a routine taking (out, x, y, n) that computes out[] = x[] + y[]
     (isAdd) or out[] = x[] - y[] (!isAdd) over n 8-byte units and returns
     the final carry/borrow flag as 0 or 1 in rax.
     n == 1, 2, 3 have dedicated straight-line paths; n >= 4 first handles
     n % 4 leading units and then loops over 4 units per iteration.
     n == 0 is not handled: it fails every comparison and runs the n == 3 path.
 */
197 void genAddSub(bool isAdd)
198 {
199 using namespace Xbyak;
200 inLocalLabel();
201 const Reg64& a = rax;
 // argument registers differ between the XBYAK64_WIN build and the other build
202#ifdef XBYAK64_WIN
203 const Reg64& out = rcx;
204 const Reg64& x = rdx;
205 const Reg64& y = r8;
206 const Reg64& n = r9;
207 const Reg64& t0 = r10;
208 const Reg64& t1 = r11;
 // rsi is saved and restored around the n >= 4 path below
209 const Reg64& t2 = rsi;
210#else
211 const Reg64& out = rdi;
212 const Reg64& x = rsi;
213 const Reg64& y = rdx;
214 const Reg64& n = rcx;
215 const Reg64& t0 = r8;
216 const Reg64& t1 = r9;
217 const Reg64& t2 = r10;
218#endif
219 cmp(n, 4);
220 jge(".main", T_NEAR);
221 cmp(n, 1);
222 jne("@f");
223 // n == 1
224 mov(a, ptr [x]);
225 if (isAdd) {
226 add(a, ptr [y]);
227 } else {
228 sub(a, ptr [y]);
229 }
230 mov(ptr [out], a);
 // mov (unlike xor) leaves the flags alone, so CF from the add/sub reaches setc
231 mov(a, 0);
232 setc(al);
233 ret();
234 L("@@");
235 cmp(n, 2);
236 jne("@f");
237 // n == 2
238 mov(a , ptr [x + 8 * 0]);
239 mov(t0, ptr [x + 8 * 1]);
240 if (isAdd) {
241 add(a , ptr [y + 8 * 0]);
242 adc(t0, ptr [y + 8 * 1]);
243 } else {
244 sub(a , ptr [y + 8 * 0]);
245 sbb(t0, ptr [y + 8 * 1]);
246 }
247 mov(ptr [out + 8 * 0], a);
248 mov(ptr [out + 8 * 1], t0);
249 mov(a, 0);
250 setc(al);
251 ret();
252 L("@@");
253 // n == 3
254 mov(a , ptr [x + 8 * 0]);
255 mov(t0, ptr [x + 8 * 1]);
256 mov(t1, ptr [x + 8 * 2]);
257 if (isAdd) {
258 add(a , ptr [y + 8 * 0]);
259 adc(t0, ptr [y + 8 * 1]);
260 adc(t1, ptr [y + 8 * 2]);
261 } else {
262 sub(a , ptr [y + 8 * 0]);
263 sbb(t0, ptr [y + 8 * 1]);
264 sbb(t1, ptr [y + 8 * 2]);
265 }
266 mov(ptr [out + 8 * 0], a);
267 mov(ptr [out + 8 * 1], t0);
268 mov(ptr [out + 8 * 2], t1);
269 mov(a, 0);
270 setc(al);
271 ret();
272 L(".main"); // n >= 4
273#ifdef XBYAK64_WIN
 // stash t2 (rsi) on the stack; it is reloaded just before the final ret
 // NOTE(review): [rsp + 8] is presumably the caller-provided home space - confirm
274 mov(ptr [rsp + 8 * 1], t2);
275#endif
 // a = n % 4 (leading units handled once), n = n / 4 (iterations of .lp)
276 mov(a, n);
277 shr(n, 2);
 // and_ also clears CF, so jumping straight to .lp starts the adc/sbb chain with no carry
278 and_(a, 3);
279 jz(".lp");
280 cmp(a, 1);
281 jne("@f");
282 // 4x + 1
283 mov(a, ptr [x + 8 * 0]);
284 if (isAdd) {
285 add(a, ptr [y + 8 * 0]);
286 } else {
287 sub(a, ptr [y + 8 * 0]);
288 }
289 mov(ptr [out + 8 * 0], a);
 // pointers are advanced with lea, which does not touch the flags, so CF is carried into .lp
290 lea(x, ptr [x + 8]);
291 lea(y, ptr [y + 8]);
292 lea(out, ptr [out + 8]);
293 jmp(".lp");
294 L("@@");
295 cmp(a, 2);
296 jne("@f");
297 // 4x + 2
298 mov(a , ptr [x + 8 * 0]);
299 mov(t0, ptr [x + 8 * 1]);
300 if (isAdd) {
301 add(a , ptr [y + 8 * 0]);
302 adc(t0, ptr [y + 8 * 1]);
303 } else {
304 sub(a , ptr [y + 8 * 0]);
305 sbb(t0, ptr [y + 8 * 1]);
306 }
307 mov(ptr [out + 8 * 0], a);
308 mov(ptr [out + 8 * 1], t0);
309 lea(x, ptr [x + 8 * 2]);
310 lea(y, ptr [y + 8 * 2]);
311 lea(out, ptr [out + 8 * 2]);
312 jmp(".lp");
313 L("@@");
314 // 4x + 3
315 mov(a , ptr [x + 8 * 0]);
316 mov(t0, ptr [x + 8 * 1]);
317 mov(t1, ptr [x + 8 * 2]);
318 if (isAdd) {
319 add(a , ptr [y + 8 * 0]);
320 adc(t0, ptr [y + 8 * 1]);
321 adc(t1, ptr [y + 8 * 2]);
322 } else {
323 sub(a , ptr [y + 8 * 0]);
324 sbb(t0, ptr [y + 8 * 1]);
325 sbb(t1, ptr [y + 8 * 2]);
326 }
327 mov(ptr [out + 8 * 0], a);
328 mov(ptr [out + 8 * 1], t0);
329 mov(ptr [out + 8 * 2], t1);
330 lea(x, ptr [x + 8 * 3]);
331 lea(y, ptr [y + 8 * 3]);
332 lea(out, ptr [out + 8 * 3]);
 // the 4x + 3 case falls through the alignment padding into the loop
333 align(16);
 // main loop: 4 units per iteration, carry/borrow chained through CF
334 L(".lp");
335 mov(a , ptr [x + 8 * 0]);
336 mov(t0, ptr [x + 8 * 1]);
337 mov(t1, ptr [x + 8 * 2]);
338 mov(t2, ptr [x + 8 * 3]);
339 if (isAdd) {
340 adc(a , ptr [y + 8 * 0]);
341 adc(t0, ptr [y + 8 * 1]);
342 adc(t1, ptr [y + 8 * 2]);
343 adc(t2, ptr [y + 8 * 3]);
344 } else {
345 sbb(a , ptr [y + 8 * 0]);
346 sbb(t0, ptr [y + 8 * 1]);
347 sbb(t1, ptr [y + 8 * 2]);
348 sbb(t2, ptr [y + 8 * 3]);
349 }
350 mov(ptr [out + 8 * 0], a);
351 mov(ptr [out + 8 * 1], t0);
352 mov(ptr [out + 8 * 2], t1);
353 mov(ptr [out + 8 * 3], t2);
354 lea(x, ptr [x + 8 * 4]);
355 lea(y, ptr [y + 8 * 4]);
356 lea(out, ptr [out + 8 * 4]);
 // dec updates ZF for the loop test but leaves CF intact for the next adc/sbb
357 dec(n);
358 jnz(".lp");
 // no jump in the visible code targets .exit; it is reached by falling out of the loop
359 L(".exit");
360 mov(a, 0);
361 setc(al);
362#ifdef XBYAK64_WIN
363 mov(t2, ptr [rsp + 8 * 1]);
364#endif
365 ret();
367 }
368 // add1(Unit *out, const Unit *x, size_t n, Unit y);
 /*
     Emit a routine taking (out, x, n, y) that computes out[] = x[] + y
     (isAdd) or out[] = x[] - y (!isAdd), where y is a single unit applied to
     x[0] and the carry/borrow is rippled through the remaining units.
     Returns the final carry/borrow as 0 or 1 in rax.
     The first unit is processed before n is tested, so n must be > 0.
     NOTE(review): the constructor below only calls genAddSub1(true); no
     subtract variant is generated in the visible code.
 */
369 void genAddSub1(bool isAdd)
370 {
371 using namespace Xbyak;
372 inLocalLabel();
373 const Reg64& a = rax;
374 const Reg64& c = rcx;
 // rcx becomes the unit counter, so whatever argument arrived in rcx is parked in r10 first
375#ifdef XBYAK64_WIN
376 mov(r10, c);
377 mov(c, r8); // n
378 const Reg64& out = r10;
379 const Reg64& x = rdx;
380 const Reg64& y = r9;
381 const Reg64& t = r11;
382#else
383 mov(r10, c);
384 mov(c, rdx); // n
385 const Reg64& out = rdi;
386 const Reg64& x = rsi;
387 const Reg64& y = r10;
388 const Reg64& t = r8;
389#endif
 // point out/x one past the last unit and negate the counter: [ptr + c * 8]
 // then walks upward from unit 0 as c counts from -n up to 0
390 lea(out, ptr [out + c * 8]);
391 lea(x, ptr [x + c * 8]);
 // a = 0: the zero operand for adc/sbb below and the cleared upper bits of the return value
392 xor_(a, a);
393 neg(c);
 // first unit: plain add/sub of y
394 mov(t, ptr [x + c * 8]);
395 if (isAdd) {
396 add(t, y);
397 } else {
398 sub(t, y);
399 }
400 mov(ptr [out + c * 8], t);
 // inc sets ZF for the branch but does not modify CF
401 inc(c);
402 // faster on Core i3
403 jz(".exit");
 // remaining units: add/subtract only the carry/borrow (a is 0)
404 L(".lp");
405 mov(t, ptr [x + c * 8]);
406 if (isAdd) {
407 adc(t, a);
408 } else {
409 sbb(t, a);
410 }
411 mov(ptr [out + c * 8], t);
412 inc(c);
413 jnz(".lp");
414 L(".exit");
415 setc(al);
416 ret();
418 }
 /*
     Emit a routine taking (out, x, n, y) that computes
     out[0..n] = x[0..n-1] * y (n + 1 units are written).
     The loop body runs before n is tested, so n must be > 0.
 */
419 void genMul()
420 {
421 using namespace Xbyak;
422 inLocalLabel();
423
424 // void in_mul(Unit *out, const Unit *x, size_t n, Unit y)
425
426 const Reg64& a = rax;
427 const Reg64& d = rdx;
428 const Reg64& t = r11;
 // mul writes its upper half to rdx, so the argument that arrived in rdx is moved to r10
429 mov(r10, rdx);
430
431#ifdef XBYAK64_WIN
432
433 const Reg64& out = rcx;
434 const Reg64& x = r10; // rdx
435 const Reg64& n = r8;
436 const Reg64& y = r9;
437#else
438 const Reg64& out = rdi;
439 const Reg64& x = rsi;
440 const Reg64& n = r10; // rdx
441 const Reg64& y = rcx;
442#endif
443 const int s = (int)sizeof(Unit);
 // d holds the upper half carried over from the previous unit; starts at 0
444 xor_(d, d);
445 L(".lp");
 // t = previous upper half, saved because mul overwrites d
446 mov(t, d);
447 mov(a, ptr [x]);
448 mul(y); // [d:a] = [x] * y
 // out unit = low half + previous upper half; the carry of that add goes into the new upper half
449 add(t, a);
450 adc(d, 0);
451 mov(ptr [out], t);
452 add(x, s);
453 add(out, s);
454 sub(n, 1);
455 jnz(".lp");
 // the last upper half becomes out[n]
456 mov(ptr [out], d);
457
458 ret();
460 }
 /*
     Emit a routine taking (q, x, xn, y) that computes q[] = x[] / y and
     returns x[] % y in rax, processing units from most to least significant.
     The loop body runs before n is tested, so xn must be > 0 (unlike the
     C++ in_div loop, which simply does not execute for xn == 0).
 */
461 void genDiv()
462 {
463 using namespace Xbyak;
464 inLocalLabel();
465
466 // Unit in_div(Unit *q, const Unit *x, size_t xn, Unit y)
467 const Reg64& a = rax;
468 const Reg64& d = rdx;
 // div uses rdx as the upper half of the dividend, so the argument that arrived in rdx is moved to r10
469 mov(r10, rdx);
470
471#ifdef XBYAK64_WIN
472
473 const Reg64& q = rcx;
474 const Reg64& x = r10; // rdx
475 const Reg64& n = r8;
476 const Reg64& y = r9;
477#else
478 const Reg64& q = rdi;
479 const Reg64& x = rsi;
480 const Reg64& n = r10; // rdx
481 const Reg64& y = rcx;
482#endif
483 const int s = (int)sizeof(Unit);
484 lea(x, ptr [x + n * s - s]); // x = &x[xn - 1]
485 lea(q, ptr [q + n * s - s]); // q = &q[xn - 1]
486 xor_(d, d); // r = 0
487 L(".lp");
 // the remainder left in d by the previous div is the upper half of this dividend
488 mov(a, ptr [x]);
489 div(y); // [d:a] / y = a ... d ; q = a, r = d
490 mov(ptr [q], a);
491 sub(x, s);
492 sub(q, s);
493 sub(n, 1);
494 jnz(".lp");
 // return the final remainder
495 mov(a, d);
496 ret();
498 }
 /*
     Emit a routine taking (x, n, y) that returns x[] % y in rax.
     Same loop as genDiv() but the per-unit quotient is not stored.
     The loop body runs before n is tested, so n must be > 0.
 */
499 void genMod()
500 {
501 using namespace Xbyak;
502 inLocalLabel();
503
504 // Unit mod1(const Unit *x, size_t n, Unit y);
505 const Reg64& a = rax;
506 const Reg64& d = rdx;
 // rdx is needed by div, so the argument that arrived in rdx is moved to r10
507 mov(r10, rdx);
508
509#ifdef XBYAK64_WIN
510
511 const Reg64& x = rcx;
512 const Reg64& n = r10; // rdx
513 const Reg64& y = r8;
514#else
515 const Reg64& x = rdi;
516 const Reg64& n = rsi;
517 const Reg64& y = r10; // rdx
518#endif
519 const int s = (int)sizeof(Unit);
520 lea(x, ptr [x + n * s - s]); // x = &x[xn - 1]
521 xor_(d, d); // r = 0
522 L(".lp");
523 mov(a, ptr [x]);
524 div(y); // [d:a] / y = a ... d ; q = a, r = d
525 sub(x, s);
526 sub(n, 1);
527 jnz(".lp");
528 mov(a, d);
529
530 ret();
532 }
533public:
 // Emit all routines back to back into this generator's code buffer,
 // padding to a 16-byte boundary between consecutive routines.
534 Code()
535 {
537 genAddSub(true);
538 align(16);
540 genAddSub1(true);
541 align(16);
543 genAddSub(false);
544 align(16);
546 genMul();
547 align(16);
549 genDiv();
550 align(16);
552 genMod();
553 }
554};
555#endif
556
558{
559#ifdef MIE_USE_X64ASM
560 static bool isInit = false;
561 if (isInit) return;
562 isInit = true;
563 try {
564 static Code code;
565 } catch (std::exception& e) {
566 fprintf(stderr, "zmInit ERR:%s\n", e.what());
567 exit(1);
568 }
569#endif
570}
const mie::Vuint & r
Definition bn.cpp:28
const uint8 * getCurr() const
Definition xbyak.h:908
void shr(const Operand &op, const Reg8 &_cl)
Definition xbyak.h:730
void and_(const Operand &op, uint32 imm)
Definition xbyak.h:21
void xor_(const Operand &op, uint32 imm)
Definition xbyak.h:1279
void cmp(const Operand &op, uint32 imm)
Definition xbyak.h:94
void setc(const Operand &op)
Definition xbyak.h:691
const Reg8 al
Definition xbyak.h:2089
void align(size_t x=16, bool useMultiByteNop=true)
Definition xbyak.h:2475
void jmp(const Operand &op)
Definition xbyak.h:2144
void jge(const Label &label, LabelType type=T_AUTO)
Definition xbyak.h:337
void inc(const Operand &op)
Definition xbyak.h:307
void jne(const Label &label, LabelType type=T_AUTO)
Definition xbyak.h:369
void add(const Operand &op, uint32 imm)
Definition xbyak.h:6
void sub(const Operand &op, uint32 imm)
Definition xbyak.h:746
void neg(const Operand &op)
Definition xbyak.h:504
void adc(const Operand &op, uint32 imm)
Definition xbyak.h:3
void jz(const Label &label, LabelType type=T_AUTO)
Definition xbyak.h:425
void div(const Operand &op)
Definition xbyak.h:163
void mov(const Operand &reg1, const Operand &reg2)
Definition xbyak.h:2210
void mul(const Operand &op)
Definition xbyak.h:497
void lea(const Reg &reg, const Address &addr)
Definition xbyak.h:432
void jnz(const Label &label, LabelType type=T_AUTO)
Definition xbyak.h:401
void L(const std::string &label)
Definition xbyak.h:2126
void dec(const Operand &op)
Definition xbyak.h:162
void sbb(const Operand &op, uint32 imm)
Definition xbyak.h:685
const AddressFrame ptr
Definition xbyak.h:2090
Definition xbyak.h:104
uint64_t y
Definition sha3.cpp:34
Definition zm.h:60
void zmInit()
Definition zm.cpp:557
uint32_t Unit
Definition zm.h:66
struct sysio::chain::eosvmoc::code_cache_header __attribute__((packed))
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1181
unsigned int uint32_t
Definition stdint.h:126
unsigned __int64 uint64_t
Definition stdint.h:136
Definition bench.cpp:18
Code()
Definition memfunc.cpp:19
static Unit(* div1)(Unit *q, const Unit *x, size_t n, Unit y)
Definition zm.h:218
static bool(* sub1)(Unit *out, const Unit *x, size_t n, Unit y)
Definition zm.h:209
static bool(* add1)(Unit *out, const Unit *x, size_t n, Unit y)
Definition zm.h:200
static void(* mul1)(Unit *out, const Unit *x, size_t n, Unit y)
Definition zm.h:211
static Unit(* mod1)(const Unit *x, size_t n, Unit y)
Definition zm.h:223
static bool(* subN)(Unit *out, const Unit *x, const Unit *y, size_t n)
Definition zm.h:205
static bool(* addN)(Unit *out, const Unit *x, const Unit *y, size_t n)
Definition zm.h:196
Xbyak ; JIT assembler for x86(IA32)/x64 by C++.
CK_ULONG d
CK_RV ret
char * s