Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
bench.cpp
Go to the documentation of this file.
1#include "zm.h"
2#ifndef XBYAK_NO_OP_NAMES
3 #define XBYAK_NO_OP_NAMES
4#endif
5#include <xbyak/xbyak.h>
6#include <xbyak/xbyak_util.h>
7#include <stdio.h>
8#include <stdlib.h>
9#include <memory.h>
10#include <iostream>
11
12#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
13
14using namespace Xbyak;
15
16const int innerN = 1;
17
19
20 void makeBench(int N, int mode)
21 {
22#ifdef XBYAK64_WIN
23 const Reg64& pz = rcx;
24 const Reg64& px = rdx;
25 const Reg64& py = r8;
26#else
27 const Reg64& pz = rdi;
28 const Reg64& px = rsi;
29 const Reg64& py = rdx;
30#endif
31 mov(r10, pz);
32 mov(r9, px);
33 mov(r8, py);
34 push(r12);
35 push(r13);
36
37 mov(ecx, N);
38 L(".lp");
39 for (int i = 0; i < innerN; i++) {
40 switch (mode) {
41 case 0:
42 mov(r10, ptr [px]);
43 mov(r11, ptr [px + 8]);
44 mov(r12, ptr [px + 16]);
45 mov(r13, ptr [px + 24]);
46 add(r10, r10);
47 adc(r8, r11);
48 adc(r9, r12);
49 adc(py, r13);
50 break;
51 case 1:
52 add(r10, ptr [px]);
53 adc(r8, ptr [px + 8]);
54 adc(r9, ptr [px + 16]);
55 adc(py, ptr [px + 24]);
56 break;
57 }
58 }
59 sub(ecx, 1);
60 jnz(".lp");
61 xor_(eax, eax);
62 pop(r13);
63 pop(r12);
64 ret();
65 }
66 /*
67 [t4:t3:t2:t1:t0] <- py[3:2:1:0] * x
68 */
69 void makeMul4x1(const Reg64& t4, const Reg64& t3, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& py, const Reg64& x)
70 {
71 const Reg64& a = rax;
72 const Reg64& d = rdx;
73
74 mov(a, ptr [py]);
75 mul(x);
76 mov(t0, a);
77 mov(t1, d);
78 mov(a, ptr [py + 8]);
79 mul(x);
80 xor_(t2, t2);
81 add(t1, a);
82 adc(t2, d);
83 mov(a, ptr [py + 16]);
84 mul(x);
85 xor_(t3, t3);
86 add(t2, a);
87 adc(t3, d);
88 mov(a, ptr [py + 24]);
89 mul(x);
90 xor_(t4, t4);
91 add(t3, a);
92 adc(t4, d);
93 }
94};
95
96mie::Vuint Put(const uint64_t *x, size_t n)
97{
98 mie::Vuint t;
99 t.set(x, n);
100 std::cout << t << std::endl;
101 return t;
102}
103
104void bench(int mode)
105{
106 const int N = 100000;
107 Code code;
108 code.makeBench(N, mode);
109 int (*p)(uint64_t*, const uint64_t*, const uint64_t*) = code.getCode<int (*)(uint64_t*, const uint64_t*, const uint64_t*)>();
110
111 uint64_t a[4] = { uint64_t(-1), uint64_t(-2), uint64_t(-3), 544443221 };
112 uint64_t b[4] = { uint64_t(-123), uint64_t(-3), uint64_t(-4), 222222222 };
113 uint64_t c[5] = { 0, 0, 0, 0, 0 };
114
115 const int M = 100;
117 for (int i = 0; i < M; i++) {
118 clk.begin();
119 p(c, a, b);
120 clk.end();
121 }
122 printf("%.2fclk\n", clk.getClock() / double(M) / double(N) / innerN);
123}
124
126 Call(const void **p)
127 {
128 const void *f = (const void *)getCurr();
129 sub();
130 align(16);
131 *p = (const void*)getCurr();
132 mov(eax, 3);
133 call(f);
134 ret();
135 }
136 void sub()
137 {
138 add(eax, eax);
139 ret();
140 }
141};
142
143int main(int argc, char *argv[])
144{
145 argc--, argv++;
146 /*
147 Core i7
148 add : 8.0clk
149 mul1: 10.7clk
150 mul2: 17.5clk
151 */
152 try {
153 puts("test0");
154 bench(0);
155 puts("test1");
156 bench(1);
157 int (*f)();
158 Call call((const void**)&f);
159 printf("%d\n", f());
160 } catch (std::exception& e) {
161 fprintf(stderr, "ExpCode ERR:%s\n", e.what());
162 }
163}
mie::Vuint Put(const uint64_t *x, size_t n)
Definition bench.cpp:96
const int innerN
Definition bench.cpp:16
const mie::Vuint & p
Definition bn.cpp:27
const uint8 * getCurr() const
Definition xbyak.h:908
void call(const Operand &op)
Definition xbyak.h:2150
void xor_(const Operand &op, uint32 imm)
Definition xbyak.h:1279
void align(size_t x=16, bool useMultiByteNop=true)
Definition xbyak.h:2475
const Reg32 eax
Definition xbyak.h:2087
const Reg32 ecx
Definition xbyak.h:2087
void add(const Operand &op, uint32 imm)
Definition xbyak.h:6
void sub(const Operand &op, uint32 imm)
Definition xbyak.h:746
void adc(const Operand &op, uint32 imm)
Definition xbyak.h:3
void mov(const Operand &reg1, const Operand &reg2)
Definition xbyak.h:2210
void pop(const Operand &op)
Definition xbyak.h:2190
void mul(const Operand &op)
Definition xbyak.h:497
void jnz(const Label &label, LabelType type=T_AUTO)
Definition xbyak.h:401
void L(const std::string &label)
Definition xbyak.h:2126
void push(const Operand &op)
Definition xbyak.h:2189
const AddressFrame ptr
Definition xbyak.h:2090
uint64 getClock() const
Definition xbyak_util.h:401
char ** argv
Definition xbyak.h:104
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1181
fc::variant call(const std::string &url, const std::string &path, const T &v)
Definition main.cpp:258
const int N
Definition quantize.cpp:54
unsigned __int64 uint64_t
Definition stdint.h:136
void sub()
Definition bench.cpp:136
Call(const void **p)
Definition bench.cpp:126
Definition bench.cpp:18
void makeBench(int N, int mode)
Definition bench.cpp:20
void makeMul4x1(const Reg64 &t4, const Reg64 &t3, const Reg64 &t2, const Reg64 &t1, const Reg64 &t0, const Reg64 &py, const Reg64 &x)
Definition bench.cpp:69
void set(T x)
Definition zm.h:341
void bench()
Definition test_zm.cpp:848
Xbyak ; JIT assembler for x86(IA32)/x64 by C++.
CK_ULONG d
CK_RV ret