Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
x86_64.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <sysio/vm/allocator.hpp>
4#include <sysio/vm/exceptions.hpp>
5#include <sysio/vm/signals.hpp>
6#include <sysio/vm/softfloat.hpp>
7#include <sysio/vm/types.hpp>
8#include <sysio/vm/utils.hpp>
9
10#include <cassert>
11#include <cstddef>
12#include <cstdint>
13#include <cstring>
14#include <variant>
15#include <vector>
16#include <cpuid.h>
17
18
19namespace sysio { namespace vm {
20
21
22
23 // Random notes:
24 // - branch instructions return the address that will need to be updated
25 // - label instructions return the address of the target
26 // - fix_branch will be called when the branch target is resolved
27 // - It would make everything more efficient to make RAX always represent the top of
28 // the stack.
29 //
30 // - The base of memory is stored in rsi
31 //
32 // - FIXME: Factor the machine instructions into a separate assembler class.
33 template<typename Context>
35 public:
// Constructs the writer and eagerly emits the fixed code that precedes the
// wasm function bodies, in this order:
//   1. four error-handler stubs (fp error, call_indirect error, type error,
//      stack overflow), budgeted at 16 bytes each;
//   2. one host-call thunk per imported function, budgeted at
//      40 (+10 when Context::async_backtrace()) bytes each;
//   3. if the module has a table, a jump table for call_indirect with a
//      constant 17 bytes per entry.
// Each section is followed by an assert that the hand-counted byte budget
// equals what was actually emitted.
// The code segment is opened via alloc.start_code() and remembered in
// _code_segment_base.
// NOTE(review): source_bytes is not referenced anywhere in this constructor.
36 machine_code_writer(growable_allocator& alloc, std::size_t source_bytes, module& mod) :
37 _mod(mod), _code_segment_base(alloc.start_code()) {
38 const std::size_t code_size = 4 * 16; // 4 error handlers, each is 16 bytes.
39 _code_start = _mod.allocator.alloc<unsigned char>(code_size);
40 _code_end = _code_start + code_size;
41 code = _code_start;
42
43 // always emit these functions
44 fpe_handler = emit_error_handler(&on_fp_error);
45 call_indirect_handler = emit_error_handler(&on_call_indirect_error);
46 type_error_handler = emit_error_handler(&on_type_error);
47 stack_overflow_handler = emit_error_handler(&on_stack_overflow);
48
49 assert(code == _code_end); // verify that the manual instruction count is correct
50
51 // emit host functions
52 const uint32_t num_imported = mod.get_imported_functions_size();
53 const std::size_t host_functions_size = (40 + 10 * Context::async_backtrace()) * num_imported;
54 _code_start = _mod.allocator.alloc<unsigned char>(host_functions_size);
55 _code_end = _code_start + host_functions_size;
56 // code already set
57 for(uint32_t i = 0; i < num_imported; ++i) {
58 start_function(code, i);
59 emit_host_call(i);
60 }
61 assert(code == _code_end);
62
63 jmp_table = code;
64 if (_mod.tables.size() > 0) {
65 // Each function table entry consumes exactly 17 bytes (counted
66 // manually). The size must be constant, so that call_indirect
67 // can use random access
68 _table_element_size = 17;
69 const std::size_t table_size = _table_element_size*_mod.tables[0].table.size();
70 _code_start = _mod.allocator.alloc<unsigned char>(table_size);
71 _code_end = _code_start + table_size;
72 // code already set
73 for(uint32_t i = 0; i < _mod.tables[0].table.size(); ++i) {
74 uint32_t fn_idx = _mod.tables[0].table[i];
75 if (fn_idx < _mod.fast_functions.size()) {
76 // Valid entry: 6 (cmp) + 6 (je) + 5 (jmp) = 17 bytes.
76 // cmp _mod.fast_functions[fn_idx], %edx
77 emit_bytes(0x81, 0xfa);
78 emit_operand32(_mod.fast_functions[fn_idx]);
79 // je fn
80 emit_bytes(0x0f, 0x84);
81 register_call(emit_branch_target32(), fn_idx);
82 // jmp ERROR
83 emit_bytes(0xe9);
84 fix_branch(emit_branch_target32(), type_error_handler);
85 } else {
86 // Invalid entry: 5 (jmp) + 12 bytes of 0xcc padding = 17 bytes.
86 // jmp ERROR
87 emit_bytes(0xe9);
88 // default for out-of-range functions
89 fix_branch(emit_branch_target32(), call_indirect_handler);
90 // padding
91 emit_bytes(0xcc, 0xcc, 0xcc, 0xcc);
92 emit_bytes(0xcc, 0xcc, 0xcc, 0xcc);
93 emit_bytes(0xcc, 0xcc, 0xcc, 0xcc);
94 }
95 }
96 assert(code == _code_end);
97 }
98 }
// Closes the code segment whose base was recorded from alloc.start_code() in
// the constructor. NOTE(review): the meaning of the <true> template argument
// of end_code is not visible here - confirm against the allocator.
99 ~machine_code_writer() { _mod.allocator.end_code<true>(_code_segment_base); }
100
101 static constexpr std::size_t max_prologue_size = 21;
102 static constexpr std::size_t max_epilogue_size = 10;
104 _ft = &_mod.types[_mod.functions[funcnum]];
105 // FIXME: This is not a tight upper bound
106 const std::size_t instruction_size_ratio_upper_bound = use_softfloat?(Context::async_backtrace()?63:49):79;
107 std::size_t code_size = max_prologue_size + _mod.code[funcnum].size * instruction_size_ratio_upper_bound + max_epilogue_size;
108 _code_start = _mod.allocator.alloc<unsigned char>(code_size);
109 _code_end = _code_start + code_size;
110 code = _code_start;
111 start_function(code, funcnum + _mod.get_imported_functions_size());
112 // pushq RBP
113 emit_bytes(0x55);
114 // movq RSP, RBP
115 emit_bytes(0x48, 0x89, 0xe5);
116 // No more than 2^32-1 locals. Already validated by the parser.
117 uint32_t count = 0;
118 for(uint32_t i = 0; i < locals.size(); ++i) {
119 assert(uint64_t(count) + locals[i].count <= 0xFFFFFFFFu);
120 count += locals[i].count;
121 }
122 _local_count = count;
123 if (_local_count > 0) {
124 // xor %rax, %rax
125 emit_bytes(0x48, 0x31, 0xc0);
126 if (_local_count > 14) { // only use a loop if it would save space
127 // mov $count, %ecx
128 emit_bytes(0xb9);
129 emit_operand32(_local_count);
130 // loop:
131 void* loop = code;
132 // pushq %rax
133 emit_bytes(0x50);
134 // dec %ecx
135 emit_bytes(0xff, 0xc9);
136 // jnz loop
137 emit_bytes(0x0f, 0x85);
138 fix_branch(emit_branch_target32(), loop);
139 } else {
140 for (uint32_t i = 0; i < _local_count; ++i) {
141 // pushq %rax
142 emit_bytes(0x50);
143 }
144 }
145 }
146 assert((char*)code <= (char*)_code_start + max_prologue_size);
147 }
149#ifndef NDEBUG
150 void * epilogue_start = code;
151#endif
152 if(ft.return_count != 0) {
153 // pop RAX
154 emit_bytes(0x58);
155 }
156 if (_local_count & 0xF0000000u) unimplemented();
157 emit_multipop(_local_count);
158 // popq RBP
159 emit_bytes(0x5d);
160 // retq
161 emit_bytes(0xc3);
162 assert((char*)code <= (char*)epilogue_start + max_epilogue_size);
163 }
164
166 auto icount = fixed_size_instr(16);
167 emit_error_handler(&on_unreachable);
168 }
// wasm `nop`: intentionally emits no machine code.
169 void emit_nop() {}
// wasm `end`: emits nothing and returns the current write position, i.e. the
// address a label resolved at this point refers to.
170 void* emit_end() { return code; }
// wasm `return`: implemented as a plain branch. Returns whatever emit_br
// returns (the branch location that still has to be fixed up).
171 void* emit_return(uint32_t depth_change) {
172 // Return is defined as equivalent to branching to the outermost label
173 return emit_br(depth_change);
174 }
// wasm `block`: intentionally emits no machine code.
175 void emit_block() {}
// wasm `loop`: emits nothing and returns the current write position, which is
// the target of branches back to the loop header.
176 void* emit_loop() { return code; }
// wasm `if`: pops the condition and emits a conditional jump taken when the
// condition is zero. The jump target is not known yet; the returned pointer
// is the 32-bit branch operand to be patched later (see fix_branch).
// Byte budget: 1 (pop) + 2 (test) + 2 (jz opcode) + 4 (rel32) = 9.
177 void* emit_if() {
178 auto icount = fixed_size_instr(9);
179 // pop RAX
180 emit_bytes(0x58);
181 // test EAX, EAX
182 emit_bytes(0x85, 0xC0);
183 // jz DEST
184 emit_bytes(0x0F, 0x84);
185 return emit_branch_target32();
186 }
// wasm `else`: first emits an unconditional jump (emit_br(0)) that lets the
// `then` arm skip the `else` arm, then patches the `if` branch at if_loc so
// that a false condition lands right after that jump.
// Returns the location of the new jump's operand for later fix-up.
187 void* emit_else(void* if_loc) {
188 auto icount = fixed_size_instr(5);
189 void* result = emit_br(0);
190 fix_branch(if_loc, code);
191 return result;
192 }
// wasm `br`: adjusts the stack for depth_change via emit_multipop and emits
// an unconditional jmp rel32. Returns the location of the rel32 operand so
// the caller can patch it once the label address is known.
193 void* emit_br(uint32_t depth_change) {
194 auto icount = variable_size_instr(5, 17);
195 // add RSP, depth_change * 8
196 emit_multipop(depth_change);
197 // jmp DEST
198 emit_bytes(0xe9);
199 return emit_branch_target32();
200 }
// wasm `br_if`: pops the condition and branches when it is non-zero.
// Returns the location of the rel32 operand of the jump to the (still
// unresolved) destination.
// Two shapes are emitted:
//  - no stack adjustment needed: a single `jnz DEST`;
//  - otherwise: `jz SKIP`, stack adjustment, `jmp DEST`, with SKIP patched to
//    the instruction following the jmp.
// NOTE(review): depth_change == 0x80000001u is treated like 0 (no adjustment);
// the encoding of that value is defined by the caller - confirm.
201 void* emit_br_if(uint32_t depth_change) {
202 auto icount = variable_size_instr(9, 26);
203 // pop RAX
204 emit_bytes(0x58);
205 // test EAX, EAX
206 emit_bytes(0x85, 0xC0);
207
208 if(depth_change == 0u || depth_change == 0x80000001u) {
209 // jnz DEST
210 emit_bytes(0x0F, 0x85);
211 return emit_branch_target32();
212 } else {
213 // jz SKIP
214 emit_bytes(0x0f, 0x84);
215 void* skip = emit_branch_target32();
216 // add depth_change*8, %rsp
217 emit_multipop(depth_change);
218 // jmp DEST
219 emit_bytes(0xe9);
220 void* result = emit_branch_target32();
221 // SKIP:
222 fix_branch(skip, code);
223 return result;
224 }
225 }
226
227 // Generate a binary search.
// Emits the code for the next br_table entry (entry number _i) as part of a
// binary search over the index held in %eax.
// `stack` holds the pending [min, max) ranges together with the branch that
// must land at the start of each range. Ranges wider than one value are split
// at their midpoint with `cmp` / `jae`; a range of exactly one value is the
// current case, for which the jump to the case target is produced.
// Returns the location of the rel32 operand that must be patched with the
// case's target. When no stack adjustment is needed and a pending branch
// already leads here, that branch itself is returned instead of emitting an
// extra jmp.
229 void* emit_case(uint32_t depth_change) {
230 while(true) {
231 assert(!stack.empty() && "The parser is supposed to handle the number of elements in br_table.");
232 auto [min, max, label] = stack.back();
233 stack.pop_back();
234 if (label) {
235 fix_branch(label, _this->code);
236 }
237 if (max - min > 1) {
238 // Emit a comparison to the midpoint of the current range
239 uint32_t mid = min + (max - min)/2;
240 // cmp $mid, %eax
241 _this->emit_bytes(0x3d);
242 _this->emit_operand32(mid);
243 // jae MID
244 _this->emit_bytes(0x0f, 0x83);
245 void* mid_label = _this->emit_branch_target32();
246 // Upper half is pushed first so the lower half is handled next.
246 stack.push_back({mid,max,mid_label});
247 stack.push_back({min,mid,nullptr});
248 } else {
249 assert(min == static_cast<uint32_t>(_i));
250 _i++;
251 if (depth_change == 0u || depth_change == 0x80000001u) {
252 if(label) {
253 return label;
254 } else {
255 // jmp TARGET
256 _this->emit_bytes(0xe9);
257 return _this->emit_branch_target32();
258 }
259 } else {
260 // adjust the stack for depth_change before leaving
261 _this->emit_multipop(depth_change);
262 // jmp TARGET
263 _this->emit_bytes(0xe9);
264 return _this->emit_branch_target32();
265 }
266 }
267 }
268
269 }
// Emits the default target of the br_table. It is handled exactly like a
// regular case; afterwards no range may be left pending.
270 void* emit_default(uint32_t depth_change) {
271 void* result = emit_case(depth_change);
272 assert(stack.empty() && "unexpected default.");
273 return result;
274 }
276 int _i = 0;
277 struct stack_item {
280 void* branch_target = nullptr;
281 };
282 // stores a stack of ranges to be handled.
283 // the ranges are strictly contiguous and non-overlapping, with
284 // the lower values at the back.
285 std::vector<stack_item> stack;
286 };
288 // pop %rax
289 emit_bytes(0x58);
290 // Increase the size by one to account for the default.
291 // The current algorithm handles this correctly, without
292 // any special cases.
293 return { this, 0, { {0, table_size+1, nullptr} } };
294 }
295
// Records that the branch operand at `ptr` must point at function `funcnum`.
// If the function's address is already known (the entry holds a void*), the
// branch is patched immediately; otherwise `ptr` is queued in the entry's
// list of pending relocations, to be resolved by start_function.
// NOTE(review): relies on a freshly resized entry holding the
// std::vector<void*> alternative - confirm against the declaration of
// _function_relocations.
296 void register_call(void* ptr, uint32_t funcnum) {
297 auto& vec = _function_relocations;
298 if(funcnum >= vec.size()) vec.resize(funcnum + 1);
299 if(void** addr = std::get_if<void*>(&vec[funcnum])) {
300 fix_branch(ptr, *addr);
301 } else {
302 std::get<std::vector<void*>>(vec[funcnum]).push_back(ptr);
303 }
304 }
// Declares that function `funcnum` begins at `func_start`: patches every
// branch queued for it by register_call and then stores the address so later
// calls are resolved immediately.
// std::get throws std::bad_variant_access if the entry already holds an
// address, i.e. if the same function is started twice.
305 void start_function(void* func_start, uint32_t funcnum) {
306 auto& vec = _function_relocations;
307 if(funcnum >= vec.size()) vec.resize(funcnum + 1);
308 for(void* branch : std::get<std::vector<void*>>(vec[funcnum])) {
309 fix_branch(branch, func_start);
310 }
311 vec[funcnum] = func_start;
312 }
313
// wasm `call`: emits a direct `callq rel32` to function `funcnum`, wrapped in
// the call-depth check. After the call the arguments are removed from the
// stack (one slot per parameter) and, if the callee returns a value, %rax is
// pushed as the result. The call target is resolved through register_call.
314 void emit_call(const func_type& ft, uint32_t funcnum) {
315 auto icount = variable_size_instr(15, 23);
316 emit_check_call_depth();
317 // callq TARGET
318 emit_bytes(0xe8);
319 void * branch = emit_branch_target32();
320 emit_multipop(ft.param_types.size());
321 register_call(branch, funcnum);
322 if(ft.return_count != 0)
323 // pushq %rax
324 emit_bytes(0x50);
325 emit_check_call_depth_end();
326 }
327
// wasm `call_indirect`: pops the table index, bounds-checks it against the
// size of table 0 (jumping to call_indirect_handler when out of range),
// computes the address of the matching jump-table entry
// (jmp_table + index * _table_element_size) and calls it with the expected
// (alias-resolved) type index in %edx; the jump-table entry compares %edx
// against the callee's type. Afterwards the arguments are removed and the
// result, if any, is pushed.
328 void emit_call_indirect(const func_type& ft, uint32_t functypeidx) {
329 auto icount = variable_size_instr(43, 51);
330 emit_check_call_depth();
331 auto& table = _mod.tables[0].table;
332 functypeidx = _mod.type_aliases[functypeidx];
333 // pop %rax
334 emit_bytes(0x58);
335 // cmp $size, %rax
336 emit_bytes(0x48, 0x3d);
337 emit_operand32(table.size());
338 // jae ERROR
339 emit_bytes(0x0f, 0x83);
340 fix_branch(emit_branch_target32(), call_indirect_handler);
341 // leaq table(%rip), %rdx
342 emit_bytes(0x48, 0x8d, 0x15);
343 fix_branch(emit_branch_target32(), jmp_table);
344 // imul $17, %eax, %eax
345 assert(_table_element_size <= 127); // must fit in 8-bit signed value for imul
346 emit_bytes(0x6b, 0xc0, _table_element_size);
347 // addq %rdx, %rax
348 emit_bytes(0x48, 0x01, 0xd0);
349 // mov $funtypeidx, %edx
350 emit_bytes(0xba);
351 emit_operand32(functypeidx);
352 // callq *%rax
353 emit_bytes(0xff, 0xd0);
354 emit_multipop(ft.param_types.size());
355 if(ft.return_count != 0)
356 // pushq %rax
357 emit_bytes(0x50);
358 emit_check_call_depth_end();
359 }
360
// wasm `drop`: discards the top stack slot by popping it into RAX.
361 void emit_drop() {
362 // pop RAX
363 emit_bytes(0x58);
364 }
365
// wasm `select`: pops the condition (RAX) and the second operand (RCX); the
// first operand stays in the top stack slot. If the condition is non-zero RCX
// is replaced by the first operand, then RCX is written back to the top slot,
// so the slot ends up holding `cond ? first : second`.
// Comments use source-then-destination operand order, like the rest of the
// file.
366 void emit_select() {
367 auto icount = fixed_size_instr(13);
368 // popq RAX
369 emit_bytes(0x58);
370 // popq RCX
371 emit_bytes(0x59);
372 // test EAX, EAX
373 emit_bytes(0x85, 0xc0);
374 // cmovnzq (RSP), RCX
375 emit_bytes(0x48, 0x0f, 0x45, 0x0c, 0x24);
376 // movq RCX, (RSP)
377 emit_bytes(0x48, 0x89, 0x0c, 0x24);
378 }
379
380 void emit_get_local(uint32_t local_idx) {
381 auto icount = fixed_size_instr(8);
382 // stack layout:
383 // param0 <----- %rbp + 8*(nparams + 1)
384 // param1
385 // param2
386 // ...
387 // paramN
388 // return address
389 // old %rbp <------ %rbp
390 // local0 <------ %rbp - 8
391 // local1
392 // ...
393 // localN
394 if (local_idx < _ft->param_types.size()) {
395 // mov 8*(local_idx)(%RBP), RAX
396 emit_bytes(0x48, 0x8b, 0x85);
397 emit_operand32(8 * (_ft->param_types.size() - local_idx + 1));
398 // push RAX
399 emit_bytes(0x50);
400 } else {
401 // mov -8*(local_idx+1)(%RBP), RAX
402 emit_bytes(0x48, 0x8b, 0x85);
403 emit_operand32(-8 * (local_idx - _ft->param_types.size() + 1));
404 // push RAX
405 emit_bytes(0x50);
406 }
407 }
408
409 void emit_set_local(uint32_t local_idx) {
410 auto icount = fixed_size_instr(8);
411 if (local_idx < _ft->param_types.size()) {
412 // pop RAX
413 emit_bytes(0x58);
414 // mov RAX, -8*local_idx(EBP)
415 emit_bytes(0x48, 0x89, 0x85);
416 emit_operand32(8 * (_ft->param_types.size() - local_idx + 1));
417 } else {
418 // pop RAX
419 emit_bytes(0x58);
420 // mov RAX, -8*local_idx(EBP)
421 emit_bytes(0x48, 0x89, 0x85);
422 emit_operand32(-8 * (local_idx - _ft->param_types.size() + 1));
423 }
424 }
425
426 void emit_tee_local(uint32_t local_idx) {
427 auto icount = fixed_size_instr(9);
428 if (local_idx < _ft->param_types.size()) {
429 // pop RAX
430 emit_bytes(0x58);
431 // push RAX
432 emit_bytes(0x50);
433 // mov RAX, -8*local_idx(EBP)
434 emit_bytes(0x48, 0x89, 0x85);
435 emit_operand32(8 * (_ft->param_types.size() - local_idx + 1));
436 } else {
437 // pop RAX
438 emit_bytes(0x58);
439 // push RAX
440 emit_bytes(0x50);
441 // mov RAX, -8*local_idx(EBP)
442 emit_bytes(0x48, 0x89, 0x85);
443 emit_operand32(-8 * (local_idx - _ft->param_types.size() + 1));
444 }
445 }
446
// wasm `global.get`: embeds the address of the global's current value as a
// 64-bit immediate, loads through it and pushes the result.
// 32-bit types (i32/f32) use a 32-bit load, 64-bit types (i64/f64) a 64-bit
// load. Any other content_type matches no case and emits nothing.
447 void emit_get_global(uint32_t globalidx) {
448 auto icount = variable_size_instr(13, 14);
449 auto& gl = _mod.globals[globalidx];
450 void *ptr = &gl.current.value;
451 switch(gl.type.content_type) {
452 case types::i32:
453 case types::f32:
454 // movabsq $ptr, %rax
455 emit_bytes(0x48, 0xb8);
456 emit_operand_ptr(ptr);
457 // movl (%rax), %eax
458 emit_bytes(0x8b, 0x00);
459 // push %rax
460 emit_bytes(0x50);
461 break;
462 case types::i64:
463 case types::f64:
464 // movabsq $ptr, %rax
465 emit_bytes(0x48, 0xb8);
466 emit_operand_ptr(ptr);
467 // movq (%rax), %rax
468 emit_bytes(0x48, 0x8b, 0x00);
469 // push %rax
470 emit_bytes(0x50);
471 break;
472 }
473 }
// wasm `global.set`: pops the new value and stores it through the embedded
// address of the global's current value. The store is always 64 bits wide,
// independent of the global's declared type.
474 void emit_set_global(uint32_t globalidx) {
475 auto icount = fixed_size_instr(14);
476 auto& gl = _mod.globals[globalidx];
477 void *ptr = &gl.current.value;
478 // popq %rcx
479 emit_bytes(0x59);
480 // movabsq $ptr, %rax
481 emit_bytes(0x48, 0xb8);
482 emit_operand_ptr(ptr);
483 // movq %rcx, (%rax)
484 emit_bytes(0x48, 0x89, 0x08);
485 }
486
// wasm `i32.load`: 32-bit load. The alignment hint is ignored; the static
// offset and the bytes of the final load instruction are handed to
// emit_load_impl (defined elsewhere in the class).
487 void emit_i32_load(uint32_t /*alignment*/, uint32_t offset) {
488 auto icount = variable_size_instr(7, 15);
489 // movl (RAX), EAX
490 emit_load_impl(offset, 0x8b, 0x00);
491 }
492
// wasm `i64.load`: 64-bit load (REX.W-prefixed mov). Alignment hint ignored.
493 void emit_i64_load(uint32_t /*alignment*/, uint32_t offset) {
494 auto icount = variable_size_instr(8, 16);
495 // movq (RAX), RAX
496 emit_load_impl(offset, 0x48, 0x8b, 0x00);
497 }
498
// wasm `f32.load`: the value is moved as raw 32 bits through EAX, using the
// same instruction bytes as emit_i32_load. Alignment hint ignored.
499 void emit_f32_load(uint32_t /*alignment*/, uint32_t offset) {
500 auto icount = variable_size_instr(7, 15);
501 // movl (RAX), EAX
502 emit_load_impl(offset, 0x8b, 0x00);
503 }
504
// wasm `f64.load`: the value is moved as raw 64 bits through RAX, using the
// same instruction bytes as emit_i64_load. Alignment hint ignored.
505 void emit_f64_load(uint32_t /*alignment*/, uint32_t offset) {
506 auto icount = variable_size_instr(8, 16);
507 // movq (RAX), RAX
508 emit_load_impl(offset, 0x48, 0x8b, 0x00);
509 }
510
// wasm `i32.load8_s`: loads one byte and sign-extends it to 32 bits.
511 void emit_i32_load8_s(uint32_t /*alignment*/, uint32_t offset) {
512 auto icount = variable_size_instr(8, 16);
513 // movsbl (RAX), EAX;
514 emit_load_impl(offset, 0x0F, 0xbe, 0x00);
515 }
516
// wasm `i32.load16_s`: loads two bytes and sign-extends them to 32 bits.
517 void emit_i32_load16_s(uint32_t /*alignment*/, uint32_t offset) {
518 auto icount = variable_size_instr(8, 16);
519 // movswl (RAX), EAX;
520 emit_load_impl(offset, 0x0F, 0xbf, 0x00);
521 }
522
// wasm `i32.load8_u`: loads one byte and zero-extends it to 32 bits.
523 void emit_i32_load8_u(uint32_t /*alignment*/, uint32_t offset) {
524 auto icount = variable_size_instr(8, 16);
525 // movzbl (RAX), EAX;
526 emit_load_impl(offset, 0x0f, 0xb6, 0x00);
527 }
528
// wasm `i32.load16_u`: loads two bytes and zero-extends them to 32 bits.
529 void emit_i32_load16_u(uint32_t /*alignment*/, uint32_t offset) {
530 auto icount = variable_size_instr(8, 16);
531 // movzwl (RAX), EAX;
532 emit_load_impl(offset, 0x0f, 0xb7, 0x00);
533 }
534
// wasm `i64.load8_s`: loads one byte and sign-extends it to 64 bits.
535 void emit_i64_load8_s(uint32_t /*alignment*/, uint32_t offset) {
536 auto icount = variable_size_instr(9, 17);
537 // movsbq (RAX), RAX;
538 emit_load_impl(offset, 0x48, 0x0F, 0xbe, 0x00);
539 }
540
// wasm `i64.load16_s`: loads two bytes and sign-extends them to 64 bits.
541 void emit_i64_load16_s(uint32_t /*alignment*/, uint32_t offset) {
542 auto icount = variable_size_instr(9, 17);
543 // movswq (RAX), RAX;
544 emit_load_impl(offset, 0x48, 0x0F, 0xbf, 0x00);
545 }
546
// wasm `i64.load32_s`: loads four bytes and sign-extends them to 64 bits.
547 void emit_i64_load32_s(uint32_t /*alignment*/, uint32_t offset) {
548 auto icount = variable_size_instr(8, 16);
549 // movslq (RAX), RAX
550 emit_load_impl(offset, 0x48, 0x63, 0x00);
551 }
552
// wasm `i64.load8_u`: loads one byte, zero-extended. The 32-bit form is
// sufficient because writing EAX clears the upper half of RAX on x86-64.
553 void emit_i64_load8_u(uint32_t /*alignment*/, uint32_t offset) {
554 auto icount = variable_size_instr(8, 16);
555 // movzbl (RAX), EAX;
556 emit_load_impl(offset, 0x0f, 0xb6, 0x00);
557 }
558
// wasm `i64.load16_u`: loads two bytes, zero-extended. The 32-bit form is
// sufficient because writing EAX clears the upper half of RAX on x86-64.
559 void emit_i64_load16_u(uint32_t /*alignment*/, uint32_t offset) {
560 auto icount = variable_size_instr(8, 16);
561 // movzwl (RAX), EAX;
562 emit_load_impl(offset, 0x0f, 0xb7, 0x00);
563 }
564
// wasm `i64.load32_u`: loads four bytes, zero-extended. A plain 32-bit mov is
// sufficient because writing EAX clears the upper half of RAX on x86-64.
565 void emit_i64_load32_u(uint32_t /*alignment*/, uint32_t offset) {
566 auto icount = variable_size_instr(7, 15);
567 // movl (RAX), EAX
568 emit_load_impl(offset, 0x8b, 0x00);
569 }
570
// wasm `i32.store`: 32-bit store. The alignment hint is ignored; the static
// offset and the bytes of the final store instruction are handed to
// emit_store_impl (defined elsewhere in the class).
571 void emit_i32_store(uint32_t /*alignment*/, uint32_t offset) {
572 auto icount = variable_size_instr(7, 15);
573 // movl ECX, (RAX)
574 emit_store_impl(offset, 0x89, 0x08);
575 }
576
// wasm `i64.store`: 64-bit store (REX.W-prefixed mov). Alignment hint ignored.
577 void emit_i64_store(uint32_t /*alignment*/, uint32_t offset) {
578 auto icount = variable_size_instr(8, 16);
579 // movq RCX, (RAX)
580 emit_store_impl(offset, 0x48, 0x89, 0x08);
581 }
582
// wasm `f32.store`: stores the raw 32 bits held in ECX, using the same
// instruction bytes as emit_i32_store. Alignment hint ignored.
583 void emit_f32_store(uint32_t /*alignment*/, uint32_t offset) {
584 auto icount = variable_size_instr(7, 15);
585 // movl ECX, (RAX)
586 emit_store_impl(offset, 0x89, 0x08);
587 }
588
// wasm `f64.store`: stores the raw 64 bits held in RCX, using the same
// instruction bytes as emit_i64_store. Alignment hint ignored.
589 void emit_f64_store(uint32_t /*alignment*/, uint32_t offset) {
590 auto icount = variable_size_instr(8, 16);
591 // movq RCX, (RAX)
592 emit_store_impl(offset, 0x48, 0x89, 0x08);
593 }
594
// wasm `i32.store8`: stores the low byte of the value.
595 void emit_i32_store8(uint32_t /*alignment*/, uint32_t offset) {
596 auto icount = variable_size_instr(7, 15);
597 // movb CL, (RAX)
598 emit_store_impl(offset, 0x88, 0x08);
599 }
600
// wasm `i32.store16`: stores the low 16 bits of the value (0x66
// operand-size prefix selects the 16-bit form of the mov).
601 void emit_i32_store16(uint32_t /*alignment*/, uint32_t offset) {
602 auto icount = variable_size_instr(8, 16);
603 // movw CX, (RAX)
604 emit_store_impl(offset, 0x66, 0x89, 0x08);
605 }
606
// wasm `i64.store8`: stores the low byte of the value; same instruction
// bytes as emit_i32_store8.
607 void emit_i64_store8(uint32_t /*alignment*/, uint32_t offset) {
608 auto icount = variable_size_instr(7, 15);
609 // movb CL, (RAX)
610 emit_store_impl(offset, 0x88, 0x08);
611 }
612
// wasm `i64.store16`: stores the low 16 bits of the value; same instruction
// bytes as emit_i32_store16.
613 void emit_i64_store16(uint32_t /*alignment*/, uint32_t offset) {
614 auto icount = variable_size_instr(8, 16);
615 // movw CX, (RAX)
616 emit_store_impl(offset, 0x66, 0x89, 0x08);
617 }
618
// wasm `i64.store32`: stores the low 32 bits of the value; same instruction
// bytes as emit_i32_store.
619 void emit_i64_store32(uint32_t /*alignment*/, uint32_t offset) {
620 auto icount = variable_size_instr(7, 15);
621 // movl ECX, (RAX)
622 emit_store_impl(offset, 0x89, 0x08);
623 }
624
626 auto icount = variable_size_instr(17, 35);
627 emit_setup_backtrace();
628 // pushq %rdi
629 emit_bytes(0x57);
630 // pushq %rsi
631 emit_bytes(0x56);
632 // movabsq $current_memory, %rax
633 emit_bytes(0x48, 0xb8);
634 emit_operand_ptr(&current_memory);
635 // call *%rax
636 emit_bytes(0xff, 0xd0);
637 // pop %rsi
638 emit_bytes(0x5e);
639 // pop %rdi
640 emit_bytes(0x5f);
641 emit_restore_backtrace();
642 // push %rax
643 emit_bytes(0x50);
644 }
646 auto icount = variable_size_instr(21, 39);
647 // popq %rax
648 emit_bytes(0x58);
649 emit_setup_backtrace();
650 // pushq %rdi
651 emit_bytes(0x57);
652 // pushq %rsi
653 emit_bytes(0x56);
654 // movq %rax, %rsi
655 emit_bytes(0x48, 0x89, 0xc6);
656 // movabsq $grow_memory, %rax
657 emit_bytes(0x48, 0xb8);
658 emit_operand_ptr(&grow_memory);
659 // call *%rax
660 emit_bytes(0xff, 0xd0);
661 // pop %rsi
662 emit_bytes(0x5e);
663 // pop %rdi
664 emit_bytes(0x5f);
665 emit_restore_backtrace();
666 // push %rax
667 emit_bytes(0x50);
668 }
669
671 auto icount = fixed_size_instr(6);
672 // mov $value, %eax
673 emit_bytes(0xb8);
674 emit_operand32(value);
675 // push %rax
676 emit_bytes(0x50);
677 }
678
680 auto icount = fixed_size_instr(11);
681 // movabsq $value, %rax
682 emit_bytes(0x48, 0xb8);
683 emit_operand64(value);
684 // push %rax
685 emit_bytes(0x50);
686 }
687
// wasm `f32.const`: materialises the constant as a 32-bit immediate in EAX
// (operand written by emit_operandf32) and pushes RAX.
// Byte budget: 1 (opcode) + 4 (imm32) + 1 (push) = 6.
688 void emit_f32_const(float value) {
689 auto icount = fixed_size_instr(6);
690 // mov $value, %eax
691 emit_bytes(0xb8);
692 emit_operandf32(value);
693 // push %rax
694 emit_bytes(0x50);
695 }
// wasm `f64.const`: materialises the constant as a 64-bit immediate in RAX
// (operand written by emit_operandf64) and pushes it.
// Byte budget: 2 (REX.W + opcode) + 8 (imm64) + 1 (push) = 11.
696 void emit_f64_const(double value) {
697 auto icount = fixed_size_instr(11);
698 // movabsq $value, %rax
699 emit_bytes(0x48, 0xb8);
700 emit_operandf64(value);
701 // push %rax
702 emit_bytes(0x50);
703 }
704
706 auto icount = fixed_size_instr(10);
707 // pop %rax
708 emit_bytes(0x58);
709 // xor %rcx, %rcx
710 emit_bytes(0x48, 0x31, 0xc9);
711 // test %eax, %eax
712 emit_bytes(0x85, 0xc0);
713 // setz %cl
714 emit_bytes(0x0f, 0x94, 0xc1);
715 // push %rcx
716 emit_bytes(0x51);
717 }
718
719 // i32 relops
// wasm `i32.eq`: delegates to emit_i32_relop with the second opcode byte of
// SETE (0x0f 0x94).
720 void emit_i32_eq() {
721 auto icount = fixed_size_instr(11);
722 // sete %dl
723 emit_i32_relop(0x94);
724 }
725
// wasm `i32.ne`: delegates to emit_i32_relop with the second opcode byte of
// SETNE (0x0f 0x95).
726 void emit_i32_ne() {
727 auto icount = fixed_size_instr(11);
728 // setne %dl
729 emit_i32_relop(0x95);
730 }
731
733 auto icount = fixed_size_instr(11);
734 // setl %dl
735 emit_i32_relop(0x9c);
736 }
737
739 auto icount = fixed_size_instr(11);
740 // setb %dl
741 emit_i32_relop(0x92);
742 }
743
745 auto icount = fixed_size_instr(11);
746 // setg %dl
747 emit_i32_relop(0x9f);
748 }
749
751 auto icount = fixed_size_instr(11);
752 // seta %dl
753 emit_i32_relop(0x97);
754 }
755
757 auto icount = fixed_size_instr(11);
758 // setle %dl
759 emit_i32_relop(0x9e);
760 }
761
763 auto icount = fixed_size_instr(11);
764 // setbe %dl
765 emit_i32_relop(0x96);
766 }
767
769 auto icount = fixed_size_instr(11);
770 // setge %dl
771 emit_i32_relop(0x9d);
772 }
773
775 auto icount = fixed_size_instr(11);
776 // setae %dl
777 emit_i32_relop(0x93);
778 }
779
781 auto icount = fixed_size_instr(11);
782 // pop %rax
783 emit_bytes(0x58);
784 // xor %rcx, %rcx
785 emit_bytes(0x48, 0x31, 0xc9);
786 // test %rax, %rax
787 emit_bytes(0x48, 0x85, 0xc0);
788 // setz %cl
789 emit_bytes(0x0f, 0x94, 0xc1);
790 // push %rcx
791 emit_bytes(0x51);
792 }
793 // i64 relops
// wasm `i64.eq`: delegates to emit_i64_relop with the second opcode byte of
// SETE (0x0f 0x94).
794 void emit_i64_eq() {
795 auto icount = fixed_size_instr(12);
796 // sete %dl
797 emit_i64_relop(0x94);
798 }
799
// wasm `i64.ne`: delegates to emit_i64_relop with the second opcode byte of
// SETNE (0x0f 0x95).
800 void emit_i64_ne() {
801 auto icount = fixed_size_instr(12);
802 // setne %dl
803 emit_i64_relop(0x95);
804 }
805
807 auto icount = fixed_size_instr(12);
808 // setl %dl
809 emit_i64_relop(0x9c);
810 }
811
813 auto icount = fixed_size_instr(12);
814 // setb %dl
815 emit_i64_relop(0x92);
816 }
817
819 auto icount = fixed_size_instr(12);
820 // setg %dl
821 emit_i64_relop(0x9f);
822 }
823
825 auto icount = fixed_size_instr(12);
826 // seta %dl
827 emit_i64_relop(0x97);
828 }
829
831 auto icount = fixed_size_instr(12);
832 // setle %dl
833 emit_i64_relop(0x9e);
834 }
835
837 auto icount = fixed_size_instr(12);
838 // setbe %dl
839 emit_i64_relop(0x96);
840 }
841
843 auto icount = fixed_size_instr(12);
844 // setge %dl
845 emit_i64_relop(0x9d);
846 }
847
849 auto icount = fixed_size_instr(12);
850 // setae %dl
851 emit_i64_relop(0x93);
852 }
853
854#ifdef SYS_VM_SOFTFLOAT
855 // Make sure that the result doesn't contain any garbage bits in rax
// Widens a boolean comparison result to a full 64-bit value (exactly 0 or 1),
// so the returned register carries no garbage in its upper bits.
static uint64_t adapt_result(bool val) {
   return static_cast<uint64_t>(val);
}
859 static uint64_t adapt_result(float32_t val) {
860 uint64_t result = 0;
861 std::memcpy(&result, &val, sizeof(float32_t));
862 return result;
863 }
// A 64-bit softfloat result is returned unchanged; this overload only keeps
// the adapt_result overload set complete.
864 static float64_t adapt_result(float64_t val) {
865 return val;
866 }
867
// Wraps the unary float function F so that it takes and returns softfloat
// float32_t: the argument is converted with ::from_softfloat32, F is applied,
// and the result is converted back with ::to_softfloat32.
868 template<auto F>
869 static auto adapt_f32_unop(float32_t arg) {
870 return ::to_softfloat32(static_cast<decltype(F)>(F)(::from_softfloat32(arg)));
871 }
// Wraps the binary float function F so that it takes and returns softfloat
// float32_t: both operands are converted with ::from_softfloat32 and the
// result is converted back with ::to_softfloat32.
872 template<auto F>
873 static auto adapt_f32_binop(float32_t lhs, float32_t rhs) {
874 return ::to_softfloat32(static_cast<decltype(F)>(F)(::from_softfloat32(lhs), ::from_softfloat32(rhs)));
875 }
// Wraps the float comparison F so that it takes softfloat float32_t operands;
// the result of F is passed through adapt_result.
876 template<auto F>
877 static auto adapt_f32_cmp(float32_t lhs, float32_t rhs) {
878 return adapt_result(static_cast<decltype(F)>(F)(::from_softfloat32(lhs), ::from_softfloat32(rhs)));
879 }
880
// Wraps the unary double function F so that it takes and returns softfloat
// float64_t: the argument is converted with ::from_softfloat64, F is applied,
// and the result is converted back with ::to_softfloat64.
881 template<auto F>
882 static auto adapt_f64_unop(float64_t arg) {
883 return ::to_softfloat64(static_cast<decltype(F)>(F)(::from_softfloat64(arg)));
884 }
// Wraps the binary double function F so that it takes and returns softfloat
// float64_t: both operands are converted with ::from_softfloat64 and the
// result is converted back with ::to_softfloat64.
885 template<auto F>
886 static auto adapt_f64_binop(float64_t lhs, float64_t rhs) {
887 return ::to_softfloat64(static_cast<decltype(F)>(F)(::from_softfloat64(lhs), ::from_softfloat64(rhs)));
888 }
// Wraps the double comparison F so that it takes softfloat float64_t
// operands; the result of F is passed through adapt_result.
889 template<auto F>
890 static auto adapt_f64_cmp(float64_t lhs, float64_t rhs) {
891 return adapt_result(static_cast<decltype(F)>(F)(::from_softfloat64(lhs), ::from_softfloat64(rhs)));
892 }
893
// Overload set converting a native value to its softfloat representation:
// float -> float32_t, double -> float64_t; every other type is passed through
// unchanged by the template overload.
894 static float32_t to_softfloat(float arg) { return ::to_softfloat32(arg); }
895 static float64_t to_softfloat(double arg) { return ::to_softfloat64(arg); }
896 template<typename T>
897 static T to_softfloat(T arg) { return arg; }
// Overload set converting a softfloat value back to the native type:
// float32_t -> float, float64_t -> double; every other type is passed through
// unchanged by the template overload.
898 static float from_softfloat(float32_t arg) { return ::from_softfloat32(arg); }
899 static double from_softfloat(float64_t arg) { return ::from_softfloat64(arg); }
900 template<typename T>
901 static T from_softfloat(T arg) { return arg; }
902
// The type to_softfloat yields for a value-initialised T, i.e. the softfloat
// counterpart of float/double and T itself for every other type.
903 template<typename T>
904 using softfloat_arg_t = decltype(to_softfloat(T{}));
905
// Wraps the conversion function F (taking a T) so that floating-point values
// cross the boundary in softfloat form: the argument is converted with
// from_softfloat, F is applied, and the result is converted with to_softfloat.
// When an 8-byte argument produces a 4-byte result, the result bytes are
// copied into a zero-initialised uint64_t so the caller receives a full
// 64-bit value with the remaining bytes cleared.
906 template<auto F, typename T>
907 static auto adapt_float_convert(softfloat_arg_t<T> arg) {
908 auto result = to_softfloat(F(from_softfloat(arg)));
909 if constexpr (sizeof(result) == 4 && sizeof(T) == 8) {
910 uint64_t buffer = 0;
911 std::memcpy(&buffer, &result, sizeof(result));
912 return buffer;
913 } else {
914 return result;
915 }
916 }
917
// Selects the adapt_float_convert instantiation for a unary function of type
// R(T) and returns it as a function pointer with an explicit signature.
// The function-pointer argument is only used to deduce R and T.
// For the 8-byte -> 4-byte case the wrapper returns uint64_t, matching the
// widening done inside adapt_float_convert.
918 template<auto F, typename R, typename T>
919 static constexpr auto choose_unop(R(*)(T)) {
920 if constexpr(sizeof(R) == 4 && sizeof(T) == 8) {
921 return static_cast<uint64_t(*)(softfloat_arg_t<T>)>(&adapt_float_convert<F, T>);
922 } else {
923 return static_cast<softfloat_arg_t<R>(*)(softfloat_arg_t<T>)>(&adapt_float_convert<F, T>);
924 }
925 }
926
927 // HACK: avoid linking to softfloat if we aren't using it
928 // and also avoid passing arguments in floating point registers,
929 // since softfloat uses integer registers.
// Maps the function F to the wrapper that adapts it to softfloat argument
// passing, chosen by F's exact function-pointer type:
//   float(float) / float(float,float) / bool(float,float)       -> adapt_f32_*
//   double(double) / double(double,double) / bool(double,double) -> adapt_f64_*
//   anything else                                                -> choose_unop
// Returns nullptr when use_softfloat is false.
927 // HACK: avoid linking to softfloat if we aren't using it
928 // and also avoid passing arguments in floating point registers,
929 // since softfloat uses integer registers.
930 template<auto F>
931 constexpr auto choose_fn() {
932 if constexpr (use_softfloat) {
933 if constexpr (std::is_same_v<decltype(F), float(*)(float)>) {
934 return &adapt_f32_unop<F>;
935 } else if constexpr(std::is_same_v<decltype(F), float(*)(float,float)>) {
936 return &adapt_f32_binop<F>;
937 } else if constexpr(std::is_same_v<decltype(F), bool(*)(float,float)>) {
938 return &adapt_f32_cmp<F>;
939 } else if constexpr (std::is_same_v<decltype(F), double(*)(double)>) {
940 return &adapt_f64_unop<F>;
941 } else if constexpr(std::is_same_v<decltype(F), double(*)(double,double)>) {
942 return &adapt_f64_binop<F>;
943 } else if constexpr(std::is_same_v<decltype(F), bool(*)(double,double)>) {
944 return &adapt_f64_cmp<F>;
945 } else {
946 return choose_unop<F>(F);
947 }
948 } else {
949 return nullptr;
950 }
951 }
952
953 template<auto F, typename R, typename... A>
954 static R softfloat_trap_fn(A... a) {
955 R result;
957 result = F(a...);
958 });
959 return result;
960 }
961
// Returns a pointer to softfloat_trap_fn instantiated for F with the return
// and argument types deduced from the function-pointer argument, which is
// used for deduction only.
962 template<auto F, typename R, typename... A>
963 static constexpr auto make_softfloat_trap_fn(R(*)(A...)) -> R(*)(A...) {
964 return softfloat_trap_fn<F, R, A...>;
965 }
966
// Convenience entry point: yields the softfloat_trap_fn wrapper for F as a
// function reference (decltype(auto) over the dereferenced pointer).
967 template<auto F>
968 static constexpr decltype(auto) softfloat_trap() {
969 return *make_softfloat_trap_fn<F>(F);
970 }
971
972 #define CHOOSE_FN(name) choose_fn<&name>()
973#else
974 using float32_t = float;
975 using float64_t = double;
976 #define CHOOSE_FN(name) nullptr
977#endif
978
979 // --------------- f32 relops ----------------------
// wasm `f32.eq`: delegates to emit_f32_relop with comparison code 0x00 and
// the softfloat fallback _sysio_f32_eq (nullptr when softfloat is compiled
// out). NOTE(review): the two trailing flags are presumably "swap operands"
// and "invert result" - confirm against emit_f32_relop.
980 void emit_f32_eq() {
981 auto icount = softfloat_instr(25,45,59);
982 emit_f32_relop(0x00, CHOOSE_FN(_sysio_f32_eq), false, false);
983 }
984
// wasm `f32.ne`: reuses the equality comparison (code 0x00, _sysio_f32_eq)
// with the last flag set. NOTE(review): that flag presumably inverts the
// result - confirm against emit_f32_relop.
985 void emit_f32_ne() {
986 auto icount = softfloat_instr(24,47,61);
987 emit_f32_relop(0x00, CHOOSE_FN(_sysio_f32_eq), false, true);
988 }
989
// wasm `f32.lt`: delegates to emit_f32_relop with comparison code 0x01 and
// the softfloat fallback _sysio_f32_lt, both flags cleared.
990 void emit_f32_lt() {
991 auto icount = softfloat_instr(25,45,59);
992 emit_f32_relop(0x01, CHOOSE_FN(_sysio_f32_lt), false, false);
993 }
994
// wasm `f32.gt`: reuses the less-than comparison (code 0x01, _sysio_f32_lt)
// with the first flag set. NOTE(review): that flag presumably swaps the
// operands (a > b == b < a) - confirm against emit_f32_relop.
995 void emit_f32_gt() {
996 auto icount = softfloat_instr(25,45,59);
997 emit_f32_relop(0x01, CHOOSE_FN(_sysio_f32_lt), true, false);
998 }
999
1001 auto icount = softfloat_instr(25,45,59);
1002 emit_f32_relop(0x02, CHOOSE_FN(_sysio_f32_le), false, false);
1003 }
1004
1006 auto icount = softfloat_instr(25,45,59);
1007 emit_f32_relop(0x02, CHOOSE_FN(_sysio_f32_le), true, false);
1008 }
1009
1010 // --------------- f64 relops ----------------------
1012 auto icount = softfloat_instr(25,47,61);
1013 emit_f64_relop(0x00, CHOOSE_FN(_sysio_f64_eq), false, false);
1014 }
1015
1017 auto icount = softfloat_instr(24,49,63);
1018 emit_f64_relop(0x00, CHOOSE_FN(_sysio_f64_eq), false, true);
1019 }
1020
1022 auto icount = softfloat_instr(25,47,61);
1023 emit_f64_relop(0x01, CHOOSE_FN(_sysio_f64_lt), false, false);
1024 }
1025
1027 auto icount = softfloat_instr(25,47,61);
1028 emit_f64_relop(0x01, CHOOSE_FN(_sysio_f64_lt), true, false);
1029 }
1030
1032 auto icount = softfloat_instr(25,47,61);
1033 emit_f64_relop(0x02, CHOOSE_FN(_sysio_f64_le), false, false);
1034 }
1035
1037 auto icount = softfloat_instr(25,47,61);
1038 emit_f64_relop(0x02, CHOOSE_FN(_sysio_f64_le), true, false);
1039 }
1040
1041 // --------------- i32 unops ----------------------
1042
1044 unsigned a, b, c, d;
1045 return __get_cpuid_count(7, 0, &a, &b, &c, &d) && (b & bit_BMI) &&
1046 __get_cpuid(0x80000001, &a, &b, &c, &d) && (c & bit_LZCNT);
1047 }
1048
// Returns the result of has_tzcnt_impl(), computed once on the first call and
// cached in a function-local static for all later calls.
1049 bool has_tzcnt() {
1050 static bool result = has_tzcnt_impl();
1051 return result;
1052 }
1053
1055 auto icount = fixed_size_instr(has_tzcnt()?6:18);
1056 if(!has_tzcnt()) {
1057 // pop %rax
1058 emit_bytes(0x58);
1059 // mov $-1, %ecx
1060 emit_bytes(0xb9, 0xff, 0xff, 0xff, 0xff);
1061 // bsr %eax, %eax
1062 emit_bytes(0x0f, 0xbd, 0xc0);
1063 // cmovz %ecx, %eax
1064 emit_bytes(0x0f, 0x44, 0xc1);
1065 // sub $31, %eax
1066 emit_bytes(0x83, 0xe8, 0x1f);
1067 // neg %eax
1068 emit_bytes(0xf7, 0xd8);
1069 // push %rax
1070 emit_bytes(0x50);
1071 } else {
1072 // popq %rax
1073 emit_bytes(0x58);
1074 // lzcntl %eax, %eax
1075 emit_bytes(0xf3, 0x0f, 0xbd, 0xc0);
1076 // pushq %rax
1077 emit_bytes(0x50);
1078 }
1079 }
1080
1082 auto icount = fixed_size_instr(has_tzcnt()?6:13);
1083 if(!has_tzcnt()) {
1084 // pop %rax
1085 emit_bytes(0x58);
1086 // mov $32, %ecx
1087 emit_bytes(0xb9, 0x20, 0x00, 0x00, 0x00);
1088 // bsf %eax, %eax
1089 emit_bytes(0x0f, 0xbc, 0xc0);
1090 // cmovz %ecx, %eax
1091 emit_bytes(0x0f, 0x44, 0xc1);
1092 // push %rax
1093 emit_bytes(0x50);
1094 } else {
1095 // popq %rax
1096 emit_bytes(0x58);
1097 // tzcntl %eax, %eax
1098 emit_bytes(0xf3, 0x0f, 0xbc, 0xc0);
1099 // pushq %rax
1100 emit_bytes(0x50);
1101 }
1102 }
1103
1105 auto icount = fixed_size_instr(6);
1106 // popq %rax
1107 emit_bytes(0x58);
1108 // popcntl %eax, %eax
1109 emit_bytes(0xf3, 0x0f, 0xb8, 0xc0);
1110 // pushq %rax
1111 emit_bytes(0x50);
1112 }
1113
1114 // --------------- i32 binops ----------------------
1115
1117 auto icount = fixed_size_instr(5);
1118 emit_i32_binop(0x01, 0xc8, 0x50);
1119 }
1121 auto icount = fixed_size_instr(5);
1122 emit_i32_binop(0x29, 0xc8, 0x50);
1123 }
1125 auto icount = fixed_size_instr(6);
1126 emit_i32_binop(0x0f, 0xaf, 0xc1, 0x50);
1127 }
1128 // cdq; idiv %ecx; pushq %rax
1130 auto icount = fixed_size_instr(6);
1131 emit_i32_binop(0x99, 0xf7, 0xf9, 0x50);
1132 }
1134 auto icount = fixed_size_instr(7);
1135 emit_i32_binop(0x31, 0xd2, 0xf7, 0xf1, 0x50);
1136 }
1138 auto icount = fixed_size_instr(22);
1139 // pop %rcx
1140 emit_bytes(0x59);
1141 // pop %rax
1142 emit_bytes(0x58);
1143 // cmp $-1, %ecx
1144 emit_bytes(0x83, 0xf9, 0xff);
1145 // je MINUS1
1146 emit_bytes(0x0f, 0x84);
1147 void* minus1 = emit_branch_target32();
1148 // cdq
1149 emit_bytes(0x99);
1150 // idiv %ecx
1151 emit_bytes(0xf7, 0xf9);
1152 // jmp END
1153 emit_bytes(0xe9);
1154 void* end = emit_branch_target32();
1155 // MINUS1:
1156 fix_branch(minus1, code);
1157 // xor %edx, %edx
1158 emit_bytes(0x31, 0xd2);
1159 // END:
1160 fix_branch(end, code);
1161 // push %rdx
1162 emit_bytes(0x52);
1163 }
1165 auto icount = fixed_size_instr(7);
1166 emit_i32_binop(0x31, 0xd2, 0xf7, 0xf1, 0x52);
1167 }
1169 auto icount = fixed_size_instr(5);
1170 emit_i32_binop(0x21, 0xc8, 0x50);
1171 }
1173 auto icount = fixed_size_instr(5);
1174 emit_i32_binop(0x09, 0xc8, 0x50);
1175 }
1177 auto icount = fixed_size_instr(5);
1178 emit_i32_binop(0x31, 0xc8, 0x50);
1179 }
1181 auto icount = fixed_size_instr(5);
1182 emit_i32_binop(0xd3, 0xe0, 0x50);
1183 }
1185 auto icount = fixed_size_instr(5);
1186 emit_i32_binop(0xd3, 0xf8, 0x50);
1187 }
1189 auto icount = fixed_size_instr(5);
1190 emit_i32_binop(0xd3, 0xe8, 0x50);
1191 }
1193 auto icount = fixed_size_instr(5);
1194 emit_i32_binop(0xd3, 0xc0, 0x50);
1195 }
1197 auto icount = fixed_size_instr(5);
1198 emit_i32_binop(0xd3, 0xc8, 0x50);
1199 }
1200
1201 // --------------- i64 unops ----------------------
1202
1204 auto icount = fixed_size_instr(has_tzcnt()?7:24);
1205 if(!has_tzcnt()) {
1206 // pop %rax
1207 emit_bytes(0x58);
1208 // mov $-1, %rcx
1209 emit_bytes(0x48, 0xc7, 0xc1, 0xff, 0xff, 0xff, 0xff);
1210 // bsr %rax, %rax
1211 emit_bytes(0x48, 0x0f, 0xbd, 0xc0);
1212 // cmovz %rcx, %rax
1213 emit_bytes(0x48, 0x0f, 0x44, 0xc1);
1214 // sub $63, %rax
1215 emit_bytes(0x48, 0x83, 0xe8, 0x3f);
1216 // neg %rax
1217 emit_bytes(0x48, 0xf7, 0xd8);
1218 // push %rax
1219 emit_bytes(0x50);
1220 } else {
1221 // popq %rax
1222 emit_bytes(0x58);
1223 // lzcntq %rax, %rax
1224 emit_bytes(0xf3, 0x48, 0x0f, 0xbd, 0xc0);
1225 // pushq %rax
1226 emit_bytes(0x50);
1227 }
1228 }
1229
1231 auto icount = fixed_size_instr(has_tzcnt()?7:17);
1232 if(!has_tzcnt()) {
1233 // pop %rax
1234 emit_bytes(0x58);
1235 // mov $64, %rcx
1236 emit_bytes(0x48, 0xc7, 0xc1, 0x40, 0x00, 0x00, 0x00);
1237 // bsf %rax, %rax
1238 emit_bytes(0x48, 0x0f, 0xbc, 0xc0);
1239 // cmovz %rcx, %rax
1240 emit_bytes(0x48, 0x0f, 0x44, 0xc1);
1241 // push %rax
1242 emit_bytes(0x50);
1243 } else {
1244 // popq %rax
1245 emit_bytes(0x58);
1246 // tzcntq %rax, %rax
1247 emit_bytes(0xf3, 0x48, 0x0f, 0xbc, 0xc0);
1248 // pushq %rax
1249 emit_bytes(0x50);
1250 }
1251 }
1252
1254 auto icount = fixed_size_instr(7);
1255 // popq %rax
1256 emit_bytes(0x58);
1257 // popcntq %rax, %rax
1258 emit_bytes(0xf3, 0x48, 0x0f, 0xb8, 0xc0);
1259 // pushq %rax
1260 emit_bytes(0x50);
1261 }
1262
1263 // --------------- i64 binops ----------------------
1264
1266 auto icount = fixed_size_instr(6);
1267 emit_i64_binop(0x48, 0x01, 0xc8, 0x50);
1268 }
1270 auto icount = fixed_size_instr(6);
1271 emit_i64_binop(0x48, 0x29, 0xc8, 0x50);
1272 }
1274 auto icount = fixed_size_instr(7);
1275 emit_i64_binop(0x48, 0x0f, 0xaf, 0xc1, 0x50);
1276 }
1277 // cqo; idiv %rcx; pushq %rax
1279 auto icount = fixed_size_instr(8);
1280 emit_i64_binop(0x48, 0x99, 0x48, 0xf7, 0xf9, 0x50);
1281 }
1283 auto icount = fixed_size_instr(9);
1284 emit_i64_binop(0x48, 0x31, 0xd2, 0x48, 0xf7, 0xf1, 0x50);
1285 }
1287 auto icount = fixed_size_instr(25);
1288 // pop %rcx
1289 emit_bytes(0x59);
1290 // pop %rax
1291 emit_bytes(0x58);
1292 // cmp $-1, %rcx
1293 emit_bytes(0x48, 0x83, 0xf9, 0xff);
1294 // je MINUS1
1295 emit_bytes(0x0f, 0x84);
1296 void* minus1 = emit_branch_target32();
1297 // cqo
1298 emit_bytes(0x48, 0x99);
1299 // idiv %rcx
1300 emit_bytes(0x48, 0xf7, 0xf9);
1301 // jmp END
1302 emit_bytes(0xe9);
1303 void* end = emit_branch_target32();
1304 // MINUS1:
1305 fix_branch(minus1, code);
1306 // xor %edx, %edx
1307 emit_bytes(0x31, 0xd2);
1308 // END:
1309 fix_branch(end, code);
1310 // push %rdx
1311 emit_bytes(0x52);
1312 }
1314 auto icount = fixed_size_instr(9);
1315 emit_i64_binop(0x48, 0x31, 0xd2, 0x48, 0xf7, 0xf1, 0x52);
1316 }
1318 auto icount = fixed_size_instr(6);
1319 emit_i64_binop(0x48, 0x21, 0xc8, 0x50);
1320 }
1322 auto icount = fixed_size_instr(6);
1323 emit_i64_binop(0x48, 0x09, 0xc8, 0x50);
1324 }
1326 auto icount = fixed_size_instr(6);
1327 emit_i64_binop(0x48, 0x31, 0xc8, 0x50);
1328 }
1330 auto icount = fixed_size_instr(6);
1331 emit_i64_binop(0x48, 0xd3, 0xe0, 0x50);
1332 }
1334 auto icount = fixed_size_instr(6);
1335 emit_i64_binop(0x48, 0xd3, 0xf8, 0x50);
1336 }
1338 auto icount = fixed_size_instr(6);
1339 emit_i64_binop(0x48, 0xd3, 0xe8, 0x50);
1340 }
1342 auto icount = fixed_size_instr(6);
1343 emit_i64_binop(0x48, 0xd3, 0xc0, 0x50);
1344 }
1346 auto icount = fixed_size_instr(6);
1347 emit_i64_binop(0x48, 0xd3, 0xc8, 0x50);
1348 }
1349
1350 // --------------- f32 unops ----------------------
1351
1353 auto icount = fixed_size_instr(7);
1354 // popq %rax;
1355 emit_bytes(0x58);
1356 // andl 0x7fffffff, %eax
1357 emit_bytes(0x25);
1358 emit_operand32(0x7fffffff);
1359 // pushq %rax
1360 emit_bytes(0x50);
1361 }
1362
1364 auto icount = fixed_size_instr(7);
1365 // popq %rax
1366 emit_bytes(0x58);
1367 // xorl 0x80000000, %eax
1368 emit_bytes(0x35);
1369 emit_operand32(0x80000000);
1370 // pushq %rax
1371 emit_bytes(0x50);
1372 }
1373
1375 auto icount = softfloat_instr(12, 36, 54);
1376 if constexpr (use_softfloat) {
1377 return emit_softfloat_unop(CHOOSE_FN(_sysio_f32_ceil));
1378 }
1379 // roundss 0b1010, (%rsp), %xmm0
1380 emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0x04, 0x24, 0x0a);
1381 // movss %xmm0, (%rsp)
1382 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1383 }
1384
1386 auto icount = softfloat_instr(12, 36, 54);
1387 if constexpr (use_softfloat) {
1388 return emit_softfloat_unop(CHOOSE_FN(_sysio_f32_floor));
1389 }
1390 // roundss 0b1001, (%rsp), %xmm0
1391 emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0x04, 0x24, 0x09);
1392 // movss %xmm0, (%rsp)
1393 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1394 }
1395
1397 auto icount = softfloat_instr(12, 36, 54);
1398 if constexpr (use_softfloat) {
1399 return emit_softfloat_unop(CHOOSE_FN(_sysio_f32_trunc));
1400 }
1401 // roundss 0b1011, (%rsp), %xmm0
1402 emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0x04, 0x24, 0x0b);
1403 // movss %xmm0, (%rsp)
1404 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1405 }
1406
1408 auto icount = softfloat_instr(12, 36, 54);
1409 if constexpr (use_softfloat) {
1410 return emit_softfloat_unop(CHOOSE_FN(_sysio_f32_nearest));
1411 }
1412 // roundss 0b1000, (%rsp), %xmm0
1413 emit_bytes(0x66, 0x0f, 0x3a, 0x0a, 0x04, 0x24, 0x08);
1414 // movss %xmm0, (%rsp)
1415 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1416 }
1417
1419 auto icount = softfloat_instr(10, 36, 54);
1420 if constexpr (use_softfloat) {
1421 return emit_softfloat_unop(CHOOSE_FN(_sysio_f32_sqrt));
1422 }
1423 // sqrtss (%rsp), %xmm0
1424 emit_bytes(0xf3, 0x0f, 0x51, 0x04, 0x24);
1425 // movss %xmm0, (%rsp)
1426 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1427 }
1428
1429 // --------------- f32 binops ----------------------
1430
1432 auto icount = softfloat_instr(21, 44, 58);
1433 emit_f32_binop(0x58, CHOOSE_FN(_sysio_f32_add));
1434 }
1436 auto icount = softfloat_instr(21, 44, 58);
1437 emit_f32_binop(0x5c, CHOOSE_FN(_sysio_f32_sub));
1438 }
1440 auto icount = softfloat_instr(21, 44, 58);
1441 emit_f32_binop(0x59, CHOOSE_FN(_sysio_f32_mul));
1442 }
1444 auto icount = softfloat_instr(21, 44, 58);
1445 emit_f32_binop(0x5e, CHOOSE_FN(_sysio_f32_div));
1446 }
1448 auto icount = softfloat_instr(47, 44, 58);
1449 if constexpr(use_softfloat) {
1450 emit_f32_binop_softfloat(CHOOSE_FN(_sysio_f32_min));
1451 return;
1452 }
1453 // mov (%rsp), %eax
1454 emit_bytes(0x8b, 0x04, 0x24);
1455 // test %eax, %eax
1456 emit_bytes(0x85, 0xc0);
1457 // je ZERO
1458 emit_bytes(0x0f, 0x84);
1459 void* zero = emit_branch_target32();
1460 // movss 8(%rsp), %xmm0
1461 emit_bytes(0xf3, 0x0f, 0x10, 0x44, 0x24, 0x08);
1462 // minss (%rsp), %xmm0
1463 emit_bytes(0xf3, 0x0f, 0x5d, 0x04, 0x24);
1464 // jmp DONE
1465 emit_bytes(0xe9);
1466 void* done = emit_branch_target32();
1467 // ZERO:
1468 fix_branch(zero, code);
1469 // movss (%rsp), %xmm0
1470 emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
1471 // minss 8(%rsp), %xmm0
1472 emit_bytes(0xf3, 0x0f, 0x5d, 0x44, 0x24, 0x08);
1473 // DONE:
1474 fix_branch(done, code);
1475 // add $8, %rsp
1476 emit_bytes(0x48, 0x83, 0xc4, 0x08);
1477 // movss %xmm0, (%rsp)
1478 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1479 }
1481 auto icount = softfloat_instr(47, 44, 58);
1482 if(use_softfloat) {
1483 emit_f32_binop_softfloat(CHOOSE_FN(_sysio_f32_max));
1484 return;
1485 }
1486 // mov (%rsp), %eax
1487 emit_bytes(0x8b, 0x04, 0x24);
1488 // test %eax, %eax
1489 emit_bytes(0x85, 0xc0);
1490 // je ZERO
1491 emit_bytes(0x0f, 0x84);
1492 void* zero = emit_branch_target32();
1493 // movss (%rsp), %xmm0
1494 emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
1495 // maxss 8(%rsp), %xmm0
1496 emit_bytes(0xf3, 0x0f, 0x5f, 0x44, 0x24, 0x08);
1497 // jmp DONE
1498 emit_bytes(0xe9);
1499 void* done = emit_branch_target32();
1500 // ZERO:
1501 fix_branch(zero, code);
1502 // movss 8(%rsp), %xmm0
1503 emit_bytes(0xf3, 0x0f, 0x10, 0x44, 0x24, 0x08);
1504 // maxss (%rsp), %xmm0
1505 emit_bytes(0xf3, 0x0f, 0x5f, 0x04, 0x24);
1506 // DONE:
1507 fix_branch(done, code);
1508 // add $8, %rsp
1509 emit_bytes(0x48, 0x83, 0xc4, 0x08);
1510 // movss %xmm0, (%rsp)
1511 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1512 }
1513
1515 auto icount = fixed_size_instr(16);
1516 // popq %rax;
1517 emit_bytes(0x58);
1518 // andl 0x80000000, %eax
1519 emit_bytes(0x25);
1520 emit_operand32(0x80000000);
1521 // popq %rcx
1522 emit_bytes(0x59);
1523 // andl 0x7fffffff, %ecx
1524 emit_bytes(0x81, 0xe1);
1525 emit_operand32(0x7fffffff);
1526 // orl %ecx, %eax
1527 emit_bytes(0x09, 0xc8);
1528 // pushq %rax
1529 emit_bytes(0x50);
1530 }
1531
1532 // --------------- f64 unops ----------------------
1533
1535 auto icount = fixed_size_instr(15);
1536 // popq %rcx;
1537 emit_bytes(0x59);
1538 // movabsq $0x7fffffffffffffff, %rax
1539 emit_bytes(0x48, 0xb8);
1540 emit_operand64(0x7fffffffffffffffull);
1541 // andq %rcx, %rax
1542 emit_bytes(0x48, 0x21, 0xc8);
1543 // pushq %rax
1544 emit_bytes(0x50);
1545 }
1546
1548 auto icount = fixed_size_instr(15);
1549 // popq %rcx;
1550 emit_bytes(0x59);
1551 // movabsq $0x8000000000000000, %rax
1552 emit_bytes(0x48, 0xb8);
1553 emit_operand64(0x8000000000000000ull);
1554 // xorq %rcx, %rax
1555 emit_bytes(0x48, 0x31, 0xc8);
1556 // pushq %rax
1557 emit_bytes(0x50);
1558 }
1559
1561 auto icount = softfloat_instr(12, 38, 56);
1562 if constexpr (use_softfloat) {
1563 return emit_softfloat_unop(CHOOSE_FN(_sysio_f64_ceil));
1564 }
1565 // roundsd 0b1010, (%rsp), %xmm0
1566 emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0x04, 0x24, 0x0a);
1567 // movsd %xmm0, (%rsp)
1568 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
1569 }
1570
1572 auto icount = softfloat_instr(12, 38, 56);
1573 if constexpr (use_softfloat) {
1574 return emit_softfloat_unop(CHOOSE_FN(_sysio_f64_floor));
1575 }
1576 // roundsd 0b1001, (%rsp), %xmm0
1577 emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0x04, 0x24, 0x09);
1578 // movsd %xmm0, (%rsp)
1579 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
1580 }
1581
1583 auto icount = softfloat_instr(12, 38, 56);
1584 if constexpr (use_softfloat) {
1585 return emit_softfloat_unop(CHOOSE_FN(_sysio_f64_trunc));
1586 }
1587 // roundsd 0b1011, (%rsp), %xmm0
1588 emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0x04, 0x24, 0x0b);
1589 // movsd %xmm0, (%rsp)
1590 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
1591 }
1592
1594 auto icount = softfloat_instr(12, 38, 56);
1595 if constexpr (use_softfloat) {
1596 return emit_softfloat_unop(CHOOSE_FN(_sysio_f64_nearest));
1597 }
1598 // roundsd 0b1000, (%rsp), %xmm0
1599 emit_bytes(0x66, 0x0f, 0x3a, 0x0b, 0x04, 0x24, 0x08);
1600 // movsd %xmm0, (%rsp)
1601 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
1602 }
1603
1605 auto icount = softfloat_instr(10, 38, 56);
1606 if constexpr (use_softfloat) {
1607 return emit_softfloat_unop(CHOOSE_FN(_sysio_f64_sqrt));
1608 }
1609 // sqrtsd (%rsp), %xmm0
1610 emit_bytes(0xf2, 0x0f, 0x51, 0x04, 0x24);
1611 // movsd %xmm0, (%rsp)
1612 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
1613 }
1614
1615 // --------------- f64 binops ----------------------
1616
1618 auto icount = softfloat_instr(21, 47, 61);
1619 emit_f64_binop(0x58, CHOOSE_FN(_sysio_f64_add));
1620 }
1622 auto icount = softfloat_instr(21, 47, 61);
1623 emit_f64_binop(0x5c, CHOOSE_FN(_sysio_f64_sub));
1624 }
1626 auto icount = softfloat_instr(21, 47, 61);
1627 emit_f64_binop(0x59, CHOOSE_FN(_sysio_f64_mul));
1628 }
1630 auto icount = softfloat_instr(21, 47, 61);
1631 emit_f64_binop(0x5e, CHOOSE_FN(_sysio_f64_div));
1632 }
1634 auto icount = softfloat_instr(49, 47, 61);
1635 if(use_softfloat) {
1636 emit_f64_binop_softfloat(CHOOSE_FN(_sysio_f64_min));
1637 return;
1638 }
1639 // mov (%rsp), %rax
1640 emit_bytes(0x48, 0x8b, 0x04, 0x24);
1641 // test %rax, %rax
1642 emit_bytes(0x48, 0x85, 0xc0);
1643 // je ZERO
1644 emit_bytes(0x0f, 0x84);
1645 void* zero = emit_branch_target32();
1646 // movsd 8(%rsp), %xmm0
1647 emit_bytes(0xf2, 0x0f, 0x10, 0x44, 0x24, 0x08);
1648 // minsd (%rsp), %xmm0
1649 emit_bytes(0xf2, 0x0f, 0x5d, 0x04, 0x24);
1650 // jmp DONE
1651 emit_bytes(0xe9);
1652 void* done = emit_branch_target32();
1653 // ZERO:
1654 fix_branch(zero, code);
1655 // movsd (%rsp), %xmm0
1656 emit_bytes(0xf2, 0x0f, 0x10, 0x04, 0x24);
1657 // minsd 8(%rsp), %xmm0
1658 emit_bytes(0xf2, 0x0f, 0x5d, 0x44, 0x24, 0x08);
1659 // DONE:
1660 fix_branch(done, code);
1661 // add $8, %rsp
1662 emit_bytes(0x48, 0x83, 0xc4, 0x08);
1663 // movsd %xmm0, (%rsp)
1664 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
1665 }
1667 auto icount = softfloat_instr(49, 47, 61);
1668 if(use_softfloat) {
1669 emit_f64_binop_softfloat(CHOOSE_FN(_sysio_f64_max));
1670 return;
1671 }
1672 // mov (%rsp), %rax
1673 emit_bytes(0x48, 0x8b, 0x04, 0x24);
1674 // test %rax, %rax
1675 emit_bytes(0x48, 0x85, 0xc0);
1676 // je ZERO
1677 emit_bytes(0x0f, 0x84);
1678 void* zero = emit_branch_target32();
1679 // movsd (%rsp), %xmm0
1680 emit_bytes(0xf2, 0x0f, 0x10, 0x04, 0x24);
1681 // maxsd 8(%rsp), %xmm0
1682 emit_bytes(0xf2, 0x0f, 0x5f, 0x44, 0x24, 0x08);
1683 // jmp DONE
1684 emit_bytes(0xe9);
1685 void* done = emit_branch_target32();
1686 // ZERO:
1687 fix_branch(zero, code);
1688 // movsd 8(%rsp), %xmm0
1689 emit_bytes(0xf2, 0x0f, 0x10, 0x44, 0x24, 0x08);
1690 // maxsd (%rsp), %xmm0
1691 emit_bytes(0xf2, 0x0f, 0x5f, 0x04, 0x24);
1692 // DONE:
1693 fix_branch(done, code);
1694 // add $8, %rsp
1695 emit_bytes(0x48, 0x83, 0xc4, 0x08);
1696 // movsd %xmm0, (%rsp)
1697 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
1698 }
1699
1701 auto icount = fixed_size_instr(25);
1702 // popq %rcx;
1703 emit_bytes(0x59);
1704 // movabsq 0x8000000000000000, %rax
1705 emit_bytes(0x48, 0xb8);
1706 emit_operand64(0x8000000000000000ull);
1707 // andq %rax, %rcx
1708 emit_bytes(0x48, 0x21, 0xc1);
1709 // popq %rdx
1710 emit_bytes(0x5a);
1711 // notq %rax
1712 emit_bytes(0x48, 0xf7, 0xd0);
1713 // andq %rdx, %rax
1714 emit_bytes(0x48, 0x21, 0xd0);
1715 // orq %rcx, %rax
1716 emit_bytes(0x48, 0x09, 0xc8);
1717 // pushq %rax
1718 emit_bytes(0x50);
1719 }
1720
1721 // --------------- conversions --------------------
1722
1723
1725 auto icount = fixed_size_instr(6);
1726 // Zero out the high 4 bytes
1727 // xor %eax, %eax
1728 emit_bytes(0x31, 0xc0);
1729 // mov %eax, 4(%rsp)
1730 emit_bytes(0x89, 0x44, 0x24, 0x04);
1731 }
1732
1734 auto icount = softfloat_instr(33, 36, 54);
1735 if constexpr (use_softfloat) {
1736 return emit_softfloat_unop(CHOOSE_FN(softfloat_trap<&_sysio_f32_trunc_i32s>()));
1737 }
1738 // cvttss2si 8(%rsp), %eax
1739 emit_f2i(0xf3, 0x0f, 0x2c, 0x44, 0x24, 0x08);
1740 // mov %eax, (%rsp)
1741 emit_bytes(0x89, 0x04 ,0x24);
1742 }
1743
1745 auto icount = softfloat_instr(46, 36, 54);
1746 if constexpr (use_softfloat) {
1747 return emit_softfloat_unop(CHOOSE_FN(softfloat_trap<&_sysio_f32_trunc_i32u>()));
1748 }
1749 // cvttss2si 8(%rsp), %rax
1750 emit_f2i(0xf3, 0x48, 0x0f, 0x2c, 0x44, 0x24, 0x08);
1751 // mov %eax, (%rsp)
1752 emit_bytes(0x89, 0x04 ,0x24);
1753 // shr $32, %rax
1754 emit_bytes(0x48, 0xc1, 0xe8, 0x20);
1755 // test %eax, %eax
1756 emit_bytes(0x85, 0xc0);
1757 // jnz FP_ERROR_HANDLER
1758 emit_bytes(0x0f, 0x85);
1759 fix_branch(emit_branch_target32(), fpe_handler);
1760 }
1762 auto icount = softfloat_instr(34, 38, 56);
1763 if constexpr (use_softfloat) {
1764 return emit_softfloat_unop(CHOOSE_FN(softfloat_trap<&_sysio_f64_trunc_i32s>()));
1765 }
1766 // cvttsd2si 8(%rsp), %eax
1767 emit_f2i(0xf2, 0x0f, 0x2c, 0x44, 0x24, 0x08);
1768 // movq %rax, (%rsp)
1769 emit_bytes(0x48, 0x89, 0x04 ,0x24);
1770 }
1771
1773 auto icount = softfloat_instr(47, 38, 56);
1774 if constexpr (use_softfloat) {
1775 return emit_softfloat_unop(CHOOSE_FN(softfloat_trap<&_sysio_f64_trunc_i32u>()));
1776 }
1777 // cvttsd2si 8(%rsp), %rax
1778 emit_f2i(0xf2, 0x48, 0x0f, 0x2c, 0x44, 0x24, 0x08);
1779 // movq %rax, (%rsp)
1780 emit_bytes(0x48, 0x89, 0x04 ,0x24);
1781 // shr $32, %rax
1782 emit_bytes(0x48, 0xc1, 0xe8, 0x20);
1783 // test %eax, %eax
1784 emit_bytes(0x85, 0xc0);
1785 // jnz FP_ERROR_HANDLER
1786 emit_bytes(0x0f, 0x85);
1787 fix_branch(emit_branch_target32(), fpe_handler);
1788 }
1789
1791 auto icount = fixed_size_instr(8);
1792 // movslq (%rsp), %rax
1793 emit_bytes(0x48, 0x63, 0x04, 0x24);
1794 // mov %rax, (%rsp)
1795 emit_bytes(0x48, 0x89, 0x04, 0x24);
1796 }
1797
// i64.extend_i32_u: emits no machine code.
// NOTE(review): presumably valid because 32-bit operands already sit zero-extended in
// their 8-byte stack slot — confirm against the i32 producers.
void emit_i64_extend_u_i32() {}
1799
1801 auto icount = softfloat_instr(35, 37, 55);
1802 if constexpr (use_softfloat) {
1803 return emit_softfloat_unop(CHOOSE_FN(softfloat_trap<&_sysio_f32_trunc_i64s>()));
1804 }
1805 // cvttss2si 8(%rsp), %rax
1806 emit_f2i(0xf3, 0x48, 0x0f, 0x2c, 0x44, 0x24, 0x08);
1807 // mov %rax, (%rsp)
1808 emit_bytes(0x48, 0x89, 0x04 ,0x24);
1809 }
1811 auto icount = softfloat_instr(101, 37, 55);
1812 if constexpr (use_softfloat) {
1813 return emit_softfloat_unop(CHOOSE_FN(softfloat_trap<&_sysio_f32_trunc_i64u>()));
1814 }
1815 // mov $0x5f000000, %eax
1816 emit_bytes(0xb8);
1817 emit_operand32(0x5f000000);
1818 // movss (%rsp), %xmm0
1819 emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
1820 // mov %eax, (%rsp)
1821 emit_bytes(0x89, 0x04, 0x24);
1822 // movss (%rsp), %xmm1
1823 emit_bytes(0xf3, 0x0f, 0x10, 0x0c, 0x24);
1824 // movaps %xmm0, %xmm2
1825 emit_bytes(0x0f, 0x28, 0xd0);
1826 // subss %xmm1, %xmm2
1827 emit_bytes(0xf3, 0x0f, 0x5c, 0xd1);
1828 // cvttss2siq %xmm2, %rax
1829 emit_f2i(0xf3, 0x48, 0x0f, 0x2c, 0xc2);
1830 // movabsq $0x8000000000000000, %rcx
1831 emit_bytes(0x48, 0xb9);
1832 emit_operand64(0x8000000000000000);
1833 // xorq %rax, %rcx
1834 emit_bytes(0x48, 0x31, 0xc1);
1835 // cvttss2siq %xmm0, %rax
1836 emit_bytes(0xf3, 0x48, 0x0f, 0x2c, 0xc0);
1837 // xor %rdx, %rdx
1838 emit_bytes(0x48, 0x31, 0xd2);
1839 // ucomiss %xmm0, %xmm1
1840 emit_bytes(0x0f, 0x2e, 0xc8);
1841 // cmovaq %rax, %rdx
1842 emit_bytes(0x48, 0x0f, 0x47, 0xd0);
1843 // cmovbeq %rcx, %rax
1844 emit_bytes(0x48, 0x0f, 0x46, 0xc1);
1845 // mov %rax, (%rsp)
1846 emit_bytes(0x48, 0x89, 0x04, 0x24);
1847 // bt $63, %rdx
1848 emit_bytes(0x48, 0x0f, 0xba, 0xe2, 0x3f);
1849 // jc FP_ERROR_HANDLER
1850 emit_bytes(0x0f, 0x82);
1851 fix_branch(emit_branch_target32(), fpe_handler);
1852 }
1854 auto icount = softfloat_instr(35, 38, 56);
1855 if constexpr (use_softfloat) {
1856 return emit_softfloat_unop(CHOOSE_FN(softfloat_trap<&_sysio_f64_trunc_i64s>()));
1857 }
1858 // cvttsd2si 8(%rsp), %rax
1859 emit_f2i(0xf2, 0x48, 0x0f, 0x2c, 0x44, 0x24, 0x08);
1860 // mov %rax, (%rsp)
1861 emit_bytes(0x48, 0x89, 0x04 ,0x24);
1862 }
1864 auto icount = softfloat_instr(109, 38, 56);
1865 if constexpr (use_softfloat) {
1866 return emit_softfloat_unop(CHOOSE_FN(softfloat_trap<&_sysio_f64_trunc_i64u>()));
1867 }
1868 // movabsq $0x43e0000000000000, %rax
1869 emit_bytes(0x48, 0xb8);
1870 emit_operand64(0x43e0000000000000);
1871 // movsd (%rsp), %xmm0
1872 emit_bytes(0xf2, 0x0f, 0x10, 0x04, 0x24);
1873 // movq %rax, (%rsp)
1874 emit_bytes(0x48, 0x89, 0x04, 0x24);
1875 // movsd (%rsp), %xmm1
1876 emit_bytes(0xf2, 0x0f, 0x10, 0x0c, 0x24);
1877 // movapd %xmm0, %xmm2
1878 emit_bytes(0x66, 0x0f, 0x28, 0xd0);
1879 // subsd %xmm1, %xmm2
1880 emit_bytes(0xf2, 0x0f, 0x5c, 0xd1);
1881 // cvttsd2siq %xmm2, %rax
1882 emit_f2i(0xf2, 0x48, 0x0f, 0x2c, 0xc2);
1883 // movabsq $0x8000000000000000, %rcx
1884 emit_bytes(0x48, 0xb9);
1885 emit_operand64(0x8000000000000000);
1886 // xorq %rax, %rcx
1887 emit_bytes(0x48, 0x31, 0xc1);
1888 // cvttsd2siq %xmm0, %rax
1889 emit_bytes(0xf2, 0x48, 0x0f, 0x2c, 0xc0);
1890 // xor %rdx, %rdx
1891 emit_bytes(0x48, 0x31, 0xd2);
1892 // ucomisd %xmm0, %xmm1
1893 emit_bytes(0x66, 0x0f, 0x2e, 0xc8);
1894 // cmovaq %rax, %rdx
1895 emit_bytes(0x48, 0x0f, 0x47, 0xd0);
1896 // cmovbeq %rcx, %rax
1897 emit_bytes(0x48, 0x0f, 0x46, 0xc1);
1898 // mov %rax, (%rsp)
1899 emit_bytes(0x48, 0x89, 0x04, 0x24);
1900 // bt $63, %rdx
1901 emit_bytes(0x48, 0x0f, 0xba, 0xe2, 0x3f);
1902 // jc FP_ERROR_HANDLER
1903 emit_bytes(0x0f, 0x82);
1904 fix_branch(emit_branch_target32(), fpe_handler);
1905 }
1906
1908 auto icount = softfloat_instr(10, 36, 54);
1909 if constexpr (use_softfloat) {
1910 return emit_softfloat_unop(CHOOSE_FN(_sysio_i32_to_f32));
1911 }
1912 // cvtsi2ssl (%rsp), %xmm0
1913 emit_bytes(0xf3, 0x0f, 0x2a, 0x04, 0x24);
1914 // movss %xmm0, (%rsp)
1915 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1916 }
1918 auto icount = softfloat_instr(11, 36, 54);
1919 if constexpr (use_softfloat) {
1920 return emit_softfloat_unop(CHOOSE_FN(_sysio_ui32_to_f32));
1921 }
1922 // zero-extend to 64-bits
1923 // cvtsi2sslq (%rsp), %xmm0
1924 emit_bytes(0xf3, 0x48, 0x0f, 0x2a, 0x04, 0x24);
1925 // movss %xmm0, (%rsp)
1926 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1927 }
1929 auto icount = softfloat_instr(11, 38, 56);
1930 if constexpr (use_softfloat) {
1931 return emit_softfloat_unop(CHOOSE_FN(_sysio_i64_to_f32));
1932 }
1933 // cvtsi2sslq (%rsp), %xmm0
1934 emit_bytes(0xf3, 0x48, 0x0f, 0x2a, 0x04, 0x24);
1935 // movss %xmm0, (%rsp)
1936 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1937 }
1939 auto icount = softfloat_instr(55, 38, 56);
1940 if constexpr (use_softfloat) {
1941 return emit_softfloat_unop(CHOOSE_FN(_sysio_ui64_to_f32));
1942 }
1943 // movq (%rsp), %rax
1944 emit_bytes(0x48, 0x8b, 0x04, 0x24);
1945 // testq %rax, %rax
1946 emit_bytes(0x48, 0x85, 0xc0);
1947 // js LARGE
1948 emit_bytes(0x0f, 0x88);
1949 void * large = emit_branch_target32();
1950 // cvtsi2ssq %rax, %xmm0
1951 emit_bytes(0xf3, 0x48, 0x0f, 0x2a, 0xc0);
1952 // jmp done
1953 emit_bytes(0xe9);
1954 void* done = emit_branch_target32();
1955 // LARGE:
1956 fix_branch(large, code);
1957 // movq %rax, %rcx
1958 emit_bytes(0x48, 0x89, 0xc1);
1959 // shrq %rax
1960 emit_bytes(0x48, 0xd1, 0xe8);
1961 // andl $1, %ecx
1962 emit_bytes(0x83, 0xe1, 0x01);
1963 // orq %rcx, %rax
1964 emit_bytes(0x48, 0x09, 0xc8);
1965 // cvtsi2ssq %rax, %xmm0
1966 emit_bytes(0xf3, 0x48, 0x0f, 0x2a, 0xc0);
1967 // addss %xmm0, %xmm0
1968 emit_bytes(0xf3, 0x0f, 0x58, 0xc0);
1969 // DONE:
1970 fix_branch(done, code);
1971 // xorl %eax, %eax
1972 emit_bytes(0x31, 0xc0);
1973 // movl %eax, 4(%rsp)
1974 emit_bytes(0x89, 0x44, 0x24, 0x04);
1975 // movss %xmm0, (%rsp)
1976 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1977 }
1979 auto icount = softfloat_instr(16, 38, 56);
1980 if constexpr (use_softfloat) {
1981 return emit_softfloat_unop(CHOOSE_FN(_sysio_f64_demote));
1982 }
1983 // cvtsd2ss (%rsp), %xmm0
1984 emit_bytes(0xf2, 0x0f, 0x5a, 0x04, 0x24);
1985 // movss %xmm0, (%rsp)
1986 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
1987 // Zero out the high 4 bytes
1988 // xor %eax, %eax
1989 emit_bytes(0x31, 0xc0);
1990 // mov %eax, 4(%rsp)
1991 emit_bytes(0x89, 0x44, 0x24, 0x04);
1992 }
1994 auto icount = softfloat_instr(10, 37, 55);
1995 if constexpr (use_softfloat) {
1996 return emit_softfloat_unop(CHOOSE_FN(_sysio_i32_to_f64));
1997 }
1998 // cvtsi2sdl (%rsp), %xmm0
1999 emit_bytes(0xf2, 0x0f, 0x2a, 0x04, 0x24);
2000 // movsd %xmm0, (%rsp)
2001 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
2002 }
2004 auto icount = softfloat_instr(11, 37, 55);
2005 if constexpr (use_softfloat) {
2006 return emit_softfloat_unop(CHOOSE_FN(_sysio_ui32_to_f64));
2007 }
2008 // cvtsi2sdq (%rsp), %xmm0
2009 emit_bytes(0xf2, 0x48, 0x0f, 0x2a, 0x04, 0x24);
2010 // movsd %xmm0, (%rsp)
2011 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
2012 }
2014 auto icount = softfloat_instr(11, 38, 56);
2015 if constexpr (use_softfloat) {
2016 return emit_softfloat_unop(CHOOSE_FN(_sysio_i64_to_f64));
2017 }
2018 // cvtsi2sdq (%rsp), %xmm0
2019 emit_bytes(0xf2, 0x48, 0x0f, 0x2a, 0x04, 0x24);
2020 // movsd %xmm0, (%rsp)
2021 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
2022 }
2024 auto icount = softfloat_instr(49, 38, 56);
2025 if constexpr (use_softfloat) {
2026 return emit_softfloat_unop(CHOOSE_FN(_sysio_ui64_to_f64));
2027 }
2028 // movq (%rsp), %rax
2029 emit_bytes(0x48, 0x8b, 0x04, 0x24);
2030 // testq %rax, %rax
2031 emit_bytes(0x48, 0x85, 0xc0);
2032 // js LARGE
2033 emit_bytes(0x0f, 0x88);
2034 void * large = emit_branch_target32();
2035 // cvtsi2sdq %rax, %xmm0
2036 emit_bytes(0xf2, 0x48, 0x0f, 0x2a, 0xc0);
2037 // jmp done
2038 emit_bytes(0xe9);
2039 void* done = emit_branch_target32();
2040 // LARGE:
2041 fix_branch(large, code);
2042 // movq %rax, %rcx
2043 emit_bytes(0x48, 0x89, 0xc1);
2044 // shrq %rax
2045 emit_bytes(0x48, 0xd1, 0xe8);
2046 // andl $1, %ecx
2047 emit_bytes(0x83, 0xe1, 0x01);
2048 // orq %rcx, %rax
2049 emit_bytes(0x48, 0x09, 0xc8);
2050 // cvtsi2sdq %rax, %xmm0
2051 emit_bytes(0xf2, 0x48, 0x0f, 0x2a, 0xc0);
2052 // addsd %xmm0, %xmm0
2053 emit_bytes(0xf2, 0x0f, 0x58, 0xc0);
2054 // DONE:
2055 fix_branch(done, code);
2056 // movsd %xmm0, (%rsp)
2057 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
2058 }
2060 auto icount = softfloat_instr(10, 37, 55);
2061 if constexpr (use_softfloat) {
2062 return emit_softfloat_unop(CHOOSE_FN(_sysio_f32_promote));
2063 }
2064 // cvtss2sd (%rsp), %xmm0
2065 emit_bytes(0xf3, 0x0f, 0x5a, 0x04, 0x24);
2066 // movsd %xmm0, (%rsp)
2067 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
2068 }
2069
// The four reinterpret conversions emit no machine code: nothing is written to the
// code buffer and the operand slot is left untouched.
void emit_i32_reinterpret_f32() {}
void emit_i64_reinterpret_f64() {}
void emit_f32_reinterpret_i32() {}
void emit_f64_reinterpret_i64() {}
2074
2075#undef CHOOSE_FN
2076
2077 void emit_error() { unimplemented(); }
2078
2079 // --------------- random ------------------------
2080 static void fix_branch(void* branch, void* target) {
2081 auto branch_ = static_cast<uint8_t*>(branch);
2082 auto target_ = static_cast<uint8_t*>(target);
2083 auto relative = static_cast<uint32_t>(target_ - (branch_ + 4));
2084 if((target_ - (branch_ + 4)) > 0x7FFFFFFFll ||
2085 (target_ - (branch_ + 4)) < -0x80000000ll) unimplemented();
2086 memcpy(branch, &relative, 4);
2087 }
2088
// Writes the full 8-byte absolute address `target` into the slot at `branch`.
// Used for function calls whose destination is too far away to be reached with a
// 32-bit relative displacement.
static void fix_branch64(void* branch, void* target) {
   void* const absolute = target;
   memcpy(branch, &absolute, sizeof(uint64_t));
}
2094
2095 using fn_type = native_value(*)(void* context, void* memory);
2097 _mod.allocator.reclaim(code, _code_end - code);
2098 body.jit_code_offset = _code_start - (unsigned char*)_code_segment_base;
2099 }
2100
2101 // returns the current write address
2102 const void* get_addr() const {
2103 return code;
2104 }
2105
2106 const void* get_base_addr() const { return _code_segment_base; }
2107
2108 private:
2109
2110 auto fixed_size_instr(std::size_t expected_bytes) {
2111 return scope_guard{[this, expected_code=code+expected_bytes](){
2112#ifdef SYS_VM_VALIDATE_JIT_SIZE
2113 assert(code == expected_code);
2114#endif
2115 ignore_unused_variable_warning(code, expected_code);
2116 }};
2117 }
2118 auto variable_size_instr(std::size_t min, std::size_t max) {
2119 return scope_guard{[this, min_code=code+min,max_code=code+max](){
2120#ifdef SYS_VM_VALIDATE_JIT_SIZE
2121 assert(min_code <= code && code <= max_code);
2122#endif
2123 ignore_unused_variable_warning(code, min_code, max_code);
2124 }};
2125 }
2126 auto softfloat_instr(std::size_t hard_expected, std::size_t soft_expected, std::size_t softbt_expected) {
2127 return fixed_size_instr(use_softfloat?(Context::async_backtrace()?softbt_expected:soft_expected):hard_expected);
2128 }
2129
2130 module& _mod;
2131 void * _code_segment_base;
2132 const func_type* _ft;
2133 unsigned char * _code_start;
2134 unsigned char * _code_end;
2135 unsigned char * code;
2136 std::vector<std::variant<std::vector<void*>, void*>> _function_relocations;
2137 void* fpe_handler;
2138 void* call_indirect_handler;
2139 void* type_error_handler;
2140 void* stack_overflow_handler;
2141 void* jmp_table;
2142 uint32_t _local_count;
2143 uint32_t _table_element_size;
2144
2145 void emit_byte(uint8_t val) { *code++ = val; }
2146 void emit_bytes() {}
2147 template<class... T>
2148 void emit_bytes(uint8_t val0, T... vals) {
2149 emit_byte(val0);
2150 emit_bytes(vals...);
2151 }
2152 void emit_operand32(uint32_t val) { memcpy(code, &val, sizeof(val)); code += sizeof(val); }
2153 void emit_operand64(uint64_t val) { memcpy(code, &val, sizeof(val)); code += sizeof(val); }
2154 void emit_operandf32(float val) { memcpy(code, &val, sizeof(val)); code += sizeof(val); }
2155 void emit_operandf64(double val) { memcpy(code, &val, sizeof(val)); code += sizeof(val); }
2156 template<class T>
2157 void emit_operand_ptr(T* val) { memcpy(code, &val, sizeof(val)); code += sizeof(val); }
2158
2159 void* emit_branch_target32() {
2160 void * result = code;
2161 emit_operand32(3735928555u - static_cast<uint32_t>(reinterpret_cast<uintptr_t>(code)));
2162 return result;
2163 }
2164
2165 void emit_check_call_depth() {
2166 // decl %ebx
2167 emit_bytes(0xff, 0xcb);
2168 // jz stack_overflow
2169 emit_bytes(0x0f, 0x84);
2170 fix_branch(emit_branch_target32(), stack_overflow_handler);
2171 }
2172 void emit_check_call_depth_end() {
2173 // incl %ebx
2174 emit_bytes(0xff, 0xc3);
2175 }
2176
2177 static void unimplemented() { SYS_VM_ASSERT(false, wasm_parse_exception, "Sorry, not implemented."); }
2178
2179 // clobbers %rax if the high bit of count is set.
2180 void emit_multipop(uint32_t count) {
2181 if(count > 0 && count != 0x80000001) {
2182 if (count & 0x80000000) {
2183 // mov (%rsp), %rax
2184 emit_bytes(0x48, 0x8b, 0x04, 0x24);
2185 }
2186 if(count & 0x70000000) {
2187 // This code is probably unreachable.
2188 // int3
2189 emit_bytes(0xCC);
2190 }
2191 // add depth_change*8, %rsp
2192 emit_bytes(0x48, 0x81, 0xc4); // TODO: Prefer imm8 where appropriate
2193 emit_operand32(count * 8); // FIXME: handle overflow
2194 if (count & 0x80000000) {
2195 // push %rax
2196 emit_bytes(0x50);
2197 }
2198 }
2199 }
2200
2201 template<class... T>
2202 void emit_load_impl(uint32_t offset, T... loadop) {
2203 // pop %rax
2204 emit_bytes(0x58);
2205 if (offset & 0x80000000) {
2206 // mov $offset, %ecx
2207 emit_bytes(0xb9);
2208 emit_operand32(offset);
2209 // add %rcx, %rax
2210 emit_bytes(0x48, 0x01, 0xc8);
2211 } else if (offset != 0) {
2212 // add offset, %rax
2213 emit_bytes(0x48, 0x05);
2214 emit_operand32(offset);
2215 }
2216 // add %rsi, %rax
2217 emit_bytes(0x48, 0x01, 0xf0);
2218 // from the caller
2219 emit_bytes(static_cast<uint8_t>(loadop)...);
2220 // push RAX
2221 emit_bytes(0x50);
2222 }
2223
2224 template<class... T>
2225 void emit_store_impl(uint32_t offset, T... storeop) {
2226 // pop RCX
2227 emit_bytes(0x59);
2228 // pop RAX
2229 emit_bytes(0x58);
2230 if (offset & 0x80000000) {
2231 // mov $offset, %ecx
2232 emit_bytes(0xb9);
2233 emit_operand32(offset);
2234 // add %rcx, %rax
2235 emit_bytes(0x48, 0x01, 0xc8);
2236 } else if (offset != 0) {
2237 // add offset, %rax
2238 emit_bytes(0x48, 0x05);
2239 emit_operand32(offset);
2240 }
2241 // add %rsi, %rax
2242 emit_bytes(0x48, 0x01, 0xf0);
2243 // from the caller
2244 emit_bytes(static_cast<uint8_t>(storeop)...);;
2245 }
2246
2247 void emit_i32_relop(uint8_t opcode) {
2248 // popq %rax
2249 emit_bytes(0x58);
2250 // popq %rcx
2251 emit_bytes(0x59);
2252 // xorq %rdx, %rdx
2253 emit_bytes(0x48, 0x31, 0xd2);
2254 // cmpl %eax, %ecx
2255 emit_bytes(0x39, 0xc1);
2256 // SETcc %dl
2257 emit_bytes(0x0f, opcode, 0xc2);
2258 // pushq %rdx
2259 emit_bytes(0x52);
2260 }
2261
2262 template<class... T>
2263 void emit_i64_relop(uint8_t opcode) {
2264 // popq %rax
2265 emit_bytes(0x58);
2266 // popq %rcx
2267 emit_bytes(0x59);
2268 // xorq %rdx, %rdx
2269 emit_bytes(0x48, 0x31, 0xd2);
2270 // cmpq %rax, %rcx
2271 emit_bytes(0x48, 0x39, 0xc1);
2272 // SETcc %dl
2273 emit_bytes(0x0f, opcode, 0xc2);
2274 // pushq %rdx
2275 emit_bytes(0x52);
2276 }
2277
2278 template<typename T, typename U>
2279 void emit_softfloat_unop(T(*softfloatfun)(U)) {
2280 auto extra = emit_setup_backtrace();
2281 // pushq %rdi
2282 emit_bytes(0x57);
2283 // pushq %rsi
2284 emit_bytes(0x56);
2285 if constexpr(sizeof(U) == 4) {
2286 // movq 16(%rsp), %edi
2287 emit_bytes(0x8b, 0x7c, 0x24, 0x10 + extra);
2288 } else {
2289 // movq 16(%rsp), %rdi
2290 emit_bytes(0x48, 0x8b, 0x7c, 0x24, 0x10 + extra);
2291 }
2292 emit_align_stack();
2293 // movabsq $softfloatfun, %rax
2294 emit_bytes(0x48, 0xb8);
2295 emit_operand_ptr(softfloatfun);
2296 // callq *%rax
2297 emit_bytes(0xff, 0xd0);
2298 emit_restore_stack();
2299 // popq %rsi
2300 emit_bytes(0x5e);
2301 // popq %rdi
2302 emit_bytes(0x5f);
2303 emit_restore_backtrace();
2304 if constexpr(sizeof(T) == 4) {
2305 static_assert(sizeof(U) == 4, "Can only push 4-byte item if the upper 4 bytes are already 0");
2306 // movq %eax, (%rsp)
2307 emit_bytes(0x89, 0x04, 0x24);
2308 } else {
2309 // movq %rax, (%rsp)
2310 emit_bytes(0x48, 0x89, 0x04, 0x24);
2311 }
2312 }
2313
2314 void emit_f32_binop_softfloat(float32_t (*softfloatfun)(float32_t, float32_t)) {
2315 auto extra = emit_setup_backtrace();
2316 // pushq %rdi
2317 emit_bytes(0x57);
2318 // pushq %rsi
2319 emit_bytes(0x56);
2320 // movq 16(%rsp), %esi
2321 emit_bytes(0x8b, 0x74, 0x24, 0x10 + extra);
2322 // movq 24(%rsp), %edi
2323 emit_bytes(0x8b, 0x7c, 0x24, 0x18 + extra);
2324 emit_align_stack();
2325 // movabsq $softfloatfun, %rax
2326 emit_bytes(0x48, 0xb8);
2327 emit_operand_ptr(softfloatfun);
2328 // callq *%rax
2329 emit_bytes(0xff, 0xd0);
2330 emit_restore_stack();
2331 // popq %rsi
2332 emit_bytes(0x5e);
2333 // popq %rdi
2334 emit_bytes(0x5f);
2335 emit_restore_backtrace_basic();
2336 // addq $8, %rsp
2337 emit_bytes(0x48, 0x83, 0xc4, 0x08 + extra);
2338 // movq %eax, (%rsp)
2339 emit_bytes(0x89, 0x04, 0x24);
2340 }
2341
2342 void emit_f64_binop_softfloat(float64_t (*softfloatfun)(float64_t, float64_t)) {
2343 auto extra = emit_setup_backtrace();
2344 // pushq %rdi
2345 emit_bytes(0x57);
2346 // pushq %rsi
2347 emit_bytes(0x56);
2348 // movq 16(%rsp), %rsi
2349 emit_bytes(0x48, 0x8b, 0x74, 0x24, 0x10 + extra);
2350 // movq 24(%rsp), %rdi
2351 emit_bytes(0x48, 0x8b, 0x7c, 0x24, 0x18 + extra);
2352 emit_align_stack();
2353 // movabsq $softfloatfun, %rax
2354 emit_bytes(0x48, 0xb8);
2355 emit_operand_ptr(softfloatfun);
2356 // callq *%rax
2357 emit_bytes(0xff, 0xd0);
2358 emit_restore_stack();
2359 // popq %rsi
2360 emit_bytes(0x5e);
2361 // popq %rdi
2362 emit_bytes(0x5f);
2363 emit_restore_backtrace_basic();
2364 // addq $8, %rsp
2365 emit_bytes(0x48, 0x83, 0xc4, 0x08 + extra);
2366 // movq %rax, (%rsp)
2367 emit_bytes(0x48, 0x89, 0x04, 0x24);
2368 }
2369
2370 void emit_f32_relop(uint8_t opcode, uint64_t (*softfloatfun)(float32_t, float32_t), bool switch_params, bool flip_result) {
2371 if constexpr (use_softfloat) {
2372 auto extra = emit_setup_backtrace();
2373 // pushq %rdi
2374 emit_bytes(0x57);
2375 // pushq %rsi
2376 emit_bytes(0x56);
2377 if(switch_params) {
2378 // movq 24(%rsp), %esi
2379 emit_bytes(0x8b, 0x74, 0x24, 0x18 + extra);
2380 // movq 16(%rsp), %edi
2381 emit_bytes(0x8b, 0x7c, 0x24, 0x10 + extra);
2382 } else {
2383 // movq 16(%rsp), %esi
2384 emit_bytes(0x8b, 0x74, 0x24, 0x10 + extra);
2385 // movq 24(%rsp), %edi
2386 emit_bytes(0x8b, 0x7c, 0x24, 0x18 + extra);
2387 }
2388 emit_align_stack();
2389 // movabsq $softfloatfun, %rax
2390 emit_bytes(0x48, 0xb8);
2391 emit_operand_ptr(softfloatfun);
2392 // callq *%rax
2393 emit_bytes(0xff, 0xd0);
2394 emit_restore_stack();
2395 // popq %rsi
2396 emit_bytes(0x5e);
2397 // popq %rdi
2398 emit_bytes(0x5f);
2399 emit_restore_backtrace_basic();
2400 if (flip_result) {
2401 // xor $0x1, %al
2402 emit_bytes(0x34, 0x01);
2403 }
2404 // addq $8, %rsp
2405 emit_bytes(0x48, 0x83, 0xc4, 0x08 + extra);
2406 // movq %rax, (%rsp)
2407 emit_bytes(0x48, 0x89, 0x04, 0x24);
2408 } else {
2409 // ucomiss+seta/setae is shorter but can't handle eq/ne
2410 if(switch_params) {
2411 // movss (%rsp), %xmm0
2412 emit_bytes(0xf3, 0x0f, 0x10, 0x04, 0x24);
2413 // cmpCCss 8(%rsp), %xmm0
2414 emit_bytes(0xf3, 0x0f, 0xc2, 0x44, 0x24, 0x08, opcode);
2415 } else {
2416 // movss 8(%rsp), %xmm0
2417 emit_bytes(0xf3, 0x0f, 0x10, 0x44, 0x24, 0x08);
2418 // cmpCCss (%rsp), %xmm0
2419 emit_bytes(0xf3, 0x0f, 0xc2, 0x04, 0x24, opcode);
2420 }
2421 // movd %xmm0, %eax
2422 emit_bytes(0x66, 0x0f, 0x7e, 0xc0);
2423 if (!flip_result) {
2424 // andl $1, %eax
2425 emit_bytes(0x83, 0xe0, 0x01);
2426 } else {
2427 // incl %eax {0xffffffff, 0} -> {0, 1}
2428 emit_bytes(0xff, 0xc0);
2429 }
2430 // leaq 16(%rsp), %rsp
2431 emit_bytes(0x48, 0x8d, 0x64, 0x24, 0x10);
2432 // pushq %rax
2433 emit_bytes(0x50);
2434 }
2435 }
2436
2437 void emit_f64_relop(uint8_t opcode, uint64_t (*softfloatfun)(float64_t, float64_t), bool switch_params, bool flip_result) {
2438 if constexpr (use_softfloat) {
2439 auto extra = emit_setup_backtrace();
2440 // pushq %rdi
2441 emit_bytes(0x57);
2442 // pushq %rsi
2443 emit_bytes(0x56);
2444 if(switch_params) {
2445 // movq 24(%rsp), %rsi
2446 emit_bytes(0x48, 0x8b, 0x74, 0x24, 0x18 + extra);
2447 // movq 16(%rsp), %rdi
2448 emit_bytes(0x48, 0x8b, 0x7c, 0x24, 0x10 + extra);
2449 } else {
2450 // movq 16(%rsp), %rsi
2451 emit_bytes(0x48, 0x8b, 0x74, 0x24, 0x10 + extra);
2452 // movq 24(%rsp), %rdi
2453 emit_bytes(0x48, 0x8b, 0x7c, 0x24, 0x18 + extra);
2454 }
2455 emit_align_stack();
2456 // movabsq $softfloatfun, %rax
2457 emit_bytes(0x48, 0xb8);
2458 emit_operand_ptr(softfloatfun);
2459 // callq *%rax
2460 emit_bytes(0xff, 0xd0);
2461 emit_restore_stack();
2462 // popq %rsi
2463 emit_bytes(0x5e);
2464 // popq %rdi
2465 emit_bytes(0x5f);
2466 emit_restore_backtrace_basic();
2467 if (flip_result) {
2468 // xor $0x1, %al
2469 emit_bytes(0x34, 0x01);
2470 }
2471 // addq $8, %rsp
2472 emit_bytes(0x48, 0x83, 0xc4, 0x08 + extra);
2473 // movq %rax, (%rsp)
2474 emit_bytes(0x48, 0x89, 0x04, 0x24);
2475 } else {
2476 // ucomisd+seta/setae is shorter but can't handle eq/ne
2477 if(switch_params) {
2478 // movsd (%rsp), %xmm0
2479 emit_bytes(0xf2, 0x0f, 0x10, 0x04, 0x24);
2480 // cmpCCsd 8(%rsp), %xmm0
2481 emit_bytes(0xf2, 0x0f, 0xc2, 0x44, 0x24, 0x08, opcode);
2482 } else {
2483 // movsd 8(%rsp), %xmm0
2484 emit_bytes(0xf2, 0x0f, 0x10, 0x44, 0x24, 0x08);
2485 // cmpCCsd (%rsp), %xmm0
2486 emit_bytes(0xf2, 0x0f, 0xc2, 0x04, 0x24, opcode);
2487 }
2488 // movd %xmm0, %eax
2489 emit_bytes(0x66, 0x0f, 0x7e, 0xc0);
2490 if (!flip_result) {
2491 // andl $1, eax
2492 emit_bytes(0x83, 0xe0, 0x01);
2493 } else {
2494 // incl %eax {0xffffffff, 0} -> {0, 1}
2495 emit_bytes(0xff, 0xc0);
2496 }
2497 // leaq 16(%rsp), %rsp
2498 emit_bytes(0x48, 0x8d, 0x64, 0x24, 0x10);
2499 // pushq %rax
2500 emit_bytes(0x50);
2501 }
2502 }
2503
2504 template<class... T>
2505 void emit_i32_binop(T... op) {
2506 // popq %rcx
2507 emit_bytes(0x59);
2508 // popq %rax
2509 emit_bytes(0x58);
2510 // OP %eax, %ecx
2511 emit_bytes(static_cast<uint8_t>(op)...);
2512 // pushq %rax
2513 // emit_bytes(0x50);
2514 }
2515
2516 template<class... T>
2517 void emit_i64_binop(T... op) {
2518 // popq %rcx
2519 emit_bytes(0x59);
2520 // popq %rax
2521 emit_bytes(0x58);
2522 // OP %eax, %ecx
2523 emit_bytes(static_cast<uint8_t>(op)...);
2524 }
2525
2526 void emit_f32_binop(uint8_t op, float32_t (*softfloatfun)(float32_t, float32_t)) {
2527 if constexpr (use_softfloat) {
2528 return emit_f32_binop_softfloat(softfloatfun);
2529 }
2530 // movss 8(%rsp), %xmm0
2531 emit_bytes(0xf3, 0x0f, 0x10, 0x44, 0x24, 0x08);
2532 // OPss (%rsp), %xmm0
2533 emit_bytes(0xf3, 0x0f, op, 0x04, 0x24);
2534 // leaq 8(%rsp), %rsp
2535 emit_bytes(0x48, 0x8d, 0x64, 0x24, 0x08);
2536 // movss %xmm0, (%rsp)
2537 emit_bytes(0xf3, 0x0f, 0x11, 0x04, 0x24);
2538 }
2539
2540 void emit_f64_binop(uint8_t op, float64_t (*softfloatfun)(float64_t, float64_t)) {
2541 if constexpr (use_softfloat) {
2542 return emit_f64_binop_softfloat(softfloatfun);
2543 }
2544 // movsd 8(%rsp), %xmm0
2545 emit_bytes(0xf2, 0x0f, 0x10, 0x44, 0x24, 0x08);
2546 // OPsd (%rsp), %xmm0
2547 emit_bytes(0xf2, 0x0f, op, 0x04, 0x24);
2548 // leaq 8(%rsp), %rsp
2549 emit_bytes(0x48, 0x8d, 0x64, 0x24, 0x08);
2550 // movsd %xmm0, (%rsp)
2551 emit_bytes(0xf2, 0x0f, 0x11, 0x04, 0x24);
2552 }
2553
2554 // Beware: This pushes and pops mxcsr around the user op. Remember to adjust access to %rsp in the caller.
2555 // Note uses %rcx after the user instruction
2556 template<class... T>
2557 void emit_f2i(T... op) {
2558 // mov 0x0x1f80, %eax // round-to-even/all exceptions masked/no exceptions set
2559 emit_bytes(0xb8, 0x80, 0x1f, 0x00, 0x00);
2560 // push %rax
2561 emit_bytes(0x50);
2562 // ldmxcsr (%rsp)
2563 emit_bytes(0x0f, 0xae, 0x14, 0x24);
2564 // user op
2565 emit_bytes(op...);
2566 // stmxcsr (%rsp)
2567 emit_bytes(0x0f, 0xae, 0x1c, 0x24);
2568 // pop %rcx
2569 emit_bytes(0x59);
2570 // test %cl, 0x1 // invalid
2571 emit_bytes(0xf6, 0xc1, 0x01);
2572 // jnz FP_ERROR_HANDLER
2573 emit_bytes(0x0f, 0x85);
2574 fix_branch(emit_branch_target32(), fpe_handler);
2575 }
2576
2577 void* emit_error_handler(void (*handler)()) {
2578 void* result = code;
2579 // andq $-16, %rsp;
2580 emit_bytes(0x48, 0x83, 0xe4, 0xf0);
2581 // movabsq &on_unreachable, %rax
2582 emit_bytes(0x48, 0xb8);
2583 emit_operand_ptr(handler);
2584 // callq *%rax
2585 emit_bytes(0xff, 0xd0);
2586 return result;
2587 }
2588
2589 void emit_align_stack() {
2590 // mov %rsp, rcx; andq $-16, %rsp; push rcx; push %rcx
2591 emit_bytes(0x48, 0x89, 0xe1);
2592 emit_bytes(0x48, 0x83, 0xe4, 0xf0);
2593 emit_bytes(0x51);
2594 emit_bytes(0x51);
2595 }
2596
2597 void emit_restore_stack() {
2598 // mov (%rsp), %rsp
2599 emit_bytes(0x48, 0x8b, 0x24, 0x24);
2600 }
2601
2602 void emit_host_call(uint32_t funcnum) {
2603 uint32_t extra = 0;
2604 if constexpr (Context::async_backtrace()) {
2605 // pushq %rbp
2606 emit_bytes(0x55);
2607 // movq %rsp, (%rdi)
2608 emit_bytes(0x48, 0x89, 0x27);
2609 extra = 8;
2610 }
2611 // mov $funcnum, %edx
2612 emit_bytes(0xba);
2613 emit_operand32(funcnum);
2614 // pushq %rdi
2615 emit_bytes(0x57);
2616 // pushq %rsi
2617 emit_bytes(0x56);
2618 // lea 24(%rsp), %rsi
2619 emit_bytes(0x48, 0x8d, 0x74, 0x24, 0x18 + extra);
2620 emit_align_stack();
2621 // movabsq $call_host_function, %rax
2622 emit_bytes(0x48, 0xb8);
2623 emit_operand_ptr(&call_host_function);
2624 // callq *%rax
2625 emit_bytes(0xff, 0xd0);
2626 emit_restore_stack();
2627 // popq %rsi
2628 emit_bytes(0x5e);
2629 // popq %rdi
2630 emit_bytes(0x5f);
2631 if constexpr (Context::async_backtrace()) {
2632 emit_restore_backtrace_basic();
2633 // popq %rbp
2634 emit_bytes(0x5d);
2635 }
2636 // retq
2637 emit_bytes(0xc3);
2638 }
2639
2640 // Needs to run before saving %rdi. Returns the number of bytes pushed onto the stack.
2641 uint32_t emit_setup_backtrace() {
2642 if constexpr (Context::async_backtrace()) {
2643 // callq next
2644 emit_bytes(0xe8);
2645 emit_operand32(0);
2646 // next:
2647 // pushq %rbp
2648 emit_bytes(0x55);
2649 // movq %rsp, (%rdi)
2650 emit_bytes(0x48, 0x89, 0x27);
2651 return 16;
2652 } else {
2653 return 0;
2654 }
2655 }
2656 // Does not adjust the stack pointer. Use this if the
2657 // stack pointer adjustment is combined with another instruction.
2658 void emit_restore_backtrace_basic() {
2659 if constexpr (Context::async_backtrace()) {
2660 // xorl %edx, %edx
2661 emit_bytes(0x31, 0xd2);
2662 // movq %rdx, (%rdi)
2663 emit_bytes(0x48, 0x89, 0x17);
2664 }
2665 }
2666 void emit_restore_backtrace() {
2667 if constexpr (Context::async_backtrace()) {
2668 emit_restore_backtrace_basic();
2669 // addq $16, %rsp
2670 emit_bytes(0x48, 0x83, 0xc4, 0x10);
2671 }
2672 }
2673
2674 bool is_host_function(uint32_t funcnum) { return funcnum < _mod.get_imported_functions_size(); }
2675
2676 static native_value call_host_function(Context* context /*rdi*/, native_value* stack /*rsi*/, uint32_t idx /*edx*/) {
2677 // It's currently unsafe to throw through a jit frame, because we don't set up
2678 // the exception tables for them.
2679 native_value result;
2681 result = context->call_host_function(stack, idx);
2682 });
2683 return result;
2684 }
2685
2686 static int32_t current_memory(Context* context /*rdi*/) {
2687 return context->current_linear_memory();
2688 }
2689
2690 static int32_t grow_memory(Context* context /*rdi*/, int32_t pages) {
2691 return context->grow_linear_memory(pages);
2692 }
2693
2694 static void on_unreachable() { vm::throw_<wasm_interpreter_exception>( "unreachable" ); }
2695 static void on_fp_error() { vm::throw_<wasm_interpreter_exception>( "floating point error" ); }
2696 static void on_call_indirect_error() { vm::throw_<wasm_interpreter_exception>( "call_indirect out of range" ); }
2697 static void on_type_error() { vm::throw_<wasm_interpreter_exception>( "call_indirect incorrect function type" ); }
2698 static void on_stack_overflow() { vm::throw_<wasm_interpreter_exception>( "stack overflow" ); }
2699 };
2700
2701}}
void reclaim(const T *ptr, size_t size=0)
void end_code(void *code_base)
void * emit_br_if(uint32_t depth_change)
Definition x86_64.hpp:201
void emit_call_indirect(const func_type &ft, uint32_t functypeidx)
Definition x86_64.hpp:328
void emit_i64_store8(uint32_t, uint32_t offset)
Definition x86_64.hpp:607
void emit_i64_load32_s(uint32_t, uint32_t offset)
Definition x86_64.hpp:547
void emit_call(const func_type &ft, uint32_t funcnum)
Definition x86_64.hpp:314
void emit_i32_load8_s(uint32_t, uint32_t offset)
Definition x86_64.hpp:511
void emit_f32_load(uint32_t, uint32_t offset)
Definition x86_64.hpp:499
void emit_f64_store(uint32_t, uint32_t offset)
Definition x86_64.hpp:589
void emit_prologue(const func_type &, const guarded_vector< local_entry > &locals, uint32_t funcnum)
Definition x86_64.hpp:103
void emit_get_global(uint32_t globalidx)
Definition x86_64.hpp:447
void emit_i64_load(uint32_t, uint32_t offset)
Definition x86_64.hpp:493
void emit_f64_const(double value)
Definition x86_64.hpp:696
void emit_i64_load16_s(uint32_t, uint32_t offset)
Definition x86_64.hpp:541
static void fix_branch64(void *branch, void *target)
Definition x86_64.hpp:2091
machine_code_writer(growable_allocator &alloc, std::size_t source_bytes, module &mod)
Definition x86_64.hpp:36
void emit_f64_load(uint32_t, uint32_t offset)
Definition x86_64.hpp:505
void finalize(function_body &body)
Definition x86_64.hpp:2096
void emit_i64_load16_u(uint32_t, uint32_t offset)
Definition x86_64.hpp:559
void emit_tee_local(uint32_t local_idx)
Definition x86_64.hpp:426
void emit_epilogue(const func_type &ft, const guarded_vector< local_entry > &locals, uint32_t)
Definition x86_64.hpp:148
const void * get_base_addr() const
Definition x86_64.hpp:2106
void * emit_br(uint32_t depth_change)
Definition x86_64.hpp:193
const void * get_addr() const
Definition x86_64.hpp:2102
void emit_i64_load8_s(uint32_t, uint32_t offset)
Definition x86_64.hpp:535
void emit_i64_store(uint32_t, uint32_t offset)
Definition x86_64.hpp:577
void emit_i32_load8_u(uint32_t, uint32_t offset)
Definition x86_64.hpp:523
void emit_f32_store(uint32_t, uint32_t offset)
Definition x86_64.hpp:583
void emit_i64_store16(uint32_t, uint32_t offset)
Definition x86_64.hpp:613
void emit_f32_const(float value)
Definition x86_64.hpp:688
static constexpr std::size_t max_prologue_size
Definition x86_64.hpp:101
void emit_set_global(uint32_t globalidx)
Definition x86_64.hpp:474
void * emit_else(void *if_loc)
Definition x86_64.hpp:187
void emit_i64_const(uint64_t value)
Definition x86_64.hpp:679
void emit_i32_load16_u(uint32_t, uint32_t offset)
Definition x86_64.hpp:529
void emit_i64_load8_u(uint32_t, uint32_t offset)
Definition x86_64.hpp:553
static constexpr std::size_t max_epilogue_size
Definition x86_64.hpp:102
void emit_set_local(uint32_t local_idx)
Definition x86_64.hpp:409
void emit_i64_store32(uint32_t, uint32_t offset)
Definition x86_64.hpp:619
void emit_i32_store8(uint32_t, uint32_t offset)
Definition x86_64.hpp:595
void start_function(void *func_start, uint32_t funcnum)
Definition x86_64.hpp:305
void emit_i32_store16(uint32_t, uint32_t offset)
Definition x86_64.hpp:601
void * emit_return(uint32_t depth_change)
Definition x86_64.hpp:171
void emit_i32_const(uint32_t value)
Definition x86_64.hpp:670
void register_call(void *ptr, uint32_t funcnum)
Definition x86_64.hpp:296
br_table_generator emit_br_table(uint32_t table_size)
Definition x86_64.hpp:287
void emit_i32_load(uint32_t, uint32_t offset)
Definition x86_64.hpp:487
void emit_i64_load32_u(uint32_t, uint32_t offset)
Definition x86_64.hpp:565
void emit_i32_store(uint32_t, uint32_t offset)
Definition x86_64.hpp:571
void emit_get_local(uint32_t local_idx)
Definition x86_64.hpp:380
static void fix_branch(void *branch, void *target)
Definition x86_64.hpp:2080
void emit_i32_load16_s(uint32_t, uint32_t offset)
Definition x86_64.hpp:517
native_value(*)(void *context, void *memory) fn_type
Definition x86_64.hpp:2095
int * count
const T & min(const T &a, const T &b)
Definition utility.hpp:140
void ignore_unused_variable_warning(T &...)
Definition utils.hpp:101
void longjmp_on_exception(F &&f)
Definition signals.hpp:62
void throw_(const char *msg)
Definition signals.hpp:84
constexpr bool use_softfloat
Definition config.hpp:16
variant< > opcode
Definition opcodes.hpp:79
#define value
Definition pkcs11.h:157
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1181
#define T(meth, val, expected)
sysio::client::http::http_context context
Definition main.cpp:200
double from_softfloat64(float64_t d)
float from_softfloat32(float32_t f)
_W64 unsigned int uintptr_t
Definition stdint.h:165
unsigned int uint32_t
Definition stdint.h:126
signed int int32_t
Definition stdint.h:123
unsigned char uint8_t
Definition stdint.h:124
unsigned __int64 uint64_t
Definition stdint.h:136
uint8_t return_count
Definition types.hpp:45
guarded_vector< value_type > param_types
Definition types.hpp:44
std::size_t jit_code_offset
Definition types.hpp:138
void * emit_default(uint32_t depth_change)
Definition x86_64.hpp:270
void * emit_case(uint32_t depth_change)
Definition x86_64.hpp:229
guarded_vector< uint32_t > functions
Definition types.hpp:171
guarded_vector< uint32_t > type_aliases
Definition types.hpp:185
guarded_vector< uint32_t > fast_functions
Definition types.hpp:186
guarded_vector< global_variable > globals
Definition types.hpp:174
uint32_t get_imported_functions_size() const
Definition types.hpp:197
guarded_vector< table_type > tables
Definition types.hpp:172
growable_allocator allocator
Definition types.hpp:167
guarded_vector< func_type > types
Definition types.hpp:169
guarded_vector< function_body > code
Definition types.hpp:177
#define SYS_VM_ASSERT(expr, exc_type, msg)
Definition exceptions.hpp:8
#define R
#define CHOOSE_FN(name)
Definition x86_64.hpp:976
Definition dtoa.c:306
CK_ULONG d
char * label
memcpy((char *) pInfo->slotDescription, s, l)