Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
f128_sqrt.c File Reference
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
Include dependency graph for f128_sqrt.c:

Go to the source code of this file.

Functions

float128_t f128_sqrt (float128_t a)
 

Function Documentation

◆ f128_sqrt()

float128_t f128_sqrt ( float128_t a)

Definition at line 44 of file f128_sqrt.c.

45{
46 union ui128_f128 uA;
47 uint_fast64_t uiA64, uiA0;
48 bool signA;
49 int_fast32_t expA;
50 struct uint128 sigA, uiZ;
51 struct exp32_sig128 normExpSig;
52 int_fast32_t expZ;
53 uint_fast32_t sig32A, recipSqrt32, sig32Z;
54 struct uint128 rem;
55 uint32_t qs[3];
57 uint_fast64_t x64, sig64Z;
58 struct uint128 y, term;
59 uint_fast64_t sigZExtra;
60 struct uint128 sigZ;
61 union ui128_f128 uZ;
62
63 /*------------------------------------------------------------------------
64 *------------------------------------------------------------------------*/
65 uA.f = a;
66 uiA64 = uA.ui.v64;
67 uiA0 = uA.ui.v0;
68 signA = signF128UI64( uiA64 );
69 expA = expF128UI64( uiA64 );
70 sigA.v64 = fracF128UI64( uiA64 );
71 sigA.v0 = uiA0;
72 /*------------------------------------------------------------------------
73 *------------------------------------------------------------------------*/
74 if ( expA == 0x7FFF ) {
75 if ( sigA.v64 | sigA.v0 ) {
76 uiZ = softfloat_propagateNaNF128UI( uiA64, uiA0, 0, 0 );
77 goto uiZ;
78 }
79 if ( ! signA ) return a;
80 goto invalid;
81 }
82 /*------------------------------------------------------------------------
83 *------------------------------------------------------------------------*/
84 if ( signA ) {
85 if ( ! (expA | sigA.v64 | sigA.v0) ) return a;
86 goto invalid;
87 }
88 /*------------------------------------------------------------------------
89 *------------------------------------------------------------------------*/
90 if ( ! expA ) {
91 if ( ! (sigA.v64 | sigA.v0) ) return a;
92 normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 );
93 expA = normExpSig.exp;
94 sigA = normExpSig.sig;
95 }
96 /*------------------------------------------------------------------------
97 | (`sig32Z' is guaranteed to be a lower bound on the square root of
98 | `sig32A', which makes `sig32Z' also a lower bound on the square root of
99 | `sigA'.)
100 *------------------------------------------------------------------------*/
101 expZ = ((expA - 0x3FFF)>>1) + 0x3FFE;
102 expA &= 1;
103 sigA.v64 |= UINT64_C( 0x0001000000000000 );
104 sig32A = sigA.v64>>17;
105 recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
106 sig32Z = ((uint_fast64_t) sig32A * recipSqrt32)>>32;
107 if ( expA ) {
108 sig32Z >>= 1;
109 rem = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 12 );
110 } else {
111 rem = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 13 );
112 }
113 qs[2] = sig32Z;
114 rem.v64 -= (uint_fast64_t) sig32Z * sig32Z;
115 /*------------------------------------------------------------------------
116 *------------------------------------------------------------------------*/
117 q = ((uint32_t) (rem.v64>>2) * (uint_fast64_t) recipSqrt32)>>32;
118 x64 = (uint_fast64_t) sig32Z<<32;
119 sig64Z = x64 + ((uint_fast64_t) q<<3);
120 y = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
121 /*------------------------------------------------------------------------
122 | (Repeating this loop is a rare occurrence.)
123 *------------------------------------------------------------------------*/
124 for (;;) {
125 term = softfloat_mul64ByShifted32To128( x64 + sig64Z, q );
126 rem = softfloat_sub128( y.v64, y.v0, term.v64, term.v0 );
127 if ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) ) break;
128 --q;
129 sig64Z -= 1<<3;
130 }
131 qs[1] = q;
132 /*------------------------------------------------------------------------
133 *------------------------------------------------------------------------*/
134 q = ((rem.v64>>2) * recipSqrt32)>>32;
135 y = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
136 sig64Z <<= 1;
137 /*------------------------------------------------------------------------
138 | (Repeating this loop is a rare occurrence.)
139 *------------------------------------------------------------------------*/
140 for (;;) {
141 term = softfloat_shortShiftLeft128( 0, sig64Z, 32 );
142 term = softfloat_add128( term.v64, term.v0, 0, (uint_fast64_t) q<<6 );
143 term = softfloat_mul128By32( term.v64, term.v0, q );
144 rem = softfloat_sub128( y.v64, y.v0, term.v64, term.v0 );
145 if ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) ) break;
146 --q;
147 }
148 qs[0] = q;
149 /*------------------------------------------------------------------------
150 *------------------------------------------------------------------------*/
151 q = (((rem.v64>>2) * recipSqrt32)>>32) + 2;
152 sigZExtra = (uint64_t) ((uint_fast64_t) q<<59);
153 term = softfloat_shortShiftLeft128( 0, qs[1], 53 );
154 sigZ =
156 (uint_fast64_t) qs[2]<<18, ((uint_fast64_t) qs[0]<<24) + (q>>5),
157 term.v64, term.v0
158 );
159 /*------------------------------------------------------------------------
160 *------------------------------------------------------------------------*/
161 if ( (q & 0xF) <= 2 ) {
162 q &= ~3;
163 sigZExtra = (uint64_t) ((uint_fast64_t) q<<59);
164 y = softfloat_shortShiftLeft128( sigZ.v64, sigZ.v0, 6 );
165 y.v0 |= sigZExtra>>58;
166 term = softfloat_sub128( y.v64, y.v0, 0, q );
167 y = softfloat_mul64ByShifted32To128( term.v0, q );
168 term = softfloat_mul64ByShifted32To128( term.v64, q );
169 term = softfloat_add128( term.v64, term.v0, 0, y.v64 );
170 rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 20 );
171 term = softfloat_sub128( term.v64, term.v0, rem.v64, rem.v0 );
172 /*--------------------------------------------------------------------
173 | The concatenation of `term' and `y.v0' is now the negative remainder
174 | (3 words altogether).
175 *--------------------------------------------------------------------*/
176 if ( term.v64 & UINT64_C( 0x8000000000000000 ) ) {
177 sigZExtra |= 1;
178 } else {
179 if ( term.v64 | term.v0 | y.v0 ) {
180 if ( sigZExtra ) {
181 --sigZExtra;
182 } else {
183 sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, 0, 1 );
184 sigZExtra = ~0;
185 }
186 }
187 }
188 }
189 return softfloat_roundPackToF128( 0, expZ, sigZ.v64, sigZ.v0, sigZExtra );
190 /*------------------------------------------------------------------------
191 *------------------------------------------------------------------------*/
192 invalid:
194 uiZ.v64 = defaultNaNF128UI64;
195 uiZ.v0 = defaultNaNF128UI0;
196 uiZ:
197 uZ.ui = uiZ;
198 return uZ.f;
199
200}
uint64_t y
Definition sha3.cpp:34
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1181
struct uint128 softfloat_propagateNaNF128UI(uint_fast64_t uiA64, uint_fast64_t uiA0, uint_fast64_t uiB64, uint_fast64_t uiB0)
void softfloat_raiseFlags(uint_fast8_t flags)
#define defaultNaNF128UI64
Definition specialize.h:337
#define defaultNaNF128UI0
Definition specialize.h:339
uint32_t softfloat_approxRecipSqrt32_1(unsigned int oddExpA, uint32_t a)
@ softfloat_flag_invalid
Definition softfloat.h:89
struct uint128 softfloat_add128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0)
Definition s_add128.c:44
struct uint128 softfloat_mul128By32(uint64_t a64, uint64_t a0, uint32_t b)
struct uint128 softfloat_mul64ByShifted32To128(uint64_t a, uint32_t b)
struct exp32_sig128 softfloat_normSubnormalF128Sig(uint_fast64_t sig64, uint_fast64_t sig0)
float128_t softfloat_roundPackToF128(bool sign, int_fast32_t exp, uint_fast64_t sig64, uint_fast64_t sig0, uint_fast64_t sigExtra)
struct uint128 softfloat_shortShiftLeft128(uint64_t a64, uint64_t a0, uint_fast8_t dist)
struct uint128 softfloat_sub128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0)
Definition s_sub128.c:44
unsigned int uint32_t
Definition stdint.h:126
uint64_t uint_fast64_t
Definition stdint.h:157
#define UINT64_C(val)
Definition stdint.h:284
uint32_t uint_fast32_t
Definition stdint.h:156
int32_t int_fast32_t
Definition stdint.h:152
unsigned __int64 uint64_t
Definition stdint.h:136
Here is the call graph for this function: