fplib.cc
1 /*
2 * Copyright (c) 2012-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Edmund Grimley Evans
38 * Thomas Grocutt
39 */
40 
41 #include <stdint.h>
42 
43 #include <cassert>
44 
45 #include "fplib.hh"
46 
47 namespace ArmISA
48 {
49 
50 #define FPLIB_RN 0
51 #define FPLIB_RP 1
52 #define FPLIB_RM 2
53 #define FPLIB_RZ 3
54 #define FPLIB_FZ 4
55 #define FPLIB_DN 8
56 #define FPLIB_AHP 16
57 
58 #define FPLIB_IDC 128 // Input Denormal
59 #define FPLIB_IXC 16 // Inexact
60 #define FPLIB_UFC 8 // Underflow
61 #define FPLIB_OFC 4 // Overflow
62 #define FPLIB_DZC 2 // Division by Zero
63 #define FPLIB_IOC 1 // Invalid Operation
64 
65 static inline uint16_t
66 lsl16(uint16_t x, uint32_t shift)
67 {
68  return shift < 16 ? x << shift : 0;
69 }
70 
71 static inline uint16_t
72 lsr16(uint16_t x, uint32_t shift)
73 {
74  return shift < 16 ? x >> shift : 0;
75 }
76 
77 static inline uint32_t
78 lsl32(uint32_t x, uint32_t shift)
79 {
80  return shift < 32 ? x << shift : 0;
81 }
82 
83 static inline uint32_t
84 lsr32(uint32_t x, uint32_t shift)
85 {
86  return shift < 32 ? x >> shift : 0;
87 }
88 
89 static inline uint64_t
90 lsl64(uint64_t x, uint32_t shift)
91 {
92  return shift < 64 ? x << shift : 0;
93 }
94 
95 static inline uint64_t
96 lsr64(uint64_t x, uint32_t shift)
97 {
98  return shift < 64 ? x >> shift : 0;
99 }
100 
101 static inline void
102 lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
103 {
104  if (shift == 0) {
105  *r1 = x1;
106  *r0 = x0;
107  } else if (shift < 64) {
108  *r1 = x1 << shift | x0 >> (64 - shift);
109  *r0 = x0 << shift;
110  } else if (shift < 128) {
111  *r1 = x0 << (shift - 64);
112  *r0 = 0;
113  } else {
114  *r1 = 0;
115  *r0 = 0;
116  }
117 }
118 
119 static inline void
120 lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
121 {
122  if (shift == 0) {
123  *r1 = x1;
124  *r0 = x0;
125  } else if (shift < 64) {
126  *r0 = x0 >> shift | x1 << (64 - shift);
127  *r1 = x1 >> shift;
128  } else if (shift < 128) {
129  *r0 = x1 >> (shift - 64);
130  *r1 = 0;
131  } else {
132  *r0 = 0;
133  *r1 = 0;
134  }
135 }
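// These shift helpers clamp the shift count rather than relying on the
// C shift operators directly: shifting an N-bit value by N or more
// places is undefined behaviour in C/C++, but the callers below need a
// well-defined zero result (e.g. lsr16(mnt, 3 - exp), where exp can be
// very negative).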
136 
137 static inline void
138 mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
139 {
140  uint32_t mask = ((uint32_t)1 << 31) - 1;
141  uint64_t a0 = a & mask;
142  uint64_t a1 = a >> 31 & mask;
143  uint64_t b0 = b & mask;
144  uint64_t b1 = b >> 31 & mask;
145  uint64_t p0 = a0 * b0;
146  uint64_t p2 = a1 * b1;
147  uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
148  uint64_t s0 = p0;
149  uint64_t s1 = (s0 >> 31) + p1;
150  uint64_t s2 = (s1 >> 31) + p2;
151  *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
152  *x1 = s2 >> 2;
153 }
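// mul62x62() forms the full product of two operands of up to 62 bits
// using the Karatsuba identity, needing three 31x31-bit multiplications
// instead of four:
//   a*b = p2*2^62 + p1*2^31 + p0, with p1 = (a0+a1)*(b0+b1) - p0 - p2
// The sums s1 and s2 propagate carries between the 31-bit limbs. The
// 62-bit limit ensures a >> 31 still fits in 31 bits, so the masking
// loses nothing and the cross product cannot overflow 64 bits.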
154 
155 static inline void
156 mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
157 {
158  uint64_t t0 = (uint64_t)(uint32_t)a * b;
159  uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
160  *x0 = t1 << 32 | (uint32_t)t0;
161  *x1 = t1 >> 32;
162 }
163 
164 static inline void
165 add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
166  uint64_t b1)
167 {
168  *x0 = a0 + b0;
169  *x1 = a1 + b1 + (*x0 < a0);
170 }
171 
172 static inline void
173 sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
174  uint64_t b1)
175 {
176  *x0 = a0 - b0;
177  *x1 = a1 - b1 - (*x0 > a0);
178 }
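// add128()/sub128() do 128-bit arithmetic on (low, high) pairs. The
// carry out of the low word is recovered from unsigned wraparound:
// after *x0 = a0 + b0 a carry occurred iff *x0 < a0, and after
// *x0 = a0 - b0 a borrow occurred iff *x0 > a0.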
179 
180 static inline int
181 cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
182 {
183  return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
184 }
185 
186 static inline uint16_t
187 fp16_normalise(uint16_t mnt, int *exp)
188 {
189  int shift;
190 
191  if (!mnt) {
192  return 0;
193  }
194 
195  for (shift = 8; shift; shift >>= 1) {
196  if (!(mnt >> (16 - shift))) {
197  mnt <<= shift;
198  *exp -= shift;
199  }
200  }
201  return mnt;
202 }
203 
204 static inline uint32_t
205 fp32_normalise(uint32_t mnt, int *exp)
206 {
207  int shift;
208 
209  if (!mnt) {
210  return 0;
211  }
212 
213  for (shift = 16; shift; shift >>= 1) {
214  if (!(mnt >> (32 - shift))) {
215  mnt <<= shift;
216  *exp -= shift;
217  }
218  }
219  return mnt;
220 }
221 
222 static inline uint64_t
223 fp64_normalise(uint64_t mnt, int *exp)
224 {
225  int shift;
226 
227  if (!mnt) {
228  return 0;
229  }
230 
231  for (shift = 32; shift; shift >>= 1) {
232  if (!(mnt >> (64 - shift))) {
233  mnt <<= shift;
234  *exp -= shift;
235  }
236  }
237  return mnt;
238 }
239 
240 static inline void
241 fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
242 {
243  uint64_t x0 = *mnt0;
244  uint64_t x1 = *mnt1;
245  int shift;
246 
247  if (!x0 && !x1) {
248  return;
249  }
250 
251  if (!x1) {
252  x1 = x0;
253  x0 = 0;
254  *exp -= 64;
255  }
256 
257  for (shift = 32; shift; shift >>= 1) {
258  if (!(x1 >> (64 - shift))) {
259  x1 = x1 << shift | x0 >> (64 - shift);
260  x0 <<= shift;
261  *exp -= shift;
262  }
263  }
264 
265  *mnt0 = x0;
266  *mnt1 = x1;
267 }
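// The normalise routines shift the mantissa left until its top bit is
// set, adjusting *exp to compensate. Rather than looping bit by bit,
// they binary-search the leading-zero count with shifts of width/2,
// width/4, ..., 1; the 128-bit version first swaps whole 64-bit words
// when the high word is zero.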
268 
269 static inline uint16_t
270 fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
271 {
272  return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
273 }
274 
275 static inline uint32_t
276 fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
277 {
278  return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
279 }
280 
281 static inline uint64_t
282 fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
283 {
284  return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
285 }
286 
287 static inline uint16_t
288 fp16_zero(int sgn)
289 {
290  return fp16_pack(sgn, 0, 0);
291 }
292 
293 static inline uint32_t
294 fp32_zero(int sgn)
295 {
296  return fp32_pack(sgn, 0, 0);
297 }
298 
299 static inline uint64_t
300 fp64_zero(int sgn)
301 {
302  return fp64_pack(sgn, 0, 0);
303 }
304 
305 static inline uint16_t
306 fp16_max_normal(int sgn)
307 {
308  return fp16_pack(sgn, 30, -1);
309 }
310 
311 static inline uint32_t
312 fp32_max_normal(int sgn)
313 {
314  return fp32_pack(sgn, 254, -1);
315 }
316 
317 static inline uint64_t
318 fp64_max_normal(int sgn)
319 {
320  return fp64_pack(sgn, 2046, -1);
321 }
322 
323 static inline uint16_t
324 fp16_infinity(int sgn)
325 {
326  return fp16_pack(sgn, 31, 0);
327 }
328 
329 static inline uint32_t
330 fp32_infinity(int sgn)
331 {
332  return fp32_pack(sgn, 255, 0);
333 }
334 
335 static inline uint64_t
336 fp64_infinity(int sgn)
337 {
338  return fp64_pack(sgn, 2047, 0);
339 }
340 
341 static inline uint16_t
342 fp16_defaultNaN()
343 {
344  return fp16_pack(0, 31, (uint16_t)1 << 9);
345 }
346 
347 static inline uint32_t
348 fp32_defaultNaN()
349 {
350  return fp32_pack(0, 255, (uint32_t)1 << 22);
351 }
352 
353 static inline uint64_t
354 fp64_defaultNaN()
355 {
356  return fp64_pack(0, 2047, (uint64_t)1 << 51);
357 }
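// The helpers above construct IEEE-754 bit patterns directly: the sign
// in the top bit, a biased exponent (5/8/11 bits for half/single/double
// precision), then the fraction. The default NaN is a quiet NaN with
// only the top fraction bit set and the sign clear, e.g. 0x7fc00000 for
// single precision.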
358 
359 static inline void
360 fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
361  int *flags)
362 {
363  *sgn = x >> 15;
364  *exp = x >> 10 & 31;
365  *mnt = x & (((uint16_t)1 << 10) - 1);
366 
367  // Handle subnormals:
368  if (*exp) {
369  *mnt |= (uint16_t)1 << 10;
370  } else {
371  ++*exp;
372  // There is no flush to zero in this case!
373  }
374 }
375 
376 static inline void
377 fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
378  int *flags)
379 {
380  *sgn = x >> 31;
381  *exp = x >> 23 & 255;
382  *mnt = x & (((uint32_t)1 << 23) - 1);
383 
384  // Handle subnormals:
385  if (*exp) {
386  *mnt |= (uint32_t)1 << 23;
387  } else {
388  ++*exp;
389  if ((mode & FPLIB_FZ) && *mnt) {
390  *flags |= FPLIB_IDC;
391  *mnt = 0;
392  }
393  }
394 }
395 
396 static inline void
397 fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
398  int *flags)
399 {
400  *sgn = x >> 63;
401  *exp = x >> 52 & 2047;
402  *mnt = x & (((uint64_t)1 << 52) - 1);
403 
404  // Handle subnormals:
405  if (*exp) {
406  *mnt |= (uint64_t)1 << 52;
407  } else {
408  ++*exp;
409  if ((mode & FPLIB_FZ) && *mnt) {
410  *flags |= FPLIB_IDC;
411  *mnt = 0;
412  }
413  }
414 }
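// fp*_unpack() splits an operand into sign, biased exponent and
// mantissa, adding the implicit leading one for normal numbers. For
// subnormals the exponent is bumped to 1 so all mantissas share one
// scale, and with FPLIB_FZ a nonzero subnormal input is flushed to zero
// and Input Denormal is signalled. Half precision is never flushed,
// since FZ does not apply to the half-precision format.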
415 
416 static inline uint32_t
417 fp32_process_NaN(uint32_t a, int mode, int *flags)
418 {
419  if (!(a >> 22 & 1)) {
420  *flags |= FPLIB_IOC;
421  a |= (uint32_t)1 << 22;
422  }
423  return mode & FPLIB_DN ? fp32_defaultNaN() : a;
424 }
425 
426 static inline uint64_t
427 fp64_process_NaN(uint64_t a, int mode, int *flags)
428 {
429  if (!(a >> 51 & 1)) {
430  *flags |= FPLIB_IOC;
431  a |= (uint64_t)1 << 51;
432  }
433  return mode & FPLIB_DN ? fp64_defaultNaN() : a;
434 }
435 
436 static uint32_t
437 fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
438 {
439  int a_exp = a >> 23 & 255;
440  uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
441  int b_exp = b >> 23 & 255;
442  uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
443 
444  // Handle signalling NaNs:
445  if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
446  return fp32_process_NaN(a, mode, flags);
447  if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
448  return fp32_process_NaN(b, mode, flags);
449 
450  // Handle quiet NaNs:
451  if (a_exp == 255 && a_mnt)
452  return fp32_process_NaN(a, mode, flags);
453  if (b_exp == 255 && b_mnt)
454  return fp32_process_NaN(b, mode, flags);
455 
456  return 0;
457 }
458 
459 static uint64_t
460 fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
461 {
462  int a_exp = a >> 52 & 2047;
463  uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
464  int b_exp = b >> 52 & 2047;
465  uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
466 
467  // Handle signalling NaNs:
468  if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
469  return fp64_process_NaN(a, mode, flags);
470  if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
471  return fp64_process_NaN(b, mode, flags);
472 
473  // Handle quiet NaNs:
474  if (a_exp == 2047 && a_mnt)
475  return fp64_process_NaN(a, mode, flags);
476  if (b_exp == 2047 && b_mnt)
477  return fp64_process_NaN(b, mode, flags);
478 
479  return 0;
480 }
481 
482 static uint32_t
483 fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
484 {
485  int a_exp = a >> 23 & 255;
486  uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
487  int b_exp = b >> 23 & 255;
488  uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
489  int c_exp = c >> 23 & 255;
490  uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);
491 
492  // Handle signalling NaNs:
493  if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
494  return fp32_process_NaN(a, mode, flags);
495  if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
496  return fp32_process_NaN(b, mode, flags);
497  if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
498  return fp32_process_NaN(c, mode, flags);
499 
500  // Handle quiet NaNs:
501  if (a_exp == 255 && a_mnt)
502  return fp32_process_NaN(a, mode, flags);
503  if (b_exp == 255 && b_mnt)
504  return fp32_process_NaN(b, mode, flags);
505  if (c_exp == 255 && c_mnt)
506  return fp32_process_NaN(c, mode, flags);
507 
508  return 0;
509 }
510 
511 static uint64_t
512 fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
513 {
514  int a_exp = a >> 52 & 2047;
515  uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
516  int b_exp = b >> 52 & 2047;
517  uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
518  int c_exp = c >> 52 & 2047;
519  uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);
520 
521  // Handle signalling NaNs:
522  if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
523  return fp64_process_NaN(a, mode, flags);
524  if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
525  return fp64_process_NaN(b, mode, flags);
526  if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
527  return fp64_process_NaN(c, mode, flags);
528 
529  // Handle quiet NaNs:
530  if (a_exp == 2047 && a_mnt)
531  return fp64_process_NaN(a, mode, flags);
532  if (b_exp == 2047 && b_mnt)
533  return fp64_process_NaN(b, mode, flags);
534  if (c_exp == 2047 && c_mnt)
535  return fp64_process_NaN(c, mode, flags);
536 
537  return 0;
538 }
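// The process_NaNs routines implement the ARM propagation rule:
// signalling NaNs take precedence over quiet NaNs, and the earlier
// operand wins among equals. A signalling NaN is quietened (top
// fraction bit set) and raises Invalid Operation; in default-NaN mode
// the default NaN is returned instead. A return value of 0 means "no
// NaN", which is unambiguous because 0 is not a NaN encoding.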
539 
540 static uint16_t
541 fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
542 {
543  int biased_exp; // non-negative exponent value for result
544  uint16_t int_mant; // mantissa for result, less than (1 << 11)
545  int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
546 
547  assert(rm != FPRounding_TIEAWAY);
548 
549  // There is no flush to zero in this case!
550 
551  // The bottom 5 bits of mnt are orred together:
552  mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);
553 
554  if (exp > 0) {
555  biased_exp = exp;
556  int_mant = mnt >> 2;
557  error = mnt & 3;
558  } else {
559  biased_exp = 0;
560  int_mant = lsr16(mnt, 3 - exp);
561  error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
562  }
563 
564  if (!biased_exp && error) { // xx should also check fpscr_val<11>
565  *flags |= FPLIB_UFC;
566  }
567 
568  // Round up:
569  if ((rm == FPLIB_RN && (error == 3 ||
570  (error == 2 && (int_mant & 1)))) ||
571  (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
572  ++int_mant;
573  if (int_mant == (uint32_t)1 << 10) {
574  // Rounded up from denormalized to normalized
575  biased_exp = 1;
576  }
577  if (int_mant == (uint32_t)1 << 11) {
578  // Rounded up to next exponent
579  ++biased_exp;
580  int_mant >>= 1;
581  }
582  }
583 
584  // Handle rounding to odd aka Von Neumann rounding:
585  if (error && rm == FPRounding_ODD)
586  int_mant |= 1;
587 
588  // Handle overflow:
589  if (!(mode & FPLIB_AHP)) {
590  if (biased_exp >= 31) {
591  *flags |= FPLIB_OFC | FPLIB_IXC;
592  if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
593  (rm == FPLIB_RM && sgn)) {
594  return fp16_infinity(sgn);
595  } else {
596  return fp16_max_normal(sgn);
597  }
598  }
599  } else {
600  if (biased_exp >= 32) {
601  *flags |= FPLIB_IOC;
602  return fp16_pack(sgn, 31, -1);
603  }
604  }
605 
606  if (error) {
607  *flags |= FPLIB_IXC;
608  }
609 
610  return fp16_pack(sgn, biased_exp, int_mant);
611 }
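// The fp*_round_ routines receive a mantissa carrying two bits below
// the rounding point plus a sticky bit, summarised in 'error':
// 0 = exact, 1 = below half an ulp, 2 = exactly half, 3 = above half.
// Round-to-nearest-even therefore rounds up when error == 3, or when
// error == 2 and the mantissa is odd; the directed modes round up on
// any nonzero error in the matching direction.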
612 
613 static uint32_t
614 fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
615 {
616  int biased_exp; // non-negative exponent value for result
617  uint32_t int_mant; // mantissa for result, less than (1 << 24)
618  int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
619 
620  assert(rm != FPRounding_TIEAWAY);
621 
622  // Flush to zero:
623  if ((mode & FPLIB_FZ) && exp < 1) {
624  *flags |= FPLIB_UFC;
625  return fp32_zero(sgn);
626  }
627 
628  // The bottom 8 bits of mnt are orred together:
629  mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);
630 
631  if (exp > 0) {
632  biased_exp = exp;
633  int_mant = mnt >> 2;
634  error = mnt & 3;
635  } else {
636  biased_exp = 0;
637  int_mant = lsr32(mnt, 3 - exp);
638  error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
639  }
640 
641  if (!biased_exp && error) { // xx should also check fpscr_val<11>
642  *flags |= FPLIB_UFC;
643  }
644 
645  // Round up:
646  if ((rm == FPLIB_RN && (error == 3 ||
647  (error == 2 && (int_mant & 1)))) ||
648  (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
649  ++int_mant;
650  if (int_mant == (uint32_t)1 << 23) {
651  // Rounded up from denormalized to normalized
652  biased_exp = 1;
653  }
654  if (int_mant == (uint32_t)1 << 24) {
655  // Rounded up to next exponent
656  ++biased_exp;
657  int_mant >>= 1;
658  }
659  }
660 
661  // Handle rounding to odd aka Von Neumann rounding:
662  if (error && rm == FPRounding_ODD)
663  int_mant |= 1;
664 
665  // Handle overflow:
666  if (biased_exp >= 255) {
667  *flags |= FPLIB_OFC | FPLIB_IXC;
668  if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
669  (rm == FPLIB_RM && sgn)) {
670  return fp32_infinity(sgn);
671  } else {
672  return fp32_max_normal(sgn);
673  }
674  }
675 
676  if (error) {
677  *flags |= FPLIB_IXC;
678  }
679 
680  return fp32_pack(sgn, biased_exp, int_mant);
681 }
682 
683 static uint32_t
684 fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
685 {
686  return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
687 }
688 
689 static uint64_t
690 fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
691 {
692  int biased_exp; // non-negative exponent value for result
693  uint64_t int_mant; // mantissa for result, less than (1 << 53)
694  int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
695 
696  assert(rm != FPRounding_TIEAWAY);
697 
698  // Flush to zero:
699  if ((mode & FPLIB_FZ) && exp < 1) {
700  *flags |= FPLIB_UFC;
701  return fp64_zero(sgn);
702  }
703 
704  // The bottom 11 bits of mnt are orred together:
705  mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);
706 
707  if (exp > 0) {
708  biased_exp = exp;
709  int_mant = mnt >> 2;
710  error = mnt & 3;
711  } else {
712  biased_exp = 0;
713  int_mant = lsr64(mnt, 3 - exp);
714  error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
715  }
716 
717  if (!biased_exp && error) { // xx should also check fpscr_val<11>
718  *flags |= FPLIB_UFC;
719  }
720 
721  // Round up:
722  if ((rm == FPLIB_RN && (error == 3 ||
723  (error == 2 && (int_mant & 1)))) ||
724  (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
725  ++int_mant;
726  if (int_mant == (uint64_t)1 << 52) {
727  // Rounded up from denormalized to normalized
728  biased_exp = 1;
729  }
730  if (int_mant == (uint64_t)1 << 53) {
731  // Rounded up to next exponent
732  ++biased_exp;
733  int_mant >>= 1;
734  }
735  }
736 
737  // Handle rounding to odd aka Von Neumann rounding:
738  if (error && rm == FPRounding_ODD)
739  int_mant |= 1;
740 
741  // Handle overflow:
742  if (biased_exp >= 2047) {
743  *flags |= FPLIB_OFC | FPLIB_IXC;
744  if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
745  (rm == FPLIB_RM && sgn)) {
746  return fp64_infinity(sgn);
747  } else {
748  return fp64_max_normal(sgn);
749  }
750  }
751 
752  if (error) {
753  *flags |= FPLIB_IXC;
754  }
755 
756  return fp64_pack(sgn, biased_exp, int_mant);
757 }
758 
759 static uint64_t
760 fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
761 {
762  return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
763 }
764 
765 static int
766 fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
767 {
768  int a_sgn, a_exp, b_sgn, b_exp;
769  uint32_t a_mnt, b_mnt;
770 
771  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
772  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
773 
774  if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
775  (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
776  if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
777  (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
778  *flags |= FPLIB_IOC;
779  return 0;
780  }
781  return a == b || (!a_mnt && !b_mnt);
782 }
783 
784 static int
785 fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
786 {
787  int a_sgn, a_exp, b_sgn, b_exp;
788  uint32_t a_mnt, b_mnt;
789 
790  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
791  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
792 
793  if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
794  (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
795  *flags |= FPLIB_IOC;
796  return 0;
797  }
798  if (!a_mnt && !b_mnt)
799  return 1;
800  if (a_sgn != b_sgn)
801  return b_sgn;
802  if (a_exp != b_exp)
803  return a_sgn ^ (a_exp > b_exp);
804  if (a_mnt != b_mnt)
805  return a_sgn ^ (a_mnt > b_mnt);
806  return 1;
807 }
808 
809 static int
810 fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
811 {
812  int a_sgn, a_exp, b_sgn, b_exp;
813  uint32_t a_mnt, b_mnt;
814 
815  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
816  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
817 
818  if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
819  (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
820  *flags |= FPLIB_IOC;
821  return 0;
822  }
823  if (!a_mnt && !b_mnt)
824  return 0;
825  if (a_sgn != b_sgn)
826  return b_sgn;
827  if (a_exp != b_exp)
828  return a_sgn ^ (a_exp > b_exp);
829  if (a_mnt != b_mnt)
830  return a_sgn ^ (a_mnt > b_mnt);
831  return 0;
832 }
833 
834 static int
835 fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
836 {
837  int a_sgn, a_exp, b_sgn, b_exp;
838  uint64_t a_mnt, b_mnt;
839 
840  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
841  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
842 
843  if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
844  (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
845  if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
846  (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
847  *flags |= FPLIB_IOC;
848  return 0;
849  }
850  return a == b || (!a_mnt && !b_mnt);
851 }
852 
853 static int
854 fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
855 {
856  int a_sgn, a_exp, b_sgn, b_exp;
857  uint64_t a_mnt, b_mnt;
858 
859  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
860  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
861 
862  if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
863  (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
864  *flags |= FPLIB_IOC;
865  return 0;
866  }
867  if (!a_mnt && !b_mnt)
868  return 1;
869  if (a_sgn != b_sgn)
870  return b_sgn;
871  if (a_exp != b_exp)
872  return a_sgn ^ (a_exp > b_exp);
873  if (a_mnt != b_mnt)
874  return a_sgn ^ (a_mnt > b_mnt);
875  return 1;
876 }
877 
878 static int
879 fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
880 {
881  int a_sgn, a_exp, b_sgn, b_exp;
882  uint64_t a_mnt, b_mnt;
883 
884  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
885  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
886 
887  if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
888  (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
889  *flags |= FPLIB_IOC;
890  return 0;
891  }
892  if (!a_mnt && !b_mnt)
893  return 0;
894  if (a_sgn != b_sgn)
895  return b_sgn;
896  if (a_exp != b_exp)
897  return a_sgn ^ (a_exp > b_exp);
898  if (a_mnt != b_mnt)
899  return a_sgn ^ (a_mnt > b_mnt);
900  return 0;
901 }
902 
903 static uint32_t
904 fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
905 {
906  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
907  uint32_t a_mnt, b_mnt, x, x_mnt;
908 
909  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
910  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
911 
912  if ((x = fp32_process_NaNs(a, b, mode, flags))) {
913  return x;
914  }
915 
916  b_sgn ^= neg;
917 
918  // Handle infinities and zeroes:
919  if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
920  *flags |= FPLIB_IOC;
921  return fp32_defaultNaN();
922  } else if (a_exp == 255) {
923  return fp32_infinity(a_sgn);
924  } else if (b_exp == 255) {
925  return fp32_infinity(b_sgn);
926  } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
927  return fp32_zero(a_sgn);
928  }
929 
930  a_mnt <<= 3;
931  b_mnt <<= 3;
932  if (a_exp >= b_exp) {
933  b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
934  !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
935  b_exp = a_exp;
936  } else {
937  a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
938  !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
939  a_exp = b_exp;
940  }
941  x_sgn = a_sgn;
942  x_exp = a_exp;
943  if (a_sgn == b_sgn) {
944  x_mnt = a_mnt + b_mnt;
945  } else if (a_mnt >= b_mnt) {
946  x_mnt = a_mnt - b_mnt;
947  } else {
948  x_sgn ^= 1;
949  x_mnt = b_mnt - a_mnt;
950  }
951 
952  if (!x_mnt) {
953  // Sign of exact zero result depends on rounding mode
954  return fp32_zero((mode & 3) == 2);
955  }
956 
957  x_mnt = fp32_normalise(x_mnt, &x_exp);
958 
959  return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
960 }
961 
962 static uint64_t
963 fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
964 {
965  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
966  uint64_t a_mnt, b_mnt, x, x_mnt;
967 
968  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
969  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
970 
971  if ((x = fp64_process_NaNs(a, b, mode, flags))) {
972  return x;
973  }
974 
975  b_sgn ^= neg;
976 
977  // Handle infinities and zeroes:
978  if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
979  *flags |= FPLIB_IOC;
980  return fp64_defaultNaN();
981  } else if (a_exp == 2047) {
982  return fp64_infinity(a_sgn);
983  } else if (b_exp == 2047) {
984  return fp64_infinity(b_sgn);
985  } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
986  return fp64_zero(a_sgn);
987  }
988 
989  a_mnt <<= 3;
990  b_mnt <<= 3;
991  if (a_exp >= b_exp) {
992  b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
993  !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
994  b_exp = a_exp;
995  } else {
996  a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
997  !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
998  a_exp = b_exp;
999  }
1000  x_sgn = a_sgn;
1001  x_exp = a_exp;
1002  if (a_sgn == b_sgn) {
1003  x_mnt = a_mnt + b_mnt;
1004  } else if (a_mnt >= b_mnt) {
1005  x_mnt = a_mnt - b_mnt;
1006  } else {
1007  x_sgn ^= 1;
1008  x_mnt = b_mnt - a_mnt;
1009  }
1010 
1011  if (!x_mnt) {
1012  // Sign of exact zero result depends on rounding mode
1013  return fp64_zero((mode & 3) == 2);
1014  }
1015 
1016  x_mnt = fp64_normalise(x_mnt, &x_exp);
1017 
1018  return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
1019 }
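// Addition aligns the smaller operand by shifting it right to the
// larger exponent, folding the shifted-out bits into a sticky bit so
// rounding still sees them; the earlier 'mnt <<= 3' provides the
// guard/round/sticky positions. When opposite signs cancel exactly,
// the result is +0 except in round-toward-minus-infinity mode
// ((mode & 3) == FPLIB_RM), where IEEE 754 requires -0.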
1020 
1021 static uint32_t
1022 fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
1023 {
1024  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1025  uint32_t a_mnt, b_mnt, x;
1026  uint64_t x_mnt;
1027 
1028  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1029  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1030 
1031  if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1032  return x;
1033  }
1034 
1035  // Handle infinities and zeroes:
1036  if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
1037  *flags |= FPLIB_IOC;
1038  return fp32_defaultNaN();
1039  } else if (a_exp == 255 || b_exp == 255) {
1040  return fp32_infinity(a_sgn ^ b_sgn);
1041  } else if (!a_mnt || !b_mnt) {
1042  return fp32_zero(a_sgn ^ b_sgn);
1043  }
1044 
1045  // Multiply and normalise:
1046  x_sgn = a_sgn ^ b_sgn;
1047  x_exp = a_exp + b_exp - 110;
1048  x_mnt = (uint64_t)a_mnt * b_mnt;
1049  x_mnt = fp64_normalise(x_mnt, &x_exp);
1050 
1051  // Convert to 32 bits, collapsing error into bottom bit:
1052  x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);
1053 
1054  return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1055 }
1056 
1057 static uint64_t
1058 fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
1059 {
1060  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1061  uint64_t a_mnt, b_mnt, x;
1062  uint64_t x0_mnt, x1_mnt;
1063 
1064  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1065  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1066 
1067  if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1068  return x;
1069  }
1070 
1071  // Handle infinities and zeroes:
1072  if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
1073  *flags |= FPLIB_IOC;
1074  return fp64_defaultNaN();
1075  } else if (a_exp == 2047 || b_exp == 2047) {
1076  return fp64_infinity(a_sgn ^ b_sgn);
1077  } else if (!a_mnt || !b_mnt) {
1078  return fp64_zero(a_sgn ^ b_sgn);
1079  }
1080 
1081  // Multiply and normalise:
1082  x_sgn = a_sgn ^ b_sgn;
1083  x_exp = a_exp + b_exp - 1000;
1084  mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1085  fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1086 
1087  // Convert to 64 bits, collapsing error into bottom bit:
1088  x0_mnt = x1_mnt << 1 | !!x0_mnt;
1089 
1090  return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1091 }
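// Multiplication needs the exact double-width product: fp32 uses a
// native 64-bit multiply, while fp64 uses mul62x62() for a 128-bit
// result; the low bits are then collapsed into a single sticky bit
// before rounding. The constants -110 and -1000 fold together the two
// exponent biases and the fixed-point position that fp*_round expects.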
1092 
1093 static uint32_t
1094 fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
1095  int mode, int *flags)
1096 {
1097  int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1098  uint32_t a_mnt, b_mnt, c_mnt, x;
1099  uint64_t x_mnt, y_mnt;
1100 
1101  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1102  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1103  fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1104 
1105  x = fp32_process_NaNs3(a, b, c, mode, flags);
1106 
1107  // Quiet NaN added to product of zero and infinity:
1108  if (a_exp == 255 && (a_mnt >> 22 & 1) &&
1109  ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
1110  (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
1111  x = fp32_defaultNaN();
1112  *flags |= FPLIB_IOC;
1113  }
1114 
1115  if (x) {
1116  return x;
1117  }
1118 
1119  // Handle infinities and zeroes:
1120  if ((b_exp == 255 && !c_mnt) ||
1121  (c_exp == 255 && !b_mnt) ||
1122  (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
1123  (a_sgn != (b_sgn ^ c_sgn)))) {
1124  *flags |= FPLIB_IOC;
1125  return fp32_defaultNaN();
1126  }
1127  if (a_exp == 255)
1128  return fp32_infinity(a_sgn);
1129  if (b_exp == 255 || c_exp == 255)
1130  return fp32_infinity(b_sgn ^ c_sgn);
1131  if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1132  return fp32_zero(a_sgn);
1133 
1134  x_sgn = a_sgn;
1135  x_exp = a_exp + 13;
1136  x_mnt = (uint64_t)a_mnt << 27;
1137 
1138  // Multiply:
1139  y_sgn = b_sgn ^ c_sgn;
1140  y_exp = b_exp + c_exp - 113;
1141  y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1142  if (!y_mnt) {
1143  y_exp = x_exp;
1144  }
1145 
1146  // Add:
1147  if (x_exp >= y_exp) {
1148  y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1149  !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1150  y_exp = x_exp;
1151  } else {
1152  x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1153  !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1154  x_exp = y_exp;
1155  }
1156  if (x_sgn == y_sgn) {
1157  x_mnt = x_mnt + y_mnt;
1158  } else if (x_mnt >= y_mnt) {
1159  x_mnt = x_mnt - y_mnt;
1160  } else {
1161  x_sgn ^= 1;
1162  x_mnt = y_mnt - x_mnt;
1163  }
1164 
1165  if (!x_mnt) {
1166  // Sign of exact zero result depends on rounding mode
1167  return fp32_zero((mode & 3) == 2);
1168  }
1169 
1170  // Normalise and convert to 32 bits, collapsing error into bottom bit:
1171  x_mnt = fp64_normalise(x_mnt, &x_exp);
1172  x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1173 
1174  return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1175 }
1176 
1177 static uint64_t
1178 fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
1179  int mode, int *flags)
1180 {
1181  int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1182  uint64_t a_mnt, b_mnt, c_mnt, x;
1183  uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1184 
1185  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1186  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1187  fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1188 
1189  x = fp64_process_NaNs3(a, b, c, mode, flags);
1190 
1191  // Quiet NaN added to product of zero and infinity:
1192  if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
1193  ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
1194  (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
1195  x = fp64_defaultNaN();
1196  *flags |= FPLIB_IOC;
1197  }
1198 
1199  if (x) {
1200  return x;
1201  }
1202 
1203  // Handle infinities and zeroes:
1204  if ((b_exp == 2047 && !c_mnt) ||
1205  (c_exp == 2047 && !b_mnt) ||
1206  (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
1207  (a_sgn != (b_sgn ^ c_sgn)))) {
1208  *flags |= FPLIB_IOC;
1209  return fp64_defaultNaN();
1210  }
1211  if (a_exp == 2047)
1212  return fp64_infinity(a_sgn);
1213  if (b_exp == 2047 || c_exp == 2047)
1214  return fp64_infinity(b_sgn ^ c_sgn);
1215  if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1216  return fp64_zero(a_sgn);
1217 
1218  x_sgn = a_sgn;
1219  x_exp = a_exp + 11;
1220  x0_mnt = 0;
1221  x1_mnt = a_mnt;
1222 
1223  // Multiply:
1224  y_sgn = b_sgn ^ c_sgn;
1225  y_exp = b_exp + c_exp - 1003;
1226  mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1227  if (!y0_mnt && !y1_mnt) {
1228  y_exp = x_exp;
1229  }
1230 
1231  // Add:
1232  if (x_exp >= y_exp) {
1233  uint64_t t0, t1;
1234  lsl128(&t0, &t1, y0_mnt, y1_mnt,
1235  x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1236  lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1237  y0_mnt |= !!(t0 | t1);
1238  y_exp = x_exp;
1239  } else {
1240  uint64_t t0, t1;
1241  lsl128(&t0, &t1, x0_mnt, x1_mnt,
1242  y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1243  lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1244  x0_mnt |= !!(t0 | t1);
1245  x_exp = y_exp;
1246  }
1247  if (x_sgn == y_sgn) {
1248  add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1249  } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1250  sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1251  } else {
1252  x_sgn ^= 1;
1253  sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1254  }
1255 
1256  if (!x0_mnt && !x1_mnt) {
1257  // Sign of exact zero result depends on rounding mode
1258  return fp64_zero((mode & 3) == 2);
1259  }
1260 
1261  // Normalise and convert to 64 bits, collapsing error into bottom bit:
1262  fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1263  x0_mnt = x1_mnt << 1 | !!x0_mnt;
1264 
1265  return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1266 }
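// The fused multiply-add keeps the b*c product exact (a 128-bit
// mantissa for fp64) and adds the addend at full precision, so only one
// rounding occurs at the end. The special case near the top matches the
// ARM rule that a quiet-NaN addend combined with a 0 * infinity product
// raises Invalid Operation and yields the default NaN, where plain NaN
// processing would have returned the quiet NaN silently.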
1267 
1268 static uint32_t
1269 fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1270 {
1271  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1272  uint32_t a_mnt, b_mnt, x;
1273  uint64_t x_mnt;
1274 
1275  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1276  fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1277 
1278  if ((x = fp32_process_NaNs(a, b, mode, flags)))
1279  return x;
1280 
1281  // Handle infinities and zeroes:
1282  if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
1283  *flags |= FPLIB_IOC;
1284  return fp32_defaultNaN();
1285  }
1286  if (a_exp == 255 || !b_mnt) {
1287  if (a_exp != 255)
1288  *flags |= FPLIB_DZC;
1289  return fp32_infinity(a_sgn ^ b_sgn);
1290  }
1291  if (!a_mnt || b_exp == 255)
1292  return fp32_zero(a_sgn ^ b_sgn);
1293 
1294  // Divide, setting bottom bit if inexact:
1295  a_mnt = fp32_normalise(a_mnt, &a_exp);
1296  x_sgn = a_sgn ^ b_sgn;
1297  x_exp = a_exp - b_exp + 172;
1298  x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
1299  x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);
1300 
1301  // Normalise and convert to 32 bits, collapsing error into bottom bit:
1302  x_mnt = fp64_normalise(x_mnt, &x_exp);
1303  x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1304 
1305  return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1306 }
1307 
1308 static uint64_t
1309 fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1310 {
1311  int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1312  uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1313 
1314  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1315  fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1316 
1317  if ((x = fp64_process_NaNs(a, b, mode, flags)))
1318  return x;
1319 
1320  // Handle infinities and zeroes:
1321  if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
1322  *flags |= FPLIB_IOC;
1323  return fp64_defaultNaN();
1324  }
1325  if (a_exp == 2047 || !b_mnt) {
1326  if (a_exp != 2047)
1327  *flags |= FPLIB_DZC;
1328  return fp64_infinity(a_sgn ^ b_sgn);
1329  }
1330  if (!a_mnt || b_exp == 2047)
1331  return fp64_zero(a_sgn ^ b_sgn);
1332 
1333  // Find reciprocal of divisor with Newton-Raphson:
1334  a_mnt = fp64_normalise(a_mnt, &a_exp);
1335  b_mnt = fp64_normalise(b_mnt, &b_exp);
1336  x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1337  mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1338  sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1339  lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1340  mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1341  lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1342 
1343  // Multiply by dividend:
1344  x_sgn = a_sgn ^ b_sgn;
1345  x_exp = a_exp - b_exp + 1031;
1346  mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
1347  lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1348  x_mnt = x1_mnt;
1349 
1350  // This is an underestimate, so try adding one:
1351  mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
1352  c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1353  if (c <= 0) {
1354  ++x_mnt;
1355  }
1356 
1357  x_mnt = fp64_normalise(x_mnt, &x_exp);
1358 
1359  return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1360 }
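// fp64_div() has no 128/64-bit divide to lean on, so it seeds a
// reciprocal of the divisor from a 64-bit division and sharpens it with
// a Newton-Raphson step, x' = x * (2 - b*x), which roughly doubles the
// number of correct bits. The quotient estimate a * (1/b) may be one
// ulp low, so one is added back if b * (q+1) still does not exceed the
// dividend; the comparison result doubles as the sticky bit.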
1361 
1362 static void
1363 set_fpscr0(FPSCR &fpscr, int flags)
1364 {
1365  if (flags & FPLIB_IDC) {
1366  fpscr.idc = 1;
1367  }
1368  if (flags & FPLIB_IOC) {
1369  fpscr.ioc = 1;
1370  }
1371  if (flags & FPLIB_DZC) {
1372  fpscr.dzc = 1;
1373  }
1374  if (flags & FPLIB_OFC) {
1375  fpscr.ofc = 1;
1376  }
1377  if (flags & FPLIB_UFC) {
1378  fpscr.ufc = 1;
1379  }
1380  if (flags & FPLIB_IXC) {
1381  fpscr.ixc = 1;
1382  }
1383 }
1384 
1385 static uint32_t
1386 fp32_sqrt(uint32_t a, int mode, int *flags)
1387 {
1388  int a_sgn, a_exp, x_sgn, x_exp;
1389  uint32_t a_mnt, x, x_mnt;
1390  uint64_t t0, t1;
1391 
1392  fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1393 
1394  // Handle NaNs:
1395  if (a_exp == 255 && (uint32_t)(a_mnt << 9))
1396  return fp32_process_NaN(a, mode, flags);
1397 
1398  // Handle infinities and zeroes:
1399  if (!a_mnt) {
1400  return fp32_zero(a_sgn);
1401  }
1402  if (a_exp == 255 && !a_sgn) {
1403  return fp32_infinity(a_sgn);
1404  }
1405  if (a_sgn) {
1406  *flags |= FPLIB_IOC;
1407  return fp32_defaultNaN();
1408  }
1409 
1410  a_mnt = fp32_normalise(a_mnt, &a_exp);
1411  if (!(a_exp & 1)) {
1412  ++a_exp;
1413  a_mnt >>= 1;
1414  }
1415 
1416  // x = (a * 3 + 5) / 8
1417  x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);
1418 
1419  // x = (a / x + x) / 2; // 16-bit accuracy
1420  x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1421 
1422  // x = (a / x + x) / 2; // 16-bit accuracy
1423  x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1424 
1425  // x = (a / x + x) / 2; // 32-bit accuracy
1426  x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
1427 
1428  x_sgn = 0;
1429  x_exp = (a_exp + 147) >> 1;
1430  x_mnt = ((x - (1 << 5)) >> 6) + 1;
1431  t1 = (uint64_t)x_mnt * x_mnt;
1432  t0 = (uint64_t)a_mnt << 19;
1433  if (t1 > t0) {
1434  --x_mnt;
1435  }
1436 
1437  x_mnt = fp32_normalise(x_mnt, &x_exp);
1438 
1439  return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
1440 }
1441 
1442 static uint64_t
1443 fp64_sqrt(uint64_t a, int mode, int *flags)
1444 {
1445  int a_sgn, a_exp, x_sgn, x_exp, c;
1446  uint64_t a_mnt, x_mnt, r, x0, x1;
1447  uint32_t x;
1448 
1449  fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1450 
1451  // Handle NaNs:
1452  if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
1453  return fp64_process_NaN(a, mode, flags);
1454  }
1455 
1456  // Handle infinities and zeroes:
1457  if (!a_mnt)
1458  return fp64_zero(a_sgn);
1459  if (a_exp == 2047 && !a_sgn)
1460  return fp64_infinity(a_sgn);
1461  if (a_sgn) {
1462  *flags |= FPLIB_IOC;
1463  return fp64_defaultNaN();
1464  }
1465 
1466  a_mnt = fp64_normalise(a_mnt, &a_exp);
1467  if (a_exp & 1) {
1468  ++a_exp;
1469  a_mnt >>= 1;
1470  }
1471 
1472  // x = (a * 3 + 5) / 8
1473  x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);
1474 
1475  // x = (a / x + x) / 2; // 16-bit accuracy
1476  x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1477 
1478  // x = (a / x + x) / 2; // 16-bit accuracy
1479  x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1480 
1481  // x = (a / x + x) / 2; // 32-bit accuracy
1482  x = ((a_mnt / x) >> 2) + (x >> 1);
1483 
1484  // r = 1 / x; // 32-bit accuracy
1485  r = ((uint64_t)1 << 62) / x;
1486 
1487  // r = r * (2 - x * r); // 64-bit accuracy
1488  mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
1489  lsr128(&x0, &x1, x0, x1, 31);
1490 
1491  // x = (x + a * r) / 2; // 64-bit accuracy
1492  mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
1493  lsl128(&x0, &x1, x0, x1, 5);
1494  lsr128(&x0, &x1, x0, x1, 56);
1495 
1496  x0 = ((uint64_t)x << 31) + (x0 >> 1);
1497 
1498  x_sgn = 0;
1499  x_exp = (a_exp + 1053) >> 1;
1500  x_mnt = x0;
1501  x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
1502  mul62x62(&x0, &x1, x_mnt, x_mnt);
1503  lsl128(&x0, &x1, x0, x1, 19);
1504  c = cmp128(x0, x1, 0, a_mnt);
1505  if (c > 0)
1506  --x_mnt;
1507 
1508  x_mnt = fp64_normalise(x_mnt, &x_exp);
1509 
1510  return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1511 }
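// Both square roots follow the same scheme: fix the exponent's parity
// so halving it is exact, start from the linear estimate
// x = (3*a + 5)/8, and apply Newton iterations x' = (a/x + x)/2 until
// enough bits are correct. A final exact comparison of x*x with the
// input decides whether to step the result down by one and whether the
// result is inexact.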
1512 
1513 static int
1514 modeConv(FPSCR fpscr)
1515 {
1516  return (((int) fpscr) >> 22) & 0xF;
1517 }
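// modeConv() extracts FPSCR bits 22-25, i.e. RMode (23:22), FZ (24) and
// DN (25), which line up with the FPLIB_RN/RP/RM/RZ, FPLIB_FZ and
// FPLIB_DN encodings above. AHP (bit 26) is not included here; it is
// ORed in separately where needed, as in 'mode | alt_hp << 4'.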
1518 
1519 static void
1520 set_fpscr(FPSCR &fpscr, int flags)
1521 {
1522  // translate back to FPSCR
1523  bool underflow = false;
1524  if (flags & FPLIB_IDC) {
1525  fpscr.idc = 1;
1526  }
1527  if (flags & FPLIB_IOC) {
1528  fpscr.ioc = 1;
1529  }
1530  if (flags & FPLIB_DZC) {
1531  fpscr.dzc = 1;
1532  }
1533  if (flags & FPLIB_OFC) {
1534  fpscr.ofc = 1;
1535  }
1536  if (flags & FPLIB_UFC) {
1537  underflow = true; //xx Why is this required?
1538  fpscr.ufc = 1;
1539  }
1540  if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
1541  fpscr.ixc = 1;
1542  }
1543 }
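// The guard on ixc appears to implement the ARM rule that a result
// flushed to zero by FZ signals Underflow but not Inexact: FPLIB_IXC is
// dropped only when the same operation also underflowed and FZ is set.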
1544 
1545 template <>
1546 bool
1547 fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
1548 {
1549  int flags = 0;
1550  int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
1551  set_fpscr(fpscr, flags);
1552  return x;
1553 }
1554 
1555 template <>
1556 bool
1557 fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
1558 {
1559  int flags = 0;
1560  int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
1561  set_fpscr(fpscr, flags);
1562  return x;
1563 }
1564 
1565 template <>
1566 bool
1567 fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
1568 {
1569  int flags = 0;
1570  int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
1571  set_fpscr(fpscr, flags);
1572  return x;
1573 }
1574 
1575 template <>
1576 bool
1577 fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
1578 {
1579  int flags = 0;
1580  int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
1581  set_fpscr(fpscr, flags);
1582  return x;
1583 }
1584 
1585 template <>
1586 bool
1587 fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
1588 {
1589  int flags = 0;
1590  int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
1591  set_fpscr(fpscr, flags);
1592  return x;
1593 }
1594 
1595 template <>
1596 bool
1597 fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
1598 {
1599  int flags = 0;
1600  int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
1601  set_fpscr(fpscr, flags);
1602  return x;
1603 }
1604 
1605 template <>
1606 uint32_t
1607 fplibAbs(uint32_t op)
1608 {
1609  return op & ~((uint32_t)1 << 31);
1610 }
1611 
1612 template <>
1613 uint64_t
1614 fplibAbs(uint64_t op)
1615 {
1616  return op & ~((uint64_t)1 << 63);
1617 }
1618 
1619 template <>
1620 uint32_t
1621 fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
1622 {
1623  int flags = 0;
1624  uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
1625  set_fpscr0(fpscr, flags);
1626  return result;
1627 }
1628 
1629 template <>
1630 uint64_t
1631 fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
1632 {
1633  int flags = 0;
1634  uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
1635  set_fpscr0(fpscr, flags);
1636  return result;
1637 }
1638 
1639 template <>
1640 int
1641 fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
1642 {
1643  int mode = modeConv(fpscr);
1644  int flags = 0;
1645  int sgn1, exp1, sgn2, exp2, result;
1646  uint32_t mnt1, mnt2;
1647 
1648  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1649  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1650 
1651  if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
1652  (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
1653  result = 3;
1654  if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
1655  (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
1656  signal_nans)
1657  flags |= FPLIB_IOC;
1658  } else {
1659  if (op1 == op2 || (!mnt1 && !mnt2)) {
1660  result = 6;
1661  } else if (sgn1 != sgn2) {
1662  result = sgn1 ? 8 : 2;
1663  } else if (exp1 != exp2) {
1664  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1665  } else {
1666  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1667  }
1668  }
1669 
1670  set_fpscr0(fpscr, flags);
1671 
1672  return result;
1673 }
1674 
1675 template <>
1676 int
1677 fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
1678 {
1679  int mode = modeConv(fpscr);
1680  int flags = 0;
1681  int sgn1, exp1, sgn2, exp2, result;
1682  uint64_t mnt1, mnt2;
1683 
1684  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1685  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1686 
1687  if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
1688  (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
1689  result = 3;
1690  if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
1691  (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
1692  signal_nans)
1693  flags |= FPLIB_IOC;
1694  } else {
1695  if (op1 == op2 || (!mnt1 && !mnt2)) {
1696  result = 6;
1697  } else if (sgn1 != sgn2) {
1698  result = sgn1 ? 8 : 2;
1699  } else if (exp1 != exp2) {
1700  result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1701  } else {
1702  result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1703  }
1704  }
1705 
1706  set_fpscr0(fpscr, flags);
1707 
1708  return result;
1709 }
1710 
1711 static uint16_t
1712 fp16_FPConvertNaN_32(uint32_t op)
1713 {
1714  return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
1715 }
1716 
1717 static uint16_t
1718 fp16_FPConvertNaN_64(uint64_t op)
1719 {
1720  return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
1721 }
1722 
1723 static uint32_t
1724 fp32_FPConvertNaN_16(uint16_t op)
1725 {
1726  return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
1727 }
1728 
1729 static uint32_t
1730 fp32_FPConvertNaN_64(uint64_t op)
1731 {
1732  return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
1733 }
1734 
1735 static uint64_t
1736 fp64_FPConvertNaN_16(uint16_t op)
1737 {
1738  return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
1739 }
1740 
1741 static uint64_t
1742 fp64_FPConvertNaN_32(uint32_t op)
1743 {
1744  return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
1745 }
1746 
1747 static uint32_t
1748 fp32_FPOnePointFive(int sgn)
1749 {
1750  return fp32_pack(sgn, 127, (uint64_t)1 << 22);
1751 }
1752 
1753 static uint64_t
1754 fp64_FPOnePointFive(int sgn)
1755 {
1756  return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
1757 }
1758 
1759 static uint32_t
1760 fp32_FPThree(int sgn)
1761 {
1762  return fp32_pack(sgn, 128, (uint64_t)1 << 22);
1763 }
1764 
1765 static uint64_t
1766 fp64_FPThree(int sgn)
1767 {
1768  return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
1769 }
1770 
1771 static uint32_t
1772 fp32_FPTwo(int sgn)
1773 {
1774  return fp32_pack(sgn, 128, 0);
1775 }
1776 
1777 static uint64_t
1778 fp64_FPTwo(int sgn)
1779 {
1780  return fp64_pack(sgn, 1024, 0);
1781 }
1782 
1783 template <>
1784 uint16_t
1785 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1786 {
1787  int mode = modeConv(fpscr);
1788  int flags = 0;
1789  int sgn, exp;
1790  uint32_t mnt;
1791  uint16_t result;
1792 
1793  // Unpack floating-point operand optionally with flush-to-zero:
1794  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1795 
1796  bool alt_hp = fpscr.ahp;
1797 
1798  if (exp == 255 && (uint32_t)(mnt << 9)) {
1799  if (alt_hp) {
1800  result = fp16_zero(sgn);
1801  } else if (fpscr.dn) {
1802  result = fp16_defaultNaN();
1803  } else {
1804  result = fp16_FPConvertNaN_32(op);
1805  }
1806  if (!(mnt >> 22 & 1) || alt_hp) {
1807  flags |= FPLIB_IOC;
1808  }
1809  } else if (exp == 255) {
1810  if (alt_hp) {
1811  result = sgn << 15 | (uint16_t)0x7fff;
1812  flags |= FPLIB_IOC;
1813  } else {
1814  result = fp16_infinity(sgn);
1815  }
1816  } else if (!mnt) {
1817  result = fp16_zero(sgn);
1818  } else {
1819  result = fp16_round_(sgn, exp - 127 + 15,
1820  mnt >> 7 | !!(uint32_t)(mnt << 25),
1821  rounding, mode | alt_hp << 4, &flags);
1822  }
1823 
1824  set_fpscr0(fpscr, flags);
1825 
1826  return result;
1827 }
1828 
1829 template <>
1830 uint16_t
1831 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1832 {
1833  int mode = modeConv(fpscr);
1834  int flags = 0;
1835  int sgn, exp;
1836  uint64_t mnt;
1837  uint16_t result;
1838 
1839  // Unpack floating-point operand optionally with flush-to-zero:
1840  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1841 
1842  bool alt_hp = fpscr.ahp;
1843 
1844  if (exp == 2047 && (uint64_t)(mnt << 12)) {
1845  if (alt_hp) {
1846  result = fp16_zero(sgn);
1847  } else if (fpscr.dn) {
1848  result = fp16_defaultNaN();
1849  } else {
1850  result = fp16_FPConvertNaN_64(op);
1851  }
1852  if (!(mnt >> 51 & 1) || alt_hp) {
1853  flags |= FPLIB_IOC;
1854  }
1855  } else if (exp == 2047) {
1856  if (alt_hp) {
1857  result = sgn << 15 | (uint16_t)0x7fff;
1858  flags |= FPLIB_IOC;
1859  } else {
1860  result = fp16_infinity(sgn);
1861  }
1862  } else if (!mnt) {
1863  result = fp16_zero(sgn);
1864  } else {
1865  result = fp16_round_(sgn, exp - 1023 + 15,
1866  mnt >> 36 | !!(uint64_t)(mnt << 28),
1867  rounding, mode | alt_hp << 4, &flags);
1868  }
1869 
1870  set_fpscr0(fpscr, flags);
1871 
1872  return result;
1873 }
1874 
1875 template <>
1876 uint32_t
1877 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1878 {
1879  int mode = modeConv(fpscr);
1880  int flags = 0;
1881  int sgn, exp;
1882  uint16_t mnt;
1883  uint32_t result;
1884 
1885  // Unpack floating-point operand optionally with flush-to-zero:
1886  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1887 
1888  if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1889  if (fpscr.dn) {
1890  result = fp32_defaultNaN();
1891  } else {
1892  result = fp32_FPConvertNaN_16(op);
1893  }
1894  if (!(mnt >> 9 & 1)) {
1895  flags |= FPLIB_IOC;
1896  }
1897  } else if (exp == 31 && !fpscr.ahp) {
1898  result = fp32_infinity(sgn);
1899  } else if (!mnt) {
1900  result = fp32_zero(sgn);
1901  } else {
1902  mnt = fp16_normalise(mnt, &exp);
1903  result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
1904  }
1905 
1906  set_fpscr0(fpscr, flags);
1907 
1908  return result;
1909 }
1910 
1911 template <>
1912 uint32_t
1913 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1914 {
1915  int mode = modeConv(fpscr);
1916  int flags = 0;
1917  int sgn, exp;
1918  uint64_t mnt;
1919  uint32_t result;
1920 
1921  // Unpack floating-point operand optionally with flush-to-zero:
1922  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1923 
1924  if (exp == 2047 && (uint64_t)(mnt << 12)) {
1925  if (fpscr.dn) {
1926  result = fp32_defaultNaN();
1927  } else {
1928  result = fp32_FPConvertNaN_64(op);
1929  }
1930  if (!(mnt >> 51 & 1)) {
1931  flags |= FPLIB_IOC;
1932  }
1933  } else if (exp == 2047) {
1934  result = fp32_infinity(sgn);
1935  } else if (!mnt) {
1936  result = fp32_zero(sgn);
1937  } else {
1938  result = fp32_round_(sgn, exp - 1023 + 127,
1939  mnt >> 20 | !!(uint64_t)(mnt << 44),
1940  rounding, mode, &flags);
1941  }
1942 
1943  set_fpscr0(fpscr, flags);
1944 
1945  return result;
1946 }
1947 
1948 template <>
1949 uint64_t
1950 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1951 {
1952  int mode = modeConv(fpscr);
1953  int flags = 0;
1954  int sgn, exp;
1955  uint16_t mnt;
1956  uint64_t result;
1957 
1958  // Unpack floating-point operand optionally with flush-to-zero:
1959  fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1960 
1961  if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1962  if (fpscr.dn) {
1963  result = fp64_defaultNaN();
1964  } else {
1965  result = fp64_FPConvertNaN_16(op);
1966  }
1967  if (!(mnt >> 9 & 1)) {
1968  flags |= FPLIB_IOC;
1969  }
1970  } else if (exp == 31 && !fpscr.ahp) {
1971  result = fp64_infinity(sgn);
1972  } else if (!mnt) {
1973  result = fp64_zero(sgn);
1974  } else {
1975  mnt = fp16_normalise(mnt, &exp);
1976  result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
1977  }
1978 
1979  set_fpscr0(fpscr, flags);
1980 
1981  return result;
1982 }
1983 
1984 template <>
1985 uint64_t
1986 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1987 {
1988  int mode = modeConv(fpscr);
1989  int flags = 0;
1990  int sgn, exp;
1991  uint32_t mnt;
1992  uint64_t result;
1993 
1994  // Unpack floating-point operand optionally with flush-to-zero:
1995  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1996 
1997  if (exp == 255 && (uint32_t)(mnt << 9)) {
1998  if (fpscr.dn) {
1999  result = fp64_defaultNaN();
2000  } else {
2001  result = fp64_FPConvertNaN_32(op);
2002  }
2003  if (!(mnt >> 22 & 1)) {
2004  flags |= FPLIB_IOC;
2005  }
2006  } else if (exp == 255) {
2007  result = fp64_infinity(sgn);
2008  } else if (!mnt) {
2009  result = fp64_zero(sgn);
2010  } else {
2011  mnt = fp32_normalise(mnt, &exp);
2012  result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
2013  }
2014 
2015  set_fpscr0(fpscr, flags);
2016 
2017  return result;
2018 }
2019 
2020 template <>
2021 uint32_t
2022 fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2023 {
2024  int flags = 0;
2025  uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2026  set_fpscr0(fpscr, flags);
2027  return result;
2028 }
2029 
2030 template <>
2031 uint64_t
2032 fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2033 {
2034  int flags = 0;
2035  uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2036  set_fpscr0(fpscr, flags);
2037  return result;
2038 }
2039 
2040 template <>
2041 uint32_t
2042 fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2043 {
2044  int flags = 0;
2045  uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2046  set_fpscr0(fpscr, flags);
2047  return result;
2048 }
2049 
2050 template <>
2051 uint64_t
2052 fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2053 {
2054  int flags = 0;
2055  uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2056  set_fpscr0(fpscr, flags);
2057  return result;
2058 }
2059 
2060 static uint32_t
2061 fp32_repack(int sgn, int exp, uint32_t mnt)
2062 {
2063  return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
2064 }
2065 
2066 static uint64_t
2067 fp64_repack(int sgn, int exp, uint64_t mnt)
2068 {
2069  return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
2070 }
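// fp*_repack() rebuilds a value from its unpacked fields. Because
// unpacking bumps a subnormal's exponent to 1 without setting the
// implicit bit, the exponent is forced back to 0 whenever the implicit
// bit (bit 23 or bit 52) is clear.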
2071 
2072 static void
2073 fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
2074 {
2075  // Treat a single quiet-NaN as +Infinity/-Infinity
2076  if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
2077  *op1 = fp32_infinity(sgn);
2078  if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
2079  *op2 = fp32_infinity(sgn);
2080 }
2081 
2082 static void
2083 fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
2084 {
2085  // Treat a single quiet-NaN as +Infinity/-Infinity
2086  if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
2087  *op1 = fp64_infinity(sgn);
2088  if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
2089  *op2 = fp64_infinity(sgn);
2090 }
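// The minmaxnum helpers pre-process operands for FMAXNM/FMINNM: if
// exactly one operand is a quiet NaN, it is replaced by the infinity
// that can never win, so the numeric operand is returned. The test
// !((uint32_t)~(*op << 1) >> 23) (>> 52 for doubles) ignores the sign
// bit and checks for an all-ones exponent with the quiet bit set,
// i.e. a quiet NaN.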
2091 
2092 template <>
2093 uint32_t
2094 fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2095 {
2096  int mode = modeConv(fpscr);
2097  int flags = 0;
2098  int sgn1, exp1, sgn2, exp2;
2099  uint32_t mnt1, mnt2, x, result;
2100 
2101  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2102  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2103 
2104  if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2105  result = x;
2106  } else {
2107  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2108  fp32_repack(sgn1, exp1, mnt1) :
2109  fp32_repack(sgn2, exp2, mnt2));
2110  }
2111  set_fpscr0(fpscr, flags);
2112  return result;
2113 }
2114 
2115 template <>
2116 uint64_t
2117 fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2118 {
2119  int mode = modeConv(fpscr);
2120  int flags = 0;
2121  int sgn1, exp1, sgn2, exp2;
2122  uint64_t mnt1, mnt2, x, result;
2123 
2124  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2125  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2126 
2127  if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2128  result = x;
2129  } else {
2130  result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2131  fp64_repack(sgn1, exp1, mnt1) :
2132  fp64_repack(sgn2, exp2, mnt2));
2133  }
2134  set_fpscr0(fpscr, flags);
2135  return result;
2136 }
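
// The selection above relies on IEEE bit patterns ordering monotonically as
// unsigned integers for same-sign operands (and in reverse for negative
// ones, hence the XOR with sgn1): when the signs differ the negative
// operand loses, otherwise a plain integer compare picks the winner.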
2137 
2138 template <>
2139 uint32_t
2140 fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2141 {
2142  fp32_minmaxnum(&op1, &op2, 1);
2143  return fplibMax<uint32_t>(op1, op2, fpscr);
2144 }
2145 
2146 template <>
2147 uint64_t
2148 fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2149 {
2150  fp64_minmaxnum(&op1, &op2, 1);
2151  return fplibMax<uint64_t>(op1, op2, fpscr);
2152 }
2153 
2154 template <>
2155 uint32_t
2156 fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2157 {
2158  int mode = modeConv(fpscr);
2159  int flags = 0;
2160  int sgn1, exp1, sgn2, exp2;
2161  uint32_t mnt1, mnt2, x, result;
2162 
2163  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2164  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2165 
2166  if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2167  result = x;
2168  } else {
2169  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2170  fp32_repack(sgn1, exp1, mnt1) :
2171  fp32_repack(sgn2, exp2, mnt2));
2172  }
2173  set_fpscr0(fpscr, flags);
2174  return result;
2175 }
2176 
2177 template <>
2178 uint64_t
2179 fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2180 {
2181  int mode = modeConv(fpscr);
2182  int flags = 0;
2183  int sgn1, exp1, sgn2, exp2;
2184  uint64_t mnt1, mnt2, x, result;
2185 
2186  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2187  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2188 
2189  if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2190  result = x;
2191  } else {
2192  result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2193  fp64_repack(sgn1, exp1, mnt1) :
2194  fp64_repack(sgn2, exp2, mnt2));
2195  }
2196  set_fpscr0(fpscr, flags);
2197  return result;
2198 }
2199 
2200 template <>
2201 uint32_t
2202 fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2203 {
2204  fp32_minmaxnum(&op1, &op2, 0);
2205  return fplibMin<uint32_t>(op1, op2, fpscr);
2206 }
2207 
2208 template <>
2209 uint64_t
2210 fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2211 {
2212  fp64_minmaxnum(&op1, &op2, 0);
2213  return fplibMin<uint64_t>(op1, op2, fpscr);
2214 }
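
// fplibMaxNum()/fplibMinNum() implement IEEE 754-2008 maxNum/minNum: a
// single quiet-NaN operand is first rewritten to the losing infinity
// (-Infinity for max, +Infinity for min), so the numeric operand is
// returned by the ordinary max/min that follows.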
2215 
2216 template <>
2217 uint32_t
2218 fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2219 {
2220  int flags = 0;
2221  uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
2222  set_fpscr0(fpscr, flags);
2223  return result;
2224 }
2225 
2226 template <>
2227 uint64_t
2228 fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2229 {
2230  int flags = 0;
2231  uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
2232  set_fpscr0(fpscr, flags);
2233  return result;
2234 }
2235 
2236 template <>
2237 uint32_t
2238 fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2239 {
2240  int mode = modeConv(fpscr);
2241  int flags = 0;
2242  int sgn1, exp1, sgn2, exp2;
2243  uint32_t mnt1, mnt2, result;
2244 
2245  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2246  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2247 
2248  result = fp32_process_NaNs(op1, op2, mode, &flags);
2249  if (!result) {
2250  if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2251  result = fp32_FPTwo(sgn1 ^ sgn2);
2252  } else if (exp1 == 255 || exp2 == 255) {
2253  result = fp32_infinity(sgn1 ^ sgn2);
2254  } else if (!mnt1 || !mnt2) {
2255  result = fp32_zero(sgn1 ^ sgn2);
2256  } else {
2257  result = fp32_mul(op1, op2, mode, &flags);
2258  }
2259  }
2260 
2261  set_fpscr0(fpscr, flags);
2262 
2263  return result;
2264 }
2265 
2266 template <>
2267 uint64_t
2268 fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2269 {
2270  int mode = modeConv(fpscr);
2271  int flags = 0;
2272  int sgn1, exp1, sgn2, exp2;
2273  uint64_t mnt1, mnt2, result;
2274 
2275  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2276  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2277 
2278  result = fp64_process_NaNs(op1, op2, mode, &flags);
2279  if (!result) {
2280  if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2281  result = fp64_FPTwo(sgn1 ^ sgn2);
2282  } else if (exp1 == 2047 || exp2 == 2047) {
2283  result = fp64_infinity(sgn1 ^ sgn2);
2284  } else if (!mnt1 || !mnt2) {
2285  result = fp64_zero(sgn1 ^ sgn2);
2286  } else {
2287  result = fp64_mul(op1, op2, mode, &flags);
2288  }
2289  }
2290 
2291  set_fpscr0(fpscr, flags);
2292 
2293  return result;
2294 }
2295 
2296 template <>
2297 uint32_t
2298 fplibNeg(uint32_t op)
2299 {
2300  return op ^ (uint32_t)1 << 31;
2301 }
2302 
2303 template <>
2304 uint64_t
2305 fplibNeg(uint64_t op)
2306 {
2307  return op ^ (uint64_t)1 << 63;
2308 }
2309 
2310 static const uint8_t recip_sqrt_estimate[256] = {
2311  255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
2312  226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
2313  201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
2314  180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
2315  162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
2316  145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
2317  131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
2318  118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
2319  105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
2320  85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
2321  67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
2322  52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
2323  39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
2324  28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
2325  17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
2326  8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
2327 };
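
// recip_sqrt_estimate[] follows the ARM ARM lookup: bit 7 of the index is
// the exponent's complemented parity (the square root halves the exponent,
// and the estimate differs between the two resulting octaves), while bits
// 6:0 are the fraction bits just below the leading bit. Worked example
// (illustrative): op = 4.0f unpacks to exp = 129; after exp += 8 and
// normalising, exp = 129 and the index is (~129 & 1) << 7 | 0 = 0, so
// mnt = 255 and fp32_pack(0, (380 - 129) >> 1, 255 << 15) gives
// 0x3eff8000 (about 0.499), a close estimate of 1/sqrt(4.0).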
2328 
2329 template <>
2330 uint32_t
2331 fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
2332 {
2333  int mode = modeConv(fpscr);
2334  int flags = 0;
2335  int sgn, exp;
2336  uint32_t mnt, result;
2337 
2338  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2339 
2340  if (exp == 255 && (uint32_t)(mnt << 9)) {
2341  result = fp32_process_NaN(op, mode, &flags);
2342  } else if (!mnt) {
2343  result = fp32_infinity(sgn);
2344  flags |= FPLIB_DZC;
2345  } else if (sgn) {
2346  result = fp32_defaultNaN();
2347  flags |= FPLIB_IOC;
2348  } else if (exp == 255) {
2349  result = fp32_zero(0);
2350  } else {
2351  exp += 8;
2352  mnt = fp32_normalise(mnt, &exp);
2353  mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
2354  result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
2355  }
2356 
2357  set_fpscr0(fpscr, flags);
2358 
2359  return result;
2360 }
2361 
2362 template <>
2363 uint64_t
2364 fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
2365 {
2366  int mode = modeConv(fpscr);
2367  int flags = 0;
2368  int sgn, exp;
2369  uint64_t mnt, result;
2370 
2371  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2372 
2373  if (exp == 2047 && (uint64_t)(mnt << 12)) {
2374  result = fp64_process_NaN(op, mode, &flags);
2375  } else if (!mnt) {
2376  result = fp64_infinity(sgn);
2377  flags |= FPLIB_DZC;
2378  } else if (sgn) {
2379  result = fp64_defaultNaN();
2380  flags |= FPLIB_IOC;
2381  } else if (exp == 2047) {
2382  result = fp64_zero(0);

2383  } else {
2384  exp += 11;
2385  mnt = fp64_normalise(mnt, &exp);
2386  mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
2387  result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
2388  }
2389 
2390  set_fpscr0(fpscr, flags);
2391 
2392  return result;
2393 }
2394 
2395 template <>
2396 uint32_t
2397 fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2398 {
2399  int mode = modeConv(fpscr);
2400  int flags = 0;
2401  int sgn1, exp1, sgn2, exp2;
2402  uint32_t mnt1, mnt2, result;
2403 
2404  op1 = fplibNeg<uint32_t>(op1);
2405  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2406  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2407 
2408  result = fp32_process_NaNs(op1, op2, mode, &flags);
2409  if (!result) {
2410  if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2411  result = fp32_FPOnePointFive(0);
2412  } else if (exp1 == 255 || exp2 == 255) {
2413  result = fp32_infinity(sgn1 ^ sgn2);
2414  } else {
2415  result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
2416  }
2417  }
2418 
2419  set_fpscr0(fpscr, flags);
2420 
2421  return result;
2422 }
2423 
2424 template <>
2425 uint64_t
2426 fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2427 {
2428  int mode = modeConv(fpscr);
2429  int flags = 0;
2430  int sgn1, exp1, sgn2, exp2;
2431  uint64_t mnt1, mnt2, result;
2432 
2433  op1 = fplibNeg<uint64_t>(op1);
2434  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2435  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2436 
2437  result = fp64_process_NaNs(op1, op2, mode, &flags);
2438  if (!result) {
2439  if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2440  result = fp64_FPOnePointFive(0);
2441  } else if (exp1 == 2047 || exp2 == 2047) {
2442  result = fp64_infinity(sgn1 ^ sgn2);
2443  } else {
2444  result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
2445  }
2446  }
2447 
2448  set_fpscr0(fpscr, flags);
2449 
2450  return result;
2451 }
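
// fplibRSqrtStepFused() computes (3.0 - op1 * op2) / 2.0 in one fused step:
// op1 is negated up front, fp32/fp64_FPThree(0) supplies the addend, and
// the muladd scale argument of -1 supplies the final halving. The
// 0 * infinity special case returns 1.5 by definition rather than raising
// Invalid Operation.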
2452 
2453 template <>
2454 uint32_t
2455 fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2456 {
2457  int mode = modeConv(fpscr);
2458  int flags = 0;
2459  int sgn1, exp1, sgn2, exp2;
2460  uint32_t mnt1, mnt2, result;
2461 
2462  op1 = fplibNeg<uint32_t>(op1);
2463  fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2464  fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2465 
2466  result = fp32_process_NaNs(op1, op2, mode, &flags);
2467  if (!result) {
2468  if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2469  result = fp32_FPTwo(0);
2470  } else if (exp1 == 255 || exp2 == 255) {
2471  result = fp32_infinity(sgn1 ^ sgn2);
2472  } else {
2473  result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
2474  }
2475  }
2476 
2477  set_fpscr0(fpscr, flags);
2478 
2479  return result;
2480 }
2481 
2482 template <>
2483 uint32_t
2484 fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
2485 {
2486  int mode = modeConv(fpscr);
2487  int flags = 0;
2488  int sgn, exp;
2489  uint32_t mnt, result;
2490 
2491  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2492 
2493  if (exp == 255 && (uint32_t)(mnt << 9)) {
2494  result = fp32_process_NaN(op, mode, &flags);
2495  } else if (exp == 255) {
2496  result = fp32_zero(sgn);
2497  } else if (!mnt) {
2498  result = fp32_infinity(sgn);
2499  flags |= FPLIB_DZC;
2500  } else if (!((uint32_t)(op << 1) >> 22)) {
2501  bool overflow_to_inf = false;
2502  switch (FPCRRounding(fpscr)) {
2503  case FPRounding_TIEEVEN:
2504  overflow_to_inf = true;
2505  break;
2506  case FPRounding_POSINF:
2507  overflow_to_inf = !sgn;
2508  break;
2509  case FPRounding_NEGINF:
2510  overflow_to_inf = sgn;
2511  break;
2512  case FPRounding_ZERO:
2513  overflow_to_inf = false;
2514  break;
2515  default:
2516  assert(0);
2517  }
2518  result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
2519  flags |= FPLIB_OFC | FPLIB_IXC;
2520  } else if (fpscr.fz && exp >= 253) {
2521  result = fp32_zero(sgn);
2522  flags |= FPLIB_UFC;
2523  } else {
2524  exp += 8;
2525  mnt = fp32_normalise(mnt, &exp);
2526  int result_exp = 253 - exp;
2527  uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
2528  fraction <<= 15;
2529  if (result_exp == 0) {
2530  fraction >>= 1;
2531  } else if (result_exp == -1) {
2532  fraction >>= 2;
2533  result_exp = 0;
2534  }
2535  result = fp32_pack(sgn, result_exp, fraction);
2536  }
2537 
2538  set_fpscr0(fpscr, flags);
2539 
2540  return result;
2541 }
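
// The core of the estimate: the top ten bits of the normalised significand,
// with the low bit forced on (matching the a = 2 * a + 1 offset in the ARM
// pseudocode), divide 2^19, and the "+ 1 >> 1" rounds the quotient to nine
// bits in [256, 511]. Shifting up then lands the estimate's leading bit on
// the implicit-bit position, leaving eight packed fraction bits; result
// exponents of 0 and -1 denormalise the estimate.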
2542 
2543 template <>
2544 uint64_t
2545 fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
2546 {
2547  int mode = modeConv(fpscr);
2548  int flags = 0;
2549  int sgn, exp;
2550  uint64_t mnt, result;
2551 
2552  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2553 
2554  if (exp == 2047 && (uint64_t)(mnt << 12)) {
2555  result = fp64_process_NaN(op, mode, &flags);
2556  } else if (exp == 2047) {
2557  result = fp64_zero(sgn);
2558  } else if (!mnt) {
2559  result = fp64_infinity(sgn);
2560  flags |= FPLIB_DZC;
2561  } else if (!((uint64_t)(op << 1) >> 51)) {
2562  bool overflow_to_inf = false;
2563  switch (FPCRRounding(fpscr)) {
2564  case FPRounding_TIEEVEN:
2565  overflow_to_inf = true;
2566  break;
2567  case FPRounding_POSINF:
2568  overflow_to_inf = !sgn;
2569  break;
2570  case FPRounding_NEGINF:
2571  overflow_to_inf = sgn;
2572  break;
2573  case FPRounding_ZERO:
2574  overflow_to_inf = false;
2575  break;
2576  default:
2577  assert(0);
2578  }
2579  result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
2580  flags |= FPLIB_OFC | FPLIB_IXC;
2581  } else if (fpscr.fz && exp >= 2045) {
2582  result = fp64_zero(sgn);
2583  flags |= FPLIB_UFC;
2584  } else {
2585  exp += 11;
2586  mnt = fp64_normalise(mnt, &exp);
2587  int result_exp = 2045 - exp;
2588  uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
2589  fraction <<= 44;
2590  if (result_exp == 0) {
2591  fraction >>= 1;
2592  } else if (result_exp == -1) {
2593  fraction >>= 2;
2594  result_exp = 0;
2595  }
2596  result = fp64_pack(sgn, result_exp, fraction);
2597  }
2598 
2599  set_fpscr0(fpscr, flags);
2600 
2601  return result;
2602 }
2603 
2604 template <>
2605 uint64_t
2606 fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2607 {
2608  int mode = modeConv(fpscr);
2609  int flags = 0;
2610  int sgn1, exp1, sgn2, exp2;
2611  uint64_t mnt1, mnt2, result;
2612 
2613  op1 = fplibNeg<uint64_t>(op1);
2614  fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2615  fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2616 
2617  result = fp64_process_NaNs(op1, op2, mode, &flags);
2618  if (!result) {
2619  if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2620  result = fp64_FPTwo(0);
2621  } else if (exp1 == 2047 || exp2 == 2047) {
2622  result = fp64_infinity(sgn1 ^ sgn2);
2623  } else {
2624  result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
2625  }
2626  }
2627 
2628  set_fpscr0(fpscr, flags);
2629 
2630  return result;
2631 }
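
// fplibRecipStepFused() likewise computes 2.0 - op1 * op2 (op1 arrives
// negated), the Newton-Raphson refinement step used by VRECPS/FRECPS, with
// 0 * infinity defined to return 2.0.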
2632 
2633 template <>
2634 uint32_t
2635 fplibRecpX(uint32_t op, FPSCR &fpscr)
2636 {
2637  int mode = modeConv(fpscr);
2638  int flags = 0;
2639  int sgn, exp;
2640  uint32_t mnt, result;
2641 
2642  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2643 
2644  if (exp == 255 && (uint32_t)(mnt << 9)) {
2645  result = fp32_process_NaN(op, mode, &flags);
2646  }
2647  else {
2648  if (!mnt) { // Zero and denormals
2649  result = fp32_pack(sgn, 254, 0);
2650  } else { // Infinities and normals
2651  result = fp32_pack(sgn, exp ^ 255, 0);
2652  }
2653  }
2654 
2655  set_fpscr0(fpscr, flags);
2656 
2657  return result;
2658 }
2659 
2660 template <>
2661 uint64_t
2662 fplibRecpX(uint64_t op, FPSCR &fpscr)
2663 {
2664  int mode = modeConv(fpscr);
2665  int flags = 0;
2666  int sgn, exp;
2667  uint64_t mnt, result;
2668 
2669  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2670 
2671  if (exp == 2047 && (uint64_t)(mnt << 12)) {
2672  result = fp64_process_NaN(op, mode, &flags);
2673  }
2674  else {
2675  if (!mnt) { // Zero and denormals
2676  result = fp64_pack(sgn, 2046, 0);
2677  } else { // Infinities and normals
2678  result = fp64_pack(sgn, exp ^ 2047, 0);
2679  }
2680  }
2681 
2682  set_fpscr0(fpscr, flags);
2683 
2684  return result;
2685 }
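
// FPRecpX only manipulates the exponent: complementing the exponent field
// (exp ^ 255 or exp ^ 2047) and clearing the fraction yields a power of two
// roughly reciprocal to the operand, convenient for rescaling around the
// iterative reciprocal sequences; zeros and denormals map to the largest
// finite exponent instead.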
2686 
2687 template <>
2688 uint32_t
2689 fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2690 {
2691  int mode = modeConv(fpscr);
2692  int flags = 0;
2693  int sgn, exp;
2694  uint32_t mnt, result;
2695 
2696  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2697  fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2698 
2699  // Handle NaNs, infinities and zeroes:
2700  if (exp == 255 && (uint32_t)(mnt << 9)) {
2701  result = fp32_process_NaN(op, mode, &flags);
2702  } else if (exp == 255) {
2703  result = fp32_infinity(sgn);
2704  } else if (!mnt) {
2705  result = fp32_zero(sgn);
2706  } else if (exp >= 150) {
2707  // There are no fractional bits
2708  result = op;
2709  } else {
2710  // Truncate towards zero:
2711  uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp);
2712  int err = exp < 118 ? 1 :
2713  (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0);
2714  switch (rounding) {
2715  case FPRounding_TIEEVEN:
2716  x += (err == 3 || (err == 2 && (x & 1)));
2717  break;
2718  case FPRounding_POSINF:
2719  x += err && !sgn;
2720  break;
2721  case FPRounding_NEGINF:
2722  x += err && sgn;
2723  break;
2724  case FPRounding_ZERO:
2725  break;
2726  case FPRounding_TIEAWAY:
2727  x += err >> 1;
2728  break;
2729  default:
2730  assert(0);
2731  }
2732 
2733  if (x == 0) {
2734  result = fp32_zero(sgn);
2735  } else {
2736  exp = 150;
2737  mnt = fp32_normalise(x, &exp);
2738  result = fp32_pack(sgn, exp + 8, mnt >> 8);
2739  }
2740 
2741  if (err && exact)
2742  flags |= FPLIB_IXC;
2743  }
2744 
2745  set_fpscr0(fpscr, flags);
2746 
2747  return result;
2748 }
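
// In the rounding above, err is a classic round/sticky pair: bit 1 is the
// first discarded fraction bit and bit 0 is the OR of all lower bits, so
// err == 0 means exact, 1 below half a unit, 2 an exact tie and 3 above
// half; e.g. FPRounding_TIEAWAY adds err >> 1, rounding the magnitude up
// from a tie.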
2749 
2750 template <>
2751 uint64_t
2752 fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2753 {
2754  int mode = modeConv(fpscr);
2755  int flags = 0;
2756  int sgn, exp;
2757  uint64_t mnt, result;
2758 
2759  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2760  fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2761 
2762  // Handle NaNs, infinities and zeroes:
2763  if (exp == 2047 && (uint64_t)(mnt << 12)) {
2764  result = fp64_process_NaN(op, mode, &flags);
2765  } else if (exp == 2047) {
2766  result = fp64_infinity(sgn);
2767  } else if (!mnt) {
2768  result = fp64_zero(sgn);
2769  } else if (exp >= 1075) {
2770  // There are no fractional bits
2771  result = op;
2772  } else {
2773  // Truncate towards zero:
2774  uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp);
2775  int err = exp < 1011 ? 1 :
2776  (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0);
2777  switch (rounding) {
2778  case FPRounding_TIEEVEN:
2779  x += (err == 3 || (err == 2 && (x & 1)));
2780  break;
2781  case FPRounding_POSINF:
2782  x += err && !sgn;
2783  break;
2784  case FPRounding_NEGINF:
2785  x += err && sgn;
2786  break;
2787  case FPRounding_ZERO:
2788  break;
2789  case FPRounding_TIEAWAY:
2790  x += err >> 1;
2791  break;
2792  default:
2793  assert(0);
2794  }
2795 
2796  if (x == 0) {
2797  result = fp64_zero(sgn);
2798  } else {
2799  exp = 1075;
2800  mnt = fp64_normalise(x, &exp);
2801  result = fp64_pack(sgn, exp + 11, mnt >> 11);
2802  }
2803 
2804  if (err && exact)
2805  flags |= FPLIB_IXC;
2806  }
2807 
2808  set_fpscr0(fpscr, flags);
2809 
2810  return result;
2811 }
2812 
2813 template <>
2814 uint32_t
2815 fplibSqrt(uint32_t op, FPSCR &fpscr)
2816 {
2817  int flags = 0;
2818  uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
2819  set_fpscr0(fpscr, flags);
2820  return result;
2821 }
2822 
2823 template <>
2824 uint64_t
2825 fplibSqrt(uint64_t op, FPSCR &fpscr)
2826 {
2827  int flags = 0;
2828  uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
2829  set_fpscr0(fpscr, flags);
2830  return result;
2831 }
2832 
2833 template <>
2834 uint32_t
2835 fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2836 {
2837  int flags = 0;
2838  uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
2839  set_fpscr0(fpscr, flags);
2840  return result;
2841 }
2842 
2843 template <>
2844 uint64_t
2845 fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2846 {
2847  int flags = 0;
2848  uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
2849  set_fpscr0(fpscr, flags);
2850  return result;
2851 }
2852 
2853 static uint64_t
2854 FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2855  int *flags)
2856 {
2857  uint64_t x;
2858  int err;
2859 
2860  if (exp > 1023 + 63) {
2861  *flags = FPLIB_IOC;
2862  return ((uint64_t)!u << 63) - !sgn;
2863  }
2864 
2865  x = lsr64(mnt << 11, 1023 + 63 - exp);
2866  err = (exp > 1023 + 63 - 2 ? 0 :
2867  (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
2868  !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1)));
2869 
2870  switch (rounding) {
2871  case FPRounding_TIEEVEN:
2872  x += (err == 3 || (err == 2 && (x & 1)));
2873  break;
2874  case FPRounding_POSINF:
2875  x += err && !sgn;
2876  break;
2877  case FPRounding_NEGINF:
2878  x += err && sgn;
2879  break;
2880  case FPRounding_ZERO:
2881  break;
2882  case FPRounding_TIEAWAY:
2883  x += err >> 1;
2884  break;
2885  default:
2886  assert(0);
2887  }
2888 
2889  if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) {
2890  *flags = FPLIB_IOC;
2891  return ((uint64_t)!u << 63) - !sgn;
2892  }
2893 
2894  if (err) {
2895  *flags = FPLIB_IXC;
2896  }
2897 
2898  return sgn ? -x : x;
2899 }
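
// The saturation value ((uint64_t)!u << 63) - !sgn covers all four cases:
// unsigned positive overflow gives 0 - 1 = UINT64_MAX, unsigned negative
// gives 0, signed positive gives 2^63 - 1 = INT64_MAX, and signed negative
// gives 2^63, the bit pattern of INT64_MIN.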
2900 
2901 static uint32_t
2902 FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2903  int *flags)
2904 {
2905  uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
2906  if (u ? x >= (uint64_t)1 << 32 :
2907  !(x < (uint64_t)1 << 31 ||
2908  (uint64_t)-x <= (uint64_t)1 << 31)) {
2909  *flags = FPLIB_IOC;
2910  x = ((uint32_t)!u << 31) - !sgn;
2911  }
2912  return x;
2913 }
2914 
2915 template <>
2916 uint32_t
2917 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2918 {
2919  int flags = 0;
2920  int sgn, exp;
2921  uint32_t mnt, result;
2922 
2923  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2924  fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2925 
2926  // If NaN, set cumulative flag or take exception:
2927  if (exp == 255 && (uint32_t)(mnt << 9)) {
2928  flags = FPLIB_IOC;
2929  result = 0;
2930  } else {
2931  result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
2932  (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2933  }
2934 
2935  set_fpscr0(fpscr, flags);
2936 
2937  return result;
2938 }
2939 
2940 template <>
2941 uint32_t
2942 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2943 {
2944  int flags = 0;
2945  int sgn, exp;
2946  uint64_t mnt;
2947  uint32_t result;
2948 
2949  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2950  fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2951 
2952  // If NaN, set cumulative flag or take exception:
2953  if (exp == 2047 && (uint64_t)(mnt << 12)) {
2954  flags = FPLIB_IOC;
2955  result = 0;
2956  } else {
2957  result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
2958  }
2959 
2960  set_fpscr0(fpscr, flags);
2961 
2962  return result;
2963 }
2964 
2965 template <>
2966 uint64_t
2967 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2968 {
2969  int flags = 0;
2970  int sgn, exp;
2971  uint32_t mnt;
2972  uint64_t result;
2973 
2974  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2975  fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2976 
2977  // If NaN, set cumulative flag or take exception:
2978  if (exp == 255 && (uint32_t)(mnt << 9)) {
2979  flags = FPLIB_IOC;
2980  result = 0;
2981  } else {
2982  result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
2983  (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2984  }
2985 
2986  set_fpscr0(fpscr, flags);
2987 
2988  return result;
2989 }
2990 
2991 template <>
2992 uint64_t
2993 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2994 {
2995  int flags = 0;
2996  int sgn, exp;
2997  uint64_t mnt, result;
2998 
2999  // Unpack using FPCR to determine if subnormals are flushed-to-zero:
3000  fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
3001 
3002  // If NaN, set cumulative flag or take exception:
3003  if (exp == 2047 && (uint64_t)(mnt << 12)) {
3004  flags = FPLIB_IOC;
3005  result = 0;
3006  } else {
3007  result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
3008  }
3009 
3010  set_fpscr0(fpscr, flags);
3011 
3012  return result;
3013 }
3014 
3015 static uint32_t
3016 fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3017 {
3018  int x_sgn = !u && a >> 63;
3019  int x_exp = 190 - fbits;
3020  uint64_t x_mnt = x_sgn ? -a : a;
3021 
3022  // Handle zero:
3023  if (!x_mnt) {
3024  return fp32_zero(0);
3025  }
3026 
3027  // Normalise and convert to 32 bits, collapsing error into bottom bit:
3028  x_mnt = fp64_normalise(x_mnt, &x_exp);
3029  x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
3030 
3031  return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
3032 }
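
// Dropping from 64 to 33 significant bits above is safe because rounding
// only needs the rounding position plus a sticky bit: the 31 discarded bits
// are OR-reduced into bit 0 before fp32_round() sees the value.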
3033 
3034 static uint64_t
3035 fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3036 {
3037  int x_sgn = !u && a >> 63;
3038  int x_exp = 1024 + 62 - fbits;
3039  uint64_t x_mnt = x_sgn ? -a : a;
3040 
3041  // Handle zero:
3042  if (!x_mnt) {
3043  return fp64_zero(0);
3044  }
3045 
3046  x_mnt = fp64_normalise(x_mnt, &x_exp);
3047 
3048  return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
3049 }
3050 
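// The fplibFixedToFP() entry points below take the rounding mode from the
// instruction rather than from modeConv(), so the FPSCR contribution is
// rebuilt by hand: (uint32_t)fpscr >> 22 & 12 moves FPSCR.FZ (bit 24) and
// FPSCR.DN (bit 25) onto FPLIB_FZ (4) and FPLIB_DN (8).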
3051 template <>
3052 uint32_t
3053 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3054 {
3055  int flags = 0;
3056  uint32_t res = fp32_cvtf(op, fbits, u,
3057  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3058  &flags);
3059  set_fpscr0(fpscr, flags);
3060  return res;
3061 }
3062 
3063 template <>
3064 uint64_t
3065 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3066 {
3067  int flags = 0;
3068  uint64_t res = fp64_cvtf(op, fbits, u,
3069  (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3070  &flags);
3071  set_fpscr0(fpscr, flags);
3072  return res;
3073 }
3074 
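// Illustrative usage sketch (not part of the original file; the ad-hoc
// zero-initialised FPSCR is an assumption made for the example):
//
//     FPSCR fpscr;
//     fpscr = 0;
//     uint32_t one = 0x3f800000; // 1.0f
//     // Widen to double: yields 0x3ff0000000000000 (1.0).
//     uint64_t d = fplibConvert<uint64_t>(one, FPRounding_TIEEVEN, fpscr);
//     // To unsigned Q24.8 fixed point: yields 256.
//     uint32_t q = fplibFPToFixed<uint32_t>(one, 8, true,
//                                           FPRounding_TIEEVEN, fpscr);
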
3075 }