
/*============================================================================

This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Package, Release 2b.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
30: 31: =============================================================================*/ 32: 33: #include "softfloat.h" 34: 35: /*---------------------------------------------------------------------------- 36: | Primitive arithmetic functions, including multi-word arithmetic, and 37: | division and square root approximations. (Can be specialized to target if 38: | desired.) 39: *----------------------------------------------------------------------------*/ 40: #include "softfloat-macros.h" 41: 42: /*---------------------------------------------------------------------------- 43: | Functions and definitions to determine: (1) whether tininess for underflow 44: | is detected before or after rounding by default, (2) what (if anything) 45: | happens when exceptions are raised, (3) how signaling NaNs are distinguished 46: | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs 47: | are propagated from function inputs to output. These details are target- 48: | specific. 49: *----------------------------------------------------------------------------*/ 50: #include "softfloat-specialize.h" 51: 52: void set_float_rounding_mode(int val STATUS_PARAM) 53: { 54: STATUS(float_rounding_mode) = val; 55: } 56: 57: void set_float_exception_flags(int val STATUS_PARAM) 58: { 59: STATUS(float_exception_flags) = val; 60: } 61: 62: #ifdef FLOATX80 63: void set_floatx80_rounding_precision(int val STATUS_PARAM) 64: { 65: STATUS(floatx80_rounding_precision) = val; 66: } 67: #endif 68: 69: /*---------------------------------------------------------------------------- 70: | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 71: | and 7, and returns the properly rounded 32-bit integer corresponding to the 72: | input. If `zSign' is 1, the input is negated before being converted to an 73: | integer. Bit 63 of `absZ' must be zero. 
Ordinarily, the fixed-point input 74: | is simply rounded to an integer, with the inexact exception raised if the 75: | input cannot be represented exactly as an integer. However, if the fixed- 76: | point input is too large, the invalid exception is raised and the largest 77: | positive or negative integer is returned. 78: *----------------------------------------------------------------------------*/ 79: 80: static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM) 81: { 82: int8 roundingMode; 83: flag roundNearestEven; 84: int8 roundIncrement, roundBits; 85: int32 z; 86: 87: roundingMode = STATUS(float_rounding_mode); 88: roundNearestEven = ( roundingMode == float_round_nearest_even ); 89: roundIncrement = 0x40; 90: if ( ! roundNearestEven ) { 91: if ( roundingMode == float_round_to_zero ) { 92: roundIncrement = 0; 93: } 94: else { 95: roundIncrement = 0x7F; 96: if ( zSign ) { 97: if ( roundingMode == float_round_up ) roundIncrement = 0; 98: } 99: else { 100: if ( roundingMode == float_round_down ) roundIncrement = 0; 101: } 102: } 103: } 104: roundBits = absZ & 0x7F; 105: absZ = ( absZ + roundIncrement )>>7; 106: absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 107: z = absZ; 108: if ( zSign ) z = - z; 109: if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { 110: float_raise( float_flag_invalid STATUS_VAR); 111: return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 112: } 113: if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact; 114: return z; 115: 116: } 117: 118: /*---------------------------------------------------------------------------- 119: | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and 120: | `absZ1', with binary point between bits 63 and 64 (between the input words), 121: | and returns the properly rounded 64-bit integer corresponding to the input. 122: | If `zSign' is 1, the input is negated before being converted to an integer. 
123: | Ordinarily, the fixed-point input is simply rounded to an integer, with 124: | the inexact exception raised if the input cannot be represented exactly as 125: | an integer. However, if the fixed-point input is too large, the invalid 126: | exception is raised and the largest positive or negative integer is 127: | returned. 128: *----------------------------------------------------------------------------*/ 129: 130: static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM) 131: { 132: int8 roundingMode; 133: flag roundNearestEven, increment; 134: int64 z; 135: 136: roundingMode = STATUS(float_rounding_mode); 137: roundNearestEven = ( roundingMode == float_round_nearest_even ); 138: increment = ( (sbits64) absZ1 < 0 ); 139: if ( ! roundNearestEven ) { 140: if ( roundingMode == float_round_to_zero ) { 141: increment = 0; 142: } 143: else { 144: if ( zSign ) { 145: increment = ( roundingMode == float_round_down ) && absZ1; 146: } 147: else { 148: increment = ( roundingMode == float_round_up ) && absZ1; 149: } 150: } 151: } 152: if ( increment ) { 153: ++absZ0; 154: if ( absZ0 == 0 ) goto overflow; 155: absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven ); 156: } 157: z = absZ0; 158: if ( zSign ) z = - z; 159: if ( z && ( ( z < 0 ) ^ zSign ) ) { 160: overflow: 161: float_raise( float_flag_invalid STATUS_VAR); 162: return 163: zSign ? (sbits64) LIT64( 0x8000000000000000 ) 164: : LIT64( 0x7FFFFFFFFFFFFFFF ); 165: } 166: if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact; 167: return z; 168: 169: } 170: 171: /*---------------------------------------------------------------------------- 172: | Returns the fraction bits of the single-precision floating-point value `a'. 
173: *----------------------------------------------------------------------------*/ 174: 175: INLINE bits32 extractFloat32Frac( float32 a ) 176: { 177: 178: return float32_val(a) & 0x007FFFFF; 179: 180: } 181: 182: /*---------------------------------------------------------------------------- 183: | Returns the exponent bits of the single-precision floating-point value `a'. 184: *----------------------------------------------------------------------------*/ 185: 186: INLINE int16 extractFloat32Exp( float32 a ) 187: { 188: 189: return ( float32_val(a)>>23 ) & 0xFF; 190: 191: } 192: 193: /*---------------------------------------------------------------------------- 194: | Returns the sign bit of the single-precision floating-point value `a'. 195: *----------------------------------------------------------------------------*/ 196: 197: INLINE flag extractFloat32Sign( float32 a ) 198: { 199: 200: return float32_val(a)>>31; 201: 202: } 203: 204: /*---------------------------------------------------------------------------- 205: | Normalizes the subnormal single-precision floating-point value represented 206: | by the denormalized significand `aSig'. The normalized exponent and 207: | significand are stored at the locations pointed to by `zExpPtr' and 208: | `zSigPtr', respectively. 209: *----------------------------------------------------------------------------*/ 210: 211: static void 212: normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 213: { 214: int8 shiftCount; 215: 216: shiftCount = countLeadingZeros32( aSig ) - 8; 217: *zSigPtr = aSig<<shiftCount; 218: *zExpPtr = 1 - shiftCount; 219: 220: } 221: 222: /*---------------------------------------------------------------------------- 223: | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 224: | single-precision floating-point value, returning the result. 
After being 225: | shifted into the proper positions, the three fields are simply added 226: | together to form the result. This means that any integer portion of `zSig' 227: | will be added into the exponent. Since a properly normalized significand 228: | will have an integer portion equal to 1, the `zExp' input should be 1 less 229: | than the desired result exponent whenever `zSig' is a complete, normalized 230: | significand. 231: *----------------------------------------------------------------------------*/ 232: 233: INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 234: { 235: 236: return make_float32( 237: ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig); 238: 239: } 240: 241: /*---------------------------------------------------------------------------- 242: | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 243: | and significand `zSig', and returns the proper single-precision floating- 244: | point value corresponding to the abstract input. Ordinarily, the abstract 245: | value is simply rounded and packed into the single-precision format, with 246: | the inexact exception raised if the abstract input cannot be represented 247: | exactly. However, if the abstract value is too large, the overflow and 248: | inexact exceptions are raised and an infinity or maximal finite value is 249: | returned. If the abstract value is too small, the input value is rounded to 250: | a subnormal number, and the underflow and inexact exceptions are raised if 251: | the abstract input cannot be represented exactly as a subnormal single- 252: | precision floating-point number. 253: | The input significand `zSig' has its binary point between bits 30 254: | and 29, which is 7 bits to the left of the usual location. This shifted 255: | significand must be normalized or smaller. 
If `zSig' is not normalized, 256: | `zExp' must be 0; in that case, the result returned is a subnormal number, 257: | and it must not require rounding. In the usual case that `zSig' is 258: | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 259: | The handling of underflow and overflow follows the IEC/IEEE Standard for 260: | Binary Floating-Point Arithmetic. 261: *----------------------------------------------------------------------------*/ 262: 263: static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM) 264: { 265: int8 roundingMode; 266: flag roundNearestEven; 267: int8 roundIncrement, roundBits; 268: flag isTiny; 269: 270: roundingMode = STATUS(float_rounding_mode); 271: roundNearestEven = ( roundingMode == float_round_nearest_even ); 272: roundIncrement = 0x40; 273: if ( ! roundNearestEven ) { 274: if ( roundingMode == float_round_to_zero ) { 275: roundIncrement = 0; 276: } 277: else { 278: roundIncrement = 0x7F; 279: if ( zSign ) { 280: if ( roundingMode == float_round_up ) roundIncrement = 0; 281: } 282: else { 283: if ( roundingMode == float_round_down ) roundIncrement = 0; 284: } 285: } 286: } 287: roundBits = zSig & 0x7F; 288: if ( 0xFD <= (bits16) zExp ) { 289: if ( ( 0xFD < zExp ) 290: || ( ( zExp == 0xFD ) 291: && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 292: ) { 293: float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); 294: return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 )); 295: } 296: if ( zExp < 0 ) { 297: isTiny = 298: ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) 299: || ( zExp < -1 ) 300: || ( zSig + roundIncrement < 0x80000000 ); 301: shift32RightJamming( zSig, - zExp, &zSig ); 302: zExp = 0; 303: roundBits = zSig & 0x7F; 304: if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR); 305: } 306: } 307: if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact; 308: zSig = ( zSig + roundIncrement )>>7; 309: 
zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 310: if ( zSig == 0 ) zExp = 0; 311: return packFloat32( zSign, zExp, zSig ); 312: 313: } 314: 315: /*---------------------------------------------------------------------------- 316: | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 317: | and significand `zSig', and returns the proper single-precision floating- 318: | point value corresponding to the abstract input. This routine is just like 319: | `roundAndPackFloat32' except that `zSig' does not have to be normalized. 320: | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 321: | floating-point exponent. 322: *----------------------------------------------------------------------------*/ 323: 324: static float32 325: normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM) 326: { 327: int8 shiftCount; 328: 329: shiftCount = countLeadingZeros32( zSig ) - 1; 330: return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR); 331: 332: } 333: 334: /*---------------------------------------------------------------------------- 335: | Returns the fraction bits of the double-precision floating-point value `a'. 336: *----------------------------------------------------------------------------*/ 337: 338: INLINE bits64 extractFloat64Frac( float64 a ) 339: { 340: 341: return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF ); 342: 343: } 344: 345: /*---------------------------------------------------------------------------- 346: | Returns the exponent bits of the double-precision floating-point value `a'. 
347: *----------------------------------------------------------------------------*/ 348: 349: INLINE int16 extractFloat64Exp( float64 a ) 350: { 351: 352: return ( float64_val(a)>>52 ) & 0x7FF; 353: 354: } 355: 356: /*---------------------------------------------------------------------------- 357: | Returns the sign bit of the double-precision floating-point value `a'. 358: *----------------------------------------------------------------------------*/ 359: 360: INLINE flag extractFloat64Sign( float64 a ) 361: { 362: 363: return float64_val(a)>>63; 364: 365: } 366: 367: /*---------------------------------------------------------------------------- 368: | Normalizes the subnormal double-precision floating-point value represented 369: | by the denormalized significand `aSig'. The normalized exponent and 370: | significand are stored at the locations pointed to by `zExpPtr' and 371: | `zSigPtr', respectively. 372: *----------------------------------------------------------------------------*/ 373: 374: static void 375: normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) 376: { 377: int8 shiftCount; 378: 379: shiftCount = countLeadingZeros64( aSig ) - 11; 380: *zSigPtr = aSig<<shiftCount; 381: *zExpPtr = 1 - shiftCount; 382: 383: } 384: 385: /*---------------------------------------------------------------------------- 386: | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 387: | double-precision floating-point value, returning the result. After being 388: | shifted into the proper positions, the three fields are simply added 389: | together to form the result. This means that any integer portion of `zSig' 390: | will be added into the exponent. Since a properly normalized significand 391: | will have an integer portion equal to 1, the `zExp' input should be 1 less 392: | than the desired result exponent whenever `zSig' is a complete, normalized 393: | significand. 
394: *----------------------------------------------------------------------------*/ 395: 396: INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) 397: { 398: 399: return make_float64( 400: ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig); 401: 402: } 403: 404: /*---------------------------------------------------------------------------- 405: | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 406: | and significand `zSig', and returns the proper double-precision floating- 407: | point value corresponding to the abstract input. Ordinarily, the abstract 408: | value is simply rounded and packed into the double-precision format, with 409: | the inexact exception raised if the abstract input cannot be represented 410: | exactly. However, if the abstract value is too large, the overflow and 411: | inexact exceptions are raised and an infinity or maximal finite value is 412: | returned. If the abstract value is too small, the input value is rounded 413: | to a subnormal number, and the underflow and inexact exceptions are raised 414: | if the abstract input cannot be represented exactly as a subnormal double- 415: | precision floating-point number. 416: | The input significand `zSig' has its binary point between bits 62 417: | and 61, which is 10 bits to the left of the usual location. This shifted 418: | significand must be normalized or smaller. If `zSig' is not normalized, 419: | `zExp' must be 0; in that case, the result returned is a subnormal number, 420: | and it must not require rounding. In the usual case that `zSig' is 421: | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 422: | The handling of underflow and overflow follows the IEC/IEEE Standard for 423: | Binary Floating-Point Arithmetic. 
424: *----------------------------------------------------------------------------*/ 425: 426: static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM) 427: { 428: int8 roundingMode; 429: flag roundNearestEven; 430: int16 roundIncrement, roundBits; 431: flag isTiny; 432: 433: roundingMode = STATUS(float_rounding_mode); 434: roundNearestEven = ( roundingMode == float_round_nearest_even ); 435: roundIncrement = 0x200; 436: if ( ! roundNearestEven ) { 437: if ( roundingMode == float_round_to_zero ) { 438: roundIncrement = 0; 439: } 440: else { 441: roundIncrement = 0x3FF; 442: if ( zSign ) { 443: if ( roundingMode == float_round_up ) roundIncrement = 0; 444: } 445: else { 446: if ( roundingMode == float_round_down ) roundIncrement = 0; 447: } 448: } 449: } 450: roundBits = zSig & 0x3FF; 451: if ( 0x7FD <= (bits16) zExp ) { 452: if ( ( 0x7FD < zExp ) 453: || ( ( zExp == 0x7FD ) 454: && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) 455: ) { 456: float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); 457: return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 )); 458: } 459: if ( zExp < 0 ) { 460: isTiny = 461: ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) 462: || ( zExp < -1 ) 463: || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); 464: shift64RightJamming( zSig, - zExp, &zSig ); 465: zExp = 0; 466: roundBits = zSig & 0x3FF; 467: if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR); 468: } 469: } 470: if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact; 471: zSig = ( zSig + roundIncrement )>>10; 472: zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); 473: if ( zSig == 0 ) zExp = 0; 474: return packFloat64( zSign, zExp, zSig ); 475: 476: } 477: 478: /*---------------------------------------------------------------------------- 479: | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 480: | and significand `zSig', and 
returns the proper double-precision floating- 481: | point value corresponding to the abstract input. This routine is just like 482: | `roundAndPackFloat64' except that `zSig' does not have to be normalized. 483: | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 484: | floating-point exponent. 485: *----------------------------------------------------------------------------*/ 486: 487: static float64 488: normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM) 489: { 490: int8 shiftCount; 491: 492: shiftCount = countLeadingZeros64( zSig ) - 1; 493: return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR); 494: 495: } 496: 497: #ifdef FLOATX80 498: 499: /*---------------------------------------------------------------------------- 500: | Returns the fraction bits of the extended double-precision floating-point 501: | value `a'. 502: *----------------------------------------------------------------------------*/ 503: 504: INLINE bits64 extractFloatx80Frac( floatx80 a ) 505: { 506: 507: return a.low; 508: 509: } 510: 511: /*---------------------------------------------------------------------------- 512: | Returns the exponent bits of the extended double-precision floating-point 513: | value `a'. 514: *----------------------------------------------------------------------------*/ 515: 516: INLINE int32 extractFloatx80Exp( floatx80 a ) 517: { 518: 519: return a.high & 0x7FFF; 520: 521: } 522: 523: /*---------------------------------------------------------------------------- 524: | Returns the sign bit of the extended double-precision floating-point value 525: | `a'. 
526: *----------------------------------------------------------------------------*/ 527: 528: INLINE flag extractFloatx80Sign( floatx80 a ) 529: { 530: 531: return a.high>>15; 532: 533: } 534: 535: /*---------------------------------------------------------------------------- 536: | Normalizes the subnormal extended double-precision floating-point value 537: | represented by the denormalized significand `aSig'. The normalized exponent 538: | and significand are stored at the locations pointed to by `zExpPtr' and 539: | `zSigPtr', respectively. 540: *----------------------------------------------------------------------------*/ 541: 542: static void 543: normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) 544: { 545: int8 shiftCount; 546: 547: shiftCount = countLeadingZeros64( aSig ); 548: *zSigPtr = aSig<<shiftCount; 549: *zExpPtr = 1 - shiftCount; 550: 551: } 552: 553: /*---------------------------------------------------------------------------- 554: | Packs the sign `zSign', exponent `zExp', and significand `zSig' into an 555: | extended double-precision floating-point value, returning the result. 556: *----------------------------------------------------------------------------*/ 557: 558: INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) 559: { 560: floatx80 z; 561: 562: z.low = zSig; 563: z.high = ( ( (bits16) zSign )<<15 ) + zExp; 564: return z; 565: 566: } 567: 568: /*---------------------------------------------------------------------------- 569: | Takes an abstract floating-point value having sign `zSign', exponent `zExp', 570: | and extended significand formed by the concatenation of `zSig0' and `zSig1', 571: | and returns the proper extended double-precision floating-point value 572: | corresponding to the abstract input. 
Ordinarily, the abstract value is 573: | rounded and packed into the extended double-precision format, with the 574: | inexact exception raised if the abstract input cannot be represented 575: | exactly. However, if the abstract value is too large, the overflow and 576: | inexact exceptions are raised and an infinity or maximal finite value is 577: | returned. If the abstract value is too small, the input value is rounded to 578: | a subnormal number, and the underflow and inexact exceptions are raised if 579: | the abstract input cannot be represented exactly as a subnormal extended 580: | double-precision floating-point number. 581: | If `roundingPrecision' is 32 or 64, the result is rounded to the same 582: | number of bits as single or double precision, respectively. Otherwise, the 583: | result is rounded to the full precision of the extended double-precision 584: | format. 585: | The input significand must be normalized or smaller. If the input 586: | significand is not normalized, `zExp' must be 0; in that case, the result 587: | returned is a subnormal number, and it must not require rounding. The 588: | handling of underflow and overflow follows the IEC/IEEE Standard for Binary 589: | Floating-Point Arithmetic. 590: *----------------------------------------------------------------------------*/ 591: 592: static floatx80 593: roundAndPackFloatx80( 594: int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 595: STATUS_PARAM)