
1: 2: /*============================================================================ 3: 4: This C source fragment is part of the SoftFloat IEC/IEEE Floating-point 5: Arithmetic Package, Release 2b. 6: 7: Written by John R. Hauser. This work was made possible in part by the 8: International Computer Science Institute, located at Suite 600, 1947 Center 9: Street, Berkeley, California 94704. Funding was partially provided by the 10: National Science Foundation under grant MIP-9311980. The original version 11: of this code was written as part of a project to build a fixed-point vector 12: processor in collaboration with the University of California at Berkeley, 13: overseen by Profs. Nelson Morgan and John Wawrzynek. More information 14: is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ 15: arithmetic/SoftFloat.html'. 16: 17: THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has 18: been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES 19: RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS 20: AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, 21: COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE 22: EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE 23: INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR 24: OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. 25: 26: Derivative works are acceptable, even for commercial purposes, so long as 27: (1) the source code for the derivative work includes prominent notice that 28: the work is derivative, and (2) the source code includes prominent notice with 29: these four paragraphs for those parts of this code that are retained. 30: 31: =============================================================================*/ 32: 33: /*---------------------------------------------------------------------------- 34: | Shifts `a' right by the number of bits given in `count'. If any nonzero 35: | bits are shifted off, they are ``jammed'' into the least significant bit of 36: | the result by setting the least significant bit to 1. The value of `count' 37: | can be arbitrarily large; in particular, if `count' is greater than 32, the 38: | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 39: | The result is stored in the location pointed to by `zPtr'. 40: *----------------------------------------------------------------------------*/ 41: 42: INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) 43: { 44: bits32 z; 45: 46: if ( count == 0 ) { 47: z = a; 48: } 49: else if ( count < 32 ) { 50: z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 51: } 52: else { 53: z = ( a != 0 ); 54: } 55: *zPtr = z; 56: 57: } 58: 59: /*---------------------------------------------------------------------------- 60: | Shifts `a' right by the number of bits given in `count'. If any nonzero 61: | bits are shifted off, they are ``jammed'' into the least significant bit of 62: | the result by setting the least significant bit to 1. The value of `count' 63: | can be arbitrarily large; in particular, if `count' is greater than 64, the 64: | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 65: | The result is stored in the location pointed to by `zPtr'. 66: *----------------------------------------------------------------------------*/ 67: 68: INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) 69: { 70: bits64 z; 71: 72: if ( count == 0 ) { 73: z = a; 74: } 75: else if ( count < 64 ) { 76: z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); 77: } 78: else { 79: z = ( a != 0 ); 80: } 81: *zPtr = z; 82: 83: } 84: 85: /*---------------------------------------------------------------------------- 86: | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 87: | _plus_ the number of bits given in `count'. The shifted result is at most 88: | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The 89: | bits shifted off form a second 64-bit result as follows: The _last_ bit 90: | shifted off is the most-significant bit of the extra result, and the other 91: | 63 bits of the extra result are all zero if and only if _all_but_the_last_ 92: | bits shifted off were all zero. This extra result is stored in the location 93: | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 94: | (This routine makes more sense if `a0' and `a1' are considered to form 95: | a fixed-point value with binary point between `a0' and `a1'. This fixed- 96: | point value is shifted right by the number of bits given in `count', and 97: | the integer part of the result is returned at the location pointed to by 98: | `z0Ptr'. The fractional part of the result may be slightly corrupted as 99: | described above, and is returned at the location pointed to by `z1Ptr'.) 100: *----------------------------------------------------------------------------*/ 101: 102: INLINE void 103: shift64ExtraRightJamming( 104: bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 105: { 106: bits64 z0, z1; 107: int8 negCount = ( - count ) & 63; 108: 109: if ( count == 0 ) { 110: z1 = a1; 111: z0 = a0; 112: } 113: else if ( count < 64 ) { 114: z1 = ( a0<<negCount ) | ( a1 != 0 ); 115: z0 = a0>>count; 116: } 117: else { 118: if ( count == 64 ) { 119: z1 = a0 | ( a1 != 0 ); 120: } 121: else { 122: z1 = ( ( a0 | a1 ) != 0 ); 123: } 124: z0 = 0; 125: } 126: *z1Ptr = z1; 127: *z0Ptr = z0; 128: 129: } 130: 131: /*---------------------------------------------------------------------------- 132: | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 133: | number of bits given in `count'. Any bits shifted off are lost. The value 134: | of `count' can be arbitrarily large; in particular, if `count' is greater 135: | than 128, the result will be 0. The result is broken into two 64-bit pieces 136: | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 137: *----------------------------------------------------------------------------*/ 138: 139: INLINE void 140: shift128Right( 141: bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 142: { 143: bits64 z0, z1; 144: int8 negCount = ( - count ) & 63; 145: 146: if ( count == 0 ) { 147: z1 = a1; 148: z0 = a0; 149: } 150: else if ( count < 64 ) { 151: z1 = ( a0<<negCount ) | ( a1>>count ); 152: z0 = a0>>count; 153: } 154: else { 155: z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; 156: z0 = 0; 157: } 158: *z1Ptr = z1; 159: *z0Ptr = z0; 160: 161: } 162: 163: /*---------------------------------------------------------------------------- 164: | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 165: | number of bits given in `count'. If any nonzero bits are shifted off, they 166: | are ``jammed'' into the least significant bit of the result by setting the 167: | least significant bit to 1. The value of `count' can be arbitrarily large; 168: | in particular, if `count' is greater than 128, the result will be either 169: | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or 170: | nonzero. The result is broken into two 64-bit pieces which are stored at 171: | the locations pointed to by `z0Ptr' and `z1Ptr'. 172: *----------------------------------------------------------------------------*/ 173: 174: INLINE void 175: shift128RightJamming( 176: bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 177: { 178: bits64 z0, z1; 179: int8 negCount = ( - count ) & 63; 180: 181: if ( count == 0 ) { 182: z1 = a1; 183: z0 = a0; 184: } 185: else if ( count < 64 ) { 186: z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 187: z0 = a0>>count; 188: } 189: else { 190: if ( count == 64 ) { 191: z1 = a0 | ( a1 != 0 ); 192: } 193: else if ( count < 128 ) { 194: z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 195: } 196: else { 197: z1 = ( ( a0 | a1 ) != 0 ); 198: } 199: z0 = 0; 200: } 201: *z1Ptr = z1; 202: *z0Ptr = z0; 203: 204: } 205: 206: /*---------------------------------------------------------------------------- 207: | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right 208: | by 64 _plus_ the number of bits given in `count'. The shifted result is 209: | at most 128 nonzero bits; these are broken into two 64-bit pieces which are 210: | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted 211: | off form a third 64-bit result as follows: The _last_ bit shifted off is 212: | the most-significant bit of the extra result, and the other 63 bits of the 213: | extra result are all zero if and only if _all_but_the_last_ bits shifted off 214: | were all zero. This extra result is stored in the location pointed to by 215: | `z2Ptr'. The value of `count' can be arbitrarily large. 216: | (This routine makes more sense if `a0', `a1', and `a2' are considered 217: | to form a fixed-point value with binary point between `a1' and `a2'. This 218: | fixed-point value is shifted right by the number of bits given in `count', 219: | and the integer part of the result is returned at the locations pointed to 220: | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 221: | corrupted as described above, and is returned at the location pointed to by 222: | `z2Ptr'.) 223: *----------------------------------------------------------------------------*/ 224: 225: INLINE void 226: shift128ExtraRightJamming( 227: bits64 a0, 228: bits64 a1, 229: bits64 a2, 230: int16 count, 231: bits64 *z0Ptr, 232: bits64 *z1Ptr, 233: bits64 *z2Ptr 234: ) 235: { 236: bits64 z0, z1, z2; 237: int8 negCount = ( - count ) & 63; 238: 239: if ( count == 0 ) { 240: z2 = a2; 241: z1 = a1; 242: z0 = a0; 243: } 244: else { 245: if ( count < 64 ) { 246: z2 = a1<<negCount; 247: z1 = ( a0<<negCount ) | ( a1>>count ); 248: z0 = a0>>count; 249: } 250: else { 251: if ( count == 64 ) { 252: z2 = a1; 253: z1 = a0; 254: } 255: else { 256: a2 |= a1; 257: if ( count < 128 ) { 258: z2 = a0<<negCount; 259: z1 = a0>>( count & 63 ); 260: } 261: else { 262: z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); 263: z1 = 0; 264: } 265: } 266: z0 = 0; 267: } 268: z2 |= ( a2 != 0 ); 269: } 270: *z2Ptr = z2; 271: *z1Ptr = z1; 272: *z0Ptr = z0; 273: 274: } 275: 276: /*---------------------------------------------------------------------------- 277: | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the 278: | number of bits given in `count'. Any bits shifted off are lost. The value 279: | of `count' must be less than 64. The result is broken into two 64-bit 280: | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 281: *----------------------------------------------------------------------------*/ 282: 283: INLINE void 284: shortShift128Left( 285: bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) 286: { 287: 288: *z1Ptr = a1<<count; 289: *z0Ptr = 290: ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); 291: 292: } 293: 294: /*---------------------------------------------------------------------------- 295: | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left 296: | by the number of bits given in `count'. Any bits shifted off are lost. 297: | The value of `count' must be less than 64. The result is broken into three 298: | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 299: | `z1Ptr', and `z2Ptr'. 300: *----------------------------------------------------------------------------*/ 301: 302: INLINE void 303: shortShift192Left( 304: bits64 a0, 305: bits64 a1, 306: bits64 a2, 307: int16 count, 308: bits64 *z0Ptr, 309: bits64 *z1Ptr, 310: bits64 *z2Ptr 311: ) 312: { 313: bits64 z0, z1, z2; 314: int8 negCount; 315: 316: z2 = a2<<count; 317: z1 = a1<<count; 318: z0 = a0<<count; 319: if ( 0 < count ) { 320: negCount = ( ( - count ) & 63 ); 321: z1 |= a2>>negCount; 322: z0 |= a1>>negCount; 323: } 324: *z2Ptr = z2; 325: *z1Ptr = z1; 326: *z0Ptr = z0; 327: 328: } 329: 330: /*---------------------------------------------------------------------------- 331: | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit 332: | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so 333: | any carry out is lost. The result is broken into two 64-bit pieces which 334: | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 335: *----------------------------------------------------------------------------*/ 336: 337: INLINE void 338: add128( 339: bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 340: { 341: bits64 z1; 342: 343: z1 = a1 + b1; 344: *z1Ptr = z1; 345: *z0Ptr = a0 + b0 + ( z1 < a1 ); 346: 347: } 348: 349: /*---------------------------------------------------------------------------- 350: | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the 351: | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 352: | modulo 2^192, so any carry out is lost. The result is broken into three 353: | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 354: | `z1Ptr', and `z2Ptr'. 355: *----------------------------------------------------------------------------*/ 356: 357: INLINE void 358: add192( 359: bits64 a0, 360: bits64 a1, 361: bits64 a2, 362: bits64 b0, 363: bits64 b1, 364: bits64 b2, 365: bits64 *z0Ptr, 366: bits64 *z1Ptr, 367: bits64 *z2Ptr 368: ) 369: { 370: bits64 z0, z1, z2; 371: int8 carry0, carry1; 372: 373: z2 = a2 + b2; 374: carry1 = ( z2 < a2 ); 375: z1 = a1 + b1; 376: carry0 = ( z1 < a1 ); 377: z0 = a0 + b0; 378: z1 += carry1; 379: z0 += ( z1 < carry1 ); 380: z0 += carry0; 381: *z2Ptr = z2; 382: *z1Ptr = z1; 383: *z0Ptr = z0; 384: 385: } 386: 387: /*---------------------------------------------------------------------------- 388: | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the 389: | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 390: | 2^128, so any borrow out (carry out) is lost. The result is broken into two 391: | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and 392: | `z1Ptr'. 393: *----------------------------------------------------------------------------*/ 394: 395: INLINE void 396: sub128( 397: bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) 398: { 399: 400: *z1Ptr = a1 - b1; 401: *z0Ptr = a0 - b0 - ( a1 < b1 ); 402: 403: } 404: 405: /*---------------------------------------------------------------------------- 406: | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' 407: | from the 192-bit value formed by concatenating `a0', `a1', and `a2'. 408: | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The 409: | result is broken into three 64-bit pieces which are stored at the locations 410: | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 411: *----------------------------------------------------------------------------*/ 412: 413: INLINE void 414: sub192( 415: bits64 a0, 416: bits64 a1, 417: bits64 a2, 418: bits64 b0, 419: bits64 b1, 420: bits64 b2, 421: bits64 *z0Ptr, 422: bits64 *z1Ptr, 423: bits64 *z2Ptr 424: ) 425: { 426: bits64 z0, z1, z2; 427: int8 borrow0, borrow1; 428: 429: z2 = a2 - b2; 430: borrow1 = ( a2 < b2 ); 431: z1 = a1 - b1; 432: borrow0 = ( a1 < b1 ); 433: z0 = a0 - b0; 434: z0 -= ( z1 < borrow1 ); 435: z1 -= borrow1; 436: z0 -= borrow0; 437: *z2Ptr = z2; 438: *z1Ptr = z1; 439: *z0Ptr = z0; 440: 441: } 442: 443: /*---------------------------------------------------------------------------- 444: | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken 445: | into two 64-bit pieces which are stored at the locations pointed to by 446: | `z0Ptr' and `z1Ptr'. 447: *----------------------------------------------------------------------------*/ 448: 449: INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) 450: { 451: bits32 aHigh, aLow, bHigh, bLow; 452: bits64 z0, zMiddleA, zMiddleB, z1; 453: 454: aLow = a; 455: aHigh = a>>32; 456: bLow = b; 457: bHigh = b>>32; 458: z1 = ( (bits64) aLow ) * bLow; 459: zMiddleA = ( (bits64) aLow ) * bHigh; 460: zMiddleB = ( (bits64) aHigh ) * bLow; 461: z0 = ( (bits64) aHigh ) * bHigh; 462: zMiddleA += zMiddleB; 463: z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); 464: zMiddleA <<= 32; 465: z1 += zMiddleA; 466: z0 += ( z1 < zMiddleA ); 467: *z1Ptr = z1; 468: *z0Ptr = z0; 469: 470: } 471: 472: /*---------------------------------------------------------------------------- 473: | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by 474: | `b' to obtain a 192-bit product. The product is broken into three 64-bit 475: | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 476: | `z2Ptr'. 477: *----------------------------------------------------------------------------*/ 478: 479: INLINE void 480: mul128By64To192( 481: bits64 a0, 482: bits64 a1, 483: bits64 b, 484: bits64 *z0Ptr, 485: bits64 *z1Ptr, 486: bits64 *z2Ptr 487: ) 488: { 489: bits64 z0, z1, z2, more1; 490: 491: mul64To128( a1, b, &z1, &z2 ); 492: mul64To128( a0, b, &z0, &more1 ); 493: add128( z0, more1, 0, z1, &z0, &z1 ); 494: *z2Ptr = z2; 495: *z1Ptr = z1; 496: *z0Ptr = z0; 497: 498: } 499: 500: /*---------------------------------------------------------------------------- 501: | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the 502: | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit 503: | product. The product is broken into four 64-bit pieces which are stored at 504: | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 505: *----------------------------------------------------------------------------*/ 506: 507: INLINE void 508: mul128To256( 509: bits64 a0, 510: bits64 a1, 511: bits64 b0, 512: bits64 b1, 513: bits64 *z0Ptr, 514: bits64 *z1Ptr, 515: bits64 *z2Ptr, 516: bits64 *z3Ptr 517: ) 518: { 519: bits64 z0, z1, z2, z3; 520: bits64 more1, more2; 521: 522: mul64To128( a1, b1, &z2, &z3 ); 523: mul64To128( a1, b0, &z1, &more2 ); 524: add128( z1, more2, 0, z2, &z1, &z2 ); 525: mul64To128( a0, b0, &z0, &more1 ); 526: add128( z0, more1, 0, z1, &z0, &z1 ); 527: mul64To128( a0, b1, &more1, &more2 ); 528: add128( more1, more2, 0, z2, &more1, &z2 ); 529: add128( z0, z1, 0, more1, &z0, &z1 ); 530: *z3Ptr = z3; 531: *z2Ptr = z2; 532: *z1Ptr = z1; 533: *z0Ptr = z0; 534: 535: } 536: 537: /*---------------------------------------------------------------------------- 538: | Returns an approximation to the 64-bit integer quotient obtained by dividing 539: | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The 540: | divisor `b' must be at least 2^63. If q is the exact quotient truncated 541: | toward zero, the approximation returned lies between q and q + 2 inclusive. 542: | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit 543: | unsigned integer is returned. 544: *----------------------------------------------------------------------------*/ 545: 546: static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) 547: { 548: bits64 b0, b1; 549: bits64 rem0, rem1, term0, term1; 550: bits64 z; 551: 552: if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); 553: b0 = b>>32; 554: z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; 555: mul64To128( b, z, &term0, &term1 ); 556: sub128( a0, a1, term0, term1, &rem0, &rem1 ); 557: while ( ( (sbits64) rem0 ) < 0 ) { 558: z -= LIT64( 0x100000000 ); 559: b1 = b<<32; 560: add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 561: } 562: rem0 = ( rem0<<32 ) | ( rem1>>32 ); 563: z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; 564: return z; 565: 566: } 567: 568: /*---------------------------------------------------------------------------- 569: | Returns an approximation to the square root of the 32-bit significand given 570: | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 571: | `aExp' (the least significant bit) is 1, the integer returned approximates 572: | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 573: | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 574: | case, the approximation returned lies strictly within +/-2 of the exact 575: | value. 576: *----------------------------------------------------------------------------*/ 577: 578: static bits32 estimateSqrt32( int16 aExp, bits32 a ) 579: { 580: static const bits16 sqrtOddAdjustments[] = { 581: 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 582: 0x039C, 0x0468, 0x0545<