llvm.org GIT mirror llvm / 96c7471
Implement correctly-rounded decimal->binary conversion, i.e. conversion from user input strings. Such conversions are more intricate and subtle than they may appear; it is unlikely I have got it completely right first time. I would appreciate being informed of any bugs and incorrect roundings you might discover. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42912 91177308-0d34-0410-b5e6-96231b3b80d8 Neil Booth 13 years ago
2 changed file(s) with 364 addition(s) and 13 deletion(s). Raw diff Collapse all Expand all
5959 if the requested precision is less than the natural precision the
6060 output is correctly rounded for the specified rounding mode.
6161
62 Conversion to and from decimal text is not currently implemented.
62 It also reads decimal floating point numbers and correctly rounds
63 according to the specified rounding mode.
64
65 Conversion to decimal text is not currently implemented.
6366
6467 Non-zero finite numbers are represented internally as a sign bit,
6568 a 16-bit signed exponent, and the significand as an array of
8285
8386 Some features that may or may not be worth adding:
8487
85 Conversions to and from decimal strings (hard).
88 Binary to decimal conversion (hard).
8689
8790 Optional ability to detect underflow tininess before rounding.
8891
8992 New formats: x87 in single and double precision mode (IEEE apart
90 from extended exponent range) and IBM two-double extended
91 precision (hard).
93 from extended exponent range) (hard).
9294
9395 New operations: sqrt, IEEE remainder, C90 fmod, nextafter,
9496 nexttoward.
185187 opStatus multiply(const APFloat &, roundingMode);
186188 opStatus divide(const APFloat &, roundingMode);
187189 opStatus mod(const APFloat &, roundingMode);
190 opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode);
191
192 /* Sign operations. */
193 void changeSign();
194 void clearSign();
188195 void copySign(const APFloat &);
189 opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode);
190 void changeSign(); // neg
191 void clearSign(); // abs
192196
193197 /* Conversions. */
194198 opStatus convert(const fltSemantics &, roundingMode);
271275 opStatus convertFromUnsignedParts(const integerPart *, unsigned int,
272276 roundingMode);
273277 opStatus convertFromHexadecimalString(const char *, roundingMode);
278 opStatus convertFromDecimalString (const char *, roundingMode);
274279 char *convertNormalToHexString(char *, unsigned int, bool,
275280 roundingMode) const;
281 opStatus roundSignificandWithExponent(const integerPart *, unsigned int,
282 int, roundingMode);
283
276284 APInt convertFloatAPFloatToAPInt() const;
277285 APInt convertDoubleAPFloatToAPInt() const;
278286 APInt convertF80LongDoubleAPFloatToAPInt() const;
5151 // onto the usual format above. For now only storage of constants of
5252 // this type is supported, no arithmetic.
5353 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106 };
54
55 /* A tight upper bound on number of parts required to hold the value
56 pow(5, power) is
57
58 power * 1024 / (441 * integerPartWidth) + 1
59
60 However, whilst the result may require only this many parts,
61 because we are multiplying two values to get it, the
62 multiplication may require an extra part with the excess part
63 being zero (consider the trivial case of 1 * 1, tcFullMultiply
64 requires two parts to hold the single-part result). So we add an
65 extra one to guarantee enough space whilst multiplying. */
66 const unsigned int maxExponent = 16383;
67 const unsigned int maxPrecision = 113;
68 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
69 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 1024)
70 / (441 * integerPartWidth));
5471 }
5572
5673 /* Put a bunch of private, handy routines in an anonymous namespace. */
7592 }
7693
7794 unsigned int
78 hexDigitValue (unsigned int c)
95 hexDigitValue(unsigned int c)
7996 {
8097 unsigned int r;
8198
236253 }
237254
238255 return moreSignificant;
256 }
257
258 /* The error from the true value, in half-ulps, on multiplying two
259 floating point numbers, which differ from the value they
260 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
261 than the returned value.
262
263 See "How to Read Floating Point Numbers Accurately" by William D
264 Clinger. */
265 unsigned int
266 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
267 {
268 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
269
270 if (HUerr1 + HUerr2 == 0)
271 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
272 else
273 return inexactMultiply + 2 * (HUerr1 + HUerr2);
274 }
275
276 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
277 when the least significant BITS are truncated. BITS cannot be
278 zero. */
279 integerPart
280 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
281 {
282 unsigned int count, partBits;
283 integerPart part, boundary;
284
285 assert (bits != 0);
286
287 bits--;
288 count = bits / integerPartWidth;
289 partBits = bits % integerPartWidth + 1;
290
291 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
292
293 if (isNearest)
294 boundary = (integerPart) 1 << (partBits - 1);
295 else
296 boundary = 0;
297
298 if (count == 0) {
299 if (part - boundary <= boundary - part)
300 return part - boundary;
301 else
302 return boundary - part;
303 }
304
305 if (part == boundary) {
306 while (--count)
307 if (parts[count])
308 return ~(integerPart) 0; /* A lot. */
309
310 return parts[0];
311 } else if (part == boundary - 1) {
312 while (--count)
313 if (~parts[count])
314 return ~(integerPart) 0; /* A lot. */
315
316 return -parts[0];
317 }
318
319 return ~(integerPart) 0; /* A lot. */
320 }
321
322 /* Place pow(5, power) in DST, and return the number of parts used.
323 DST must be at least one part larger than size of the answer. */
324 static unsigned int
325 powerOf5(integerPart *dst, unsigned int power)
326 {
327 /* A tight upper bound on number of parts required to hold the
328 value pow(5, power) is
329
330 power * 65536 / (28224 * integerPartWidth) + 1
331
332 However, whilst the result may require only N parts, because we
333 are multiplying two values to get it, the multiplication may
334 require N + 1 parts with the excess part being zero (consider
335 the trivial case of 1 * 1, the multiplier requires two parts to
336 hold the single-part result). So we add two to guarantee
337 enough space whilst multiplying. */
338 static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
339 15625, 78125 };
340 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
341 static unsigned int partsCount[16] = { 1 };
342
343 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
344 unsigned int result;
345
346 assert(power <= maxExponent);
347
348 p1 = dst;
349 p2 = scratch;
350
351 *p1 = firstEightPowers[power & 7];
352 power >>= 3;
353
354 result = 1;
355 pow5 = pow5s;
356
357 for (unsigned int n = 0; power; power >>= 1, n++) {
358 unsigned int pc;
359
360 pc = partsCount[n];
361
362 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
363 if (pc == 0) {
364 pc = partsCount[n - 1];
365 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
366 pc *= 2;
367 if (pow5[pc - 1] == 0)
368 pc--;
369 partsCount[n] = pc;
370 }
371
372 if (power & 1) {
373 integerPart *tmp;
374
375 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
376 result += pc;
377 if (p2[result - 1] == 0)
378 result--;
379
380 /* Now result is in p1 with partsCount parts and p2 is scratch
381 space. */
382 tmp = p1, p1 = p2, p2 = tmp;
383 }
384
385 pow5 += pc;
386 }
387
388 if (p1 != dst)
389 APInt::tcAssign(dst, p1, result);
390
391 return result;
239392 }
240393
241394 /* Zero at the end to avoid modular arithmetic when adding one; used
649802 APInt::tcShiftLeft(dividend, partsCount, bit);
650803 }
651804
805 /* Ensure the dividend >= divisor initially for the loop below.
806 Incidentally, this means that the division loop below is
807 guaranteed to set the integer bit to one. */
652808 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
653809 exponent--;
654810 APInt::tcShiftLeft(dividend, partsCount, 1);
8641020
8651021 /* Keep OMSB up-to-date. */
8661022 if(omsb > (unsigned) exponentChange)
867 omsb -= (unsigned) exponentChange;
1023 omsb -= exponentChange;
8681024 else
8691025 omsb = 0;
8701026 }
9151071
9161072 /* We have a non-zero denormal. */
9171073 assert(omsb < semantics->precision);
918 assert(exponent == semantics->minExponent);
9191074
9201075 /* Canonicalize zeroes. */
9211076 if(omsb == 0)
17501905 }
17511906
17521907 APFloat::opStatus
1908 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
1909 unsigned sigPartCount, int exp,
1910 roundingMode rounding_mode)
1911 {
1912 unsigned int parts, pow5PartCount;
1913 fltSemantics calcSemantics = { 32767, -32767, 0 };
1914 integerPart pow5Parts[maxPowerOfFiveParts];
1915 bool isNearest;
1916
1917 isNearest = (rounding_mode == rmNearestTiesToEven
1918 || rounding_mode == rmNearestTiesToAway);
1919
1920 parts = partCountForBits(semantics->precision + 11);
1921
1922 /* Calculate pow(5, abs(exp)). */
1923 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
1924
1925 for (;; parts *= 2) {
1926 opStatus sigStatus, powStatus;
1927 unsigned int excessPrecision, truncatedBits;
1928
1929 calcSemantics.precision = parts * integerPartWidth - 1;
1930 excessPrecision = calcSemantics.precision - semantics->precision;
1931 truncatedBits = excessPrecision;
1932
1933 APFloat decSig(calcSemantics, fcZero, sign);
1934 APFloat pow5(calcSemantics, fcZero, false);
1935
1936 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
1937 rmNearestTiesToEven);
1938 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
1939 rmNearestTiesToEven);
1940 /* Add exp, as 10^n = 5^n * 2^n. */
1941 decSig.exponent += exp;
1942
1943 lostFraction calcLostFraction;
1944 integerPart HUerr, HUdistance, powHUerr;
1945
1946 if (exp >= 0) {
1947 /* multiplySignificand leaves the precision-th bit set to 1. */
1948 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
1949 powHUerr = powStatus != opOK;
1950 } else {
1951 calcLostFraction = decSig.divideSignificand(pow5);
1952 /* Denormal numbers have less precision. */
1953 if (decSig.exponent < semantics->minExponent) {
1954 excessPrecision += (semantics->minExponent - decSig.exponent);
1955 truncatedBits = excessPrecision;
1956 if (excessPrecision > calcSemantics.precision)
1957 excessPrecision = calcSemantics.precision;
1958 }
1959 /* Extra half-ulp lost in reciprocal of exponent. */
1960 powHUerr = 1 + powStatus != opOK;
1961 }
1962
1963 /* Both multiplySignificand and divideSignificand return the
1964 result with the integer bit set. */
1965 assert (APInt::tcExtractBit
1966 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
1967
1968 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
1969 powHUerr);
1970 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
1971 excessPrecision, isNearest);
1972
1973 /* Are we guaranteed to round correctly if we truncate? */
1974 if (HUdistance >= HUerr) {
1975 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
1976 calcSemantics.precision - excessPrecision,
1977 excessPrecision);
1978 /* Take the exponent of decSig. If we tcExtract-ed less bits
1979 above we must adjust our exponent to compensate for the
1980 implicit right shift. */
1981 exponent = (decSig.exponent + semantics->precision
1982 - (calcSemantics.precision - excessPrecision));
1983 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
1984 decSig.partCount(),
1985 truncatedBits);
1986 return normalize(rounding_mode, calcLostFraction);
1987 }
1988 }
1989 }
1990
1991 APFloat::opStatus
1992 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
1993 {
1994 const char *dot, *firstSignificantDigit;
1995 integerPart val, maxVal, decValue;
1996 opStatus fs;
1997
1998 /* Skip leading zeroes and any decimal point. */
1999 p = skipLeadingZeroesAndAnyDot(p, &dot);
2000 firstSignificantDigit = p;
2001
2002 /* The maximum number that can be multiplied by ten with any digit
2003 added without overflowing an integerPart. */
2004 maxVal = (~ (integerPart) 0 - 9) / 10;
2005
2006 val = 0;
2007 while (val <= maxVal) {
2008 if (*p == '.') {
2009 assert(dot == 0);
2010 dot = p++;
2011 }
2012
2013 decValue = digitValue(*p);
2014 if (decValue == -1U)
2015 break;
2016 p++;
2017 val = val * 10 + decValue;
2018 }
2019
2020 integerPart *decSignificand;
2021 unsigned int partCount, maxPartCount;
2022
2023 partCount = 0;
2024 maxPartCount = 4;
2025 decSignificand = new integerPart[maxPartCount];
2026 decSignificand[partCount++] = val;
2027
2028 /* Now continue to do single-part arithmetic for as long as we can.
2029 Then do a part multiplication, and repeat. */
2030 while (decValue != -1U) {
2031 integerPart multiplier;
2032
2033 val = 0;
2034 multiplier = 1;
2035
2036 while (multiplier <= maxVal) {
2037 if (*p == '.') {
2038 assert(dot == 0);
2039 dot = p++;
2040 }
2041
2042 decValue = digitValue(*p);
2043 if (decValue == -1U)
2044 break;
2045 p++;
2046 multiplier *= 10;
2047 val = val * 10 + decValue;
2048 }
2049
2050 if (partCount == maxPartCount) {
2051 integerPart *newDecSignificand;
2052 newDecSignificand = new integerPart[maxPartCount = partCount * 2];
2053 APInt::tcAssign(newDecSignificand, decSignificand, partCount);
2054 delete [] decSignificand;
2055 decSignificand = newDecSignificand;
2056 }
2057
2058 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2059 partCount, partCount + 1, false);
2060
2061 /* If we used another part (likely), increase the count. */
2062 if (decSignificand[partCount] != 0)
2063 partCount++;
2064 }
2065
2066 /* Now decSignificand contains the supplied significand ignoring the
2067 decimal point. Figure out our effective exponent, which is the
2068 specified exponent adjusted for any decimal point. */
2069
2070 if (p == firstSignificantDigit) {
2071 /* Ignore the exponent if we are zero - we cannot overflow. */
2072 category = fcZero;
2073 fs = opOK;
2074 } else {
2075 int decimalExponent;
2076
2077 if (dot)
2078 decimalExponent = dot + 1 - p;
2079 else
2080 decimalExponent = 0;
2081
2082 /* Add the given exponent. */
2083 if (*p == 'e' || *p == 'E')
2084 decimalExponent = totalExponent(p, decimalExponent);
2085
2086 category = fcNormal;
2087 fs = roundSignificandWithExponent(decSignificand, partCount,
2088 decimalExponent, rounding_mode);
2089 }
2090
2091 delete [] decSignificand;
2092
2093 return fs;
2094 }
2095
2096 APFloat::opStatus
17532097 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
17542098 {
17552099 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
17622106
17632107 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
17642108 return convertFromHexadecimalString(p + 2, rounding_mode);
1765
1766 assert(0 && "Decimal to binary conversions not yet implemented");
1767 abort();
2109 else
2110 return convertFromDecimalString(p, rounding_mode);
17682111 }
17692112
17702113 /* Write out a hexadecimal representation of the floating point value