Implementing custom strtod, fail on some cases [ci skip]

author: Milo Yip <miloyip@gmail.com> 2014-09-12 19:03:20 +0400
committer: Milo Yip <miloyip@gmail.com> 2014-09-12 19:03:20 +0400
commit: 4bd240abeecc37070576298271af27a2097d9665 (patch)
tree: a9ed38e252db0423858208070cd944b3e23ba8b4 /include/rapidjson
parent: 359ebc78c0e1939e952fe2e6d1001286c5cf3dbb (diff)
2 files changed, 398 insertions, 39 deletions
diff --git a/include/rapidjson/internal/strtod.h b/include/rapidjson/internal/strtod.h
index d65d3fe3..16982339 100644
--- a/include/rapidjson/internal/strtod.h
+++ b/include/rapidjson/internal/strtod.h
@@ -21,11 +21,292 @@
 #ifndef RAPIDJSON_STRTOD_
 #define RAPIDJSON_STRTOD_
 
+#include "../rapidjson.h"
 #include "pow10.h"
 
 namespace rapidjson {
 namespace internal {
 
+class Double {
+public:
+    Double(double d) : d(d) {}
+    Double(uint64_t u) : u(u) {}
+
+    double NextDouble() const {
+        RAPIDJSON_ASSERT(!Sign());
+        return Double(u + 1).Value();
+    }
+
+    double PreviousDouble() const {
+        RAPIDJSON_ASSERT(!Sign());
+        if (d == 0.0)
+            return 0.0;
+        else
+            return Double(u - 1).Value();
+    }
+
+    bool Sign() const { return (u & kSignMask) != 0; }
+    uint64_t Significand() const { return u & kSignificandMask; }
+    int Exponent() const { return (u & kExponentMask) - kExponentBias; }
+    double Value() const { return d;}
+
+private:
+    static const int kSignificandSize = 52;
+    static const int kExponentBias = 0x3FF;
+    static const uint64_t kSignMask = RAPIDJSON_UINT64_C2(0x80000000, 0x00000000);
+    static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
+    static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
+    static const uint64_t kHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
+
+    union {
+        double d;
+        uint64_t u;
+    };
+};
+
+class BigInteger {
+public:
+    typedef uint64_t Type;
+
+    explicit BigInteger(uint64_t u) {
+        *this = u;
+    }
+
+    BigInteger(const char* decimals, size_t length)  {
+        RAPIDJSON_ASSERT(length > 0);
+        *this = 0u;
+        size_t i = 0;
+        const size_t kMaxDigitPerIteration = 19;  // 2^64 = 18446744073709551616 > 10^19
+        while (length >= kMaxDigitPerIteration) {
+            AppendDecimal64(decimals + i, decimals + i + kMaxDigitPerIteration);
+            length -= kMaxDigitPerIteration;
+            i += kMaxDigitPerIteration;
+        }
+
+        if (length > 0)
+            AppendDecimal64(decimals + i, decimals + i + length);
+    }
+
+    BigInteger& operator=(uint64_t u) {
+        digits_[0] = u;            
+        count_ = 1;
+        return *this;
+    }
+
+    BigInteger& operator+=(uint64_t u) {
+        Type backup = digits_[0];
+        digits_[0] += u;
+        for (size_t i = 0; i < count_ - 1; i++) {
+            if (digits_[i] >= backup)
+                return *this; // no carry
+            backup = digits_[i + 1];
+            digits_[i + 1] += 1;
+        }
+
+        // Last carry
+        if (digits_[count_ - 1] < backup)
+            PushBack(1);
+
+        return *this;
+    }
+
+    BigInteger& operator*=(uint64_t u) {
+        if (u == 0) return *this = 0;
+        if (u == 1) return *this;
+        uint64_t k = 0;
+        for (size_t i = 0; i < count_; i++) {
+            uint64_t hi;
+            digits_[i] = MulAdd64(digits_[i], u, k, &hi);
+            k = hi;
+        }
+        
+        if (k > 0)
+            PushBack(k);
+
+        return *this;
+    }
+
+    BigInteger& operator*=(uint32_t u) {
+        if (u == 0) return *this = 0;
+        if (u == 1) return *this;
+        uint32_t k = 0;
+        for (size_t i = 0; i < count_; i++) {
+            const uint64_t c = digits_[i] >> 32;
+            const uint64_t d = digits_[i] & 0xFFFFFFFF;
+            const uint64_t uc = u * c;
+            const uint64_t ud = u * d;
+            const uint64_t p0 = ud + k;
+            const uint64_t p1 = uc + (p0 >> 32);
+            digits_[i] = (p0 & 0xFFFFFFFF) | (p1 << 32);
+            k = p1 >> 32;
+        }
+        
+        if (k > 0)
+            PushBack(k);
+
+        return *this;
+    }
+
+    BigInteger& operator<<=(size_t shift) {
+        if (IsZero()) return *this;
+
+        if (shift >= kTypeBit) {
+            size_t offset = shift / kTypeBit;
+            RAPIDJSON_ASSERT(count_ + offset <= kCapacity);
+            for (size_t i = count_; i > 0; i--)
+                digits_[i - 1 + offset] = digits_[i - 1];
+            for (size_t i = 0; i < offset; i++)
+                digits_[i] = 0;
+            count_ += offset;
+            shift -= offset * kTypeBit;
+        }
+
+        if (shift > 0) {
+            // Inter-digit shifts
+            Type carry = 0;
+            for (size_t i = 0; i < count_; i++) {
+                Type newCarry = digits_[i] >> (kTypeBit - shift);
+                digits_[i] = (digits_[i] << shift) | carry;
+                carry = newCarry;
+            }
+
+            // Last carry
+            if (carry)
+                PushBack(carry);
+        }
+
+        return *this;
+    }
+
+    bool operator==(const BigInteger& rhs) const {
+        return count_ == rhs.count_ && memcmp(digits_, rhs.digits_, count_ * sizeof(Type)) == 0;
+    }
+
+    BigInteger& MultiplyPow5(unsigned exp) {
+        static const uint32_t kPow5[12] = {
+            5,
+            5 * 5,
+            5 * 5 * 5,
+            5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5
+        };
+        if (exp == 0) return *this;
+        unsigned e = exp;
+        for (; e >= 27; e -= 27) *this *= RAPIDJSON_UINT64_C2(0X6765C793, 0XFA10079D); // 5^27
+        for (; e >= 13; e -= 13) *this *= 1220703125u; // 5^13
+        if (e > 0)               *this *= kPow5[e - 1];
+        return *this;
+    }
+
+    // Compute absolute difference of this and rhs.
+    // Return false if this < rhs
+    bool Difference(const BigInteger& rhs, BigInteger* out) const {
+        int cmp = Compare(rhs);
+        if (cmp == 0) {
+            *out = BigInteger(0);
+            return false;
+        }
+        const BigInteger *a, *b;  // Makes a > b
+        bool ret;
+        if (cmp < 0) { a = &rhs; b = this; ret = true; }
+        else         { a = this; b = &rhs; ret = false; }
+
+        Type borrow = 0;
+        for (size_t i = 0; i < a->count_; i++) {
+            Type d = a->digits_[i] - borrow;
+            if (i < b->count_)
+                d -= b->digits_[i];
+            borrow = (d > a->digits_[i]) ? 1 : 0;
+            out->digits_[i] = d;
+            if (d != 0)
+                out->count_ = i + 1;
+        }
+
+        return ret;
+    }
+
+    int Compare(const BigInteger& rhs) const {
+        if (count_ != rhs.count_) {
+            if (count_ < rhs.count_)
+                return -1;
+            else
+                return 1;
+        }
+
+        for (size_t i = count_; i > 0;) {
+            i--;
+            if (digits_[i] != rhs.digits_[i]) {
+                if (digits_[i] < rhs.digits_[i])
+                    return -1;
+                else
+                    return 1;
+            }
+        }
+
+        return 0;
+    }
+
+    size_t GetCount() const { return count_; }
+    Type GetDigit(size_t index) const { RAPIDJSON_ASSERT(index < count_);  return digits_[index]; }
+    bool IsZero() const { return count_ == 1 && digits_[0] == 0;  }
+
+private:
+    void AppendDecimal64(const char* begin, const char* end) {
+        uint64_t u = ParseUint64(begin, end);
+        if (IsZero())
+            *this = u;
+        else {
+            unsigned exp = end - begin;
+            MultiplyPow5(exp) <<= exp;    // *this *= 10^exp
+            *this += u;
+        }
+    }
+
+    void PushBack(Type digit) {
+        RAPIDJSON_ASSERT(count_ < kCapacity);
+        digits_[count_++] = digit;
+    }
+
+    static uint64_t ParseUint64(const char* begin, const char* end) {
+        uint64_t r = 0;
+        for (const char* p = begin; p != end; ++p) {
+            RAPIDJSON_ASSERT(*p >= '0' && *p <= '9');
+            r = r * 10 + (*p - '0');
+        }
+        return r;
+    }
+
+    // Assume a * b + k < 2^128
+    static uint64_t MulAdd64(uint64_t a, uint64_t b, uint64_t k, uint64_t* outHigh) {
+#if defined(_MSC_VER) && defined(_M_AMD64)
+        uint64_t low = _umul128(a, b, outHigh);
+        uint64_t outLow = low + k;
+        if (outLow < low)
+            (*outHigh)++;
+        //uint64_t outLow;
+        //unsigned char carry = _addcarryx_u64(0, low, k, &outLow);
+        //_addcarry_u64(carry, *outHigh, 0, outHigh);
+        return outLow;
+#else
+        // TODO
+#endif
+    }
+
+    static const size_t kBitCount = 3328;  // 64bit * 54 > 10^1000
+    static const size_t kCapacity = kBitCount / sizeof(Type);
+    static const size_t kTypeBit = sizeof(Type) * 8;
+
+    Type digits_[kCapacity];
+    size_t count_;
+};
+
 inline double StrtodFastPath(double significand, int exp) {
     if (exp < -308)
         return 0.0;
@@ -46,27 +327,122 @@ inline double NormalPrecision(double d, int p) {
     return d;
 }
 
-inline double FullPrecision(bool useStrtod, double d, int p, const char* str) {
+inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp, bool* adjustToNegative) {
+    static const int kSignificandSize = 52;
+    static const int kExponentBias = 0x3FF;
+    static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
+    static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
+    static const uint64_t kHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
+
+    union {
+        double d;
+        uint64_t u;
+    }u;
+    u.d = b;
+    const uint64_t bInt = (u.u & kSignificandMask) | kHiddenBit;
+    const int bExp = ((u.u & kExponentMask) >> kSignificandSize) - kExponentBias - kSignificandSize;
+    const int hExp = bExp - 1;
+
+    int dS_Exp2 = 0;
+    int dS_Exp5 = 0;
+    int bS_Exp2 = 0;
+    int bS_Exp5 = 0;
+    int hS_Exp2 = 0;
+    int hS_Exp5 = 0;
+
+    // Adjust for decimal exponent
+    if (dExp >= 0) {
+        dS_Exp2 += dExp;
+        dS_Exp5 += dExp;
+    }
+    else {
+        bS_Exp2 -= dExp;
+        bS_Exp5 -= dExp;
+        hS_Exp2 -= dExp;
+        hS_Exp5 -= dExp;
+    }
+
+    // Adjust for binary exponent
+    if (bExp >= 0)
+        bS_Exp2 += bExp;
+    else {
+        dS_Exp2 -= bExp;
+        hS_Exp2 -= bExp;
+    }
+
+    // Adjust for half ulp exponent
+    if (hExp >= 0)
+        hS_Exp2 += hExp;
+    else {
+        dS_Exp2 -= hExp;
+        bS_Exp2 -= hExp;
+    }
+
+    // Remove common power of two factor from all three scaled values
+    int common_Exp2 = std::min(dS_Exp2, std::min(bS_Exp2, hS_Exp2));
+    dS_Exp2 -= common_Exp2;
+    bS_Exp2 -= common_Exp2;
+    hS_Exp2 -= common_Exp2;
+
+    BigInteger dS = d;
+    dS.MultiplyPow5(dS_Exp5) <<= dS_Exp2;
+
+    BigInteger bS(bInt);
+    bS.MultiplyPow5(bS_Exp5) <<= bS_Exp2;
+
+    BigInteger hS(1);
+    hS.MultiplyPow5(hS_Exp5) <<= hS_Exp2;
+
+    BigInteger delta(0);
+    *adjustToNegative = dS.Difference(bS, &delta);
+
+    return delta.Compare(hS);
+}
+
+inline double FullPrecision(double d, int dExp, const char* decimals, size_t length) {
+    RAPIDJSON_ASSERT(d >= 0.0);
+
     // Use fast path for string-to-double conversion if possible
     // see http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
-    if (!useStrtod && p > 22) {
+    int p = dExp;
+    if (p > 22) {
         if (p < 22 + 16) {
             // Fast Path Cases In Disguise
             d *= internal::Pow10(p - 22);
             p = 22;
         }
-        else
-            useStrtod = true;
     }
 
-    if (!useStrtod && p >= -22 && d <= 9007199254740991.0) // 2^53 - 1
-        d = StrtodFastPath(d, p);
-    else {
-        printf("s=%s p=%d\n", str, p);
-        double guess = NormalPrecision(d, p);
-        d = guess;
+    if (p >= -22 && d <= 9007199254740991.0) // 2^53 - 1
+        return StrtodFastPath(d, p);
+
+    if (p + int(length) < -324)
+        return 0.0;
+
+    //printf("s=%s p=%d\n", decimals, p);
+    const BigInteger dInt(decimals, length);
+    Double approx = NormalPrecision(d, p);
+    for (;;) {
+        //printf("approx=%.17g\n", approx.Value());
+        bool adjustToNegative;
+        int cmp = CheckWithinHalfULP(approx.Value(), dInt, dExp, &adjustToNegative);
+        if (cmp < 0)
+            return approx.Value();  // within half ULP
+        else if (cmp == 0) {
+            // Round towards even
+            if (approx.Significand() & 1)
+                return approx.NextDouble();
+            else
+                return approx.Value();
+        }
+        else {
+            // adjustment
+            if (adjustToNegative)
+                approx = approx.PreviousDouble();
+            else
+                approx = approx.NextDouble();
+        }            
     }
-    return d;
 }
 
 } // namespace internal
diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h
index b4299777..17faff8e 100644
--- a/include/rapidjson/reader.h
+++ b/include/rapidjson/reader.h
@@ -731,6 +731,7 @@ private:
         RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
         RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
         size_t Tell() { return is.Tell(); }
+        size_t Length() { return 0; }
         const char* Pop() { return 0; }
 
     protected:
@@ -751,6 +752,8 @@ private:
             return Base::is.Take();
         }
 
+        size_t Length() { return stackStream.Length(); }
+
         const char* Pop() {
             stackStream.Put('\0');
             return stackStream.Pop();
@@ -811,7 +814,6 @@ private:
 
         // Parse 64bit int
         bool useDouble = false;
-        bool useStrtod = false;
         double d = 0.0;
         if (use64bit) {
             if (minus) 
@@ -838,9 +840,6 @@ private:
 
         // Force double for big integer
         if (useDouble) {
-            if (parseFlags & kParseFullPrecisionFlag)
-                useStrtod = true;
-
             while (s.Peek() >= '0' && s.Peek() <= '9') {
                 if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0
                     RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
@@ -860,24 +859,15 @@ private:
                     i64 = i;
         
                 while (s.Peek() >= '0' && s.Peek() <= '9') {
-                    if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) { // 2^53 - 1 for fast path
-                        if (parseFlags & kParseFullPrecisionFlag) {
-                            while (s.Peek() >= '0' && s.Peek() <= '9') {
-                                s.TakePush();
-                                --expFrac;
-                            }
-                            useStrtod = true;
-                        }
+                    if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
                         break;
-                    }
                     else {
                         i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
                         --expFrac;
                     }
                 }
 
-                if (!useStrtod)
-                    d = (double)i64;
+                d = (double)i64;
 #else
                 // Use double to store significand in 32-bit architecture
                 d = use64bit ? (double)i64 : (double)i;
@@ -885,17 +875,9 @@ private:
                 useDouble = true;
             }
 
-            if ((parseFlags & kParseFullPrecisionFlag) == 0 || !useStrtod) {
-                while (s.Peek() >= '0' && s.Peek() <= '9') {
-                    d = d * 10.0 + (s.TakePush() - '0');
-                    --expFrac;
-                }
-            }
-            else {
-                while (s.Peek() >= '0' && s.Peek() <= '9') {
-                    s.TakePush();
-                    --expFrac;
-                }
+            while (s.Peek() >= '0' && s.Peek() <= '9') {
+                d = d * 10.0 + (s.TakePush() - '0');
+                --expFrac;
             }
 
             if (expFrac == 0)
@@ -936,12 +918,13 @@ private:
 
         // Finish parsing, call event according to the type of number.
         bool cont = true;
-        const char* str = s.Pop();  // Pop stack no matter if it will be used or not.
+        size_t length = s.Length();
+        const char* decimal = s.Pop();  // Pop stack no matter if it will be used or not.
 
         if (useDouble) {
             int p = exp + expFrac;
             if (parseFlags & kParseFullPrecisionFlag)
-                d = internal::FullPrecision(useStrtod, d, p, str);
+                d = internal::FullPrecision(d, p, decimal, length);
             else
                 d = internal::NormalPrecision(d, p);
author	Milo Yip <miloyip@gmail.com>	2014-09-12 19:03:20 +0400
committer	Milo Yip <miloyip@gmail.com>	2014-09-12 19:03:20 +0400
commit	4bd240abeecc37070576298271af27a2097d9665 (patch)
tree	a9ed38e252db0423858208070cd944b3e23ba8b4 /include/rapidjson
parent	359ebc78c0e1939e952fe2e6d1001286c5cf3dbb (diff)