diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index e431cbcd..6fa640db 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -268,10 +268,9 @@ loop_parse_if_eight_digits(char const *&p, char const *const pend, } // Consume a remaining 4-7 digit run in a single SWAR step instead of // byte-by-byte (reuses the existing 4-digit helpers). The parsed result is - // identical either way. Gated to clang: on gcc the extra 4-digit check - // regresses inputs whose remainder is shorter than 4 digits (it becomes pure - // overhead there); clang does not show that. -#if defined(__clang__) + // identical either way. Historically gated to clang because gcc regressed on + // short remainders, but that verdict predates the span-elision restructure; + // with the leaner hot path the 4-digit step now wins on gcc as well. if ((pend - p) >= 4) { uint32_t const val4 = read4_to_u32(p); if (is_made_of_four_digits_fast(val4)) { @@ -280,7 +279,6 @@ loop_parse_if_eight_digits(char const *&p, char const *const pend, p += 4; } } -#endif } enum class parse_error { diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 7d338f3b..117ec696 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -198,7 +198,14 @@ clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative, // We proceed optimistically, assuming that detail::rounds_to_nearest() // returns true. if (binary_format::min_exponent_fast_path() <= exponent && - exponent <= binary_format::max_exponent_fast_path()) { + exponent <= binary_format::max_exponent_fast_path() && + mantissa <= binary_format::max_mantissa_fast_path()) { + // The mantissa bound above is a necessary condition for BOTH branches + // below: the rounding-mode-dependent branch checks the tighter + // max_mantissa_fast_path(exponent) <= max_mantissa_fast_path(). 
Testing + // it before detail::rounds_to_nearest() spares long-mantissa inputs + // (which can never take the fast path) the volatile-float probe. + // // Unfortunately, the conventional Clinger's fast path is only possible // when the system rounds to the nearest float. // @@ -209,18 +216,16 @@ clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative, if (!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) { // We have that fegetround() == FE_TONEAREST. // Next is Clinger's fast path. - if (mantissa <= binary_format::max_mantissa_fast_path()) { - value = T(mantissa); - if (exponent < 0) { - value = value / binary_format::exact_power_of_ten(-exponent); - } else { - value = value * binary_format::exact_power_of_ten(exponent); - } - if (is_negative) { - value = -value; - } - return true; + value = T(mantissa); + if (exponent < 0) { + value = value / binary_format::exact_power_of_ten(-exponent); + } else { + value = value * binary_format::exact_power_of_ten(exponent); + } + if (is_negative) { + value = -value; } + return true; } else { // We do not have that fegetround() == FE_TONEAREST. // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's