Usually nextafter is implemented in the following way:
double nextafter(double x, double y)
{
// handle corner cases
int delta = ((x > 0) == (x < y)) ? 1 : -1;
unsigned long long mant = __mant(x); // get mantissa as int
mant += delta;
...
}
Here, a binary representation is obtained using __mant(x).
Out of curiosity: is it possible to implement nextafter without obtaining a binary representation? For example, using a sequence of arithmetic floating point operations.
The code below implements nextafter in the ascending direction for finite values for IEEE-754 arithmetic with round-to-nearest-ties-to-even. Handling of NaNs, infinities, and the descending direction is obvious.
Without assuming IEEE-754 or round-to-nearest, the floating-point properties are sufficiently well characterized by C 2018 5.2.4.2.2 that we we can implement nextafter (again in the ascending direction) this way:
-DBL_MAX.-DBL_TRUE_MIN, return zero.+DBL_TRUE_MIN.+DBL_MAX, return +∞.nextafter(x, y) implementation, as it moves the first argument in the direction of the second argument, so we never ascend from +∞ because we never receive a second argument greater than +∞.)logb to find the exponent e. If e is less than DBL_MIN, return the input plus DBL_TRUE_MIN (the ULP of subnormals and the lowest normals). If e is not less than DBL_MIN, return the input plus scalb(1, e + 1 - DBL_MANT_DIG) (the specific ULP for the input). Rounding method is irrelevant as these additions are exact.FLT_RADIX (the input equals scalb(1, e)), decrement the second argument of scalb by one (because this nextafter step transitions from a greater exponent to a lower one).Note that FLT_RADIX is correct above; there is no DBL_RADIX; all floating-point formats use the same radix.
If you want to consider logb and scalb as functions that manipulate the floating-point representation, then they could be replaced by ordinary arithmetic. log could find a quick approximation that could be quickly refined to the true exponent, and scalb can be implemented in a variety of ways, possibly simply a table look-up. If log remains objectionable, then trial comparisons would suffice.
The above handles formats with or without subnormals because, if subnormals are supported, it steps into them with the decrement, and, if subnormals are not supported, the minimum normal magnitude is DBL_TRUE_MIN, so it is recognized in the above as the point where we step to zero next.
There is one caveat; the C standard allows it to be “indeterminable” whether an implementation supports subnormals or not “if floating-point operations do not consistently interpret subnormal representations as zero, nor as nonzero.” In that case, I do not see that the standard specifies what the standard nextafter does, so there is nothing for us to do to match it in our implementation. Supposing that subnormals are sometimes supported, DBL_TRUE_MIN must be a subnormal value, and the above will attempt to work as if subnormal support is currently on (e.g., flush-to-zero is off) and, if it is off, you will get whatever you get.
#include <float.h>
#include <math.h>
/* Return the next floating-point value after the finite value q.
This was inspired by Algorithm 3.5 in Siegfried M. Rump, Takeshi Ogita, and
Shin'ichi Oishi, "Accurate Floating-Point Summation", _Technical Report
05.12_, Faculty for Information and Communication Sciences, Hamburg
University of Technology, November 13, 2005.
IEEE-754 and the default rounding mode,
round-to-nearest-ties-to-even, may be required.
*/
double NextAfter(double q)
{
/* Scale is .625 ULP, so multiplying it by any significand in [1, 2)
yields something in [.625 ULP, 1.25 ULP].
*/
static const double Scale = 0.625 * DBL_EPSILON;
/* Either of the following may be used, according to preference and
performance characteristics. In either case, use a fused multiply-add
(fma) to add to q a number that is in [.625 ULP, 1.25 ULP]. When this
is rounded to the floating-point format, it must produce the next
number after q.
*/
#if 0
// SmallestPositive is the smallest positive floating-point number.
static const double SmallestPositive = DBL_EPSILON * DBL_MIN;
if (fabs(q) < 2*DBL_MIN)
return q + SmallestPositive;
return fma(fabs(q), Scale, q);
#else
return fma(fmax(fabs(q), DBL_MIN), Scale, q);
#endif
}
#if defined CompileMain
#include <stdio.h>
#include <stdlib.h>
#define NumberOf(a) (sizeof (a) / sizeof *(a))
int main(void)
{
int status = EXIT_SUCCESS;
static const struct { double in, out; } cases[] =
{
{ INFINITY, INFINITY },
{ 0x1.fffffffffffffp1023, INFINITY },
{ 0x1.ffffffffffffep1023, 0x1.fffffffffffffp1023 },
{ 0x1.ffffffffffffdp1023, 0x1.ffffffffffffep1023 },
{ 0x1.ffffffffffffcp1023, 0x1.ffffffffffffdp1023 },
{ 0x1.0000000000003p1023, 0x1.0000000000004p1023 },
{ 0x1.0000000000002p1023, 0x1.0000000000003p1023 },
{ 0x1.0000000000001p1023, 0x1.0000000000002p1023 },
{ 0x1.0000000000000p1023, 0x1.0000000000001p1023 },
{ 0x1.fffffffffffffp1022, 0x1.0000000000000p1023 },
{ 0x1.fffffffffffffp1, 0x1.0000000000000p2 },
{ 0x1.ffffffffffffep1, 0x1.fffffffffffffp1 },
{ 0x1.ffffffffffffdp1, 0x1.ffffffffffffep1 },
{ 0x1.ffffffffffffcp1, 0x1.ffffffffffffdp1 },
{ 0x1.0000000000003p1, 0x1.0000000000004p1 },
{ 0x1.0000000000002p1, 0x1.0000000000003p1 },
{ 0x1.0000000000001p1, 0x1.0000000000002p1 },
{ 0x1.0000000000000p1, 0x1.0000000000001p1 },
{ 0x1.fffffffffffffp-1022, 0x1.0000000000000p-1021 },
{ 0x1.ffffffffffffep-1022, 0x1.fffffffffffffp-1022 },
{ 0x1.ffffffffffffdp-1022, 0x1.ffffffffffffep-1022 },
{ 0x1.ffffffffffffcp-1022, 0x1.ffffffffffffdp-1022 },
{ 0x1.0000000000003p-1022, 0x1.0000000000004p-1022 },
{ 0x1.0000000000002p-1022, 0x1.0000000000003p-1022 },
{ 0x1.0000000000001p-1022, 0x1.0000000000002p-1022 },
{ 0x1.0000000000000p-1022, 0x1.0000000000001p-1022 },
{ 0x0.fffffffffffffp-1022, 0x1.0000000000000p-1022 },
{ 0x0.ffffffffffffep-1022, 0x0.fffffffffffffp-1022 },
{ 0x0.ffffffffffffdp-1022, 0x0.ffffffffffffep-1022 },
{ 0x0.ffffffffffffcp-1022, 0x0.ffffffffffffdp-1022 },
{ 0x0.0000000000003p-1022, 0x0.0000000000004p-1022 },
{ 0x0.0000000000002p-1022, 0x0.0000000000003p-1022 },
{ 0x0.0000000000001p-1022, 0x0.0000000000002p-1022 },
{ 0x0.0000000000000p-1022, 0x0.0000000000001p-1022 },
{ -0x1.fffffffffffffp1023, -0x1.ffffffffffffep1023 },
{ -0x1.ffffffffffffep1023, -0x1.ffffffffffffdp1023 },
{ -0x1.ffffffffffffdp1023, -0x1.ffffffffffffcp1023 },
{ -0x1.0000000000004p1023, -0x1.0000000000003p1023 },
{ -0x1.0000000000003p1023, -0x1.0000000000002p1023 },
{ -0x1.0000000000002p1023, -0x1.0000000000001p1023 },
{ -0x1.0000000000001p1023, -0x1.0000000000000p1023 },
{ -0x1.0000000000000p1023, -0x1.fffffffffffffp1022 },
{ -0x1.0000000000000p2, -0x1.fffffffffffffp1 },
{ -0x1.fffffffffffffp1, -0x1.ffffffffffffep1 },
{ -0x1.ffffffffffffep1, -0x1.ffffffffffffdp1 },
{ -0x1.ffffffffffffdp1, -0x1.ffffffffffffcp1 },
{ -0x1.0000000000004p1, -0x1.0000000000003p1 },
{ -0x1.0000000000003p1, -0x1.0000000000002p1 },
{ -0x1.0000000000002p1, -0x1.0000000000001p1 },
{ -0x1.0000000000001p1, -0x1.0000000000000p1 },
{ -0x1.0000000000000p-1021, -0x1.fffffffffffffp-1022 },
{ -0x1.fffffffffffffp-1022, -0x1.ffffffffffffep-1022 },
{ -0x1.ffffffffffffep-1022, -0x1.ffffffffffffdp-1022 },
{ -0x1.ffffffffffffdp-1022, -0x1.ffffffffffffcp-1022 },
{ -0x1.0000000000004p-1022, -0x1.0000000000003p-1022 },
{ -0x1.0000000000003p-1022, -0x1.0000000000002p-1022 },
{ -0x1.0000000000002p-1022, -0x1.0000000000001p-1022 },
{ -0x1.0000000000001p-1022, -0x1.0000000000000p-1022 },
{ -0x1.0000000000000p-1022, -0x0.fffffffffffffp-1022 },
{ -0x0.fffffffffffffp-1022, -0x0.ffffffffffffep-1022 },
{ -0x0.ffffffffffffep-1022, -0x0.ffffffffffffdp-1022 },
{ -0x0.ffffffffffffdp-1022, -0x0.ffffffffffffcp-1022 },
{ -0x0.0000000000004p-1022, -0x0.0000000000003p-1022 },
{ -0x0.0000000000003p-1022, -0x0.0000000000002p-1022 },
{ -0x0.0000000000002p-1022, -0x0.0000000000001p-1022 },
{ -0x0.0000000000001p-1022, -0x0.0000000000000p-1022 },
};
for (int i = 0; i < NumberOf(cases); ++i)
{
double in = cases[i].in, expected = cases[i].out;
double observed = NextAfter(in);
printf("NextAfter(%a) = %a.\n", in, observed);
if (! (observed == expected))
{
printf("\tError, expected %a.\n", expected);
status = EXIT_FAILURE;
}
}
return status;
}
#endif // defined CompileMain
For my sample ISO-C99 implementation below I used float mapped to IEEE-754 binary32 because I can get better test coverage that way. The assumptions are that IEEE-754 bindings for C are in effect (so C floating-point types are bound to IEEE-754 binary types, subnormals are supported etc), the rounding mode in effect is round-to-nearest-or-even, and exception signaling requirements specified by the ISO-C standard (FE_INEXACT, FE_OVERFLOW, FE_UNDERFLOW) are waived (the masked response is delivered).
The code may be more complex than it needs to be; I simply separated out the various operand classes and handled them one by one. I used the Intel compiler with the strictest floating-point settings to compile. The implementation of nextafterf() from the Intel math library is used as a golden reference. I consider it highly unlikely, but obviously not impossible, that my implementation has a bug that matches a bug in Intel's library implementation.
I am demonstrating three variants below, one of which is suitable for platforms without FMA (fused multiply-add), while another avoids floating-point division which may be slow on some platforms. In general, the stepping mechanism differs between normalized operands and subnormal / zero operands. Stepping from a subnormal towards negative zero does not readily work with any general scheme. Each of the three variants uses a different approach of dealing with this: by special casing one single case, by reducing slightly the step size for subnormal operands, or by scaling to positive integers followed by back-scaling.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <float.h>
#include <string.h>
#include <math.h>
#define VARIANT (1) // 1, 2, or 3
float my_nextafterf (float a, float b)
{
const float FP32_MIN_NORMAL = 0x1.000000p-126f;
const float FP32_MAX_NORMAL = 0x1.fffffep+127f;
const float FP32_EPSILON = 0x1.0p-23f;
const float FP32_ONE = 1.0f;
const float FP32_HALF = 0.5f;
const float FP32_ZERO = 0.0f;
const float FP32_NEG_ZERO = FP32_ZERO * (-FP32_ONE);
const float FP32_MIN_SUBNORM = FP32_MIN_NORMAL * FP32_EPSILON;
const float FP32_SUBNORM_SCALE = FP32_ONE / FP32_MIN_NORMAL;
const float FP32_INT_SCALE = FP32_ONE / FP32_EPSILON;
const float FP32_ONE_M_ULP = FP32_ONE - FP32_EPSILON * FP32_HALF;
const float FP32_ONE_P_ULP = FP32_ONE + FP32_EPSILON;
const float FP32_INC = FP32_ONE_P_ULP * FP32_EPSILON * FP32_HALF;
float r;
if ((!(fabsf(a) <= INFINITY)) || (!(fabsf(b) <= INFINITY))) { // unordered
r = a + b;
}
else if (a == b) { // equal
r = b;
}
else if (fabsf (a) == INFINITY) { // infinity
r = (a >= FP32_ZERO) ? FP32_MAX_NORMAL : (-FP32_MAX_NORMAL);
}
#if VARIANT == 1
else if (fabsf (a) > FP32_MIN_NORMAL) { // normal
float factor = ((a >= FP32_ZERO) == (a < b)) ? FP32_INC : (-FP32_INC);
r = fmaf (factor, a, a);
} else { // zero, subnormal, or smallest normal
float scal = (a >= FP32_ZERO) ? FP32_INT_SCALE : (-FP32_INT_SCALE);
float incr = ((a >= FP32_ZERO) == (a < b)) ? FP32_ONE : (-FP32_ONE);
r = (a * scal * FP32_SUBNORM_SCALE + incr) / scal / FP32_SUBNORM_SCALE;
}
#elif VARIANT == 2
else if (fabsf (a) > FP32_MIN_NORMAL) { // normal
r = ((a < b) == (a >= FP32_ZERO)) ?
(a / FP32_ONE_M_ULP) : (a * FP32_ONE_M_ULP);
} else { // zero, subnormal, or smallest normal
float incr = (a >= FP32_ZERO) ? FP32_MIN_SUBNORM : (-FP32_MIN_SUBNORM);
r = ((a < b) == (a >= FP32_ZERO)) ? (a + incr) :
((a == (-FP32_MIN_SUBNORM)) ? FP32_NEG_ZERO : (a - incr));
}
#elif VARIANT == 3
else {
float factor1 = (fabsf (a) > FP32_MIN_NORMAL) ?
(((a < b) == (a >= FP32_ZERO)) ? FP32_INC : (-FP32_INC)) :
((a >= FP32_ZERO) ? FP32_MIN_SUBNORM : (-FP32_MIN_SUBNORM));
float factor2 = (fabsf (a) > FP32_MIN_NORMAL) ? a :
(((a < b) == (a >= FP32_ZERO)) ? FP32_ONE_M_ULP : (-FP32_ONE_M_ULP));
r = fmaf (factor1, factor2, a);
}
#else // VARIANT
#error unknown VARIANT
#endif // VARIANT
return r;
}
float uint32_as_float (uint32_t a)
{
float r;
memcpy (&r, &a, sizeof r);
return r;
}
uint32_t float_as_uint32 (float a)
{
uint32_t r;
memcpy (&r, &a, sizeof r);
return r;
}
// Fixes via: Greg Rose, KISS: A Bit Too Simple. http://eprint.iacr.org/2011/007
static uint32_t kiss_z = 362436069;
static uint32_t kiss_w = 521288629;
static uint32_t kiss_jsr = 123456789;
static uint32_t kiss_jcong = 380116160;
#define znew (kiss_z=36969*(kiss_z&0xffff)+(kiss_z>>16))
#define wnew (kiss_w=18000*(kiss_w&0xffff)+(kiss_w>>16))
#define MWC ((znew<<16)+wnew)
#define SHR3 (kiss_jsr^=(kiss_jsr<<13),kiss_jsr^=(kiss_jsr>>17),kiss_jsr^=(kiss_jsr<<5))
#define CONG (kiss_jcong=69069*kiss_jcong+13579)
#define KISS ((MWC^CONG)+SHR3)
int main (void)
{
float a, b, res, ref;
uint32_t ia, ib, ires, iref;
const uint32_t FP32_QNAN_BIT = 0x00400000; // x86 and other architectures
printf ("Testing nextafterf() variant %d\n", VARIANT);
ia = 0x0000000;
do {
for (int i = 1; i < 20; i++) {
switch (i) {
case 0: ib = ia;
break;
case 1: ib = ia - 1;
break;
case 2: ib = ia + 1;
break;
case 3: ib = 0x00000000;
break;
case 4: ib = 0x80000000;
break;
case 5: ib = 0x7f800000;
break;
case 6: ib = 0xff800000;
break;
default: ib = KISS;
break;
}
a = uint32_as_float (ia);
b = uint32_as_float (ib);
res = my_nextafterf (a, b);
ref = nextafterf (a, b);
ires = float_as_uint32 (res);
iref = float_as_uint32 (ref);
if (ires != iref) {
/* if both 'from' and 'to' are NaN, result may be either NaN, quietened */
if (!(isnan (a) && isnan (b) &&
((ires == (ia | FP32_QNAN_BIT)) || (ires == (ib | FP32_QNAN_BIT))))) {
printf ("error: a=%08x b=%08x res=%08x ref=%08x\n", ia, ib, ires, iref);
}
}
}
ia++;
if (!(ia & 0xffffff)) printf ("\ria = 0x%08x", ia);
} while (ia);
return EXIT_SUCCESS;
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With