329 lines
11 KiB
C++
329 lines
11 KiB
C++
/*
|
|
* This implementation is extracted from numpy:
|
|
* Repo: github.com/numpy/numpy
|
|
* File: numpy/core/src/npymath/halffloat.c
|
|
* Commit ID: 25c23f1d956104a072a95355ffaa7a38b53710b7
|
|
* Functions are made "static inline" for performance, and
|
|
* non-conversion functions are removed, and generation of
|
|
* exceptions is disabled.
|
|
*/
|
|
|
|
#include <cstdint>
|
|
typedef uint16_t npy_uint16;
|
|
typedef uint32_t npy_uint32;
|
|
typedef uint64_t npy_uint64;
|
|
|
|
/*
|
|
* This chooses between 'ties to even' and 'ties away from zero'.
|
|
*/
|
|
#define NPY_HALF_ROUND_TIES_TO_EVEN 1
|
|
/*
|
|
* If these are 1, the conversions try to trigger underflow,
|
|
* overflow, and invalid exceptions in the FP system when needed.
|
|
*/
|
|
#define NPY_HALF_GENERATE_OVERFLOW 0
|
|
#define NPY_HALF_GENERATE_UNDERFLOW 0
|
|
#define NPY_HALF_GENERATE_INVALID 0
|
|
|
|
/*
|
|
********************************************************************
|
|
* BIT-LEVEL CONVERSIONS *
|
|
********************************************************************
|
|
*/
|
|
|
|
static inline npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
|
|
{
|
|
npy_uint32 f_exp, f_sig;
|
|
npy_uint16 h_sgn, h_exp, h_sig;
|
|
|
|
h_sgn = (npy_uint16) ((f&0x80000000u) >> 16);
|
|
f_exp = (f&0x7f800000u);
|
|
|
|
/* Exponent overflow/NaN converts to signed inf/NaN */
|
|
if (f_exp >= 0x47800000u) {
|
|
if (f_exp == 0x7f800000u) {
|
|
/* Inf or NaN */
|
|
f_sig = (f&0x007fffffu);
|
|
if (f_sig != 0) {
|
|
/* NaN - propagate the flag in the significand... */
|
|
npy_uint16 ret = (npy_uint16) (0x7c00u + (f_sig >> 13));
|
|
/* ...but make sure it stays a NaN */
|
|
if (ret == 0x7c00u) {
|
|
ret++;
|
|
}
|
|
return h_sgn + ret;
|
|
} else {
|
|
/* signed inf */
|
|
return (npy_uint16) (h_sgn + 0x7c00u);
|
|
}
|
|
} else {
|
|
/* overflow to signed inf */
|
|
#if NPY_HALF_GENERATE_OVERFLOW
|
|
npy_set_floatstatus_overflow();
|
|
#endif
|
|
return (npy_uint16) (h_sgn + 0x7c00u);
|
|
}
|
|
}
|
|
|
|
/* Exponent underflow converts to a subnormal half or signed zero */
|
|
if (f_exp <= 0x38000000u) {
|
|
/*
|
|
* Signed zeros, subnormal floats, and floats with small
|
|
* exponents all convert to signed zero halfs.
|
|
*/
|
|
if (f_exp < 0x33000000u) {
|
|
#if NPY_HALF_GENERATE_UNDERFLOW
|
|
/* If f != 0, it underflowed to 0 */
|
|
if ((f&0x7fffffff) != 0) {
|
|
npy_set_floatstatus_underflow();
|
|
}
|
|
#endif
|
|
return h_sgn;
|
|
}
|
|
/* Make the subnormal significand */
|
|
f_exp >>= 23;
|
|
f_sig = (0x00800000u + (f&0x007fffffu));
|
|
#if NPY_HALF_GENERATE_UNDERFLOW
|
|
/* If it's not exactly represented, it underflowed */
|
|
if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
|
|
npy_set_floatstatus_underflow();
|
|
}
|
|
#endif
|
|
f_sig >>= (113 - f_exp);
|
|
/* Handle rounding by adding 1 to the bit beyond half precision */
|
|
#if NPY_HALF_ROUND_TIES_TO_EVEN
|
|
/*
|
|
* If the last bit in the half significand is 0 (already even), and
|
|
* the remaining bit pattern is 1000...0, then we do not add one
|
|
* to the bit after the half significand. In all other cases, we do.
|
|
*/
|
|
if ((f_sig&0x00003fffu) != 0x00001000u) {
|
|
f_sig += 0x00001000u;
|
|
}
|
|
#else
|
|
f_sig += 0x00001000u;
|
|
#endif
|
|
h_sig = (npy_uint16) (f_sig >> 13);
|
|
/*
|
|
* If the rounding causes a bit to spill into h_exp, it will
|
|
* increment h_exp from zero to one and h_sig will be zero.
|
|
* This is the correct result.
|
|
*/
|
|
return (npy_uint16) (h_sgn + h_sig);
|
|
}
|
|
|
|
/* Regular case with no overflow or underflow */
|
|
h_exp = (npy_uint16) ((f_exp - 0x38000000u) >> 13);
|
|
/* Handle rounding by adding 1 to the bit beyond half precision */
|
|
f_sig = (f&0x007fffffu);
|
|
#if NPY_HALF_ROUND_TIES_TO_EVEN
|
|
/*
|
|
* If the last bit in the half significand is 0 (already even), and
|
|
* the remaining bit pattern is 1000...0, then we do not add one
|
|
* to the bit after the half significand. In all other cases, we do.
|
|
*/
|
|
if ((f_sig&0x00003fffu) != 0x00001000u) {
|
|
f_sig += 0x00001000u;
|
|
}
|
|
#else
|
|
f_sig += 0x00001000u;
|
|
#endif
|
|
h_sig = (npy_uint16) (f_sig >> 13);
|
|
/*
|
|
* If the rounding causes a bit to spill into h_exp, it will
|
|
* increment h_exp by one and h_sig will be zero. This is the
|
|
* correct result. h_exp may increment to 15, at greatest, in
|
|
* which case the result overflows to a signed inf.
|
|
*/
|
|
#if NPY_HALF_GENERATE_OVERFLOW
|
|
h_sig += h_exp;
|
|
if (h_sig == 0x7c00u) {
|
|
npy_set_floatstatus_overflow();
|
|
}
|
|
return h_sgn + h_sig;
|
|
#else
|
|
return h_sgn + h_exp + h_sig;
|
|
#endif
|
|
}
|
|
|
|
static inline npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
|
|
{
|
|
npy_uint64 d_exp, d_sig;
|
|
npy_uint16 h_sgn, h_exp, h_sig;
|
|
|
|
h_sgn = (d&0x8000000000000000ULL) >> 48;
|
|
d_exp = (d&0x7ff0000000000000ULL);
|
|
|
|
/* Exponent overflow/NaN converts to signed inf/NaN */
|
|
if (d_exp >= 0x40f0000000000000ULL) {
|
|
if (d_exp == 0x7ff0000000000000ULL) {
|
|
/* Inf or NaN */
|
|
d_sig = (d&0x000fffffffffffffULL);
|
|
if (d_sig != 0) {
|
|
/* NaN - propagate the flag in the significand... */
|
|
npy_uint16 ret = (npy_uint16) (0x7c00u + (d_sig >> 42));
|
|
/* ...but make sure it stays a NaN */
|
|
if (ret == 0x7c00u) {
|
|
ret++;
|
|
}
|
|
return h_sgn + ret;
|
|
} else {
|
|
/* signed inf */
|
|
return h_sgn + 0x7c00u;
|
|
}
|
|
} else {
|
|
/* overflow to signed inf */
|
|
#if NPY_HALF_GENERATE_OVERFLOW
|
|
npy_set_floatstatus_overflow();
|
|
#endif
|
|
return h_sgn + 0x7c00u;
|
|
}
|
|
}
|
|
|
|
/* Exponent underflow converts to subnormal half or signed zero */
|
|
if (d_exp <= 0x3f00000000000000ULL) {
|
|
/*
|
|
* Signed zeros, subnormal floats, and floats with small
|
|
* exponents all convert to signed zero halfs.
|
|
*/
|
|
if (d_exp < 0x3e60000000000000ULL) {
|
|
#if NPY_HALF_GENERATE_UNDERFLOW
|
|
/* If d != 0, it underflowed to 0 */
|
|
if ((d&0x7fffffffffffffffULL) != 0) {
|
|
npy_set_floatstatus_underflow();
|
|
}
|
|
#endif
|
|
return h_sgn;
|
|
}
|
|
/* Make the subnormal significand */
|
|
d_exp >>= 52;
|
|
d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
|
|
#if NPY_HALF_GENERATE_UNDERFLOW
|
|
/* If it's not exactly represented, it underflowed */
|
|
if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
|
|
npy_set_floatstatus_underflow();
|
|
}
|
|
#endif
|
|
d_sig >>= (1009 - d_exp);
|
|
/* Handle rounding by adding 1 to the bit beyond half precision */
|
|
#if NPY_HALF_ROUND_TIES_TO_EVEN
|
|
/*
|
|
* If the last bit in the half significand is 0 (already even), and
|
|
* the remaining bit pattern is 1000...0, then we do not add one
|
|
* to the bit after the half significand. In all other cases, we do.
|
|
*/
|
|
if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
|
|
d_sig += 0x0000020000000000ULL;
|
|
}
|
|
#else
|
|
d_sig += 0x0000020000000000ULL;
|
|
#endif
|
|
h_sig = (npy_uint16) (d_sig >> 42);
|
|
/*
|
|
* If the rounding causes a bit to spill into h_exp, it will
|
|
* increment h_exp from zero to one and h_sig will be zero.
|
|
* This is the correct result.
|
|
*/
|
|
return h_sgn + h_sig;
|
|
}
|
|
|
|
/* Regular case with no overflow or underflow */
|
|
h_exp = (npy_uint16) ((d_exp - 0x3f00000000000000ULL) >> 42);
|
|
/* Handle rounding by adding 1 to the bit beyond half precision */
|
|
d_sig = (d&0x000fffffffffffffULL);
|
|
#if NPY_HALF_ROUND_TIES_TO_EVEN
|
|
/*
|
|
* If the last bit in the half significand is 0 (already even), and
|
|
* the remaining bit pattern is 1000...0, then we do not add one
|
|
* to the bit after the half significand. In all other cases, we do.
|
|
*/
|
|
if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
|
|
d_sig += 0x0000020000000000ULL;
|
|
}
|
|
#else
|
|
d_sig += 0x0000020000000000ULL;
|
|
#endif
|
|
h_sig = (npy_uint16) (d_sig >> 42);
|
|
|
|
/*
|
|
* If the rounding causes a bit to spill into h_exp, it will
|
|
* increment h_exp by one and h_sig will be zero. This is the
|
|
* correct result. h_exp may increment to 15, at greatest, in
|
|
* which case the result overflows to a signed inf.
|
|
*/
|
|
#if NPY_HALF_GENERATE_OVERFLOW
|
|
h_sig += h_exp;
|
|
if (h_sig == 0x7c00u) {
|
|
npy_set_floatstatus_overflow();
|
|
}
|
|
return h_sgn + h_sig;
|
|
#else
|
|
return h_sgn + h_exp + h_sig;
|
|
#endif
|
|
}
|
|
|
|
static inline npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
|
|
{
|
|
npy_uint16 h_exp, h_sig;
|
|
npy_uint32 f_sgn, f_exp, f_sig;
|
|
|
|
h_exp = (h&0x7c00u);
|
|
f_sgn = ((npy_uint32)h&0x8000u) << 16;
|
|
switch (h_exp) {
|
|
case 0x0000u: /* 0 or subnormal */
|
|
h_sig = (h&0x03ffu);
|
|
/* Signed zero */
|
|
if (h_sig == 0) {
|
|
return f_sgn;
|
|
}
|
|
/* Subnormal */
|
|
h_sig <<= 1;
|
|
while ((h_sig&0x0400u) == 0) {
|
|
h_sig <<= 1;
|
|
h_exp++;
|
|
}
|
|
f_exp = ((npy_uint32)(127 - 15 - h_exp)) << 23;
|
|
f_sig = ((npy_uint32)(h_sig&0x03ffu)) << 13;
|
|
return f_sgn + f_exp + f_sig;
|
|
case 0x7c00u: /* inf or NaN */
|
|
/* All-ones exponent and a copy of the significand */
|
|
return f_sgn + 0x7f800000u + (((npy_uint32)(h&0x03ffu)) << 13);
|
|
default: /* normalized */
|
|
/* Just need to adjust the exponent and shift */
|
|
return f_sgn + (((npy_uint32)(h&0x7fffu) + 0x1c000u) << 13);
|
|
}
|
|
}
|
|
|
|
static inline npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
|
|
{
|
|
npy_uint16 h_exp, h_sig;
|
|
npy_uint64 d_sgn, d_exp, d_sig;
|
|
|
|
h_exp = (h&0x7c00u);
|
|
d_sgn = ((npy_uint64)h&0x8000u) << 48;
|
|
switch (h_exp) {
|
|
case 0x0000u: /* 0 or subnormal */
|
|
h_sig = (h&0x03ffu);
|
|
/* Signed zero */
|
|
if (h_sig == 0) {
|
|
return d_sgn;
|
|
}
|
|
/* Subnormal */
|
|
h_sig <<= 1;
|
|
while ((h_sig&0x0400u) == 0) {
|
|
h_sig <<= 1;
|
|
h_exp++;
|
|
}
|
|
d_exp = ((npy_uint64)(1023 - 15 - h_exp)) << 52;
|
|
d_sig = ((npy_uint64)(h_sig&0x03ffu)) << 42;
|
|
return d_sgn + d_exp + d_sig;
|
|
case 0x7c00u: /* inf or NaN */
|
|
/* All-ones exponent and a copy of the significand */
|
|
return d_sgn + 0x7ff0000000000000ULL +
|
|
(((npy_uint64)(h&0x03ffu)) << 42);
|
|
default: /* normalized */
|
|
/* Just need to adjust the exponent and shift */
|
|
return d_sgn + (((npy_uint64)(h&0x7fffu) + 0xfc000u) << 42);
|
|
}
|
|
}
|