[libc] Implement cosf function that is correctly rounded to all rounding modes.
Implement cosf function that is correctly rounded to all rounding modes. Performance benchmark using perf tool from CORE-MATH project (https://gitlab.inria.fr/core-math/core-math/-/tree/master) on Ryzen 1700: Before this patch (not correctly rounded): ``` $ CORE_MATH_PERF_MODE="rdtsc" ./perf.sh cosf CORE-MATH reciprocal throughput : 19.043 System LIBC reciprocal throughput : 26.328 LIBC reciprocal throughput : 30.955 $ CORE_MATH_PERF_MODE="rdtsc" ./perf.sh cosf --latency GNU libc version: 2.31 GNU libc release: stable CORE-MATH latency : 49.995 System LIBC latency : 59.286 LIBC latency : 60.174 ``` After this patch (correctly rounded): ``` $ CORE_MATH_PERF_MODE="rdtsc" ./perf.sh cosf GNU libc version: 2.31 GNU libc release: stable CORE-MATH reciprocal throughput : 19.072 System LIBC reciprocal throughput : 26.286 LIBC reciprocal throughput : 13.631 $ CORE_MATH_PERF_MODE="rdtsc" ./perf.sh cosf --latency GNU libc version: 2.31 GNU libc release: stable CORE-MATH latency : 49.872 System LIBC latency : 59.468 LIBC latency : 56.119 ``` Reviewed By: orex, zimmermann6 Differential Revision: https://reviews.llvm.org/D130644
This commit is contained in:
parent
6ee9e25fd9
commit
2ff187fbc9
|
@ -154,7 +154,7 @@ Accuracy of Higher Math Functions
|
|||
============== ================ =============== ======================
|
||||
<Func> <Func_f> (float) <Func> (double) <Func_l> (long double)
|
||||
============== ================ =============== ======================
|
||||
cos 0.776 ULPs large
|
||||
cos |check| large
|
||||
exp |check|
|
||||
exp2 |check|
|
||||
expm1 |check|
|
||||
|
@ -197,7 +197,7 @@ Performance
|
|||
| +-----------+-------------------+-----------+-------------------+ +------------+-------------------------+--------------+---------------+
|
||||
| | LLVM libc | Reference (glibc) | LLVM libc | Reference (glibc) | | CPU | OS | Compiler | Special flags |
|
||||
+==============+===========+===================+===========+===================+=====================================+============+=========================+==============+===============+
|
||||
| cosf | 37 | 32 | 73 | 72 | :math:`[0, 2\pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
|
||||
| cosf | 14 | 32 | 56 | 59 | :math:`[0, 2\pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
|
||||
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
|
||||
| expf | 9 | 7 | 44 | 38 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
|
||||
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
|
||||
|
|
|
@ -62,10 +62,17 @@ add_entrypoint_object(
|
|||
cosf.cpp
|
||||
HDRS
|
||||
../cosf.h
|
||||
range_reduction.h
|
||||
range_reduction_fma.h
|
||||
DEPENDS
|
||||
.sincosf_utils
|
||||
.common_constants
|
||||
libc.include.math
|
||||
libc.src.errno.errno
|
||||
libc.src.__support.FPUtil.fputil
|
||||
libc.src.__support.FPUtil.fma
|
||||
libc.src.__support.FPUtil.multiply_add
|
||||
libc.src.__support.FPUtil.nearest_integer
|
||||
libc.src.__support.FPUtil.polyeval
|
||||
COMPILE_OPTIONS
|
||||
-O3
|
||||
)
|
||||
|
|
|
@ -7,59 +7,184 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/math/cosf.h"
|
||||
#include "math_utils.h"
|
||||
#include "sincosf_utils.h"
|
||||
|
||||
#include "common_constants.h"
|
||||
#include "src/__support/FPUtil/BasicOperations.h"
|
||||
#include "src/__support/FPUtil/FEnvImpl.h"
|
||||
#include "src/__support/FPUtil/FPBits.h"
|
||||
#include "src/__support/FPUtil/PolyEval.h"
|
||||
#include "src/__support/FPUtil/except_value_utils.h"
|
||||
#include "src/__support/FPUtil/multiply_add.h"
|
||||
#include "src/__support/common.h"
|
||||
#include <math.h>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <errno.h>
|
||||
|
||||
#if defined(LIBC_TARGET_HAS_FMA)
|
||||
#include "range_reduction_fma.h"
|
||||
// using namespace __llvm_libc::fma;
|
||||
using __llvm_libc::fma::FAST_PASS_BOUND;
|
||||
using __llvm_libc::fma::large_range_reduction;
|
||||
using __llvm_libc::fma::small_range_reduction;
|
||||
#else
|
||||
#include "range_reduction.h"
|
||||
// using namespace __llvm_libc::generic;
|
||||
using __llvm_libc::generic::FAST_PASS_BOUND;
|
||||
using __llvm_libc::generic::large_range_reduction;
|
||||
using __llvm_libc::generic::small_range_reduction;
|
||||
#endif
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
// Fast cosf implementation. Worst-case ULP is 0.5607, maximum relative
|
||||
// error is 0.5303 * 2^-23. A single-step range reduction is used for
|
||||
// small values. Large inputs have their range reduced using fast integer
|
||||
// arithmetic.
|
||||
LLVM_LIBC_FUNCTION(float, cosf, (float y)) {
|
||||
double x = y;
|
||||
double s;
|
||||
int n;
|
||||
const sincos_t *p = &SINCOSF_TABLE[0];
|
||||
// Exceptional cases for cosf.
|
||||
static constexpr int COSF_EXCEPTS = 6;
|
||||
|
||||
if (abstop12(y) < abstop12(PIO4)) {
|
||||
double x2 = x * x;
|
||||
static constexpr fputil::ExceptionalValues<float, COSF_EXCEPTS> CosfExcepts{
|
||||
/* inputs */ {
|
||||
0x55325019, // x = 0x1.64a032p43
|
||||
0x5922aa80, // x = 0x1.4555p51
|
||||
0x5aa4542c, // x = 0x1.48a858p54
|
||||
0x5f18b878, // x = 0x1.3170fp63
|
||||
0x6115cb11, // x = 0x1.2b9622p67
|
||||
0x7beef5ef, // x = 0x1.ddebdep120
|
||||
},
|
||||
/* outputs (RZ, RU offset, RD offset, RN offset) */
|
||||
{
|
||||
{0x3f4ea5d2, 1, 0, 0}, // x = 0x1.64a032p43, cos(x) = 0x1.9d4ba4p-1 (RZ)
|
||||
{0x3f08aebe, 1, 0, 1}, // x = 0x1.4555p51, cos(x) = 0x1.115d7cp-1 (RZ)
|
||||
{0x3efa40a4, 1, 0, 0}, // x = 0x1.48a858p54, cos(x) = 0x1.f48148p-2 (RZ)
|
||||
{0x3f7f14bb, 1, 0, 0}, // x = 0x1.3170fp63, cos(x) = 0x1.fe2976p-1 (RZ)
|
||||
{0x3f78142e, 1, 0, 1}, // x = 0x1.2b9622p67, cos(x) = 0x1.f0285cp-1 (RZ)
|
||||
{0x3f08a21c, 1, 0,
|
||||
0}, // x = 0x1.ddebdep120, cos(x) = 0x1.114438p-1 (RZ)
|
||||
}};
|
||||
|
||||
if (unlikely(abstop12(y) < abstop12(as_float(0x39800000))))
|
||||
return 1.0f;
|
||||
LLVM_LIBC_FUNCTION(float, cosf, (float x)) {
|
||||
using FPBits = typename fputil::FPBits<float>;
|
||||
FPBits xbits(x);
|
||||
xbits.set_sign(false);
|
||||
|
||||
return sinf_poly(x, x2, p, 1);
|
||||
} else if (likely(abstop12(y) < abstop12(120.0f))) {
|
||||
x = reduce_fast(x, p, &n);
|
||||
uint32_t x_abs = xbits.uintval();
|
||||
double xd = static_cast<double>(xbits.get_val());
|
||||
|
||||
// Setup the signs for sin and cos.
|
||||
s = p->sign[n & 3];
|
||||
// Range reduction:
|
||||
// For |x| > pi/16, we perform range reduction as follows:
|
||||
// Find k and y such that:
|
||||
// x = (k + y) * pi/16
|
||||
// k is an integer
|
||||
// |y| < 0.5
|
||||
// For small range (|x| < 2^46 when FMA instructions are available, 2^22
|
||||
// otherwise), this is done by performing:
|
||||
// k = round(x * 16/pi)
|
||||
// y = x * 16/pi - k
|
||||
// For large range, we will omit all the higher parts of 16/pi such that the
|
||||
// least significant bits of their full products with x are larger than 31,
|
||||
// since cos((k + y + 32*i) * pi/16) = cos(x + i * 2pi) = cos(x).
|
||||
//
|
||||
// When FMA instructions are not available, we store the digits of 16/pi in
|
||||
// chunks of 28-bit precision. This will make sure that the products:
|
||||
// x * SIXTEEN_OVER_PI_28[i] are all exact.
|
||||
// When FMA instructions are available, we simply store the digits of 16/pi in
|
||||
// chunks of doubles (53-bit of precision).
|
||||
// So when multiplying by the largest values of single precision, the
|
||||
// resulting output should be correct up to 2^(-208 + 128) ~ 2^-80. By the
|
||||
// worst-case analysis of range reduction, |y| >= 2^-38, so this should give
|
||||
// us more than 40 bits of accuracy. For the worst-case estimation of range
|
||||
// reduction, see for instances:
|
||||
// Elementary Functions by J-M. Muller, Chapter 11,
|
||||
// Handbook of Floating-Point Arithmetic by J-M. Muller et. al.,
|
||||
// Chapter 10.2.
|
||||
//
|
||||
// Once k and y are computed, we then deduce the answer by the cosine of sum
|
||||
// formula:
|
||||
// cos(x) = cos((k + y)*pi/16)
|
||||
// = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16)
|
||||
// The values of sin(k*pi/16) and cos(k*pi/16) for k = 0..31 are precomputed
|
||||
// and stored using a vector of 32 doubles. Sin(y*pi/16) and cos(y*pi/16) are
|
||||
// computed using degree-7 and degree-8 minimax polynomials generated by
|
||||
// Sollya respectively.
|
||||
|
||||
if (n & 2)
|
||||
p = &SINCOSF_TABLE[1];
|
||||
|
||||
return sinf_poly(x * s, x * x, p, n ^ 1);
|
||||
} else if (abstop12(y) < abstop12(INFINITY)) {
|
||||
uint32_t xi = as_uint32_bits(y);
|
||||
int sign = xi >> 31;
|
||||
|
||||
x = reduce_large(xi, &n);
|
||||
|
||||
// Setup signs for sin and cos - include original sign.
|
||||
s = p->sign[(n + sign) & 3];
|
||||
|
||||
if ((n + sign) & 2)
|
||||
p = &SINCOSF_TABLE[1];
|
||||
|
||||
return sinf_poly(x * s, x * x, p, n ^ 1);
|
||||
// |x| < 0x1.0p-12f
|
||||
if (unlikely(x_abs < 0x3980'0000U)) {
|
||||
// When |x| < 2^-12, the relative error of the approximation cos(x) ~ 1
|
||||
// is:
|
||||
// |cos(x) - 1| < |x^2 / 2| = 2^-25 < epsilon(1)/2.
|
||||
// So the correctly rounded values of cos(x) are:
|
||||
// = 1 - eps(x) if rounding mode = FE_TOWARDZERO or FE_DOWWARD,
|
||||
// = 1 otherwise.
|
||||
// To simplify the rounding decision and make it more efficient and to
|
||||
// prevent compiler to perform constant folding, we use
|
||||
// fma(x, -2^-25, 1) instead.
|
||||
// Note: to use the formula 1 - 2^-25*x to decide the correct rounding, we
|
||||
// do need fma(x, -2^-25, 1) to prevent underflow caused by -2^-25*x when
|
||||
// |x| < 2^-125. For targets without FMA instructions, we simply use
|
||||
// double for intermediate results as it is more efficient than using an
|
||||
// emulated version of FMA.
|
||||
#if defined(LIBC_TARGET_HAS_FMA)
|
||||
return fputil::multiply_add(xbits.get_val(), -0x1.0p-25f, 1.0f);
|
||||
#else
|
||||
return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, 1.0));
|
||||
#endif // LIBC_TARGET_HAS_FMA
|
||||
}
|
||||
|
||||
return invalid(y);
|
||||
using ExceptChecker = typename fputil::ExceptionChecker<float, COSF_EXCEPTS>;
|
||||
{
|
||||
float result;
|
||||
if (ExceptChecker::check_odd_func(CosfExcepts, x_abs, false, result))
|
||||
return result;
|
||||
}
|
||||
|
||||
// TODO(lntue): refactor range reduction and most of polynomial approximation
|
||||
// to share between sinf, cosf, and sincosf.
|
||||
int k;
|
||||
double y;
|
||||
|
||||
if (likely(x_abs < FAST_PASS_BOUND)) {
|
||||
k = small_range_reduction(xd, y);
|
||||
} else {
|
||||
// x is inf or nan.
|
||||
if (unlikely(x_abs >= 0x7f80'0000U)) {
|
||||
if (x_abs == 0x7f80'0000U) {
|
||||
errno = EDOM;
|
||||
fputil::set_except(FE_INVALID);
|
||||
}
|
||||
return x +
|
||||
FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
|
||||
}
|
||||
|
||||
k = large_range_reduction(xd, xbits.get_exponent(), y);
|
||||
}
|
||||
|
||||
// After range reduction, k = round(x * 16 / pi) and y = (x * 16 / pi) - k.
|
||||
// So k is an integer and -0.5 <= y <= 0.5.
|
||||
// Then cos(x) = cos((k + y)*pi/16)
|
||||
// = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16)
|
||||
|
||||
double ysq = y * y;
|
||||
|
||||
// Degree-6 minimax even polynomial for sin(y*pi/16)/y generated by Sollya
|
||||
// with:
|
||||
// > Q = fpminimax(sin(y*pi/16)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]);
|
||||
double sin_y =
|
||||
y * fputil::polyeval(ysq, 0x1.921fb54442d17p-3, -0x1.4abbce6256adp-10,
|
||||
0x1.466bc5a5ac6b3p-19, -0x1.32bdcb4207562p-29);
|
||||
// Degree-8 minimax even polynomial for cos(y*pi/16) generated by Sollya with:
|
||||
// > P = fpminimax(cos(x*pi/16), [|0, 2, 4, 6, 8|], [|1, D...|], [0, 0.5]);
|
||||
// Note that cosm1_y = cos(y*pi/16) - 1.
|
||||
double cosm1_y =
|
||||
ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be45dcp-6, 0x1.03c1f081b08ap-14,
|
||||
-0x1.55d3c6fb0fb6ep-24, 0x1.e1d3d60f58873p-35);
|
||||
|
||||
double sin_k = -SIN_K_PI_OVER_16[k & 31];
|
||||
// cos(k * pi/16) = sin(k * pi/16 + pi/2) = sin((k + 8) * pi/16).
|
||||
// cos_k = y * cos(k * pi/16)
|
||||
double cos_k = SIN_K_PI_OVER_16[(k + 8) & 31];
|
||||
|
||||
// Combine the results with the sine of sum formula:
|
||||
// cos(x) = cos((k + y)*pi/16)
|
||||
// = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16)
|
||||
// = cosm1_y * cos_k + sin_y * sin_k
|
||||
// = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
|
||||
return fputil::multiply_add(sin_y, sin_k,
|
||||
fputil::multiply_add(cosm1_y, cos_k, cos_k));
|
||||
}
|
||||
|
||||
} // namespace __llvm_libc
|
||||
|
|
|
@ -51,21 +51,65 @@ TEST(LlvmLibcCosfTest, InFloatRange) {
|
|||
float x = float(FPBits(v));
|
||||
if (isnan(x) || isinf(x))
|
||||
continue;
|
||||
ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, __llvm_libc::cosf(x), 1.0);
|
||||
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cos, x,
|
||||
__llvm_libc::cosf(x), 0.5);
|
||||
}
|
||||
}
|
||||
|
||||
// For small values, cos(x) is 1.
|
||||
TEST(LlvmLibcCosfTest, SmallValues) {
|
||||
float x = float(FPBits(0x17800000U));
|
||||
float result = __llvm_libc::cosf(x);
|
||||
EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, result, 1.0);
|
||||
EXPECT_FP_EQ(1.0f, result);
|
||||
TEST(LlvmLibcCosfTest, SpecificBitPatterns) {
|
||||
constexpr int N = 42;
|
||||
constexpr uint32_t INPUTS[N] = {
|
||||
0x3f06'0a92U, // x = pi/6
|
||||
0x3f3a'dc51U, // x = 0x1.75b8a2p-1f
|
||||
0x3f49'0fdbU, // x = pi/4
|
||||
0x3f86'0a92U, // x = pi/3
|
||||
0x3fa7'832aU, // x = 0x1.4f0654p+0f
|
||||
0x3fc9'0fdbU, // x = pi/2
|
||||
0x4017'1973U, // x = 0x1.2e32e6p+1f
|
||||
0x4049'0fdbU, // x = pi
|
||||
0x4096'cbe4U, // x = 0x1.2d97c8p+2f
|
||||
0x40c9'0fdbU, // x = 2*pi
|
||||
0x433b'7490U, // x = 0x1.76e92p+7f
|
||||
0x437c'e5f1U, // x = 0x1.f9cbe2p+7f
|
||||
0x4619'9998U, // x = 0x1.33333p+13f
|
||||
0x474d'246fU, // x = 0x1.9a48dep+15f
|
||||
0x4afd'ece4U, // x = 0x1.fbd9c8p+22f
|
||||
0x4c23'32e9U, // x = 0x1.4665d2p+25f
|
||||
0x50a3'e87fU, // x = 0x1.47d0fep+34f
|
||||
0x5239'47f6U, // x = 0x1.728fecp+37f
|
||||
0x53b1'46a6U, // x = 0x1.628d4cp+40f
|
||||
0x5532'5019U, // x = 0x1.64a032p+43f
|
||||
0x55ca'fb2aU, // x = 0x1.95f654p+44f
|
||||
0x588e'f060U, // x = 0x1.1de0cp+50f
|
||||
0x5922'aa80U, // x = 0x1.4555p+51f
|
||||
0x5aa4'542cU, // x = 0x1.48a858p+54f
|
||||
0x5c07'bcd0U, // x = 0x1.0f79ap+57f
|
||||
0x5ebc'fddeU, // x = 0x1.79fbbcp+62f
|
||||
0x5f18'b878U, // x = 0x1.3170fp+63f
|
||||
0x5fa6'eba7U, // x = 0x1.4dd74ep+64f
|
||||
0x6115'cb11U, // x = 0x1.2b9622p+67f
|
||||
0x61a4'0b40U, // x = 0x1.48168p+68f
|
||||
0x6386'134eU, // x = 0x1.0c269cp+72f
|
||||
0x6589'8498U, // x = 0x1.13093p+76f
|
||||
0x6600'0001U, // x = 0x1.000002p+77f
|
||||
0x664e'46e4U, // x = 0x1.9c8dc8p+77f
|
||||
0x66b0'14aaU, // x = 0x1.602954p+78f
|
||||
0x67a9'242bU, // x = 0x1.524856p+80f
|
||||
0x6a19'76f1U, // x = 0x1.32ede2p+85f
|
||||
0x6c55'da58U, // x = 0x1.abb4bp+89f
|
||||
0x6f79'be45U, // x = 0x1.f37c8ap+95f
|
||||
0x7276'69d4U, // x = 0x1.ecd3a8p+101f
|
||||
0x7758'4625U, // x = 0x1.b08c4ap+111f
|
||||
0x7bee'f5efU, // x = 0x1.ddebdep+120f
|
||||
};
|
||||
|
||||
x = float(FPBits(0x0040000U));
|
||||
result = __llvm_libc::cosf(x);
|
||||
EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, result, 1.0);
|
||||
EXPECT_FP_EQ(1.0f, result);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
float x = float(FPBits(INPUTS[i]));
|
||||
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cos, x,
|
||||
__llvm_libc::cosf(x), 0.5);
|
||||
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cos, -x,
|
||||
__llvm_libc::cosf(-x), 0.5);
|
||||
}
|
||||
}
|
||||
|
||||
// SDCOMP-26094: check cosf in the cases for which the range reducer
|
||||
|
@ -73,6 +117,6 @@ TEST(LlvmLibcCosfTest, SmallValues) {
|
|||
TEST(LlvmLibcCosfTest, SDCOMP_26094) {
|
||||
for (uint32_t v : SDCOMP26094_VALUES) {
|
||||
float x = float(FPBits(v));
|
||||
ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, __llvm_libc::cosf(x), 1.0);
|
||||
ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, __llvm_libc::cosf(x), 0.5);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,15 +40,19 @@ add_fp_unittest(
|
|||
|
||||
add_fp_unittest(
|
||||
cosf_test
|
||||
NO_RUN_POSTBUILD
|
||||
NEED_MPFR
|
||||
SUITE
|
||||
libc_math_exhaustive_tests
|
||||
SRCS
|
||||
cosf_test.cpp
|
||||
DEPENDS
|
||||
.exhaustive_test
|
||||
libc.include.math
|
||||
libc.src.math.cosf
|
||||
libc.src.__support.FPUtil.fputil
|
||||
LINK_LIBRARIES
|
||||
-lpthread
|
||||
)
|
||||
|
||||
add_fp_unittest(
|
||||
|
|
|
@ -6,20 +6,71 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "exhaustive_test.h"
|
||||
#include "src/__support/FPUtil/FPBits.h"
|
||||
#include "src/math/cosf.h"
|
||||
#include "utils/MPFRWrapper/MPFRUtils.h"
|
||||
#include <math.h>
|
||||
#include "utils/UnitTest/FPMatcher.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
using FPBits = __llvm_libc::fputil::FPBits<float>;
|
||||
|
||||
namespace mpfr = __llvm_libc::testing::mpfr;
|
||||
|
||||
TEST(LlvmLibccosffExhaustiveTest, AllValues) {
|
||||
uint32_t bits = 0;
|
||||
do {
|
||||
FPBits xbits(bits);
|
||||
float x = float(xbits);
|
||||
ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, __llvm_libc::cosf(x), 1.0);
|
||||
} while (bits++ < 0xffff'ffffU);
|
||||
struct LlvmLibcCosfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
|
||||
bool check(uint32_t start, uint32_t stop,
|
||||
mpfr::RoundingMode rounding) override {
|
||||
mpfr::ForceRoundingMode r(rounding);
|
||||
uint32_t bits = start;
|
||||
bool result = true;
|
||||
do {
|
||||
FPBits xbits(bits);
|
||||
float x = float(xbits);
|
||||
bool r = EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, __llvm_libc::cosf(x),
|
||||
0.5, rounding);
|
||||
result &= r;
|
||||
} while (++bits < stop);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
// Range: [0, +Inf);
|
||||
static constexpr uint32_t POS_START = 0x0000'0000U;
|
||||
static constexpr uint32_t POS_STOP = 0x7f80'0000U;
|
||||
|
||||
TEST_F(LlvmLibcCosfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
|
||||
test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Nearest);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcCosfExhaustiveTest, PostiveRangeRoundUp) {
|
||||
test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Upward);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcCosfExhaustiveTest, PostiveRangeRoundDown) {
|
||||
test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Downward);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcCosfExhaustiveTest, PostiveRangeRoundTowardZero) {
|
||||
test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::TowardZero);
|
||||
}
|
||||
|
||||
// Range: (-Inf, 0];
|
||||
static constexpr uint32_t NEG_START = 0x8000'0000U;
|
||||
static constexpr uint32_t NEG_STOP = 0xff80'0000U;
|
||||
|
||||
TEST_F(LlvmLibcCosfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
|
||||
test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Nearest);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcCosfExhaustiveTest, NegativeRangeRoundUp) {
|
||||
test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Upward);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcCosfExhaustiveTest, NegativeRangeRoundDown) {
|
||||
test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Downward);
|
||||
}
|
||||
|
||||
TEST_F(LlvmLibcCosfExhaustiveTest, NegativeRangeRoundTowardZero) {
|
||||
test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::TowardZero);
|
||||
}
|
||||
|
|
|
@ -636,8 +636,11 @@ libc_math_function(name = "fmaxl")
|
|||
libc_math_function(
|
||||
name = "cosf",
|
||||
additional_deps = [
|
||||
":math_utils",
|
||||
":sincosf_utils",
|
||||
":__support_fputil_fma",
|
||||
":__support_fputil_multiply_add",
|
||||
":__support_fputil_polyeval",
|
||||
":common_constants",
|
||||
":range_reduction",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue