/* * Copyright 1993-2023 NVIDIA Corporation. All rights reserved. * * NOTICE TO LICENSEE: * * This source code and/or documentation ("Licensed Deliverables") are * subject to NVIDIA intellectual property rights under U.S. and * international Copyright laws. * * These Licensed Deliverables contained herein is PROPRIETARY and * CONFIDENTIAL to NVIDIA and is being provided under the terms and * conditions of a form of NVIDIA software license agreement by and * between NVIDIA and Licensee ("License Agreement") or electronically * accepted by Licensee. Notwithstanding any terms or conditions to * the contrary in the License Agreement, reproduction or disclosure * of the Licensed Deliverables to any third party without the express * written consent of NVIDIA is prohibited. * * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THESE LICENSED DELIVERABLES. * * U.S. Government End Users. These Licensed Deliverables are a * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT * 1995), consisting of "commercial computer software" and "commercial * computer software documentation" as such terms are used in 48 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government * only as a commercial end item. Consistent with 48 C.F.R.12.212 and * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all * U.S. Government End Users acquire the Licensed Deliverables with * only those rights set forth herein. * * Any use of the Licensed Deliverables in individual and commercial * software must include, in the user documentation and internal * comments to the code, the above Disclaimer and U.S. Government End * Users Notice. */ #if !defined(__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__) #if defined(_MSC_VER) #pragma message("crt/math_functions.hpp is an internal header file and must not be used directly. Please use cuda_runtime_api.h or cuda_runtime.h instead.") #else #warning "crt/math_functions.hpp is an internal header file and must not be used directly. Please use cuda_runtime_api.h or cuda_runtime.h instead." #endif #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ #define __UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_MATH_FUNCTIONS_HPP__ #endif #if !defined(__MATH_FUNCTIONS_HPP__) #define __MATH_FUNCTIONS_HPP__ /******************************************************************************* * * * * * * *******************************************************************************/ #if defined(__cplusplus) && defined(__CUDACC__) /******************************************************************************* * * * * * * *******************************************************************************/ #include "builtin_types.h" #include "host_defines.h" /******************************************************************************* * * * * * * *******************************************************************************/ #if defined(__CUDACC_RTC__) __host__ __device__ __cudart_builtin__ int signbit(const float x) { return __signbitf(x); } __host__ __device__ __cudart_builtin__ int signbit(const double x) { return __signbit(x); } __host__ __device__ __cudart_builtin__ int signbit(const long double x) { return __signbitl(static_cast(x));} __host__ __device__ __cudart_builtin__ int isfinite(const float x) { return __finitef(x); } __host__ __device__ __cudart_builtin__ int isfinite(const double x) { return __finite(x); } __host__ __device__ __cudart_builtin__ int isfinite(const long double x) { return __finitel(static_cast(x)); } __host__ __device__ __cudart_builtin__ int isnan(const float x) { return __isnanf(x); } __host__ __device__ __cudart_builtin__ int isnan(const double x) { return __isnan(x); } __host__ __device__ __cudart_builtin__ int isnan(const long double x) { return __isnanl(static_cast(x)); } __host__ __device__ __cudart_builtin__ int isinf(const float x) { return __isinff(x); } __host__ __device__ __cudart_builtin__ int isinf(const double x) { return __isinf(x); } __host__ __device__ __cudart_builtin__ int isinf(const long double x) { return __isinfl(static_cast(x)); } __host__ __device__ __cudart_builtin__ long long int abs(const long long int a) { return llabs(a); } __host__ __device__ __cudart_builtin__ long int abs(const long int in) { return llabs(in); } __host__ __device__ __cudart_builtin__ float abs(const float in) { return fabsf(in); } __host__ __device__ __cudart_builtin__ double abs(const double in) { return fabs(in); } __host__ __device__ __cudart_builtin__ float fabs(const float in) { return fabsf(in); } __host__ __device__ __cudart_builtin__ float ceil(const float in) { return ceilf(in); } __host__ __device__ __cudart_builtin__ float floor(const float in) { return floorf(in); } __host__ __device__ __cudart_builtin__ float sqrt(const float in) { return sqrtf(in); } __host__ __device__ __cudart_builtin__ float pow(const float a, const float b) { return powf(a, b); } extern "C" __device__ float powif(float, int); __host__ __device__ __cudart_builtin__ float pow(const float a, const int b) { return powif(a, b); } extern "C" __device__ double powi(double, int); __host__ __device__ __cudart_builtin__ double pow(const double a, const int b) { return powi(a, b); } __host__ __device__ __cudart_builtin__ float log(const float in) { return logf(in); } __host__ __device__ __cudart_builtin__ float log10(const float in) { return log10f(in); } __host__ __device__ __cudart_builtin__ float fmod(const float a, const float b) { return fmodf(a, b); } __host__ __device__ __cudart_builtin__ float modf(const float a, float*b) { return modff(a, b); } __host__ __device__ __cudart_builtin__ float exp(const float in) { return expf(in); } __host__ __device__ __cudart_builtin__ float frexp(const float a, int*b) { return frexpf(a, b); } __host__ __device__ __cudart_builtin__ float ldexp(const float a, int b) { return ldexpf(a, b); } __host__ __device__ __cudart_builtin__ float asin(const float in) { return asinf(in); } __host__ __device__ __cudart_builtin__ float sin(const float in) { return sinf(in); } __host__ __device__ __cudart_builtin__ float sinh(const float in) { return sinhf(in); } __host__ __device__ __cudart_builtin__ float acos(const float in) { return acosf(in); } __host__ __device__ __cudart_builtin__ float cos(const float in) { return cosf(in); } __host__ __device__ __cudart_builtin__ float cosh(const float in) { return coshf(in); } __host__ __device__ __cudart_builtin__ float atan(const float in) { return atanf(in); } __host__ __device__ __cudart_builtin__ float atan2(const float a, const float b) { return atan2f(a, b); } __host__ __device__ __cudart_builtin__ float tan(const float in) { return tanf(in); } __host__ __device__ __cudart_builtin__ float tanh(const float in) { return tanhf(in); } #elif defined(__GNUC__) #undef signbit #undef isfinite #undef isnan #undef isinf #if defined(_LIBCPP_VERSION) extern "C" __device__ float powif(float, int); extern "C" __device__ double powi(double, int); #endif /* _LIBCPP_VERSION */ #if defined(__APPLE__) __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const float x) { return __signbitf(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const double x) { return __signbitd(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const long double x) { return __signbitl(x);} __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const float x) { return __isfinitef(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const double x) { return __isfinited(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const long double x) { return __isfinite(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const double x) throw() { return __isnand(x); } #if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 7000 __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const float x) { return __isnanf(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const long double x) { return __isnan(x); } #endif /* defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 7000 */ __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const double x) throw() { return __isinfd(x); } #if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 7000 __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const float x) { return __isinff(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const long double x) { return __isinf(x); } #endif /* defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 7000 */ #else /* __APPLE__ */ #if ((defined _GLIBCXX_MATH_H) && _GLIBCXX_MATH_H) && (__cplusplus >= 201103L) #if defined(__CUDA_ARCH__) #define __NV_BUILTIN_FUNC_DECL__ __forceinline__ __host__ __device__ __cudart_builtin__ #if _GLIBCXX_HAVE_OBSOLETE_ISNAN && !_GLIBCXX_NO_OBSOLETE_ISINF_ISNAN_DYNAMIC __NV_BUILTIN_FUNC_DECL__ int isnan(const double a) throw() { return __isnan(a); } __NV_BUILTIN_FUNC_DECL__ int isinf(const double x) throw() { return __isinf(x); } #endif /* _GLIBCXX_HAVE_OBSOLETE_ISNAN && !_GLIBCXX_NO_OBSOLETE_ISINF_ISNAN_DYNAMIC */ #undef __NV_BUILTIN_FUNC_DECL__ #endif /* __CUDA_ARCH */ #else /* !(((defined _GLIBCXX_MATH_H) && _GLIBCXX_MATH_H) && (__cplusplus >= 201103L)) */ #if defined(__QNX__) #if defined(__QNX__) && defined(_LIBCPP_VERSION) static __inline__ __host__ __device__ __cudart_builtin__ bool signbit(const float x) { #if defined(__CUDA_ARCH__) return (__signbitf(x) != 0); #else /* !__CUDA_ARCH__ */ return signbit(x); #endif /* __CUDA_ARCH__ */ } static __inline__ __host__ __device__ __cudart_builtin__ bool signbit(const double x) { #if defined(__CUDA_ARCH__) return (__signbit(x) != 0); #else /* !__CUDA_ARCH__ */ return signbit(x); #endif /* __CUDA_ARCH__ */ } static __inline__ __host__ __device__ __cudart_builtin__ bool signbit(const long double x) { #if defined(__CUDA_ARCH__) return (__signbitl(x) != 0); #else /* !__CUDA_ARCH__ */ return signbit(x); #endif /* __CUDA_ARCH__ */ } #endif /* (__QNX__ && _LIBCPP_VERSION) */ static __inline__ __host__ __device__ __cudart_builtin__ bool isfinite(const long double a) { #if defined(__CUDA_ARCH__) return (__finitel(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isfinite(a); #endif /* defined(__CUDA_ARCH__) */ } static __inline__ __host__ __device__ __cudart_builtin__ bool isfinite(const double a) { #if defined(__CUDA_ARCH__) return (__finite(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isfinite(a); #endif /* defined(__CUDA_ARCH__) */ } static __inline__ __host__ __device__ __cudart_builtin__ bool isfinite(const float a) { #if defined(__CUDA_ARCH__) return (__finitef(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isfinite(a); #endif /* defined(__CUDA_ARCH__) */ } static __inline__ __host__ __device__ __cudart_builtin__ bool isnan(const long double a) { #if defined(__CUDA_ARCH__) return (__isnanl(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isnan(a); #endif /* defined(__CUDA_ARCH__) */ } static __inline__ __host__ __device__ __cudart_builtin__ bool isnan(const double a) { #if defined(__CUDA_ARCH__) return (__isnan(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isnan(a); #endif /* defined(__CUDA_ARCH__) */ } static __inline__ __host__ __device__ __cudart_builtin__ bool isnan(const float a) { #if defined(__CUDA_ARCH__) return (__isnanf(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isnan(a); #endif /* defined(__CUDA_ARCH__) */ } static __inline__ __host__ __device__ __cudart_builtin__ bool isinf(const long double a) { #if defined(__CUDA_ARCH__) return (__isinfl(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isinf(a); #endif /* defined(__CUDA_ARCH__) */ } static __inline__ __host__ __device__ __cudart_builtin__ bool isinf(const double a) { #if defined(__CUDA_ARCH__) return (__isinf(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isinf(a); #endif /* defined(__CUDA_ARCH__) */ } static __inline__ __host__ __device__ __cudart_builtin__ bool isinf(const float a) { #if defined(__CUDA_ARCH__) return (__isinff(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isinf(a); #endif /* defined(__CUDA_ARCH__) */ } #elif ( (defined(__ANDROID__) || defined(__HORIZON__)) && defined(_LIBCPP_VERSION)) #if defined(__CUDA_ARCH__) __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const float x) { return __signbitf(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const double x) { return __signbit(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const long double x) { return __signbitl(x);} __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const float x) { return __finitef(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const double x) { return __finite(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const long double x) { return __finitel(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const double x) { return __isnan(x); } #if _LIBCPP_VERSION < 8000 __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const float x) { return __isnanf(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const long double x) { return __isnanl(x); } #endif /* _LIBCPP_VERSION < 8000 */ __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const double x) { return __isinf(x); } #if _LIBCPP_VERSION < 8000 __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const float x) { return __isinff(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const long double x) { return __isinfl(x); } #endif /* _LIBCPP_VERSION < 8000 */ #else /* !defined(__CUDA_ARCH__) */ __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const float x) { return signbit(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const double x) { return signbit(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const long double x) { return signbit(x);} __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const float x) { return isfinite(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const double x) { return isfinite(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const long double x) { return isfinite(x); } #if _LIBCPP_VERSION < 8000 __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const float x) { return isnan(x); } /* int isnan(double) provided by math.h */ __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const long double x) { return isnan(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const float x) { return isinf(x); } /* int isinf(double) provided by math.h */ __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const long double x) { return isinf(x); } #endif /* _LIBCPP_VERSION < 8000 */ #endif /* defined(__CUDA_ARCH__) */ #else /* !__QNX__ */ __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const float x) { return __signbitf(x); } #if defined(__ICC) __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const double x) throw() { return __signbit(x); } #else /* !__ICC */ __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const double x) { return __signbit(x); } #endif /* __ICC */ __forceinline__ __host__ __device__ __cudart_builtin__ int signbit(const long double x) { return __signbitl(x);} #if defined(__ANDROID__) __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const float x) { #if defined(__CUDA_ARCH__) return __finitef(x); #else /* !__CUDA_ARCH__ */ return __isfinitef(x); #endif /* __CUDA_ARCH__ */ } #else /* !__ANDROID__ */ __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const float x) { return __finitef(x); } #endif /* __ANDROID__ */ #if defined(__ANDROID__) __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const double x) { #ifdef __CUDA_ARCH__ return __finite(x); #else /* !__CUDA_ARCH__ */ return __isfinite(x); #endif /* __CUDA_ARCH__ */ } #elif defined(__ICC) __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const double x) throw() { return __finite(x); } #else __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const double x) { return __finite(x); } #endif /* __ANDROID__ */ #if defined(__ANDROID__) __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const long double x) { #ifdef __CUDA_ARCH__ return __finitel(x); #else /* !__CUDA_ARCH__ */ return __isfinitel(x); #endif /* __CUDA_ARCH__ */ } #else /* !__ANDROID__ */ __forceinline__ __host__ __device__ __cudart_builtin__ int isfinite(const long double x) { return __finitel(x); } #endif /* __ANDROID__ */ __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const float x) { return __isnanf(x); } #if defined(__ANDROID__) __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const double x) { return __isnan(x); } #else /* !__ANDROID__ */ __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const double x) throw() { return __isnan(x); } #endif /* __ANDROID__ */ __forceinline__ __host__ __device__ __cudart_builtin__ int isnan(const long double x) { return __isnanl(x); } __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const float x) { return __isinff(x); } #if defined(__ANDROID__) __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const double x) { return __isinf(x); } #else /* !__ANDROID__ */ __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const double x) throw() { return __isinf(x); } #endif /* __ANDROID__ */ __forceinline__ __host__ __device__ __cudart_builtin__ int isinf(const long double x) { return __isinfl(x); } #endif /* __QNX__ || __HORIZON__ */ #endif /* ((defined _GLIBCXX_MATH_H) && _GLIBCXX_MATH_H) && (__cplusplus >= 201103L) */ #endif /* __APPLE__ */ #if defined(__arm__) && !defined(_STLPORT_VERSION) && !_GLIBCXX_USE_C99 #if !defined(__ANDROID__) || (!defined(_LIBCPP_VERSION) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8))) #if !defined(__QNX__) && !defined(__HORIZON__) static __inline__ __host__ __device__ __cudart_builtin__ long long int abs(const long long int a) { return llabs(a); } #endif /* !__QNX__ && !__HORIZON__*/ #endif /* !defined(__ANDROID__) || (!defined(_LIBCPP_VERSION) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8))) */ #endif /* __arm__ && !_STLPORT_VERSION && !_GLIBCXX_USE_C99 */ #elif defined(_WIN32) #if (!defined(_MSC_VER) || _MSC_VER < 1800) static __inline__ __host__ __device__ __cudart_builtin__ int signbit(const long double a) { return __signbitl(a); } static __inline__ __host__ __device__ __cudart_builtin__ int signbit(const double a) { return __signbit(a); } static __inline__ __host__ __device__ __cudart_builtin__ int signbit(const float a) { return __signbitf(a); } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #if (!defined(_MSC_VER) || _MSC_VER < 1800) static __inline__ __host__ __device__ __cudart_builtin__ int isinf(const long double a) { return __isinfl(a); } #else /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ static __inline__ __host__ __device__ __cudart_builtin__ bool isinf(const long double a) { #if defined(__CUDA_ARCH__) return (__isinfl(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isinf(a); #endif /* defined(__CUDA_ARCH__) */ } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #if (!defined(_MSC_VER) || _MSC_VER < 1800) static __inline__ __host__ __device__ __cudart_builtin__ int isinf(const double a) { return __isinf(a); } #else /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ static __inline__ __host__ __device__ __cudart_builtin__ bool isinf(const double a) { #if defined(__CUDA_ARCH__) return (__isinf(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isinf(a); #endif /* defined(__CUDA_ARCH__) */ } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #if (!defined(_MSC_VER) || _MSC_VER < 1800) static __inline__ __host__ __device__ __cudart_builtin__ int isinf(const float a) { return __isinff(a); } #else /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ static __inline__ __host__ __device__ bool isinf(const float a) { #if defined(__CUDA_ARCH__) return (__isinff(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isinf(a); #endif /* defined(__CUDA_ARCH__) */ } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #if (!defined(_MSC_VER) || _MSC_VER < 1800) static __inline__ __host__ __device__ __cudart_builtin__ int isnan(const long double a) { return __isnanl(a); } #else /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ static __inline__ __host__ __device__ __cudart_builtin__ bool isnan(const long double a) { #if defined(__CUDA_ARCH__) return (__isnanl(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isnan(a); #endif /* defined(__CUDA_ARCH__) */ } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #if (!defined(_MSC_VER) || _MSC_VER < 1800) static __inline__ __host__ __device__ __cudart_builtin__ int isnan(const double a) { return __isnan(a); } #else /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ static __inline__ __host__ __device__ __cudart_builtin__ bool isnan(const double a) { #if defined(__CUDA_ARCH__) return (__isnan(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isnan(a); #endif /* defined(__CUDA_ARCH__) */ } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #if (!defined(_MSC_VER) || _MSC_VER < 1800) static __inline__ __host__ __device__ __cudart_builtin__ int isnan(const float a) { return __isnanf(a); } #else /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ static __inline__ __host__ __device__ __cudart_builtin__ bool isnan(const float a) { #if defined(__CUDA_ARCH__) return (__isnanf(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isnan(a); #endif /* defined(__CUDA_ARCH__) */ } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #if (!defined(_MSC_VER) || _MSC_VER < 1800) static __inline__ __host__ __device__ __cudart_builtin__ int isfinite(const long double a) { return __finitel(a); } #else /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ static __inline__ __host__ __device__ __cudart_builtin__ bool isfinite(const long double a) { #if defined(__CUDA_ARCH__) return (__finitel(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isfinite(a); #endif /* defined(__CUDA_ARCH__) */ } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #if (!defined(_MSC_VER) || _MSC_VER < 1800) static __inline__ __host__ __device__ __cudart_builtin__ int isfinite(const double a) { return __finite(a); } #else /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ static __inline__ __host__ __device__ __cudart_builtin__ bool isfinite(const double a) { #if defined(__CUDA_ARCH__) return (__finite(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isfinite(a); #endif /* defined(__CUDA_ARCH__) */ } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #if (!defined(_MSC_VER) || _MSC_VER < 1800) static __inline__ __host__ __device__ __cudart_builtin__ int isfinite(const float a) { return __finitef(a); } #else /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ static __inline__ __host__ __device__ __cudart_builtin__ bool isfinite(const float a) { #if defined(__CUDA_ARCH__) return (__finitef(a) != 0); #else /* defined(__CUDA_ARCH__) */ return isfinite(a); #endif /* defined(__CUDA_ARCH__) */ } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #endif /* __CUDACC_RTC__ */ #if defined(__CUDACC_RTC__) #define __MATH_FUNCTIONS_DECL__ __host__ __device__ #define __MATH_FUNCTIONS_DEVICE_DECL__ __device__ #else /* __CUDACC_RTC__ */ #define __MATH_FUNCTIONS_DECL__ static inline __host__ __device__ #define __MATH_FUNCTIONS_DEVICE_DECL__ static inline __device__ #endif /* __CUDACC_RTC__ */ #if defined(__CUDACC_RTC__) || (!defined(_MSC_VER) || _MSC_VER < 1800) #if defined(__QNX__) && defined(_LIBCPP_VERSION) _LIBCPP_BEGIN_NAMESPACE_STD #endif /* __QNX__ && _LIBCPP_VERSION */ #if !defined(__QNX__) && !(defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 3800) #if !(((defined _GLIBCXX_MATH_H) && _GLIBCXX_MATH_H) && (__cplusplus >= 201103L)) __MATH_FUNCTIONS_DECL__ float logb(const float a) { return logbf(a); } __MATH_FUNCTIONS_DECL__ int ilogb(const float a) { return ilogbf(a); } __MATH_FUNCTIONS_DECL__ float scalbn(const float a, const int b) { return scalbnf(a, b); } __MATH_FUNCTIONS_DECL__ float scalbln(const float a, const long int b) { return scalblnf(a, b); } __MATH_FUNCTIONS_DECL__ float exp2(const float a) { return exp2f(a); } __MATH_FUNCTIONS_DECL__ float expm1(const float a) { return expm1f(a); } __MATH_FUNCTIONS_DECL__ float log2(const float a) { return log2f(a); } __MATH_FUNCTIONS_DECL__ float log1p(const float a) { return log1pf(a); } __MATH_FUNCTIONS_DECL__ float acosh(const float a) { return acoshf(a); } __MATH_FUNCTIONS_DECL__ float asinh(const float a) { return asinhf(a); } __MATH_FUNCTIONS_DECL__ float atanh(const float a) { return atanhf(a); } __MATH_FUNCTIONS_DECL__ float hypot(const float a, const float b) { return hypotf(a, b); } __MATH_FUNCTIONS_DECL__ float cbrt(const float a) { return cbrtf(a); } __MATH_FUNCTIONS_DECL__ float erf(const float a) { return erff(a); } __MATH_FUNCTIONS_DECL__ float erfc(const float a) { return erfcf(a); } __MATH_FUNCTIONS_DECL__ float lgamma(const float a) { return lgammaf(a); } __MATH_FUNCTIONS_DECL__ float tgamma(const float a) { return tgammaf(a); } __MATH_FUNCTIONS_DECL__ float copysign(const float a, const float b) { return copysignf(a, b); } __MATH_FUNCTIONS_DECL__ float nextafter(const float a, const float b) { return nextafterf(a, b); } __MATH_FUNCTIONS_DECL__ float remainder(const float a, const float b) { return remainderf(a, b); } __MATH_FUNCTIONS_DECL__ float remquo(const float a, const float b, int *quo) { return remquof(a, b, quo); } __MATH_FUNCTIONS_DECL__ float round(const float a) { return roundf(a); } __MATH_FUNCTIONS_DECL__ long int lround(const float a) { return lroundf(a); } __MATH_FUNCTIONS_DECL__ long long int llround(const float a) { return llroundf(a); } __MATH_FUNCTIONS_DECL__ float trunc(const float a) { return truncf(a); } __MATH_FUNCTIONS_DECL__ float rint(const float a) { return rintf(a); } __MATH_FUNCTIONS_DECL__ long int lrint(const float a) { return lrintf(a); } __MATH_FUNCTIONS_DECL__ long long int llrint(const float a) { return llrintf(a); } __MATH_FUNCTIONS_DECL__ float nearbyint(const float a) { return nearbyintf(a); } __MATH_FUNCTIONS_DECL__ float fdim(const float a, const float b) { return fdimf(a, b); } __MATH_FUNCTIONS_DECL__ float fma(const float a, const float b, const float c) { return fmaf(a, b, c); } __MATH_FUNCTIONS_DECL__ float fmax(const float a, const float b) { return fmaxf(a, b); } __MATH_FUNCTIONS_DECL__ float fmin(const float a, const float b) { return fminf(a, b); } #endif /* !(((defined _GLIBCXX_MATH_H) && _GLIBCXX_MATH_H) && (__cplusplus >= 201103L)) */ #endif /* !(!defined(__QNX__) && !(defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 3800)) */ #if defined(__QNX__) && defined(_LIBCPP_VERSION) _LIBCPP_END_NAMESPACE_STD #endif #endif /* __CUDACC_RTC__ || (!defined(_MSC_VER) || _MSC_VER < 1800) */ __MATH_FUNCTIONS_DECL__ float exp10(const float a) { return exp10f(a); } __MATH_FUNCTIONS_DECL__ float rsqrt(const float a) { return rsqrtf(a); } __MATH_FUNCTIONS_DECL__ float rcbrt(const float a) { return rcbrtf(a); } __MATH_FUNCTIONS_DECL__ float sinpi(const float a) { return sinpif(a); } __MATH_FUNCTIONS_DECL__ float cospi(const float a) { return cospif(a); } __MATH_FUNCTIONS_DECL__ void sincospi(const float a, float *const sptr, float *const cptr) { sincospif(a, sptr, cptr); } __MATH_FUNCTIONS_DECL__ void sincos(const float a, float *const sptr, float *const cptr) { sincosf(a, sptr, cptr); } __MATH_FUNCTIONS_DECL__ float j0(const float a) { return j0f(a); } __MATH_FUNCTIONS_DECL__ float j1(const float a) { return j1f(a); } __MATH_FUNCTIONS_DECL__ float jn(const int n, const float a) { return jnf(n, a); } __MATH_FUNCTIONS_DECL__ float y0(const float a) { return y0f(a); } __MATH_FUNCTIONS_DECL__ float y1(const float a) { return y1f(a); } __MATH_FUNCTIONS_DECL__ float yn(const int n, const float a) { return ynf(n, a); } __MATH_FUNCTIONS_DEVICE_DECL__ float cyl_bessel_i0(const float a) { return cyl_bessel_i0f(a); } __MATH_FUNCTIONS_DEVICE_DECL__ float cyl_bessel_i1(const float a) { return cyl_bessel_i1f(a); } __MATH_FUNCTIONS_DECL__ float erfinv(const float a) { return erfinvf(a); } __MATH_FUNCTIONS_DECL__ float erfcinv(const float a) { return erfcinvf(a); } __MATH_FUNCTIONS_DECL__ float normcdfinv(const float a) { return normcdfinvf(a); } __MATH_FUNCTIONS_DECL__ float normcdf(const float a) { return normcdff(a); } __MATH_FUNCTIONS_DECL__ float erfcx(const float a) { return erfcxf(a); } __MATH_FUNCTIONS_DECL__ double copysign(const double a, const float b) { return copysign(a, static_cast(b)); } __MATH_FUNCTIONS_DECL__ double copysign(const float a, const double b) { return copysign(static_cast(a), b); } __MATH_FUNCTIONS_DECL__ unsigned int min(const unsigned int a, const unsigned int b) { return umin(a, b); } __MATH_FUNCTIONS_DECL__ unsigned int min(const int a, const unsigned int b) { return umin(static_cast(a), b); } __MATH_FUNCTIONS_DECL__ unsigned int min(const unsigned int a, const int b) { return umin(a, static_cast(b)); } __MATH_FUNCTIONS_DECL__ long int min(const long int a, const long int b) { long int retval; /* Suppress VS warning: warning C4127: conditional expression is constant */ #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (push) #pragma warning (disable: 4127) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ /* long can be of 32-bit type on some systems. */ if (sizeof(long int) == sizeof(int)) { #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (pop) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ retval = static_cast(min(static_cast(a), static_cast(b))); } else { retval = static_cast(llmin(static_cast(a), static_cast(b))); } return retval; } __MATH_FUNCTIONS_DECL__ unsigned long int min(const unsigned long int a, const unsigned long int b) { unsigned long int retval; #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (push) #pragma warning (disable: 4127) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ if (sizeof(unsigned long int) == sizeof(unsigned int)) { #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (pop) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ retval = static_cast(umin(static_cast(a), static_cast(b))); } else { retval = static_cast(ullmin(static_cast(a), static_cast(b))); } return retval; } __MATH_FUNCTIONS_DECL__ unsigned long int min(const long int a, const unsigned long int b) { unsigned long int retval; #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (push) #pragma warning (disable: 4127) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ if (sizeof(unsigned long int) == sizeof(unsigned int)) { #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (pop) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ retval = static_cast(umin(static_cast(a), static_cast(b))); } else { retval = static_cast(ullmin(static_cast(a), static_cast(b))); } return retval; } __MATH_FUNCTIONS_DECL__ unsigned long int min(const unsigned long int a, const long int b) { unsigned long int retval; #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (push) #pragma warning (disable: 4127) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ if (sizeof(unsigned long int) == sizeof(unsigned int)) { #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (pop) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ retval = static_cast(umin(static_cast(a), static_cast(b))); } else { retval = static_cast(ullmin(static_cast(a), static_cast(b))); } return retval; } __MATH_FUNCTIONS_DECL__ long long int min(const long long int a, const long long int b) { return llmin(a, b); } __MATH_FUNCTIONS_DECL__ unsigned long long int min(const unsigned long long int a, const unsigned long long int b) { return ullmin(a, b); } __MATH_FUNCTIONS_DECL__ unsigned long long int min(const long long int a, const unsigned long long int b) { return ullmin(static_cast(a), b); } __MATH_FUNCTIONS_DECL__ unsigned long long int min(const unsigned long long int a, const long long int b) { return ullmin(a, static_cast(b)); } __MATH_FUNCTIONS_DECL__ float min(const float a, const float b) { return fminf(a, b); } __MATH_FUNCTIONS_DECL__ double min(const double a, const double b) { return fmin(a, b); } __MATH_FUNCTIONS_DECL__ double min(const float a, const double b) { return fmin(static_cast(a), b); } __MATH_FUNCTIONS_DECL__ double min(const double a, const float b) { return fmin(a, static_cast(b)); } __MATH_FUNCTIONS_DECL__ unsigned int max(const unsigned int a, const unsigned int b) { return umax(a, b); } __MATH_FUNCTIONS_DECL__ unsigned int max(const int a, const unsigned int b) { return umax(static_cast(a), b); } __MATH_FUNCTIONS_DECL__ unsigned int max(const unsigned int a, const int b) { return umax(a, static_cast(b)); } __MATH_FUNCTIONS_DECL__ long int max(const long int a, const long int b) { long int retval; /* long can be of 32-bit type on some systems. */ #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (push) #pragma warning (disable: 4127) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ if (sizeof(long int) == sizeof(int)) { #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (pop) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ retval = static_cast(max(static_cast(a), static_cast(b))); } else { retval = static_cast(llmax(static_cast(a), static_cast(b))); } return retval; } __MATH_FUNCTIONS_DECL__ unsigned long int max(const unsigned long int a, const unsigned long int b) { unsigned long int retval; #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (push) #pragma warning (disable: 4127) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ if (sizeof(unsigned long int) == sizeof(unsigned int)) { #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (pop) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ retval = static_cast(umax(static_cast(a), static_cast(b))); } else { retval = static_cast(ullmax(static_cast(a), static_cast(b))); } return retval; } __MATH_FUNCTIONS_DECL__ unsigned long int max(const long int a, const unsigned long int b) { unsigned long int retval; #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (push) #pragma warning (disable: 4127) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ if (sizeof(unsigned long int) == sizeof(unsigned int)) { #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (pop) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ retval = static_cast(umax(static_cast(a), static_cast(b))); } else { retval = static_cast(ullmax(static_cast(a), static_cast(b))); } return retval; } __MATH_FUNCTIONS_DECL__ unsigned long int max(const unsigned long int a, const long int b) { unsigned long int retval; #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (push) #pragma warning (disable: 4127) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ if (sizeof(unsigned long int) == sizeof(unsigned int)) { #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) #pragma warning (pop) #endif /* _MSC_VER && !defined(__CUDA_ARCH__) */ retval = static_cast(umax(static_cast(a), static_cast(b))); } else { retval = static_cast(ullmax(static_cast(a), static_cast(b))); } return retval; } __MATH_FUNCTIONS_DECL__ long long int max(const long long int a, const long long int b) { return llmax(a, b); } __MATH_FUNCTIONS_DECL__ unsigned long long int max(const unsigned long long int a, const unsigned long long int b) { return ullmax(a, b); } __MATH_FUNCTIONS_DECL__ unsigned long long int max(const long long int a, const unsigned long long int b) { return ullmax(static_cast(a), b); } __MATH_FUNCTIONS_DECL__ unsigned long long int max(const unsigned long long int a, const long long int b) { return ullmax(a, static_cast(b)); } __MATH_FUNCTIONS_DECL__ float max(const float a, const float b) { return fmaxf(a, b); } __MATH_FUNCTIONS_DECL__ double max(const double a, const double b) { return fmax(a, b); } __MATH_FUNCTIONS_DECL__ double max(const float a, const double b) { return fmax(static_cast(a), b); } __MATH_FUNCTIONS_DECL__ double max(const double a, const float b) { return fmax(a, static_cast(b)); } #if !defined(__CUDA_ARCH__) #if defined(_WIN32) #define __HELPER_FUNC_LINKAGE static inline __host__ __device__ #pragma warning (push) #pragma warning (disable : 4211) #else /* !defined(_WIN32) */ #define __HELPER_FUNC_LINKAGE inline __host__ __device__ #endif /* defined(_WIN32) */ __HELPER_FUNC_LINKAGE int min(const int a, const int b) { return (a < b) ? a : b; } __HELPER_FUNC_LINKAGE unsigned int umin(const unsigned int a, const unsigned int b) { return (a < b) ? a : b; } __HELPER_FUNC_LINKAGE long long int llmin(const long long int a, const long long int b) { return (a < b) ? a : b; } __HELPER_FUNC_LINKAGE unsigned long long int ullmin(const unsigned long long int a, const unsigned long long int b) { return (a < b) ? a : b; } __HELPER_FUNC_LINKAGE int max(const int a, const int b) { return (a > b) ? a : b; } __HELPER_FUNC_LINKAGE unsigned int umax(const unsigned int a, const unsigned int b) { return (a > b) ? a : b; } __HELPER_FUNC_LINKAGE long long int llmax(const long long int a, const long long int b) { return (a > b) ? a : b; } __HELPER_FUNC_LINKAGE unsigned long long int ullmax(const unsigned long long int a, const unsigned long long int b) { return (a > b) ? a : b; } #if defined(_WIN32) #pragma warning (pop) #endif /* defined(_WIN32) */ #undef __HELPER_FUNC_LINKAGE #endif /* !defined(__CUDA_ARCH__) */ #undef __MATH_FUNCTIONS_DECL__ #undef __MATH_FUNCTIONS_DEVICE_DECL__ /******************************************************************************* * * * * * * *******************************************************************************/ #endif /* __cplusplus && __CUDACC__ */ #if !defined(__CUDACC__) #include "host_defines.h" #include "math_constants.h" #define __cuda_INT_MAX \ ((int)((unsigned int)-1 >> 1)) /******************************************************************************* * * * ONLY FOR HOST CODE! NOT FOR DEVICE EXECUTION * * * *******************************************************************************/ #include #if defined(_WIN32) #pragma warning (push) #pragma warning (disable : 4211) #endif /* _WIN32 */ #if defined(_WIN32) || defined(__APPLE__) || defined (__ANDROID__) || defined(__QNX__) __func__(int __isnan(const double a)) { unsigned long long int l; memcpy(&l, &a, sizeof(double)); return (l << 1ULL) > 0xffe0000000000000ULL; } #endif /* _WIN32 || __APPLE__ || __ANDROID__ || __QNX__ */ #if defined(_WIN32) || defined(__APPLE__) || defined(__QNX__) /******************************************************************************* * * * HOST IMPLEMENTATION FOR DOUBLE ROUTINES FOR WINDOWS & APPLE PLATFORMS * * * *******************************************************************************/ __func__(double exp10(const double a)) { return pow(10.0, a); } __func__(float exp10f(const float a)) { return static_cast(exp10(static_cast(a))); } __func__(void sincos(const double a, double *sptr, double *cptr)) { *sptr = sin(a); *cptr = cos(a); } __func__(void sincosf(const float a, float *sptr, float *cptr)) { double s, c; sincos(static_cast(a), &s, &c); *sptr = static_cast(s); *cptr = static_cast(c); } __func__(int __isinf(const double a)) { unsigned long long int l; memcpy(&l, &a, sizeof(double)); return (l << 1ULL) == 0xffe0000000000000ULL; } #endif /* _WIN32 || __APPLE__ */ #if defined(_WIN32) || defined (__ANDROID__) #if (!defined(_MSC_VER) || _MSC_VER < 1800) __func__(double log2(const double a)) { return log(a) * 1.44269504088896340; } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #endif /* _WIN32 || __ANDROID__ */ #if defined(_WIN32) /******************************************************************************* * * * HOST IMPLEMENTATION FOR DOUBLE ROUTINES FOR WINDOWS PLATFORM * * * *******************************************************************************/ __func__(int __signbit(double a)) { signed long long int l; memcpy(&l, &a, sizeof(double)); return l < 0LL; } #if (!defined(_MSC_VER) || _MSC_VER < 1800) __func__(double copysign(double a, double b)) { unsigned long long int la, lb; memcpy(&la, &a, sizeof(double)); memcpy(&lb, &b, sizeof(double)); la = (la & 0x7fffffffffffffffULL) | (lb & 0x8000000000000000ULL); memcpy(&a, &la, sizeof(double)); return a; } #endif /* MSC_VER < 1800 */ __func__(int __finite(double a)) { unsigned long long int l; memcpy(&l, &a, sizeof(double)); return (l << 1ULL) < 0xffe0000000000000ULL; } #if (!defined(_MSC_VER) || _MSC_VER < 1800) __func__(double fmax(double a, double b)) { if (__isnan(a) && __isnan(b)) return a + b; if (__isnan(a)) return b; if (__isnan(b)) return a; if ((a == 0.0) && (b == 0.0) && __signbit(b)) return a; return a > b ? a : b; } __func__(double fmin(double a, double b)) { if (__isnan(a) && __isnan(b)) return a + b; if (__isnan(a)) return b; if (__isnan(b)) return a; if ((a == 0.0) && (b == 0.0) && __signbit(a)) return a; return a < b ? a : b; } __func__(double trunc(double a)) { return a < 0.0 ? ceil(a) : floor(a); } __func__(double round(double a)) { double fa = fabs(a); if (fa > CUDART_TWO_TO_52) { return a; } else { double u = floor(fa + 0.5); if (fa < 0.5) u = 0; u = copysign (u, a); return u; } } __func__(long int lround(double a)) { return static_cast(round(a)); } __func__(long long int llround(double a)) { return static_cast(round(a)); } __func__(double rint(double a)) { double fa = fabs(a); double u = CUDART_TWO_TO_52 + fa; if (fa >= CUDART_TWO_TO_52) { u = a; } else { u = u - CUDART_TWO_TO_52; u = copysign (u, a); } return u; } __func__(double nearbyint(double a)) { return rint(a); } __func__(long int lrint(double a)) { return static_cast(rint(a)); } __func__(long long int llrint(double a)) { return static_cast(rint(a)); } __func__(double fdim(double a, double b)) { if (a > b) { return (a - b); } else if (a <= b) { return 0.0; } else if (__isnan(a)) { return a; } else { return b; } } __func__(double scalbn(double a, int b)) { return ldexp(a, b); } __func__(double scalbln(double a, long int b)) { int t; if (b > 2147483647L) { t = 2147483647; } else if (b < (-2147483647 - 1)) { t = (-2147483647 - 1); } else { t = static_cast(b); } return scalbn(a, t); } __func__(double exp2(double a)) { return pow(2.0, a); } /* * The following is based on: David Goldberg, "What every computer scientist * should know about floating-point arithmetic", ACM Computing Surveys, Volume * 23, Issue 1, March 1991. */ __func__(double log1p(double a)) { volatile double u, m; u = 1.0 + a; if (u == 1.0) { /* a very close to zero */ u = a; } else { m = u - 1.0; u = log(u); if (a < 1.0) { /* a somewhat close to zero */ u = a * u; u = u / m; } } return u; } /* * This code based on: http://www.cs.berkeley.edu/~wkahan/Math128/Sumnfp.pdf */ __func__(double expm1(double a)) { volatile double u, m; u = exp(a); m = u - 1.0; if (m == 0.0) { /* a very close zero */ m = a; } else if (fabs(a) < 1.0) { /* a somewhat close zero */ u = log(u); m = m * a; m = m / u; } return m; } __func__(double cbrt(double a)) { double s, t; if (a == 0.0 || __isinf(a)) { return a; } s = fabs(a); t = exp2(CUDART_THIRD * log2(s)); /* initial approximation */ t = t - (t - (s / (t * t))) * CUDART_THIRD; /* refine approximation */ t = copysign(t, a); return t; } __func__(double acosh(double a)) { double s, t; t = a - 1.0; if (t == a) { return log(2.0) + log(a); } else { s = a + 1.0; t = t + sqrt(s * t); return log1p(t); } } __func__(double asinh(double a)) { double fa, oofa, t; fa = fabs(a); if (fa > 1e18) { t = log(2.0) + log(fa); } else { oofa = 1.0 / fa; t = fa + fa / (oofa + sqrt(1.0 + oofa * oofa)); t = log1p(t); } t = copysign(t, a); return t; } __func__(double atanh(double a)) { double fa, t; if (__isnan(a)) { return a + a; } fa = fabs(a); t = (2.0 * fa) / (1.0 - fa); t = 0.5 * log1p(t); if (__isnan(t) || !__signbit(a)) { return t; } return -t; } __func__(int ilogb(double a)) { unsigned long long int i; int expo = -1022; if (__isnan(a)) return -__cuda_INT_MAX-1; if (__isinf(a)) return __cuda_INT_MAX; memcpy(&i, &a, sizeof(double)); i = i & 0x7fffffffffffffffULL; if (i == 0) return -__cuda_INT_MAX-1; if (i >= 0x0010000000000000ULL) { return (int)(((i >> 52ULL) & 0x7ffU) - 1023); } while (i < 0x0010000000000000ULL) { expo--; i <<= 1; } return expo; } __func__(double logb(double a)) { unsigned long long int i; int expo = -1022; if (__isnan(a)) return a + a; if (__isinf(a)) return fabs(a); memcpy(&i, &a, sizeof(double)); i = i & 0x7fffffffffffffffULL; if (i == 0) return -1.0/fabs(a); if (i >= 0x0010000000000000ULL) { return (double)((int)((i >> 52ULL) & 0x7ffU) - 1023); } while (i < 0x0010000000000000ULL) { expo--; i <<= 1; } return static_cast(expo); } __func__(double remquo(double a, double b, int *quo)) { unsigned long long int aa, bb; int rem1 = 1; /* do FPREM1, a.k.a IEEE remainder */ int expo_a; int expo_b; unsigned long long mant_a; unsigned long long mant_b; unsigned long long mant_c; unsigned long long temp; int sign_a; int sign_b; int sign_c; int expo_c; int expodiff; int quot = 0; /* initialize quotient */ int l; int iter; memcpy(&aa, &a, sizeof(double)); mant_a = (aa << 11ULL) | 0x8000000000000000ULL; expo_a = (int)((aa >> 52ULL) & 0x7ffU) - 1023; sign_a = (int)(aa >> 63ULL); memcpy(&bb, &b, sizeof(double)); mant_b = (bb << 11ULL) | 0x8000000000000000ULL; expo_b = (int)((bb >> 52ULL) & 0x7ffU) - 1023; sign_b = (int)(bb >> 63ULL); sign_c = sign_a; /* remainder has sign of dividend */ expo_c = expo_a; /* default */ /* handled NaNs and infinities */ if (__isnan(a) || __isnan(b)) { *quo = quot; return a + b; } if (__isinf(a) || (b == 0.0)) { *quo = quot; aa = 0xfff8000000000000ULL; memcpy(&a, &aa, sizeof(double)); return a; } if ((a == 0.0) || (__isinf(b))) { *quo = quot; return a; } /* normalize denormals */ if (expo_a < -1022) { mant_a = mant_a + mant_a; while (mant_a < 0x8000000000000000ULL) { mant_a = mant_a + mant_a; expo_a--; } } if (expo_b < -1022) { mant_b = mant_b + mant_b; while (mant_b < 0x8000000000000000ULL) { mant_b = mant_b + mant_b; expo_b--; } } expodiff = expo_a - expo_b; /* clamp iterations if exponent difference negative */ if (expodiff < 0) { iter = -1; } else { iter = expodiff; } /* Shift dividend and divisor right by one bit to prevent overflow during the division algorithm. */ mant_a = mant_a >> 1ULL; mant_b = mant_b >> 1ULL; expo_c = expo_a - iter; /* default exponent of result */ /* Use binary longhand division (restoring) */ for (l = 0; l < (iter + 1); l++) { mant_a = mant_a - mant_b; if (mant_a & 0x8000000000000000ULL) { mant_a = mant_a + mant_b; quot = quot + quot; } else { quot = quot + quot + 1; } mant_a = mant_a + mant_a; } /* Save current remainder */ mant_c = mant_a; /* If remainder's mantissa is all zeroes, final result is zero. */ if (mant_c == 0) { quot = quot & 7; *quo = (sign_a ^ sign_b) ? -quot : quot; aa = static_cast(sign_c) << 63ULL; memcpy(&a, &aa, sizeof(double)); return a; } /* Normalize result */ while (!(mant_c & 0x8000000000000000ULL)) { mant_c = mant_c + mant_c; expo_c--; } /* For IEEE remainder (quotient rounded to nearest-even we might need to do a final subtraction of the divisor from the remainder. */ if (rem1 && ((expodiff+1) >= 0)) { temp = mant_a - mant_b; /* round quotient to nearest even */ if (((temp != 0ULL) && (!(temp & 0x8000000000000000ULL))) || ((temp == 0ULL) && (quot & 1))) { mant_a = mant_a >> 1ULL; quot++; /* Since the divisor is greater than the remainder, the result will have opposite sign of the dividend. To avoid a negative mantissa when subtracting the divisor from remainder, reverse subtraction */ sign_c = 1 ^ sign_c; expo_c = expo_a - iter + 1; mant_c = mant_b - mant_a; /* normalize result */ while (!(mant_c & 0x8000000000000000ULL)) { mant_c = mant_c + mant_c; expo_c--; } } } /* package up result */ if (expo_c >= -1022) { /* normal */ mant_c = ((mant_c >> 11ULL) + (((static_cast(sign_c)) << 63ULL) + (((unsigned long long)(expo_c + 1022)) << 52ULL))); } else { /* denormal */ mant_c = (((static_cast(sign_c)) << 63ULL) + (mant_c >> (unsigned long long)(11 - expo_c - 1022))); } quot = quot & 7; /* mask quotient down to least significant three bits */ *quo = (sign_a ^ sign_b) ? -quot : quot; memcpy(&a, &mant_c, sizeof(double)); return a; } __func__(double remainder(double a, double b)) { int quo; return remquo (a, b, &quo); } __func__(double fma (double a, double b, double c)) { struct { unsigned int lo; unsigned int hi; } xx, yy, zz, ww; double d; unsigned int s, t, u, prod0, prod1, prod2, prod3, expo_x, expo_y, expo_z; memcpy(&xx, &a, sizeof(double)); memcpy(&yy, &b, sizeof(double)); memcpy(&zz, &c, sizeof(double)); expo_z = 0x7FFU; t = xx.hi >> 20; expo_x = expo_z & t; expo_x = expo_x - 1; /* expo(x) - 1 */ t = yy.hi >> 20; expo_y = expo_z & t; expo_y = expo_y - 1; /* expo(y) - 1 */ t = zz.hi >> 20; expo_z = expo_z & t; expo_z = expo_z - 1; /* expo(z) - 1 */ if (!((expo_x <= 0x7FDU) && (expo_y <= 0x7FDU) && (expo_z <= 0x7FDU))) { /* fma (nan, y, z) --> nan fma (x, nan, z) --> nan fma (x, y, nan) --> nan */ if (((yy.hi << 1) | (yy.lo != 0)) > 0xffe00000U) { yy.hi |= 0x00080000U; memcpy(&d, &yy, sizeof(double)); return d; } if (((zz.hi << 1) | (zz.lo != 0)) > 0xffe00000U) { zz.hi |= 0x00080000U; memcpy(&d, &zz, sizeof(double)); return d; } if (((xx.hi << 1) | (xx.lo != 0)) > 0xffe00000U) { xx.hi |= 0x00080000U; memcpy(&d, &xx, sizeof(double)); return d; } /* fma (0, inf, z) --> INDEFINITE fma (inf, 0, z) --> INDEFINITE fma (-inf,+y,+inf) --> INDEFINITE fma (+x,-inf,+inf) --> INDEFINITE fma (+inf,-y,+inf) --> INDEFINITE fma (-x,+inf,+inf) --> INDEFINITE fma (-inf,-y,-inf) --> INDEFINITE fma (-x,-inf,-inf) --> INDEFINITE fma (+inf,+y,-inf) --> INDEFINITE fma (+x,+inf,-inf) --> INDEFINITE */ if (((((xx.hi << 1) | xx.lo) == 0) && (((yy.hi << 1) | (yy.lo != 0)) == 0xffe00000U)) || ((((yy.hi << 1) | yy.lo) == 0) && (((xx.hi << 1) | (xx.lo != 0)) == 0xffe00000U))) { xx.hi = 0xfff80000U; xx.lo = 0x00000000U; memcpy(&d, &xx, sizeof(double)); return d; } if (((zz.hi << 1) | (zz.lo != 0)) == 0xffe00000U) { if ((((yy.hi << 1) | (yy.lo != 0)) == 0xffe00000U) || (((xx.hi << 1) | (xx.lo != 0)) == 0xffe00000U)) { if ((int)(xx.hi ^ yy.hi ^ zz.hi) < 0) { xx.hi = 0xfff80000U; xx.lo = 0x00000000U; memcpy(&d, &xx, sizeof(double)); return d; } } } /* fma (inf, y, z) --> inf fma (x, inf, z) --> inf fma (x, y, inf) --> inf */ if (((xx.hi << 1) | (xx.lo != 0)) == 0xffe00000U) { xx.hi = xx.hi ^ (yy.hi & 0x80000000U); memcpy(&d, &xx, sizeof(double)); return d; } if (((yy.hi << 1) | (yy.lo != 0)) == 0xffe00000U) { yy.hi = yy.hi ^ (xx.hi & 0x80000000U); memcpy(&d, &yy, sizeof(double)); return d; } if (((zz.hi << 1) | (zz.lo != 0)) == 0xffe00000U) { memcpy(&d, &zz, sizeof(double)); return d; } /* fma (+0, -y, -0) --> -0 fma (-0, +y, -0) --> -0 fma (+x, -0, -0) --> -0 fma (-x, +0, -0) --> -0 */ if ((zz.hi == 0x80000000U) && (zz.lo == 0)) { if ((((xx.hi << 1) | xx.lo) == 0) || (((yy.hi << 1) | yy.lo) == 0)) { if ((int)(xx.hi ^ yy.hi) < 0) { memcpy(&d, &zz, sizeof(double)); return d; } } } /* fma (0, y, 0) --> +0 (-0 if round down and signs of addend differ) fma (x, 0, 0) --> +0 (-0 if round down and signs of addend differ) */ if ((((zz.hi << 1) | zz.lo) == 0) && ((((xx.hi << 1) | xx.lo) == 0) || (((yy.hi << 1) | yy.lo) == 0))) { zz.hi &= 0x7fffffffU; memcpy(&d, &zz, sizeof(double)); return d; } /* fma (0, y, z) --> z fma (x, 0, z) --> z */ if ((((xx.hi << 1) | xx.lo) == 0) || (((yy.hi << 1) | yy.lo) == 0)) { memcpy(&d, &zz, sizeof(double)); return d; } if (expo_x == 0xffffffffU) { expo_x++; t = xx.hi & 0x80000000U; s = xx.lo >> 21; xx.lo = xx.lo << 11; xx.hi = xx.hi << 11; xx.hi = xx.hi | s; if (!xx.hi) { xx.hi = xx.lo; xx.lo = 0; expo_x -= 32; } while (static_cast(xx.hi) > 0) { s = xx.lo >> 31; xx.lo = xx.lo + xx.lo; xx.hi = xx.hi + xx.hi; xx.hi = xx.hi | s; expo_x--; } xx.lo = (xx.lo >> 11); xx.lo |= (xx.hi << 21); xx.hi = (xx.hi >> 11) | t; } if (expo_y == 0xffffffffU) { expo_y++; t = yy.hi & 0x80000000U; s = yy.lo >> 21; yy.lo = yy.lo << 11; yy.hi = yy.hi << 11; yy.hi = yy.hi | s; if (!yy.hi) { yy.hi = yy.lo; yy.lo = 0; expo_y -= 32; } while (static_cast(yy.hi) > 0) { s = yy.lo >> 31; yy.lo = yy.lo + yy.lo; yy.hi = yy.hi + yy.hi; yy.hi = yy.hi | s; expo_y--; } yy.lo = (yy.lo >> 11); yy.lo |= (yy.hi << 21); yy.hi = (yy.hi >> 11) | t; } if (expo_z == 0xffffffffU) { expo_z++; t = zz.hi & 0x80000000U; s = zz.lo >> 21; zz.lo = zz.lo << 11; zz.hi = zz.hi << 11; zz.hi = zz.hi | s; if (!zz.hi) { zz.hi = zz.lo; zz.lo = 0; expo_z -= 32; } while (static_cast(zz.hi) > 0) { s = zz.lo >> 31; zz.lo = zz.lo + zz.lo; zz.hi = zz.hi + zz.hi; zz.hi = zz.hi | s; expo_z--; } zz.lo = (zz.lo >> 11); zz.lo |= (zz.hi << 21); zz.hi = (zz.hi >> 11) | t; } } expo_x = expo_x + expo_y; expo_y = xx.hi ^ yy.hi; t = xx.lo >> 21; xx.lo = xx.lo << 11; xx.hi = xx.hi << 11; xx.hi = xx.hi | t; yy.hi = yy.hi & 0x000fffffU; xx.hi = xx.hi | 0x80000000U; /* set mantissa hidden bit */ yy.hi = yy.hi | 0x00100000U; /* set mantissa hidden bit */ prod0 = xx.lo * yy.lo; prod1 =(unsigned)((static_cast(xx.lo)*static_cast(yy.lo))>>32ULL); prod2 = xx.hi * yy.lo; prod3 = xx.lo * yy.hi; prod1 += prod2; t = (unsigned)(prod1 < prod2); prod1 += prod3; t += prod1 < prod3; prod2 =(unsigned)((static_cast(xx.hi)*static_cast(yy.lo))>>32ULL); prod3 =(unsigned)((static_cast(xx.lo)*static_cast(yy.hi))>>32ULL); prod2 += prod3; s = (unsigned)(prod2 < prod3); prod3 = xx.hi * yy.hi; prod2 += prod3; s += prod2 < prod3; prod2 += t; s += prod2 < t; prod3 =(unsigned)((static_cast(xx.hi)*static_cast(yy.hi))>>32ULL); prod3 = prod3 + s; yy.lo = prod0; /* mantissa */ yy.hi = prod1; /* mantissa */ xx.lo = prod2; /* mantissa */ xx.hi = prod3; /* mantissa */ expo_x = expo_x - (1023 - 2); /* expo-1 */ expo_y = expo_y & 0x80000000U; /* sign */ if (xx.hi < 0x00100000U) { s = xx.lo >> 31; s = (xx.hi << 1) + s; xx.hi = s; s = yy.hi >> 31; s = (xx.lo << 1) + s; xx.lo = s; s = yy.lo >> 31; s = (yy.hi << 1) + s; yy.hi = s; s = yy.lo << 1; yy.lo = s; expo_x--; } t = 0; if (((zz.hi << 1) | zz.lo) != 0) { /* z is not zero */ s = zz.hi & 0x80000000U; zz.hi &= 0x000fffffU; zz.hi |= 0x00100000U; ww.hi = 0; ww.lo = 0; /* compare and swap. put augend into xx:yy */ if (static_cast(expo_z) > static_cast(expo_x)) { t = expo_z; expo_z = expo_x; expo_x = t; t = zz.hi; zz.hi = xx.hi; xx.hi = t; t = zz.lo; zz.lo = xx.lo; xx.lo = t; t = ww.hi; ww.hi = yy.hi; yy.hi = t; t = ww.lo; ww.lo = yy.lo; yy.lo = t; t = expo_y; expo_y = s; s = t; } /* augend_sign = expo_y, augend_mant = xx:yy, augend_expo = expo_x */ /* addend_sign = s, addend_mant = zz:ww, addend_expo = expo_z */ expo_z = expo_x - expo_z; u = expo_y ^ s; if (expo_z <= 107) { /* denormalize addend */ t = 0; while (expo_z >= 32) { t = ww.lo | (t != 0); ww.lo = ww.hi; ww.hi = zz.lo; zz.lo = zz.hi; zz.hi = 0; expo_z -= 32; } if (expo_z) { t = (t >> expo_z) | (ww.lo << (32 - expo_z)) | ((t << (32 - expo_z)) != 0); ww.lo = (ww.lo >> expo_z) | (ww.hi << (32 - expo_z)); ww.hi = (ww.hi >> expo_z) | (zz.lo << (32 - expo_z)); zz.lo = (zz.lo >> expo_z) | (zz.hi << (32 - expo_z)); zz.hi = (zz.hi >> expo_z); } } else { t = 1; ww.lo = 0; ww.hi = 0; zz.lo = 0; zz.hi = 0; } if (static_cast(u) < 0) { /* signs differ, effective subtraction */ t = (unsigned)(-static_cast(t)); s = (unsigned)(t != 0); u = yy.lo - s; s = (unsigned)(u > yy.lo); yy.lo = u - ww.lo; s += yy.lo > u; u = yy.hi - s; s = (unsigned)(u > yy.hi); yy.hi = u - ww.hi; s += yy.hi > u; u = xx.lo - s; s = (unsigned)(u > xx.lo); xx.lo = u - zz.lo; s += xx.lo > u; xx.hi = (xx.hi - zz.hi) - s; if (!(xx.hi | xx.lo | yy.hi | yy.lo | t)) { /* complete cancelation, return 0 */ memcpy(&d, &xx, sizeof(double)); return d; } if (static_cast(xx.hi) < 0) { /* Oops, augend had smaller mantissa. Negate mantissa and flip sign of result */ t = ~t; yy.lo = ~yy.lo; yy.hi = ~yy.hi; xx.lo = ~xx.lo; xx.hi = ~xx.hi; if (++t == 0) { if (++yy.lo == 0) { if (++yy.hi == 0) { if (++xx.lo == 0) { ++xx.hi; } } } } expo_y ^= 0x80000000U; } /* normalize mantissa, if necessary */ while (!(xx.hi & 0x00100000U)) { xx.hi = (xx.hi << 1) | (xx.lo >> 31); xx.lo = (xx.lo << 1) | (yy.hi >> 31); yy.hi = (yy.hi << 1) | (yy.lo >> 31); yy.lo = (yy.lo << 1); expo_x--; } } else { /* signs are the same, effective addition */ yy.lo = yy.lo + ww.lo; s = (unsigned)(yy.lo < ww.lo); yy.hi = yy.hi + s; u = (unsigned)(yy.hi < s); yy.hi = yy.hi + ww.hi; u += yy.hi < ww.hi; xx.lo = xx.lo + u; s = (unsigned)(xx.lo < u); xx.lo = xx.lo + zz.lo; s += xx.lo < zz.lo; xx.hi = xx.hi + zz.hi + s; if (xx.hi & 0x00200000U) { t = t | (yy.lo << 31); yy.lo = (yy.lo >> 1) | (yy.hi << 31); yy.hi = (yy.hi >> 1) | (xx.lo << 31); xx.lo = (xx.lo >> 1) | (xx.hi << 31); xx.hi = ((xx.hi & 0x80000000U) | (xx.hi >> 1)) & ~0x40000000U; expo_x++; } } } t = yy.lo | (t != 0); t = yy.hi | (t != 0); xx.hi |= expo_y; /* or in sign bit */ if (expo_x <= 0x7FDU) { /* normal */ xx.hi = xx.hi & ~0x00100000U; /* lop off integer bit */ s = xx.lo & 1; /* mantissa lsb */ u = xx.lo; xx.lo += (t == 0x80000000U) ? s : (t >> 31); xx.hi += (u > xx.lo); xx.hi += ((expo_x + 1) << 20); memcpy(&d, &xx, sizeof(double)); return d; } else if (static_cast(expo_x) >= 2046) { /* overflow */ xx.hi = (xx.hi & 0x80000000U) | 0x7ff00000U; xx.lo = 0; memcpy(&d, &xx, sizeof(double)); return d; } /* subnormal */ expo_x = (unsigned)(-static_cast(expo_x)); if (expo_x > 54) { xx.hi = xx.hi & 0x80000000U; xx.lo = 0; memcpy(&d, &xx, sizeof(double)); return d; } yy.hi = xx.hi & 0x80000000U; /* save sign bit */ xx.hi = xx.hi & ~0xffe00000U; if (expo_x >= 32) { t = xx.lo | (t != 0); xx.lo = xx.hi; xx.hi = 0; expo_x -= 32; } if (expo_x) { t = (t >> expo_x) | (xx.lo << (32 - expo_x)) | (t != 0); xx.lo = (xx.lo >> expo_x) | (xx.hi << (32 - expo_x)); xx.hi = (xx.hi >> expo_x); } expo_x = xx.lo & 1; u = xx.lo; xx.lo += (t == 0x80000000U) ? expo_x : (t >> 31); xx.hi += (u > xx.lo); xx.hi |= yy.hi; memcpy(&d, &xx, sizeof(double)); return d; } __func__(double nextafter(double a, double b)) { unsigned long long int ia; unsigned long long int ib; memcpy(&ia, &a, sizeof(double)); memcpy(&ib, &b, sizeof(double)); if (__isnan(a) || __isnan(b)) return a + b; /* NaN */ if (((ia | ib) << 1ULL) == 0ULL) return b; if (a == 0.0) { return copysign (4.9406564584124654e-324, b); /* crossover */ } if ((a < b) && (a < 0.0)) ia--; if ((a < b) && (a > 0.0)) ia++; if ((a > b) && (a < 0.0)) ia++; if ((a > b) && (a > 0.0)) ia--; memcpy(&a, &ia, sizeof(double)); return a; } __func__(double erf(double a)) { double t, r, q; t = fabs(a); if (t >= 1.0) { r = -1.28836351230756500E-019; r = r * t + 1.30597472161093370E-017; r = r * t - 6.33924401259620500E-016; r = r * t + 1.96231865908940140E-014; r = r * t - 4.35272243559990750E-013; r = r * t + 7.37083927929352150E-012; r = r * t - 9.91402142550461630E-011; r = r * t + 1.08817017167760820E-009; r = r * t - 9.93918713097634620E-009; r = r * t + 7.66739923255145500E-008; r = r * t - 5.05440278302806720E-007; r = r * t + 2.87474157099000620E-006; r = r * t - 1.42246725399722510E-005; r = r * t + 6.16994555079419460E-005; r = r * t - 2.36305221938908790E-004; r = r * t + 8.05032844055371070E-004; r = r * t - 2.45833366629108140E-003; r = r * t + 6.78340988296706120E-003; r = r * t - 1.70509103597554640E-002; r = r * t + 3.93322852515666300E-002; r = r * t - 8.37271292613764040E-002; r = r * t + 1.64870423707623280E-001; r = r * t - 2.99729521787681470E-001; r = r * t + 4.99394435612628580E-001; r = r * t - 7.52014596480123030E-001; r = r * t + 9.99933138314926250E-001; r = r * t - 1.12836725321102670E+000; r = r * t + 9.99998988715182450E-001; q = exp (-t * t); r = 1.0 - r * q; if (t >= 6.5) { r = 1.0; } a = copysign (r, a); } else { q = a * a; r = -7.77946848895991420E-010; r = r * q + 1.37109803980285950E-008; r = r * q - 1.62063137584932240E-007; r = r * q + 1.64471315712790040E-006; r = r * q - 1.49247123020098620E-005; r = r * q + 1.20552935769006260E-004; r = r * q - 8.54832592931448980E-004; r = r * q + 5.22397760611847340E-003; r = r * q - 2.68661706431114690E-002; r = r * q + 1.12837916709441850E-001; r = r * q - 3.76126389031835210E-001; r = r * q + 1.12837916709551260E+000; a = r * a; } return a; } __func__(double erfc(double a)) { double p, q, h, l; if (a < 0.75) { return 1.0 - erf(a); } if (a > 27.3) { return 0.0; } if (a < 5.0) { double t; t = 1.0 / a; p = 1.9759923722227928E-008; p = p * t - 1.0000002670474897E+000; p = p * t - 7.4935303236347828E-001; p = p * t - 1.5648136328071860E-001; p = p * t + 1.2871196242447239E-001; p = p * t + 1.1126459974811195E-001; p = p * t + 4.0678642255914332E-002; p = p * t + 7.9915414156678296E-003; p = p * t + 7.1458332107840234E-004; q = t + 2.7493547525030619E+000; q = q * t + 3.3984254815725423E+000; q = q * t + 2.4635304979947761E+000; q = q * t + 1.1405284734691286E+000; q = q * t + 3.4130157606195649E-001; q = q * t + 6.2250967676044953E-002; q = q * t + 5.5661370941268700E-003; q = q * t + 1.0575248365468671E-009; p = p / q; p = p * t; h = ((int)(a * 16.0)) * 0.0625; l = (a - h) * (a + h); q = exp(-h * h) * exp(-l); q = q * 0.5; p = p * q + q; p = p * t; } else { double ooa, ooasq; ooa = 1.0 / a; ooasq = ooa * ooa; p = -4.0025406686930527E+005; p = p * ooasq + 1.4420582543942123E+005; p = p * ooasq - 2.7664185780951841E+004; p = p * ooasq + 4.1144611644767283E+003; p = p * ooasq - 5.8706000519209351E+002; p = p * ooasq + 9.1490086446323375E+001; p = p * ooasq - 1.6659491387740221E+001; p = p * ooasq + 3.7024804085481784E+000; p = p * ooasq - 1.0578553994424316E+000; p = p * ooasq + 4.2314218745087778E-001; p = p * ooasq - 2.8209479177354962E-001; p = p * ooasq + 5.6418958354775606E-001; h = a * a; h = ((int)(a * 16.0)) * 0.0625; l = (a - h) * (a + h); q = exp(-h * h) * exp(-l); p = p * ooa; p = p * q; } return p; } __func__(double lgamma(double a)) { double s; double t; double i; double fa; double sum; long long int quot; if (__isnan(a) || __isinf(a)) { return a * a; } fa = fabs(a); if (fa >= 3.0) { if (fa >= 8.0) { /* Stirling approximation; coefficients from Hart et al, "Computer * Approximations", Wiley 1968. Approximation 5404. */ s = 1.0 / fa; t = s * s; sum = -0.1633436431e-2; sum = sum * t + 0.83645878922e-3; sum = sum * t - 0.5951896861197e-3; sum = sum * t + 0.793650576493454e-3; sum = sum * t - 0.277777777735865004e-2; sum = sum * t + 0.833333333333331018375e-1; sum = sum * s + 0.918938533204672; s = 0.5 * log (fa); t = fa - 0.5; s = s * t; t = s - fa; s = s + sum; t = t + s; } else { i = fa - 3.0; s = -4.02412642744125560E+003; s = s * i - 2.97693796998962000E+005; s = s * i - 6.38367087682528790E+006; s = s * i - 5.57807214576539320E+007; s = s * i - 2.24585140671479230E+008; s = s * i - 4.70690608529125090E+008; s = s * i - 7.62587065363263010E+008; s = s * i - 9.71405112477113250E+008; t = i - 1.02277248359873170E+003; t = t * i - 1.34815350617954480E+005; t = t * i - 4.64321188814343610E+006; t = t * i - 6.48011106025542540E+007; t = t * i - 4.19763847787431360E+008; t = t * i - 1.25629926018000720E+009; t = t * i - 1.40144133846491690E+009; t = s / t; t = t + i; } } else if (fa >= 1.5) { i = fa - 2.0; t = 9.84839283076310610E-009; t = t * i - 6.69743850483466500E-008; t = t * i + 2.16565148880011450E-007; t = t * i - 4.86170275781575260E-007; t = t * i + 9.77962097401114400E-007; t = t * i - 2.03041287574791810E-006; t = t * i + 4.36119725805364580E-006; t = t * i - 9.43829310866446590E-006; t = t * i + 2.05106878496644220E-005; t = t * i - 4.49271383742108440E-005; t = t * i + 9.94570466342226000E-005; t = t * i - 2.23154589559238440E-004; t = t * i + 5.09669559149637430E-004; t = t * i - 1.19275392649162300E-003; t = t * i + 2.89051032936815490E-003; t = t * i - 7.38555102806811700E-003; t = t * i + 2.05808084278121250E-002; t = t * i - 6.73523010532073720E-002; t = t * i + 3.22467033424113040E-001; t = t * i + 4.22784335098467190E-001; t = t * i; } else if (fa >= 0.7) { i = 1.0 - fa; t = 1.17786911519331130E-002; t = t * i + 3.89046747413522300E-002; t = t * i + 5.90045711362049900E-002; t = t * i + 6.02143305254344420E-002; t = t * i + 5.61652708964839180E-002; t = t * i + 5.75052755193461370E-002; t = t * i + 6.21061973447320710E-002; t = t * i + 6.67614724532521880E-002; t = t * i + 7.14856037245421020E-002; t = t * i + 7.69311251313347100E-002; t = t * i + 8.33503129714946310E-002; t = t * i + 9.09538288991182800E-002; t = t * i + 1.00099591546322310E-001; t = t * i + 1.11334278141734510E-001; t = t * i + 1.25509666613462880E-001; t = t * i + 1.44049896457704160E-001; t = t * i + 1.69557177031481600E-001; t = t * i + 2.07385551032182120E-001; t = t * i + 2.70580808427600350E-001; t = t * i + 4.00685634386517050E-001; t = t * i + 8.22467033424113540E-001; t = t * i + 5.77215664901532870E-001; t = t * i; } else { t = -9.04051686831357990E-008; t = t * fa + 7.06814224969349250E-007; t = t * fa - 3.80702154637902830E-007; t = t * fa - 2.12880892189316100E-005; t = t * fa + 1.29108470307156190E-004; t = t * fa - 2.15932815215386580E-004; t = t * fa - 1.16484324388538480E-003; t = t * fa + 7.21883433044470670E-003; t = t * fa - 9.62194579514229560E-003; t = t * fa - 4.21977386992884450E-002; t = t * fa + 1.66538611813682460E-001; t = t * fa - 4.20026350606819980E-002; t = t * fa - 6.55878071519427450E-001; t = t * fa + 5.77215664901523870E-001; t = t * fa; t = t * fa + fa; t = -log (t); } if (a >= 0.0) return t; if (fa < 1e-19) return -log(fa); i = floor(fa); if (fa == i) return 1.0 / (fa - i); /* a is an integer: return infinity */ i = rint (2.0 * fa); quot = static_cast(i); i = fa - 0.5 * i; i = i * CUDART_PI; if (quot & 1) { i = cos(i); } else { i = sin(i); } i = fabs(i); t = log(CUDART_PI / (i * fa)) - t; return t; } __func__(unsigned long long int __internal_host_nan_kernel(const char *s)) { unsigned long long i = 0; int c; int ovfl = 0; int invld = 0; if (s && (*s == '0')) { s++; if ((*s == 'x') || (*s == 'X')) { s++; while (*s == '0') s++; while (*s) { if (i > 0x0fffffffffffffffULL) { ovfl = 1; } c = (((*s) >= 'A') && ((*s) <= 'F')) ? (*s + 'a' - 'A') : (*s); if ((c >= 'a') && (c <= 'f')) { c = c - 'a' + 10; i = i * 16 + c; } else if ((c >= '0') && (c <= '9')) { c = c - '0'; i = i * 16 + c; } else { invld = 1; } s++; } } else { while (*s == '0') s++; while (*s) { if (i > 0x1fffffffffffffffULL) { ovfl = 1; } c = *s; if ((c >= '0') && (c <= '7')) { c = c - '0'; i = i * 8 + c; } else { invld = 1; } s++; } } } else if (s) { while (*s) { c = *s; if ((i > 1844674407370955161ULL) || ((i == 1844674407370955161ULL) && (c > '5'))) { ovfl = 1; } if ((c >= '0') && (c <= '9')) { c = c - '0'; i = i * 10 + c; } else { invld = 1; } s++; } } if (ovfl) { i = ~0ULL; } if (invld) { i = 0ULL; } i = (i & 0x000fffffffffffffULL) | 0x7ff8000000000000ULL; return i; } __func__(double nan(const char *tagp)) { unsigned long long l; double d; l = __internal_host_nan_kernel(tagp); memcpy(&d, &l, sizeof(double)); return d; } __func__(double __host_tgamma_kernel(double a)) { double t; t = - 4.4268934071252475E-010; t = t * a - 2.0266591846658954E-007; t = t * a + 1.1381211721119527E-006; t = t * a - 1.2507734816630748E-006; t = t * a - 2.0136501740408771E-005; t = t * a + 1.2805012607354486E-004; t = t * a - 2.1524140811527418E-004; t = t * a - 1.1651675459704604E-003; t = t * a + 7.2189432248466381E-003; t = t * a - 9.6219715326862632E-003; t = t * a - 4.2197734554722394E-002; t = t * a + 1.6653861138250356E-001; t = t * a - 4.2002635034105444E-002; t = t * a - 6.5587807152025712E-001; t = t * a + 5.7721566490153287E-001; t = t * a + 1.0000000000000000E+000; return t; } __func__(double __host_stirling_poly(double a)) { double x = 1.0 / a; double z = 0.0; z = + 8.3949872067208726e-004; z = z * x - 5.1717909082605919e-005; z = z * x - 5.9216643735369393e-004; z = z * x + 6.9728137583658571e-005; z = z * x + 7.8403922172006662e-004; z = z * x - 2.2947209362139917e-004; z = z * x - 2.6813271604938273e-003; z = z * x + 3.4722222222222220e-003; z = z * x + 8.3333333333333329e-002; z = z * x + 1.0000000000000000e+000; return z; } __func__(double __host_tgamma_stirling(double a)) { double z; double x; z = __host_stirling_poly (a); if (a < 142.0) { x = pow (a, a - 0.5); a = x * exp (-a); a = a * CUDART_SQRT_2PI; return a * z; } else if (a < 172.0) { x = pow (a, 0.5 * a - 0.25); a = x * exp (-a); a = a * CUDART_SQRT_2PI; a = a * z; return a * x; } else { return exp(1000.0); /* INF */ } } __func__(double tgamma(double a)) { double s, xx, x = a; if (__isnan(a)) { return a + a; } if (fabs(x) < 20.0) { if (x >= 0.0) { s = 1.0; xx = x; while (xx > 1.5) { xx = xx - 1.0; s = s * xx; } if (x >= 0.5) { xx = xx - 1.0; } xx = __host_tgamma_kernel (xx); if (x < 0.5) { xx = xx * x; } s = s / xx; } else { xx = x; s = xx; if (x == floor(x)) { return 0.0 / (x - floor(x)); } while (xx < -0.5) { xx = xx + 1.0; s = s * xx; } xx = __host_tgamma_kernel (xx); s = s * xx; s = 1.0 / s; } return s; } else { if (x >= 0.0) { return __host_tgamma_stirling (x); } else { double t; int quot; if (x == floor(x)) { return 0.0 / (x - floor(x)); } if (x < -185.0) { int negative; x = floor(x); negative = ((x - (2.0 * floor(0.5 * x))) == 1.0); return negative ? (-1.0 / 1e308 / 1e308) : CUDART_ZERO; } /* compute sin(pi*x) accurately */ xx = rint (2.0 * x); quot = static_cast(xx); xx = -0.5 * xx + x; xx = xx * CUDART_PI; if (quot & 1) { xx = cos (xx); } else { xx = sin (xx); } if (quot & 2) { xx = -xx; } x = fabs (x); s = exp (-x); t = x - 0.5; if (x > 140.0) t = 0.5 * t; t = pow (x, t); if (x > 140.0) s = s * t; s = s * __host_stirling_poly (x); s = s * x; s = s * xx; s = 1.0 / s; s = s * CUDART_SQRT_PIO2; s = s / t; return s; } } } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ /******************************************************************************* * * * HOST IMPLEMENTATION FOR FLOAT AND LONG DOUBLE ROUTINES FOR WINDOWS PLATFORM * * MAP FLOAT AND LONG DOUBLE ROUTINES TO DOUBLE ROUTINES * * * *******************************************************************************/ __func__(int __signbitl(const long double a)) { return __signbit(static_cast(a)); } __func__(int __signbitf(const float a)) { return __signbit(static_cast(a)); } __func__(int __finitel(const long double a)) { return __finite(static_cast(a)); } __func__(int __finitef(const float a)) { return __finite(static_cast(a)); } __func__(int __isinfl(const long double a)) { return __isinf(static_cast(a)); } __func__(int __isinff(const float a)) { return __isinf(static_cast(a)); } __func__(int __isnanl(const long double a)) { return __isnan(static_cast(a)); } __func__(int __isnanf(const float a)) { return __isnan(static_cast(a)); } #if (!defined(_MSC_VER) || _MSC_VER < 1800) __func__(float fmaxf(const float a, const float b)) { return static_cast(fmax(static_cast(a), static_cast(b))); } __func__(float fminf(const float a, const float b)) { return static_cast(fmin(static_cast(a), static_cast(b))); } __func__(float roundf(const float a)) { return static_cast(round(static_cast(a))); } __func__(long int lroundf(const float a)) { return lround(static_cast(a)); } __func__(long long int llroundf(const float a)) { return llround(static_cast(a)); } __func__(float truncf(const float a)) { return static_cast(trunc(static_cast(a))); } __func__(float rintf(const float a)) { return static_cast(rint(static_cast(a))); } __func__(float nearbyintf(const float a)) { return static_cast(nearbyint(static_cast(a))); } __func__(long int lrintf(const float a)) { return lrint(static_cast(a)); } __func__(long long int llrintf(const float a)) { return llrint(static_cast(a)); } __func__(float logbf(const float a)) { return static_cast(logb(static_cast(a))); } __func__(float scalblnf(const float a, const long int b)) { return static_cast(scalbln(static_cast(a), b)); } __func__(float log2f(const float a)) { return static_cast(log2(static_cast(a))); } __func__(float exp2f(const float a)) { return static_cast(exp2(static_cast(a))); } __func__(float acoshf(const float a)) { return static_cast(acosh(static_cast(a))); } __func__(float asinhf(const float a)) { return static_cast(asinh(static_cast(a))); } __func__(float atanhf(const float a)) { return static_cast(atanh(static_cast(a))); } __func__(float cbrtf(const float a)) { return static_cast(cbrt(static_cast(a))); } __func__(float expm1f(const float a)) { return static_cast(expm1(static_cast(a))); } __func__(float fdimf(const float a, const float b)) { return static_cast(fdim(static_cast(a), static_cast(b))); } __func__(float log1pf(const float a)) { return static_cast(log1p(static_cast(a))); } __func__(float scalbnf(const float a, const int b)) { return static_cast(scalbn(static_cast(a), b)); } __func__(float fmaf(const float a, const float b, const float c)) { return static_cast(fma(static_cast(a), static_cast(b), static_cast(c))); } __func__(int ilogbf(const float a)) { return ilogb(static_cast(a)); } __func__(float erff(const float a)) { return static_cast(erf(static_cast(a))); } __func__(float erfcf(const float a)) { return static_cast(erfc(static_cast(a))); } __func__(float lgammaf(const float a)) { return static_cast(lgamma(static_cast(a))); } __func__(float tgammaf(const float a)) { return static_cast(tgamma(static_cast(a))); } __func__(float remquof(const float a, const float b, int *quo)) { return static_cast(remquo(static_cast(a), static_cast(b), quo)); } __func__(float remainderf(const float a, const float b)) { return static_cast(remainder(static_cast(a), static_cast(b))); } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #if (defined _MSC_VER) && (_MSC_VER >= 1700) __func__(float j0f(const float a)) { return static_cast(_j0(static_cast(a))); } __func__(float j1f(const float a)) { return static_cast(_j1(static_cast(a))); } __func__(float jnf(const int n, const float a)) { return static_cast(_jn(n, static_cast(a))); } __func__(float y0f(const float a)) { return static_cast(_y0(static_cast(a))); } __func__(float y1f(const float a)) { return static_cast(_y1(static_cast(a))); } __func__(float ynf(const int n, const float a)) { return static_cast(_yn(n, static_cast(a))); } #endif /* (defined _MSC_VER) && (_MSC_VER >= 1700) */ /******************************************************************************* * * * HOST IMPLEMENTATION FOR FLOAT ROUTINES FOR WINDOWS PLATFORM * * * *******************************************************************************/ #if (!defined(_MSC_VER) || _MSC_VER < 1800) __func__(float copysignf(float a, const float b)) { unsigned int aa, bb; memcpy(&aa, &a, sizeof(float)); memcpy(&bb, &b, sizeof(float)); aa = (aa & ~0x80000000U) | (bb & 0x80000000U); memcpy(&a, &aa, sizeof(float)); return a; } __func__(float nextafterf(float a, const float b)) { unsigned int ia; unsigned int ib; memcpy(&ia, &a, sizeof(float)); memcpy(&ib, &b, sizeof(float)); if (__isnanf(a) || __isnanf(b)) return a + b; /*NaN*/ if (((ia | ib) << 1U) == 0U) return b; if (a == 0.0F) { return copysignf(1.401298464e-045F, b); /*crossover*/ } if ((a < b) && (a < 0.0F)) ia--; if ((a < b) && (a > 0.0F)) ia++; if ((a > b) && (a < 0.0F)) ia++; if ((a > b) && (a > 0.0F)) ia--; memcpy(&a, &ia, sizeof(float)); return a; } __func__(float nanf(const char *tagp)) { float f; unsigned int i; i = static_cast(__internal_host_nan_kernel(tagp)); i = (i & 0x007fffffU) | 0x7fc00000U; memcpy(&f, &i, sizeof(float)); return f; } #endif /* (!defined(_MSC_VER) || _MSC_VER < 1800) */ #endif /* _WIN32 */ /******************************************************************************* * * * HOST IMPLEMENTATION FOR DOUBLE AND FLOAT ROUTINES. ALL PLATFORMS * * * *******************************************************************************/ __func__(double rsqrt(const double a)) { return 1.0 / sqrt(a); } __func__(double rcbrt(const double a)) { double s, t; if (__isnan(a)) { return a + a; } if (a == 0.0 || __isinf(a)) { return 1.0 / a; } s = fabs(a); t = exp2(-CUDART_THIRD * log2(s)); /* initial approximation */ t = ((t*t) * (-s*t) + 1.0) * (CUDART_THIRD*t) + t;/* refine approximation */ #if defined(__APPLE__) if (__signbitd(a)) #else /* __APPLE__ */ if (__signbit(a)) #endif /* __APPLE__ */ { t = -t; } return t; } __func__(double sinpi(double a)) { int n; if (__isnan(a)) { return a + a; } if (a == 0.0 || __isinf(a)) { return sin (a); } if (a == floor(a)) { return ((a / 1.0e308) / 1.0e308) / 1.0e308; } double twoa = a + a; double rtwoa = round(twoa); long long int l = (long long int)rtwoa; n = (int)l; a -= rtwoa * 0.5; a = a * CUDART_PI; if (n & 1) { a = cos (a); } else { a = sin (a); } if (n & 2) { a = -a; } return a; } __func__(double cospi(double a)) { int n; if (__isnan(a)) { return a + a; } if (__isinf(a)) { return cos (a); } if (fabs(a) > 9.0071992547409920e+015) { a = 0.0; } double twoa = a + a; double rtwoa = round(twoa); long long int l = (long long int)rtwoa; n = (int)l; a -= rtwoa * 0.5; a = a * CUDART_PI; n++; if (n & 1) { a = cos (a); } else { a = sin (a); } if (n & 2) { a = -a; } if (a == 0.0) { a = fabs(a); } return a; } __func__(void sincospi(const double a, double *sptr, double *cptr)) { *sptr = sinpi(a); *cptr = cospi(a); } __func__(double erfinv(const double a)) { double p, q, t, fa; unsigned long long int l; fa = fabs(a); if (fa >= 1.0) { l = 0xfff8000000000000ULL; memcpy(&t, &l, sizeof(double)); /* INDEFINITE */ if (fa == 1.0) { t = a * exp(1000.0); /* Infinity */ } } else if (fa >= 0.9375) { /* Based on: J.M. Blair, C.A. Edwards, J.H. Johnson: Rational Chebyshev Approximations for the Inverse of the Error Function. Mathematics of Computation, Vol. 30, No. 136 (Oct. 1976), pp. 827-830. Table 59 */ t = log1p(-fa); t = 1.0 / sqrt(-t); p = 2.7834010353747001060e-3; p = p * t + 8.6030097526280260580e-1; p = p * t + 2.1371214997265515515e+0; p = p * t + 3.1598519601132090206e+0; p = p * t + 3.5780402569085996758e+0; p = p * t + 1.5335297523989890804e+0; p = p * t + 3.4839207139657522572e-1; p = p * t + 5.3644861147153648366e-2; p = p * t + 4.3836709877126095665e-3; p = p * t + 1.3858518113496718808e-4; p = p * t + 1.1738352509991666680e-6; q = t + 2.2859981272422905412e+0; q = q * t + 4.3859045256449554654e+0; q = q * t + 4.6632960348736635331e+0; q = q * t + 3.9846608184671757296e+0; q = q * t + 1.6068377709719017609e+0; q = q * t + 3.5609087305900265560e-1; q = q * t + 5.3963550303200816744e-2; q = q * t + 4.3873424022706935023e-3; q = q * t + 1.3858762165532246059e-4; q = q * t + 1.1738313872397777529e-6; t = p / (q * t); if (a < 0.0) t = -t; } else if (fa >= 0.75) { /* Based on: J.M. Blair, C.A. Edwards, J.H. Johnson: Rational Chebyshev Approximations for the Inverse of the Error Function. Mathematics of Computation, Vol. 30, No. 136 (Oct. 1976), pp. 827-830. Table 39 */ t = a * a - .87890625; p = .21489185007307062000e+0; p = p * t - .64200071507209448655e+1; p = p * t + .29631331505876308123e+2; p = p * t - .47644367129787181803e+2; p = p * t + .34810057749357500873e+2; p = p * t - .12954198980646771502e+2; p = p * t + .25349389220714893917e+1; p = p * t - .24758242362823355486e+0; p = p * t + .94897362808681080020e-2; q = t - .12831383833953226499e+2; q = q * t + .41409991778428888716e+2; q = q * t - .53715373448862143349e+2; q = q * t + .33880176779595142685e+2; q = q * t - .11315360624238054876e+2; q = q * t + .20369295047216351160e+1; q = q * t - .18611650627372178511e+0; q = q * t + .67544512778850945940e-2; p = p / q; t = a * p; } else { /* Based on: J.M. Blair, C.A. Edwards, J.H. Johnson: Rational Chebyshev Approximations for the Inverse of the Error Function. Mathematics of Computation, Vol. 30, No. 136 (Oct. 1976), pp. 827-830. Table 18 */ t = a * a - .5625; p = - .23886240104308755900e+2; p = p * t + .45560204272689128170e+3; p = p * t - .22977467176607144887e+4; p = p * t + .46631433533434331287e+4; p = p * t - .43799652308386926161e+4; p = p * t + .19007153590528134753e+4; p = p * t - .30786872642313695280e+3; q = t - .83288327901936570000e+2; q = q * t + .92741319160935318800e+3; q = q * t - .35088976383877264098e+4; q = q * t + .59039348134843665626e+4; q = q * t - .48481635430048872102e+4; q = q * t + .18997769186453057810e+4; q = q * t - .28386514725366621129e+3; p = p / q; t = a * p; } return t; } __func__(double erfcinv(const double a)) { double t; unsigned long long int l; if (__isnan(a)) { return a + a; } if (a <= 0.0) { l = 0xfff8000000000000ULL; memcpy(&t, &l, sizeof(double)); /* INDEFINITE */ if (a == 0.0) { t = (1.0 - a) * exp(1000.0); /* Infinity */ } } else if (a >= 0.0625) { t = erfinv (1.0 - a); } else if (a >= 1e-100) { /* Based on: J.M. Blair, C.A. Edwards, J.H. Johnson: Rational Chebyshev Approximations for the Inverse of the Error Function. Mathematics of Computation, Vol. 30, No. 136 (Oct. 1976), pp. 827-830. Table 59 */ double p, q; t = log(a); t = 1.0 / sqrt(-t); p = 2.7834010353747001060e-3; p = p * t + 8.6030097526280260580e-1; p = p * t + 2.1371214997265515515e+0; p = p * t + 3.1598519601132090206e+0; p = p * t + 3.5780402569085996758e+0; p = p * t + 1.5335297523989890804e+0; p = p * t + 3.4839207139657522572e-1; p = p * t + 5.3644861147153648366e-2; p = p * t + 4.3836709877126095665e-3; p = p * t + 1.3858518113496718808e-4; p = p * t + 1.1738352509991666680e-6; q = t + 2.2859981272422905412e+0; q = q * t + 4.3859045256449554654e+0; q = q * t + 4.6632960348736635331e+0; q = q * t + 3.9846608184671757296e+0; q = q * t + 1.6068377709719017609e+0; q = q * t + 3.5609087305900265560e-1; q = q * t + 5.3963550303200816744e-2; q = q * t + 4.3873424022706935023e-3; q = q * t + 1.3858762165532246059e-4; q = q * t + 1.1738313872397777529e-6; t = p / (q * t); } else { /* Based on: J.M. Blair, C.A. Edwards, J.H. Johnson: Rational Chebyshev Approximations for the Inverse of the Error Function. Mathematics of Computation, Vol. 30, No. 136 (Oct. 1976), pp. 827-830. Table 82 */ double p, q; t = log(a); t = 1.0 / sqrt(-t); p = 6.9952990607058154858e-1; p = p * t + 1.9507620287580568829e+0; p = p * t + 8.2810030904462690216e-1; p = p * t + 1.1279046353630280005e-1; p = p * t + 6.0537914739162189689e-3; p = p * t + 1.3714329569665128933e-4; p = p * t + 1.2964481560643197452e-6; p = p * t + 4.6156006321345332510e-9; p = p * t + 4.5344689563209398450e-12; q = t + 1.5771922386662040546e+0; q = q * t + 2.1238242087454993542e+0; q = q * t + 8.4001814918178042919e-1; q = q * t + 1.1311889334355782065e-1; q = q * t + 6.0574830550097140404e-3; q = q * t + 1.3715891988350205065e-4; q = q * t + 1.2964671850944981713e-6; q = q * t + 4.6156017600933592558e-9; q = q * t + 4.5344687377088206783e-12; t = p / (q * t); } return t; } __func__(double normcdfinv(const double a)) { return -1.4142135623730951 * erfcinv(a + a); } __func__(double normcdf(double a)) { double ah, al, t1, t2, u1, u2, v1, v2, z; if (fabs (a) > 38.5) a = copysign (38.5, a); ah = a * 134217729.0; u1 = (a - ah) + ah; u2 = a - u1; v1 = -7.0710678398609161e-01; v2 = 2.7995440410322203e-09; t1 = a * -CUDART_SQRT_HALF_HI; t2 = (((u1 * v1 - t1) + u1 * v2) + u2 * v1) + u2 * v2; t2 = (a * -CUDART_SQRT_HALF_LO) + t2; ah = t1 + t2; z = erfc (ah); if (a < -1.0) { al = (t1 - ah) + t2; t1 = -2.0 * ah * z; z = t1 * al + z; } return 0.5 * z; } __func__(double erfcx(const double a)) { double x, t1, t2, t3; if (__isnan(a)) { return a + a; } x = fabs(a); if (x < 32.0) { /* * This implementation of erfcx() is based on the algorithm in: M. M. * Shepherd and J. G. Laframboise, "Chebyshev Approximation of (1 + 2x) * exp(x^2)erfc x in 0 <= x < INF", Mathematics of Computation, Vol. * 36, No. 153, January 1981, pp. 249-253. For the core approximation, * the input domain [0,INF] is transformed via (x-k) / (x+k) where k is * a precision-dependent constant. Here, we choose k = 4.0, so the input * domain [0, 27.3] is transformed into the core approximation domain * [-1, 0.744409]. */ /* // Compute (1+2*x)*exp(x*x)*erfc(x) */ /* t2 = (x-4.0)/(x+4.0), transforming [0,INF] to [-1,+1] */ t1 = x - 4.0; t2 = x + 4.0; t2 = t1 / t2; /* approximate on [-1, 0.744409] */ t1 = - 3.5602694826817400E-010; t1 = t1 * t2 - 9.7239122591447274E-009; t1 = t1 * t2 - 8.9350224851649119E-009; t1 = t1 * t2 + 1.0404430921625484E-007; t1 = t1 * t2 + 5.8806698585341259E-008; t1 = t1 * t2 - 8.2147414929116908E-007; t1 = t1 * t2 + 3.0956409853306241E-007; t1 = t1 * t2 + 5.7087871844325649E-006; t1 = t1 * t2 - 1.1231787437600085E-005; t1 = t1 * t2 - 2.4399558857200190E-005; t1 = t1 * t2 + 1.5062557169571788E-004; t1 = t1 * t2 - 1.9925637684786154E-004; t1 = t1 * t2 - 7.5777429182785833E-004; t1 = t1 * t2 + 5.0319698792599572E-003; t1 = t1 * t2 - 1.6197733895953217E-002; t1 = t1 * t2 + 3.7167515553018733E-002; t1 = t1 * t2 - 6.6330365827532434E-002; t1 = t1 * t2 + 9.3732834997115544E-002; t1 = t1 * t2 - 1.0103906603555676E-001; t1 = t1 * t2 + 6.8097054254735140E-002; t1 = t1 * t2 + 1.5379652102605428E-002; t1 = t1 * t2 - 1.3962111684056291E-001; t1 = t1 * t2 + 1.2329951186255526E+000; /* // Note: (1+2*x)*exp(x*x)*erfc(x) / (1+2*x) = exp(x*x)*erfc(x) */ t2 = 2.0 * x + 1.0; t1 = t1 / t2; } else { /* asymptotic expansion for large aguments */ t2 = 1.0 / x; t3 = t2 * t2; t1 = -29.53125; t1 = t1 * t3 + 6.5625; t1 = t1 * t3 - 1.875; t1 = t1 * t3 + 0.75; t1 = t1 * t3 - 0.5; t1 = t1 * t3 + 1.0; t2 = t2 * 5.6418958354775628e-001; t1 = t1 * t2; } if (a < 0.0) { /* // Note: erfcx(x) = 2*exp(x^2) - erfcx(|x|) */ t2 = (static_cast(x * 16.0)) * 0.0625; t3 = (x - t2) * (x + t2); t3 = exp(t2 * t2) * exp(t3); t3 = t3 + t3; t1 = t3 - t1; } return t1; } __func__(float rsqrtf(const float a)) { return static_cast(rsqrt(static_cast(a))); } __func__(float rcbrtf(const float a)) { return static_cast(rcbrt(static_cast(a))); } __func__(float sinpif(const float a)) { return static_cast(sinpi(static_cast(a))); } __func__(float cospif(const float a)) { return static_cast(cospi(static_cast(a))); } __func__(void sincospif(const float a, float *sptr, float *cptr)) { double s, c; sincospi(static_cast(a), &s, &c); *sptr = static_cast(s); *cptr = static_cast(c); } __func__(float erfinvf(const float a)) { return static_cast(erfinv(static_cast(a))); } __func__(float erfcinvf(const float a)) { return static_cast(erfcinv(static_cast(a))); } __func__(float normcdfinvf(const float a)) { return static_cast(normcdfinv(static_cast(a))); } __func__(float normcdff(const float a)) { return static_cast(normcdf(static_cast(a))); } __func__(float erfcxf(const float a)) { return static_cast(erfcx(static_cast(a))); } #if defined(_WIN32) #pragma warning (pop) #endif /* _WIN32 */ #endif /* !__CUDACC__ */ #endif /* !__MATH_FUNCTIONS_HPP__ */ #if defined(__UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_MATH_FUNCTIONS_HPP__) #undef __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ #undef __UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_MATH_FUNCTIONS_HPP__ #endif