/*
* Copyright 1993-2021 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
//NOTE: For NVRTC, these declarations have been moved into the compiler (to reduce compile time)
#define EXCLUDE_FROM_RTC
#if !defined(__SM_20_INTRINSICS_H__)
#define __SM_20_INTRINSICS_H__
#if defined(__CUDACC_RTC__)
#define __SM_20_INTRINSICS_DECL__ __device__
#else /* __CUDACC_RTC__ */
#define __SM_20_INTRINSICS_DECL__ static __inline__ __device__
#endif /* __CUDACC_RTC__ */
#if defined(__cplusplus) && defined(__CUDACC__)
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
#include "cuda_runtime_api.h"
#if !defined(__CUDA_ARCH__) && !defined(_NVHPC_CUDA)
#define __DEF_IF_HOST { }
#else /* !__CUDA_ARCH__ && !_NVHPC_CUDA */
#define __DEF_IF_HOST ;
#endif /* __CUDA_ARCH__ || _NVHPC_CUDA */
#if defined(_WIN32)
# define __DEPRECATED__(msg) __declspec(deprecated(msg))
#elif (defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5 && !defined(__clang__))))
# define __DEPRECATED__(msg) __attribute__((deprecated))
#else
# define __DEPRECATED__(msg) __attribute__((deprecated(msg)))
#endif
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
#define __WSB_DEPRECATION_MESSAGE(x) #x"() is not valid on compute_70 and above, and should be replaced with "#x"_sync()."\
"To continue using "#x"(), specify virtual architecture compute_60 when targeting sm_70 and above, for example, using the pair of compiler options: -arch=compute_60 -code=sm_70."
#elif defined(_NVHPC_CUDA)
#define __WSB_DEPRECATION_MESSAGE(x) #x"() is not valid on cc70 and above, and should be replaced with "#x"_sync()."
#else
#define __WSB_DEPRECATION_MESSAGE(x) #x"() is deprecated in favor of "#x"_sync() and may be removed in a future release (Use -Wno-deprecated-declarations to suppress this warning)."
#endif
extern "C"
{
/**
 * \brief Memory fence with system-wide scope.
 *
 * Orders the calling thread's memory accesses so that accesses issued before
 * the fence are observed before accesses issued after it, with visibility at
 * system scope (whole device, host, and peer devices) per the CUDA C++
 * Programming Guide's memory-fence documentation.
 */
extern __device__ __device_builtin__ void __threadfence_system(void);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Divide two floating-point values in round-to-nearest-even mode.
*
* Divides two floating-point values \p x by \p y in round-to-nearest-even mode.
*
* \return Returns \p x / \p y.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __ddiv_rn(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Divide two floating-point values in round-towards-zero mode.
*
* Divides two floating-point values \p x by \p y in round-towards-zero mode.
*
* \return Returns \p x / \p y.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __ddiv_rz(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Divide two floating-point values in round-up mode.
*
* Divides two floating-point values \p x by \p y in round-up (to positive infinity) mode.
*
* \return Returns \p x / \p y.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __ddiv_ru(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Divide two floating-point values in round-down mode.
*
* Divides two floating-point values \p x by \p y in round-down (to negative infinity) mode.
*
* \return Returns \p x / \p y.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __ddiv_rd(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $\frac{1}{x}$ \endlatexonly
* \xmlonly
* <d4p_MathML outputclass="xmlonly">
* <mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML">
*   <mml:mfrac>
*     <mml:mn>1</mml:mn>
*     <mml:mi>x</mml:mi>
*   </mml:mfrac>
* </mml:math>
* </d4p_MathML>
* \endxmlonly
* in round-to-nearest-even mode.
*
* Compute the reciprocal of \p x in round-to-nearest-even mode.
*
* \return Returns
* \latexonly $\frac{1}{x}$ \endlatexonly
* \xmlonly
*
*
*
* 1
* x
*
*
* \endxmlonly.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __drcp_rn(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $\frac{1}{x}$ \endlatexonly
* \xmlonly
*
*
*
* 1
* x
*
*
*
* \endxmlonly
* in round-towards-zero mode.
*
* Compute the reciprocal of \p x in round-towards-zero mode.
*
* \return Returns
* \latexonly $\frac{1}{x}$ \endlatexonly
* \xmlonly
*
*
*
* 1
* x
*
*
* \endxmlonly.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __drcp_rz(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $\frac{1}{x}$ \endlatexonly
* \xmlonly
*
*
*
* 1
* x
*
*
*
* \endxmlonly
* in round-up mode.
*
* Compute the reciprocal of \p x in round-up (to positive infinity) mode.
*
* \return Returns
* \latexonly $\frac{1}{x}$ \endlatexonly
* \xmlonly
*
*
*
* 1
* x
*
*
* \endxmlonly.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __drcp_ru(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $\frac{1}{x}$ \endlatexonly
* \xmlonly
*
*
*
* 1
* x
*
*
*
* \endxmlonly
* in round-down mode.
*
* Compute the reciprocal of \p x in round-down (to negative infinity) mode.
*
* \return Returns
* \latexonly $\frac{1}{x}$ \endlatexonly
* \xmlonly
*
*
*
* 1
* x
*
*
* \endxmlonly.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __drcp_rd(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $\sqrt{x}$ \endlatexonly
* \xmlonly
* <d4p_MathML outputclass="xmlonly">
* <mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML">
*   <mml:msqrt>
*     <mml:mi>x</mml:mi>
*   </mml:msqrt>
* </mml:math>
* </d4p_MathML>
* \endxmlonly
* in round-to-nearest-even mode.
*
* Compute the square root of \p x in round-to-nearest-even mode.
*
* \return Returns
* \latexonly $\sqrt{x}$ \endlatexonly
* \xmlonly
*
*
*
* x
*
*
* \endxmlonly.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __dsqrt_rn(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $\sqrt{x}$ \endlatexonly
* \xmlonly
*
*
*
* x
*
*
*
* \endxmlonly
* in round-towards-zero mode.
*
* Compute the square root of \p x in round-towards-zero mode.
*
* \return Returns
* \latexonly $\sqrt{x}$ \endlatexonly
* \xmlonly
*
*
*
* x
*
*
* \endxmlonly.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __dsqrt_rz(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $\sqrt{x}$ \endlatexonly
* \xmlonly
*
*
*
* x
*
*
*
* \endxmlonly
* in round-up mode.
*
* Compute the square root of \p x in round-up (to positive infinity) mode.
*
* \return Returns
* \latexonly $\sqrt{x}$ \endlatexonly
* \xmlonly
*
*
*
* x
*
*
* \endxmlonly.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __dsqrt_ru(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $\sqrt{x}$ \endlatexonly
* \xmlonly
*
*
*
* x
*
*
*
* \endxmlonly
* in round-down mode.
*
* Compute the square root of \p x in round-down (to negative infinity) mode.
*
* \return Returns
* \latexonly $\sqrt{x}$ \endlatexonly
* \xmlonly
*
*
*
* x
*
*
* \endxmlonly.
*
* \note_accuracy_double
* \note_requires_fermi
*/
extern __device__ __device_builtin__ double __dsqrt_rd(double x);
/**
 * \brief Warp vote: ballot (legacy, mask-less form).
 *
 * Per the CUDA C++ Programming Guide, evaluates the integer predicate for the
 * active threads of the warp and returns a bit mask with one bit per lane set
 * when that lane's predicate was nonzero.
 *
 * \deprecated Replaced by ::__ballot_sync(); the attached
 * __WSB_DEPRECATION_MESSAGE (defined above) gives the target-specific
 * deprecation/removal details. Not valid on compute_70 and above.
 */
extern __device__ __device_builtin__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__ballot)) unsigned int __ballot(int);
/**
 * \brief Block-wide barrier that also counts predicates.
 *
 * Per the CUDA C++ Programming Guide: identical to ::__syncthreads(), and
 * additionally returns the number of threads in the block whose integer
 * predicate argument was nonzero.
 */
extern __device__ __device_builtin__ int __syncthreads_count(int);
/**
 * \brief Block-wide barrier that also AND-reduces predicates.
 *
 * Per the CUDA C++ Programming Guide: identical to ::__syncthreads(), and
 * additionally returns nonzero iff the predicate was nonzero for all threads
 * of the block.
 */
extern __device__ __device_builtin__ int __syncthreads_and(int);
/**
 * \brief Block-wide barrier that also OR-reduces predicates.
 *
 * Per the CUDA C++ Programming Guide: identical to ::__syncthreads(), and
 * additionally returns nonzero iff the predicate was nonzero for any thread
 * of the block.
 */
extern __device__ __device_builtin__ int __syncthreads_or(int);
/**
 * \brief Read the 64-bit clock counter.
 *
 * Returns the value of a per-multiprocessor 64-bit cycle counter
 * (64-bit counterpart of clock(); see the CUDA C++ Programming Guide).
 */
extern __device__ __device_builtin__ long long int clock64(void);
/**
* \ingroup CUDA_MATH_INTRINSIC_SINGLE
* \brief Compute fused multiply-add operation in round-to-nearest-even mode, ignore \p -ftz=true compiler flag
*
* Behavior is the same as ::__fmaf_rn(\p x, \p y, \p z), the difference is in
* handling denormalized inputs and outputs: \p -ftz compiler flag has no effect.
*/
extern __device__ __device_builtin__ float __fmaf_ieee_rn(float x, float y, float z);
/**
* \ingroup CUDA_MATH_INTRINSIC_SINGLE
* \brief Compute fused multiply-add operation in round-down mode, ignore \p -ftz=true compiler flag
*
* Behavior is the same as ::__fmaf_rd(\p x, \p y, \p z), the difference is in
* handling denormalized inputs and outputs: \p -ftz compiler flag has no effect.
*/
extern __device__ __device_builtin__ float __fmaf_ieee_rd(float x, float y, float z);
/**
* \ingroup CUDA_MATH_INTRINSIC_SINGLE
* \brief Compute fused multiply-add operation in round-up mode, ignore \p -ftz=true compiler flag
*
* Behavior is the same as ::__fmaf_ru(\p x, \p y, \p z), the difference is in
* handling denormalized inputs and outputs: \p -ftz compiler flag has no effect.
*/
extern __device__ __device_builtin__ float __fmaf_ieee_ru(float x, float y, float z);
/**
* \ingroup CUDA_MATH_INTRINSIC_SINGLE
* \brief Compute fused multiply-add operation in round-towards-zero mode, ignore \p -ftz=true compiler flag
*
* Behavior is the same as ::__fmaf_rz(\p x, \p y, \p z), the difference is in
* handling denormalized inputs and outputs: \p -ftz compiler flag has no effect.
*/
extern __device__ __device_builtin__ float __fmaf_ieee_rz(float x, float y, float z);
// SM_13 intrinsics
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Reinterpret bits in a double as a 64-bit signed integer.
*
* Reinterpret the bits in the double-precision floating-point value \p x
* as a signed 64-bit integer.
* \return Returns reinterpreted value.
*/
extern __device__ __device_builtin__ long long int __double_as_longlong(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Reinterpret bits in a 64-bit signed integer as a double.
*
* Reinterpret the bits in the 64-bit signed integer value \p x as
* a double-precision floating-point value.
* \return Returns reinterpreted value.
*/
extern __device__ __device_builtin__ double __longlong_as_double(long long int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
* <d4p_MathML outputclass="xmlonly">
* <mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML">
*   <mml:mi>x</mml:mi>
*   <mml:mo>&#x00D7;<!-- × --></mml:mo>
*   <mml:mi>y</mml:mi>
*   <mml:mo>+</mml:mo>
*   <mml:mi>z</mml:mi>
* </mml:math>
* </d4p_MathML>
* \endxmlonly
* as a single operation in round-to-nearest-even mode.
*
* Computes the value of
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single ternary operation, rounding the
* result once in round-to-nearest-even mode.
*
* \return Returns the rounded value of
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single operation.
* - fmaf(
* \latexonly $\pm \infty$ \endlatexonly
* \xmlonly
*
*
* ±
* ∞
*
*
* \endxmlonly
* ,
* \latexonly $\pm 0$ \endlatexonly
* \xmlonly
*
*
* ±
* 0
*
*
* \endxmlonly
* , \p z) returns NaN.
* - fmaf(
* \latexonly $\pm 0$ \endlatexonly
* \xmlonly
*
*
* ±
* 0
*
*
* \endxmlonly
* ,
* \latexonly $\pm \infty$ \endlatexonly
* \xmlonly
*
*
* ±
* ∞
*
*
* \endxmlonly
* , \p z) returns NaN.
* - fmaf(\p x, \p y,
* \latexonly $-\infty$ \endlatexonly
* \xmlonly
*
*
* -
* ∞
*
*
* \endxmlonly
* ) returns NaN if
* \latexonly $x \times y$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
*
*
* \endxmlonly
* is an exact
* \latexonly $+\infty$ \endlatexonly
* \xmlonly
*
*
* +
* ∞
*
*
* \endxmlonly
* .
* - fmaf(\p x, \p y,
* \latexonly $+\infty$ \endlatexonly
* \xmlonly
*
*
* +
* ∞
*
*
* \endxmlonly
* ) returns NaN if
* \latexonly $x \times y$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
*
*
* \endxmlonly
* is an exact
* \latexonly $-\infty$ \endlatexonly
* \xmlonly
*
*
* -
* ∞
*
*
* \endxmlonly
* .
*
* \note_accuracy_double
*/
extern __device__ __device_builtin__ double __fma_rn(double x, double y, double z);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single operation in round-towards-zero mode.
*
* Computes the value of
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single ternary operation, rounding the
* result once in round-towards-zero mode.
*
* \return Returns the rounded value of
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single operation.
* - fmaf(
* \latexonly $\pm \infty$ \endlatexonly
* \xmlonly
*
*
* ±
* ∞
*
*
* \endxmlonly
* ,
* \latexonly $\pm 0$ \endlatexonly
* \xmlonly
*
*
* ±
* 0
*
*
* \endxmlonly
* , \p z) returns NaN.
* - fmaf(
* \latexonly $\pm 0$ \endlatexonly
* \xmlonly
*
*
* ±
* 0
*
*
* \endxmlonly
* ,
* \latexonly $\pm \infty$ \endlatexonly
* \xmlonly
*
*
* ±
* ∞
*
*
* \endxmlonly
* , \p z) returns NaN.
* - fmaf(\p x, \p y,
* \latexonly $-\infty$ \endlatexonly
* \xmlonly
*
*
* -
* ∞
*
*
* \endxmlonly
* ) returns NaN if
* \latexonly $x \times y$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
*
*
* \endxmlonly
* is an exact
* \latexonly $+\infty$ \endlatexonly
* \xmlonly
*
*
* +
* ∞
*
*
* \endxmlonly
* .
* - fmaf(\p x, \p y,
* \latexonly $+\infty$ \endlatexonly
* \xmlonly
*
*
* +
* ∞
*
*
* \endxmlonly
* ) returns NaN if
* \latexonly $x \times y$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
*
*
* \endxmlonly
* is an exact
* \latexonly $-\infty$ \endlatexonly
* \xmlonly
*
*
* -
* ∞
*
*
* \endxmlonly
* .
*
* \note_accuracy_double
*/
extern __device__ __device_builtin__ double __fma_rz(double x, double y, double z);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single operation in round-up mode.
*
* Computes the value of
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single ternary operation, rounding the
* result once in round-up (to positive infinity) mode.
*
* \return Returns the rounded value of
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single operation.
* - fmaf(
* \latexonly $\pm \infty$ \endlatexonly
* \xmlonly
*
*
* ±
* ∞
*
*
* \endxmlonly
* ,
* \latexonly $\pm 0$ \endlatexonly
* \xmlonly
*
*
* ±
* 0
*
*
* \endxmlonly
* , \p z) returns NaN.
* - fmaf(
* \latexonly $\pm 0$ \endlatexonly
* \xmlonly
*
*
* ±
* 0
*
*
* \endxmlonly
* ,
* \latexonly $\pm \infty$ \endlatexonly
* \xmlonly
*
*
* ±
* ∞
*
*
* \endxmlonly
* , \p z) returns NaN.
* - fmaf(\p x, \p y,
* \latexonly $-\infty$ \endlatexonly
* \xmlonly
*
*
* -
* ∞
*
*
* \endxmlonly
* ) returns NaN if
* \latexonly $x \times y$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
*
*
* \endxmlonly
* is an exact
* \latexonly $+\infty$ \endlatexonly
* \xmlonly
*
*
* +
* ∞
*
*
* \endxmlonly
* .
* - fmaf(\p x, \p y,
* \latexonly $+\infty$ \endlatexonly
* \xmlonly
*
*
* +
* ∞
*
*
* \endxmlonly
* ) returns NaN if
* \latexonly $x \times y$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
*
*
* \endxmlonly
* is an exact
* \latexonly $-\infty$ \endlatexonly
* \xmlonly
*
*
* -
* ∞
*
*
* \endxmlonly
* .
*
* \note_accuracy_double
*/
extern __device__ __device_builtin__ double __fma_ru(double x, double y, double z);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Compute
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single operation in round-down mode.
*
* Computes the value of
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single ternary operation, rounding the
* result once in round-down (to negative infinity) mode.
*
* \return Returns the rounded value of
* \latexonly $x \times y + z$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
* +
* z
*
*
* \endxmlonly
* as a single operation.
* - fmaf(
* \latexonly $\pm \infty$ \endlatexonly
* \xmlonly
*
*
* ±
* ∞
*
*
* \endxmlonly
* ,
* \latexonly $\pm 0$ \endlatexonly
* \xmlonly
*
*
* ±
* 0
*
*
* \endxmlonly
* , \p z) returns NaN.
* - fmaf(
* \latexonly $\pm 0$ \endlatexonly
* \xmlonly
*
*
* ±
* 0
*
*
* \endxmlonly
* ,
* \latexonly $\pm \infty$ \endlatexonly
* \xmlonly
*
*
* ±
* ∞
*
*
* \endxmlonly
* , \p z) returns NaN.
* - fmaf(\p x, \p y,
* \latexonly $-\infty$ \endlatexonly
* \xmlonly
*
*
* -
* ∞
*
*
* \endxmlonly
* ) returns NaN if
* \latexonly $x \times y$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
*
*
* \endxmlonly
* is an exact
* \latexonly $+\infty$ \endlatexonly
* \xmlonly
*
*
* +
* ∞
*
*
* \endxmlonly
* .
* - fmaf(\p x, \p y,
* \latexonly $+\infty$ \endlatexonly
* \xmlonly
*
*
* +
* ∞
*
*
* \endxmlonly
* ) returns NaN if
* \latexonly $x \times y$ \endlatexonly
* \xmlonly
*
*
* x
* ×
* y
*
*
* \endxmlonly
* is an exact
* \latexonly $-\infty$ \endlatexonly
* \xmlonly
*
*
* -
* ∞
*
*
* \endxmlonly
* .
*
* \note_accuracy_double
*/
extern __device__ __device_builtin__ double __fma_rd(double x, double y, double z);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Add two floating-point values in round-to-nearest-even mode.
*
* Adds two floating-point values \p x and \p y in round-to-nearest-even mode.
*
* \return Returns \p x + \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dadd_rn(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Add two floating-point values in round-towards-zero mode.
*
* Adds two floating-point values \p x and \p y in round-towards-zero mode.
*
* \return Returns \p x + \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dadd_rz(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Add two floating-point values in round-up mode.
*
* Adds two floating-point values \p x and \p y in round-up (to positive infinity) mode.
*
* \return Returns \p x + \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dadd_ru(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Add two floating-point values in round-down mode.
*
* Adds two floating-point values \p x and \p y in round-down (to negative infinity) mode.
*
* \return Returns \p x + \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dadd_rd(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Subtract two floating-point values in round-to-nearest-even mode.
*
* Subtracts two floating-point values \p x and \p y in round-to-nearest-even mode.
*
* \return Returns \p x - \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dsub_rn(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Subtract two floating-point values in round-towards-zero mode.
*
* Subtracts two floating-point values \p x and \p y in round-towards-zero mode.
*
* \return Returns \p x - \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dsub_rz(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Subtract two floating-point values in round-up mode.
*
* Subtracts two floating-point values \p x and \p y in round-up (to positive infinity) mode.
*
* \return Returns \p x - \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dsub_ru(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Subtract two floating-point values in round-down mode.
*
* Subtracts two floating-point values \p x and \p y in round-down (to negative infinity) mode.
*
* \return Returns \p x - \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dsub_rd(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Multiply two floating-point values in round-to-nearest-even mode.
*
* Multiplies two floating-point values \p x and \p y in round-to-nearest-even mode.
*
* \return Returns \p x * \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dmul_rn(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Multiply two floating-point values in round-towards-zero mode.
*
* Multiplies two floating-point values \p x and \p y in round-towards-zero mode.
*
* \return Returns \p x * \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dmul_rz(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Multiply two floating-point values in round-up mode.
*
* Multiplies two floating-point values \p x and \p y in round-up (to positive infinity) mode.
*
* \return Returns \p x * \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dmul_ru(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_DOUBLE
* \brief Multiply two floating-point values in round-down mode.
*
* Multiplies two floating-point values \p x and \p y in round-down (to negative infinity) mode.
*
* \return Returns \p x * \p y.
*
* \note_accuracy_double
* \note_nofma
*/
extern __device__ __device_builtin__ double __dmul_rd(double x, double y);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to a float in round-to-nearest-even mode.
*
* Convert the double-precision floating-point value \p x to a single-precision
* floating-point value in round-to-nearest-even mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ float __double2float_rn(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to a float in round-towards-zero mode.
*
* Convert the double-precision floating-point value \p x to a single-precision
* floating-point value in round-towards-zero mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ float __double2float_rz(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to a float in round-up mode.
*
* Convert the double-precision floating-point value \p x to a single-precision
* floating-point value in round-up (to positive infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ float __double2float_ru(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to a float in round-down mode.
*
* Convert the double-precision floating-point value \p x to a single-precision
* floating-point value in round-down (to negative infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ float __double2float_rd(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to a signed int in round-to-nearest-even mode.
*
* Convert the double-precision floating-point value \p x to a
* signed integer value in round-to-nearest-even mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ int __double2int_rn(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to a signed int in round-up mode.
*
* Convert the double-precision floating-point value \p x to a
* signed integer value in round-up (to positive infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ int __double2int_ru(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to a signed int in round-down mode.
*
* Convert the double-precision floating-point value \p x to a
* signed integer value in round-down (to negative infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ int __double2int_rd(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to an unsigned int in round-to-nearest-even mode.
*
* Convert the double-precision floating-point value \p x to an
* unsigned integer value in round-to-nearest-even mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ unsigned int __double2uint_rn(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to an unsigned int in round-up mode.
*
* Convert the double-precision floating-point value \p x to an
* unsigned integer value in round-up (to positive infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ unsigned int __double2uint_ru(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to an unsigned int in round-down mode.
*
* Convert the double-precision floating-point value \p x to an
* unsigned integer value in round-down (to negative infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ unsigned int __double2uint_rd(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to a signed 64-bit int in round-to-nearest-even mode.
*
* Convert the double-precision floating-point value \p x to a
* signed 64-bit integer value in round-to-nearest-even mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ long long int __double2ll_rn(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to a signed 64-bit int in round-up mode.
*
* Convert the double-precision floating-point value \p x to a
* signed 64-bit integer value in round-up (to positive infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ long long int __double2ll_ru(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to a signed 64-bit int in round-down mode.
*
* Convert the double-precision floating-point value \p x to a
* signed 64-bit integer value in round-down (to negative infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ long long int __double2ll_rd(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to an unsigned 64-bit int in round-to-nearest-even mode.
*
* Convert the double-precision floating-point value \p x to an
* unsigned 64-bit integer value in round-to-nearest-even mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ unsigned long long int __double2ull_rn(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to an unsigned 64-bit int in round-up mode.
*
* Convert the double-precision floating-point value \p x to an
* unsigned 64-bit integer value in round-up (to positive infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ unsigned long long int __double2ull_ru(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a double to an unsigned 64-bit int in round-down mode.
*
* Convert the double-precision floating-point value \p x to an
* unsigned 64-bit integer value in round-down (to negative infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ unsigned long long int __double2ull_rd(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a signed int to a double.
*
* Convert the signed integer value \p x to a double-precision floating-point value.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ double __int2double_rn(int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert an unsigned int to a double.
*
* Convert the unsigned integer value \p x to a double-precision floating-point value.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ double __uint2double_rn(unsigned int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a signed 64-bit int to a double in round-to-nearest-even mode.
*
* Convert the signed 64-bit integer value \p x to a double-precision floating-point
* value in round-to-nearest-even mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ double __ll2double_rn(long long int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a signed 64-bit int to a double in round-towards-zero mode.
*
* Convert the signed 64-bit integer value \p x to a double-precision floating-point
* value in round-towards-zero mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ double __ll2double_rz(long long int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a signed 64-bit int to a double in round-up mode.
*
* Convert the signed 64-bit integer value \p x to a double-precision floating-point
* value in round-up (to positive infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ double __ll2double_ru(long long int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert a signed 64-bit int to a double in round-down mode.
*
* Convert the signed 64-bit integer value \p x to a double-precision floating-point
* value in round-down (to negative infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ double __ll2double_rd(long long int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert an unsigned 64-bit int to a double in round-to-nearest-even mode.
*
* Convert the unsigned 64-bit integer value \p x to a double-precision floating-point
* value in round-to-nearest-even mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ double __ull2double_rn(unsigned long long int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert an unsigned 64-bit int to a double in round-towards-zero mode.
*
* Convert the unsigned 64-bit integer value \p x to a double-precision floating-point
* value in round-towards-zero mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ double __ull2double_rz(unsigned long long int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert an unsigned 64-bit int to a double in round-up mode.
*
* Convert the unsigned 64-bit integer value \p x to a double-precision floating-point
* value in round-up (to positive infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ double __ull2double_ru(unsigned long long int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Convert an unsigned 64-bit int to a double in round-down mode.
*
* Convert the unsigned 64-bit integer value \p x to a double-precision floating-point
* value in round-down (to negative infinity) mode.
* \return Returns converted value.
*/
extern __device__ __device_builtin__ double __ull2double_rd(unsigned long long int x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Reinterpret high 32 bits in a double as a signed integer.
*
* Reinterpret the high 32 bits in the double-precision floating-point value \p x
* as a signed integer.
* \return Returns reinterpreted value.
*/
extern __device__ __device_builtin__ int __double2hiint(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Reinterpret low 32 bits in a double as a signed integer.
*
* Reinterpret the low 32 bits in the double-precision floating-point value \p x
* as a signed integer.
* \return Returns reinterpreted value.
*/
extern __device__ __device_builtin__ int __double2loint(double x);
/**
* \ingroup CUDA_MATH_INTRINSIC_CAST
* \brief Reinterpret high and low 32-bit integer values as a double.
*
* Reinterpret the integer value of \p hi as the high 32 bits of a
* double-precision floating-point value and the integer value of \p lo
* as the low 32 bits of the same double-precision floating-point value.
* \return Returns reinterpreted value.
*/
extern __device__ __device_builtin__ double __hiloint2double(int hi, int lo);
}
/*******************************************************************************
* *
* *
* *
*******************************************************************************/
/* C++-linkage convenience wrappers over the warp-vote / barrier-reduction
 * builtins declared in the extern "C" block above, taking a bool predicate
 * (and, for the *_and / *_or forms, returning bool) instead of int.
 * For host-only compilation __DEF_IF_HOST expands to an empty body "{ }";
 * for device compilation it expands to ";" and the definitions come from
 * sm_20_intrinsics.hpp (included at the bottom of this header).
 * ballot() carries the same deprecation message as __ballot(): deprecated in
 * favor of the *_sync variants, invalid on compute_70 and above. */
__SM_20_INTRINSICS_DECL__ __DEPRECATED__(__WSB_DEPRECATION_MESSAGE(__ballot)) unsigned int ballot(bool pred) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ int syncthreads_count(bool pred) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ bool syncthreads_and(bool pred) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ bool syncthreads_or(bool pred) __DEF_IF_HOST
#undef __DEPRECATED__
#undef __WSB_DEPRECATION_MESSAGE
/* Address-space predicates: per the CUDA C++ Programming Guide each returns
 * nonzero when the generic pointer refers to the named address space
 * (global / shared / constant / local). Host builds get empty stub bodies
 * via __DEF_IF_HOST; device definitions live in sm_20_intrinsics.hpp. */
__SM_20_INTRINSICS_DECL__ unsigned int __isGlobal(const void *ptr) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ unsigned int __isShared(const void *ptr) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ unsigned int __isConstant(const void *ptr) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ unsigned int __isLocal(const void *ptr) __DEF_IF_HOST
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700)
/* The grid-constant query is only declared for host passes or SM70+ device
 * passes, as the surrounding guard shows. */
__SM_20_INTRINSICS_DECL__ unsigned int __isGridConstant(const void *ptr) __DEF_IF_HOST
#endif /* !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700) */
/* Generic-to-specific address conversions (named after the PTX "cvta"
 * instruction family): convert a generic pointer to the raw address bits in
 * the named address space, returned as size_t. */
__SM_20_INTRINSICS_DECL__ size_t __cvta_generic_to_global(const void *ptr) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ size_t __cvta_generic_to_shared(const void *ptr) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ size_t __cvta_generic_to_constant(const void *ptr) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ size_t __cvta_generic_to_local(const void *ptr) __DEF_IF_HOST
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700)
__SM_20_INTRINSICS_DECL__ size_t __cvta_generic_to_grid_constant(const void *ptr) __DEF_IF_HOST
#endif /* !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700) */
/* Inverse conversions: raw address-space bits back to a generic pointer. */
__SM_20_INTRINSICS_DECL__ void * __cvta_global_to_generic(size_t rawbits) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ void * __cvta_shared_to_generic(size_t rawbits) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ void * __cvta_constant_to_generic(size_t rawbits) __DEF_IF_HOST
__SM_20_INTRINSICS_DECL__ void * __cvta_local_to_generic(size_t rawbits) __DEF_IF_HOST
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700)
__SM_20_INTRINSICS_DECL__ void * __cvta_grid_constant_to_generic(size_t rawbits) __DEF_IF_HOST
#endif /* !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 700) */
#endif /* __cplusplus && __CUDACC__ */
#undef __DEF_IF_HOST
#undef __SM_20_INTRINSICS_DECL__
#if (!defined(__CUDACC_RTC__) && defined(__CUDA_ARCH__)) || defined(_NVHPC_CUDA)
#include "sm_20_intrinsics.hpp"
#endif /* (!__CUDACC_RTC__ && __CUDA_ARCH__) || _NVHPC_CUDA */
#endif /* !__SM_20_INTRINSICS_H__ */
#undef EXCLUDE_FROM_RTC