/* * Copyright 2011-2023 NVIDIA Corporation. All rights reserved. * * NOTICE TO LICENSEE: * * This source code and/or documentation ("Licensed Deliverables") are * subject to NVIDIA intellectual property rights under U.S. and * international Copyright laws. * * These Licensed Deliverables contained herein is PROPRIETARY and * CONFIDENTIAL to NVIDIA and is being provided under the terms and * conditions of a form of NVIDIA software license agreement by and * between NVIDIA and Licensee ("License Agreement") or electronically * accepted by Licensee. Notwithstanding any terms or conditions to * the contrary in the License Agreement, reproduction or disclosure * of the Licensed Deliverables to any third party without the express * written consent of NVIDIA is prohibited. * * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THESE LICENSED DELIVERABLES. * * U.S. Government End Users. These Licensed Deliverables are a * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT * 1995), consisting of "commercial computer software" and "commercial * computer software documentation" as such terms are used in 48 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government * only as a commercial end item. Consistent with 48 C.F.R.12.212 and * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all * U.S. Government End Users acquire the Licensed Deliverables with * only those rights set forth herein. * * Any use of the Licensed Deliverables in individual and commercial * software must include, in the user documentation and internal * comments to the code, the above Disclaimer and U.S. Government End * Users Notice. */ #if !defined(_CUPTI_PROFILER_TARGET_H_) #define _CUPTI_PROFILER_TARGET_H_ #include #include #include #include #ifdef __cplusplus extern "C" { #endif #if defined(__GNUC__) && defined(CUPTI_LIB) #pragma GCC visibility push(default) #endif /** * \defgroup CUPTI_PROFILER_API CUPTI Profiling API * Functions, types, and enums that implement the CUPTI Profiling API. * @{ */ #ifndef CUPTI_PROFILER_STRUCT_SIZE #define CUPTI_PROFILER_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_)) #endif /** * \brief Profiler range attribute * * A metric enabled in the session's configuration is collected separately per unique range-stack in the pass. * This is an attribute to collect metrics around each kernel in a profiling session or in an user defined range. */ typedef enum { /** * Invalid value */ CUPTI_Range_INVALID, /** * Ranges are auto defined around each kernel in a profiling session */ CUPTI_AutoRange, /** * A range in which metric data to be collected is defined by the user */ CUPTI_UserRange, /** * Range count */ CUPTI_Range_COUNT, } CUpti_ProfilerRange; /** * \brief Profiler replay attribute * * For metrics which require multipass collection, a replay of the GPU kernel(s) is required. * This is an attribute which specify how the replay of the kernel(s) to be measured is done. */ typedef enum { /** * Invalid Value */ CUPTI_Replay_INVALID, /** * Replay is done by CUPTI user around the process */ CUPTI_ApplicationReplay, /** * Replay is done around kernel implicitly by CUPTI */ CUPTI_KernelReplay, /** * Replay is done by CUPTI user within a process */ CUPTI_UserReplay, /** * Replay count */ CUPTI_Replay_COUNT, } CUpti_ProfilerReplayMode; /** * \brief Default parameter for cuptiProfilerInitialize */ typedef struct CUpti_Profiler_Initialize_Params { size_t structSize; //!< [in] CUpti_Profiler_Initialize_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL } CUpti_Profiler_Initialize_Params; #define CUpti_Profiler_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Initialize_Params, pPriv) /** * \brief Default parameter for cuptiProfilerDeInitialize */ typedef struct CUpti_Profiler_DeInitialize_Params { size_t structSize; //!< [in] CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL } CUpti_Profiler_DeInitialize_Params; #define CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeInitialize_Params, pPriv) /** * \brief Initializes the profiler interface * * Loads the required libraries in the process address space. * Sets up the hooks with the CUDA driver. */ CUptiResult CUPTIAPI cuptiProfilerInitialize(CUpti_Profiler_Initialize_Params *pParams); /** * \brief DeInitializes the profiler interface */ CUptiResult CUPTIAPI cuptiProfilerDeInitialize(CUpti_Profiler_DeInitialize_Params *pParams); /** * \brief Input parameter to define the counterDataImage */ typedef struct CUpti_Profiler_CounterDataImageOptions { size_t structSize; //!< [in] CUpti_Profiler_CounterDataImageOptions_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL const uint8_t* pCounterDataPrefix; /**< [in] Address of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix(). Must be align(8).*/ size_t counterDataPrefixSize; //!< [in] Size of CounterDataPrefix generated from NVPW_CounterDataBuilder_GetCounterDataPrefix(). uint32_t maxNumRanges; //!< [in] Maximum number of ranges that can be profiled uint32_t maxNumRangeTreeNodes; //!< [in] Maximum number of RangeTree nodes; must be >= maxNumRanges uint32_t maxRangeNameLength; //!< [in] Maximum string length of each RangeName, including the trailing NULL character } CUpti_Profiler_CounterDataImageOptions; #define CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImageOptions, maxRangeNameLength) /** * \brief Params for cuptiProfilerCounterDataImageCalculateSize */ typedef struct CUpti_Profiler_CounterDataImage_CalculateSize_Params { size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL size_t sizeofCounterDataImageOptions; //!< [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE const CUpti_Profiler_CounterDataImageOptions* pOptions; //!< [in] Pointer to Counter Data Image Options size_t counterDataImageSize; //!< [out] } CUpti_Profiler_CounterDataImage_CalculateSize_Params; #define CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_CalculateSize_Params, counterDataImageSize) /** * \brief Params for cuptiProfilerCounterDataImageInitialize */ typedef struct CUpti_Profiler_CounterDataImage_Initialize_Params { size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL size_t sizeofCounterDataImageOptions; //!< [in] CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE const CUpti_Profiler_CounterDataImageOptions* pOptions; //!< [in] Pointer to Counter Data Image Options size_t counterDataImageSize; //!< [in] Size calculated from cuptiProfilerCounterDataImageCalculateSize uint8_t* pCounterDataImage; //!< [in] The buffer to be initialized. } CUpti_Profiler_CounterDataImage_Initialize_Params; #define CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_Initialize_Params, pCounterDataImage) /** * \brief A CounterData image allocates space for values for each counter for each range. * * User borne the resposibility of managing the counterDataImage allocations. * CounterDataPrefix contains meta data about the metrics that will be stored in counterDataImage. * Use these APIs to calculate the allocation size and initialize counterData image. */ CUptiResult cuptiProfilerCounterDataImageCalculateSize(CUpti_Profiler_CounterDataImage_CalculateSize_Params* pParams); CUptiResult cuptiProfilerCounterDataImageInitialize(CUpti_Profiler_CounterDataImage_Initialize_Params* pParams); /** * \brief Params for cuptiProfilerCounterDataImageCalculateScratchBufferSize */ typedef struct CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params { size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize uint8_t* pCounterDataImage; //!< [in] size_t counterDataScratchBufferSize; //!< [out] } CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params; #define CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params, counterDataScratchBufferSize) /** * \brief Params for cuptiProfilerCounterDataImageInitializeScratchBuffer */ typedef struct CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params { size_t structSize; //!< [in] CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize uint8_t* pCounterDataImage; //!< [in] size_t counterDataScratchBufferSize; //!< [in] size calculated using cuptiProfilerCounterDataImageCalculateScratchBufferSize uint8_t* pCounterDataScratchBuffer; //!< [in] the scratch buffer to be initialized. } CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params; #define CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params, pCounterDataScratchBuffer) /** * \brief A temporary storage for CounterData image needed for internal operations * * Use these APIs to calculate the allocation size and initialize counterData image scratch buffer. */ CUptiResult cuptiProfilerCounterDataImageCalculateScratchBufferSize(CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params* pParams); CUptiResult cuptiProfilerCounterDataImageInitializeScratchBuffer(CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params* pParams); /** * \brief Params for cuptiProfilerBeginSession */ typedef struct CUpti_Profiler_BeginSession_Params { size_t structSize; //!< [in] CUpti_Profiler_BeginSession_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used size_t counterDataImageSize; //!< [in] size calculated from cuptiProfilerCounterDataImageCalculateSize uint8_t* pCounterDataImage; //!< [in] address of CounterDataImage size_t counterDataScratchBufferSize; //!< [in] size calculated from cuptiProfilerCounterDataImageInitializeScratchBuffer uint8_t* pCounterDataScratchBuffer; //!< [in] address of CounterDataImage scratch buffer uint8_t bDumpCounterDataInFile; //!< [in] [optional] const char* pCounterDataFilePath; //!< [in] [optional] CUpti_ProfilerRange range; //!< [in] CUpti_ProfilerRange CUpti_ProfilerReplayMode replayMode; //!< [in] CUpti_ProfilerReplayMode /* Replay options, required when replay is done by cupti user */ size_t maxRangesPerPass; //!< [in] Maximum number of ranges that can be recorded in a single pass. size_t maxLaunchesPerPass; //!< [in] Maximum number of kernel launches that can be recorded in a single pass; must be >= maxRangesPerPass. } CUpti_Profiler_BeginSession_Params; #define CUpti_Profiler_BeginSession_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_BeginSession_Params, maxLaunchesPerPass) /** * \brief Params for cuptiProfilerEndSession */ typedef struct CUpti_Profiler_EndSession_Params { size_t structSize; //!< [in] CUpti_Profiler_EndSession_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used } CUpti_Profiler_EndSession_Params; #define CUpti_Profiler_EndSession_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EndSession_Params, ctx) /** * \brief Begin profiling session sets up the profiling on the device * * Although, it doesn't start the profiling but GPU resources needed for profiling are allocated. * Outside of a session, the GPU will return to its normal operating state. */ CUptiResult CUPTIAPI cuptiProfilerBeginSession(CUpti_Profiler_BeginSession_Params* pParams); /** * \brief Ends profiling session * * Frees up the GPU resources acquired for profiling. * Outside of a session, the GPU will return to it's normal operating state. */ CUptiResult CUPTIAPI cuptiProfilerEndSession(CUpti_Profiler_EndSession_Params* pParams); /** * \brief Params for cuptiProfilerSetConfig */ typedef struct CUpti_Profiler_SetConfig_Params { size_t structSize; //!< [in] CUpti_Profiler_SetConfig_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used const uint8_t* pConfig; //!< [in] Config created by NVPW_RawMetricsConfig_GetConfigImage(). Must be align(8). size_t configSize; //!< [in] size of config uint16_t minNestingLevel; //!< [in] the lowest nesting level to be profiled; must be >= 1 uint16_t numNestingLevels; //!< [in] the number of nesting levels to profile; must be >= 1 size_t passIndex; //!< [in] Set this to zero for in-app replay; set this to the output of EndPass() for application replay uint16_t targetNestingLevel; //!< [in] Set this to minNestingLevel for in-app replay; set this to the output of EndPass() for application } CUpti_Profiler_SetConfig_Params; #define CUpti_Profiler_SetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_SetConfig_Params, targetNestingLevel) /** * \brief Params for cuptiProfilerUnsetConfig */ typedef struct CUpti_Profiler_UnsetConfig_Params { size_t structSize; //!< [in] CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used } CUpti_Profiler_UnsetConfig_Params; #define CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_UnsetConfig_Params, ctx) /** * \brief Set metrics configuration to be profiled * * Use these APIs to set the config to profile in a session. It can be used for advanced cases such as where multiple * configurations are collected into a single CounterData Image on the need basis, without restarting the session. */ CUptiResult CUPTIAPI cuptiProfilerSetConfig(CUpti_Profiler_SetConfig_Params* pParams); /** * \brief Unset metrics configuration profiled * */ CUptiResult CUPTIAPI cuptiProfilerUnsetConfig(CUpti_Profiler_UnsetConfig_Params* pParams); /** * \brief Params for cuptiProfilerBeginPass */ typedef struct CUpti_Profiler_BeginPass_Params { size_t structSize; //!< [in] CUpti_Profiler_BeginPass_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used } CUpti_Profiler_BeginPass_Params; #define CUpti_Profiler_BeginPass_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_BeginPass_Params, ctx) /** * \brief Params for cuptiProfilerEndPass */ typedef struct CUpti_Profiler_EndPass_Params { size_t structSize; //!< [in] CUpti_Profiler_EndPass_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used uint16_t targetNestingLevel; //! [out] The targetNestingLevel that will be collected by the *next* BeginPass. size_t passIndex; //!< [out] The passIndex that will be collected by the *next* BeginPass uint8_t allPassesSubmitted; //!< [out] becomes true when the last pass has been queued to the GPU } CUpti_Profiler_EndPass_Params; #define CUpti_Profiler_EndPass_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EndPass_Params, allPassesSubmitted) /** * \brief Replay API: used for multipass collection. * These APIs are used if user chooses to replay by itself \ref CUPTI_UserReplay or \ref CUPTI_ApplicationReplay * for multipass collection of the metrics configurations. * It's a no-op in case of \ref CUPTI_KernelReplay. */ CUptiResult cuptiProfilerBeginPass(CUpti_Profiler_BeginPass_Params* pParams); /** * \brief Replay API: used for multipass collection. * These APIs are used if user chooses to replay by itself \ref CUPTI_UserReplay or \ref CUPTI_ApplicationReplay * for multipass collection of the metrics configurations. * Its a no-op in case of \ref CUPTI_KernelReplay. * Returns information for next pass. */ CUptiResult cuptiProfilerEndPass(CUpti_Profiler_EndPass_Params* pParams); /** * \brief Params for cuptiProfilerEnableProfiling */ typedef struct CUpti_Profiler_EnableProfiling_Params { size_t structSize; //!< [in] CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used } CUpti_Profiler_EnableProfiling_Params; #define CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_EnableProfiling_Params, ctx) /** * \brief Params for cuptiProfilerDisableProfiling */ typedef struct CUpti_Profiler_DisableProfiling_Params { size_t structSize; //!< [in] CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used } CUpti_Profiler_DisableProfiling_Params; #define CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DisableProfiling_Params, ctx) /** * \brief Enables Profiling * * In \ref CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in * a profiling session. */ CUptiResult CUPTIAPI cuptiProfilerEnableProfiling(CUpti_Profiler_EnableProfiling_Params* pParams); /** * \brief Disable Profiling * * In \ref CUPTI_AutoRange, these APIs are used to enable/disable profiling for the kernels to be executed in * a profiling session. */ CUptiResult CUPTIAPI cuptiProfilerDisableProfiling(CUpti_Profiler_DisableProfiling_Params* pParams); /** * \brief Params for cuptiProfilerIsPassCollected */ typedef struct CUpti_Profiler_IsPassCollected_Params { size_t structSize; //!< [in] CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used size_t numRangesDropped; //!< [out] number of ranges whose data was dropped in the processed pass size_t numTraceBytesDropped; //!< [out] number of bytes not written to TraceBuffer due to buffer full uint8_t onePassCollected; //!< [out] true if a pass was successfully decoded uint8_t allPassesCollected; //!< [out] becomes true when the last pass has been decoded } CUpti_Profiler_IsPassCollected_Params; #define CUpti_Profiler_IsPassCollected_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_IsPassCollected_Params, allPassesCollected) /** * \brief Asynchronous call to query if the submitted pass to GPU is collected * */ CUptiResult CUPTIAPI cuptiProfilerIsPassCollected(CUpti_Profiler_IsPassCollected_Params* pParams); /** * \brief Params for cuptiProfilerFlushCounterData */ typedef struct CUpti_Profiler_FlushCounterData_Params { size_t structSize; //!< [in] CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used size_t numRangesDropped; //!< [out] number of ranges whose data was dropped in the processed passes size_t numTraceBytesDropped; //!< [out] number of bytes not written to TraceBuffer due to buffer full } CUpti_Profiler_FlushCounterData_Params; #define CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_FlushCounterData_Params, numTraceBytesDropped) /** * \brief Decode all the submitted passes * * Flush Counter data API to ensure every pass is decoded into the counterDataImage passed at beginSession. * This will cause the CPU/GPU sync to collect all the undecoded pass. */ CUptiResult CUPTIAPI cuptiProfilerFlushCounterData(CUpti_Profiler_FlushCounterData_Params* pParams); typedef struct CUpti_Profiler_PushRange_Params { size_t structSize; //!< [in] CUpti_Profiler_PushRange_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used const char* pRangeName; //!< [in] specifies the range for subsequent launches; must not be NULL size_t rangeNameLength; //!< [in] assign to strlen(pRangeName) if known; if set to zero, the library will call strlen() } CUpti_Profiler_PushRange_Params; #define CUpti_Profiler_PushRange_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_PushRange_Params, rangeNameLength) typedef struct CUpti_Profiler_PopRange_Params { size_t structSize; //!< [in] CUpti_Profiler_PopRange_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used } CUpti_Profiler_PopRange_Params; #define CUpti_Profiler_PopRange_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_PopRange_Params, ctx) /** * \brief Range API's : Push user range * * Counter data is collected per unique range-stack. Identified by a string label passsed by the user. * It's an invalid operation in case of \ref CUPTI_AutoRange. */ CUptiResult CUPTIAPI cuptiProfilerPushRange(CUpti_Profiler_PushRange_Params *pParams); /** * \brief Range API's : Pop user range * * Counter data is collected per unique range-stack. Identified by a string label passsed by the user. * It's an invalid operation in case of \ref CUPTI_AutoRange. */ CUptiResult CUPTIAPI cuptiProfilerPopRange(CUpti_Profiler_PopRange_Params *pParams); /** * \brief Params for cuptiProfilerGetCounterAvailability */ typedef struct CUpti_Profiler_GetCounterAvailability_Params { size_t structSize; //!< [in] CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE void* pPriv; //!< [in] assign to NULL CUcontext ctx; //!< [in] if NULL, the current CUcontext is used size_t counterAvailabilityImageSize; //!< [in/out] If `pCounterAvailabilityImage` is NULL, then the required size is returned in //!< `counterAvailabilityImageSize`, otherwise `counterAvailabilityImageSize` should be set to the size of //!< `pCounterAvailabilityImage`, and on return it would be overwritten with number of actual bytes copied uint8_t* pCounterAvailabilityImage; //!< [in] buffer receiving counter availability image, may be NULL } CUpti_Profiler_GetCounterAvailability_Params; #define CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_GetCounterAvailability_Params, pCounterAvailabilityImage) /** * \brief Query counter availibility * * Use this API to query counter availability information in a buffer which can be used to filter unavailable raw metrics on host. * Note: This API may fail, if any profiling or sampling session is active on the specified context or its device. */ CUptiResult CUPTIAPI cuptiProfilerGetCounterAvailability(CUpti_Profiler_GetCounterAvailability_Params *pParams); /// Generic support level enum for CUPTI typedef enum { CUPTI_PROFILER_CONFIGURATION_UNKNOWN = 0, //!< Configuration support level unknown - either detection code errored out before setting this value, or unable to determine it CUPTI_PROFILER_CONFIGURATION_UNSUPPORTED, //!< Profiling is unavailable. For specific feature fields, this means that the current configuration of this feature does not work with profiling. For instance, SLI-enabled devices do not support profiling, and this value would be returned for SLI on an SLI-enabled device. CUPTI_PROFILER_CONFIGURATION_DISABLED, //!< Profiling would be available for this configuration, but was disabled by the system CUPTI_PROFILER_CONFIGURATION_SUPPORTED //!< Profiling is supported. For specific feature fields, this means that the current configuration of this feature works with profiling. For instance, SLI-enabled devices do not support profiling, and this value would only be returned for devices which are not SLI-enabled. } CUpti_Profiler_Support_Level; /** * \brief Profiler API types */ typedef enum { CUPTI_PROFILER_RANGE_PROFILING = 0, //!< CUPTI APIs for range based profiling (cuptiProfiler*) CUPTI_PROFILER_PC_SAMPLING, //!< CUPTI APIs collecting pc sampling data (cuptiPcSampling*) CUPTI_PROFILER_SASS_METRICS, //!< CUPTI APIs collecting SASS metrics data (cuptiSassMetrics*) CUPTI_PROFILER_UNKNOWN } CUpti_Profiler_API; /** * \brief Params for cuptiProfilerDeviceSupported */ typedef struct { size_t structSize; //!< [in] Must be CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE void *pPriv; //!< [in] assign to NULL CUdevice cuDevice; //!< [in] if NULL, the current CUcontext is used CUpti_Profiler_Support_Level isSupported; //!< [out] overall SUPPORTED / UNSUPPORTED flag representing whether Profiling and PC Sampling APIs work on the given device and configuration. SUPPORTED if all following flags are SUPPORTED, UNSUPPORTED otherwise. CUpti_Profiler_Support_Level architecture; //!< [out] SUPPORTED if the device architecture level supports the Profiling API (Compute Capability >= 7.0), UNSUPPORTED otherwise CUpti_Profiler_Support_Level sli; //!< [out] SUPPORTED if SLI is not enabled, UNSUPPORTED otherwise CUpti_Profiler_Support_Level vGpu; //!< [out] SUPPORTED if vGPU is supported and profiling is enabled, DISABLED if profiling is supported but not enabled, UNSUPPORTED otherwise CUpti_Profiler_Support_Level confidentialCompute; //!< [out] SUPPORTED if confidential compute is not enabled, UNSUPPORTED otherwise CUpti_Profiler_Support_Level cmp; //!< [out] SUPPORTED if not NVIDIA Crypto Mining Processors (CMP), UNSUPPORTED otherwise CUpti_Profiler_Support_Level wsl; //!< [out] SUPPORTED if WSL supported, UNSUPPORTED otherwise CUpti_Profiler_API api; //!< [in] the CUPTI API type for which device support will be checked } CUpti_Profiler_DeviceSupported_Params; #define CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeviceSupported_Params, api) /** * \brief Query device compatibility with Profiling API * * Use this call to determine whether a compute device and configuration are compatible with the Profiling API. * If the configuration does not support profiling, one of several flags will indicate why. */ CUptiResult CUPTIAPI cuptiProfilerDeviceSupported(CUpti_Profiler_DeviceSupported_Params *pParams); /** @} */ /* END CUPTI_METRIC_API */ #if defined(__GNUC__) && defined(CUPTI_LIB) #pragma GCC visibility pop #endif #ifdef __cplusplus } /* extern "C" */ #endif #endif /*_CUPTI_PROFILER_TARGET_H_*/