sglang_v0.5.2/pytorch_2.8.0/third_party/NVTX/tests/InjectionHelper.h

583 lines
23 KiB
C++

/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Licensed under the Apache License v2.0 with LLVM Exceptions.
* See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
*/
#pragma once
// [Best practices for injection implementations]
// Set NVTX_NO_IMPL to make the NVTX headers define the API types and function
// prototypes only, not the inline impls. Be sure on GCC to use -Wno-unused-function
// to avoid warnings for undefined static prototypes.
#define NVTX_NO_IMPL
// [Best practices for injection implementations]
// Microsoft's compiler issues warning 26812 when compiling a C-style enum in C++
// instead of using the new "enum class" style. Since the NVTX headers are written in
// C, the enums defined there will trigger this warning. Use this code to disable it.
#if defined(_MSC_VER)
#pragma warning (disable : 26812)
#endif
#include <nvtx3/nvToolsExt.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <sstream>
#include <tuple>
#include <type_traits>
#include <utility>
namespace NvtxInjectionHelper {
//============ Generic utility functions ======================================
inline namespace detail_generic {
//--- maxVal ---
// Variadic alternative to std::max that doesn't need an initializer list,
// doesn't conflict with MSVC's #define for max, and has no trouble with
// constexpr usage.  Handles having zero parameters passed, returning
// std::numeric_limits<T>::lowest() in that case, as long as the template
// parameter T is explicitly specified.  lowest() is used instead of min()
// because for floating-point types min() is the smallest *positive* value,
// which would incorrectly beat any negative argument.  For integer types
// lowest() and min() are the same value.  Takes arguments by value, which
// avoids the issue of returning a reference to something when called
// with no parameters.  Example uses:
//
//     maxVal<int>()             // == std::numeric_limits<int>::lowest()
//     maxVal<int>(3, 9, 4)      // == 9
//     maxVal<double>(-1., -2.)  // == -1.0
//
// Base case: no arguments, so return the identity element for "max".
template <typename T>
constexpr inline T maxVal() { return std::numeric_limits<T>::lowest(); }
// Recursive case: the larger of the first argument and the max of the rest.
// The remaining arguments are converted to T by the recursive call.
template <typename T, typename... Rest>
constexpr inline T maxVal(T first, Rest... rest)
{
    T restMax = maxVal<T>(rest...);
    return (first > restMax) ? first : restMax;
}
//--- tuple size helper ---
// Reports how many elements a std::tuple holds, given a tuple *object*
// rather than a tuple *type*.  std::tuple_size<> needs the type spelled out,
// which is awkward inside a generic lambda whose parameter is declared
// "auto"; passing the value here lets template deduction do that work.
// The argument itself is never read -- only its type matters.
template <typename... Elems>
constexpr inline size_t size_of_tuple(std::tuple<Elems...> const&)
{
    return std::tuple_size<std::tuple<Elems...>>::value;
}
//--- tuple helpers to loop over items ---
// We need a way to call a function f on each element of a tuple, like this:
//
// f(std::get<0>(t));
// f(std::get<1>(t));
// f(std::get<2>(t)); etc.
//
// We want something like this, where "Is" is a parameter pack of 0,1,2,etc.:
//
// f(std::get<Is>(t))...;
//
// ...but parameter pack expansion is only allowed within the context of args
// to a function call or a braced init list. We also must handle the case
// where the tuple is empty, we should discard the results of all the calls
// to f, even if it returns different types for each call. Easiest way to
// do this is by forwarding the elements of the tuple as args to a helper
// function that calls f on each arg, like this:
//
// for_each_in_parameter_pack(f, std::get<Is>(t)...);
//
// But we also want perfect forwarding of the function and the tuple.
// The following utilities "for_each_in_tuple", "for_each_in_tuple_helper",
// and "for_each_in_parameter_pack" are provided to allow code such as this
// "loop" over tuple elements. Note that "thing" in each iteration can be
// a different type, because a tuple's elements may be different types, so
// generic lambdas are very convenient here:
//
// for_each_in_tuple(tuple_of_things,
// [](auto const& thing)
// {
// std::cout << thing << std::endl;
// }
// );
// Zero-argument overload: there is nothing to visit, so the callable is
// accepted (for a uniform call syntax) but never invoked.
template<typename F>
inline void for_each_in_parameter_pack(F&&) {}
// Invokes f once per argument, strictly left to right, throwing away
// whatever each invocation returns (the calls may return different types).
template<typename F, typename First, typename... Rest>
inline void for_each_in_parameter_pack(F&& f, First const& first, Rest const&... rest)
{
    // Visit the leading argument; the cast to void discards the result.
    static_cast<void>(std::forward<F>(f)(first));
    // Visit the remaining arguments by expanding the pack inside a braced
    // initializer list, whose elements are evaluated in order.  The leading
    // 0 keeps the array non-empty when there are no remaining arguments.
    int const sequenced[] = { 0, (static_cast<void>(std::forward<F>(f)(rest)), 0)... };
    static_cast<void>(sequenced);
}
// Implementation detail of for_each_in_tuple: receives the tuple together
// with an index sequence Idx... and expands it into std::get<Idx>(t)...,
// handing every element to for_each_in_parameter_pack along with the
// (perfectly forwarded) callable.
template<typename TupleT, typename Fn, size_t... Idx>
inline void for_each_in_tuple_helper(TupleT const& t, Fn&& f, std::index_sequence<Idx...>)
{
    for_each_in_parameter_pack(std::forward<Fn>(f), std::get<Idx>(t)...);
}
// Calls f on every element of tuple t, in element order.  Builds the index
// sequence 0..N-1 for the tuple's N elements and delegates the expansion to
// for_each_in_tuple_helper.  An empty tuple results in no calls to f.
template<typename... Ts, typename F>
inline void for_each_in_tuple(std::tuple<Ts...> const& t, F&& f)
{
    using indices_t = std::index_sequence_for<Ts...>;
    for_each_in_tuple_helper(t, std::forward<F>(f), indices_t{});
}
} // namespace detail_generic
//============ NVTX injection helper internal utilities =======================
inline namespace detail_nvtx {
//--- id_t ---
// Define generic integer type for holding all modules' callback id enum values.
// Handler stores its call id in this type after casting away the specific
// enum type, so ids from different modules share one representation.
// These are used as indexes into the handler arrays for each module.
using id_t = unsigned int;
//--- id_v ---
// Nickname for std::integral_constant, which is used for all callback enum values.
// Using an integral constant allows performing correctness checks at compile time,
// which is not possible in C++ with function parameter values, only their types.
// Including the value in the type works around this problem.
//   EnumT   - the callback id enum type
//   EnumVal - the specific enumerator carried in the type
template <typename EnumT, EnumT EnumVal>
using id_v = std::integral_constant<EnumT, EnumVal>;
//--- NVTX_CBID ---
// Macro to succinctly turn an NVTX_CBID_* enum value into a compile-time constant,
// using std::integral_constant. This makes it possible to perform correctness
// checks at compile time, for example ensuring a handler's signature is compatible
// with the NVTX API call it is being installed to handle. Syntax is meant to look
// familiar. For example, replace:
// NVTX_CBID_CORE_MarkA
// with:
// NVTX_CBID(CORE_MarkA)
// when passing CBID values to NvtxInjectionHelper::MakeHandlerTable.
// The expansion is a value-initialized temporary ("{}") of the id_v type, and it
// names id_v with full NvtxInjectionHelper:: qualification, so the macro does not
// depend on the namespace it is used from.
#define NVTX_CBID(func) NvtxInjectionHelper::id_v<decltype(NVTX_CBID_##func), NVTX_CBID_##func>{}
//--- EnumTypeToModuleId ---
// Template variable to map from call id enum types to module id values (see nvtxTypes.h)
// The template argument is the enum *type*, not one of its enumerators.
// For example, EnumTypeToModuleId<NvtxCallbackIdCore> == NVTX_CB_MODULE_CORE.
// The primary template yields NVTX_CB_MODULE_INVALID, so any type without an
// explicit specialization below maps to the invalid module.
template <typename EnumT>
constexpr static NvtxCallbackModule EnumTypeToModuleId = NVTX_CB_MODULE_INVALID;
template<> constexpr NvtxCallbackModule EnumTypeToModuleId<NvtxCallbackIdCore > = NVTX_CB_MODULE_CORE;
template<> constexpr NvtxCallbackModule EnumTypeToModuleId<NvtxCallbackIdCuda > = NVTX_CB_MODULE_CUDA;
template<> constexpr NvtxCallbackModule EnumTypeToModuleId<NvtxCallbackIdOpenCL> = NVTX_CB_MODULE_OPENCL;
template<> constexpr NvtxCallbackModule EnumTypeToModuleId<NvtxCallbackIdCudaRt> = NVTX_CB_MODULE_CUDART;
template<> constexpr NvtxCallbackModule EnumTypeToModuleId<NvtxCallbackIdCore2 > = NVTX_CB_MODULE_CORE2;
template<> constexpr NvtxCallbackModule EnumTypeToModuleId<NvtxCallbackIdSync > = NVTX_CB_MODULE_SYNC;
//--- IdToModuleId ---
// Helper for EnumTypeToModuleId to convert directly from the integral_constant type of a
// call id to its module id. NVTX_CBID(CORE_MarkA) is an object whose type is
// std::integral_constant<NvtxCallbackIdCore, NVTX_CBID_CORE_MarkA>, and that type cannot be
// passed to EnumTypeToModuleId, which expects the enum type itself. This helper extracts
// the enum's type from the integral_constant (its value_type member), allowing
// IdToModuleId<decltype(NVTX_CBID(CORE_MarkA))>.
template <typename IdT>
constexpr static NvtxCallbackModule IdToModuleId = EnumTypeToModuleId<typename IdT::value_type>;
//--- IdToHandlerType ---
// Class template mapping a call id type (the integral_constant type produced
// by NVTX_CBID) to the matching handler function pointer type, exposed as the
// nested alias "type".  The primary template yields std::nullptr_t, so an id
// with no specialization below can only be paired with a nullptr handler
// (see CheckHandlerTypeMatchesId).  std::nullptr_t is spelled with its std::
// qualification so the header does not depend on a C header happening to
// declare ::nullptr_t.
template <typename IdT> struct IdToHandlerType { using type = std::nullptr_t; };
// Macro for defining IdToHandlerType specializations for each id.
// mod = module, i.e. CORE, CORE2
// func = prefixless function name, i.e. MarkEx, DomainCreateA
// impl = impl or fakeimpl, depending on whether or not to use real types or the
// nvtxTypes.h "fakeimpl" types, which don't depend on CUDA/OpenCL headers.
// Each use expands to a specialization whose "type" is nvtx<func>_<impl>_fntype.
#define NVTX_ID_TO_TYPE(mod, func, impl) \
template <> struct IdToHandlerType<decltype(NVTX_CBID(mod##_##func))> { using type = nvtx##func##_##impl##_fntype; }
// CORE module
NVTX_ID_TO_TYPE(CORE, MarkEx , impl);
NVTX_ID_TO_TYPE(CORE, MarkA , impl);
NVTX_ID_TO_TYPE(CORE, MarkW , impl);
NVTX_ID_TO_TYPE(CORE, RangeStartEx , impl);
NVTX_ID_TO_TYPE(CORE, RangeStartA , impl);
NVTX_ID_TO_TYPE(CORE, RangeStartW , impl);
NVTX_ID_TO_TYPE(CORE, RangeEnd , impl);
NVTX_ID_TO_TYPE(CORE, RangePushEx , impl);
NVTX_ID_TO_TYPE(CORE, RangePushA , impl);
NVTX_ID_TO_TYPE(CORE, RangePushW , impl);
NVTX_ID_TO_TYPE(CORE, RangePop , impl);
NVTX_ID_TO_TYPE(CORE, NameCategoryA, impl);
NVTX_ID_TO_TYPE(CORE, NameCategoryW, impl);
NVTX_ID_TO_TYPE(CORE, NameOsThreadA, impl);
NVTX_ID_TO_TYPE(CORE, NameOsThreadW, impl);
// CORE2 module
NVTX_ID_TO_TYPE(CORE2, DomainMarkEx , impl);
NVTX_ID_TO_TYPE(CORE2, DomainRangeStartEx , impl);
NVTX_ID_TO_TYPE(CORE2, DomainRangeEnd , impl);
NVTX_ID_TO_TYPE(CORE2, DomainRangePushEx , impl);
NVTX_ID_TO_TYPE(CORE2, DomainRangePop , impl);
NVTX_ID_TO_TYPE(CORE2, DomainResourceCreate , impl);
NVTX_ID_TO_TYPE(CORE2, DomainResourceDestroy, impl);
NVTX_ID_TO_TYPE(CORE2, DomainNameCategoryA , impl);
NVTX_ID_TO_TYPE(CORE2, DomainNameCategoryW , impl);
NVTX_ID_TO_TYPE(CORE2, DomainRegisterStringA, impl);
NVTX_ID_TO_TYPE(CORE2, DomainRegisterStringW, impl);
NVTX_ID_TO_TYPE(CORE2, DomainCreateA , impl);
NVTX_ID_TO_TYPE(CORE2, DomainCreateW , impl);
NVTX_ID_TO_TYPE(CORE2, DomainDestroy , impl);
NVTX_ID_TO_TYPE(CORE2, Initialize , impl);
#undef NVTX_ID_TO_TYPE
//--- CheckHandlerTypeMatchesId ---
// Compile-time check provides easy-to-read error if FuncT isn't compatible with EnumT
template <typename IdT, typename FuncT>
constexpr inline void CheckHandlerTypeMatchesId()
{
using ExpectedFuncT = typename IdToHandlerType<IdT>::type;
static_assert(std::is_same<ExpectedFuncT, FuncT>(),
"NVTX Injection Helper: The provided handler function's signature does not match the NVTX API for the given call id.");
}
//--- Handler ---
// One id/handler pair for a single NVTX call:
//   id  - the call's NVTX_CBID_* value, stored as id_t so the enum's type
//         (and therefore its module) is no longer part of the object
//   pfn - the handler, kept with its exact type FuncT
// Construction is constexpr, so Handlers can be built and stored in a
// container at compile time.  The type-erasing cast of the function pointer
// is deferred to Address(), which is only usable at run time.
template <typename FuncT>
class Handler
{
public:
    id_t id;
    FuncT pfn;

    // The id arrives as an id_v so its value is available as the template
    // argument EnumVal; the argument object itself carries no information
    // and is therefore left unnamed.
    template <typename EnumT, EnumT EnumVal>
    constexpr Handler(id_v<EnumT, EnumVal>, FuncT handlerFn)
        : id(static_cast<id_t>(EnumVal))
        , pfn(handlerFn)
    {}

    // Returns the handler converted to the generic NvtxFunctionPointer type.
    NvtxFunctionPointer Address() const noexcept
    {
        using erased_t = NvtxFunctionPointer;
        return reinterpret_cast<erased_t>(pfn);
    }
};
//--- MakeHandler ---
// Factory for Handler that deduces FuncT from the handler argument.  Before
// building the Handler it instantiates CheckHandlerTypeMatchesId, so a handler
// whose type does not match the id is rejected at compile time.
template <typename IdT, typename FuncT>
constexpr inline Handler<FuncT> MakeHandler(IdT id_, FuncT func)
{
    using handler_t = Handler<FuncT>;
    CheckHandlerTypeMatchesId<IdT, FuncT>();
    return handler_t(id_, func);
}
//--- ModuleHandlerTable ---
// Represents the set of Handlers for one module. Provides:
// - the module's id (NVTX_CB_MODULE_* enum values)
// - iterable container of id/handler pairs (empty means skip getting etbl for module)
// - highest call id value of handler in module (to confirm client has sufficient size)
// - a method to assign all the stored handlers into a client's handler table
// These objects can be constructed at compile time, including the highest call id used.
template <NvtxCallbackModule mod, typename... Funcs>
class ModuleHandlerTable
{
public:
    using tuple_t = std::tuple<Handler<Funcs>...>;
    static constexpr NvtxCallbackModule moduleId = mod;
    tuple_t handlers;
    // Largest id among the stored handlers; for an empty table this is the
    // value maxVal<id_t>() returns when given no arguments.
    id_t highestIdUsed;

    constexpr ModuleHandlerTable(tuple_t t)
        : handlers(t)
        , highestIdUsed(FindHighestId(t))
    {}

    // Writes each stored handler's address into the client's function table.
    // clientTable is indexed by call id, and each entry points at the slot
    // that receives the handler address.  Entries are skipped when the
    // handler's id is 0 or its function pointer is null.  A null clientTable
    // or a null slot pointer is ignored rather than dereferenced, so this
    // call is safe after a failed table lookup.  The caller must still make
    // sure the table has more than highestIdUsed entries; the table's size is
    // not known here.
    void AssignToClient(NvtxFunctionTable clientTable) const noexcept
    {
        if (clientTable == nullptr) return;

        for_each_in_tuple(handlers,
            [clientTable](auto const& handler)
            {
                if (handler.id == 0 || handler.pfn == nullptr) return;

                auto const slot = clientTable[handler.id];
                if (slot != nullptr)
                {
                    *slot = handler.Address();
                }
            }
        );
    }

private:
    // Expands the tuple into the list of its handlers' ids and takes the max.
    template <size_t... Is>
    static constexpr id_t FindHighestIdHelper(tuple_t t, std::index_sequence<Is...>)
    {
        return maxVal<id_t>(std::get<Is>(t).id...);
    }

    static constexpr id_t FindHighestId(tuple_t t)
    {
        return FindHighestIdHelper(t, std::make_index_sequence<sizeof...(Funcs)>());
    }
};
//--- MakeModuleHandlerTuple ---
// MakeModuleHandlerTuple takes NvtxCallbackModule "mod" as a template parameter,
// and loops over pairs of arguments (an enum and a handler function), building a
// tuple of Handler objects for the enums that are in module "mod", and ignoring
// ones that aren't. This lets the user pass in handlers for all modules in
// one simple call, and we can build up separate handler tables for each module.
// MakeModuleHandlerTuple is recursive, peeling off two arguments in each recursive
// case, and having no args be the base case. The recursive case has a pair of
// overloads for whether or not the enum's type matches "mod" or not. Since these
// overloads are separate functions, it's mutual recursion, so both are declared
// first before the definitions.
// Base case: every id/handler pair has been consumed, so the recursion
// bottoms out with an empty tuple for the other overloads to build on.
template <NvtxCallbackModule mod>
constexpr inline auto MakeModuleHandlerTuple()
{
    return std::make_tuple();
}
// Prototypes of recursive cases -- needed since they can call each other
// Exactly one of the two is viable for a given (mod, IdT): the enable_if
// conditions are complementary (== mod versus != mod).  The "= 0" default
// template arguments appear only on these declarations; the definitions that
// follow repeat the parameter lists without them.
// Overload selected when the id belongs to module "mod":
template <NvtxCallbackModule mod, typename IdT, typename FuncT,
std::enable_if_t<IdToModuleId<IdT> == mod, int> = 0,
typename... Args>
constexpr inline auto MakeModuleHandlerTuple(IdT, FuncT, Args...);
// Overload selected when the id belongs to some other module:
template <NvtxCallbackModule mod, typename IdT, typename FuncT,
std::enable_if_t<IdToModuleId<IdT> != mod, int> = 0,
typename... Args>
constexpr inline auto MakeModuleHandlerTuple(IdT, FuncT, Args...);
// Recursive case 1: the id belongs to module "mod".  A Handler for this
// id/function pair is placed at the front of the tuple produced from the
// remaining arguments.
template <NvtxCallbackModule mod, typename IdT, typename FuncT,
    std::enable_if_t<IdToModuleId<IdT> == mod, int>,
    typename... Args>
constexpr inline auto MakeModuleHandlerTuple(IdT id, FuncT f, Args... rest)
{
    // Reject ids whose enum type maps to no known module, with a readable
    // compile error.
    static_assert(IdToModuleId<IdT> != NVTX_CB_MODULE_INVALID,
        "MakeHandlerTable arguments must be pairs of IDs and handler functions. IDs must be enums starting with NVTX_CBID_. An invalid ID value was provided.");
    // NOTE: nothing here detects the same id being supplied twice; duplicate
    // entries are all kept in the resulting tuple.
    return std::tuple_cat(
        std::make_tuple(MakeHandler(id, f)),
        MakeModuleHandlerTuple<mod>(rest...));
}
// Recursive case 2: id is not in module "mod", so this id/handler pair is
// dropped and the result is whatever the remaining arguments produce.
// The pair itself is unused, hence the unnamed parameters.
template <NvtxCallbackModule mod, typename IdT, typename FuncT,
    std::enable_if_t<IdToModuleId<IdT> != mod, int>,
    typename... Args>
constexpr inline auto MakeModuleHandlerTuple(IdT, FuncT, Args... rest)
{
    // An id whose enum type maps to NVTX_CB_MODULE_INVALID never equals a
    // real module, so it always lands in this overload.  Checking here keeps
    // such an id from being silently skipped for every module.
    static_assert(IdToModuleId<IdT> != NVTX_CB_MODULE_INVALID,
        "MakeHandlerTable arguments must be pairs of IDs and handler functions. IDs must be enums starting with NVTX_CBID_. An invalid ID value was provided.");
    return MakeModuleHandlerTuple<mod>(rest...);
}
//--- MakeModuleHandlerFromTuple ---
// Helper function for MakeModuleHandlerTable.  Converts a tuple of Handlers
// into a ModuleHandlerTable object, deducing the handler function types
// Funcs... from the tuple's element types.  Building the tuple first (with
// std::tuple_cat) and wrapping it afterwards was simpler than growing a
// ModuleHandlerTable incrementally.
template <NvtxCallbackModule mod, typename... Funcs>
constexpr inline auto MakeModuleHandlerFromTuple(std::tuple<Handler<Funcs>...> t)
{
    using table_t = ModuleHandlerTable<mod, Funcs...>;
    return table_t(t);
}
//--- "Make" function for ModuleHandlerTable to automatically deduce type ---
// First, create a tuple of just the handlers in the argument list in module "mod".
// Uses the mutually-recursive MakeModuleHandlerTuple overloads, which only add
// handlers into the tuple if the module matches. Then, MakeModuleHandlerFromTuple
// converts the tuple into a properly-typed ModuleHandlerTable object.
template <NvtxCallbackModule mod, typename... Args>
constexpr inline auto MakeModuleHandlerTable(Args... args)
{
const auto handlerTuple = MakeModuleHandlerTuple<mod>(args...);
return MakeModuleHandlerFromTuple<mod>(handlerTuple);
}
} // namespace detail_nvtx
//============ NVTX injection helper public interface =========================
// Define sentinel-value constants for use in handler implementations
// Every constant is the value -1 converted to the corresponding NVTX type.
// The two handle constants are plain "const" rather than "constexpr" because
// they are produced with reinterpret_cast, which is not permitted in a
// constant expression.
namespace ReturnCodes {
constexpr auto NVTX_TOOL_ATTACHED_UNUSED_RANGE_ID = static_cast<nvtxRangeId_t>(-1LL);
constexpr int NVTX_TOOL_ATTACHED_UNUSED_PUSH_POP_ID = -1;
const auto NVTX_TOOL_ATTACHED_UNUSED_DOMAIN_HANDLE = reinterpret_cast<nvtxDomainHandle_t>(-1LL);
const auto NVTX_TOOL_ATTACHED_UNUSED_STRING_HANDLE = reinterpret_cast<nvtxStringHandle_t>(-1LL);
// Note: In C++20, use bit_cast instead of reinterpret_cast, so the handles
// (which are pointer types) can also be made constexpr.
}
template <typename... Args>
constexpr inline auto MakeHandlerTable(Args... args)
{
return std::make_tuple(
MakeModuleHandlerTable<NVTX_CB_MODULE_CORE >(args...),
MakeModuleHandlerTable<NVTX_CB_MODULE_CUDA >(args...),
MakeModuleHandlerTable<NVTX_CB_MODULE_OPENCL>(args...),
MakeModuleHandlerTable<NVTX_CB_MODULE_CUDART>(args...),
MakeModuleHandlerTable<NVTX_CB_MODULE_CORE2 >(args...),
MakeModuleHandlerTable<NVTX_CB_MODULE_SYNC >(args...)
);
}
// Outcome of InstallHandlers.
enum class InstallResult
{
// All checks passed and handlers were installed.
Success,
// The client returned no table for NVTX_ETID_VERSIONINFO.
ExportTableVersionInfoMissing,
// The version-info table's struct_size is smaller than NvtxExportTableVersionInfo.
ExportTableVersionInfoTooSmall,
// The client's reported NVTX version is lower than 2.
ClientVersionTooOld,
// The client returned no table for NVTX_ETID_CALLBACKS.
ExportTableCallbacksMissing,
// The callbacks table's struct_size is smaller than NvtxExportTableCallbacks.
ExportTableCallbacksTooSmall,
// The client could not supply a function table for a callback module.
ModuleNotSupported,
// A module's client function table has too few entries for the highest
// call id being installed.
ModuleTableTooSmall
};
//--- InstallHandlers ---
// Installs the handlers held in injectionHandlerTable into a client's NVTX
// instance.
//
// Steps:
//   1. Fetch NVTX_ETID_VERSIONINFO through getExportTable; validate its
//      struct_size and require version 2 or newer.
//   2. Fetch NVTX_ETID_CALLBACKS; validate its struct_size.
//   3. For every module that has at least one handler, fetch the client's
//      function table with GetModuleFunctionTable, confirm it has an entry
//      for the highest call id being installed, and assign the handlers.
//      A module that fails either check is skipped entirely (nothing is
//      written to its table); remaining modules are still processed.
//
// Parameters:
//   getExportTable        - the client's export-table lookup function
//   injectionHandlerTable - tuple of ModuleHandlerTable objects, as returned
//                           by MakeHandlerTable
//   errStream             - optional; receives a description of each failure
//   pVersion              - optional; receives the client's NVTX version once
//                           the version check has passed
//
// Returns InstallResult::Success if every module with handlers was installed.
// Otherwise returns the code for the first failure encountered; for module
// failures that is ModuleNotSupported or ModuleTableTooSmall.
template <typename HandlerTableT>
inline InstallResult InstallHandlers(
    NvtxGetExportTableFunc_t getExportTable,
    HandlerTableT const& injectionHandlerTable,
    std::ostringstream* errStream = nullptr,
    uint32_t* pVersion = nullptr)
{
    // Without a lookup function no export table can be obtained at all.
    if (!getExportTable)
    {
        if (errStream) *errStream
            << "Client NVTX instance provided a null getExportTable function";
        return InstallResult::ExportTableVersionInfoMissing;
    }

    auto pVersionInfo =
        reinterpret_cast<const NvtxExportTableVersionInfo*>(getExportTable(NVTX_ETID_VERSIONINFO));
    if (!pVersionInfo)
    {
        if (errStream) *errStream
            << "Client NVTX instance doesn't support NVTX_ETID_VERSIONINFO";
        return InstallResult::ExportTableVersionInfoMissing;
    }
    if (pVersionInfo->struct_size < sizeof(*pVersionInfo))
    {
        if (errStream) *errStream
            << "NvtxExportTableVersionInfo structure size is " << pVersionInfo->struct_size
            << ", expected " << sizeof(*pVersionInfo) << "!";
        return InstallResult::ExportTableVersionInfoTooSmall;
    }

    const uint32_t version = pVersionInfo->version;
    if (version < 2)
    {
        if (errStream) *errStream
            << "client's NVTX version is " << version << ", expected 2+";
        return InstallResult::ClientVersionTooOld;
    }
    if (pVersion) *pVersion = version;

    auto pCallbacks =
        reinterpret_cast<const NvtxExportTableCallbacks*>(getExportTable(NVTX_ETID_CALLBACKS));
    if (!pCallbacks)
    {
        if (errStream) *errStream
            << "Client NVTX instance doesn't support NVTX_ETID_CALLBACKS";
        return InstallResult::ExportTableCallbacksMissing;
    }
    if (pCallbacks->struct_size < sizeof(*pCallbacks))
    {
        if (errStream) *errStream
            << "NvtxExportTableCallbacks structure size is " << pCallbacks->struct_size
            << ", expected " << sizeof(*pCallbacks) << "!";
        return InstallResult::ExportTableCallbacksTooSmall;
    }

    // NOTE(review): the "|| true" forces this dump to stdout in every build,
    // not only DEBUG ones.  Kept as-is to preserve existing output.
#if defined(DEBUG) || true
    // Simple loop to print handler table internal details
    for_each_in_tuple(injectionHandlerTable,
        [](auto const& handlerModule)
        {
            auto count = size_of_tuple(handlerModule.handlers);
            printf("Module: %d Count: %d Highest: %d\n",
                static_cast<int>(handlerModule.moduleId),
                static_cast<int>(count),
                static_cast<int>(handlerModule.highestIdUsed));
            if (count > 0)
            {
                for_each_in_tuple(handlerModule.handlers,
                    [](auto const& handler)
                    {
                        // A function pointer can only be converted to an
                        // integer with reinterpret_cast (static_cast is
                        // ill-formed); widen to match the %llx specifier.
                        auto addr = static_cast<unsigned long long>(
                            reinterpret_cast<std::uintptr_t>(handler.Address()));
                        printf(" Id: %d Address: 0x%llx\n",
                            static_cast<int>(handler.id), addr);
                    }
                );
            }
        }
    );
#endif

    // Loop over module handler tables and install handlers into client.
    // A lambda cannot return from the enclosing function, so the first
    // failure is remembered here and returned after the loop.
    InstallResult moduleResult = InstallResult::Success;
    for_each_in_tuple(injectionHandlerTable,
        [&](auto const& handlerModule)
        {
            if (handlerModule.moduleId == NVTX_CB_MODULE_INVALID) return;

            // No handlers for this module: don't request its table, so a
            // client lacking an unused module is not reported as an error.
            if (size_of_tuple(handlerModule.handlers) == 0) return;

            NvtxFunctionTable clientTable = nullptr;
            unsigned int clientTableSize = 0;
            const int success = pCallbacks->GetModuleFunctionTable(
                handlerModule.moduleId, &clientTable, &clientTableSize);

            if (!success || !clientTable)
            {
                if (errStream)
                {
                    // Keep messages for separate modules on separate lines
                    if (moduleResult != InstallResult::Success) *errStream << '\n';
                    *errStream
                        << "Client NVTX instance doesn't support callback module with id " << handlerModule.moduleId;
                }
                if (moduleResult == InstallResult::Success)
                    moduleResult = InstallResult::ModuleNotSupported;
                return; // no table to write into
            }

            // Ensure client's table is new enough to support the function pointers we want to register
            if (clientTableSize <= handlerModule.highestIdUsed)
            {
                if (errStream)
                {
                    if (moduleResult != InstallResult::Success) *errStream << '\n';
                    *errStream
                        << "Size of client NVTX instance's handler table with module id " << handlerModule.moduleId
                        << " too small. Size is " << clientTableSize
                        << ", but injection needs to assign table[" << handlerModule.highestIdUsed << "]";
                }
                if (moduleResult == InstallResult::Success)
                    moduleResult = InstallResult::ModuleTableTooSmall;
                return; // assigning would write past the end of the table
            }

            handlerModule.AssignToClient(clientTable);
        }
    );
    return moduleResult;
}
} // namespace NvtxInjectionHelper