GemmPipelineAgBgCrCompV4< Problem, Policy > Struct Template Reference#
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy > Struct Template Reference
Compute optimized pipeline version 4. More...
#include <gemm_pipeline_ag_bg_cr_comp_v4.hpp>
Inheritance diagram for ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >:
Classes | |
| struct | PipelineImpl |
| struct | PipelineImpl< GemmPipelineScheduler::Intrawave > |
Public Types | |
| using | Base = BaseGemmPipelineAgBgCrCompV4<Problem> |
| using | PipelineImplBase = GemmPipelineAgBgCrImplBase<Problem, Policy> |
| using | AsDataType = remove_cvref_t<typename Problem::AsDataTypeTuple> |
| using | BsDataType = remove_cvref_t<typename Problem::BsDataTypeTuple> |
| using | CDataType = remove_cvref_t<typename Problem::CDataType> |
| using | BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape> |
| using | AsLayout = remove_cvref_t<typename Problem::AsLayoutTuple> |
| using | BsLayout = remove_cvref_t<typename Problem::BsLayoutTuple> |
| using | CLayout = remove_cvref_t<typename Problem::CLayout> |
| using | AElementWise = remove_cvref_t<typename Problem::AElementWise> |
| using | BElementWise = remove_cvref_t<typename Problem::BElementWise> |
| using | ALayout = remove_cvref_t<std::tuple_element_t<0, AsLayout>> |
| using | BLayout = remove_cvref_t<std::tuple_element_t<0, BsLayout>> |
| using | ADataType = remove_cvref_t<std::tuple_element_t<0, AsDataType>> |
| using | BDataType = remove_cvref_t<std::tuple_element_t<0, BsDataType>> |
| using | BlockGemm = remove_cvref_t<decltype(Policy::template GetBlockGemm<Problem>())> |
| using | I0 = number<0> |
| using | I1 = number<1> |
| using | I2 = number<2> |
Public Member Functions | |
| template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const AsDramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem_0, void *p_smem_1) const |
| template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const AsDramBlockWindowTmp &a_dram_block_window_tmp, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const index_t num_loop, void *__restrict__ p_smem_0, void *__restrict__ p_smem_1) const |
| template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const AsDramBlockWindowTmp &a_dram_block_window_tmp, const BsDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, bool has_hot_loop, TailNumber tail_number, void *__restrict__ p_smem_0, void *__restrict__ p_smem_1) const |
| template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem_0, void *p_smem_1) const |
| template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, const index_t num_loop, void *__restrict__ p_smem_0, void *__restrict__ p_smem_1) const |
| template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BDramBlockWindowTmp >::value, bool > * = nullptr> | |
| CK_TILE_DEVICE auto | operator() (const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, bool has_hot_loop, TailNumber tail_number, void *__restrict__ p_smem_0, void *__restrict__ p_smem_1) const |
Static Public Member Functions | |
| template<bool IsWave32Host = false> | |
| static constexpr index_t | GetVectorSizeA () |
| template<bool IsWave32Host = false> | |
| static constexpr index_t | GetVectorSizeB () |
| static constexpr index_t | GetVectorSizeC () |
| static constexpr index_t | GetSmemPackA () |
| static constexpr index_t | GetSmemPackB () |
| static CK_TILE_HOST const std::string | GetName () |
| static CK_TILE_HOST_DEVICE constexpr index_t | GetSmemSize () |
| static CK_TILE_HOST_DEVICE constexpr auto | IsTransposeC () |
| Static Public Member Functions inherited from ck_tile::BaseGemmPipelineAgBgCrCompV4< Problem > | |
| static CK_TILE_HOST_DEVICE constexpr bool | BlockHasHotloop (index_t num_loop) |
| static CK_TILE_HOST_DEVICE constexpr TailNumber | GetBlockLoopTailNum (index_t num_loop) |
| template<typename RunFunction> | |
| static CK_TILE_HOST_DEVICE auto | TailHandler (const RunFunction &run_func, bool has_hot_loop, TailNumber tail_number) |
Static Public Attributes | |
| static constexpr index_t | APackedSize |
| static constexpr index_t | BPackedSize |
| static constexpr index_t | BlockSize = Problem::kBlockSize |
| static constexpr index_t | MPerBlock = BlockGemmShape::kM |
| static constexpr index_t | NPerBlock = BlockGemmShape::kN |
| static constexpr index_t | KPerBlock = BlockGemmShape::kK |
| static constexpr bool | kPadM = Problem::kPadM |
| static constexpr bool | kPadN = Problem::kPadN |
| static constexpr bool | kPadK = Problem::kPadK |
| static constexpr bool | DoubleSmemBuffer = Problem::DoubleSmemBuffer |
| static constexpr index_t | NumWaveGroups = Problem::NumWaveGroups |
| static constexpr index_t | Preshuffle = Problem::Preshuffle |
| static constexpr bool | HasHotLoop = Problem::HasHotLoop |
| static constexpr auto | TailNum = Problem::TailNum |
| static constexpr auto | Scheduler = Problem::Scheduler |
| static constexpr auto | is_a_load_tr_v = bool_constant<PipelineImplBase::is_a_load_tr>{} |
| static constexpr auto | is_b_load_tr_v = bool_constant<PipelineImplBase::is_b_load_tr>{} |
| Static Public Attributes inherited from ck_tile::BaseGemmPipelineAgBgCrCompV4< Problem > | |
| static constexpr index_t | PrefetchStages = 2 |
| static constexpr index_t | PrefillStages = 1 |
| static constexpr index_t | GlobalBufferNum = 1 |
| static constexpr bool | UsePersistentKernel = Problem::Traits::UsePersistentKernel |
Detailed Description
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
struct ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >
struct ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >
Compute optimized pipeline version 4.
This version introduces a dual LDS window mechanism that uses a ping-pong buffer approach to handle data from global memory more efficiently. Unlike compute version 3, this method allows one LDS buffer to be filled with data from global memory while the warps read the other LDS buffer to perform MFMA matrix multiplication. Overlapping these two operations keeps the warps continuously busy, thereby significantly reducing the time spent waiting on memory loads and enhancing overall performance.
- Note
- This version shows improved performance over Compute Version 3 with the same block tile. It is especially efficient for large matrices where M, N, and K are each greater than 8K, outperforming Compute Version 3 even when the latter's block size is twice that of Compute Version 4.
Member Typedef Documentation
◆ ADataType
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::ADataType = remove_cvref_t<std::tuple_element_t<0, AsDataType>> |
◆ AElementWise
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::AElementWise = remove_cvref_t<typename Problem::AElementWise> |
◆ ALayout
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::ALayout = remove_cvref_t<std::tuple_element_t<0, AsLayout>> |
◆ AsDataType
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::AsDataType = remove_cvref_t<typename Problem::AsDataTypeTuple> |
◆ AsLayout
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::AsLayout = remove_cvref_t<typename Problem::AsLayoutTuple> |
◆ Base
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::Base = BaseGemmPipelineAgBgCrCompV4<Problem> |
◆ BDataType
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::BDataType = remove_cvref_t<std::tuple_element_t<0, BsDataType>> |
◆ BElementWise
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::BElementWise = remove_cvref_t<typename Problem::BElementWise> |
◆ BLayout
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::BLayout = remove_cvref_t<std::tuple_element_t<0, BsLayout>> |
◆ BlockGemm
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::BlockGemm = remove_cvref_t<decltype(Policy::template GetBlockGemm<Problem>())> |
◆ BlockGemmShape
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::BlockGemmShape = remove_cvref_t<typename Problem::BlockGemmShape> |
◆ BsDataType
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::BsDataType = remove_cvref_t<typename Problem::BsDataTypeTuple> |
◆ BsLayout
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::BsLayout = remove_cvref_t<typename Problem::BsLayoutTuple> |
◆ CDataType
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::CDataType = remove_cvref_t<typename Problem::CDataType> |
◆ CLayout
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::CLayout = remove_cvref_t<typename Problem::CLayout> |
◆ I0
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::I0 = number<0> |
◆ I1
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::I1 = number<1> |
◆ I2
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::I2 = number<2> |
◆ PipelineImplBase
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
| using ck_tile::GemmPipelineAgBgCrCompV4< Problem, Policy >::PipelineImplBase = GemmPipelineAgBgCrImplBase<Problem, Policy> |
Member Function Documentation
◆ GetName()
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
inline static nodiscard |
◆ GetSmemPackA()
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
inline static constexpr |
◆ GetSmemPackB()
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
inline static constexpr |
◆ GetSmemSize()
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
inline static constexpr |
◆ GetVectorSizeA()
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
template<bool IsWave32Host = false>
|
inline static constexpr |
◆ GetVectorSizeB()
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
template<bool IsWave32Host = false>
|
inline static constexpr |
◆ GetVectorSizeC()
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
inline static constexpr |
◆ IsTransposeC()
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
inline static constexpr |
◆ operator()() [1/6]
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
◆ operator()() [2/6]
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
◆ operator()() [3/6]
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
template<typename ADramBlockWindowTmp, typename BDramBlockWindowTmp, typename std::enable_if_t<!is_detected< is_tuple, ADramBlockWindowTmp >::value &&!is_detected< is_tuple, BDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
◆ operator()() [4/6]
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename AElementFunction, typename BElementFunction, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
◆ operator()() [5/6]
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
◆ operator()() [6/6]
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
template<typename AsDramBlockWindowTmp, typename BsDramBlockWindowTmp, typename std::enable_if_t< is_detected< is_tuple, AsDramBlockWindowTmp >::value &&is_detected< is_tuple, BsDramBlockWindowTmp >::value, bool > * = nullptr>
|
inline |
Member Data Documentation
◆ APackedSize
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
Initial value:
=
ck_tile::numeric_traits<remove_cvref_t<ADataType>>::PackedSize
Definition tile/core/numeric/numeric.hpp:81
◆ BlockSize
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ BPackedSize
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
Initial value:
=
ck_tile::numeric_traits<remove_cvref_t<BDataType>>::PackedSize
◆ DoubleSmemBuffer
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ HasHotLoop
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ is_a_load_tr_v
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ is_b_load_tr_v
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ kPadK
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ kPadM
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ kPadN
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ KPerBlock
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ MPerBlock
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ NPerBlock
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ NumWaveGroups
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ Preshuffle
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ Scheduler
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
◆ TailNum
template<typename Problem, typename Policy = GemmPipelineAgBgCrCompV4DefaultPolicy>
|
static constexpr |
The documentation for this struct was generated from the following file: