Problem Struct Reference#
ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem Struct Reference
#include <gridwise_gemm_wmma_cshuffle_v3_b_scale.hpp>
Inheritance diagram for ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem:
Public Member Functions | |
| __host__ | Problem (index_t M_, index_t N_, index_t K_, std::array< index_t, NumATensor > StrideAs_, std::array< index_t, NumBTensor > StrideBs_, std::array< index_t, NumDTensor > StrideDs_, index_t StrideE_, index_t StrideScaleB_, index_t KBatch_) |
| __host__ void | Print () const |
Public Attributes | |
| index_t | M |
| index_t | N |
| index_t | K |
| std::array< index_t, NumATensor > | StrideAs |
| std::array< index_t, NumBTensor > | StrideBs |
| std::array< index_t, NumDTensor > | StrideDs |
| index_t | StrideE |
| index_t | StrideScaleB |
| index_t | KBatch |
| index_t | MPadded |
| index_t | NPadded |
| index_t | KRead |
| index_t | KPadded |
| index_t | AK0 |
| index_t | BK0 |
| index_t | MBlock |
| index_t | NBlock |
Constructor & Destructor Documentation
◆ Problem()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inline |
Member Function Documentation
◆ Print()
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
|
inline |
Member Data Documentation
◆ AK0
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::AK0 |
◆ BK0
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::BK0 |
◆ K
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::K |
◆ KBatch
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::KBatch |
◆ KPadded
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::KPadded |
◆ KRead
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::KRead |
◆ M
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::M |
◆ MBlock
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::MBlock |
◆ MPadded
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::MPadded |
◆ N
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::N |
◆ NBlock
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::NBlock |
◆ NPadded
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::NPadded |
◆ StrideAs
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| std::array<index_t, NumATensor> ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::StrideAs |
◆ StrideBs
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| std::array<index_t, NumBTensor> ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::StrideBs |
◆ StrideDs
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| std::array<index_t, NumDTensor> ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::StrideDs |
◆ StrideE
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::StrideE |
◆ StrideScaleB
template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename AsDataType, typename BsDataType, typename BScaleType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, tensor_operation::device::GemmSpecialization GemmSpec, index_t BlockSize, index_t ScaleBlockN, index_t ScaleBlockK, index_t MPerBlock, index_t NPerBlock, index_t KPerBlock, index_t AK1Value, index_t BK1Value, index_t MPerWmma, index_t NPerWmma, index_t MRepeat, index_t NRepeat, typename ABlockTransferThreadClusterLengths_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, bool AThreadTransferSrcResetCoordinateAfterRun, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, bool BThreadTransferSrcResetCoordinateAfterRun, index_t BBlockLdsExtraN, index_t CShuffleMRepeatPerShuffle, index_t CShuffleNRepeatPerShuffle, typename CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, typename CDEShuffleBlockTransferScalarPerVectors, BlockGemmPipelineScheduler BlkGemmPipeSched = BlockGemmPipelineScheduler::Intrawave, BlockGemmPipelineVersion BlkGemmPipelineVer = BlockGemmPipelineVersion::v4, typename ComputeTypeA = EDataType, typename ComputeTypeB = ComputeTypeA, bool PermuteA = false, bool PermuteB = false>
| index_t ck::GridwiseGemm_wmma_cshuffle_v3_b_scale< ALayout, BLayout, DsLayout, ELayout, AsDataType, BsDataType, BScaleType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, BlockSize, ScaleBlockN, ScaleBlockK, MPerBlock, NPerBlock, KPerBlock, AK1Value, BK1Value, MPerWmma, NPerWmma, MRepeat, NRepeat, ABlockTransferThreadClusterLengths_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMRepeatPerShuffle, CShuffleNRepeatPerShuffle, CDEShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVectors, BlkGemmPipeSched, BlkGemmPipelineVer, ComputeTypeA, ComputeTypeB, PermuteA, PermuteB >::Problem::StrideScaleB |
The documentation for this struct was generated from the following file: