GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce > Struct Template Reference#
ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce > Struct Template Reference
#include <gridwise_normalization_bwd_data.hpp>
Public Types | |
| using | ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize> |
| using | DYThreadBufferDimAccessOrder |
| using | XThreadBufferDimAccessOrder |
| using | GammaThreadBufferDimAccessOrder |
| using | MeanInvStdThreadBufferDimAccessOrder |
| using | DXThreadBufferDimAccessOrder |
| using | ThreadClusterArrangeOrder = DYThreadBufferDimAccessOrder |
| using | ThreadBufferLengths_M_K = Sequence<MThreadSliceSize, KThreadSliceSize> |
| using | PassThroughOp = tensor_operation::element_wise::PassThrough |
| using | BlockwiseSumReduce |
Static Public Member Functions | |
| static __device__ void | Run (const GridDesc_M_K &dy_grid_desc_m_k, const GridDesc_M_K &x_grid_desc_m_k, const GridDesc_M_K &gamma_grid_desc_m_k, const GridDesc_M_K &mean_grid_desc_m_k, const GridDesc_M_K &inv_std_grid_desc_m_k, const GridDesc_M_K &dx_grid_desc_m_k, index_t num_k_block_tile_iteration, const DYDataType *const __restrict__ p_dy_global, const XDataType *const __restrict__ p_x_global, const GammaDataType *const __restrict__ p_gamma_global, const MeanInvStdDataType *const __restrict__ p_mean_global, const MeanInvStdDataType *const __restrict__ p_inv_std_global, DXDataType *const __restrict__ p_dx_global) |
Static Public Attributes | |
| static constexpr auto | thread_cluster_desc |
| static constexpr auto | thread_buffer_desc_m_k |
| static constexpr auto | thread_buffer_desc_m |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr index_t | M_BlockTileSize = MThreadClusterSize * MThreadSliceSize |
| static constexpr index_t | K_BlockTileSize = KThreadClusterSize * KThreadSliceSize |
Member Typedef Documentation
◆ BlockwiseSumReduce
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
| using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::BlockwiseSumReduce |
Initial value:
PartitionedBlockwiseReduction<ComputeDataType,
BlockSize,
true>
decltype(ThreadClusterDesc_M_K{}.GetLengths()) ThreadClusterLengths_M_K
Definition blockwise_softmax.hpp:69
DYThreadBufferDimAccessOrder ThreadClusterArrangeOrder
Definition gridwise_normalization_bwd_data.hpp:86
Definition reduction_functions_blockwise.hpp:28
Definition reduction_operator.hpp:37
◆ DXThreadBufferDimAccessOrder
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
| using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::DXThreadBufferDimAccessOrder |
Initial value:
Definition utility/sequence.hpp:43
Definition utility/functional.hpp:100
◆ DYThreadBufferDimAccessOrder
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
| using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::DYThreadBufferDimAccessOrder |
Initial value:
◆ GammaThreadBufferDimAccessOrder
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
| using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::GammaThreadBufferDimAccessOrder |
Initial value:
◆ MeanInvStdThreadBufferDimAccessOrder
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
| using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::MeanInvStdThreadBufferDimAccessOrder |
Initial value:
◆ PassThroughOp
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
| using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::PassThroughOp = tensor_operation::element_wise::PassThrough |
◆ ThreadBufferLengths_M_K
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
| using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::ThreadBufferLengths_M_K = Sequence<MThreadSliceSize, KThreadSliceSize> |
◆ ThreadClusterArrangeOrder
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
| using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::ThreadClusterArrangeOrder = DYThreadBufferDimAccessOrder |
◆ ThreadClusterLengths_M_K
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
| using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::ThreadClusterLengths_M_K = Sequence<MThreadClusterSize, KThreadClusterSize> |
◆ XThreadBufferDimAccessOrder
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
| using ck::GridwiseNormalizationBwdData_mk_to_mk< DYDataType, XDataType, GammaDataType, MeanInvStdDataType, ComputeDataType, DXDataType, GridDesc_M_K, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, DYSrcVectorDim, DYSrcVectorSize, XSrcVectorDim, XSrcVectorSize, GammaSrcVectorDim, GammaSrcVectorSize, MeanInvStdSrcVectorDim, MeanInvStdSrcVectorSize, DXDstVectorDim, DXDstVectorSize, SweepOnce >::XThreadBufferDimAccessOrder |
Initial value:
Member Function Documentation
◆ Run()
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
|
inlinestatic |
Member Data Documentation
◆ I0
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
|
staticconstexpr |
◆ I1
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
|
staticconstexpr |
◆ I2
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
|
staticconstexpr |
◆ K_BlockTileSize
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
|
staticconstexpr |
◆ M_BlockTileSize
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
|
staticconstexpr |
◆ thread_buffer_desc_m
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
|
staticconstexpr |
Initial value:
=
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
◆ thread_buffer_desc_m_k
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
|
staticconstexpr |
Initial value:
◆ thread_cluster_desc
template<typename DYDataType, typename XDataType, typename GammaDataType, typename MeanInvStdDataType, typename ComputeDataType, typename DXDataType, typename GridDesc_M_K, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t DYSrcVectorDim, index_t DYSrcVectorSize, index_t XSrcVectorDim, index_t XSrcVectorSize, index_t GammaSrcVectorDim, index_t GammaSrcVectorSize, index_t MeanInvStdSrcVectorDim, index_t MeanInvStdSrcVectorSize, index_t DXDstVectorDim, index_t DXDstVectorSize, bool SweepOnce>
|
staticconstexpr |
Initial value:
=
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
Sequence< MThreadClusterSize, KThreadClusterSize > ThreadClusterLengths_M_K
Definition gridwise_normalization_bwd_data.hpp:73
The documentation for this struct was generated from the following file: