DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize > Struct Template Reference

DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize > Struct Template Reference

Composable Kernel: ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize > Struct Template Reference
ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize > Struct Template Reference

#include <device_softmax_impl.hpp>

Inheritance diagram for ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >:
ck::tensor_operation::device::DeviceSoftmax< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim > ck::tensor_operation::device::BaseOperator

Classes

struct  Argument
struct  Invoker

Public Types

using GridDesc_M_K = decltype(MakeSrc2dDescriptor({1}, {1}, 1, 1))
using GridwiseSoftmaxGeneric
using GridwiseSoftmaxSweepOnce

Public Member Functions

bool IsSupportedArgument (const BaseArgument *p_arg) override
std::unique_ptr< BaseArgument > MakeArgumentPointer (const std::vector< index_t > inLengths, const std::vector< index_t > inStrides, const std::vector< int > reduceDims, double alpha, double beta, const void *in_dev, void *out_dev, InElementwiseOp in_elementwise_op, AccElementwiseOp acc_elementwise_op) override
std::unique_ptr< BaseInvoker > MakeInvokerPointer () override
std::string GetTypeString () const override
Public Member Functions inherited from ck::tensor_operation::device::BaseOperator
 BaseOperator ()=default
 BaseOperator (const BaseOperator &)=default
BaseOperator & operator= (const BaseOperator &)=default
virtual std::string GetInstanceString () const
virtual std::string GetTypeIdName () const
virtual std::optional< std::string > GetObjectName () const
virtual std::optional< std::string > GetTemplateInfo () const
virtual std::string GetTypeIdHashCode () const
virtual size_t GetWorkSpaceSize (const BaseArgument *) const
virtual void SetWorkSpacePointer (BaseArgument *p_arg, void *p_workspace, const StreamConfig &=StreamConfig{}) const
virtual ~BaseOperator ()

Static Public Member Functions

static auto MakeSrc2dDescriptor (const std::vector< index_t > &inLengths, const std::vector< index_t > &inStrides, int blkGroupSize, int numBlockTileIteration)
static bool IsSupportedArgument (const Argument &arg)
static auto MakeArgument (const std::vector< index_t > inLengths, const std::vector< index_t > inStrides, const std::vector< int > reduceDims, double alpha, double beta, const InDataType *in_dev, OutDataType *out_dev, InElementwiseOp in_elementwise_op, AccElementwiseOp acc_elementwise_op)
static auto MakeInvoker ()

Static Public Attributes

static constexpr index_t NumInvariantDim = Rank - NumReduceDim
static constexpr index_t NumSrcDim = Rank
static constexpr index_t NumDstDim = (NumInvariantDim == 0) ? 1 : NumInvariantDim
static constexpr bool reduceAllDim = (NumInvariantDim == 0)
static constexpr index_t M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr index_t K_BlockTileSize = KThreadClusterSize * KThreadSliceSize

Member Typedef Documentation

◆ GridDesc_M_K

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
using ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::GridDesc_M_K = decltype(MakeSrc2dDescriptor({1}, {1}, 1, 1))

◆ GridwiseSoftmaxGeneric

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
using ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::GridwiseSoftmaxGeneric
Initial value:
= GridwiseSoftmax_mk_to_mk<InDataType,
OutDataType,
AccDataType,
GridDesc_M_K,
BlockSize,
MThreadClusterSize,
KThreadClusterSize,
MThreadSliceSize,
KThreadSliceSize,
InSrcVectorDim,
InSrcVectorSize,
OutDstVectorSize,
false>
Definition gridwise_softmax.hpp:55
decltype(MakeSrc2dDescriptor({1}, {1}, 1, 1)) GridDesc_M_K
Definition device_softmax_impl.hpp:117

◆ GridwiseSoftmaxSweepOnce

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
using ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::GridwiseSoftmaxSweepOnce
Initial value:
= GridwiseSoftmax_mk_to_mk<InDataType,
OutDataType,
AccDataType,
GridDesc_M_K,
BlockSize,
MThreadClusterSize,
KThreadClusterSize,
MThreadSliceSize,
KThreadSliceSize,
InSrcVectorDim,
InSrcVectorSize,
OutDstVectorSize,
true>

Member Function Documentation

◆ GetTypeString()

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
std::string ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::GetTypeString ( ) const
inline override virtual

◆ IsSupportedArgument() [1/2]

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
bool ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::IsSupportedArgument ( const Argument & arg)
inline static

◆ IsSupportedArgument() [2/2]

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
bool ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::IsSupportedArgument ( const BaseArgument * p_arg)
inline override virtual

◆ MakeArgument()

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
auto ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::MakeArgument ( const std::vector< index_t > inLengths,
const std::vector< index_t > inStrides,
const std::vector< int > reduceDims,
double alpha,
double beta,
const InDataType * in_dev,
OutDataType * out_dev,
InElementwiseOp in_elementwise_op,
AccElementwiseOp acc_elementwise_op )
inline static

◆ MakeArgumentPointer()

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
std::unique_ptr< BaseArgument > ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::MakeArgumentPointer ( const std::vector< index_t > inLengths,
const std::vector< index_t > inStrides,
const std::vector< int > reduceDims,
double alpha,
double beta,
const void * in_dev,
void * out_dev,
InElementwiseOp in_elementwise_op,
AccElementwiseOp acc_elementwise_op )
inline override virtual

◆ MakeInvoker()

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
auto ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::MakeInvoker ( )
inline static

◆ MakeInvokerPointer()

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
std::unique_ptr< BaseInvoker > ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::MakeInvokerPointer ( )
inline override virtual

◆ MakeSrc2dDescriptor()

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
auto ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::MakeSrc2dDescriptor ( const std::vector< index_t > & inLengths,
const std::vector< index_t > & inStrides,
int blkGroupSize,
int numBlockTileIteration )
inline static

Member Data Documentation

◆ K_BlockTileSize

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
index_t ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::K_BlockTileSize = KThreadClusterSize * KThreadSliceSize
static constexpr

◆ M_BlockTileSize

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
index_t ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::M_BlockTileSize = MThreadClusterSize * MThreadSliceSize
static constexpr

◆ NumDstDim

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
index_t ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::NumDstDim = (NumInvariantDim == 0) ? 1 : NumInvariantDim
static constexpr

◆ NumInvariantDim

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
index_t ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::NumInvariantDim = Rank - NumReduceDim
static constexpr

◆ NumSrcDim

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
index_t ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::NumSrcDim = Rank
static constexpr

◆ reduceAllDim

template<typename InDataType, typename AccDataType, typename OutDataType, typename InElementwiseOp, typename AccElementwiseOp, index_t Rank, index_t NumReduceDim, index_t BlockSize, index_t MThreadClusterSize, index_t KThreadClusterSize, index_t MThreadSliceSize, index_t KThreadSliceSize, index_t InSrcVectorDim, index_t InSrcVectorSize, index_t OutDstVectorSize>
bool ck::tensor_operation::device::DeviceSoftmaxImpl< InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize >::reduceAllDim = (NumInvariantDim == 0)
static constexpr

The documentation for this struct was generated from the following file: