#include <thread_group_tensor_slice_transfer_v6r2.hpp>
|
| __device__ constexpr | ThreadGroupTensorSliceTransfer_v6r2 (const Src0Desc &src0_desc, const Index &src0_block_slice_origin, const Src1Desc &src1_desc, const Index &src1_block_slice_origin, const DstDesc &dst_desc, const Index &dst_block_slice_origin, const ElementwiseOperation &element_op) |
| template<typename Src0Buffer, typename Src1Buffer, typename DstBuffer> |
| __device__ void | Run (const Src0Desc &src0_desc, const Src0Buffer &src0_buf, const Src1Desc &src1_desc, const Src1Buffer &src1_buf, const DstDesc &dst_desc, DstBuffer &dst_buf) |
| __device__ void | MoveSrc0SliceWindow (const Src0Desc &src0_desc, const Index &step) |
| __device__ void | MoveSrc1SliceWindow (const Src1Desc &src1_desc, const Index &step) |
| __device__ void | MoveDstSliceWindow (const DstDesc &dst_desc, const Index &step) |
◆ Index
template<typename ThreadGroup, typename ElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename DstData, typename Src0Desc, typename Src1Desc, typename DstDesc, typename DimAccessOrder,
index_t VectorDim,
index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
| using ck::ThreadGroupTensorSliceTransfer_v6r2< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, DstData, Src0Desc, Src1Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::Index = MultiIndex<nDim> |
◆ ThreadGroupTensorSliceTransfer_v6r2()
template<typename ThreadGroup, typename ElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename DstData, typename Src0Desc, typename Src1Desc, typename DstDesc, typename DimAccessOrder,
index_t VectorDim,
index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
| __device__ constexpr ck::ThreadGroupTensorSliceTransfer_v6r2< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, DstData, Src0Desc, Src1Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::ThreadGroupTensorSliceTransfer_v6r2 |
( |
const Src0Desc & | src0_desc, |
|
|
const Index & | src0_block_slice_origin, |
|
|
const Src1Desc & | src1_desc, |
|
|
const Index & | src1_block_slice_origin, |
|
|
const DstDesc & | dst_desc, |
|
|
const Index & | dst_block_slice_origin, |
|
|
const ElementwiseOperation & | element_op ) |
|
inline constexpr |
◆ MoveDstSliceWindow()
template<typename ThreadGroup, typename ElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename DstData, typename Src0Desc, typename Src1Desc, typename DstDesc, typename DimAccessOrder,
index_t VectorDim,
index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
| __device__ void ck::ThreadGroupTensorSliceTransfer_v6r2< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, DstData, Src0Desc, Src1Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::MoveDstSliceWindow |
( |
const DstDesc & | dst_desc, |
|
|
const Index & | step ) |
|
inline |
◆ MoveSrc0SliceWindow()
template<typename ThreadGroup, typename ElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename DstData, typename Src0Desc, typename Src1Desc, typename DstDesc, typename DimAccessOrder,
index_t VectorDim,
index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
| __device__ void ck::ThreadGroupTensorSliceTransfer_v6r2< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, DstData, Src0Desc, Src1Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::MoveSrc0SliceWindow |
( |
const Src0Desc & | src0_desc, |
|
|
const Index & | step ) |
|
inline |
◆ MoveSrc1SliceWindow()
template<typename ThreadGroup, typename ElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename DstData, typename Src0Desc, typename Src1Desc, typename DstDesc, typename DimAccessOrder,
index_t VectorDim,
index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
| __device__ void ck::ThreadGroupTensorSliceTransfer_v6r2< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, DstData, Src0Desc, Src1Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::MoveSrc1SliceWindow |
( |
const Src1Desc & | src1_desc, |
|
|
const Index & | step ) |
|
inline |
◆ Run()
template<typename ThreadGroup, typename ElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename DstData, typename Src0Desc, typename Src1Desc, typename DstDesc, typename DimAccessOrder,
index_t VectorDim,
index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
template<typename Src0Buffer, typename Src1Buffer, typename DstBuffer>
| __device__ void ck::ThreadGroupTensorSliceTransfer_v6r2< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, DstData, Src0Desc, Src1Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::Run |
( |
const Src0Desc & | src0_desc, |
|
|
const Src0Buffer & | src0_buf, |
|
|
const Src1Desc & | src1_desc, |
|
|
const Src1Buffer & | src1_buf, |
|
|
const DstDesc & | dst_desc, |
|
|
DstBuffer & | dst_buf ) |
|
inline |
◆ nDim
template<typename ThreadGroup, typename ElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename DstData, typename Src0Desc, typename Src1Desc, typename DstDesc, typename DimAccessOrder,
index_t VectorDim,
index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
| index_t ck::ThreadGroupTensorSliceTransfer_v6r2< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, DstData, Src0Desc, Src1Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::nDim = remove_reference_t<Src0Desc>::GetNumOfDimension() |
|
static constexpr |
◆ thread_slice_lengths
template<typename ThreadGroup, typename ElementwiseOperation,
InMemoryDataOperationEnum DstInMemOp, typename SliceLengths, typename ThreadClusterLengths, typename ThreadClusterArrangeOrder, typename Src0Data, typename Src1Data, typename DstData, typename Src0Desc, typename Src1Desc, typename DstDesc, typename DimAccessOrder,
index_t VectorDim,
index_t ScalarPerVector, bool ThreadTransferSrc0ResetCoordinateAfterRun, bool ThreadTransferSrc1ResetCoordinateAfterRun, bool ThreadTransferDstResetCoordinateAfterRun>
| auto ck::ThreadGroupTensorSliceTransfer_v6r2< ThreadGroup, ElementwiseOperation, DstInMemOp, SliceLengths, ThreadClusterLengths, ThreadClusterArrangeOrder, Src0Data, Src1Data, DstData, Src0Desc, Src1Desc, DstDesc, DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::thread_slice_lengths = SliceLengths{} / ThreadClusterLengths{} |
|
static constexpr |
The documentation for this struct was generated from the following file: