25template <
typename Src0Data,
33 typename ElementwiseOperation,
34 typename SliceLengths,
35 typename DimAccessOrder,
39 bool Src0ResetCoordinateAfterRun,
40 bool Src1ResetCoordinateAfterRun,
41 bool Src2ResetCoordinateAfterRun,
42 bool DstResetCoordinateAfterRun>
57 const Index& src0_slice_origin,
58 const Src1Desc& src1_desc,
59 const Index& src1_slice_origin,
60 const Src2Desc& src2_desc,
61 const Index& src2_slice_origin,
62 const DstDesc& dst_desc,
63 const Index& dst_slice_origin,
64 const ElementwiseOperation& element_op)
69 element_op_(element_op)
72 "wrong! cannot evenly divide");
76 const Index& src0_slice_origin_idx)
82 const Index& src1_slice_origin_idx)
88 const Index& src2_slice_origin_idx)
98 template <
typename Src0Buffer,
typename Src1Buffer,
typename Src2Buffer,
typename DstBuffer>
99 __device__
void Run(
const Src0Desc& src0_desc,
100 const Src0Buffer& src0_buf,
101 const Src1Desc& src1_desc,
102 const Src1Buffer& src1_buf,
103 const Src2Desc& src2_desc,
104 const Src2Buffer& src2_buf,
105 const DstDesc& dst_desc,
122 using src0_vector_t =
typename src0_vector_type::type;
125 using src1_vector_t =
typename src1_vector_type::type;
128 using src2_vector_t =
typename src2_vector_type::type;
131 using dst_vector_t =
typename dst_vector_type::type;
133 const bool is_src0_valid =
136 const bool is_src1_valid =
139 const bool is_src2_valid =
143 auto src0_vector_container = src0_vector_type{
144 src0_buf.template Get<src0_vector_t>(src0_coord_.GetOffset(), is_src0_valid)};
146 auto src1_vector_container = src1_vector_type{
147 src1_buf.template Get<src1_vector_t>(src1_coord_.GetOffset(), is_src1_valid)};
149 auto src2_vector_container = src2_vector_type{
150 src2_buf.template Get<src2_vector_t>(src2_coord_.GetOffset(), is_src2_valid)};
152 auto dst_vector_container = dst_vector_type{};
156 element_op_(dst_vector_container.template AsType<DstData>()(i),
157 src0_vector_container.template AsType<Src0Data>()[i],
158 src1_vector_container.template AsType<Src1Data>()[i],
159 src2_vector_container.template AsType<Src2Data>()[i]);
162 const bool is_dst_valid =
165 dst_buf.template Update<DstInMemOp, dst_vector_t>(
166 dst_coord_.GetOffset(),
168 dst_vector_container.template AsType<dst_vector_t>()[
I0]);
171 if constexpr(idx_1d.value != num_access - 1)
186 if constexpr(Src0ResetCoordinateAfterRun)
188 const auto src0_reset_step =
194 if constexpr(Src1ResetCoordinateAfterRun)
196 const auto src1_reset_step =
202 if constexpr(Src2ResetCoordinateAfterRun)
204 const auto src2_reset_step =
210 if constexpr(DstResetCoordinateAfterRun)
212 const auto dst_reset_step =
229 if constexpr(num_access == 0)
235 constexpr auto reset_step =
244 const Index& src0_slice_origin_step_idx)
247 const auto adjusted_step_idx = Src0ResetCoordinateAfterRun
248 ? src0_slice_origin_step_idx
259 const Index& src1_slice_origin_step_idx)
262 const auto adjusted_step_idx = Src1ResetCoordinateAfterRun
263 ? src1_slice_origin_step_idx
274 const Index& src2_slice_origin_step_idx)
277 const auto adjusted_step_idx = Src2ResetCoordinateAfterRun
278 ? src2_slice_origin_step_idx
289 const Index& dst_slice_origin_step_idx)
292 const auto adjusted_step_idx = DstResetCoordinateAfterRun
293 ? dst_slice_origin_step_idx
307 const ElementwiseOperation element_op_;
int32_t index_t
Definition ck.hpp:299
__host__ __device__ constexpr auto make_tensor_coordinate_step(const TensorDesc &, const VisibleIndex &idx_diff_visible, UpdateLowerIndexHack)
Definition tensor_description/tensor_descriptor.hpp:444
__host__ __device__ constexpr void move_tensor_coordinate(const TensorDesc &tensor_desc, TensorCoord &coord, const TensorCoordStep &coord_step)
Definition tensor_description/tensor_descriptor.hpp:508
InMemoryDataOperationEnum
Definition ck.hpp:277
__host__ __device__ constexpr bool coordinate_has_valid_offset_assuming_visible_index_is_valid(const TensorDesc &tensor_desc, const TensorCoord &coord)
Definition tensor_description/tensor_descriptor.hpp:560
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto generate_sequence(F, Number< N >)
Definition sequence_helper.hpp:18
typename remove_cv< T >::type remove_cv_t
Definition type.hpp:295
__host__ __device__ constexpr auto make_tensor_coordinate(const TensorDesc &tensor_desc, const VisibleIndex &idx_visible)
Definition tensor_description/tensor_descriptor.hpp:407
Array< index_t, N > MultiIndex
Definition array_multi_index.hpp:12
typename vector_type_maker< T, N >::type vector_type_maker_t
Definition dtype_vector.hpp:54
Definition tensor_space_filling_curve.hpp:20
static __device__ __host__ constexpr auto GetStepBetween(Number< AccessIdx1dBegin >, Number< AccessIdx1dEnd >)
Definition tensor_space_filling_curve.hpp:52
__host__ static __device__ constexpr index_t GetNumOfAccess()
Definition tensor_space_filling_curve.hpp:41
static __device__ __host__ constexpr auto GetForwardStep(Number< AccessIdx1d >)
Definition tensor_space_filling_curve.hpp:66
MultiIndex< nDim > Index
Definition tensor_space_filling_curve.hpp:23
__device__ void MoveSrc0SliceWindow(const Src0Desc &src0_desc, const Index &src0_slice_origin_step_idx)
Definition threadwise_tensor_slice_transfer_v6r3.hpp:243
__device__ void MoveSrc1SliceWindow(const Src1Desc &src1_desc, const Index &src1_slice_origin_step_idx)
Definition threadwise_tensor_slice_transfer_v6r3.hpp:258
__device__ void SetDstSliceOrigin(const DstDesc &dst_desc, const Index &dst_slice_origin_idx)
Definition threadwise_tensor_slice_transfer_v6r3.hpp:93
__device__ void SetSrc0SliceOrigin(const Src0Desc &src0_desc, const Index &src0_slice_origin_idx)
Definition threadwise_tensor_slice_transfer_v6r3.hpp:75
__device__ void MoveDstSliceWindow(const DstDesc &dst_desc, const Index &dst_slice_origin_step_idx)
Definition threadwise_tensor_slice_transfer_v6r3.hpp:288
ck::ThreadwiseTensorSliceTransfer_v6r3< Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, ElementwiseOperation, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, DstInMemOp, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::Src0Coord decltype(make_tensor_coordinate(Src0Desc{}, Index{})) Src0Coord
Definition threadwise_tensor_slice_transfer_v6r3.hpp:49
ck::ThreadwiseTensorSliceTransfer_v6r3< Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, ElementwiseOperation, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, DstInMemOp, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::Src2Coord decltype(make_tensor_coordinate(Src2Desc{}, Index{})) Src2Coord
Definition threadwise_tensor_slice_transfer_v6r3.hpp:51
ck::ThreadwiseTensorSliceTransfer_v6r3< Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, ElementwiseOperation, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, DstInMemOp, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::I0 static constexpr auto I0
Definition threadwise_tensor_slice_transfer_v6r3.hpp:54
ck::ThreadwiseTensorSliceTransfer_v6r3< Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, ElementwiseOperation, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, DstInMemOp, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::DstCoord decltype(make_tensor_coordinate(DstDesc{}, Index{})) DstCoord
Definition threadwise_tensor_slice_transfer_v6r3.hpp:52
ck::ThreadwiseTensorSliceTransfer_v6r3< Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, ElementwiseOperation, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, DstInMemOp, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::nDim static constexpr index_t nDim
Definition threadwise_tensor_slice_transfer_v6r3.hpp:45
__device__ constexpr ThreadwiseTensorSliceTransfer_v6r3(const Src0Desc &src0_desc, const Index &src0_slice_origin, const Src1Desc &src1_desc, const Index &src1_slice_origin, const Src2Desc &src2_desc, const Index &src2_slice_origin, const DstDesc &dst_desc, const Index &dst_slice_origin, const ElementwiseOperation &element_op)
Definition threadwise_tensor_slice_transfer_v6r3.hpp:56
static __device__ constexpr auto GetCoordinateResetStep()
Definition threadwise_tensor_slice_transfer_v6r3.hpp:219
ck::ThreadwiseTensorSliceTransfer_v6r3< Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, ElementwiseOperation, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, DstInMemOp, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::Src1Coord decltype(make_tensor_coordinate(Src1Desc{}, Index{})) Src1Coord
Definition threadwise_tensor_slice_transfer_v6r3.hpp:50
__device__ void MoveSrc2SliceWindow(const Src2Desc &src2_desc, const Index &src2_slice_origin_step_idx)
Definition threadwise_tensor_slice_transfer_v6r3.hpp:273
__device__ void SetSrc2SliceOrigin(const Src2Desc &src2_desc, const Index &src2_slice_origin_idx)
Definition threadwise_tensor_slice_transfer_v6r3.hpp:87
ck::ThreadwiseTensorSliceTransfer_v6r3< Src0Data, Src1Data, Src2Data, DstData, Src0Desc, Src1Desc, Src2Desc, DstDesc, ElementwiseOperation, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, DstInMemOp, ThreadTransferSrc0ResetCoordinateAfterRun, ThreadTransferSrc1ResetCoordinateAfterRun, ThreadTransferSrc2ResetCoordinateAfterRun, ThreadTransferDstResetCoordinateAfterRun >::Index MultiIndex< nDim > Index
Definition threadwise_tensor_slice_transfer_v6r3.hpp:47
__device__ void SetSrc1SliceOrigin(const Src1Desc &src1_desc, const Index &src1_slice_origin_idx)
Definition threadwise_tensor_slice_transfer_v6r3.hpp:81
__device__ void Run(const Src0Desc &src0_desc, const Src0Buffer &src0_buf, const Src1Desc &src1_desc, const Src1Buffer &src1_buf, const Src2Desc &src2_desc, const Src2Buffer &src2_buf, const DstDesc &dst_desc, DstBuffer &dst_buf)
Definition threadwise_tensor_slice_transfer_v6r3.hpp:99
Definition threadwise_tensor_slice_transfer_util.hpp:20
Definition functional2.hpp:33