thread_group_tensor_slice_transfer_gather_direct_load.hpp Source File
thread_group_tensor_slice_transfer_gather_direct_load.hpp
Go to the documentation of this file.
Definition ck.hpp:268
__host__ __device__ constexpr auto make_multi_index(Xs &&... xs)
Definition array_multi_index.hpp:15
typename detail::StaticallyIndexedArrayImpl< T, N >::type StaticallyIndexedArray
Definition utility/statically_indexed_array.hpp:45
__host__ __device__ constexpr auto make_tensor_coordinate_step(const TensorDesc &, const VisibleIndex &idx_diff_visible, UpdateLowerIndexHack)
Definition tensor_description/tensor_descriptor.hpp:444
__host__ __device__ constexpr void move_tensor_coordinate(const TensorDesc &tensor_desc, TensorCoord &coord, const TensorCoordStep &coord_step)
Definition tensor_description/tensor_descriptor.hpp:508
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13
__host__ __device__ constexpr auto generate_sequence_v2(F &&f, Number< N >)
Definition sequence_helper.hpp:25
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
typename remove_reference< T >::type remove_reference_t
Definition type.hpp:292
__host__ __device__ constexpr auto generate_sequence(F, Number< N >)
Definition sequence_helper.hpp:18
__host__ __device__ constexpr auto make_tensor_coordinate(const TensorDesc &tensor_desc, const VisibleIndex &idx_visible)
Definition tensor_description/tensor_descriptor.hpp:407
__host__ __device__ constexpr const TData & At(index_t i) const
Definition utility/array.hpp:22
__device__ void ResetDstSliceWindow(const DstDesc &dst_desc)
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:232
static constexpr auto I0
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:67
__device__ void SetDstSliceOrigin(const DstDesc &dst_desc, const Index &dst_slice_origin_idx)
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:226
decltype(make_tensor_coordinate(SrcDesc{}, Index{})) SrcCoord
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:61
decltype(make_tensor_coordinate_step(SrcDesc{}, Index{})) SrcCoordStep
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:64
static constexpr index_t gather_num
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:79
__device__ constexpr ThreadGroupTensorSliceTransfer_Gather_DirectLoad(const SrcDesc &src_desc, const Index &src_block_slice_origin, const DstDesc &dst_desc, const Index &dst_block_slice_origin, const StaticallyIndexedArray< IndexType, gather_num > &gather_offsets)
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:135
static constexpr auto thread_cluster_lengths
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:71
decltype(make_tensor_coordinate(DstDesc{}, Index{})) DstCoord
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:62
static constexpr auto thread_steps
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:77
static constexpr auto thread_single_load_size
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:73
static __device__ constexpr bool AreThreadClusterLengthsValid()
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:81
__device__ void SetSrcSliceOrigin(const SrcDesc &src_desc, const Index &src_slice_origin_idx)
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:209
decltype(make_tensor_coordinate_step(DstDesc{}, Index{})) DstCoordStep
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:65
__device__ void Run(const SrcDesc &src_desc, const SrcBuffer &src_buf, const DstDesc &dst_desc, DstBuffer &dst_buf)
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:238
__device__ auto generate_steps(const DescType &desc, int sign)
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:372
MultiIndex< nDim > Index
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:59
static constexpr index_t nDim
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:58
static constexpr auto block_slice_lengths
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:70
__device__ void MoveSrcSliceWindow(const SrcDesc &src_desc, const Index &step)
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:365
static constexpr auto I1
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:68
static constexpr auto thread_slice_lengths
Definition thread_group_tensor_slice_transfer_gather_direct_load.hpp:78
Definition threadwise_tensor_slice_transfer_util.hpp:20
Definition functional2.hpp:33
Definition functional3.hpp:97