gridwise_gemm_xdlops_v2r4.hpp File Reference
gridwise_gemm_xdlops_v2r4.hpp File Reference
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/multi_index_transform_helper.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp"
#include "ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp"
#include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

Go to the source code of this file.
| Namespaces | |
| --- | --- |
| namespace | `ck` |

| Functions | |
| --- | --- |
| `template<typename GridwiseGemm, typename FloatAB, typename FloatC, typename ABK0MK1GridDesc, typename BBK0NK1GridDesc, typename CM0N0M1N1M2M3M4N2GridDesc, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, typename CBlockClusterAdaptor, bool HasMainKBlockLoop>` | |
| `__global__ void` | `ck::kernel_gemm_xdlops_v2r4 (const FloatAB *__restrict__ p_a_grid, const FloatAB *__restrict__ p_b_grid, FloatC *__restrict__ p_c_grid, const ABK0MK1GridDesc a_b_k0_m_k1_grid_desc, const BBK0NK1GridDesc b_b_k0_n_k1_grid_desc, const CM0N0M1N1M2M3M4N2GridDesc c_m0_n0_m1_n1_m2_m3_m4_n2_grid_desc, const AElementwiseOperation a_element_op, const BElementwiseOperation b_element_op, const CElementwiseOperation c_element_op, const CBlockClusterAdaptor c_block_cluster_adaptor)` |