gridwise_gemm_xdlops_skip_b_lds_v1.hpp File Reference
gridwise_gemm_xdlops_skip_b_lds_v1.hpp File Reference
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/multi_index_transform_helper.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp"
#include "ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp"
#include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"

Go to the source code of this file.
Namespaces | |
| namespace | ck |
Functions | |
| template<typename GridwiseGemm, typename FloatAB, typename FloatC, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, typename Block2CTileMap, bool HasMainK0BlockLoop> | |
| __global__ void | ck::kernel_gemm_xdlops_skip_b_lds_v1 (const FloatAB *__restrict__ p_a_grid, const FloatAB *__restrict__ p_b_grid, FloatC *__restrict__ p_c_grid, const AGridDesc_K0_M_K1 a_grid_desc_k0_m_k1, const BGridDesc_K0_N_K1 b_grid_desc_k0_n_k1, const CGridDesc_M_N c_grid_desc_m_n, const AElementwiseOperation a_element_op, const BElementwiseOperation b_element_op, const CElementwiseOperation c_element_op, const Block2CTileMap block_2_ctile_map) |