block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp Source File
block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp
Go to the documentation of this file.
308 // leave some exclusive space so that the second v_lds buffer will never overlap with the first
Definition tile/core/algorithm/cluster_descriptor.hpp:13
typename impl::WarpGemmDispatcher< AType, BType, AccType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess >::Type WarpGemmDispatcher
Definition warp_gemm_dispatcher.hpp:182
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_HOST_DEVICE constexpr auto make_naive_tensor_descriptor(const tuple< Lengths... > &lengths, const tuple< Strides... > &strides, number< GuaranteedLastDimensionVectorLength >=number<-1 >{}, number< GuaranteedLastDimensionVectorStride >=number<-1 >{})
Definition tile/core/tensor/tensor_descriptor.hpp:274
CK_TILE_HOST_DEVICE constexpr auto make_merge_transform(const LowLengths &low_lengths)
Definition coordinate_transform.hpp:1615
CK_TILE_HOST_DEVICE constexpr auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldTopIdss, NewUpperDimensionNewTopIdss)
Definition tile/core/tensor/tensor_descriptor.hpp:203
CK_TILE_HOST_DEVICE constexpr auto integer_least_multiple(X x, Y y)
Definition tile/core/numeric/math.hpp:155
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
WarpGemmImpl< WarpGemmAttributeMfmaIterateKAndTransposedCDistribution_SwizzleB< WarpGemmAttributeMfmaImpl_f32_32x32x16_fp8_fp8< WGAttrCtlEnum::Default_ >, 2, swizzle_factor > > WarpGemmMfmaFp8Fp8F32M32N32K32SwizzleBTransposedCDistribution
Definition warp_gemm.hpp:394
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:16
static CK_TILE_HOST_DEVICE constexpr auto MakeKDramTileDistribution()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:106
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t GetSmemSizeK()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:346
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t IsPreloadWholeNextIterationK()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:20
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t GetNumVLdsBuffers()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:45
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t IsFirstKLdsBufferOverlapLastVLdsBuffer()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:325
static constexpr index_t NumPrefetchV
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:17
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t GetSmemSizeV()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:353
static CK_TILE_DEVICE constexpr auto MakeVDramTileDistribution()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:183
static CK_TILE_HOST_DEVICE constexpr auto MakeVLdsBlockDescriptor()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:137
static CK_TILE_HOST_DEVICE constexpr auto GetSmemKPackK()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:61
static CK_TILE_HOST_DEVICE constexpr auto MakeKLdsBlockDescriptor()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:68
static CK_TILE_HOST_DEVICE constexpr auto GetQKBlockGemm()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:252
static CK_TILE_DEVICE constexpr auto GetNumKLdsBuffers()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:26
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t GetExclusiveKLdsBytes()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:311
static CK_TILE_HOST_DEVICE constexpr ck_tile::index_t GetSmemSize()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:360
static CK_TILE_HOST_DEVICE constexpr auto MakeQRegTileDistribution()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:51
static CK_TILE_DEVICE constexpr auto GetNumPrefetchV()
Definition block_fmha_pipeline_qr_ks_vs_whole_k_prefetch_default_policy.hpp:32
Definition block_fmha_pipeline_qx_ks_vs_custom_policy.hpp:266
static CK_TILE_HOST_DEVICE constexpr auto GetAlignmentK()
Definition block_fmha_pipeline_qx_ks_vs_custom_policy.hpp:346
static CK_TILE_HOST_DEVICE constexpr std::enable_if_t< std::is_convertible_v< decltype(Problem::kHasDropout), bool >, ck_tile::index_t > GetSmemSizeDropout(int)
Definition block_fmha_pipeline_qx_ks_vs_custom_policy.hpp:687
static CK_TILE_HOST_DEVICE constexpr auto GetAlignmentV()
Definition block_fmha_pipeline_qx_ks_vs_custom_policy.hpp:388
static CK_TILE_HOST_DEVICE constexpr auto GetSmemKPackV()
Definition block_fmha_pipeline_qx_ks_vs_custom_policy.hpp:373
Definition block_gemm_areg_bsmem_creg_one_warp_v1.hpp:16
Definition block_gemm_areg_bsmem_creg_v2_custom_policy.hpp:16
Definition block_gemm_areg_bsmem_creg_v2.hpp:16
Definition block_gemm_problem.hpp:18
Definition tile_gemm_shape.hpp:17
Definition tile/core/container/sequence.hpp:49
Definition tile_distribution_encoding.hpp:26
Definition tile/core/container/tuple.hpp:192