flatmm_sn_32x128x512_1x4x1_16x16x32_itl.hpp File Reference

flatmm_sn_32x128x512_1x4x1_16x16x32_itl.hpp File Reference

Composable Kernel: flatmm_sn_32x128x512_1x4x1_16x16x32_itl.hpp File Reference
flatmm_sn_32x128x512_1x4x1_16x16x32_itl.hpp File Reference

Go to the source code of this file.

Classes

struct  ck_tile::FlatmmSn_32x128x512_1x4x1_16x16x32_BF16_itl
struct  ck_tile::FlatmmSn_32x128x512_1x4x1_16x16x32_FP16_itl

Namespaces

namespace  ck_tile

Macros

#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_BF16
#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"
#define _UK_PK_CVT_(x0_, x1_, y_)
#define _UK_ATOMIC_ADD_   "global_atomic_pk_add_bf16"
#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_FP16
#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"
#define _UK_PK_CVT_(x0_, x1_, y_)
#define _UK_ATOMIC_ADD_   "global_atomic_pk_add_bf16"

Macro Definition Documentation

◆ _UK_ATOMIC_ADD_ [1/2]

#define _UK_ATOMIC_ADD_   "global_atomic_pk_add_bf16"

◆ _UK_ATOMIC_ADD_ [2/2]

#define _UK_ATOMIC_ADD_   "global_atomic_pk_add_bf16"

◆ _UK_MFMA_ [1/2]

#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"

◆ _UK_MFMA_ [2/2]

#define _UK_MFMA_   "v_mfma_f32_16x16x16_bf16"

◆ _UK_PK_CVT_ [1/2]

#define _UK_PK_CVT_(x0_, x1_, y_)
Value:
" v_cmp_u_f32 s[36:37], " x0_ ", " x0_ " \n" \
" v_add3_u32 v50, " x0_ ", %[v_nan_lo], 1 \n" \
" v_cndmask_b32 v54, v50, %[v_nan_hi], s[36:37] \n" \
" v_cmp_u_f32 s[36:37], " x1_ ", " x1_ " \n" \
" v_add3_u32 v50, " x1_ ", %[v_nan_lo], 1 \n" \
" v_cndmask_b32 v55, v50, %[v_nan_hi], s[36:37] \n" \
" v_perm_b32 " y_ ", v55, v54, s52 \n"

◆ _UK_PK_CVT_ [2/2]

#define _UK_PK_CVT_(x0_, x1_, y_)
Value:
" v_cmp_u_f32 s[36:37], " x0_ ", " x0_ " \n" \
" v_add3_u32 v50, " x0_ ", %[v_nan_lo], 1 \n" \
" v_cndmask_b32 v54, v50, %[v_nan_hi], s[36:37] \n" \
" v_cmp_u_f32 s[36:37], " x1_ ", " x1_ " \n" \
" v_add3_u32 v50, " x1_ ", %[v_nan_lo], 1 \n" \
" v_cndmask_b32 v55, v50, %[v_nan_hi], s[36:37] \n" \
" v_perm_b32 " y_ ", v55, v54, s52 \n"

◆ CK_TILE_FLATMM_UK_MFMA [1/2]

#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_FP16

◆ CK_TILE_FLATMM_UK_MFMA [2/2]

#define CK_TILE_FLATMM_UK_MFMA   CK_TILE_FLATMM_UK_MFMA_BF16