const auto dst_val = mtk::wmma::utils::cast<DST_TYPE>(src_val);
mtk::wmma::utils::cp_async::cp_async<N>(dst_ptr, src_ptr);
mtk::wmma::utils::cp_async::commit();
mtk::wmma::utils::cp_async::wait_group<i>();
mtk::wmma::utils::cp_async::wait_all();

N is the data size in bytes (4, 8, or 16).