// Copyright © 2025 Advanced Micro Devices, Inc.
// SPDX-License-Identifier: MIT

// clang-format off
#pragma once

#include <aotriton/config.h>
#include <aotriton/dtypes.h>
#include <aotriton/util.h>
#include <aotriton/runtime.h>
#include <aotriton/_internal/lazy_tensor_internal.h>
#include <aotriton/flash.h>
#include <functional>
#include <string>
#include <vector>

namespace AOTRITON_NS::v3::flash {

// Argument bundle for the attn_bwd operator.
// Unlike KernelDescription, Operator must have its own parameter class.
//
// Tensors are referenced through raw pointers; this struct does not express
// who owns the pointees.
// NOTE(review): presumably the caller keeps every pointee alive for the
// duration of the call -- confirm against the call sites.
//
// No member has a default initializer: `OpAttnBwdParams p;` leaves every field
// indeterminate, while `OpAttnBwdParams p{};` zero-initializes all of them.
// Member order is part of the aggregate-initialization contract; do not
// reorder fields.
struct OpAttnBwdParams {
    // Rank-4 read-only views.
    // NOTE(review): presumably query / key / value / attention bias -- confirm.
    const TensorView<4>*   Q;
    const TensorView<4>*   K;
    const TensorView<4>*   V;
    const TensorView<4>*   B;
    // NOTE(review): presumably the softmax scaling factor -- confirm.
    float                  sm_scale;
    // NOTE(review): presumably the forward output and its incoming gradient -- confirm.
    const TensorView<4>*   Out;
    const TensorView<4>*   DO;
    // Rank-4 views declared const. NOTE(review): the names suggest these are
    // gradient outputs, so the const presumably covers only the view object and
    // not the memory it refers to -- confirm.
    const TensorView<4>*   DK;
    const TensorView<4>*   DV;
    const TensorView<4>*   DQ;
    const TensorView<4>*   DB;
    // Unlike the views above, DQ_ACC is a mutable LazyTensorInternal pointer.
    LazyTensorInternal<4>* DQ_ACC;
    // Rank-2 read-only view.
    const TensorView<2>*   L;
    // Mutable rank-2 LazyTensorInternal pointer (same kind of handle as DQ_ACC).
    LazyTensorInternal<2>* D;
    int32_t                num_head_q;
    int32_t                num_head_k;
    // Rank-1 read-only views.
    // NOTE(review): presumably cumulative sequence lengths for variable-length
    // batches -- confirm.
    const TensorView<1>*   cu_seqlens_q;
    const TensorView<1>*   cu_seqlens_k;
    int32_t                num_seqlens;
    int32_t                max_seqlen_q;
    int32_t                max_seqlen_k;
    int32_t                head_dim;
    float                  dropout_p;
    // Philox RNG state: the seed and the first offset are rank-0 tensor views
    // (passed by pointer), whereas the second offset is a plain 64-bit value.
    const TensorView<0>*   philox_seed_ptr;
    const TensorView<0>*   philox_offset1;
    uint64_t               philox_offset2;
    int32_t                Window_left;
    int32_t                Window_right;
    // Upper-case fields below use narrow integer / bool types.
    // NOTE(review): presumably these are the kernel-variant selectors that feed
    // OpAttnBwdContext::godel_number() -- confirm against its definition.
    int16_t                BLOCK_DMODEL;
    int8_t                 CAUSAL_TYPE;
    bool                   ENABLE_DROPOUT;
    bool                   PADDED_HEAD;
    int8_t                 BIAS_TYPE;
};

struct OpAttnBwdContext {
    OpAttnBwdParams *params = nullptr;
    const attn_options *call_options = nullptr;
    enum BackendEnum : int32_t {
        None = -1,
        kMetro_TritonSplit = 0,
        kShim_BwdKernelFuse = 1,
        kMetro_AiterAsm = 2,
        Max = 3
    };
    static constexpr BackendEnum fallback_backend = kMetro_TritonSplit;
    BackendEnum backend_index = BackendEnum::None;
    bool disable_fallback = false;

#if AOTRITON_BUILD_FOR_TUNING
    int _has_preferred_backend = -1;
    static constexpr int _total_number_of_backends = BackendEnum::Max;
    const char* _backend_name = nullptr;
#endif

    // One more layer of dispatcher of functionals is added due to
    // 1. Individual kernel may use fewer arguments
    // 2. Metro kernel needs overall performance numbers over individual kernels.
    // 3. Even metro kernel only has one kernel, another set LUT is need to
    //    determine which metro kernel (or backend) need to be used
    int64_t godel_number() const;
    static std::tuple<int, int> get_archmod_number(Gpu gpu);
    static constexpr int kMaxGodelNumber = 576;

    hipError_t lookup_optimal(Gpu gpu);
    // Unlike Triton kernel, Operator's launch need gpu argument to eventually
    // call backend's lookup_optimal
    hipError_t launch(Gpu gpu, hipStream_t stream) const;
private:
    typedef void (*OpTuneTableEntry)(OpAttnBwdContext& context, int mod_number);
    static OpTuneTableEntry optune_table[][ kMaxGodelNumber ];

    typedef hipError_t (*BackendLauncher)(const OpAttnBwdContext& context,
                                          Gpu gpu,
                                          hipStream_t stream);
    static BackendLauncher launcher_table[ BackendEnum::Max ];
};

namespace optune {

// LUT helper #0 for the attn_bwd operator; defined in another translation unit.
//
// params      read-only operator arguments
// mod_number  integer selector passed through from the caller
// lut         int8_t table declared as [1][10][10]; as a function parameter it
//             is adjusted to `int8_t (*)[10][10]`, so the leading extent is
//             documentation only and is not enforced by the compiler
//
// NOTE(review): presumably returns the value looked up in `lut` for the given
// params -- confirm against the definition.
int op_attn_bwd__lut_lambda__0(const OpAttnBwdParams& params,
                               int mod_number,
                               int8_t lut[1][10][10]);

// Tuning entry points for the "A0" group, declared here and defined elsewhere.
// Each one has the shape void(OpAttnBwdContext&, int), i.e. the same shape as
// OpAttnBwdContext::OpTuneTableEntry.
// NOTE(review): presumably "A<n>" is the arch row and "F<n>" the godel-number
// column of OpAttnBwdContext::optune_table (every F index here is below
// kMaxGodelNumber = 576) -- confirm against the table definition.
// NOTE(review): the first parameter is named `params` although its type is
// OpAttnBwdContext, not OpAttnBwdParams; the function-pointer typedef calls it
// `context`.
void Optune_op_attn_bwd__A0__F0(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F1(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F4(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F5(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F8(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F12(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F16(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F17(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F20(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F21(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F24(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F28(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F32(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F33(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F36(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F37(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F40(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F44(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F48(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F49(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F52(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F53(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F56(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F60(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F64(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F65(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F68(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F69(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F72(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F76(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F80(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F81(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F84(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F85(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F88(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F92(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F96(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F97(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F100(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F101(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F104(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F108(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F112(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F113(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F116(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F117(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F120(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F124(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F128(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F129(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F132(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F133(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F136(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F140(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F144(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F145(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F148(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F149(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F152(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F156(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F160(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F161(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F164(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F165(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F168(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F172(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F192(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F193(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F196(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F197(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F200(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F204(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F208(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F209(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F212(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F213(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F216(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F220(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F224(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F225(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F228(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F229(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F232(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F236(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F240(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F241(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F244(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F245(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F248(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F252(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F256(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F257(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F260(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F261(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F264(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F268(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F272(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F273(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F276(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F277(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F280(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F284(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F288(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F289(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F292(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F293(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F296(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F300(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F304(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F305(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F308(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F309(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F312(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F316(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F320(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F321(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F324(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F325(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F328(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F332(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F336(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F337(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F340(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F341(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F344(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F348(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F352(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F353(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F356(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F357(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F360(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F364(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F384(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F385(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F388(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F389(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F392(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F396(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F400(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F401(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F404(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F405(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F408(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F412(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F416(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F417(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F420(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F421(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F424(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F428(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F432(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F433(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F436(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F437(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F440(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F444(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F448(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F449(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F452(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F453(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F456(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F460(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F464(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F465(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F468(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F469(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F472(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F476(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F480(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F481(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F484(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F485(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F488(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F492(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F496(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F497(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F500(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F501(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F504(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F508(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F512(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F513(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F516(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F517(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F520(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F524(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F528(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F529(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F532(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F533(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F536(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F540(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F544(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F545(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F548(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F549(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F552(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A0__F556(OpAttnBwdContext& params, int mod_number);
// Tuning entry points for the "A9" group, declared here and defined elsewhere.
// Each one has the shape void(OpAttnBwdContext&, int), i.e. the same shape as
// OpAttnBwdContext::OpTuneTableEntry.
// The F indices listed for A9 are not the same set as for A0: within the lines
// visible here A9 has no F8, F24, F200 or F492, and it goes on past F556
// (F560 .. F568).
// NOTE(review): presumably "A<n>" is the arch row and "F<n>" the godel-number
// column of OpAttnBwdContext::optune_table -- confirm against the table
// definition.
void Optune_op_attn_bwd__A9__F0(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F1(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F4(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F5(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F12(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F16(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F17(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F20(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F21(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F28(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F32(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F33(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F36(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F37(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F40(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F44(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F48(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F49(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F52(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F53(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F56(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F60(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F64(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F65(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F68(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F69(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F72(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F76(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F80(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F81(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F84(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F85(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F88(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F92(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F96(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F97(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F100(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F101(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F104(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F108(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F112(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F113(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F116(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F117(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F120(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F124(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F128(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F129(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F132(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F133(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F136(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F140(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F144(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F145(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F148(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F149(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F152(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F156(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F160(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F161(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F164(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F165(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F168(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F172(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F192(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F193(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F196(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F197(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F204(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F208(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F209(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F212(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F213(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F216(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F220(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F224(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F225(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F228(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F229(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F232(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F236(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F240(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F241(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F244(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F245(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F248(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F252(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F256(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F257(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F260(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F261(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F264(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F268(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F272(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F273(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F276(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F277(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F280(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F284(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F288(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F289(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F292(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F293(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F296(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F300(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F304(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F305(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F308(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F309(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F312(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F316(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F320(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F321(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F324(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F325(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F328(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F332(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F336(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F337(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F340(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F341(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F344(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F348(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F352(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F353(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F356(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F357(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F360(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F364(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F384(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F385(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F388(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F389(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F392(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F396(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F400(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F401(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F404(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F405(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F408(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F412(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F416(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F417(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F420(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F421(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F424(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F428(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F432(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F433(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F436(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F437(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F440(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F444(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F448(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F449(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F452(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F453(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F456(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F460(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F464(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F465(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F468(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F469(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F472(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F476(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F480(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F481(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F484(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F485(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F488(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F496(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F497(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F500(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F501(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F504(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F508(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F512(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F513(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F516(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F517(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F520(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F524(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F528(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F529(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F532(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F533(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F536(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F540(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F544(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F545(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F548(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F549(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F552(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F556(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F560(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F561(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F564(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F565(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F568(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A9__F572(OpAttnBwdContext& params, int mod_number);
// Forward declarations of the Optune_op_attn_bwd__A10__F<n> functions,
// F0 through F572 as listed; none of them has a body here.
// All share one signature: they return void and take the OpAttnBwdContext by
// non-const reference plus an int `mod_number`.
// The reference parameter is named `params` even though its type is the
// context struct, which itself carries a `params` member.
// The F<n> suffixes are sparse (e.g. F5 is followed by F12, F172 by F192).
// NOTE(review): presumably A<n> identifies a GPU architecture entry and F<n> a
// functional (compile-time option) combination, with each function tuning the
// context for that pair — confirm against the code generator.
void Optune_op_attn_bwd__A10__F0(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F1(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F4(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F5(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F12(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F16(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F17(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F20(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F21(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F28(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F32(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F33(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F36(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F37(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F40(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F44(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F48(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F49(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F52(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F53(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F56(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F60(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F64(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F65(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F68(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F69(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F72(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F76(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F80(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F81(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F84(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F85(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F88(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F92(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F96(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F97(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F100(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F101(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F104(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F108(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F112(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F113(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F116(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F117(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F120(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F124(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F128(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F129(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F132(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F133(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F136(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F140(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F144(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F145(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F148(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F149(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F152(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F156(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F160(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F161(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F164(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F165(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F168(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F172(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F192(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F193(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F196(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F197(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F204(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F208(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F209(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F212(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F213(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F216(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F220(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F224(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F225(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F228(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F229(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F232(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F236(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F240(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F241(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F244(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F245(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F248(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F252(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F256(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F257(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F260(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F261(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F264(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F268(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F272(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F273(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F276(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F277(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F280(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F284(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F288(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F289(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F292(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F293(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F296(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F300(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F304(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F305(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F308(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F309(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F312(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F316(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F320(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F321(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F324(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F325(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F328(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F332(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F336(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F337(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F340(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F341(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F344(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F348(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F352(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F353(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F356(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F357(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F360(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F364(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F384(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F385(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F388(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F389(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F392(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F396(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F400(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F401(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F404(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F405(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F408(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F412(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F416(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F417(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F420(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F421(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F424(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F428(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F432(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F433(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F436(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F437(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F440(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F444(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F448(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F449(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F452(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F453(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F456(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F460(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F464(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F465(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F468(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F469(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F472(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F476(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F480(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F481(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F484(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F485(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F488(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F496(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F497(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F500(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F501(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F504(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F508(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F512(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F513(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F516(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F517(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F520(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F524(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F528(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F529(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F532(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F533(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F536(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F540(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F544(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F545(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F548(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F549(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F552(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F556(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F560(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F561(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F564(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F565(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F568(OpAttnBwdContext& params, int mod_number);
void Optune_op_attn_bwd__A10__F572(OpAttnBwdContext& params, int mod_number);

}

}

// vim: set fileencoding=utf-8

