/*
 * Main configuration header of the CPU dispatcher.
 *
 * This header is autogenerated by the Meson build script located at `meson_cpu/meson.build`.
 * It provides a set of utilities that are required for the runtime dispatching process.
 *
 * The most important macros in this header are:
 *   - @ref @P@CPU_DISPATCH_DECLARE: Used to declare the dispatched functions and variables.
 *   - @ref @P@CPU_DISPATCH_CURFX: Used to define the dispatched functions with target-specific suffixes.
 *   - @ref @P@CPU_DISPATCH_CALL: Used for runtime dispatching of the exported functions and variables.
 */
#ifndef @P@_CPU_DISPATCHER_CONF_H_
#define @P@_CPU_DISPATCHER_CONF_H_
/// This definition is required to provide compatibility with NumPy distutils
#define @P@_CPU_MESON_BUILD
/**
 * @def @P@WITH_CPU_BASELINE
 * Names of the enabled baseline features as a single string, where each name
 * is separated by a single space.
 * For example: "SSE SSE2 SSE3"
 * Required for logging purposes only.
 */
#define @P@WITH_CPU_BASELINE "@WITH_CPU_BASELINE@"
/**
 * @def @P@WITH_CPU_BASELINE_N
 * Number of enabled baseline features.
 */
#define @P@WITH_CPU_BASELINE_N @WITH_CPU_BASELINE_N@
/**
 * @def @P@WITH_CPU_DISPATCH
 * Names of the dispatched features as a single string, where each name
 * is separated by a single space.
 */
#define @P@WITH_CPU_DISPATCH "@WITH_CPU_DISPATCH@"
/**
 * @def @P@WITH_CPU_DISPATCH_N
 * Number of enabled dispatched features.
 */
#define @P@WITH_CPU_DISPATCH_N @WITH_CPU_DISPATCH_N@
// Internal token helpers used by the macros that follow.
// Expands to its argument unchanged.
#define @P@_CPU_EXPAND(ARG) ARG
// Token pasting. The wrapper levels let macro arguments be expanded
// before the `##` operator is applied to them.
#define @P@_CPU_CAT(lhs, rhs) @P@_CPU_CAT_(lhs, rhs)
#define @P@_CPU_CAT_(lhs, rhs) @P@_CPU_CAT__(lhs, rhs)
#define @P@_CPU_CAT__(lhs, rhs) lhs ## rhs
// Stringification. The wrapper lets a macro argument be expanded
// before the `#` operator is applied to it.
#define @P@_CPU_TOSTRING(tok) @P@_CPU_STRINGIFY(tok)
#define @P@_CPU_STRINGIFY(tok) #tok

/**
 * @def @P@WITH_CPU_BASELINE_CALL(EXEC_CB, ...)
 * Invokes a preprocessor callback once for each enabled baseline feature,
 * sorted by lowest interest, without testing whether the feature is
 * supported by the CPU or not.
 *
 * Required for logging purposes only, for example, generating
 * a Python list to hold the information of the enabled features.
 *
 * The body of this macro is substituted by the build script.
 *
 * Unwrapped Version:
 * @code
 * #define @P@WITH_CPU_BASELINE_CALL(EXEC_CB, ...) \
 *     @P@_CPU_EXPAND(EXEC_CB(SSE, __VA_ARGS__))   \
 *     @P@_CPU_EXPAND(EXEC_CB(SSE2, __VA_ARGS__))  \
 *     @P@_CPU_EXPAND(EXEC_CB(SSE3, __VA_ARGS__))
 * @endcode
 *
 * @param EXEC_CB The preprocessor callback to be called for each enabled baseline feature.
 * @param ... Additional arguments to be passed to the preprocessor callback.
 */
#define @P@WITH_CPU_BASELINE_CALL(EXEC_CB, ...) \
@WITH_CPU_BASELINE_CALL@

/**
 * @def @P@WITH_CPU_DISPATCH_CALL(EXEC_CB, ...)
 * Similar to `@P@WITH_CPU_BASELINE_CALL` but for the enabled dispatched features.
 *
 * The body of this macro is substituted by the build script.
 *
 * @param EXEC_CB The preprocessor callback to be called for each enabled dispatched feature.
 * @param ... Additional arguments to be passed to the preprocessor callback.
 */
#define @P@WITH_CPU_DISPATCH_CALL(EXEC_CB, ...) \
@WITH_CPU_DISPATCH_CALL@
/*
 * Default behavior of the configurable macros that are derived from the configuration
 * header generated by the meson function `mod_features.multi_targets()`.
 *
 * Note: a fallback for the case of disabled optimization is no longer needed for meson,
 * since having configuration headers is always guaranteed.
 *
 * However, it is still needed for compatibility with NumPy distutils.
 */
#ifdef @P@DISABLE_OPTIMIZATION
    // Optimization disabled: only the baseline callback is expanded,
    // the dispatch part expands to nothing.
    #define @P@MTARGETS_CONF_BASELINE(CB, ...) @P@_CPU_EXPAND(CB(__VA_ARGS__))
    #define @P@MTARGETS_CONF_DISPATCH(TEST_FEATURE_CB, CB, ...)
#else
    // Placeholders that expand to a `&&"message";` fragment.
    // NOTE(review): presumably meant to break the build with a readable message when
    // the generated config header was not included before using the macros - confirm.
    #define @P@MTARGETS_CONF_BASELINE(CB, ...) \
         &&"Expected config header that generated by mod_features.multi_targets()";
    #define @P@MTARGETS_CONF_DISPATCH(TEST_FEATURE_CB, CB, ...) \
         &&"Expected config header that generated by mod_features.multi_targets()";
#endif
/**
 * @def @P@CPU_DISPATCH_CURFX(NAME)
 *
 * Returns `NAME` suffixed with "_" + "the current target" during compiling
 * the generated static libraries that are derived from the Meson function
 * `mod_features.multi_targets()`.
 *
 * It also returns `NAME` as-is without any suffix when it comes to the baseline features or
 * in case if the optimization is disabled.
 *
 * Note: `mod_features.multi_targets()` provides a unique target name within the compiler #definition
 * called `@P@MTARGETS_CURRENT` on each generated library based on the specified features
 * within its parameter 'dispatch:'.
 *
 * For example:
 *
 * @code
 * # from meson
 * mod_features.multi_targets(
 *  'arithmetic.dispatch.h', 'arithmetic.c',
 *   baseline: [SSE3], dispatch: [AVX512_SKX, AVX2],
 *   prefix: '@P@'
 * )
 * @endcode
 *
 * @code
 * void @P@CPU_DISPATCH_CURFX(add)(const int *src0, const int *src1, int *dst)
 * {
 * #ifdef @P@HAVE_AVX512F // one of the implied feature of AVX512_SKX
 *   // code
 * #elif defined(@P@HAVE_AVX2)
 *   // code
 * #elif defined(@P@HAVE_SSE3)
 *   // CODE
 * #else
 *   // Fallback code in case of no features enabled
 * #endif
 * }
 * @endcode
 *
 * @code
 * // Unwrapped version :
 * void add_AVX512_SKX(const int *src0, const int *src1, int *dst)
 * {...}
 * void add_AVX2(const int *src0, const int *src1, int *dst)
 * {...}
 * // baseline
 * void add(const int *src0, const int *src1, int *dst)
 * {...}
 * @endcode
 *
 * @param NAME The base name of the dispatched function or variable.
 */
#ifndef @P@MTARGETS_CURRENT
    // No current dispatch target is defined: `NAME` is returned as-is, without a suffix.
    #define @P@CPU_DISPATCH_CURFX(NAME) @P@_CPU_EXPAND(NAME)
#else
    // `@P@MTARGETS_CURRENT` is only defined by the dispatch targets
    // within the meson function `mod_features.multi_targets()`;
    // the result is `NAME`, an underscore, then the current target name.
    #define @P@CPU_DISPATCH_CURFX(NAME) @P@_CPU_CAT(@P@_CPU_CAT(NAME, _), @P@MTARGETS_CURRENT)
#endif
/**
 * @def @P@CPU_DISPATCH_DECLARE(...)
 *
 * Provides forward declarations for the exported variables and functions
 * based on the enabled baseline and dispatched features.
 *
 * This macro requires including the config file that has been generated
 * by the meson function `mod_features.multi_targets()` to determine the enabled
 * baseline and dispatched features.
 *
 * For example:
 *
 * @code
 * # from meson
 * mod_features.multi_targets(
 *  'arithmetic.dispatch.h', 'arithmetic.c',
 *   baseline: [SSE3], dispatch: [AVX512_SKX, AVX2],
 *   prefix: '@P@'
 * )
 * @endcode
 *
 * @code
 * // from C
 * #include "arithmetic.dispatch.h"
 * @P@CPU_DISPATCH_DECLARE(void add, (const int *src0, const int *src1, int *dst))
 *
 * // Unwrapped version:
 * void add_AVX512_SKX(const int *src0, const int *src1, int *dst);
 * void add_AVX2(const int *src0, const int *src1, int *dst);
 * void add(const int *src0, const int *src1, int *dst); // baseline
 * @endcode
 *
 * @param ... The function or variable prototype to be declared,
 *            with the target-specific suffix added automatically.
 */
#define @P@CPU_DISPATCH_DECLARE(...) \
    @P@MTARGETS_CONF_DISPATCH(@P@CPU_DISPATCH_DECLARE_CHK_, @P@CPU_DISPATCH_DECLARE_CB_, __VA_ARGS__) \
    @P@MTARGETS_CONF_BASELINE(@P@CPU_DISPATCH_DECLARE_BASE_CB_, __VA_ARGS__)

// Dummy CPU runtime check: expands to nothing, since a declaration needs no feature test.
#define @P@CPU_DISPATCH_DECLARE_CHK_(FEATURE)
// Preprocessor callbacks
// Dispatched target: declares `LEFT` suffixed with "_" + `TARGET_NAME`, followed by the
// remaining arguments and a terminating semicolon. The first argument is not used.
#define @P@CPU_DISPATCH_DECLARE_CB_(UNUSED_CHK, TARGET_NAME, LEFT, ...) \
    @P@_CPU_CAT(@P@_CPU_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__;
// Baseline: declares `LEFT` without any suffix.
#define @P@CPU_DISPATCH_DECLARE_BASE_CB_(LEFT, ...) \
    LEFT __VA_ARGS__;

/**
 * @def @P@CPU_DISPATCH_DECLARE_XB(...)
 *
 * Same as `@P@CPU_DISPATCH_DECLARE` but excludes the baseline declaration even
 * if it was enabled within `mod_features.multi_targets()`.
 *
 * @param ... The function or variable prototype to be declared,
 *            with the target-specific suffix added automatically.
 */
#define @P@CPU_DISPATCH_DECLARE_XB(...) \
    @P@MTARGETS_CONF_DISPATCH(@P@CPU_DISPATCH_DECLARE_CHK_, @P@CPU_DISPATCH_DECLARE_CB_, __VA_ARGS__)

/**
 * @def @P@CPU_DISPATCH_CALL(...)
 *
 * Helper macro used for runtime dispatching of the exported functions and variables
 * within the meson `mod_features.multi_targets()` function.
 *
 * This macro dispatches only one symbol based on the order of the specified features within the meson function
 * `mod_features.multi_targets()`. For example, if `mod_features.multi_targets()` is called with
 * `dispatch: [features_highest_1, features_highest_2]`, the macro will test each enabled feature against
 * the CPU at runtime. Once it fails, it will move to the next order until falling back to the baseline.
 *
 * Similar to `@P@CPU_DISPATCH_DECLARE`, this macro requires including the config file that has been generated
 * by the meson function `mod_features.multi_targets()` to determine the enabled baseline and dispatched features.
 *
 * Example usage:
 *
 * @code
 * # from meson
 * mod_features.multi_targets(
 *   'arithmetic.dispatch.h', 'arithmetic.c',
 *   baseline: [SSE3], dispatch: [AVX512_SKX, AVX2],
 *   prefix: '@P@'
 * )
 * @endcode
 *
 * @code
 * // from C
 * #include "arithmetic.dispatch.h"
 *
 * // Example 1:
 * @P@CPU_DISPATCH_CALL(add, (src0, src1, dst));
 *
 * // Unwrapped version:
 * @P@CPU_HAVE(AVX512_SKX) ? add_AVX512_SKX(src0, src1, dst) :
 *     (@P@CPU_HAVE(AVX2) ? add_AVX2(src0, src1, dst) :
 *         add(src0, src1, dst)); // baseline
 *
 * // Example 2:
 * typedef void (*func_type)(const int*, const int*, int*);
 * func_type func = @P@CPU_DISPATCH_CALL(add);
 *
 * // Unwrapped version:
 * func_type func = @P@CPU_HAVE(AVX512_SKX) ? add_AVX512_SKX :
 *                     (@P@CPU_HAVE(AVX2) ? add_AVX2 :
 *                         add); // baseline
 *
 * // Example 3:
 * func_type func3;
 * @P@CPU_DISPATCH_CALL(func3 = add);
 *
 * // Unwrapped version:
 * @P@CPU_HAVE(AVX512_SKX) ? (func3 = add_AVX512_SKX) :
 *     (@P@CPU_HAVE(AVX2) ? (func3 = add_AVX2) :
 *         (func3 = add)); // baseline
 *
 * @endcode
 *
 * @param ... The function or variable prototype to be called or assigned,
 *            with the target-specific suffix added automatically.
 */
#define @P@CPU_DISPATCH_CALL(...) \
    @P@MTARGETS_CONF_DISPATCH(@P@CPU_HAVE, @P@CPU_DISPATCH_CALL_CB_, __VA_ARGS__) \
    @P@MTARGETS_CONF_BASELINE(@P@CPU_DISPATCH_CALL_BASE_CB_, __VA_ARGS__)

// Preprocessor callbacks
// Dispatched target: one `(test) ? (symbol_TARGET args) :` link of the conditional chain;
// the trailing `:` is completed by the next link or by the baseline callback.
#define @P@CPU_DISPATCH_CALL_CB_(FEATURE_TEST, TARGET_NAME, LEFT, ...) \
    (FEATURE_TEST) ? (@P@_CPU_CAT(@P@_CPU_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) :
// Baseline: the unsuffixed symbol, which terminates the conditional chain.
#define @P@CPU_DISPATCH_CALL_BASE_CB_(LEFT, ...) \
    (LEFT __VA_ARGS__)

/**
 * @def @P@CPU_DISPATCH_CALL_XB(...)
 *
 * Same as `@P@CPU_DISPATCH_CALL` but excludes the baseline call even
 * if it was provided within meson `mod_features.multi_targets()`.
 *
 * Note: This macro returns void
 */
#define @P@CPU_DISPATCH_CALL_XB(...) \
    @P@MTARGETS_CONF_DISPATCH(@P@CPU_HAVE, @P@CPU_DISPATCH_CALL_XB_CB_, __VA_ARGS__) \
    ((void) 0 /* discarded expression value */)
// Preprocessor callback: one `(test) ? (void) (symbol_TARGET args) :` link of the
// conditional chain; the chain is terminated by the `((void) 0)` in the macro above.
#define @P@CPU_DISPATCH_CALL_XB_CB_(FEATURE_TEST, TARGET_NAME, LEFT, ...) \
    (FEATURE_TEST) ? (void) (@P@_CPU_CAT(@P@_CPU_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) :

/**
 * @def @P@CPU_DISPATCH_INFO(...)
 *
 * Returns an array of two strings containing the enabled target names
 * in each multi-target source.
 *
 * The first item represents the currently dispatched target,
 * while the second item contains the available targets that
 * can potentially be dispatched based on CPU capabilities.
 *
 * @code
 * #include "arithmetic.dispatch.h" // generated config file
 * const char *enabled_targets[] = @P@CPU_DISPATCH_INFO();
 *
 * printf("Current dispatched target: %s\n", enabled_targets[0]);
 * printf("Available targets: %s\n", enabled_targets[1]);
 * @endcode
 */
#define @P@CPU_DISPATCH_INFO() \
    { \
        @P@MTARGETS_CONF_DISPATCH(@P@CPU_HAVE, @P@CPU_DISPATCH_INFO_HIGH_CB_, DUMMY) \
        @P@MTARGETS_CONF_BASELINE(@P@CPU_DISPATCH_INFO_BASE_HIGH_CB_, DUMMY) \
        "", \
        @P@MTARGETS_CONF_DISPATCH(@P@CPU_HAVE, @P@CPU_DISPATCH_INFO_CB_, DUMMY) \
        @P@MTARGETS_CONF_BASELINE(@P@CPU_DISPATCH_INFO_BASE_CB_, DUMMY) \
        ""\
    }
// Preprocessor callbacks
// First array item: a conditional chain yielding the name of the first target
// whose test succeeds; the chain is terminated by the `""` in the macro above.
#define @P@CPU_DISPATCH_INFO_HIGH_CB_(FEATURE_TEST, TARGET_NAME, ...) \
    (FEATURE_TEST) ? @P@_CPU_TOSTRING(TARGET_NAME) :
#define @P@CPU_DISPATCH_INFO_BASE_HIGH_CB_(...) \
    (1) ? "baseline(" @P@WITH_CPU_BASELINE ")" :
// Second array item: adjacent string literals, one per target,
// which the compiler concatenates into a single string.
#define @P@CPU_DISPATCH_INFO_CB_(FEATURE_TEST, TARGET_NAME, ...) \
    @P@_CPU_TOSTRING(TARGET_NAME) " "
#define @P@CPU_DISPATCH_INFO_BASE_CB_(...) \
    "baseline(" @P@WITH_CPU_BASELINE ")"
/**
 * @def @P@CPU_DISPATCH_CALL_ALL(...)
 *
 * Same as `@P@CPU_DISPATCH_CALL` but dispatches all the required optimizations for
 * the exported functions and variables instead of only the highest interested one.
 *
 * Each dispatched target is evaluated only if its features test succeeds, and its
 * result is discarded. The macro is meant to be used as a void expression.
 *
 * @param ... The function or variable prototype to be called or assigned,
 *            with the target-specific suffix added automatically.
 */
#define @P@CPU_DISPATCH_CALL_ALL(...) \
    (@P@MTARGETS_CONF_DISPATCH(@P@CPU_HAVE, @P@CPU_DISPATCH_CALL_ALL_CB_, __VA_ARGS__) \
    @P@MTARGETS_CONF_BASELINE(@P@CPU_DISPATCH_CALL_ALL_BASE_CB_, __VA_ARGS__))
// Preprocessor callbacks
// Dispatched target: one comma-separated operand per target. The call is cast to void so
// that both branches of the conditional have the same (void) type even when the dispatched
// symbol returns a value, matching what `@P@CPU_DISPATCH_CALL_XB_CB_` does.
#define @P@CPU_DISPATCH_CALL_ALL_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \
    ((TESTED_FEATURES) ? (void) (@P@_CPU_CAT(@P@_CPU_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) : (void) 0),
// Baseline: the unsuffixed symbol, last operand of the comma expression.
#define @P@CPU_DISPATCH_CALL_ALL_BASE_CB_(LEFT, ...) \
    ( LEFT __VA_ARGS__ )

// Bring in the header files of the enabled CPU features
#ifdef @P@HAVE_SSE
    #include <xmmintrin.h>
#endif
#ifdef @P@HAVE_SSE2
    #include <emmintrin.h>
#endif
#ifdef @P@HAVE_SSE3
    #include <pmmintrin.h>
#endif
#ifdef @P@HAVE_SSSE3
    #include <tmmintrin.h>
#endif
#ifdef @P@HAVE_SSE41
    #include <smmintrin.h>
#endif
#ifdef @P@HAVE_POPCNT
    #ifdef _MSC_VER
        #include <nmmintrin.h>
    #else
        #include <popcntintrin.h>
    #endif
#endif
#ifdef @P@HAVE_AVX
    #include <immintrin.h>
#endif

#if defined(@P@HAVE_XOP) || defined(@P@HAVE_FMA4)
    #include <x86intrin.h>
#endif

#ifdef @P@HAVE_VSX
    #include <altivec.h>
#endif

#ifdef @P@HAVE_VX
    #include <vecintrin.h>
#endif

#ifdef @P@HAVE_NEON
    #include <arm_neon.h>
#endif

#ifdef @P@HAVE_RVV
    #include <riscv_vector.h>
#endif
#endif // @P@_CPU_DISPATCHER_CONF_H_
