// This file is modified from the Ares N64 emulator core. Ares can
// be found at https://github.com/ares-emulator/ares. The original license
// for this portion of Ares is as follows:
// ----------------------------------------------------------------------
// ares
//
// Copyright (c) 2004 - 2021 ares team, Near et al
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// ----------------------------------------------------------------------

#include <cstdint>

#if defined(__x86_64__) || defined(_M_X64)
#define ARCHITECTURE_SUPPORTS_SSE4_1 1
#include <nmmintrin.h>
using v128 = __m128i;
#elif defined(__aarch64__) || defined(_M_ARM64)
#define ARCHITECTURE_SUPPORTS_SSE4_1 1
#include "sse2neon.h"
using v128 = __m128i;
#endif

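// The SISD/SIMD flags below are presumably consulted by the RSP vector
// implementation to choose between the SSE4.1 (or sse2neon-on-NEON) code paths
// and a portable scalar fallback; exactly one of the two is true per build.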
namespace Accuracy {
namespace RSP {
#if ARCHITECTURE_SUPPORTS_SSE4_1
constexpr bool SISD = false;
constexpr bool SIMD = true;
#else
constexpr bool SISD = true;
constexpr bool SIMD = false;
#endif
}
}

using u8 = uint8_t;
using s8 = int8_t;
using u16 = uint16_t;
using s16 = int16_t;
using u32 = uint32_t;
using s32 = int32_t;
using u64 = uint64_t;
using s64 = int64_t;
using uint128_t = uint64_t[2];

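// Saturate a signed value to the range of a signed `bits`-bit integer,
// e.g. sclamp<16>(70000) == 32767 and sclamp<16>(-70000) == -32768.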
template<u32 bits> inline auto sclamp(s64 x) -> s64 {
    enum : s64 { b = 1ull << (bits - 1), m = b - 1 };
    return (x > m) ? m : (x < -b) ? -b : x;
}

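// Sign-extend the low `bits` bits of a value, discarding everything above them,
// e.g. sclip<16>(0x18000) == -0x8000 and sclip<16>(0x7FFF) == 0x7FFF.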
template<u32 bits> inline auto sclip(s64 x) -> s64 {
    enum : u64 { b = 1ull << (bits - 1), m = b * 2 - 1 };
    return ((x & m) ^ b) - b;
}

struct RSP {
    using r32 = uint32_t;
    using cr32 = const r32;

    union r128 {
        struct { uint64_t u128[2]; };
#if ARCHITECTURE_SUPPORTS_SSE4_1
        struct { __m128i v128; };

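        // The implicit conversion and assignment below let an r128 be passed to
        // the SSE intrinsics and assigned from their results directly.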
        operator __m128i() const { return v128; }
        auto operator=(__m128i value) { v128 = value; }
#endif

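        // Lane accessors index from the most-significant end of the register
        // (hence the 15 - index / 7 - index mirroring), presumably so that lane 0
        // matches the RSP's big-endian element numbering on a little-endian host.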
        auto byte(u32 index) -> uint8_t& { return ((uint8_t*)&u128)[15 - index]; }
        auto byte(u32 index) const -> uint8_t { return ((uint8_t*)&u128)[15 - index]; }

        auto element(u32 index) -> uint16_t& { return ((uint16_t*)&u128)[7 - index]; }
        auto element(u32 index) const -> uint16_t { return ((uint16_t*)&u128)[7 - index]; }

        auto u8(u32 index) -> uint8_t& { return ((uint8_t*)&u128)[15 - index]; }
        auto u8(u32 index) const -> uint8_t { return ((uint8_t*)&u128)[15 - index]; }

        auto s16(u32 index) -> int16_t& { return ((int16_t*)&u128)[7 - index]; }
        auto s16(u32 index) const -> int16_t { return ((int16_t*)&u128)[7 - index]; }

        auto u16(u32 index) -> uint16_t& { return ((uint16_t*)&u128)[7 - index]; }
        auto u16(u32 index) const -> uint16_t { return ((uint16_t*)&u128)[7 - index]; }

        //VCx registers
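        // Each flag is stored as a full 16-bit lane mask: set() writes 0 - value,
        // so true leaves 0xFFFF in the lane and false leaves 0x0000.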
        auto get(u32 index) const -> bool { return u16(index) != 0; }
        auto set(u32 index, bool value) -> bool { return u16(index) = 0 - value, value; }

        //vu-registers.cpp
        inline auto operator()(u32 index) const -> r128;
    };
    using cr128 = const r128;

    struct VU {
        r128 r[32];
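        // Each of the eight lanes has a 48-bit accumulator, kept as three 16-bit
        // slices (high/middle/low) spread across acch, accm and accl.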
        r128 acch, accm, accl;
        r128 vcoh, vcol;  //16-bit little endian
        r128 vcch, vccl;  //16-bit little endian
        r128 vce;         // 8-bit little endian
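        // Reciprocal/rsqrt unit state used by the VRCP/VRSQ family; divdp
        // presumably marks a pending double-precision (high-half-first) sequence.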
        s16 divin;
        s16 divout;
        bool divdp;
    } vpu;

    static constexpr r128 zero{0};
    static constexpr r128 invert{(uint64_t)-1, (uint64_t)-1};

    inline auto accumulatorGet(u32 index) const -> u64;
    inline auto accumulatorSet(u32 index, u64 value) -> void;
    inline auto accumulatorSaturate(u32 index, bool slice, u16 negative, u16 positive) const -> u16;
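    // A minimal sketch of the value accumulatorGet presumably assembles for a lane
    // (the accessor is defined elsewhere in the implementation):
    //   (u64)vpu.acch.u16(index) << 32 | (u64)vpu.accm.u16(index) << 16 | vpu.accl.u16(index)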
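    // Scalar <-> vector moves (CFC2/CTC2/MFC2/MTC2) and the LWC2/SWC2 vector
    // load/store group. For the loads/stores, e is the element/byte within vt
    // where the access starts; rs carries the scalar base address and imm the
    // signed offset field from the opcode.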
    inline auto CFC2(r32& rt, u8 rd) -> void;
    inline auto CTC2(cr32& rt, u8 rd) -> void;
    template<u8 e> inline auto LBV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LDV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LFV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LHV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LLV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LPV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LQV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LRV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LSV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LTV(u8 vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LUV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto LWV(r128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto MFC2(r32& rt, cr128& vs) -> void;
    template<u8 e> inline auto MTC2(cr32& rt, r128& vs) -> void;
    template<u8 e> inline auto SBV(cr128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto SDV(cr128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto SFV(cr128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto SHV(cr128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto SLV(cr128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto SPV(cr128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto SQV(cr128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto SRV(cr128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto SSV(cr128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto STV(u8 vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto SUV(cr128& vt, cr32& rs, s8 imm) -> void;
    template<u8 e> inline auto SWV(cr128& vt, cr32& rs, s8 imm) -> void;
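    // Vector computational ops (COP2). For most of these, e is the element
    // (broadcast) selector applied to vt; vd is the destination and vs/vt the
    // source registers.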
    template<u8 e> inline auto VABS(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VADD(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VADDC(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VAND(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VCH(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VCL(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VCR(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VEQ(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VGE(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VLT(r128& vd, cr128& vs, cr128& vt) -> void;
    template<bool U, u8 e>
    inline auto VMACF(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VMACF(r128& vd, cr128& vs, cr128& vt) -> void { VMACF<0, e>(vd, vs, vt); }
    template<u8 e> inline auto VMACU(r128& vd, cr128& vs, cr128& vt) -> void { VMACF<1, e>(vd, vs, vt); }
    inline auto VMACQ(r128& vd) -> void;
    template<u8 e> inline auto VMADH(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VMADL(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VMADM(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VMADN(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VMOV(r128& vd, u8 de, cr128& vt) -> void;
    template<u8 e> inline auto VMRG(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VMUDH(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VMUDL(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VMUDM(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VMUDN(r128& vd, cr128& vs, cr128& vt) -> void;
    template<bool U, u8 e>
    inline auto VMULF(r128& rd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VMULF(r128& rd, cr128& vs, cr128& vt) -> void { VMULF<0, e>(rd, vs, vt); }
    template<u8 e> inline auto VMULU(r128& rd, cr128& vs, cr128& vt) -> void { VMULF<1, e>(rd, vs, vt); }
    template<u8 e> inline auto VMULQ(r128& rd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VNAND(r128& rd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VNE(r128& vd, cr128& vs, cr128& vt) -> void;
    inline auto VNOP() -> void;
    template<u8 e> inline auto VNOR(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VNXOR(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VOR(r128& vd, cr128& vs, cr128& vt) -> void;
    template<bool L, u8 e>
    inline auto VRCP(r128& vd, u8 de, cr128& vt) -> void;
    template<u8 e> inline auto VRCP(r128& vd, u8 de, cr128& vt) -> void { VRCP<0, e>(vd, de, vt); }
    template<u8 e> inline auto VRCPL(r128& vd, u8 de, cr128& vt) -> void { VRCP<1, e>(vd, de, vt); }
    template<u8 e> inline auto VRCPH(r128& vd, u8 de, cr128& vt) -> void;
    template<bool D, u8 e>
    inline auto VRND(r128& vd, u8 vs, cr128& vt) -> void;
    template<u8 e> inline auto VRNDN(r128& vd, u8 vs, cr128& vt) -> void { VRND<0, e>(vd, vs, vt); }
    template<u8 e> inline auto VRNDP(r128& vd, u8 vs, cr128& vt) -> void { VRND<1, e>(vd, vs, vt); }
    template<bool L, u8 e>
    inline auto VRSQ(r128& vd, u8 de, cr128& vt) -> void;
    template<u8 e> inline auto VRSQ(r128& vd, u8 de, cr128& vt) -> void { VRSQ<0, e>(vd, de, vt); }
    template<u8 e> inline auto VRSQL(r128& vd, u8 de, cr128& vt) -> void { VRSQ<1, e>(vd, de, vt); }
    template<u8 e> inline auto VRSQH(r128& vd, u8 de, cr128& vt) -> void;
    template<u8 e> inline auto VSAR(r128& vd, cr128& vs) -> void;
    template<u8 e> inline auto VSUB(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VSUBC(r128& vd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VXOR(r128& rd, cr128& vs, cr128& vt) -> void;
    template<u8 e> inline auto VZERO(r128& rd, cr128& vs, cr128& vt) -> void;
};
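// Usage sketch (the register indices and decode step are hypothetical): after
// decoding an RSP vector opcode, a core would dispatch to the matching member, e.g.
//   RSP rsp;
//   rsp.VADD<0>(rsp.vpu.r[vd], rsp.vpu.r[vs], rsp.vpu.r[vt]);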