diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
index 4c6b565d5d..d0e033d371 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
@@ -94,9 +94,18 @@ void JitArm64::psq_st(UGeckoInstruction inst)
   fpr.Lock(Q0, Q1);
 
   ARM64Reg arm_addr = gpr.R(inst.RA);
+  ARM64Reg VS = fpr.R(inst.RS);
+
   ARM64Reg scale_reg = W0;
   ARM64Reg addr_reg = W1;
-  ARM64Reg type_reg = gpr.GetReg();
+  ARM64Reg type_reg = W2;
+
+  BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
+  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+
+  // Wipe the registers we are using as temporaries
+  gprs_in_use &= BitSet32(~0x40000007);
+  fprs_in_use &= BitSet32(~3);
 
   LDR(INDEX_UNSIGNED, scale_reg, X29, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
@@ -118,13 +127,35 @@ void JitArm64::psq_st(UGeckoInstruction inst)
   if (update)
     MOV(arm_addr, addr_reg);
 
-  ARM64Reg VS = fpr.R(inst.RS);
   m_float_emit.FCVTN(32, D0, VS);
-  MOVI2R(X30, (u64)&asm_routines.pairedStoreQuantized[inst.W * 8]);
-  LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
-  BLR(X30);
-
-  gpr.Unlock(W0, W1, W2, W30, type_reg);
+
+  // Inline address check
+  {
+    TST(addr_reg, 6, 1);
+    FixupBranch argh = B(CC_NEQ);
+
+    // Fast
+    MOVI2R(X30, (u64)&asm_routines.pairedStoreQuantized[inst.W * 8]);
+    LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
+    BLR(EncodeRegTo64(type_reg));
+
+    FixupBranch continue1 = B();
+    SetJumpTarget(argh);
+
+    // Slow
+    MOVI2R(X30, (u64)&asm_routines.pairedStoreQuantized[16 + inst.W * 8]);
+    LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
+
+    ABI_PushRegisters(gprs_in_use);
+    m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
+    BLR(EncodeRegTo64(type_reg));
+    m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
+    ABI_PopRegisters(gprs_in_use);
+
+    SetJumpTarget(continue1);
+  }
+
+  gpr.Unlock(W0, W1, W2, W30);
   fpr.Unlock(Q0, Q1);
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index e82367873b..053d7ebf05 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -107,7 +107,6 @@ void JitArm64AsmRoutineManager::GenerateCommon()
   ARM64Reg addr_reg = X1;
   ARM64Reg scale_reg = X0;
   ARM64FloatEmitter float_emit(this);
-  const u32 GPR_CALLER_SAVE = 0x6007FFFF;
 
   const u8* loadPairedIllegal = GetCodePtr();
   BRK(100);
@@ -263,299 +262,255 @@ void JitArm64AsmRoutineManager::GenerateCommon()
   // Stores
   const u8* storePairedIllegal = GetCodePtr();
   BRK(0x101);
-  const u8* storePairedFloat = GetCodePtr();
+  const u8* storePairedFloat;
+  const u8* storePairedFloatSlow;
   {
-    BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
-    BitSet32 fprs(~3); // All except Q0/Q1
-
-    TST(DecodeReg(addr_reg), 6, 1);
-    FixupBranch argh = B(CC_NEQ);
-
+    storePairedFloat = GetCodePtr();
     float_emit.REV32(8, D0, D0);
     MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
     float_emit.ST1(64, Q0, 0, addr_reg, SP);
     RET(X30);
 
-    SetJumpTarget(argh);
-
-    ABI_PushRegisters(gprs);
-    float_emit.ABI_PushRegisters(fprs, X3);
+    storePairedFloatSlow = GetCodePtr();
    float_emit.UMOV(64, X0, Q0, 0);
     ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
-    MOVI2R(X30, (u64)PowerPC::Write_U64);
-    BLR(X30);
-    float_emit.ABI_PopRegisters(fprs, X3);
-    ABI_PopRegisters(gprs);
-    RET(X30);
+    MOVI2R(X2, (u64)PowerPC::Write_U64);
+    BR(X2);
   }
-  const u8* storePairedU8 = GetCodePtr();
+
+  const u8* storePairedU8;
+  const u8* storePairedU8Slow;
   {
-    BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
-    BitSet32 fprs(~3); // All except Q0/Q1
-
-    MOVI2R(X2, (u64)&m_quantizeTableS);
-    ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-    float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
-    float_emit.FMUL(32, D0, D0, D1, 0);
-    float_emit.FCVTZU(32, D0, D0);
-    float_emit.XTN(16, D0, D0);
-    float_emit.XTN(8, D0, D0);
-
-    TST(DecodeReg(addr_reg), 6, 1);
-    FixupBranch argh = B(CC_NEQ);
+    auto emit_quantize = [this, &float_emit, scale_reg]()
+    {
+      MOVI2R(X2, (u64)&m_quantizeTableS);
+      ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
+      float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+      float_emit.FMUL(32, D0, D0, D1, 0);
+      float_emit.FCVTZU(32, D0, D0);
+      float_emit.XTN(16, D0, D0);
+      float_emit.XTN(8, D0, D0);
+    };
+    storePairedU8 = GetCodePtr();
+    emit_quantize();
     MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
     float_emit.ST1(16, Q0, 0, addr_reg, SP);
     RET(X30);
-    SetJumpTarget(argh);
-    ABI_PushRegisters(gprs);
-    float_emit.ABI_PushRegisters(fprs, X3);
+    storePairedU8Slow = GetCodePtr();
+    emit_quantize();
     float_emit.UMOV(16, W0, Q0, 0);
     REV16(W0, W0);
-    MOVI2R(X30, (u64)PowerPC::Write_U16);
-    BLR(X30);
-    float_emit.ABI_PopRegisters(fprs, X3);
-    ABI_PopRegisters(gprs);
-    RET(X30);
+    MOVI2R(X2, (u64)PowerPC::Write_U16);
+    BR(X2);
   }
-  const u8* storePairedS8 = GetCodePtr();
+  const u8* storePairedS8;
+  const u8* storePairedS8Slow;
   {
-    BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
-    BitSet32 fprs(~3); // All except Q0/Q1
-
-    MOVI2R(X2, (u64)&m_quantizeTableS);
-    ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-    float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
-    float_emit.FMUL(32, D0, D0, D1, 0);
-    float_emit.FCVTZS(32, D0, D0);
-    float_emit.XTN(16, D0, D0);
-    float_emit.XTN(8, D0, D0);
-
-    TST(DecodeReg(addr_reg), 6, 1);
-    FixupBranch argh = B(CC_NEQ);
+    auto emit_quantize = [this, &float_emit, scale_reg]()
+    {
+      MOVI2R(X2, (u64)&m_quantizeTableS);
+      ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
+      float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+      float_emit.FMUL(32, D0, D0, D1, 0);
+      float_emit.FCVTZS(32, D0, D0);
+      float_emit.XTN(16, D0, D0);
+      float_emit.XTN(8, D0, D0);
+    };
+    storePairedS8 = GetCodePtr();
+    emit_quantize();
     MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
     float_emit.ST1(16, Q0, 0, addr_reg, SP);
     RET(X30);
-    SetJumpTarget(argh);
-    ABI_PushRegisters(gprs);
-    float_emit.ABI_PushRegisters(fprs, X3);
+    storePairedS8Slow = GetCodePtr();
+    emit_quantize();
     float_emit.UMOV(16, W0, Q0, 0);
     REV16(W0, W0);
-    MOVI2R(X30, (u64)PowerPC::Write_U16);
-    BLR(X30);
-    float_emit.ABI_PopRegisters(fprs, X3);
-    ABI_PopRegisters(gprs);
-    RET(X30);
+    MOVI2R(X2, (u64)PowerPC::Write_U16);
+    BR(X2);
  }
-  const u8* storePairedU16 = GetCodePtr();
+  const u8* storePairedU16;
+  const u8* storePairedU16Slow;
   {
-    BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
-    BitSet32 fprs(~3); // All except Q0/Q1
+    auto emit_quantize = [this, &float_emit, scale_reg]()
+    {
+      MOVI2R(X2, (u64)&m_quantizeTableS);
+      ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
+      float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+      float_emit.FMUL(32, D0, D0, D1, 0);
+      float_emit.FCVTZU(32, D0, D0);
+      float_emit.XTN(16, D0, D0);
+      float_emit.REV16(8, D0, D0);
+    };
 
-    MOVI2R(X2, (u64)&m_quantizeTableS);
-    ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-    float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
-    float_emit.FMUL(32, D0, D0, D1, 0);
-    float_emit.FCVTZU(32, D0, D0);
-    float_emit.XTN(16, D0, D0);
-    float_emit.REV16(8, D0, D0);
-
-    TST(DecodeReg(addr_reg), 6, 1);
-    FixupBranch argh = B(CC_NEQ);
+    storePairedU16 = GetCodePtr();
+    emit_quantize();
     MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
     float_emit.ST1(32, Q0, 0, addr_reg, SP);
     RET(X30);
-    SetJumpTarget(argh);
-    ABI_PushRegisters(gprs);
-    float_emit.ABI_PushRegisters(fprs, X3);
+    storePairedU16Slow = GetCodePtr();
+    emit_quantize();
     float_emit.REV32(8, D0, D0);
     float_emit.UMOV(32, W0, Q0, 0);
-    MOVI2R(X30, (u64)PowerPC::Write_U32);
-    BLR(X30);
-    float_emit.ABI_PopRegisters(fprs, X3);
-    ABI_PopRegisters(gprs);
-    RET(X30);
+    MOVI2R(X2, (u64)PowerPC::Write_U32);
+    BR(X2);
   }
-  const u8* storePairedS16 = GetCodePtr(); // Used by Viewtiful Joe's intro movie
+  const u8* storePairedS16; // Used by Viewtiful Joe's intro movie
+  const u8* storePairedS16Slow;
   {
-    BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
-    BitSet32 fprs(~3); // All except Q0/Q1
+    auto emit_quantize = [this, &float_emit, scale_reg]()
+    {
+      MOVI2R(X2, (u64)&m_quantizeTableS);
+      ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
+      float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+      float_emit.FMUL(32, D0, D0, D1, 0);
+      float_emit.FCVTZS(32, D0, D0);
+      float_emit.XTN(16, D0, D0);
+      float_emit.REV16(8, D0, D0);
+    };
 
-    MOVI2R(X2, (u64)&m_quantizeTableS);
-    ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-    float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
-    float_emit.FMUL(32, D0, D0, D1, 0);
-    float_emit.FCVTZS(32, D0, D0);
-    float_emit.XTN(16, D0, D0);
-    float_emit.REV16(8, D0, D0);
-
-    TST(DecodeReg(addr_reg), 6, 1);
-    FixupBranch argh = B(CC_NEQ);
+    storePairedS16 = GetCodePtr();
+    emit_quantize();
     MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
     float_emit.ST1(32, Q0, 0, addr_reg, SP);
     RET(X30);
-    SetJumpTarget(argh);
-    ABI_PushRegisters(gprs);
-    float_emit.ABI_PushRegisters(fprs, X3);
+    storePairedS16Slow = GetCodePtr();
+    emit_quantize();
     float_emit.REV32(8, D0, D0);
     float_emit.UMOV(32, W0, Q0, 0);
-    MOVI2R(X30, (u64)PowerPC::Write_U32);
-    BLR(X30);
-    float_emit.ABI_PopRegisters(fprs, X3);
-    ABI_PopRegisters(gprs);
-    RET(X30);
+    MOVI2R(X2, (u64)PowerPC::Write_U32);
+    BR(X2);
   }
-  const u8* storeSingleFloat = GetCodePtr();
+  const u8* storeSingleFloat;
+  const u8* storeSingleFloatSlow;
   {
-    BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
-    BitSet32 fprs(~3); // All except Q0/Q1
-
-    TST(DecodeReg(addr_reg), 6, 1);
-    FixupBranch argh = B(CC_NEQ);
-
+    storeSingleFloat = GetCodePtr();
     float_emit.REV32(8, D0, D0);
     MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
     float_emit.STR(32, INDEX_UNSIGNED, D0, addr_reg, 0);
     RET(X30);
 
-    SetJumpTarget(argh);
-
-    ABI_PushRegisters(gprs);
-    float_emit.ABI_PushRegisters(fprs, X3);
+    storeSingleFloatSlow = GetCodePtr();
     float_emit.UMOV(32, W0, Q0, 0);
-    MOVI2R(X30, (u64)&PowerPC::Write_U32);
-    BLR(X30);
-    float_emit.ABI_PopRegisters(fprs, X3);
-    ABI_PopRegisters(gprs);
-    RET(X30);
+    MOVI2R(X2, (u64)&PowerPC::Write_U32);
+    BR(X2);
   }
-  const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii
+  const u8* storeSingleU8; // Used by MKWii
+  const u8* storeSingleU8Slow;
   {
-    BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
-    BitSet32 fprs(~3); // All except Q0/Q1
+    auto emit_quantize = [this, &float_emit, scale_reg]()
+    {
+      MOVI2R(X2, (u64)&m_quantizeTableS);
+      ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
+      float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+      float_emit.FMUL(32, D0, D0, D1);
+      float_emit.FCVTZU(32, D0, D0);
+      float_emit.XTN(16, D0, D0);
+      float_emit.XTN(8, D0, D0);
+    };
 
-    MOVI2R(X2, (u64)&m_quantizeTableS);
-    ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-    float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
-    float_emit.FMUL(32, D0, D0, D1);
-    float_emit.FCVTZU(32, D0, D0);
-    float_emit.XTN(16, D0, D0);
-    float_emit.XTN(8, D0, D0);
-
-    TST(DecodeReg(addr_reg), 6, 1);
-    FixupBranch argh = B(CC_NEQ);
+    storeSingleU8 = GetCodePtr();
+    emit_quantize();
     MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
     float_emit.ST1(8, Q0, 0, addr_reg);
     RET(X30);
-    SetJumpTarget(argh);
-    ABI_PushRegisters(gprs);
-    float_emit.ABI_PushRegisters(fprs, X3);
-    float_emit.UMOV(32, W0, Q0, 0);
-    MOVI2R(X30, (u64)&PowerPC::Write_U8);
-    BLR(X30);
-    float_emit.ABI_PopRegisters(fprs, X3);
-    ABI_PopRegisters(gprs);
-    RET(X30);
+    storeSingleU8Slow = GetCodePtr();
+    emit_quantize();
+    float_emit.UMOV(8, W0, Q0, 0);
+    MOVI2R(X2, (u64)&PowerPC::Write_U8);
+    BR(X2);
   }
-  const u8* storeSingleS8 = GetCodePtr();
+  const u8* storeSingleS8;
+  const u8* storeSingleS8Slow;
   {
-    BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
-    BitSet32 fprs(~3); // All except Q0/Q1
+    auto emit_quantize = [this, &float_emit, scale_reg]()
+    {
+      MOVI2R(X2, (u64)&m_quantizeTableS);
+      ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
+      float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+      float_emit.FMUL(32, D0, D0, D1);
+      float_emit.FCVTZS(32, D0, D0);
+      float_emit.XTN(16, D0, D0);
+      float_emit.XTN(8, D0, D0);
+    };
 
-    MOVI2R(X2, (u64)&m_quantizeTableS);
-    ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-    float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
-    float_emit.FMUL(32, D0, D0, D1);
-    float_emit.FCVTZS(32, D0, D0);
-    float_emit.XTN(16, D0, D0);
-    float_emit.XTN(8, D0, D0);
-
-    TST(DecodeReg(addr_reg), 6, 1);
-    FixupBranch argh = B(CC_NEQ);
+    storeSingleS8 = GetCodePtr();
+    emit_quantize();
     MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
     float_emit.ST1(8, Q0, 0, addr_reg);
     RET(X30);
-    SetJumpTarget(argh);
-    ABI_PushRegisters(gprs);
-    float_emit.ABI_PushRegisters(fprs, X3);
-    float_emit.SMOV(32, W0, Q0, 0);
-    MOVI2R(X30, (u64)&PowerPC::Write_U8);
-    BLR(X30);
-    float_emit.ABI_PopRegisters(fprs, X3);
-    ABI_PopRegisters(gprs);
-    RET(X30);
+    storeSingleS8Slow = GetCodePtr();
+    emit_quantize();
+    float_emit.SMOV(8, W0, Q0, 0);
+    MOVI2R(X2, (u64)&PowerPC::Write_U8);
+    BR(X2);
   }
-  const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii
+  const u8* storeSingleU16; // Used by MKWii
+  const u8* storeSingleU16Slow;
   {
-    BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
-    BitSet32 fprs(~3); // All except Q0/Q1
+    auto emit_quantize = [this, &float_emit, scale_reg]()
+    {
+      MOVI2R(X2, (u64)&m_quantizeTableS);
+      ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
+      float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+      float_emit.FMUL(32, D0, D0, D1);
+      float_emit.FCVTZU(32, D0, D0);
+      float_emit.XTN(16, D0, D0);
+    };
 
-    MOVI2R(X2, (u64)&m_quantizeTableS);
-    ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-    float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
-    float_emit.FMUL(32, D0, D0, D1);
-    float_emit.FCVTZU(32, D0, D0);
-    float_emit.XTN(16, D0, D0);
-
-    TST(DecodeReg(addr_reg), 6, 1);
-    FixupBranch argh = B(CC_NEQ);
+    storeSingleU16 = GetCodePtr();
+    emit_quantize();
     MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
     float_emit.REV16(8, D0, D0);
     float_emit.ST1(16, Q0, 0, addr_reg);
     RET(X30);
-    SetJumpTarget(argh);
-    ABI_PushRegisters(gprs);
-    float_emit.ABI_PushRegisters(fprs, X3);
-    float_emit.UMOV(32, W0, Q0, 0);
-    MOVI2R(X30, (u64)&PowerPC::Write_U16);
-    BLR(X30);
-    float_emit.ABI_PopRegisters(fprs, X3);
-    ABI_PopRegisters(gprs);
-    RET(X30);
+    storeSingleU16Slow = GetCodePtr();
+    emit_quantize();
+    float_emit.UMOV(16, W0, Q0, 0);
+    MOVI2R(X2, (u64)&PowerPC::Write_U16);
+    BR(X2);
   }
-  const u8* storeSingleS16 = GetCodePtr();
+  const u8* storeSingleS16;
+  const u8* storeSingleS16Slow;
   {
-    BitSet32 gprs(GPR_CALLER_SAVE & ~7); // All except X0/X1/X2
-    BitSet32 fprs(~3); // All except Q0/Q1
+    auto emit_quantize = [this, &float_emit, scale_reg]()
+    {
+      MOVI2R(X2, (u64)&m_quantizeTableS);
+      ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
+      float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
+      float_emit.FMUL(32, D0, D0, D1);
+      float_emit.FCVTZS(32, D0, D0);
+      float_emit.XTN(16, D0, D0);
+    };
 
-    MOVI2R(X2, (u64)&m_quantizeTableS);
-    ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
-    float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
-    float_emit.FMUL(32, D0, D0, D1);
-    float_emit.FCVTZS(32, D0, D0);
-    float_emit.XTN(16, D0, D0);
-
-    TST(DecodeReg(addr_reg), 6, 1);
-    FixupBranch argh = B(CC_NEQ);
+    storeSingleS16 = GetCodePtr();
+    emit_quantize();
     MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
     float_emit.REV16(8, D0, D0);
     float_emit.ST1(16, Q0, 0, addr_reg);
     RET(X30);
-    SetJumpTarget(argh);
-    ABI_PushRegisters(gprs);
-    float_emit.ABI_PushRegisters(fprs, X3);
-    float_emit.SMOV(32, W0, Q0, 0);
-    MOVI2R(X30, (u64)&PowerPC::Write_U16);
-    BLR(X30);
-    float_emit.ABI_PopRegisters(fprs, X3);
-    ABI_PopRegisters(gprs);
-    RET(X30);
+    storeSingleS16Slow = GetCodePtr();
+    emit_quantize();
+    float_emit.SMOV(16, W0, Q0, 0);
+    MOVI2R(X2, (u64)&PowerPC::Write_U16);
+    BR(X2);
   }
 
   pairedStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
-  ReserveCodeSpace(16 * sizeof(u8*));
+  ReserveCodeSpace(32 * sizeof(u8*));
+
+  // Fast
   pairedStoreQuantized[0] = storePairedFloat;
   pairedStoreQuantized[1] = storePairedIllegal;
   pairedStoreQuantized[2] = storePairedIllegal;
@@ -573,4 +528,24 @@ void JitArm64AsmRoutineManager::GenerateCommon()
   pairedStoreQuantized[13] = storeSingleU16;
   pairedStoreQuantized[14] = storeSingleS8;
   pairedStoreQuantized[15] = storeSingleS16;
+
+  // Slow
+  pairedStoreQuantized[16] = storePairedFloatSlow;
+  pairedStoreQuantized[17] = storePairedIllegal;
+  pairedStoreQuantized[18] = storePairedIllegal;
+  pairedStoreQuantized[19] = storePairedIllegal;
+  pairedStoreQuantized[20] = storePairedU8Slow;
+  pairedStoreQuantized[21] = storePairedU16Slow;
+  pairedStoreQuantized[22] = storePairedS8Slow;
+  pairedStoreQuantized[23] = storePairedS16Slow;
+
+  pairedStoreQuantized[24] = storeSingleFloatSlow;
+  pairedStoreQuantized[25] = storePairedIllegal;
+  pairedStoreQuantized[26] = storePairedIllegal;
+  pairedStoreQuantized[27] = storePairedIllegal;
+  pairedStoreQuantized[28] = storeSingleU8Slow;
+  pairedStoreQuantized[29] = storeSingleU16Slow;
+  pairedStoreQuantized[30] = storeSingleS8Slow;
+  pairedStoreQuantized[31] = storeSingleS16Slow;
+
 }
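Note on the resulting dispatch scheme (an illustrative summary, not text from the patch): pairedStoreQuantized doubles from 16 to 32 entries. Entries 0-15 are the fast routines, which write straight through Memory::logical_base and return; entries 16-31 are slow clones of the same routines, which tail-call (BR) into the PowerPC::Write_* handlers and leave register saving to the call site. A psq_st site indexes the fast half with inst.W * 8 plus the GQR store type, and adds 16 when the inline TST on the effective address fails, so the caller-saved spill now happens only on the slow path, and only for the registers that are actually live (gprs_in_use / fprs_in_use). A minimal C++ sketch of the index math, with hypothetical names:

// Illustrative sketch only -- it mirrors the table layout the patch builds.
// "type" stands for the 3-bit GQR store type; every name here is hypothetical.
#include <cstdint>

const void* SelectStoreRoutine(const void* const table[32], uint32_t type,
                               bool single, bool needs_slow_path)
{
  // Slots [0,7] hold the paired fast stores, [8,15] the single fast stores;
  // the slow clones sit exactly 16 slots later, in the same order.
  uint32_t index = (single ? 8u : 0u) + (type & 7u);  // inst.W * 8 + type
  if (needs_slow_path)  // the emitted TST on the address did not pass
    index += 16;
  return table[index];
}

In the emitted code the same arithmetic appears as asm_routines.pairedStoreQuantized[inst.W * 8] for the fast half and asm_routines.pairedStoreQuantized[16 + inst.W * 8] for the slow half, with the type offset applied by the scaled-register LDR.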