mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 07:21:14 +01:00
Merge pull request #5259 from MerryMage/quantload
Jit64: Make psq_lXX PIE-compliant
This commit is contained in:
commit
8d4be36963
@ -237,6 +237,7 @@ void Jit64AsmRoutineManager::GenerateCommon()
|
||||
GenMfcr();
|
||||
|
||||
GenQuantizedLoads();
|
||||
GenQuantizedSingleLoads();
|
||||
GenQuantizedStores();
|
||||
GenQuantizedSingleStores();
|
||||
|
||||
|
@ -148,16 +148,18 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(RSCRATCH2), Imm32(0x3F07));
|
||||
|
||||
// Get the high part of the GQR register
|
||||
OpArg gqr = PPCSTATE(spr[SPR_GQR0 + i]);
|
||||
gqr.AddMemOffset(2);
|
||||
|
||||
MOV(32, R(RSCRATCH2), Imm32(0x3F07));
|
||||
AND(32, R(RSCRATCH2), gqr);
|
||||
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
|
||||
|
||||
CALLptr(MScaled(RSCRATCH, SCALE_8, PtrOffset(&asm_routines.pairedLoadQuantized[w * 8])));
|
||||
LEA(64, RSCRATCH, M(w ? asm_routines.singleLoadQuantized : asm_routines.pairedLoadQuantized));
|
||||
// 8-bit operations do not zero upper 32-bits of 64-bit registers.
|
||||
// Here we know that RSCRATCH's least significant byte is zero.
|
||||
OR(8, R(RSCRATCH), R(RSCRATCH2));
|
||||
SHL(8, R(RSCRATCH), Imm8(3));
|
||||
CALLptr(MatR(RSCRATCH));
|
||||
}
|
||||
|
||||
CVTPS2PD(fpr.RX(s), R(XMM0));
|
||||
|
@ -277,13 +277,22 @@ const u8* CommonAsmRoutines::GenQuantizedStoreRuntime(bool single, EQuantizeType
|
||||
|
||||
void CommonAsmRoutines::GenQuantizedLoads()
|
||||
{
|
||||
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
|
||||
ReserveCodeSpace(16 * sizeof(u8*));
|
||||
// Aligned to 256 bytes as least significant byte needs to be zero (See: Jit64::psq_lXX).
|
||||
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCodeTo(256)));
|
||||
ReserveCodeSpace(8 * sizeof(u8*));
|
||||
|
||||
for (int type = 0; type < 8; type++)
|
||||
pairedLoadQuantized[type] = GenQuantizedLoadRuntime(false, static_cast<EQuantizeType>(type));
|
||||
}
|
||||
|
||||
void CommonAsmRoutines::GenQuantizedSingleLoads()
|
||||
{
|
||||
// Aligned to 256 bytes as least significant byte needs to be zero (See: Jit64::psq_lXX).
|
||||
singleLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCodeTo(256)));
|
||||
ReserveCodeSpace(8 * sizeof(u8*));
|
||||
|
||||
for (int type = 0; type < 8; type++)
|
||||
pairedLoadQuantized[type + 8] = GenQuantizedLoadRuntime(true, static_cast<EQuantizeType>(type));
|
||||
singleLoadQuantized[type] = GenQuantizedLoadRuntime(true, static_cast<EQuantizeType>(type));
|
||||
}
|
||||
|
||||
const u8* CommonAsmRoutines::GenQuantizedLoadRuntime(bool single, EQuantizeType type)
|
||||
|
@ -33,6 +33,7 @@ protected:
|
||||
const u8* GenQuantizedLoadRuntime(bool single, EQuantizeType type);
|
||||
const u8* GenQuantizedStoreRuntime(bool single, EQuantizeType type);
|
||||
void GenQuantizedLoads();
|
||||
void GenQuantizedSingleLoads();
|
||||
void GenQuantizedStores();
|
||||
void GenQuantizedSingleStores();
|
||||
};
|
||||
|
@ -1616,10 +1616,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
||||
Jit->MOV(32, R(RSCRATCH2), Imm32(0x3F07));
|
||||
Jit->AND(32, R(RSCRATCH2), M(((char*)&GQR(quantreg)) + 2));
|
||||
Jit->MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
|
||||
Jit->OR(32, R(RSCRATCH), Imm8(w << 3));
|
||||
|
||||
const u8** table =
|
||||
w ? Jit->asm_routines.singleLoadQuantized : Jit->asm_routines.pairedLoadQuantized;
|
||||
|
||||
Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp1(I)));
|
||||
Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(Jit->asm_routines.pairedLoadQuantized)));
|
||||
Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)table));
|
||||
Jit->MOVAPD(reg, R(XMM0));
|
||||
RI.fregs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
|
@ -81,7 +81,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
|
||||
UBFM(type_reg, scale_reg, 16, 18); // Type
|
||||
UBFM(scale_reg, scale_reg, 24, 29); // Scale
|
||||
|
||||
MOVP2R(X30, &pairedLoadQuantized[inst.W * 8]);
|
||||
MOVP2R(X30, inst.W ? singleLoadQuantized : pairedLoadQuantized);
|
||||
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
|
||||
BLR(X30);
|
||||
|
||||
|
@ -336,7 +336,7 @@ void JitArm64::GenerateCommonAsm()
|
||||
JitRegister::Register(start, GetCodePtr(), "JIT_QuantizedLoad");
|
||||
|
||||
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
|
||||
ReserveCodeSpace(16 * sizeof(u8*));
|
||||
ReserveCodeSpace(8 * sizeof(u8*));
|
||||
|
||||
pairedLoadQuantized[0] = loadPairedFloatTwo;
|
||||
pairedLoadQuantized[1] = loadPairedIllegal;
|
||||
@ -347,14 +347,17 @@ void JitArm64::GenerateCommonAsm()
|
||||
pairedLoadQuantized[6] = loadPairedS8Two;
|
||||
pairedLoadQuantized[7] = loadPairedS16Two;
|
||||
|
||||
pairedLoadQuantized[8] = loadPairedFloatOne;
|
||||
pairedLoadQuantized[9] = loadPairedIllegal;
|
||||
pairedLoadQuantized[10] = loadPairedIllegal;
|
||||
pairedLoadQuantized[11] = loadPairedIllegal;
|
||||
pairedLoadQuantized[12] = loadPairedU8One;
|
||||
pairedLoadQuantized[13] = loadPairedU16One;
|
||||
pairedLoadQuantized[14] = loadPairedS8One;
|
||||
pairedLoadQuantized[15] = loadPairedS16One;
|
||||
singleLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
|
||||
ReserveCodeSpace(8 * sizeof(u8*));
|
||||
|
||||
singleLoadQuantized[0] = loadPairedFloatOne;
|
||||
singleLoadQuantized[1] = loadPairedIllegal;
|
||||
singleLoadQuantized[2] = loadPairedIllegal;
|
||||
singleLoadQuantized[3] = loadPairedIllegal;
|
||||
singleLoadQuantized[4] = loadPairedU8One;
|
||||
singleLoadQuantized[5] = loadPairedU16One;
|
||||
singleLoadQuantized[6] = loadPairedS8One;
|
||||
singleLoadQuantized[7] = loadPairedS16One;
|
||||
|
||||
// Stores
|
||||
start = GetCodePtr();
|
||||
|
@ -39,6 +39,12 @@ public:
|
||||
// Trashes: all three RSCRATCH
|
||||
const u8** pairedLoadQuantized;
|
||||
|
||||
// In: array index: GQR to use.
|
||||
// In: ECX: Address to read from.
|
||||
// Out: XMM0: Bottom 32-bit slot holds the read value.
|
||||
// Trashes: all three RSCRATCH
|
||||
const u8** singleLoadQuantized;
|
||||
|
||||
// In: array index: GQR to use.
|
||||
// In: ECX: Address to write to.
|
||||
// In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written.
|
||||
|
Loading…
x
Reference in New Issue
Block a user