mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-04-27 09:51:23 +02:00
Merge pull request #2801 from Tilka/stuff
x64Emitter: add MOVSLDUP/MOVSHDUP
This commit is contained in:
commit
59e48e0fcf
@ -1636,22 +1636,47 @@ void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, d
|
|||||||
|
|
||||||
void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
|
void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
|
||||||
|
|
||||||
// THESE TWO ARE UNTESTED.
|
|
||||||
void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
|
void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
|
||||||
void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
|
void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
|
||||||
|
|
||||||
void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
|
void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
|
||||||
void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
|
void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
|
||||||
|
|
||||||
|
// Pretty much every x86 CPU nowadays supports SSE3,
|
||||||
|
// but the SSE2 fallbacks are easy.
|
||||||
|
// Emits MOVSLDUP: regOp = { arg[0], arg[0], arg[2], arg[2] } (32-bit lanes),
// i.e. each even-indexed single is duplicated into the odd lane above it.
//
// regOp: destination XMM register.
// arg:   source operand; it is copied into regOp first on the fallback path
//        unless it already is regOp.
void XEmitter::MOVSLDUP(X64Reg regOp, const OpArg& arg)
{
	if (cpu_info.bSSE3)
	{
		WriteSSEOp(0xF3, 0x12, regOp, arg);
	}
	else
	{
		// SSE2 fallback: bring the source into regOp, then shuffle in place.
		if (!arg.IsSimpleReg(regOp))
			MOVAPD(regOp, arg);
		// Lane selectors 0,0,2,2 -> imm8 0xA0. UNPCKLPS(x, x) would produce
		// { x0, x0, x1, x1 }, which only matches MOVSLDUP in the low 64 bits.
		SHUFPS(regOp, R(regOp), 0xA0);
	}
}
|
||||||
|
// Emits MOVSHDUP: regOp = { arg[1], arg[1], arg[3], arg[3] } (32-bit lanes),
// i.e. each odd-indexed single is duplicated into the even lane below it.
//
// regOp: destination XMM register.
// arg:   source operand; it is copied into regOp first on the fallback path
//        unless it already is regOp.
void XEmitter::MOVSHDUP(X64Reg regOp, const OpArg& arg)
{
	if (cpu_info.bSSE3)
	{
		WriteSSEOp(0xF3, 0x16, regOp, arg);
	}
	else
	{
		// SSE2 fallback: bring the source into regOp, then shuffle in place.
		if (!arg.IsSimpleReg(regOp))
			MOVAPD(regOp, arg);
		// Lane selectors 1,1,3,3 -> imm8 0xF5. UNPCKHPS(x, x) would produce
		// { x2, x2, x3, x3 }, which does not match MOVSHDUP in any lane pair.
		SHUFPS(regOp, R(regOp), 0xF5);
	}
}
|
||||||
void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg)
|
void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg)
|
||||||
{
|
{
|
||||||
if (cpu_info.bSSE3)
|
if (cpu_info.bSSE3)
|
||||||
{
|
{
|
||||||
WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup
|
WriteSSEOp(0xF2, 0x12, regOp, arg);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Simulate this instruction with SSE2 instructions
|
|
||||||
if (!arg.IsSimpleReg(regOp))
|
if (!arg.IsSimpleReg(regOp))
|
||||||
MOVSD(regOp, arg);
|
MOVSD(regOp, arg);
|
||||||
UNPCKLPD(regOp, R(regOp));
|
UNPCKLPD(regOp, R(regOp));
|
||||||
|
@ -581,9 +581,12 @@ public:
|
|||||||
void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle);
|
void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle);
|
||||||
void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle);
|
void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle);
|
||||||
|
|
||||||
// SSE/SSE2: Useful alternative to shuffle in some cases.
|
// SSE3
|
||||||
|
void MOVSLDUP(X64Reg regOp, const OpArg& arg);
|
||||||
|
void MOVSHDUP(X64Reg regOp, const OpArg& arg);
|
||||||
void MOVDDUP(X64Reg regOp, const OpArg& arg);
|
void MOVDDUP(X64Reg regOp, const OpArg& arg);
|
||||||
|
|
||||||
|
// SSE/SSE2: Useful alternative to shuffle in some cases.
|
||||||
void UNPCKLPS(X64Reg dest, const OpArg& src);
|
void UNPCKLPS(X64Reg dest, const OpArg& src);
|
||||||
void UNPCKHPS(X64Reg dest, const OpArg& src);
|
void UNPCKHPS(X64Reg dest, const OpArg& src);
|
||||||
void UNPCKLPD(X64Reg dest, const OpArg& src);
|
void UNPCKLPD(X64Reg dest, const OpArg& src);
|
||||||
|
@ -289,7 +289,7 @@ void RegCache::BindToRegister(size_t i, bool doLoad, bool makeDirty)
|
|||||||
LoadRegister(i, xr);
|
LoadRegister(i, xr);
|
||||||
for (size_t j = 0; j < regs.size(); j++)
|
for (size_t j = 0; j < regs.size(); j++)
|
||||||
{
|
{
|
||||||
if (i != j && regs[j].location.IsSimpleReg() && regs[j].location.GetSimpleReg() == xr)
|
if (i != j && regs[j].location.IsSimpleReg(xr))
|
||||||
{
|
{
|
||||||
Crash();
|
Crash();
|
||||||
}
|
}
|
||||||
|
@ -170,7 +170,7 @@ public:
|
|||||||
int NumFreeRegisters();
|
int NumFreeRegisters();
|
||||||
};
|
};
|
||||||
|
|
||||||
class GPRRegCache : public RegCache
|
class GPRRegCache final : public RegCache
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
|
void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
|
||||||
@ -183,7 +183,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class FPURegCache : public RegCache
|
class FPURegCache final : public RegCache
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
|
void StoreRegister(size_t preg, const Gen::OpArg& newLoc) override;
|
||||||
|
@ -476,7 +476,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, B
|
|||||||
// fun tricks...
|
// fun tricks...
|
||||||
if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(address))
|
if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(address))
|
||||||
{
|
{
|
||||||
if (!arg.IsSimpleReg() || arg.GetSimpleReg() != RSCRATCH)
|
if (!arg.IsSimpleReg(RSCRATCH))
|
||||||
MOV(accessSize, R(RSCRATCH), arg);
|
MOV(accessSize, R(RSCRATCH), arg);
|
||||||
|
|
||||||
UnsafeWriteGatherPipe(accessSize);
|
UnsafeWriteGatherPipe(accessSize);
|
||||||
@ -654,7 +654,7 @@ void EmuCodeBlock::ForceSinglePrecision(X64Reg output, const OpArg& input, bool
|
|||||||
MOVDDUP(output, R(output));
|
MOVDDUP(output, R(output));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (!input.IsSimpleReg() || input.GetSimpleReg() != output)
|
else if (!input.IsSimpleReg(output))
|
||||||
{
|
{
|
||||||
if (duplicate)
|
if (duplicate)
|
||||||
MOVDDUP(output, input);
|
MOVDDUP(output, input);
|
||||||
@ -667,7 +667,7 @@ void EmuCodeBlock::ForceSinglePrecision(X64Reg output, const OpArg& input, bool
|
|||||||
void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), void (XEmitter::*sseOp)(X64Reg, const OpArg&),
|
void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&), void (XEmitter::*sseOp)(X64Reg, const OpArg&),
|
||||||
X64Reg regOp, const OpArg& arg1, const OpArg& arg2, bool packed, bool reversible)
|
X64Reg regOp, const OpArg& arg1, const OpArg& arg2, bool packed, bool reversible)
|
||||||
{
|
{
|
||||||
if (arg1.IsSimpleReg() && regOp == arg1.GetSimpleReg())
|
if (arg1.IsSimpleReg(regOp))
|
||||||
{
|
{
|
||||||
(this->*sseOp)(regOp, arg2);
|
(this->*sseOp)(regOp, arg2);
|
||||||
}
|
}
|
||||||
@ -675,7 +675,7 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
|
|||||||
{
|
{
|
||||||
(this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2);
|
(this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2);
|
||||||
}
|
}
|
||||||
else if (arg2.IsSimpleReg() && arg2.GetSimpleReg() == regOp)
|
else if (arg2.IsSimpleReg(regOp))
|
||||||
{
|
{
|
||||||
if (reversible)
|
if (reversible)
|
||||||
{
|
{
|
||||||
@ -684,7 +684,7 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// The ugly case: regOp == arg2 without AVX, or with arg1 == memory
|
// The ugly case: regOp == arg2 without AVX, or with arg1 == memory
|
||||||
if (!arg1.IsSimpleReg() || arg1.GetSimpleReg() != XMM0)
|
if (!arg1.IsSimpleReg(XMM0))
|
||||||
MOVAPD(XMM0, arg1);
|
MOVAPD(XMM0, arg1);
|
||||||
if (cpu_info.bAVX)
|
if (cpu_info.bAVX)
|
||||||
{
|
{
|
||||||
@ -714,7 +714,7 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&),
|
|||||||
void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&, u8), void (XEmitter::*sseOp)(X64Reg, const OpArg&, u8),
|
void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&, u8), void (XEmitter::*sseOp)(X64Reg, const OpArg&, u8),
|
||||||
X64Reg regOp, const OpArg& arg1, const OpArg& arg2, u8 imm)
|
X64Reg regOp, const OpArg& arg1, const OpArg& arg2, u8 imm)
|
||||||
{
|
{
|
||||||
if (arg1.IsSimpleReg() && regOp == arg1.GetSimpleReg())
|
if (arg1.IsSimpleReg(regOp))
|
||||||
{
|
{
|
||||||
(this->*sseOp)(regOp, arg2, imm);
|
(this->*sseOp)(regOp, arg2, imm);
|
||||||
}
|
}
|
||||||
@ -722,10 +722,10 @@ void EmuCodeBlock::avx_op(void (XEmitter::*avxOp)(X64Reg, X64Reg, const OpArg&,
|
|||||||
{
|
{
|
||||||
(this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2, imm);
|
(this->*avxOp)(regOp, arg1.GetSimpleReg(), arg2, imm);
|
||||||
}
|
}
|
||||||
else if (arg2.IsSimpleReg() && arg2.GetSimpleReg() == regOp)
|
else if (arg2.IsSimpleReg(regOp))
|
||||||
{
|
{
|
||||||
// The ugly case: regOp == arg2 without AVX, or with arg1 == memory
|
// The ugly case: regOp == arg2 without AVX, or with arg1 == memory
|
||||||
if (!arg1.IsSimpleReg() || arg1.GetSimpleReg() != XMM0)
|
if (!arg1.IsSimpleReg(XMM0))
|
||||||
MOVAPD(XMM0, arg1);
|
MOVAPD(XMM0, arg1);
|
||||||
if (cpu_info.bAVX)
|
if (cpu_info.bAVX)
|
||||||
{
|
{
|
||||||
@ -764,14 +764,14 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, const OpArg& input, X64Reg
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!input.IsSimpleReg() || input.GetSimpleReg() != output)
|
if (!input.IsSimpleReg(output))
|
||||||
MOVAPD(output, input);
|
MOVAPD(output, input);
|
||||||
avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M(psRoundBit), true, true);
|
avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M(psRoundBit), true, true);
|
||||||
PAND(output, M(psMantissaTruncate));
|
PAND(output, M(psMantissaTruncate));
|
||||||
PADDQ(output, R(tmp));
|
PADDQ(output, R(tmp));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (!input.IsSimpleReg() || input.GetSimpleReg() != output)
|
else if (!input.IsSimpleReg(output))
|
||||||
{
|
{
|
||||||
MOVAPD(output, input);
|
MOVAPD(output, input);
|
||||||
}
|
}
|
||||||
|
@ -46,9 +46,6 @@ CodeBuffer::~CodeBuffer()
|
|||||||
delete[] codebuffer;
|
delete[] codebuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AnalyzeFunction2(Symbol &func);
|
|
||||||
u32 EvaluateBranchTarget(UGeckoInstruction instr, u32 pc);
|
|
||||||
|
|
||||||
#define INVALID_TARGET ((u32)-1)
|
#define INVALID_TARGET ((u32)-1)
|
||||||
|
|
||||||
u32 EvaluateBranchTarget(UGeckoInstruction instr, u32 pc)
|
u32 EvaluateBranchTarget(UGeckoInstruction instr, u32 pc)
|
||||||
|
@ -721,6 +721,8 @@ TWO_OP_SSE_TEST(ANDNPD, "dqword")
|
|||||||
TWO_OP_SSE_TEST(ORPD, "dqword")
|
TWO_OP_SSE_TEST(ORPD, "dqword")
|
||||||
TWO_OP_SSE_TEST(XORPD, "dqword")
|
TWO_OP_SSE_TEST(XORPD, "dqword")
|
||||||
|
|
||||||
|
TWO_OP_SSE_TEST(MOVSLDUP, "dqword")
|
||||||
|
TWO_OP_SSE_TEST(MOVSHDUP, "dqword")
|
||||||
TWO_OP_SSE_TEST(MOVDDUP, "qword")
|
TWO_OP_SSE_TEST(MOVDDUP, "qword")
|
||||||
|
|
||||||
TWO_OP_SSE_TEST(UNPCKLPS, "dqword")
|
TWO_OP_SSE_TEST(UNPCKLPS, "dqword")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user