mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-02-13 15:59:23 +01:00
VertexLoaderX64: use common code for FORMAT_FLOAT
This commit is contained in:
parent
7030542546
commit
9da86092ae
@ -65,7 +65,7 @@ OpArg VertexLoaderX64::GetVertexAddr(int array, u64 attribute)
|
|||||||
|
|
||||||
int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format)
|
int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format)
|
||||||
{
|
{
|
||||||
static const __m128i shuffle_lut[5][3] = {
|
static const __m128i shuffle_lut[4][3] = {
|
||||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF00L), // 1x u8
|
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF00L), // 1x u8
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L), // 2x u8
|
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L), // 2x u8
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L)}, // 3x u8
|
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L)}, // 3x u8
|
||||||
@ -78,9 +78,6 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
|
|||||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x0001FFFFL), // 1x s16
|
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x0001FFFFL), // 1x s16
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL), // 2x s16
|
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL), // 2x s16
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL)}, // 3x s16
|
_mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL)}, // 3x s16
|
||||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00010203L), // 1x float
|
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L), // 2x float
|
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L)}, // 3x float
|
|
||||||
};
|
};
|
||||||
static const __m128 scale_factors[32] = {
|
static const __m128 scale_factors[32] = {
|
||||||
_mm_set_ps1(1./(1u<< 0)), _mm_set_ps1(1./(1u<< 1)), _mm_set_ps1(1./(1u<< 2)), _mm_set_ps1(1./(1u<< 3)),
|
_mm_set_ps1(1./(1u<< 0)), _mm_set_ps1(1./(1u<< 1)), _mm_set_ps1(1./(1u<< 2)), _mm_set_ps1(1./(1u<< 3)),
|
||||||
@ -110,6 +107,21 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
|
|||||||
if (attribute == DIRECT)
|
if (attribute == DIRECT)
|
||||||
m_src_ofs += load_bytes;
|
m_src_ofs += load_bytes;
|
||||||
|
|
||||||
|
if (format == FORMAT_FLOAT)
|
||||||
|
{
|
||||||
|
// Floats don't need to be scaled or converted,
|
||||||
|
// so we can just load/swap/store them directly
|
||||||
|
// and return early.
|
||||||
|
for (int i = 0; i < count_in; i++)
|
||||||
|
{
|
||||||
|
LoadAndSwap(32, scratch3, data);
|
||||||
|
MOV(32, dest, R(scratch3));
|
||||||
|
data.offset += sizeof(float);
|
||||||
|
dest.offset += sizeof(float);
|
||||||
|
}
|
||||||
|
return load_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
if (cpu_info.bSSSE3)
|
if (cpu_info.bSSSE3)
|
||||||
{
|
{
|
||||||
if (load_bytes > 8)
|
if (load_bytes > 8)
|
||||||
@ -170,29 +182,13 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
|
|||||||
else
|
else
|
||||||
PSRLD(coords, 16);
|
PSRLD(coords, 16);
|
||||||
break;
|
break;
|
||||||
case FORMAT_FLOAT:
|
|
||||||
// Floats don't need to be scaled or converted,
|
|
||||||
// so we can just load/swap/store them directly
|
|
||||||
// and return early.
|
|
||||||
// (In SSSE3 we still need to store them.)
|
|
||||||
for (int i = 0; i < count_in; i++)
|
|
||||||
{
|
|
||||||
LoadAndSwap(32, scratch3, data);
|
|
||||||
MOV(32, dest, R(scratch3));
|
|
||||||
data.offset += sizeof(float);
|
|
||||||
dest.offset += sizeof(float);
|
|
||||||
}
|
|
||||||
return load_bytes;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (format != FORMAT_FLOAT)
|
CVTDQ2PS(coords, R(coords));
|
||||||
{
|
|
||||||
CVTDQ2PS(coords, R(coords));
|
|
||||||
|
|
||||||
if (dequantize && scaling_exponent)
|
if (dequantize && scaling_exponent)
|
||||||
MULPS(coords, M(&scale_factors[scaling_exponent]));
|
MULPS(coords, M(&scale_factors[scaling_exponent]));
|
||||||
}
|
|
||||||
|
|
||||||
switch (count_out)
|
switch (count_out)
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user