// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
2008-12-08 04:46:09 +00:00
|
|
|
|
2014-02-10 13:54:46 -05:00
|
|
|
#pragma once
|
2008-12-08 04:46:09 +00:00
|
|
|
|
2009-02-15 13:45:03 +00:00
|
|
|
// Top vertex loaders
// Metroid Prime: P I16-flt N I16-s16 T0 I16-u16 T1 i16-flt
|
|
|
|
|
2010-03-05 12:04:09 +00:00
|
|
|
#include <algorithm>
|
2014-08-24 23:53:28 -04:00
|
|
|
#include <memory>
|
2008-12-08 04:46:09 +00:00
|
|
|
#include <string>
|
2014-08-27 22:37:08 -04:00
|
|
|
#include <unordered_map>
|
2008-12-08 04:46:09 +00:00
|
|
|
|
2014-09-07 20:06:58 -05:00
|
|
|
#include "Common/CommonTypes.h"
|
2014-02-17 05:18:15 -05:00
|
|
|
#include "Common/x64Emitter.h"
|
2009-02-28 16:33:59 +00:00
|
|
|
|
2014-02-17 05:18:15 -05:00
|
|
|
#include "VideoCommon/CPMemory.h"
|
|
|
|
#include "VideoCommon/DataReader.h"
|
|
|
|
#include "VideoCommon/NativeVertexFormat.h"
|
2008-12-08 04:46:09 +00:00
|
|
|
|
2014-11-11 01:48:38 -08:00
|
|
|
#if _M_SSE >= 0x401
|
|
|
|
#include <smmintrin.h>
|
|
|
|
#include <emmintrin.h>
|
|
|
|
#elif _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
|
|
|
|
#include <tmmintrin.h>
|
|
|
|
#endif
|
|
|
|
|
2014-06-13 18:36:54 +00:00
|
|
|
#ifdef _M_X86
|
2014-02-23 15:14:27 +01:00
|
|
|
#define USE_VERTEX_LOADER_JIT
|
|
|
|
#endif
|
2008-12-19 21:24:52 +00:00
|
|
|
|
2014-07-08 15:58:25 +02:00
|
|
|
// They are used for the communication with the loader functions
|
|
|
|
extern int tcIndex;
|
|
|
|
extern int colIndex;
|
|
|
|
extern int colElements[2];
|
2014-11-11 01:48:38 -08:00
|
|
|
GC_ALIGNED128(extern float posScale[4]);
|
|
|
|
GC_ALIGNED64(extern float tcScale[8][2]);
|
2014-07-08 15:58:25 +02:00
|
|
|
|
2008-12-08 04:46:09 +00:00
|
|
|
class VertexLoaderUID
|
|
|
|
{
|
|
|
|
u32 vid[5];
|
2010-03-06 10:07:37 +00:00
|
|
|
size_t hash;
|
2008-12-08 04:46:09 +00:00
|
|
|
public:
|
2013-10-29 01:23:17 -04:00
|
|
|
VertexLoaderUID()
|
2013-04-24 09:21:54 -04:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2014-07-24 01:12:12 +02:00
|
|
|
VertexLoaderUID(const TVtxDesc& vtx_desc, const VAT& vat)
|
2013-04-24 09:21:54 -04:00
|
|
|
{
|
2014-07-24 01:12:12 +02:00
|
|
|
vid[0] = vtx_desc.Hex & 0xFFFFFFFF;
|
|
|
|
vid[1] = vtx_desc.Hex >> 32;
|
|
|
|
vid[2] = vat.g0.Hex & ~VAT_0_FRACBITS;
|
|
|
|
vid[3] = vat.g1.Hex & ~VAT_1_FRACBITS;
|
|
|
|
vid[4] = vat.g2.Hex & ~VAT_2_FRACBITS;
|
2010-03-06 10:07:37 +00:00
|
|
|
hash = CalculateHash();
|
2008-12-08 04:46:09 +00:00
|
|
|
}
|
2013-04-24 09:21:54 -04:00
|
|
|
|
|
|
|
bool operator < (const VertexLoaderUID &other) const
|
|
|
|
{
|
2010-03-06 02:07:48 +00:00
|
|
|
// This is complex because of speed.
|
|
|
|
if (vid[0] < other.vid[0])
|
|
|
|
return true;
|
|
|
|
else if (vid[0] > other.vid[0])
|
|
|
|
return false;
|
2013-04-24 09:21:54 -04:00
|
|
|
|
|
|
|
for (int i = 1; i < 5; ++i)
|
|
|
|
{
|
2010-03-06 02:07:48 +00:00
|
|
|
if (vid[i] < other.vid[i])
|
|
|
|
return true;
|
|
|
|
else if (vid[i] > other.vid[i])
|
|
|
|
return false;
|
|
|
|
}
|
2013-04-24 09:21:54 -04:00
|
|
|
|
2010-03-06 02:07:48 +00:00
|
|
|
return false;
|
|
|
|
}
|
2013-04-24 09:21:54 -04:00
|
|
|
|
|
|
|
bool operator == (const VertexLoaderUID& rh) const
|
|
|
|
{
|
2010-03-06 10:07:37 +00:00
|
|
|
return hash == rh.hash && std::equal(vid, vid + sizeof(vid) / sizeof(vid[0]), rh.vid);
|
|
|
|
}
|
2013-04-24 09:21:54 -04:00
|
|
|
|
|
|
|
size_t GetHash() const
|
|
|
|
{
|
2010-03-06 10:07:37 +00:00
|
|
|
return hash;
|
|
|
|
}
|
2013-04-24 09:21:54 -04:00
|
|
|
|
2010-03-06 10:07:37 +00:00
|
|
|
private:
|
2013-04-24 09:21:54 -04:00
|
|
|
|
|
|
|
size_t CalculateHash()
|
|
|
|
{
|
2010-03-06 02:07:48 +00:00
|
|
|
size_t h = -1;
|
2013-04-24 09:21:54 -04:00
|
|
|
|
2013-10-29 01:09:01 -04:00
|
|
|
for (auto word : vid)
|
2013-04-24 09:21:54 -04:00
|
|
|
{
|
2013-10-29 01:09:01 -04:00
|
|
|
h = h * 137 + word;
|
2010-03-06 02:07:48 +00:00
|
|
|
}
|
2013-04-24 09:21:54 -04:00
|
|
|
|
2010-03-06 02:07:48 +00:00
|
|
|
return h;
|
|
|
|
}
|
2008-12-08 04:46:09 +00:00
|
|
|
};
|
|
|
|
|
2013-02-26 13:49:00 -06:00
|
|
|
// ARMTODO: This should be done in a better way
|
|
|
|
#ifndef _M_GENERIC
|
2014-04-09 01:22:52 -05:00
|
|
|
class VertexLoader : public Gen::X64CodeBlock
|
2013-02-26 13:49:00 -06:00
|
|
|
#else
|
|
|
|
class VertexLoader
|
|
|
|
#endif
|
2008-12-08 04:46:09 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
|
|
|
|
~VertexLoader();
|
|
|
|
|
|
|
|
int GetVertexSize() const {return m_VertexSize;}
|
2014-07-26 01:10:44 +02:00
|
|
|
u32 GetNativeComponents() const { return m_native_components; }
|
|
|
|
const PortableVertexDeclaration& GetNativeVertexDeclaration() const
|
|
|
|
{ return m_native_vtx_decl; }
|
2013-10-29 01:23:17 -04:00
|
|
|
|
2014-07-24 01:51:37 +02:00
|
|
|
void SetupRunVertices(const VAT& vat, int primitive, int const count);
|
|
|
|
void RunVertices(const VAT& vat, int primitive, int count);
|
2008-12-08 04:46:09 +00:00
|
|
|
|
|
|
|
// For debugging / profiling
|
2009-02-15 12:38:25 +00:00
|
|
|
void AppendToString(std::string *dest) const;
|
|
|
|
int GetNumLoadedVerts() const { return m_numLoadedVertices; }
|
2008-12-08 04:46:09 +00:00
|
|
|
|
2014-08-24 23:53:28 -04:00
|
|
|
NativeVertexFormat* GetNativeVertexFormat();
|
|
|
|
static void ClearNativeVertexFormatCache() { s_native_vertex_map.clear(); }
|
|
|
|
|
2008-12-08 04:46:09 +00:00
|
|
|
private:
|
|
|
|
int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator.
|
|
|
|
|
|
|
|
// GC vertex format
|
|
|
|
TVtxAttr m_VtxAttr; // VAT decoded into easy format
|
|
|
|
TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided.
|
|
|
|
|
|
|
|
// PC vertex format
|
2014-07-26 01:10:44 +02:00
|
|
|
u32 m_native_components;
|
|
|
|
PortableVertexDeclaration m_native_vtx_decl;
|
2008-12-08 04:46:09 +00:00
|
|
|
|
2014-02-23 15:14:27 +01:00
|
|
|
#ifndef USE_VERTEX_LOADER_JIT
|
|
|
|
// Pipeline.
|
2008-12-08 04:46:09 +00:00
|
|
|
TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower.
|
|
|
|
int m_numPipelineStages;
|
2014-02-23 15:14:27 +01:00
|
|
|
#endif
|
2008-12-08 04:46:09 +00:00
|
|
|
|
2008-12-19 21:24:52 +00:00
|
|
|
const u8 *m_compiledCode;
|
2008-12-08 04:46:09 +00:00
|
|
|
|
|
|
|
int m_numLoadedVertices;
|
|
|
|
|
2014-08-24 23:53:28 -04:00
|
|
|
NativeVertexFormat* m_native_vertex_format;
|
2014-08-27 22:37:08 -04:00
|
|
|
static std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> s_native_vertex_map;
|
2014-08-24 23:53:28 -04:00
|
|
|
|
2014-07-24 01:25:23 +02:00
|
|
|
void SetVAT(const VAT& vat);
|
2008-12-08 04:46:09 +00:00
|
|
|
|
|
|
|
void CompileVertexTranslator();
|
2013-02-21 11:36:10 +01:00
|
|
|
void ConvertVertices(int count);
|
2008-12-08 04:46:09 +00:00
|
|
|
|
|
|
|
void WriteCall(TPipelineFunction);
|
2009-01-10 23:10:33 +00:00
|
|
|
|
2013-02-26 13:49:00 -06:00
|
|
|
#ifndef _M_GENERIC
|
2009-02-06 19:52:23 +00:00
|
|
|
void WriteGetVariable(int bits, Gen::OpArg dest, void *address);
|
|
|
|
void WriteSetVariable(int bits, void *address, Gen::OpArg dest);
|
2013-02-26 13:49:00 -06:00
|
|
|
#endif
|
2013-03-19 21:51:12 -04:00
|
|
|
};
|
2014-11-11 01:48:38 -08:00
|
|
|
|
|
|
|
#if _M_SSE >= 0x301
|
|
|
|
// Control vectors for _mm_shuffle_epi8, consumed by Vertex_Read_SSSE3.
// Each control byte names the source byte copied into that position; a control
// byte of 0xFF (top bit set) yields a zero byte in the result.
// Suffix _3 / _2: variant used when three / two input elements are read.

// 32-bit elements: reverse the byte order inside each element.
static const __m128i kMaskSwap32_3      = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L);
static const __m128i kMaskSwap32_2      = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);

// 16-bit elements: byte-swap each element and place it in the low ("l", used
// for unsigned types) or high ("h", used for other types so an arithmetic
// shift can sign-extend) half of its 32-bit lane.
static const __m128i kMaskSwap16to32l_3 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFF0405L, 0xFFFF0203L, 0xFFFF0001L);
static const __m128i kMaskSwap16to32l_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFF0203L, 0xFFFF0001L);
static const __m128i kMaskSwap16to32h_3 = _mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL);
static const __m128i kMaskSwap16to32h_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL);

// 8-bit elements: place each byte in the lowest ("l") or highest ("h") byte of
// its 32-bit lane.
static const __m128i kMask8to32l_3      = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L);
static const __m128i kMask8to32l_2      = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L);
static const __m128i kMask8to32h_3      = _mm_set_epi32(0xFFFFFFFFL, 0x02FFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL);
static const __m128i kMask8to32h_2      = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL);
|
|
|
|
|
|
|
|
template <typename T, bool threeIn, bool threeOut>
|
|
|
|
__forceinline void Vertex_Read_SSSE3(const T* pData, __m128 scale)
|
|
|
|
{
|
|
|
|
__m128i coords, mask;
|
|
|
|
|
|
|
|
int loadBytes = sizeof(T) * (2 + threeIn);
|
|
|
|
if (loadBytes > 8)
|
|
|
|
coords = _mm_loadu_si128((__m128i*)pData);
|
|
|
|
else if (loadBytes > 4)
|
|
|
|
coords = _mm_loadl_epi64((__m128i*)pData);
|
|
|
|
else
|
|
|
|
coords = _mm_cvtsi32_si128(*(u32*)pData);
|
|
|
|
|
|
|
|
// Float case (no scaling)
|
|
|
|
if (sizeof(T) == 4)
|
|
|
|
{
|
|
|
|
coords = _mm_shuffle_epi8(coords, threeIn ? kMaskSwap32_3 : kMaskSwap32_2);
|
|
|
|
if (threeOut)
|
|
|
|
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, coords);
|
|
|
|
else
|
|
|
|
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, coords);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// Byte swap, unpack, and move to high bytes for sign extend.
|
|
|
|
if (std::is_unsigned<T>::value)
|
|
|
|
mask = sizeof(T) == 2 ? (threeIn ? kMaskSwap16to32l_3 : kMaskSwap16to32l_2) : (threeIn ? kMask8to32l_3 : kMask8to32l_2);
|
|
|
|
else
|
|
|
|
mask = sizeof(T) == 2 ? (threeIn ? kMaskSwap16to32h_3 : kMaskSwap16to32h_2) : (threeIn ? kMask8to32h_3 : kMask8to32h_2);
|
|
|
|
coords = _mm_shuffle_epi8(coords, mask);
|
|
|
|
|
|
|
|
// Sign extend
|
|
|
|
if (std::is_signed<T>::value)
|
|
|
|
coords = _mm_srai_epi32(coords, 32 - sizeof(T) * 8);
|
|
|
|
|
|
|
|
__m128 out = _mm_mul_ps(_mm_cvtepi32_ps(coords), scale);
|
|
|
|
if (threeOut)
|
|
|
|
_mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, out);
|
|
|
|
else
|
|
|
|
_mm_storel_pi((__m64*)VertexManager::s_pCurBufferPointer, out);
|
|
|
|
}
|
|
|
|
|
|
|
|
VertexManager::s_pCurBufferPointer += sizeof(float) * (2 + threeOut);
|
|
|
|
}
|
|
|
|
#endif
|