mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 23:41:19 +01:00
c569b33829
Second: A experiment. implemented parallelization in texture decoding using openmp. is most a experiment to test the performance in different os/plataforms. in my system (windows x64 amd 1055t) give a speedup in large textures, but i tested in in intel dual core and gives a slowdown. o i limited the use for large textures and cpus with more than 3 cores. please test an let me know if it improves or degrades the speed. please for linux and osx user. to enable this you will have to enable your compiler support for openmp to test this code. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7284 8ced0084-cf51-0410-be5f-012b33b47a6e
808 lines
28 KiB
C++
808 lines
28 KiB
C++
// Copyright (C) 2003 Dolphin Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official SVN repository and contact information can be found at
|
|
// http://code.google.com/p/dolphin-emu/
|
|
|
|
#include <assert.h>
|
|
|
|
#include "Common.h"
|
|
#include "VideoCommon.h"
|
|
#include "VideoConfig.h"
|
|
#include "MemoryUtil.h"
|
|
#include "StringUtil.h"
|
|
#include "x64Emitter.h"
|
|
#include "ABI.h"
|
|
#include "PixelEngine.h"
|
|
#include "Host.h"
|
|
|
|
#include "LookUpTables.h"
|
|
#include "Statistics.h"
|
|
#include "VertexLoaderManager.h"
|
|
#include "VertexLoader.h"
|
|
#include "BPMemory.h"
|
|
#include "DataReader.h"
|
|
#include "VertexManagerBase.h"
|
|
|
|
#include "VertexLoader_Position.h"
|
|
#include "VertexLoader_Normal.h"
|
|
#include "VertexLoader_Color.h"
|
|
#include "VertexLoader_TextCoord.h"
|
|
|
|
//BBox
|
|
#include "XFMemory.h"
|
|
extern float GC_ALIGNED16(g_fProjectionMatrix[16]);
|
|
|
|
#define USE_JIT
|
|
|
|
#define COMPILED_CODE_SIZE 4096
|
|
|
|
NativeVertexFormat *g_nativeVertexFmt;
|
|
|
|
#ifndef _WIN32
|
|
#undef inline
|
|
#define inline
|
|
#endif
|
|
|
|
// Matrix components are first in GC format but later in PC format - we need to store it temporarily
|
|
// when decoding each vertex.
|
|
static u8 s_curposmtx;
|
|
static u8 s_curtexmtx[8];
|
|
static int s_texmtxwrite = 0;
|
|
static int s_texmtxread = 0;
|
|
|
|
static int loop_counter;
|
|
|
|
// Vertex loaders read these. Although the scale ones should be baked into the shader.
|
|
int tcIndex;
|
|
int colIndex;
|
|
TVtxAttr* pVtxAttr;
|
|
int colElements[2];
|
|
float posScale;
|
|
float tcScale[8];
|
|
|
|
static const float fractionTable[32] = {
|
|
1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3),
|
|
1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7),
|
|
1.0f / (1U << 8), 1.0f / (1U << 9), 1.0f / (1U << 10), 1.0f / (1U << 11),
|
|
1.0f / (1U << 12), 1.0f / (1U << 13), 1.0f / (1U << 14), 1.0f / (1U << 15),
|
|
1.0f / (1U << 16), 1.0f / (1U << 17), 1.0f / (1U << 18), 1.0f / (1U << 19),
|
|
1.0f / (1U << 20), 1.0f / (1U << 21), 1.0f / (1U << 22), 1.0f / (1U << 23),
|
|
1.0f / (1U << 24), 1.0f / (1U << 25), 1.0f / (1U << 26), 1.0f / (1U << 27),
|
|
1.0f / (1U << 28), 1.0f / (1U << 29), 1.0f / (1U << 30), 1.0f / (1U << 31),
|
|
};
|
|
|
|
using namespace Gen;
|
|
|
|
void LOADERDECL PosMtx_ReadDirect_UByte()
|
|
{
|
|
s_curposmtx = DataReadU8() & 0x3f;
|
|
PRIM_LOG("posmtx: %d, ", s_curposmtx);
|
|
}
|
|
|
|
void LOADERDECL PosMtx_Write()
|
|
{
|
|
*VertexManager::s_pCurBufferPointer++ = s_curposmtx;
|
|
*VertexManager::s_pCurBufferPointer++ = 0;
|
|
*VertexManager::s_pCurBufferPointer++ = 0;
|
|
*VertexManager::s_pCurBufferPointer++ = 0;
|
|
}
|
|
|
|
void LOADERDECL UpdateBoundingBox()
|
|
{
|
|
if (!PixelEngine::bbox_active)
|
|
return;
|
|
|
|
// Truly evil hack, reading backwards from the write pointer. If we were writing to write-only
|
|
// memory like we might have been with a D3D vertex buffer, this would have been a bad idea.
|
|
float *data = (float *)(VertexManager::s_pCurBufferPointer - 12);
|
|
// We must transform the just loaded point by the current world and projection matrix - in software.
|
|
// Then convert to screen space and update the bounding box.
|
|
float p[3] = {data[0], data[1], data[2]};
|
|
|
|
const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4;
|
|
const float *proj_matrix = &g_fProjectionMatrix[0];
|
|
|
|
float t[3];
|
|
t[0] = p[0] * world_matrix[0] + p[1] * world_matrix[1] + p[2] * world_matrix[2] + world_matrix[3];
|
|
t[1] = p[0] * world_matrix[4] + p[1] * world_matrix[5] + p[2] * world_matrix[6] + world_matrix[7];
|
|
t[2] = p[0] * world_matrix[8] + p[1] * world_matrix[9] + p[2] * world_matrix[10] + world_matrix[11];
|
|
|
|
float o[4];
|
|
o[2] = t[0] * proj_matrix[8] + t[1] * proj_matrix[9] + t[2] * proj_matrix[10] + proj_matrix[11];
|
|
// Depth culling
|
|
if (o[2] < 0.0) {
|
|
// No pixels are likely to be drawn - don't update bounding box.
|
|
return;
|
|
}
|
|
o[0] = t[0] * proj_matrix[0] + t[1] * proj_matrix[1] + t[2] * proj_matrix[2] + proj_matrix[3];
|
|
o[1] = t[0] * proj_matrix[4] + t[1] * proj_matrix[5] + t[2] * proj_matrix[6] + proj_matrix[7];
|
|
o[3] = t[0] * proj_matrix[12] + t[1] * proj_matrix[13] + t[2] * proj_matrix[14] + proj_matrix[15];
|
|
|
|
o[0] /= o[3];
|
|
o[1] /= o[3];
|
|
|
|
// should possibly adjust for viewport?
|
|
o[0] = (o[0] + 1.0f) * 320.0f;
|
|
o[1] = (o[1] + 1.0f) * 240.0f;
|
|
|
|
if (o[0] < PixelEngine::bbox[0]) PixelEngine::bbox[0] = (u16)std::max(0.0f, o[0]);
|
|
if (o[0] > PixelEngine::bbox[1]) PixelEngine::bbox[1] = (u16)std::min(640.0f, o[0]);
|
|
if (o[1] < PixelEngine::bbox[2]) PixelEngine::bbox[2] = (u16)std::max(0.0f, o[1]);
|
|
if (o[1] > PixelEngine::bbox[3]) PixelEngine::bbox[3] = (u16)std::min(480.0f, o[1]);
|
|
|
|
// Hardware tests bounding boxes in 2x2 blocks => left and top are even, right and bottom are odd
|
|
PixelEngine::bbox[0] &= ~1;
|
|
PixelEngine::bbox[1] |= 1;
|
|
PixelEngine::bbox[2] &= ~1;
|
|
PixelEngine::bbox[3] |= 1;
|
|
|
|
/*
|
|
if (GetAsyncKeyState(VK_LSHIFT)) {
|
|
ERROR_LOG(VIDEO, "XForm: %f %f %f to %f %f", p[0], p[1], p[2], o[0], o[1]);
|
|
ERROR_LOG(VIDEO, "%i %i %i %i", g_VideoInitialize.pBBox[0], g_VideoInitialize.pBBox[1], g_VideoInitialize.pBBox[2], g_VideoInitialize.pBBox[3]);
|
|
}*/
|
|
}
|
|
|
|
void LOADERDECL TexMtx_ReadDirect_UByte()
|
|
{
|
|
s_curtexmtx[s_texmtxread] = DataReadU8() & 0x3f;
|
|
PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]);
|
|
s_texmtxread++;
|
|
}
|
|
|
|
void LOADERDECL TexMtx_Write_Float()
|
|
{
|
|
*(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++];
|
|
VertexManager::s_pCurBufferPointer += 4;
|
|
}
|
|
|
|
void LOADERDECL TexMtx_Write_Float2()
|
|
{
|
|
((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
|
|
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++];
|
|
VertexManager::s_pCurBufferPointer += 8;
|
|
}
|
|
|
|
void LOADERDECL TexMtx_Write_Float4()
|
|
{
|
|
((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
|
|
((float*)VertexManager::s_pCurBufferPointer)[1] = 0;
|
|
((float*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++];
|
|
((float*)VertexManager::s_pCurBufferPointer)[3] = 0; // Just to fill out with 0.
|
|
VertexManager::s_pCurBufferPointer += 16;
|
|
}
|
|
|
|
VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
|
|
{
|
|
m_compiledCode = NULL;
|
|
m_numLoadedVertices = 0;
|
|
m_VertexSize = 0;
|
|
m_numPipelineStages = 0;
|
|
m_NativeFmt = g_vertex_manager->CreateNativeVertexFormat();
|
|
loop_counter = 0;
|
|
VertexLoader_Normal::Init();
|
|
VertexLoader_Position::Init();
|
|
VertexLoader_TextCoord::Init();
|
|
|
|
m_VtxDesc = vtx_desc;
|
|
SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex);
|
|
|
|
AllocCodeSpace(COMPILED_CODE_SIZE);
|
|
CompileVertexTranslator();
|
|
WriteProtect();
|
|
}
|
|
|
|
VertexLoader::~VertexLoader()
|
|
{
|
|
FreeCodeSpace();
|
|
delete m_NativeFmt;
|
|
}
|
|
|
|
void VertexLoader::CompileVertexTranslator()
|
|
{
|
|
m_VertexSize = 0;
|
|
const TVtxAttr &vtx_attr = m_VtxAttr;
|
|
|
|
#ifdef USE_JIT
|
|
if (m_compiledCode)
|
|
PanicAlert("trying to recompile a vtx translator");
|
|
|
|
m_compiledCode = GetCodePtr();
|
|
ABI_EmitPrologue(4);
|
|
|
|
// Start loop here
|
|
const u8 *loop_start = GetCodePtr();
|
|
|
|
// Reset component counters if present in vertex format only.
|
|
if (m_VtxDesc.Tex0Coord || m_VtxDesc.Tex1Coord || m_VtxDesc.Tex2Coord || m_VtxDesc.Tex3Coord ||
|
|
m_VtxDesc.Tex4Coord || m_VtxDesc.Tex5Coord || m_VtxDesc.Tex6Coord || m_VtxDesc.Tex7Coord) {
|
|
WriteSetVariable(32, &tcIndex, Imm32(0));
|
|
}
|
|
if (m_VtxDesc.Color0 || m_VtxDesc.Color1) {
|
|
WriteSetVariable(32, &colIndex, Imm32(0));
|
|
}
|
|
if (m_VtxDesc.Tex0MatIdx || m_VtxDesc.Tex1MatIdx || m_VtxDesc.Tex2MatIdx || m_VtxDesc.Tex3MatIdx ||
|
|
m_VtxDesc.Tex4MatIdx || m_VtxDesc.Tex5MatIdx || m_VtxDesc.Tex6MatIdx || m_VtxDesc.Tex7MatIdx) {
|
|
WriteSetVariable(32, &s_texmtxwrite, Imm32(0));
|
|
WriteSetVariable(32, &s_texmtxread, Imm32(0));
|
|
}
|
|
#endif
|
|
|
|
// Colors
|
|
const int col[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
|
|
// TextureCoord
|
|
// Since m_VtxDesc.Text7Coord is broken across a 32 bit word boundary, retrieve its value manually.
|
|
// If we didn't do this, the vertex format would be read as one bit offset from where it should be, making
|
|
// 01 become 00, and 10/11 become 01
|
|
const int tc[8] = {
|
|
m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
|
|
m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, (m_VtxDesc.Hex >> 31) & 3
|
|
};
|
|
|
|
// Reset pipeline
|
|
m_numPipelineStages = 0;
|
|
|
|
// It's a bit ugly that we poke inside m_NativeFmt in this function. Planning to fix this.
|
|
m_NativeFmt->m_components = 0;
|
|
|
|
// Position in pc vertex format.
|
|
int nat_offset = 0;
|
|
PortableVertexDeclaration vtx_decl;
|
|
memset(&vtx_decl, 0, sizeof(vtx_decl));
|
|
for (int i = 0; i < 8; i++) {
|
|
vtx_decl.texcoord_offset[i] = -1;
|
|
}
|
|
|
|
// m_VBVertexStride for texmtx and posmtx is computed later when writing.
|
|
|
|
// Position Matrix Index
|
|
if (m_VtxDesc.PosMatIdx) {
|
|
WriteCall(PosMtx_ReadDirect_UByte);
|
|
m_NativeFmt->m_components |= VB_HAS_POSMTXIDX;
|
|
m_VertexSize += 1;
|
|
}
|
|
|
|
if (m_VtxDesc.Tex0MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX0; WriteCall(TexMtx_ReadDirect_UByte); }
|
|
if (m_VtxDesc.Tex1MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX1; WriteCall(TexMtx_ReadDirect_UByte); }
|
|
if (m_VtxDesc.Tex2MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX2; WriteCall(TexMtx_ReadDirect_UByte); }
|
|
if (m_VtxDesc.Tex3MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX3; WriteCall(TexMtx_ReadDirect_UByte); }
|
|
if (m_VtxDesc.Tex4MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX4; WriteCall(TexMtx_ReadDirect_UByte); }
|
|
if (m_VtxDesc.Tex5MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX5; WriteCall(TexMtx_ReadDirect_UByte); }
|
|
if (m_VtxDesc.Tex6MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX6; WriteCall(TexMtx_ReadDirect_UByte); }
|
|
if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); }
|
|
|
|
// Write vertex position loader
|
|
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
|
|
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements);
|
|
nat_offset += 12;
|
|
|
|
// OK, so we just got a point. Let's go back and read it for the bounding box.
|
|
|
|
#ifdef BBOX_SUPPORT
|
|
WriteCall(UpdateBoundingBox);
|
|
#endif
|
|
|
|
// Normals
|
|
vtx_decl.num_normals = 0;
|
|
if (m_VtxDesc.Normal != NOT_PRESENT) {
|
|
m_VertexSize += VertexLoader_Normal::GetSize(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
|
|
TPipelineFunction pFunc = VertexLoader_Normal::GetFunction(m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3, g_Config.backend_info.bAllowSignedBytes);
|
|
if (pFunc == 0)
|
|
{
|
|
char temp[256];
|
|
sprintf(temp,"%i %i %i %i", m_VtxDesc.Normal, m_VtxAttr.NormalFormat, m_VtxAttr.NormalElements, m_VtxAttr.NormalIndex3);
|
|
Host_SysMessage("VertexLoader_Normal::GetFunction returned zero!");
|
|
}
|
|
WriteCall(pFunc);
|
|
|
|
vtx_decl.num_normals = vtx_attr.NormalElements ? 3 : 1;
|
|
vtx_decl.normal_offset[0] = -1;
|
|
vtx_decl.normal_offset[1] = -1;
|
|
vtx_decl.normal_offset[2] = -1;
|
|
vtx_decl.normal_gl_type = VAR_FLOAT;
|
|
vtx_decl.normal_gl_size = 3;
|
|
vtx_decl.normal_offset[0] = nat_offset;
|
|
nat_offset += 12;
|
|
if (vtx_attr.NormalElements) {
|
|
vtx_decl.normal_offset[1] = nat_offset;
|
|
nat_offset += 12;
|
|
vtx_decl.normal_offset[2] = nat_offset;
|
|
nat_offset += 12;
|
|
}
|
|
|
|
int numNormals = (m_VtxAttr.NormalElements == 1) ? NRM_THREE : NRM_ONE;
|
|
m_NativeFmt->m_components |= VB_HAS_NRM0;
|
|
|
|
if (numNormals == NRM_THREE)
|
|
m_NativeFmt->m_components |= VB_HAS_NRM1 | VB_HAS_NRM2;
|
|
}
|
|
|
|
vtx_decl.color_gl_type = VAR_UNSIGNED_BYTE;
|
|
vtx_decl.color_offset[0] = -1;
|
|
vtx_decl.color_offset[1] = -1;
|
|
for (int i = 0; i < 2; i++) {
|
|
m_NativeFmt->m_components |= VB_HAS_COL0 << i;
|
|
switch (col[i])
|
|
{
|
|
case NOT_PRESENT:
|
|
m_NativeFmt->m_components &= ~(VB_HAS_COL0 << i);
|
|
vtx_decl.color_offset[i] = -1;
|
|
break;
|
|
case DIRECT:
|
|
switch (m_VtxAttr.color[i].Comp)
|
|
{
|
|
case FORMAT_16B_565: m_VertexSize += 2; WriteCall(Color_ReadDirect_16b_565); break;
|
|
case FORMAT_24B_888: m_VertexSize += 3; WriteCall(Color_ReadDirect_24b_888); break;
|
|
case FORMAT_32B_888x: m_VertexSize += 4; WriteCall(Color_ReadDirect_32b_888x); break;
|
|
case FORMAT_16B_4444: m_VertexSize += 2; WriteCall(Color_ReadDirect_16b_4444); break;
|
|
case FORMAT_24B_6666: m_VertexSize += 3; WriteCall(Color_ReadDirect_24b_6666); break;
|
|
case FORMAT_32B_8888: m_VertexSize += 4; WriteCall(Color_ReadDirect_32b_8888); break;
|
|
default: _assert_(0); break;
|
|
}
|
|
break;
|
|
case INDEX8:
|
|
m_VertexSize += 1;
|
|
switch (m_VtxAttr.color[i].Comp)
|
|
{
|
|
case FORMAT_16B_565: WriteCall(Color_ReadIndex8_16b_565); break;
|
|
case FORMAT_24B_888: WriteCall(Color_ReadIndex8_24b_888); break;
|
|
case FORMAT_32B_888x: WriteCall(Color_ReadIndex8_32b_888x); break;
|
|
case FORMAT_16B_4444: WriteCall(Color_ReadIndex8_16b_4444); break;
|
|
case FORMAT_24B_6666: WriteCall(Color_ReadIndex8_24b_6666); break;
|
|
case FORMAT_32B_8888: WriteCall(Color_ReadIndex8_32b_8888); break;
|
|
default: _assert_(0); break;
|
|
}
|
|
break;
|
|
case INDEX16:
|
|
m_VertexSize += 2;
|
|
switch (m_VtxAttr.color[i].Comp)
|
|
{
|
|
case FORMAT_16B_565: WriteCall(Color_ReadIndex16_16b_565); break;
|
|
case FORMAT_24B_888: WriteCall(Color_ReadIndex16_24b_888); break;
|
|
case FORMAT_32B_888x: WriteCall(Color_ReadIndex16_32b_888x); break;
|
|
case FORMAT_16B_4444: WriteCall(Color_ReadIndex16_16b_4444); break;
|
|
case FORMAT_24B_6666: WriteCall(Color_ReadIndex16_24b_6666); break;
|
|
case FORMAT_32B_8888: WriteCall(Color_ReadIndex16_32b_8888); break;
|
|
default: _assert_(0); break;
|
|
}
|
|
break;
|
|
}
|
|
// Common for the three bottom cases
|
|
if (col[i] != NOT_PRESENT) {
|
|
vtx_decl.color_offset[i] = nat_offset;
|
|
nat_offset += 4;
|
|
}
|
|
}
|
|
|
|
// Texture matrix indices (remove if corresponding texture coordinate isn't enabled)
|
|
for (int i = 0; i < 8; i++) {
|
|
vtx_decl.texcoord_offset[i] = -1;
|
|
const int format = m_VtxAttr.texCoord[i].Format;
|
|
const int elements = m_VtxAttr.texCoord[i].Elements;
|
|
|
|
if (tc[i] == NOT_PRESENT) {
|
|
m_NativeFmt->m_components &= ~(VB_HAS_UV0 << i);
|
|
} else {
|
|
_assert_msg_(VIDEO, DIRECT <= tc[i] && tc[i] <= INDEX16, "Invalid texture coordinates!\n(tc[i] = %d)", tc[i]);
|
|
_assert_msg_(VIDEO, FORMAT_UBYTE <= format && format <= FORMAT_FLOAT, "Invalid texture coordinates format!\n(format = %d)", format);
|
|
_assert_msg_(VIDEO, 0 <= elements && elements <= 1, "Invalid number of texture coordinates elemnts!\n(elements = %d)", elements);
|
|
|
|
m_NativeFmt->m_components |= VB_HAS_UV0 << i;
|
|
WriteCall(VertexLoader_TextCoord::GetFunction(tc[i], format, elements));
|
|
m_VertexSize += VertexLoader_TextCoord::GetSize(tc[i], format, elements);
|
|
}
|
|
|
|
if (m_NativeFmt->m_components & (VB_HAS_TEXMTXIDX0 << i)) {
|
|
if (tc[i] != NOT_PRESENT) {
|
|
// if texmtx is included, texcoord will always be 3 floats, z will be the texmtx index
|
|
vtx_decl.texcoord_offset[i] = nat_offset;
|
|
vtx_decl.texcoord_gl_type[i] = VAR_FLOAT;
|
|
vtx_decl.texcoord_size[i] = 3;
|
|
nat_offset += 12;
|
|
WriteCall(m_VtxAttr.texCoord[i].Elements ? TexMtx_Write_Float : TexMtx_Write_Float2);
|
|
}
|
|
else {
|
|
m_NativeFmt->m_components |= VB_HAS_UV0 << i; // have to include since using now
|
|
vtx_decl.texcoord_offset[i] = nat_offset;
|
|
vtx_decl.texcoord_gl_type[i] = VAR_FLOAT;
|
|
vtx_decl.texcoord_size[i] = 4;
|
|
nat_offset += 16; // still include the texture coordinate, but this time as 6 + 2 bytes
|
|
WriteCall(TexMtx_Write_Float4);
|
|
}
|
|
}
|
|
else {
|
|
if (tc[i] != NOT_PRESENT) {
|
|
vtx_decl.texcoord_offset[i] = nat_offset;
|
|
vtx_decl.texcoord_gl_type[i] = VAR_FLOAT;
|
|
vtx_decl.texcoord_size[i] = vtx_attr.texCoord[i].Elements ? 2 : 1;
|
|
nat_offset += 4 * (vtx_attr.texCoord[i].Elements ? 2 : 1);
|
|
}
|
|
}
|
|
|
|
if (tc[i] == NOT_PRESENT) {
|
|
// if there's more tex coords later, have to write a dummy call
|
|
int j = i + 1;
|
|
for (; j < 8; ++j) {
|
|
if (tc[j] != NOT_PRESENT) {
|
|
WriteCall(VertexLoader_TextCoord::GetDummyFunction()); // important to get indices right!
|
|
break;
|
|
}
|
|
}
|
|
// tricky!
|
|
if (j == 8 && !((m_NativeFmt->m_components & VB_HAS_TEXMTXIDXALL) & (VB_HAS_TEXMTXIDXALL << (i + 1)))) {
|
|
// no more tex coords and tex matrices, so exit loop
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (m_VtxDesc.PosMatIdx) {
|
|
WriteCall(PosMtx_Write);
|
|
vtx_decl.posmtx_offset = nat_offset;
|
|
nat_offset += 4;
|
|
} else {
|
|
vtx_decl.posmtx_offset = -1;
|
|
}
|
|
|
|
native_stride = nat_offset;
|
|
vtx_decl.stride = native_stride;
|
|
|
|
#ifdef USE_JIT
|
|
// End loop here
|
|
#ifdef _M_X64
|
|
MOV(64, R(RAX), Imm64((u64)&loop_counter));
|
|
SUB(32, MatR(RAX), Imm8(1));
|
|
#else
|
|
SUB(32, M(&loop_counter), Imm8(1));
|
|
#endif
|
|
|
|
J_CC(CC_NZ, loop_start, true);
|
|
ABI_EmitEpilogue(4);
|
|
#endif
|
|
m_NativeFmt->Initialize(vtx_decl);
|
|
}
|
|
|
|
void VertexLoader::WriteCall(TPipelineFunction func)
|
|
{
|
|
#ifdef USE_JIT
|
|
#ifdef _M_X64
|
|
MOV(64, R(RAX), Imm64((u64)func));
|
|
CALLptr(R(RAX));
|
|
#else
|
|
CALL((void*)func);
|
|
#endif
|
|
#else
|
|
m_PipelineStages[m_numPipelineStages++] = func;
|
|
#endif
|
|
}
|
|
|
|
void VertexLoader::WriteGetVariable(int bits, OpArg dest, void *address)
|
|
{
|
|
#ifdef USE_JIT
|
|
#ifdef _M_X64
|
|
MOV(64, R(RAX), Imm64((u64)address));
|
|
MOV(bits, dest, MatR(RAX));
|
|
#else
|
|
MOV(bits, dest, M(address));
|
|
#endif
|
|
#endif
|
|
}
|
|
|
|
void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
|
|
{
|
|
#ifdef USE_JIT
|
|
#ifdef _M_X64
|
|
MOV(64, R(RAX), Imm64((u64)address));
|
|
MOV(bits, MatR(RAX), value);
|
|
#else
|
|
MOV(bits, M(address), value);
|
|
#endif
|
|
#endif
|
|
}
|
|
|
|
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
|
|
{
|
|
m_numLoadedVertices += count;
|
|
|
|
// Flush if our vertex format is different from the currently set.
|
|
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
|
|
{
|
|
// We really must flush here. It's possible that the native representations
|
|
// of the two vtx formats are the same, but we have no way to easily check that
|
|
// now.
|
|
VertexManager::Flush();
|
|
// Also move the Set() here?
|
|
}
|
|
g_nativeVertexFmt = m_NativeFmt;
|
|
|
|
if (bpmem.genMode.cullmode == 3 && primitive < 5)
|
|
{
|
|
// if cull mode is none, ignore triangles and quads
|
|
DataSkip(count * m_VertexSize);
|
|
return;
|
|
}
|
|
|
|
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
|
|
|
|
// Load position and texcoord scale factors.
|
|
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
|
|
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
|
|
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
|
|
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
|
|
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
|
|
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
|
|
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
|
|
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
|
|
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
|
|
|
|
pVtxAttr = &m_VtxAttr;
|
|
posScale = fractionTable[m_VtxAttr.PosFrac];
|
|
if (m_NativeFmt->m_components & VB_HAS_UVALL)
|
|
for (int i = 0; i < 8; i++)
|
|
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
|
|
for (int i = 0; i < 2; i++)
|
|
colElements[i] = m_VtxAttr.color[i].Elements;
|
|
|
|
// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
|
|
int granularity = 1;
|
|
switch (primitive) {
|
|
case 3: // strip .. hm, weird
|
|
case 4: // fan
|
|
if (VertexManager::GetRemainingSize() < 3 * native_stride)
|
|
VertexManager::Flush();
|
|
break;
|
|
case 6: // line strip
|
|
if (VertexManager::GetRemainingSize() < 2 * native_stride)
|
|
VertexManager::Flush();
|
|
break;
|
|
case 0: granularity = 4; break; // quads
|
|
case 2: granularity = 3; break; // tris
|
|
case 5: granularity = 2; break; // lines
|
|
}
|
|
|
|
int startv = 0, extraverts = 0;
|
|
int v = 0;
|
|
|
|
//int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
|
|
while (v < count)
|
|
{
|
|
int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
|
|
//if (remainingVerts2 - v + startv < remainingVerts)
|
|
//remainingVerts = remainingVerts2 - v + startv;
|
|
if (remainingVerts < granularity) {
|
|
INCSTAT(stats.thisFrame.numBufferSplits);
|
|
// This buffer full - break current primitive and flush, to switch to the next buffer.
|
|
u8* plastptr = VertexManager::s_pCurBufferPointer;
|
|
if (v - startv > 0)
|
|
VertexManager::AddVertices(primitive, v - startv + extraverts);
|
|
VertexManager::Flush();
|
|
//remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
|
|
// Why does this need to be so complicated?
|
|
switch (primitive) {
|
|
case 3: // triangle strip, copy last two vertices
|
|
// a little trick since we have to keep track of signs
|
|
if (v & 1) {
|
|
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
|
|
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
|
|
VertexManager::s_pCurBufferPointer += native_stride*3;
|
|
extraverts = 3;
|
|
}
|
|
else {
|
|
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
|
|
VertexManager::s_pCurBufferPointer += native_stride*2;
|
|
extraverts = 2;
|
|
}
|
|
break;
|
|
case 4: // tri fan, copy first and last vert
|
|
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
|
|
VertexManager::s_pCurBufferPointer += native_stride;
|
|
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
|
|
VertexManager::s_pCurBufferPointer += native_stride;
|
|
extraverts = 2;
|
|
break;
|
|
case 6: // line strip
|
|
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
|
|
VertexManager::s_pCurBufferPointer += native_stride;
|
|
extraverts = 1;
|
|
break;
|
|
default:
|
|
extraverts = 0;
|
|
break;
|
|
}
|
|
startv = v;
|
|
}
|
|
int remainingPrims = remainingVerts / granularity;
|
|
remainingVerts = remainingPrims * granularity;
|
|
if (count - v < remainingVerts)
|
|
remainingVerts = count - v;
|
|
|
|
#ifdef USE_JIT
|
|
if (remainingVerts > 0) {
|
|
loop_counter = remainingVerts;
|
|
((void (*)())(void*)m_compiledCode)();
|
|
}
|
|
#else
|
|
for (int s = 0; s < remainingVerts; s++)
|
|
{
|
|
tcIndex = 0;
|
|
colIndex = 0;
|
|
s_texmtxwrite = s_texmtxread = 0;
|
|
for (int i = 0; i < m_numPipelineStages; i++)
|
|
m_PipelineStages[i]();
|
|
PRIM_LOG("\n");
|
|
}
|
|
#endif
|
|
v += remainingVerts;
|
|
}
|
|
|
|
if (startv < count)
|
|
VertexManager::AddVertices(primitive, count - startv + extraverts);
|
|
}
|
|
|
|
|
|
|
|
|
|
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data)
|
|
{
|
|
m_numLoadedVertices += count;
|
|
|
|
// Flush if our vertex format is different from the currently set.
|
|
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
|
|
{
|
|
// We really must flush here. It's possible that the native representations
|
|
// of the two vtx formats are the same, but we have no way to easily check that
|
|
// now.
|
|
VertexManager::Flush();
|
|
// Also move the Set() here?
|
|
}
|
|
g_nativeVertexFmt = m_NativeFmt;
|
|
|
|
if (bpmem.genMode.cullmode == 3 && primitive < 5)
|
|
{
|
|
// if cull mode is none, ignore triangles and quads
|
|
DataSkip(count * m_VertexSize);
|
|
return;
|
|
}
|
|
|
|
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
|
|
|
|
// Load position and texcoord scale factors.
|
|
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
|
|
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
|
|
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
|
|
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
|
|
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
|
|
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
|
|
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
|
|
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
|
|
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
|
|
|
|
pVtxAttr = &m_VtxAttr;
|
|
posScale = fractionTable[m_VtxAttr.PosFrac];
|
|
if (m_NativeFmt->m_components & VB_HAS_UVALL)
|
|
for (int i = 0; i < 8; i++)
|
|
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
|
|
for (int i = 0; i < 2; i++)
|
|
colElements[i] = m_VtxAttr.color[i].Elements;
|
|
|
|
if(VertexManager::GetRemainingSize() < native_stride * count)
|
|
VertexManager::Flush();
|
|
memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count);
|
|
VertexManager::s_pCurBufferPointer += native_stride * count;
|
|
DataSkip(count * m_VertexSize);
|
|
VertexManager::AddVertices(primitive, count);
|
|
}
|
|
|
|
|
|
|
|
void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2)
|
|
{
|
|
VAT vat;
|
|
vat.g0.Hex = _group0;
|
|
vat.g1.Hex = _group1;
|
|
vat.g2.Hex = _group2;
|
|
|
|
m_VtxAttr.PosElements = vat.g0.PosElements;
|
|
m_VtxAttr.PosFormat = vat.g0.PosFormat;
|
|
m_VtxAttr.PosFrac = vat.g0.PosFrac;
|
|
m_VtxAttr.NormalElements = vat.g0.NormalElements;
|
|
m_VtxAttr.NormalFormat = vat.g0.NormalFormat;
|
|
m_VtxAttr.color[0].Elements = vat.g0.Color0Elements;
|
|
m_VtxAttr.color[0].Comp = vat.g0.Color0Comp;
|
|
m_VtxAttr.color[1].Elements = vat.g0.Color1Elements;
|
|
m_VtxAttr.color[1].Comp = vat.g0.Color1Comp;
|
|
m_VtxAttr.texCoord[0].Elements = vat.g0.Tex0CoordElements;
|
|
m_VtxAttr.texCoord[0].Format = vat.g0.Tex0CoordFormat;
|
|
m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac;
|
|
m_VtxAttr.ByteDequant = vat.g0.ByteDequant;
|
|
m_VtxAttr.NormalIndex3 = vat.g0.NormalIndex3;
|
|
|
|
m_VtxAttr.texCoord[1].Elements = vat.g1.Tex1CoordElements;
|
|
m_VtxAttr.texCoord[1].Format = vat.g1.Tex1CoordFormat;
|
|
m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac;
|
|
m_VtxAttr.texCoord[2].Elements = vat.g1.Tex2CoordElements;
|
|
m_VtxAttr.texCoord[2].Format = vat.g1.Tex2CoordFormat;
|
|
m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac;
|
|
m_VtxAttr.texCoord[3].Elements = vat.g1.Tex3CoordElements;
|
|
m_VtxAttr.texCoord[3].Format = vat.g1.Tex3CoordFormat;
|
|
m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac;
|
|
m_VtxAttr.texCoord[4].Elements = vat.g1.Tex4CoordElements;
|
|
m_VtxAttr.texCoord[4].Format = vat.g1.Tex4CoordFormat;
|
|
|
|
m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac;
|
|
m_VtxAttr.texCoord[5].Elements = vat.g2.Tex5CoordElements;
|
|
m_VtxAttr.texCoord[5].Format = vat.g2.Tex5CoordFormat;
|
|
m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac;
|
|
m_VtxAttr.texCoord[6].Elements = vat.g2.Tex6CoordElements;
|
|
m_VtxAttr.texCoord[6].Format = vat.g2.Tex6CoordFormat;
|
|
m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac;
|
|
m_VtxAttr.texCoord[7].Elements = vat.g2.Tex7CoordElements;
|
|
m_VtxAttr.texCoord[7].Format = vat.g2.Tex7CoordFormat;
|
|
m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac;
|
|
};
|
|
|
|
void VertexLoader::AppendToString(std::string *dest) const
|
|
{
|
|
dest->reserve(250);
|
|
static const char *posMode[4] = {
|
|
"Inv",
|
|
"Dir",
|
|
"I8",
|
|
"I16",
|
|
};
|
|
static const char *posFormats[5] = {
|
|
"u8", "s8", "u16", "s16", "flt",
|
|
};
|
|
static const char *colorFormat[8] = {
|
|
"565",
|
|
"888",
|
|
"888x",
|
|
"4444",
|
|
"6666",
|
|
"8888",
|
|
"Inv",
|
|
"Inv",
|
|
};
|
|
|
|
dest->append(StringFromFormat("%ib skin: %i P: %i %s-%s ",
|
|
m_VertexSize, m_VtxDesc.PosMatIdx,
|
|
m_VtxAttr.PosElements ? 3 : 2, posMode[m_VtxDesc.Position], posFormats[m_VtxAttr.PosFormat]));
|
|
if (m_VtxDesc.Normal) {
|
|
dest->append(StringFromFormat("Nrm: %i %s-%s ",
|
|
m_VtxAttr.NormalElements, posMode[m_VtxDesc.Normal], posFormats[m_VtxAttr.NormalFormat]));
|
|
}
|
|
int color_mode[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1};
|
|
for (int i = 0; i < 2; i++)
|
|
{
|
|
if (color_mode[i])
|
|
{
|
|
dest->append(StringFromFormat("C%i: %i %s-%s ", i, m_VtxAttr.color[i].Elements, posMode[color_mode[i]], colorFormat[m_VtxAttr.color[i].Comp]));
|
|
}
|
|
}
|
|
int tex_mode[8] = {
|
|
m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord,
|
|
m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord
|
|
};
|
|
for (int i = 0; i < 8; i++)
|
|
{
|
|
if (tex_mode[i])
|
|
{
|
|
dest->append(StringFromFormat("T%i: %i %s-%s ",
|
|
i, m_VtxAttr.texCoord[i].Elements, posMode[tex_mode[i]], posFormats[m_VtxAttr.texCoord[i].Format]));
|
|
}
|
|
}
|
|
dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices));
|
|
}
|