mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-12 00:59:11 +01:00
eaa1ea71c1
Change naming in all the backends' vertex managers to make it easier to continue with the merge and some future improvements. Please test this, as I'm interested in knowing the performance on Linux and Windows with the different hardware platforms.
477 lines
12 KiB
C++
477 lines
12 KiB
C++
// Copyright (C) 2003 Dolphin Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official SVN repository and contact information can be found at
|
|
// http://code.google.com/p/dolphin-emu/
|
|
|
|
//DL facts:
|
|
// Ikaruga uses (nearly) NO display lists!
|
|
// Zelda WW uses TONS of display lists
|
|
// Zelda TP uses almost 100% display lists except menus (we like this!)
|
|
// Super Mario Galaxy has nearly all geometry and more than half of the state in DLs (great!)
|
|
|
|
// Note that it IS NOT GENERALLY POSSIBLE to precompile display lists! You can compile them as they are
|
|
// while interpreting them, and hope that the vertex format doesn't change, though, if you do it right
|
|
// when they are called. The reason is that the vertex format affects the sizes of the vertices.
|
|
|
|
#include "Common.h"
|
|
#include "VideoCommon.h"
|
|
#include "OpcodeDecoding.h"
|
|
#include "CommandProcessor.h"
|
|
#include "CPUDetect.h"
|
|
#include "Core.h"
|
|
#include "Host.h"
|
|
#include "HW/Memmap.h"
|
|
#include "FifoPlayer/FifoRecorder.h"
|
|
|
|
#include "VertexLoaderManager.h"
|
|
|
|
#include "Statistics.h"
|
|
|
|
#include "XFMemory.h"
|
|
#include "CPMemory.h"
|
|
#include "BPMemory.h"
|
|
|
|
#include "Fifo.h"
|
|
#include "DataReader.h"
|
|
|
|
#include "OpenCL.h"
|
|
#include "OpenCL/OCLTextureDecoder.h"
|
|
#include "VideoConfig.h"
|
|
|
|
// Read cursor for the command stream being decoded. It starts at the beginning
// of the video buffer and is temporarily redirected while a display list is
// interpreted.
u8* g_pVideoData = 0;

// When true, each decoded command other than a display-list call is handed to
// the FifoRecorder.
bool g_bRecordFifoData = false;
|
|
|
|
#if _M_SSE >= 0x301
// SSSE3 reader table: slot i holds the DataReadU32xN_SSSE3<i + 1> instantiation.
DataReadU32xNfunc DataReadU32xFuncs_SSSE3[16] = {
	DataReadU32xN_SSSE3<1>,  DataReadU32xN_SSSE3<2>,  DataReadU32xN_SSSE3<3>,  DataReadU32xN_SSSE3<4>,
	DataReadU32xN_SSSE3<5>,  DataReadU32xN_SSSE3<6>,  DataReadU32xN_SSSE3<7>,  DataReadU32xN_SSSE3<8>,
	DataReadU32xN_SSSE3<9>,  DataReadU32xN_SSSE3<10>, DataReadU32xN_SSSE3<11>, DataReadU32xN_SSSE3<12>,
	DataReadU32xN_SSSE3<13>, DataReadU32xN_SSSE3<14>, DataReadU32xN_SSSE3<15>, DataReadU32xN_SSSE3<16>
};
#endif
|
|
|
|
// Reader table used by the decoders, indexed with (transfer size - 1):
// slot i holds the DataReadU32xN<i + 1> instantiation.
DataReadU32xNfunc DataReadU32xFuncs[16] = {
	DataReadU32xN<1>,  DataReadU32xN<2>,  DataReadU32xN<3>,  DataReadU32xN<4>,
	DataReadU32xN<5>,  DataReadU32xN<6>,  DataReadU32xN<7>,  DataReadU32xN<8>,
	DataReadU32xN<9>,  DataReadU32xN<10>, DataReadU32xN<11>, DataReadU32xN<12>,
	DataReadU32xN<13>, DataReadU32xN<14>, DataReadU32xN<15>, DataReadU32xN<16>
};
|
|
|
|
// Accessors for the bounds of the video buffer. They are declared here
// directly; their definitions are not part of this file.
u8* GetVideoBufferStartPtr();
u8* GetVideoBufferEndPtr();

// Forward declaration: InterpretDisplayList uses Decode before its definition.
static void Decode();
|
|
|
|
// Decodes the `size` bytes of display-list data found at emulated address
// `address`, then puts the read cursor back where it was before the call.
// Nothing is decoded when the address cannot be translated to a host pointer.
void InterpretDisplayList(u32 address, u32 size)
{
	u8* const savedCursor = g_pVideoData;
	u8* const listBegin = Memory::GetPointer(address);

	// Memory::GetPointer may fail; skipping the list avoids a crash.
	if (listBegin != 0)
	{
		g_pVideoData = listBegin;

		// Small "hack" for the stats: swap the DL and non-DL counters
		// while the list is being decoded.
		Statistics::SwapDL();

		for (u8* const listEnd = g_pVideoData + size; g_pVideoData < listEnd; )
		{
			Decode();
		}

		INCSTAT(stats.numDListsCalled);
		INCSTAT(stats.thisFrame.numDListsCalled);

		// Swap the counters back.
		Statistics::SwapDL();
	}

	// Resume reading from wherever the caller was.
	g_pVideoData = savedCursor;
}
|
|
|
|
// Defer to backend-specific DL cache.
|
|
extern bool HandleDisplayList(u32 address, u32 size);
|
|
|
|
void ExecuteDisplayList(u32 address, u32 size)
|
|
{
|
|
if (!HandleDisplayList(address, size))
|
|
InterpretDisplayList(address, size);
|
|
}
|
|
|
|
bool FifoCommandRunnable()
|
|
{
|
|
u32 buffer_size = (u32)(GetVideoBufferEndPtr() - g_pVideoData);
|
|
if (buffer_size == 0)
|
|
return false; // can't peek
|
|
|
|
u8 cmd_byte = DataPeek8(0);
|
|
u32 command_size = 0;
|
|
|
|
switch (cmd_byte)
|
|
{
|
|
case GX_NOP: // Hm, this means that we scan over nop streams pretty slowly...
|
|
case GX_CMD_INVL_VC: // Invalidate Vertex Cache - no parameters
|
|
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
|
|
command_size = 1;
|
|
break;
|
|
|
|
case GX_LOAD_BP_REG:
|
|
command_size = 5;
|
|
break;
|
|
|
|
case GX_LOAD_CP_REG:
|
|
command_size = 6;
|
|
break;
|
|
|
|
case GX_LOAD_INDX_A:
|
|
case GX_LOAD_INDX_B:
|
|
case GX_LOAD_INDX_C:
|
|
case GX_LOAD_INDX_D:
|
|
command_size = 5;
|
|
break;
|
|
|
|
case GX_CMD_CALL_DL:
|
|
command_size = 9;
|
|
break;
|
|
|
|
case GX_LOAD_XF_REG:
|
|
{
|
|
// check if we can read the header
|
|
if (buffer_size >= 5)
|
|
{
|
|
command_size = 1 + 4;
|
|
u32 Cmd2 = DataPeek32(1);
|
|
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
|
command_size += transfer_size * 4;
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
if (cmd_byte & 0x80)
|
|
{
|
|
// check if we can read the header
|
|
if (buffer_size >= 3)
|
|
{
|
|
command_size = 1 + 2;
|
|
u16 numVertices = DataPeek16(1);
|
|
command_size += numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK);
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// TODO(Omega): Maybe dump FIFO to file on this error
|
|
char szTemp[1024];
|
|
sprintf(szTemp, "GFX FIFO: Unknown Opcode (0x%x).\n"
|
|
"This means one of the following:\n"
|
|
"* The emulated GPU got desynced, disabling dual core can help\n"
|
|
"* Command stream corrupted by some spurious memory bug\n"
|
|
"* This really is an unknown opcode (unlikely)\n"
|
|
"* Some other sort of bug\n\n"
|
|
"Dolphin will now likely crash or hang. Enjoy." , cmd_byte);
|
|
Host_SysMessage(szTemp);
|
|
INFO_LOG(VIDEO, "%s", szTemp);
|
|
{
|
|
SCPFifoStruct &fifo = CommandProcessor::fifo;
|
|
|
|
char szTmp[512];
|
|
// sprintf(szTmp, "Illegal command %02x (at %08x)",cmd_byte,g_pDataReader->GetPtr());
|
|
sprintf(szTmp, "Illegal command %02x\n"
|
|
"CPBase: 0x%08x\n"
|
|
"CPEnd: 0x%08x\n"
|
|
"CPHiWatermark: 0x%08x\n"
|
|
"CPLoWatermark: 0x%08x\n"
|
|
"CPReadWriteDistance: 0x%08x\n"
|
|
"CPWritePointer: 0x%08x\n"
|
|
"CPReadPointer: 0x%08x\n"
|
|
"CPBreakpoint: 0x%08x\n"
|
|
"bFF_GPReadEnable: %s\n"
|
|
"bFF_BPEnable: %s\n"
|
|
"bFF_BPInt: %s\n"
|
|
"bFF_Breakpoint: %s\n"
|
|
,cmd_byte, fifo.CPBase, fifo.CPEnd, fifo.CPHiWatermark, fifo.CPLoWatermark, fifo.CPReadWriteDistance
|
|
,fifo.CPWritePointer, fifo.CPReadPointer, fifo.CPBreakpoint, fifo.bFF_GPReadEnable ? "true" : "false"
|
|
,fifo.bFF_BPEnable ? "true" : "false" ,fifo.bFF_BPInt ? "true" : "false"
|
|
,fifo.bFF_Breakpoint ? "true" : "false");
|
|
|
|
Host_SysMessage(szTmp);
|
|
INFO_LOG(VIDEO, "%s", szTmp);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (command_size > buffer_size)
|
|
return false;
|
|
|
|
// INFO_LOG("OP detected: cmd_byte 0x%x size %i buffer %i",cmd_byte, command_size, buffer_size);
|
|
|
|
return true;
|
|
}
|
|
|
|
static void Decode()
|
|
{
|
|
u8 *opcodeStart = g_pVideoData;
|
|
|
|
int cmd_byte = DataReadU8();
|
|
switch (cmd_byte)
|
|
{
|
|
case GX_NOP:
|
|
break;
|
|
|
|
case GX_LOAD_CP_REG: //0x08
|
|
{
|
|
u8 sub_cmd = DataReadU8();
|
|
u32 value = DataReadU32();
|
|
LoadCPReg(sub_cmd, value);
|
|
INCSTAT(stats.thisFrame.numCPLoads);
|
|
}
|
|
break;
|
|
|
|
case GX_LOAD_XF_REG:
|
|
{
|
|
u32 Cmd2 = DataReadU32();
|
|
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
|
u32 xf_address = Cmd2 & 0xFFFF;
|
|
GC_ALIGNED128(u32 data_buffer[16]);
|
|
DataReadU32xFuncs[transfer_size-1](data_buffer);
|
|
LoadXFReg(transfer_size, xf_address, data_buffer);
|
|
|
|
INCSTAT(stats.thisFrame.numXFLoads);
|
|
}
|
|
break;
|
|
|
|
case GX_LOAD_INDX_A: //used for position matrices
|
|
LoadIndexedXF(DataReadU32(), 0xC);
|
|
break;
|
|
case GX_LOAD_INDX_B: //used for normal matrices
|
|
LoadIndexedXF(DataReadU32(), 0xD);
|
|
break;
|
|
case GX_LOAD_INDX_C: //used for postmatrices
|
|
LoadIndexedXF(DataReadU32(), 0xE);
|
|
break;
|
|
case GX_LOAD_INDX_D: //used for lights
|
|
LoadIndexedXF(DataReadU32(), 0xF);
|
|
break;
|
|
|
|
case GX_CMD_CALL_DL:
|
|
{
|
|
u32 address = DataReadU32();
|
|
u32 count = DataReadU32();
|
|
ExecuteDisplayList(address, count);
|
|
}
|
|
break;
|
|
|
|
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
|
|
DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
|
|
break;
|
|
|
|
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
|
|
DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
|
|
break;
|
|
|
|
case GX_LOAD_BP_REG: //0x61
|
|
{
|
|
u32 bp_cmd = DataReadU32();
|
|
LoadBPReg(bp_cmd);
|
|
INCSTAT(stats.thisFrame.numBPLoads);
|
|
}
|
|
break;
|
|
|
|
// draw primitives
|
|
default:
|
|
if (cmd_byte & 0x80)
|
|
{
|
|
// load vertices (use computed vertex size from FifoCommandRunnable above)
|
|
u16 numVertices = DataReadU16();
|
|
|
|
VertexLoaderManager::RunVertices(
|
|
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
|
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
|
numVertices);
|
|
}
|
|
else
|
|
{
|
|
ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte);
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
|
|
// Display lists get added directly into the FIFO stream
|
|
if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
|
|
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart));
|
|
}
|
|
|
|
static void DecodeSemiNop()
|
|
{
|
|
u8 *opcodeStart = g_pVideoData;
|
|
|
|
int cmd_byte = DataReadU8();
|
|
switch (cmd_byte)
|
|
{
|
|
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
|
|
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
|
|
case GX_NOP:
|
|
break;
|
|
|
|
case GX_LOAD_CP_REG: //0x08
|
|
// We have to let CP writes through because they determine the size of vertices.
|
|
{
|
|
u8 sub_cmd = DataReadU8();
|
|
u32 value = DataReadU32();
|
|
LoadCPReg(sub_cmd, value);
|
|
INCSTAT(stats.thisFrame.numCPLoads);
|
|
}
|
|
break;
|
|
|
|
case GX_LOAD_XF_REG:
|
|
{
|
|
u32 Cmd2 = DataReadU32();
|
|
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
|
u32 address = Cmd2 & 0xFFFF;
|
|
GC_ALIGNED128(u32 data_buffer[16]);
|
|
DataReadU32xFuncs[transfer_size-1](data_buffer);
|
|
LoadXFReg(transfer_size, address, data_buffer);
|
|
INCSTAT(stats.thisFrame.numXFLoads);
|
|
}
|
|
break;
|
|
|
|
case GX_LOAD_INDX_A: //used for position matrices
|
|
LoadIndexedXF(DataReadU32(), 0xC);
|
|
break;
|
|
case GX_LOAD_INDX_B: //used for normal matrices
|
|
LoadIndexedXF(DataReadU32(), 0xD);
|
|
break;
|
|
case GX_LOAD_INDX_C: //used for postmatrices
|
|
LoadIndexedXF(DataReadU32(), 0xE);
|
|
break;
|
|
case GX_LOAD_INDX_D: //used for lights
|
|
LoadIndexedXF(DataReadU32(), 0xF);
|
|
break;
|
|
|
|
case GX_CMD_CALL_DL:
|
|
// Hm, wonder if any games put tokens in display lists - in that case,
|
|
// we'll have to parse them too.
|
|
DataSkip(8);
|
|
break;
|
|
|
|
case GX_LOAD_BP_REG: //0x61
|
|
// We have to let BP writes through because they set tokens and stuff.
|
|
// TODO: Call a much simplified LoadBPReg instead.
|
|
{
|
|
u32 bp_cmd = DataReadU32();
|
|
LoadBPReg(bp_cmd);
|
|
INCSTAT(stats.thisFrame.numBPLoads);
|
|
}
|
|
break;
|
|
|
|
// draw primitives
|
|
default:
|
|
if (cmd_byte & 0x80)
|
|
{
|
|
// load vertices (use computed vertex size from FifoCommandRunnable above)
|
|
u16 numVertices = DataReadU16();
|
|
DataSkip(numVertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK));
|
|
}
|
|
else
|
|
{
|
|
ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte);
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
|
|
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart));
|
|
}
|
|
|
|
void OpcodeDecoder_Init()
|
|
{
|
|
g_pVideoData = GetVideoBufferStartPtr();
|
|
|
|
#if _M_SSE >= 0x301
|
|
if (cpu_info.bSSSE3)
|
|
{
|
|
for (int i = 0; i < 16; ++i)
|
|
DataReadU32xFuncs[i] = DataReadU32xFuncs_SSSE3[i];
|
|
}
|
|
#endif
|
|
|
|
if (g_Config.bEnableOpenCL)
|
|
{
|
|
OpenCL::Initialize();
|
|
TexDecoder_OpenCL_Initialize();
|
|
}
|
|
}
|
|
|
|
|
|
void OpcodeDecoder_Shutdown()
|
|
{
|
|
if (g_Config.bEnableOpenCL)
|
|
{
|
|
TexDecoder_OpenCL_Shutdown();
|
|
OpenCL::Destroy();
|
|
}
|
|
}
|
|
|
|
void OpcodeDecoder_Run(bool skipped_frame)
|
|
{
|
|
if (!skipped_frame)
|
|
{
|
|
while (FifoCommandRunnable())
|
|
Decode();
|
|
}
|
|
else
|
|
{
|
|
while (FifoCommandRunnable())
|
|
DecodeSemiNop();
|
|
}
|
|
}
|