diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 7a0c77b6a0..496ac0a851 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -252,7 +252,7 @@ static void ReadDataFromFifoOnCPU(u32 readPtr) } } Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len); - s_video_buffer_pp_read_ptr = OpcodeDecoder_Preprocess(s_video_buffer_pp_read_ptr, write_ptr + len, false); + s_video_buffer_pp_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false); // This would have to be locked if the GPU thread didn't spin. s_video_buffer_write_ptr = write_ptr + len; } @@ -294,7 +294,7 @@ void RunGpuLoop() // See comment in SyncGPU if (write_ptr > seen_ptr) { - s_video_buffer_read_ptr = OpcodeDecoder_Run(s_video_buffer_read_ptr, write_ptr, nullptr, false); + s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false); { std::lock_guard vblk(s_video_buffer_lock); @@ -330,7 +330,7 @@ void RunGpuLoop() u8* write_ptr = s_video_buffer_write_ptr; - s_video_buffer_read_ptr = OpcodeDecoder_Run(s_video_buffer_read_ptr, write_ptr, &cyclesExecuted, false); + s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted) @@ -403,7 +403,7 @@ void RunGpu() FPURoundMode::SaveSIMDState(); FPURoundMode::LoadDefaultSIMDState(); ReadDataFromFifo(fifo.CPReadPointer); - s_video_buffer_read_ptr = OpcodeDecoder_Run(s_video_buffer_read_ptr, s_video_buffer_write_ptr, nullptr, false); + s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false); FPURoundMode::LoadSIMDState(); } diff --git a/Source/Core/VideoCommon/OpcodeDecoding.cpp b/Source/Core/VideoCommon/OpcodeDecoding.cpp index c5b1a09099..12a9d5b350 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/OpcodeDecoding.cpp @@ -34,12 +34,8 @@ bool g_bRecordFifoData = false; -u8* g_video_buffer_read_ptr; -static u8* s_video_buffer_pp_read_ptr; - static u32 InterpretDisplayList(u32 address, u32 size) { - u8* old_pVideoData = g_video_buffer_read_ptr; u8* startAddress; if (g_use_deterministic_gpu_thread) @@ -55,32 +51,26 @@ static u32 InterpretDisplayList(u32 address, u32 size) // temporarily swap dl and non-dl (small "hack" for the stats) Statistics::SwapDL(); - OpcodeDecoder_Run(startAddress, startAddress + size, &cycles, true); + OpcodeDecoder_Run(DataReader(startAddress, startAddress + size), &cycles, true); INCSTAT(stats.thisFrame.numDListsCalled); // un-swap Statistics::SwapDL(); } - // reset to the old pointer - g_video_buffer_read_ptr = old_pVideoData; - return cycles; } static void InterpretDisplayListPreprocess(u32 address, u32 size) { - u8* old_read_ptr = s_video_buffer_pp_read_ptr; u8* startAddress = Memory::GetPointer(address); PushFifoAuxBuffer(startAddress, size); if (startAddress != nullptr) { - OpcodeDecoder_Preprocess(startAddress, startAddress + size, true); + OpcodeDecoder_Run(DataReader(startAddress, startAddress + size), nullptr, true); } - - s_video_buffer_pp_read_ptr = old_read_ptr; } static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess) @@ -131,180 +121,8 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess) } } -template -static u32 Decode(u8* end, bool in_display_list) -{ - u8 *opcodeStart = *bufp; - if (*bufp == end) - return 0; - - u8 cmd_byte = DataRead(bufp); - u32 cycles; - int refarray; - switch (cmd_byte) - { - case GX_NOP: - cycles = 6; // Hm, this means that we scan over nop streams pretty slowly... - break; - - case GX_LOAD_CP_REG: //0x08 - { - if (end - *bufp < 1 + 4) - return 0; - cycles = 12; - u8 sub_cmd = DataRead(bufp); - u32 value = DataRead(bufp); - LoadCPReg(sub_cmd, value, is_preprocess); - if (!is_preprocess) - INCSTAT(stats.thisFrame.numCPLoads); - } - break; - - case GX_LOAD_XF_REG: - { - if (end - *bufp < 4) - return 0; - u32 Cmd2 = DataRead(bufp); - int transfer_size = ((Cmd2 >> 16) & 15) + 1; - if ((size_t) (end - *bufp) < transfer_size * sizeof(u32)) - return 0; - cycles = 18 + 6 * transfer_size; - if (!is_preprocess) - { - u32 xf_address = Cmd2 & 0xFFFF; - LoadXFReg(transfer_size, xf_address); - - INCSTAT(stats.thisFrame.numXFLoads); - } - else - { - *bufp += transfer_size * sizeof(u32); - } - } - break; - - case GX_LOAD_INDX_A: //used for position matrices - refarray = 0xC; - goto load_indx; - case GX_LOAD_INDX_B: //used for normal matrices - refarray = 0xD; - goto load_indx; - case GX_LOAD_INDX_C: //used for postmatrices - refarray = 0xE; - goto load_indx; - case GX_LOAD_INDX_D: //used for lights - refarray = 0xF; - goto load_indx; - load_indx: - if (end - *bufp < 4) - return 0; - cycles = 6; - if (is_preprocess) - PreprocessIndexedXF(DataRead(bufp), refarray); - else - LoadIndexedXF(DataRead(bufp), refarray); - break; - - case GX_CMD_CALL_DL: - { - if (end - *bufp < 8) - return 0; - u32 address = DataRead(bufp); - u32 count = DataRead(bufp); - - if (in_display_list) - { - cycles = 6; - WARN_LOG(VIDEO,"recursive display list detected"); - } - else - { - if (is_preprocess) - InterpretDisplayListPreprocess(address, count); - else - cycles = 6 + InterpretDisplayList(address, count); - } - } - break; - - case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that - cycles = 6; - DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte); - break; - - case GX_CMD_INVL_VC: // Invalidate Vertex Cache - cycles = 6; - DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); - break; - - case GX_LOAD_BP_REG: //0x61 - // In skipped_frame case: We have to let BP writes through because they set - // tokens and stuff. TODO: Call a much simplified LoadBPReg instead. - { - if (end - *bufp < 4) - return 0; - cycles = 12; - u32 bp_cmd = DataRead(bufp); - if (is_preprocess) - { - LoadBPRegPreprocess(bp_cmd); - } - else - { - LoadBPReg(bp_cmd); - INCSTAT(stats.thisFrame.numBPLoads); - } - } - break; - - // draw primitives - default: - if ((cmd_byte & 0xC0) == 0x80) - { - cycles = 1600; - // load vertices - if (end - *bufp < 2) - return 0; - u16 num_vertices = DataRead(bufp); - - if (is_preprocess) - { - size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess); - if ((size_t) (end - *bufp) < size) - return 0; - *bufp += size; - } - else - { - if (!VertexLoaderManager::RunVertices( - cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) - (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, - num_vertices, - end - *bufp, - g_bSkipCurrentFrame)) - return 0; - } - } - else - { - UnknownOpcode(cmd_byte, opcodeStart, is_preprocess); - cycles = 1; - } - break; - } - - // Display lists get added directly into the FIFO stream - if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) - FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(*bufp - opcodeStart)); - - // In is_preprocess mode, we don't actually care about cycles, at least for - // now... make sure the compiler realizes that. - return is_preprocess ? 1 : cycles; -} - void OpcodeDecoder_Init() { - g_video_buffer_read_ptr = GetVideoBufferStartPtr(); } @@ -312,40 +130,185 @@ void OpcodeDecoder_Shutdown() { } -u8* OpcodeDecoder_Run(u8* start, u8* end, u32* cycles, bool in_display_list) +template +u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list) { - g_video_buffer_read_ptr = start; u32 totalCycles = 0; + u8* opcodeStart; while (true) { - u8* old = g_video_buffer_read_ptr; - u32 cycles_op = Decode(end, in_display_list); - if (cycles_op == 0) + src.WritePointer(&opcodeStart); + + if (!src.size()) + goto end; + + u8 cmd_byte = src.Read(); + int refarray; + switch (cmd_byte) { - g_video_buffer_read_ptr = old; + case GX_NOP: + totalCycles += 6; // Hm, this means that we scan over nop streams pretty slowly... + break; + + case GX_LOAD_CP_REG: //0x08 + { + if (src.size() < 1 + 4) + goto end; + totalCycles += 12; + u8 sub_cmd = src.Read(); + u32 value = src.Read(); + LoadCPReg(sub_cmd, value, is_preprocess); + if (!is_preprocess) + INCSTAT(stats.thisFrame.numCPLoads); + } + break; + + case GX_LOAD_XF_REG: + { + if (src.size() < 4) + goto end; + u32 Cmd2 = src.Read(); + int transfer_size = ((Cmd2 >> 16) & 15) + 1; + if (src.size() < transfer_size * sizeof(u32)) + goto end; + totalCycles += 18 + 6 * transfer_size; + if (!is_preprocess) + { + u32 xf_address = Cmd2 & 0xFFFF; + LoadXFReg(transfer_size, xf_address, src); + + INCSTAT(stats.thisFrame.numXFLoads); + } + src.Skip(transfer_size); + } + break; + + case GX_LOAD_INDX_A: //used for position matrices + refarray = 0xC; + goto load_indx; + case GX_LOAD_INDX_B: //used for normal matrices + refarray = 0xD; + goto load_indx; + case GX_LOAD_INDX_C: //used for postmatrices + refarray = 0xE; + goto load_indx; + case GX_LOAD_INDX_D: //used for lights + refarray = 0xF; + goto load_indx; + load_indx: + if (src.size() < 4) + goto end; + totalCycles += 6; + if (is_preprocess) + PreprocessIndexedXF(src.Read(), refarray); + else + LoadIndexedXF(src.Read(), refarray); + break; + + case GX_CMD_CALL_DL: + { + if (src.size() < 8) + goto end; + u32 address = src.Read(); + u32 count = src.Read(); + + if (in_display_list) + { + totalCycles += 6; + WARN_LOG(VIDEO,"recursive display list detected"); + } + else + { + if (is_preprocess) + InterpretDisplayListPreprocess(address, count); + else + totalCycles += 6 + InterpretDisplayList(address, count); + } + } + break; + + case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that + totalCycles += 6; + DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte); + break; + + case GX_CMD_INVL_VC: // Invalidate Vertex Cache + totalCycles += 6; + DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); + break; + + case GX_LOAD_BP_REG: //0x61 + // In skipped_frame case: We have to let BP writes through because they set + // tokens and stuff. TODO: Call a much simplified LoadBPReg instead. + { + if (src.size() < 4) + goto end; + totalCycles += 12; + u32 bp_cmd = src.Read(); + if (is_preprocess) + { + LoadBPRegPreprocess(bp_cmd); + } + else + { + LoadBPReg(bp_cmd); + INCSTAT(stats.thisFrame.numBPLoads); + } + } + break; + + // draw primitives + default: + if ((cmd_byte & 0xC0) == 0x80) + { + // load vertices + if (src.size() < 2) + goto end; + u16 num_vertices = src.Read(); + + if (is_preprocess) + { + size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess); + if (src.size() < size) + goto end; + src.Skip(size); + } + else + { + if (!VertexLoaderManager::RunVertices( + cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) + (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, + num_vertices, + src, + g_bSkipCurrentFrame)) + goto end; + } + totalCycles += 1600; + } + else + { + UnknownOpcode(cmd_byte, opcodeStart, is_preprocess); + totalCycles += 1; + } break; } - totalCycles += cycles_op; + + // Display lists get added directly into the FIFO stream + if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) + { + u8* opcodeEnd; + src.WritePointer(&opcodeEnd); + FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(opcodeEnd - opcodeStart)); + } } + +end: if (cycles) { *cycles = totalCycles; } - return g_video_buffer_read_ptr; + return opcodeStart; } -u8* OpcodeDecoder_Preprocess(u8* start, u8 *end, bool in_display_list) -{ - s_video_buffer_pp_read_ptr = start; - while (true) - { - u8* old = s_video_buffer_pp_read_ptr; - u32 cycles = Decode(end, in_display_list); - if (cycles == 0) - { - s_video_buffer_pp_read_ptr = old; - break; - } - } - return s_video_buffer_pp_read_ptr; -} +template u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list); +template u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list); diff --git a/Source/Core/VideoCommon/OpcodeDecoding.h b/Source/Core/VideoCommon/OpcodeDecoding.h index 96a79f35c7..5fd03e26f3 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.h +++ b/Source/Core/VideoCommon/OpcodeDecoding.h @@ -5,6 +5,7 @@ #pragma once #include "Common/CommonTypes.h" +#include "VideoCommon/DataReader.h" #define GX_NOP 0x00 @@ -40,5 +41,6 @@ extern bool g_bRecordFifoData; void OpcodeDecoder_Init(); void OpcodeDecoder_Shutdown(); -u8* OpcodeDecoder_Run(u8* start, u8* end, u32* cycles, bool in_display_list); -u8* OpcodeDecoder_Preprocess(u8* start, u8* end, bool in_display_list); + +template +u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list); diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index 3f7161b8f7..dfdefd42eb 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -45,6 +45,9 @@ int colElements[2]; GC_ALIGNED128(float posScale[4]); GC_ALIGNED64(float tcScale[8][2]); +// This pointer is used as the source for all fixed function loader calls +u8* g_video_buffer_read_ptr; + static const float fractionTable[32] = { 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index b8132fa806..0262cafb2d 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -130,7 +130,7 @@ static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state) return loader; } -bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing) +bool RunVertices(int vtx_attr_group, int primitive, int count, DataReader& src, bool skip_drawing) { if (!count) return true; @@ -140,13 +140,13 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, VertexLoader* loader = RefreshLoader(vtx_attr_group, state); size_t size = count * loader->GetVertexSize(); - if (buf_size < size) + if (src.size() < size) return false; if (skip_drawing || (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5)) { // if cull mode is CULL_ALL, ignore triangles and quads - DataSkip((u32)size); + src.Skip(size); return true; } @@ -160,7 +160,10 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, VertexManager::PrepareForAdditionalData(primitive, count, loader->GetNativeVertexDeclaration().stride); + + src.WritePointer(&g_video_buffer_read_ptr); loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count); + src = g_video_buffer_read_ptr; IndexGenerator::AddIndices(primitive, count); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index 8995ad2d7a..a687b7641f 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -7,6 +7,7 @@ #include #include "Common/CommonTypes.h" +#include "VideoCommon/DataReader.h" #include "VideoCommon/NativeVertexFormat.h" namespace VertexLoaderManager @@ -18,7 +19,7 @@ namespace VertexLoaderManager int GetVertexSize(int vtx_attr_group, bool preprocess); // Returns false if buf_size is insufficient. - bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false); + bool RunVertices(int vtx_attr_group, int primitive, int count, DataReader& src, bool skip_drawing = false); // For debugging void AppendListToString(std::string *dest); diff --git a/Source/Core/VideoCommon/XFMemory.h b/Source/Core/VideoCommon/XFMemory.h index 33077aa69b..6ec28476d8 100644 --- a/Source/Core/VideoCommon/XFMemory.h +++ b/Source/Core/VideoCommon/XFMemory.h @@ -6,6 +6,7 @@ #include "Common/CommonTypes.h" #include "VideoCommon/CPMemory.h" +#include "VideoCommon/DataReader.h" // Lighting @@ -273,6 +274,6 @@ struct XFMemory extern XFMemory xfmem; -void LoadXFReg(u32 transferSize, u32 address); +void LoadXFReg(u32 transferSize, u32 address, DataReader src); void LoadIndexedXF(u32 val, int array); void PreprocessIndexedXF(u32 val, int refarray); diff --git a/Source/Core/VideoCommon/XFStructs.cpp b/Source/Core/VideoCommon/XFStructs.cpp index 0552aa0986..927fed8077 100644 --- a/Source/Core/VideoCommon/XFStructs.cpp +++ b/Source/Core/VideoCommon/XFStructs.cpp @@ -19,14 +19,14 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress) VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize); } -static void XFRegWritten(int transferSize, u32 baseAddress) +static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src) { u32 address = baseAddress; u32 dataIndex = 0; while (transferSize > 0 && address < 0x1058) { - u32 newValue = DataPeek(dataIndex * sizeof(u32)); + u32 newValue = src.Peek(dataIndex * sizeof(u32)); u32 nextAddress = address + 1; switch (address) @@ -193,7 +193,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress) } } -void LoadXFReg(u32 transferSize, u32 baseAddress) +void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src) { // do not allow writes past registers if (baseAddress + transferSize > 0x1058) @@ -229,17 +229,17 @@ void LoadXFReg(u32 transferSize, u32 baseAddress) XFMemWritten(xfMemTransferSize, xfMemBase); for (u32 i = 0; i < xfMemTransferSize; i++) { - ((u32*)&xfmem)[xfMemBase + i] = DataRead(); + ((u32*)&xfmem)[xfMemBase + i] = src.Read(); } } // write to XF regs if (transferSize > 0) { - XFRegWritten(transferSize, baseAddress); + XFRegWritten(transferSize, baseAddress, src); for (u32 i = 0; i < transferSize; i++) { - ((u32*)&xfmem)[baseAddress + i] = DataRead(); + ((u32*)&xfmem)[baseAddress + i] = src.Read(); } } }