mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-09 23:59:27 +01:00
54773bc5d2
This fourth part of my series of patches to get rid of unsafe uses of GetPointer takes care of the "easy" cases in VideoCommon. Three uses of GetPointer now remain in Dolphin: VertexLoaderManager, TextureInfo, and the software renderer's TextureSampler.
278 lines
8.2 KiB
C++
278 lines
8.2 KiB
C++
// Copyright 2008 Dolphin Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
// DL facts:
|
|
// Ikaruga uses (nearly) NO display lists!
|
|
// Zelda WW uses TONS of display lists
|
|
// Zelda TP uses almost 100% display lists except menus (we like this!)
|
|
// Super Mario Galaxy has nearly all geometry and more than half of the state in DLs (great!)
|
|
|
|
// Note that it IS NOT GENERALLY POSSIBLE to precompile display lists! You can compile them as they
|
|
// are while interpreting them, and hope that the vertex format doesn't change, though, if you do
|
|
// it right when they are called. The reason is that the vertex format affects the sizes of the
|
|
// vertices.
|
|
|
|
#include "VideoCommon/OpcodeDecoding.h"
|
|
|
|
#include "Common/Assert.h"
|
|
#include "Common/Logging/Log.h"
|
|
#include "Core/FifoPlayer/FifoRecorder.h"
|
|
#include "Core/HW/Memmap.h"
|
|
#include "Core/System.h"
|
|
#include "VideoCommon/BPMemory.h"
|
|
#include "VideoCommon/CPMemory.h"
|
|
#include "VideoCommon/CommandProcessor.h"
|
|
#include "VideoCommon/DataReader.h"
|
|
#include "VideoCommon/Fifo.h"
|
|
#include "VideoCommon/Statistics.h"
|
|
#include "VideoCommon/VertexLoaderBase.h"
|
|
#include "VideoCommon/VertexLoaderManager.h"
|
|
#include "VideoCommon/XFMemory.h"
|
|
#include "VideoCommon/XFStateManager.h"
|
|
#include "VideoCommon/XFStructs.h"
|
|
|
|
namespace OpcodeDecoder
|
|
{
|
|
bool g_record_fifo_data = false;
|
|
|
|
template <bool is_preprocess>
|
|
class RunCallback final : public Callback
|
|
{
|
|
public:
|
|
OPCODE_CALLBACK(void OnXF(u16 address, u8 count, const u8* data))
|
|
{
|
|
m_cycles += 18 + 6 * count;
|
|
|
|
if constexpr (!is_preprocess)
|
|
{
|
|
LoadXFReg(address, count, data);
|
|
|
|
INCSTAT(g_stats.this_frame.num_xf_loads);
|
|
}
|
|
}
|
|
OPCODE_CALLBACK(void OnCP(u8 command, u32 value))
|
|
{
|
|
m_cycles += 12;
|
|
const u8 sub_command = command & CP_COMMAND_MASK;
|
|
if constexpr (!is_preprocess)
|
|
{
|
|
if (sub_command == MATINDEX_A)
|
|
{
|
|
VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
|
|
auto& system = Core::System::GetInstance();
|
|
system.GetXFStateManager().SetTexMatrixChangedA(value);
|
|
}
|
|
else if (sub_command == MATINDEX_B)
|
|
{
|
|
VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
|
|
auto& system = Core::System::GetInstance();
|
|
system.GetXFStateManager().SetTexMatrixChangedB(value);
|
|
}
|
|
else if (sub_command == VCD_LO || sub_command == VCD_HI)
|
|
{
|
|
VertexLoaderManager::g_main_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG);
|
|
VertexLoaderManager::g_bases_dirty = true;
|
|
VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
|
|
}
|
|
else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B ||
|
|
sub_command == CP_VAT_REG_C)
|
|
{
|
|
VertexLoaderManager::g_main_vat_dirty[command & CP_VAT_MASK] = true;
|
|
VertexLoaderManager::g_needs_cp_xf_consistency_check = true;
|
|
}
|
|
else if (sub_command == ARRAY_BASE)
|
|
{
|
|
VertexLoaderManager::g_bases_dirty = true;
|
|
}
|
|
|
|
INCSTAT(g_stats.this_frame.num_cp_loads);
|
|
}
|
|
else if constexpr (is_preprocess)
|
|
{
|
|
if (sub_command == VCD_LO || sub_command == VCD_HI)
|
|
{
|
|
VertexLoaderManager::g_preprocess_vat_dirty = BitSet8::AllTrue(CP_NUM_VAT_REG);
|
|
}
|
|
else if (sub_command == CP_VAT_REG_A || sub_command == CP_VAT_REG_B ||
|
|
sub_command == CP_VAT_REG_C)
|
|
{
|
|
VertexLoaderManager::g_preprocess_vat_dirty[command & CP_VAT_MASK] = true;
|
|
}
|
|
}
|
|
GetCPState().LoadCPReg(command, value);
|
|
}
|
|
OPCODE_CALLBACK(void OnBP(u8 command, u32 value))
|
|
{
|
|
m_cycles += 12;
|
|
|
|
if constexpr (is_preprocess)
|
|
{
|
|
LoadBPRegPreprocess(command, value, m_cycles);
|
|
}
|
|
else
|
|
{
|
|
LoadBPReg(command, value, m_cycles);
|
|
INCSTAT(g_stats.this_frame.num_bp_loads);
|
|
}
|
|
}
|
|
OPCODE_CALLBACK(void OnIndexedLoad(CPArray array, u32 index, u16 address, u8 size))
|
|
{
|
|
m_cycles += 6;
|
|
|
|
if constexpr (is_preprocess)
|
|
PreprocessIndexedXF(array, index, address, size);
|
|
else
|
|
LoadIndexedXF(array, index, address, size);
|
|
}
|
|
OPCODE_CALLBACK(void OnPrimitiveCommand(OpcodeDecoder::Primitive primitive, u8 vat,
|
|
u32 vertex_size, u16 num_vertices, const u8* vertex_data))
|
|
{
|
|
// load vertices
|
|
const u32 size = vertex_size * num_vertices;
|
|
|
|
const u32 bytes =
|
|
VertexLoaderManager::RunVertices<is_preprocess>(vat, primitive, num_vertices, vertex_data);
|
|
|
|
ASSERT(bytes == size);
|
|
|
|
// 4 GPU ticks per vertex, 3 CPU ticks per GPU tick
|
|
m_cycles += num_vertices * 4 * 3 + 6;
|
|
}
|
|
// This can't be inlined since it calls Run, which makes it recursive
|
|
// m_in_display_list prevents it from actually recursing infinitely, but there's no real benefit
|
|
// to inlining Run for the display list directly.
|
|
OPCODE_CALLBACK_NOINLINE(void OnDisplayList(u32 address, u32 size))
|
|
{
|
|
m_cycles += 6;
|
|
|
|
if (m_in_display_list)
|
|
{
|
|
WARN_LOG_FMT(VIDEO, "recursive display list detected");
|
|
}
|
|
else
|
|
{
|
|
m_in_display_list = true;
|
|
|
|
auto& system = Core::System::GetInstance();
|
|
|
|
if constexpr (is_preprocess)
|
|
{
|
|
auto& memory = system.GetMemory();
|
|
const u8* const start_address = memory.GetPointerForRange(address, size);
|
|
|
|
system.GetFifo().PushFifoAuxBuffer(start_address, size);
|
|
|
|
if (start_address != nullptr)
|
|
{
|
|
Run(start_address, size, *this);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const u8* start_address;
|
|
|
|
auto& fifo = system.GetFifo();
|
|
if (fifo.UseDeterministicGPUThread())
|
|
{
|
|
start_address = static_cast<u8*>(fifo.PopFifoAuxBuffer(size));
|
|
}
|
|
else
|
|
{
|
|
auto& memory = system.GetMemory();
|
|
start_address = memory.GetPointerForRange(address, size);
|
|
}
|
|
|
|
// Avoid the crash if memory.GetPointerForRange failed ..
|
|
if (start_address != nullptr)
|
|
{
|
|
// temporarily swap dl and non-dl (small "hack" for the stats)
|
|
g_stats.SwapDL();
|
|
|
|
Run(start_address, size, *this);
|
|
INCSTAT(g_stats.this_frame.num_dlists_called);
|
|
|
|
// un-swap
|
|
g_stats.SwapDL();
|
|
}
|
|
}
|
|
|
|
m_in_display_list = false;
|
|
}
|
|
}
|
|
OPCODE_CALLBACK(void OnNop(u32 count))
|
|
{
|
|
m_cycles += 6 * count; // Hm, this means that we scan over nop streams pretty slowly...
|
|
}
|
|
OPCODE_CALLBACK(void OnUnknown(u8 opcode, const u8* data))
|
|
{
|
|
if (static_cast<Opcode>(opcode) == Opcode::GX_CMD_UNKNOWN_METRICS)
|
|
{
|
|
// 'Zelda Four Swords' calls it and checks the metrics registers after that
|
|
m_cycles += 6;
|
|
DEBUG_LOG_FMT(VIDEO, "GX 0x44");
|
|
}
|
|
else if (static_cast<Opcode>(opcode) == Opcode::GX_CMD_INVL_VC)
|
|
{
|
|
// Invalidate Vertex Cache
|
|
m_cycles += 6;
|
|
DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)");
|
|
}
|
|
else
|
|
{
|
|
auto& system = Core::System::GetInstance();
|
|
system.GetCommandProcessor().HandleUnknownOpcode(opcode, data, is_preprocess);
|
|
m_cycles += 1;
|
|
}
|
|
}
|
|
|
|
OPCODE_CALLBACK(void OnCommand(const u8* data, u32 size))
|
|
{
|
|
ASSERT(size >= 1);
|
|
if constexpr (!is_preprocess)
|
|
{
|
|
// Display lists get added directly into the FIFO stream since this same callback is used to
|
|
// process them.
|
|
if (g_record_fifo_data && static_cast<Opcode>(data[0]) != Opcode::GX_CMD_CALL_DL)
|
|
{
|
|
Core::System::GetInstance().GetFifoRecorder().WriteGPCommand(data, size);
|
|
}
|
|
}
|
|
}
|
|
|
|
OPCODE_CALLBACK(CPState& GetCPState())
|
|
{
|
|
if constexpr (is_preprocess)
|
|
return g_preprocess_cp_state;
|
|
else
|
|
return g_main_cp_state;
|
|
}
|
|
|
|
OPCODE_CALLBACK(u32 GetVertexSize(u8 vat))
|
|
{
|
|
VertexLoaderBase* loader = VertexLoaderManager::RefreshLoader<is_preprocess>(vat);
|
|
return loader->m_vertex_size;
|
|
}
|
|
|
|
u32 m_cycles = 0;
|
|
bool m_in_display_list = false;
|
|
};
|
|
|
|
template <bool is_preprocess>
|
|
u8* RunFifo(DataReader src, u32* cycles)
|
|
{
|
|
using CallbackT = RunCallback<is_preprocess>;
|
|
auto callback = CallbackT{};
|
|
u32 size = Run(src.GetPointer(), static_cast<u32>(src.size()), callback);
|
|
|
|
if (cycles != nullptr)
|
|
*cycles = callback.m_cycles;
|
|
|
|
src.Skip(size);
|
|
return src.GetPointer();
|
|
}
|
|
|
|
template u8* RunFifo<true>(DataReader src, u32* cycles);
|
|
template u8* RunFifo<false>(DataReader src, u32* cycles);
|
|
|
|
} // namespace OpcodeDecoder
|