mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-10 08:09:26 +01:00
f5c550e9cb
Fixes Bomberman Jetters in single core mode. When single core mode pauses the CPU to execute the GPU FIFO it greedily executes the whole thing. Before this commit, Finish and Token interrupts would happen instantly, not even taking into account how long the current FIFO window has taken to execute. The interrupts would be effectively backdated to the start of this execution window. This commit does two things: It pipes the current FIFO window execution time through to the interrupt scheduling and it enforces a minimum delay of 500 cycles before an interrupt will be fired.
283 lines
7.5 KiB
C++
283 lines
7.5 KiB
C++
// Copyright 2008 Dolphin Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
// DL facts:
|
|
// Ikaruga uses (nearly) NO display lists!
|
|
// Zelda WW uses TONS of display lists
|
|
// Zelda TP uses almost 100% display lists except menus (we like this!)
|
|
// Super Mario Galaxy has nearly all geometry and more than half of the state in DLs (great!)
|
|
|
|
// Note that it IS NOT GENERALLY POSSIBLE to precompile display lists! You can compile them as they
|
|
// are while interpreting them, and hope that the vertex format doesn't change, though, if you do
|
|
// it right when they are called. The reason is that the vertex format affects the sizes of the
|
|
// vertices.
|
|
|
|
#include "VideoCommon/OpcodeDecoding.h"
|
|
|
|
#include "Common/CommonTypes.h"
|
|
#include "Common/Logging/Log.h"
|
|
#include "Core/FifoPlayer/FifoRecorder.h"
|
|
#include "Core/HW/Memmap.h"
|
|
#include "VideoCommon/BPMemory.h"
|
|
#include "VideoCommon/CPMemory.h"
|
|
#include "VideoCommon/CommandProcessor.h"
|
|
#include "VideoCommon/DataReader.h"
|
|
#include "VideoCommon/Fifo.h"
|
|
#include "VideoCommon/Statistics.h"
|
|
#include "VideoCommon/VertexLoaderManager.h"
|
|
#include "VideoCommon/XFMemory.h"
|
|
|
|
namespace OpcodeDecoder
|
|
{
|
|
namespace
|
|
{
|
|
// Set by Run after it has encountered an unknown opcode, so that
// CommandProcessor::HandleUnknownOpcode is only invoked for the first one. Reset by Init().
bool s_is_fifo_error_seen = false;
|
|
|
|
// Runs the display list of `size` bytes referenced by `address` through Run (with
// in_display_list set) and returns the cycle count Run reported for it.
// Returns 0 without running anything when no data pointer could be obtained.
u32 InterpretDisplayList(u32 address, u32 size)
{
  // With the deterministic GPU thread the list bytes are popped from the FIFO aux buffer;
  // otherwise they are looked up in emulated memory.
  u8* const dl_begin = Fifo::UseDeterministicGPUThread() ?
                           static_cast<u8*>(Fifo::PopFifoAuxBuffer(size)) :
                           Memory::GetPointer(address);

  // Bail out instead of crashing if the lookup failed.
  if (dl_begin == nullptr)
    return 0;

  u32 dl_cycles = 0;

  // Small "hack" for the stats: swap the dl and non-dl counters while the list runs ...
  g_stats.SwapDL();

  Run(DataReader(dl_begin, dl_begin + size), &dl_cycles, true);
  INCSTAT(g_stats.this_frame.num_dlists_called);

  // ... and swap them back afterwards.
  g_stats.SwapDL();

  return dl_cycles;
}
|
|
|
|
// Preprocess-pass counterpart of InterpretDisplayList: pushes the display list of `size` bytes
// at `address` into the FIFO aux buffer and then runs it through Run<true>.
void InterpretDisplayListPreprocess(u32 address, u32 size)
{
  u8* const dl_begin = Memory::GetPointer(address);

  // The push happens unconditionally, i.e. before the pointer is checked for null.
  Fifo::PushFifoAuxBuffer(dl_begin, size);

  // Only decode the list when the memory lookup actually succeeded.
  if (dl_begin != nullptr)
  {
    Run<true>(DataReader(dl_begin, dl_begin + size), nullptr, true);
  }
}
|
|
} // Anonymous namespace
|
|
|
|
// When true, the non-preprocess Run passes every decoded command except GX_CMD_CALL_DL to
// FifoRecorder::WriteGPCommand.
bool g_record_fifo_data = false;
|
|
|
|
void Init()
|
|
{
|
|
s_is_fifo_error_seen = false;
|
|
}
|
|
|
|
template <bool is_preprocess>
|
|
u8* Run(DataReader src, u32* cycles, bool in_display_list)
|
|
{
|
|
u32 total_cycles = 0;
|
|
u8* opcode_start = nullptr;
|
|
|
|
const auto finish_up = [cycles, &opcode_start, &total_cycles] {
|
|
if (cycles != nullptr)
|
|
{
|
|
*cycles = total_cycles;
|
|
}
|
|
return opcode_start;
|
|
};
|
|
|
|
while (true)
|
|
{
|
|
opcode_start = src.GetPointer();
|
|
|
|
if (!src.size())
|
|
return finish_up();
|
|
|
|
const u8 cmd_byte = src.Read<u8>();
|
|
switch (cmd_byte)
|
|
{
|
|
case GX_NOP:
|
|
total_cycles += 6; // Hm, this means that we scan over nop streams pretty slowly...
|
|
break;
|
|
|
|
case GX_UNKNOWN_RESET:
|
|
total_cycles += 6; // Datel software uses this command
|
|
DEBUG_LOG_FMT(VIDEO, "GX Reset?: {:08x}", cmd_byte);
|
|
break;
|
|
|
|
case GX_LOAD_CP_REG:
|
|
{
|
|
if (src.size() < 1 + 4)
|
|
return finish_up();
|
|
|
|
total_cycles += 12;
|
|
|
|
const u8 sub_cmd = src.Read<u8>();
|
|
const u32 value = src.Read<u32>();
|
|
LoadCPReg(sub_cmd, value, is_preprocess);
|
|
if constexpr (!is_preprocess)
|
|
INCSTAT(g_stats.this_frame.num_cp_loads);
|
|
}
|
|
break;
|
|
|
|
case GX_LOAD_XF_REG:
|
|
{
|
|
if (src.size() < 4)
|
|
return finish_up();
|
|
|
|
const u32 cmd2 = src.Read<u32>();
|
|
const u32 transfer_size = ((cmd2 >> 16) & 15) + 1;
|
|
if (src.size() < transfer_size * sizeof(u32))
|
|
return finish_up();
|
|
|
|
total_cycles += 18 + 6 * transfer_size;
|
|
|
|
if constexpr (!is_preprocess)
|
|
{
|
|
const u32 xf_address = cmd2 & 0xFFFF;
|
|
LoadXFReg(transfer_size, xf_address, src);
|
|
|
|
INCSTAT(g_stats.this_frame.num_xf_loads);
|
|
}
|
|
src.Skip<u32>(transfer_size);
|
|
}
|
|
break;
|
|
|
|
case GX_LOAD_INDX_A: // Used for position matrices
|
|
case GX_LOAD_INDX_B: // Used for normal matrices
|
|
case GX_LOAD_INDX_C: // Used for postmatrices
|
|
case GX_LOAD_INDX_D: // Used for lights
|
|
{
|
|
if (src.size() < 4)
|
|
return finish_up();
|
|
|
|
total_cycles += 6;
|
|
|
|
// Map the command byte to its ref array.
|
|
// GX_LOAD_INDX_A (32) -> 0xC
|
|
// GX_LOAD_INDX_B (40) -> 0xD
|
|
// GX_LOAD_INDX_C (48) -> 0xE
|
|
// GX_LOAD_INDX_D (56) -> 0xF
|
|
const int ref_array = (cmd_byte / 8) + 8;
|
|
|
|
if constexpr (is_preprocess)
|
|
PreprocessIndexedXF(src.Read<u32>(), ref_array);
|
|
else
|
|
LoadIndexedXF(src.Read<u32>(), ref_array);
|
|
}
|
|
break;
|
|
|
|
case GX_CMD_CALL_DL:
|
|
{
|
|
if (src.size() < 8)
|
|
return finish_up();
|
|
|
|
const u32 address = src.Read<u32>();
|
|
const u32 count = src.Read<u32>();
|
|
|
|
if (in_display_list)
|
|
{
|
|
total_cycles += 6;
|
|
INFO_LOG_FMT(VIDEO, "recursive display list detected");
|
|
}
|
|
else
|
|
{
|
|
if constexpr (is_preprocess)
|
|
InterpretDisplayListPreprocess(address, count);
|
|
else
|
|
total_cycles += 6 + InterpretDisplayList(address, count);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after
|
|
// that
|
|
total_cycles += 6;
|
|
DEBUG_LOG_FMT(VIDEO, "GX 0x44: {:08x}", cmd_byte);
|
|
break;
|
|
|
|
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
|
|
total_cycles += 6;
|
|
DEBUG_LOG_FMT(VIDEO, "Invalidate (vertex cache?)");
|
|
break;
|
|
|
|
case GX_LOAD_BP_REG:
|
|
// In skipped_frame case: We have to let BP writes through because they set
|
|
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
|
|
{
|
|
if (src.size() < 4)
|
|
return finish_up();
|
|
|
|
total_cycles += 12;
|
|
|
|
const u32 bp_cmd = src.Read<u32>();
|
|
if constexpr (is_preprocess)
|
|
{
|
|
LoadBPRegPreprocess(bp_cmd, total_cycles);
|
|
}
|
|
else
|
|
{
|
|
LoadBPReg(bp_cmd, total_cycles);
|
|
INCSTAT(g_stats.this_frame.num_bp_loads);
|
|
}
|
|
}
|
|
break;
|
|
|
|
// draw primitives
|
|
default:
|
|
if ((cmd_byte & 0xC0) == 0x80)
|
|
{
|
|
// load vertices
|
|
if (src.size() < 2)
|
|
return finish_up();
|
|
|
|
const u16 num_vertices = src.Read<u16>();
|
|
const int bytes = VertexLoaderManager::RunVertices(
|
|
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
|
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, num_vertices, src, is_preprocess);
|
|
|
|
if (bytes < 0)
|
|
return finish_up();
|
|
|
|
src.Skip(bytes);
|
|
|
|
// 4 GPU ticks per vertex, 3 CPU ticks per GPU tick
|
|
total_cycles += num_vertices * 4 * 3 + 6;
|
|
}
|
|
else
|
|
{
|
|
if (!s_is_fifo_error_seen)
|
|
CommandProcessor::HandleUnknownOpcode(cmd_byte, opcode_start, is_preprocess);
|
|
ERROR_LOG_FMT(VIDEO, "FIFO: Unknown Opcode({:#04x} @ {}, preprocessing = {})", cmd_byte,
|
|
fmt::ptr(opcode_start), is_preprocess ? "yes" : "no");
|
|
s_is_fifo_error_seen = true;
|
|
total_cycles += 1;
|
|
}
|
|
break;
|
|
}
|
|
|
|
// Display lists get added directly into the FIFO stream
|
|
if constexpr (!is_preprocess)
|
|
{
|
|
if (g_record_fifo_data && cmd_byte != GX_CMD_CALL_DL)
|
|
{
|
|
const u8* const opcode_end = src.GetPointer();
|
|
FifoRecorder::GetInstance().WriteGPCommand(opcode_start, u32(opcode_end - opcode_start));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Explicit instantiation of the preprocess variant (is_preprocess = true).
template u8* Run<true>(DataReader src, u32* cycles, bool in_display_list);
|
|
// Explicit instantiation of the regular, non-preprocess variant (is_preprocess = false).
template u8* Run<false>(DataReader src, u32* cycles, bool in_display_list);
|
|
|
|
} // namespace OpcodeDecoder
|