mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 07:21:14 +01:00
Merge pull request #1812 from phire/real_zfreeze
Add proper zfreeze support.
This commit is contained in:
commit
43036af944
@ -64,7 +64,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
|
||||
static std::thread g_save_thread;
|
||||
|
||||
// Don't forget to increase this after doing changes on the savestate system
|
||||
static const u32 STATE_VERSION = 38;
|
||||
static const u32 STATE_VERSION = 39;
|
||||
|
||||
enum
|
||||
{
|
||||
|
@ -59,7 +59,7 @@ DXGI_FORMAT VarToD3D(VarType t, int size, bool integer)
|
||||
|
||||
void D3DVertexFormat::Initialize(const PortableVertexDeclaration &_vtx_decl)
|
||||
{
|
||||
vertex_stride = _vtx_decl.stride;
|
||||
vtx_decl = _vtx_decl;
|
||||
memset(m_elems, 0, sizeof(m_elems));
|
||||
const AttributeFormat* format = &_vtx_decl.position;
|
||||
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "VideoCommon/ImageWrite.h"
|
||||
#include "VideoCommon/OnScreenDisplay.h"
|
||||
#include "VideoCommon/PixelEngine.h"
|
||||
#include "VideoCommon/PixelShaderManager.h"
|
||||
#include "VideoCommon/Statistics.h"
|
||||
#include "VideoCommon/VertexShaderManager.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
@ -231,6 +232,7 @@ Renderer::Renderer(void *&window_handle)
|
||||
s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0;
|
||||
s_last_xfb_mode = g_ActiveConfig.bUseRealXFB;
|
||||
CalculateTargetSize(s_backbuffer_width, s_backbuffer_height);
|
||||
PixelShaderManager::SetEfbScaleChanged();
|
||||
|
||||
SetupDeviceObjects();
|
||||
|
||||
@ -946,6 +948,8 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, co
|
||||
s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0;
|
||||
CalculateTargetSize(s_backbuffer_width, s_backbuffer_height);
|
||||
|
||||
PixelShaderManager::SetEfbScaleChanged();
|
||||
|
||||
D3D::context->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV(), nullptr);
|
||||
|
||||
delete g_framebuffer_manager;
|
||||
|
@ -58,7 +58,7 @@ static void SetPointer(u32 attrib, u32 stride, const AttributeFormat &format)
|
||||
void GLVertexFormat::Initialize(const PortableVertexDeclaration &_vtx_decl)
|
||||
{
|
||||
this->vtx_decl = _vtx_decl;
|
||||
vertex_stride = vtx_decl.stride;
|
||||
u32 vertex_stride = _vtx_decl.stride;
|
||||
|
||||
// We will not allow vertex components causing uneven strides.
|
||||
if (vertex_stride & 3)
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include "VideoCommon/ImageWrite.h"
|
||||
#include "VideoCommon/OnScreenDisplay.h"
|
||||
#include "VideoCommon/PixelEngine.h"
|
||||
#include "VideoCommon/PixelShaderManager.h"
|
||||
#include "VideoCommon/Statistics.h"
|
||||
#include "VideoCommon/VertexLoaderManager.h"
|
||||
#include "VideoCommon/VertexShaderGen.h"
|
||||
@ -618,6 +619,8 @@ Renderer::Renderer()
|
||||
s_last_efb_scale = g_ActiveConfig.iEFBScale;
|
||||
CalculateTargetSize(s_backbuffer_width, s_backbuffer_height);
|
||||
|
||||
PixelShaderManager::SetEfbScaleChanged();
|
||||
|
||||
// Because of the fixed framebuffer size we need to disable the resolution
|
||||
// options while running
|
||||
g_Config.bRunning = true;
|
||||
@ -1681,6 +1684,8 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, co
|
||||
delete g_framebuffer_manager;
|
||||
g_framebuffer_manager = new FramebufferManager(s_target_width, s_target_height,
|
||||
s_MSAASamples);
|
||||
|
||||
PixelShaderManager::SetEfbScaleChanged();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -43,6 +43,8 @@ static size_t s_index_offset;
|
||||
VertexManager::VertexManager()
|
||||
{
|
||||
CreateDeviceObjects();
|
||||
CpuVBuffer.resize(MAX_VBUFFER_SIZE);
|
||||
CpuIBuffer.resize(MAX_IBUFFER_SIZE);
|
||||
}
|
||||
|
||||
VertexManager::~VertexManager()
|
||||
@ -81,14 +83,25 @@ void VertexManager::PrepareDrawBuffers(u32 stride)
|
||||
|
||||
void VertexManager::ResetBuffer(u32 stride)
|
||||
{
|
||||
auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride);
|
||||
s_pCurBufferPointer = s_pBaseBufferPointer = buffer.first;
|
||||
s_pEndBufferPointer = buffer.first + MAXVBUFFERSIZE;
|
||||
s_baseVertex = buffer.second / stride;
|
||||
if (CullAll)
|
||||
{
|
||||
// This buffer isn't getting sent to the GPU. Just allocate it on the cpu.
|
||||
s_pCurBufferPointer = s_pBaseBufferPointer = CpuVBuffer.data();
|
||||
s_pEndBufferPointer = s_pBaseBufferPointer + CpuVBuffer.size();
|
||||
|
||||
buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16));
|
||||
IndexGenerator::Start((u16*)buffer.first);
|
||||
s_index_offset = buffer.second;
|
||||
IndexGenerator::Start((u16*)CpuIBuffer.data());
|
||||
}
|
||||
else
|
||||
{
|
||||
auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride);
|
||||
s_pCurBufferPointer = s_pBaseBufferPointer = buffer.first;
|
||||
s_pEndBufferPointer = buffer.first + MAXVBUFFERSIZE;
|
||||
s_baseVertex = buffer.second / stride;
|
||||
|
||||
buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16));
|
||||
IndexGenerator::Start((u16*)buffer.first);
|
||||
s_index_offset = buffer.second;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexManager::Draw(u32 stride)
|
||||
|
@ -13,8 +13,6 @@ namespace OGL
|
||||
{
|
||||
class GLVertexFormat : public NativeVertexFormat
|
||||
{
|
||||
PortableVertexDeclaration vtx_decl;
|
||||
|
||||
public:
|
||||
GLVertexFormat();
|
||||
~GLVertexFormat();
|
||||
@ -42,10 +40,15 @@ public:
|
||||
GLuint m_last_vao;
|
||||
protected:
|
||||
virtual void ResetBuffer(u32 stride) override;
|
||||
|
||||
private:
|
||||
void Draw(u32 stride);
|
||||
void vFlush(bool useDstAlpha) override;
|
||||
void PrepareDrawBuffers(u32 stride);
|
||||
|
||||
// Alternative buffers in CPU memory for primitives we are going to discard.
|
||||
std::vector<u8> CpuVBuffer;
|
||||
std::vector<u16> CpuIBuffer;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -23,6 +23,8 @@ struct PixelShaderConstants
|
||||
int4 fogcolor;
|
||||
int4 fogi;
|
||||
float4 fogf[2];
|
||||
float4 zslope;
|
||||
float4 efbscale;
|
||||
};
|
||||
|
||||
struct VertexShaderConstants
|
||||
|
@ -26,7 +26,11 @@ void GeometryShaderManager::Init()
|
||||
{
|
||||
memset(&constants, 0, sizeof(constants));
|
||||
|
||||
Dirty();
|
||||
// Init any initial constants which aren't zero when bpmem is zero.
|
||||
SetViewportChanged();
|
||||
SetProjectionChanged();
|
||||
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
void GeometryShaderManager::Shutdown()
|
||||
@ -35,12 +39,9 @@ void GeometryShaderManager::Shutdown()
|
||||
|
||||
void GeometryShaderManager::Dirty()
|
||||
{
|
||||
SetViewportChanged();
|
||||
SetProjectionChanged();
|
||||
SetLinePtWidthChanged();
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
SetTexCoordChanged(i);
|
||||
// This function is called after a savestate is loaded.
|
||||
// Any constants that can change based on settings should be re-calculated
|
||||
s_projection_changed = true;
|
||||
|
||||
dirty = true;
|
||||
}
|
||||
@ -110,9 +111,14 @@ void GeometryShaderManager::SetTexCoordChanged(u8 texmapid)
|
||||
|
||||
void GeometryShaderManager::DoState(PointerWrap &p)
|
||||
{
|
||||
p.Do(s_projection_changed);
|
||||
p.Do(s_viewport_changed);
|
||||
|
||||
p.Do(constants);
|
||||
|
||||
if (p.GetMode() == PointerWrap::MODE_READ)
|
||||
{
|
||||
// Reload current state from global GPU state
|
||||
// Fixup the current state from global GPU state
|
||||
// NOTE: This requires that all GPU memory has been loaded already.
|
||||
Dirty();
|
||||
}
|
||||
|
@ -109,7 +109,8 @@ public:
|
||||
virtual void Initialize(const PortableVertexDeclaration &vtx_decl) = 0;
|
||||
virtual void SetupVertexPointers() = 0;
|
||||
|
||||
u32 GetVertexStride() const { return vertex_stride; }
|
||||
u32 GetVertexStride() const { return vtx_decl.stride; }
|
||||
const PortableVertexDeclaration& GetVertexDeclaration() const { return vtx_decl; }
|
||||
|
||||
// TODO: move this under private:
|
||||
u32 m_components; // VB_HAS_X. Bitmask telling what vertex components are present.
|
||||
@ -118,5 +119,5 @@ protected:
|
||||
// Let subclasses construct.
|
||||
NativeVertexFormat() {}
|
||||
|
||||
u32 vertex_stride;
|
||||
PortableVertexDeclaration vtx_decl;
|
||||
};
|
||||
|
@ -144,6 +144,7 @@ template<class T> static inline void WriteTevRegular(T& out, const char* compone
|
||||
template<class T> static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
|
||||
template<class T> static inline void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
|
||||
template<class T> static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data);
|
||||
template<class T> static inline void WritePerPixelDepth(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType);
|
||||
|
||||
template<class T>
|
||||
static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
|
||||
@ -228,6 +229,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
||||
"\tint4 " I_FOGCOLOR";\n"
|
||||
"\tint4 " I_FOGI";\n"
|
||||
"\tfloat4 " I_FOGF"[2];\n"
|
||||
"\tfloat4 " I_ZSLOPE";\n"
|
||||
"\tfloat4 " I_EFBSCALE";\n"
|
||||
"};\n");
|
||||
|
||||
if (g_ActiveConfig.bEnablePixelLighting)
|
||||
@ -268,8 +271,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
||||
GenerateVSOutputMembers<T>(out, ApiType);
|
||||
out.Write("};\n");
|
||||
|
||||
const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED);
|
||||
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z);
|
||||
const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest()
|
||||
&& (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
|
||||
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
|
||||
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
|
||||
&& !bpmem.genMode.zfreeze;
|
||||
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || bpmem.genMode.zfreeze;
|
||||
|
||||
if (forced_early_z)
|
||||
{
|
||||
@ -362,7 +369,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
||||
out.Write("void main(\n");
|
||||
out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n",
|
||||
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
|
||||
per_pixel_depth ? "\n out float depth : SV_Depth," : "");
|
||||
(per_pixel_depth && bpmem.zmode.testenable) ? "\n out float depth : SV_Depth," : "");
|
||||
|
||||
out.Write(" in centroid float4 colors_0 : COLOR0,\n");
|
||||
out.Write(" in centroid float4 colors_1 : COLOR1\n");
|
||||
@ -538,10 +545,13 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
||||
uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
|
||||
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
|
||||
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
|
||||
uid_data->zfreeze = bpmem.genMode.zfreeze;
|
||||
|
||||
// Note: z-textures are not written to depth buffer if early depth test is used
|
||||
if (per_pixel_depth && bpmem.UseEarlyDepthTest())
|
||||
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
|
||||
{
|
||||
WritePerPixelDepth<T>(out, uid_data, ApiType);
|
||||
}
|
||||
|
||||
// Note: depth texture output is only written to depth buffer if late depth test is used
|
||||
// theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway
|
||||
@ -555,7 +565,9 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
||||
}
|
||||
|
||||
if (per_pixel_depth && bpmem.UseLateDepthTest())
|
||||
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
|
||||
{
|
||||
WritePerPixelDepth<T>(out, uid_data, ApiType);
|
||||
}
|
||||
|
||||
if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
|
||||
{
|
||||
@ -1015,7 +1027,11 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_T
|
||||
// Tests seem to have proven that writing depth even when the alpha test fails is more
|
||||
// important that a reliable alpha test, so we just force the alpha test to always succeed.
|
||||
// At least this seems to be less buggy.
|
||||
uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && !g_ActiveConfig.backend_info.bSupportsEarlyZ;
|
||||
uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest()
|
||||
&& bpmem.zmode.updateenable
|
||||
&& !g_ActiveConfig.backend_info.bSupportsEarlyZ
|
||||
&& !bpmem.genMode.zfreeze;
|
||||
|
||||
if (!uid_data->alpha_test_use_zcomploc_hack)
|
||||
{
|
||||
out.Write("\t\tdiscard;\n");
|
||||
@ -1095,6 +1111,29 @@ static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data)
|
||||
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n");
|
||||
}
|
||||
|
||||
template<class T>
|
||||
static inline void WritePerPixelDepth(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType)
|
||||
{
|
||||
if (bpmem.genMode.zfreeze)
|
||||
{
|
||||
out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE);
|
||||
out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE);
|
||||
|
||||
out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE".xy;\n");
|
||||
|
||||
// Opengl has reversed vertical screenspace coordiantes
|
||||
if (ApiType == API_OPENGL)
|
||||
out.Write("\tscreenpos.y = %i - screenpos.y;\n", EFB_HEIGHT);
|
||||
|
||||
out.Write("\tdepth = float(" I_ZSLOPE".z + " I_ZSLOPE".x * screenpos.x + " I_ZSLOPE".y * screenpos.y) / float(0xFFFFFF);\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
|
||||
{
|
||||
GeneratePixelShader<PixelShaderUid>(object, dstAlphaMode, ApiType, components);
|
||||
|
@ -21,8 +21,10 @@
|
||||
#define C_FOGCOLOR (C_INDTEXMTX + 6) //27
|
||||
#define C_FOGI (C_FOGCOLOR + 1) //28
|
||||
#define C_FOGF (C_FOGI + 1) //29
|
||||
#define C_ZSLOPE (C_FOGF + 2) //31
|
||||
#define C_EFBSCALE (C_ZSLOPE + 1) //32
|
||||
|
||||
#define C_PENVCONST_END (C_FOGF + 2)
|
||||
#define C_PENVCONST_END (C_EFBSCALE + 1)
|
||||
|
||||
// Different ways to achieve rendering with destination alpha
|
||||
enum DSTALPHA_MODE
|
||||
@ -63,6 +65,10 @@ struct pixel_shader_uid_data
|
||||
u32 early_ztest : 1;
|
||||
u32 bounding_box : 1;
|
||||
|
||||
// TODO: 31 bits of padding is a waste. Can we free up some bits elsewhere?
|
||||
u32 zfreeze : 1;
|
||||
u32 pad : 31;
|
||||
|
||||
u32 texMtxInfo_n_projection : 8; // 8x1 bit
|
||||
u32 tevindref_bi0 : 3;
|
||||
u32 tevindref_bc0 : 3;
|
||||
|
@ -15,41 +15,18 @@
|
||||
bool PixelShaderManager::s_bFogRangeAdjustChanged;
|
||||
bool PixelShaderManager::s_bViewPortChanged;
|
||||
|
||||
std::array<int4,4> PixelShaderManager::s_tev_color;
|
||||
std::array<int4,4> PixelShaderManager::s_tev_konst_color;
|
||||
|
||||
PixelShaderConstants PixelShaderManager::constants;
|
||||
bool PixelShaderManager::dirty;
|
||||
|
||||
void PixelShaderManager::Init()
|
||||
{
|
||||
memset(&constants, 0, sizeof(constants));
|
||||
memset(s_tev_color.data(), 0, sizeof(s_tev_color));
|
||||
memset(s_tev_konst_color.data(), 0, sizeof(s_tev_konst_color));
|
||||
|
||||
Dirty();
|
||||
}
|
||||
|
||||
void PixelShaderManager::Dirty()
|
||||
{
|
||||
// Init any initial constants which aren't zero when bpmem is zero.
|
||||
s_bFogRangeAdjustChanged = true;
|
||||
s_bViewPortChanged = true;
|
||||
s_bViewPortChanged = false;
|
||||
|
||||
for (unsigned index = 0; index < s_tev_color.size(); ++index)
|
||||
{
|
||||
for (int comp = 0; comp < 4; ++comp)
|
||||
{
|
||||
SetTevColor(index, comp, s_tev_color[index][comp]);
|
||||
SetTevKonstColor(index, comp, s_tev_konst_color[index][comp]);
|
||||
}
|
||||
}
|
||||
|
||||
SetAlpha();
|
||||
SetDestAlpha();
|
||||
SetZTextureBias();
|
||||
SetViewportChanged();
|
||||
SetIndTexScaleChanged(false);
|
||||
SetIndTexScaleChanged(true);
|
||||
SetEfbScaleChanged();
|
||||
SetIndMatrixChanged(0);
|
||||
SetIndMatrixChanged(1);
|
||||
SetIndMatrixChanged(2);
|
||||
@ -62,8 +39,20 @@ void PixelShaderManager::Dirty()
|
||||
SetTexCoordChanged(5);
|
||||
SetTexCoordChanged(6);
|
||||
SetTexCoordChanged(7);
|
||||
SetFogColorChanged();
|
||||
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::Dirty()
|
||||
{
|
||||
// This function is called after a savestate is loaded.
|
||||
// Any constants that can change based on settings should be re-calculated
|
||||
s_bFogRangeAdjustChanged = true;
|
||||
|
||||
SetEfbScaleChanged();
|
||||
SetFogParamChanged();
|
||||
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::Shutdown()
|
||||
@ -117,7 +106,7 @@ void PixelShaderManager::SetConstants()
|
||||
void PixelShaderManager::SetTevColor(int index, int component, s32 value)
|
||||
{
|
||||
auto& c = constants.colors[index];
|
||||
c[component] = s_tev_color[index][component] = value;
|
||||
c[component] = value;
|
||||
dirty = true;
|
||||
|
||||
PRIM_LOG("tev color%d: %d %d %d %d\n", index, c[0], c[1], c[2], c[3]);
|
||||
@ -126,7 +115,7 @@ void PixelShaderManager::SetTevColor(int index, int component, s32 value)
|
||||
void PixelShaderManager::SetTevKonstColor(int index, int component, s32 value)
|
||||
{
|
||||
auto& c = constants.kcolors[index];
|
||||
c[component] = s_tev_konst_color[index][component] = value;
|
||||
c[component] = value;
|
||||
dirty = true;
|
||||
|
||||
PRIM_LOG("tev konst color%d: %d %d %d %d\n", index, c[0], c[1], c[2], c[3]);
|
||||
@ -168,6 +157,21 @@ void PixelShaderManager::SetViewportChanged()
|
||||
s_bFogRangeAdjustChanged = true; // TODO: Shouldn't be necessary with an accurate fog range adjust implementation
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetEfbScaleChanged()
|
||||
{
|
||||
constants.efbscale[0] = 1.0f / float(Renderer::EFBToScaledXf(1));
|
||||
constants.efbscale[1] = 1.0f / float(Renderer::EFBToScaledYf(1));
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetZSlope(float dfdx, float dfdy, float f0)
|
||||
{
|
||||
constants.zslope[0] = dfdx;
|
||||
constants.zslope[1] = dfdy;
|
||||
constants.zslope[2] = f0;
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetIndTexScaleChanged(bool high)
|
||||
{
|
||||
constants.indtexscale[high][0] = bpmem.texscale[high].ss0;
|
||||
@ -278,12 +282,14 @@ void PixelShaderManager::SetFogRangeAdjustChanged()
|
||||
|
||||
void PixelShaderManager::DoState(PointerWrap &p)
|
||||
{
|
||||
p.DoArray(s_tev_color);
|
||||
p.DoArray(s_tev_konst_color);
|
||||
p.Do(s_bFogRangeAdjustChanged);
|
||||
p.Do(s_bViewPortChanged);
|
||||
|
||||
p.Do(constants);
|
||||
|
||||
if (p.GetMode() == PointerWrap::MODE_READ)
|
||||
{
|
||||
// Reload current state from global GPU state
|
||||
// Fixup the current state from global GPU state
|
||||
// NOTE: This requires that all GPU memory has been loaded already.
|
||||
Dirty();
|
||||
}
|
||||
|
@ -36,6 +36,8 @@ public:
|
||||
static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt);
|
||||
static void SetZTextureBias();
|
||||
static void SetViewportChanged();
|
||||
static void SetEfbScaleChanged();
|
||||
static void SetZSlope(float dfdx, float dfdy, float f0);
|
||||
static void SetIndMatrixChanged(int matrixidx);
|
||||
static void SetTevKSelChanged(int id);
|
||||
static void SetZTextureTypeChanged();
|
||||
@ -50,9 +52,4 @@ public:
|
||||
|
||||
static bool s_bFogRangeAdjustChanged;
|
||||
static bool s_bViewPortChanged;
|
||||
|
||||
// These colors aren't available from global BP state,
|
||||
// hence we keep a copy of them around.
|
||||
static std::array<int4,4> s_tev_color;
|
||||
static std::array<int4,4> s_tev_konst_color;
|
||||
};
|
||||
|
@ -291,6 +291,8 @@ static inline void AssignVSOutputMembers(T& object, const char* a, const char* b
|
||||
#define I_FOGCOLOR "cfogcolor"
|
||||
#define I_FOGI "cfogi"
|
||||
#define I_FOGF "cfogf"
|
||||
#define I_ZSLOPE "czslope"
|
||||
#define I_EFBSCALE "cefbscale"
|
||||
|
||||
#define I_POSNORMALMATRIX "cpnmtx"
|
||||
#define I_PROJECTION "cproj"
|
||||
|
@ -149,19 +149,20 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo
|
||||
if ((int)src.size() < size)
|
||||
return -1;
|
||||
|
||||
if (skip_drawing || (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5))
|
||||
{
|
||||
// if cull mode is CULL_ALL, ignore triangles and quads
|
||||
if (skip_drawing)
|
||||
return size;
|
||||
}
|
||||
|
||||
// If the native vertex format changed, force a flush.
|
||||
if (loader->m_native_vertex_format != s_current_vtx_fmt)
|
||||
VertexManager::Flush();
|
||||
s_current_vtx_fmt = loader->m_native_vertex_format;
|
||||
|
||||
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
|
||||
// They still need to go through vertex loading, because we need to calculate a zfreeze reference slope.
|
||||
bool cullall = (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5);
|
||||
|
||||
DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count,
|
||||
loader->m_native_vtx_decl.stride);
|
||||
loader->m_native_vtx_decl.stride, cullall);
|
||||
|
||||
count = loader->RunVertices(primitive, count, src, dst);
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "VideoCommon/RenderBase.h"
|
||||
#include "VideoCommon/Statistics.h"
|
||||
#include "VideoCommon/TextureCacheBase.h"
|
||||
#include "VideoCommon/VertexLoaderManager.h"
|
||||
#include "VideoCommon/VertexManagerBase.h"
|
||||
#include "VideoCommon/VertexShaderManager.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
@ -25,7 +26,10 @@ u8 *VertexManager::s_pEndBufferPointer;
|
||||
|
||||
PrimitiveType VertexManager::current_primitive_type;
|
||||
|
||||
Slope VertexManager::ZSlope;
|
||||
|
||||
bool VertexManager::IsFlushed;
|
||||
bool VertexManager::CullAll;
|
||||
|
||||
static const PrimitiveType primitive_from_gx[8] = {
|
||||
PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS
|
||||
@ -41,6 +45,7 @@ static const PrimitiveType primitive_from_gx[8] = {
|
||||
VertexManager::VertexManager()
|
||||
{
|
||||
IsFlushed = true;
|
||||
CullAll = false;
|
||||
}
|
||||
|
||||
VertexManager::~VertexManager()
|
||||
@ -52,7 +57,7 @@ u32 VertexManager::GetRemainingSize()
|
||||
return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
|
||||
}
|
||||
|
||||
DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
|
||||
DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall)
|
||||
{
|
||||
// The SSE vertex loader can write up to 4 bytes past the end
|
||||
u32 const needed_vertex_bytes = count * stride + 4;
|
||||
@ -78,6 +83,8 @@ DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32
|
||||
"Increase MAXVBUFFERSIZE or we need primitive breaking after all.");
|
||||
}
|
||||
|
||||
CullAll = cullall;
|
||||
|
||||
// need to alloc new buffer
|
||||
if (IsFlushed)
|
||||
{
|
||||
@ -189,45 +196,66 @@ void VertexManager::Flush()
|
||||
(int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable, (bpmem.alpha_test.hex>>16)&0xff);
|
||||
#endif
|
||||
|
||||
BitSet32 usedtextures;
|
||||
for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
|
||||
if (bpmem.tevorders[i / 2].getEnable(i & 1))
|
||||
usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true;
|
||||
|
||||
if (bpmem.genMode.numindstages > 0)
|
||||
for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
|
||||
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
|
||||
usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
|
||||
|
||||
for (unsigned int i : usedtextures)
|
||||
// If the primitive is marked CullAll, all we need to do is update the vertex constants and calculate the zfreeze reference slope.
|
||||
if (!CullAll)
|
||||
{
|
||||
g_renderer->SetSamplerState(i & 3, i >> 2);
|
||||
const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i);
|
||||
BitSet32 usedtextures;
|
||||
for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
|
||||
if (bpmem.tevorders[i / 2].getEnable(i & 1))
|
||||
usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true;
|
||||
|
||||
if (tentry)
|
||||
if (bpmem.genMode.numindstages > 0)
|
||||
for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
|
||||
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
|
||||
usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
|
||||
|
||||
for (unsigned int i : usedtextures)
|
||||
{
|
||||
// 0s are probably for no manual wrapping needed.
|
||||
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0);
|
||||
g_renderer->SetSamplerState(i & 3, i >> 2);
|
||||
const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i);
|
||||
|
||||
if (tentry)
|
||||
{
|
||||
// 0s are probably for no manual wrapping needed.
|
||||
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0);
|
||||
}
|
||||
else
|
||||
ERROR_LOG(VIDEO, "error loading texture");
|
||||
}
|
||||
else
|
||||
ERROR_LOG(VIDEO, "error loading texture");
|
||||
}
|
||||
|
||||
// set global constants
|
||||
// set global vertex constants
|
||||
VertexShaderManager::SetConstants();
|
||||
GeometryShaderManager::SetConstants();
|
||||
PixelShaderManager::SetConstants();
|
||||
|
||||
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass &&
|
||||
bpmem.dstalpha.enable &&
|
||||
bpmem.blendmode.alphaupdate &&
|
||||
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
|
||||
// Calculate ZSlope for zfreeze
|
||||
if (!bpmem.genMode.zfreeze)
|
||||
{
|
||||
// Must be done after VertexShaderManager::SetConstants()
|
||||
CalculateZSlope(VertexLoaderManager::GetCurrentVertexFormat());
|
||||
}
|
||||
else if (ZSlope.dirty && !CullAll) // or apply any dirty ZSlopes
|
||||
{
|
||||
PixelShaderManager::SetZSlope(ZSlope.dfdx, ZSlope.dfdy, ZSlope.f0);
|
||||
ZSlope.dirty = false;
|
||||
}
|
||||
|
||||
if (PerfQueryBase::ShouldEmulate())
|
||||
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
||||
g_vertex_manager->vFlush(useDstAlpha);
|
||||
if (PerfQueryBase::ShouldEmulate())
|
||||
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
||||
if (!CullAll)
|
||||
{
|
||||
// set the rest of the global constants
|
||||
GeometryShaderManager::SetConstants();
|
||||
PixelShaderManager::SetConstants();
|
||||
|
||||
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass &&
|
||||
bpmem.dstalpha.enable &&
|
||||
bpmem.blendmode.alphaupdate &&
|
||||
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
|
||||
|
||||
if (PerfQueryBase::ShouldEmulate())
|
||||
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
||||
g_vertex_manager->vFlush(useDstAlpha);
|
||||
if (PerfQueryBase::ShouldEmulate())
|
||||
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
||||
}
|
||||
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
|
||||
|
||||
@ -235,9 +263,69 @@ void VertexManager::Flush()
|
||||
ERROR_LOG(VIDEO, "xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.", xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());
|
||||
|
||||
IsFlushed = true;
|
||||
CullAll = false;
|
||||
}
|
||||
|
||||
void VertexManager::DoState(PointerWrap& p)
|
||||
{
|
||||
p.Do(ZSlope);
|
||||
g_vertex_manager->vDoState(p);
|
||||
}
|
||||
|
||||
// Derives the screen-space depth plane of the most recently loaded triangle and
// stores it in ZSlope, so that a following flush with zfreeze enabled can use it.
//
// The last three vertices in the vertex buffer are transformed to clip space
// with VertexShaderManager::TransformToClipSpace(), perspective-divided and
// mapped through the XF viewport. The plane z = f0 + dfdx * x + dfdy * y passing
// through those three points is then solved for and ZSlope is marked dirty.
//
// ZSlope is left untouched when the buffer holds fewer than three vertices or
// when the triangle is degenerate in screen space.
//
// format: native vertex format describing the layout (stride, position offset,
//         optional per-vertex position matrix index) of the buffered vertices.
void VertexManager::CalculateZSlope(NativeVertexFormat *format)
{
	// Only read here, so bind by reference instead of copying the declaration.
	const PortableVertexDeclaration& vert_decl = format->GetVertexDeclaration();

	// The loop below walks back three vertices from the write cursor. Make sure
	// that many have actually been written, otherwise we would read memory in
	// front of the buffer.
	const size_t buffered_bytes = static_cast<size_t>(s_pCurBufferPointer - s_pBaseBufferPointer);
	if (buffered_bytes < static_cast<size_t>(vert_decl.stride) * 3)
		return;

	float vtx[9];
	float out[12];
	float viewOffset[2] = { xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2,
	                        xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};

	// Global matrix ID.
	u32 mtxIdx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
	size_t posOff = vert_decl.position.offset;
	size_t mtxOff = vert_decl.posmtx.offset;

	// Look up the vertices of the last rendered triangle and software-transform them.
	// This allows us to determine the depth slope, which will be used if zfreeze
	// is enabled in the following flush.
	// NOTE(review): three position floats are read per vertex; confirm that
	// 2-component positions are padded by the vertex loader.
	for (unsigned int i = 0; i < 3; ++i)
	{
		u8* vtx_ptr = s_pCurBufferPointer - vert_decl.stride * (3 - i);
		vtx[0 + i * 3] = ((float*)(vtx_ptr + posOff))[0];
		vtx[1 + i * 3] = ((float*)(vtx_ptr + posOff))[1];
		vtx[2 + i * 3] = ((float*)(vtx_ptr + posOff))[2];

		// If this vertex format has per-vertex position matrix IDs, look it up.
		if (vert_decl.posmtx.enable)
			mtxIdx = *((u32*)(vtx_ptr + mtxOff));

		VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4], mtxIdx);

		// Transform to screen space
		float inv_w = 1.0f / out[3 + i * 4];

		out[0 + i * 4] = out[0 + i * 4] * inv_w * xfmem.viewport.wd + viewOffset[0];
		out[1 + i * 4] = out[1 + i * 4] * inv_w * xfmem.viewport.ht + viewOffset[1];
		out[2 + i * 4] = out[2 + i * 4] * inv_w * xfmem.viewport.zRange + xfmem.viewport.farZ;
	}

	// Edge deltas between the three screen-space vertices.
	float dx31 = out[8] - out[0];
	float dx12 = out[0] - out[4];
	float dy12 = out[1] - out[5];
	float dy31 = out[9] - out[1];

	float DF31 = out[10] - out[2];
	float DF21 = out[6] - out[2];

	// (a, b, c) is the (unnormalised) plane normal.
	float a = DF31 * -dy12 - DF21 * dy31;
	float b = dx31 * DF21 + dx12 * DF31;
	float c = -dx12 * dy31 - dx31 * -dy12;

	// Sometimes we process degenerate triangles. Stop any divide by zeros
	if (c == 0)
		return;

	ZSlope.dfdx = -a / c;
	ZSlope.dfdy = -b / c;
	ZSlope.f0 = out[2] - (out[0] * ZSlope.dfdx + out[1] * ZSlope.dfdy);
	ZSlope.dirty = true;
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include "Common/CommonFuncs.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "VideoCommon/DataReader.h"
|
||||
#include "VideoCommon/NativeVertexFormat.h"
|
||||
|
||||
class NativeVertexFormat;
|
||||
class PointerWrap;
|
||||
@ -14,6 +15,14 @@ enum PrimitiveType {
|
||||
PRIMITIVE_TRIANGLES,
|
||||
};
|
||||
|
||||
// Depth plane used for zfreeze: depth(x, y) = f0 + dfdx * x + dfdy * y.
struct Slope
{
	// Change in depth per unit of screen-space x.
	float dfdx;
	// Change in depth per unit of screen-space y.
	float dfdy;
	// Depth at the screen-space origin.
	float f0;
	// Set when the plane has been recalculated and not yet handed to the pixel shader constants.
	bool dirty;
};
|
||||
|
||||
class VertexManager
|
||||
{
|
||||
private:
|
||||
@ -32,7 +41,7 @@ public:
|
||||
// needs to be virtual for DX11's dtor
|
||||
virtual ~VertexManager();
|
||||
|
||||
static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride);
|
||||
static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall);
|
||||
static void FlushData(u32 count, u32 stride);
|
||||
|
||||
static void Flush();
|
||||
@ -55,6 +64,11 @@ protected:
|
||||
static u32 GetRemainingSize();
|
||||
static u32 GetRemainingIndices(int primitive);
|
||||
|
||||
static Slope ZSlope;
|
||||
static void CalculateZSlope(NativeVertexFormat *format);
|
||||
|
||||
static bool CullAll;
|
||||
|
||||
private:
|
||||
static bool IsFlushed;
|
||||
|
||||
|
@ -167,7 +167,21 @@ static void ViewportCorrectionMatrix(Matrix44& result)
|
||||
|
||||
void VertexShaderManager::Init()
|
||||
{
|
||||
Dirty();
|
||||
// Initialize state tracking variables
|
||||
nTransformMatricesChanged[0] = -1;
|
||||
nTransformMatricesChanged[1] = -1;
|
||||
nNormalMatricesChanged[0] = -1;
|
||||
nNormalMatricesChanged[1] = -1;
|
||||
nPostTransformMatricesChanged[0] = -1;
|
||||
nPostTransformMatricesChanged[1] = -1;
|
||||
nLightsChanged[0] = -1;
|
||||
nLightsChanged[1] = -1;
|
||||
nMaterialsChanged = BitSet32(0);
|
||||
bTexMatricesChanged[0] = false;
|
||||
bTexMatricesChanged[1] = false;
|
||||
bPosNormalMatrixChanged = false;
|
||||
bProjectionChanged = true;
|
||||
bViewportChanged = false;
|
||||
|
||||
memset(&xfmem, 0, sizeof(xfmem));
|
||||
memset(&constants, 0 , sizeof(constants));
|
||||
@ -178,6 +192,8 @@ void VertexShaderManager::Init()
|
||||
memset(g_fProjectionMatrix, 0, sizeof(g_fProjectionMatrix));
|
||||
for (int i = 0; i < 4; ++i)
|
||||
g_fProjectionMatrix[i*5] = 1.0f;
|
||||
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
void VertexShaderManager::Shutdown()
|
||||
@ -186,26 +202,10 @@ void VertexShaderManager::Shutdown()
|
||||
|
||||
// Sets the change-tracking variables below so that the corresponding state is
// treated as changed, and raises the `dirty` flag.
// NOTE(review): the two-element n*Changed arrays look like [0] = start and
// [1] = end of a changed range -- confirm against the code that consumes them.
void VertexShaderManager::Dirty()
|
||||
{
|
||||
nTransformMatricesChanged[0] = 0;
|
||||
nTransformMatricesChanged[1] = 256;
|
||||
|
||||
nNormalMatricesChanged[0] = 0;
|
||||
nNormalMatricesChanged[1] = 96;
|
||||
|
||||
nPostTransformMatricesChanged[0] = 0;
|
||||
nPostTransformMatricesChanged[1] = 256;
|
||||
|
||||
nLightsChanged[0] = 0;
|
||||
nLightsChanged[1] = 0x80;
|
||||
|
||||
bPosNormalMatrixChanged = true;
|
||||
bTexMatricesChanged[0] = true;
|
||||
bTexMatricesChanged[1] = true;
|
||||
|
||||
// This function is called after a savestate is loaded.
|
||||
// Any constants that can change based on settings should be re-calculated.
|
||||
bProjectionChanged = true;
|
||||
|
||||
nMaterialsChanged = BitSet32::AllTrue(4);
|
||||
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
@ -690,6 +690,25 @@ void VertexShaderManager::ResetView()
|
||||
bProjectionChanged = true;
|
||||
}
|
||||
|
||||
void VertexShaderManager::TransformToClipSpace(const float* data, float *out, u32 MtxIdx)
|
||||
{
|
||||
const float *world_matrix = (const float *)xfmem.posMatrices + (MtxIdx & 0x3f) * 4;
|
||||
// We use the projection matrix calculated by vertexShaderManager, because it
|
||||
// includes any free look transformations.
|
||||
// Make sure VertexManager::SetConstants() has been called first.
|
||||
const float *proj_matrix = &g_fProjectionMatrix[0];
|
||||
|
||||
float t[3];
|
||||
t[0] = data[0] * world_matrix[0] + data[1] * world_matrix[1] + data[2] * world_matrix[2] + world_matrix[3];
|
||||
t[1] = data[0] * world_matrix[4] + data[1] * world_matrix[5] + data[2] * world_matrix[6] + world_matrix[7];
|
||||
t[2] = data[0] * world_matrix[8] + data[1] * world_matrix[9] + data[2] * world_matrix[10] + world_matrix[11];
|
||||
|
||||
out[0] = t[0] * proj_matrix[0] + t[1] * proj_matrix[1] + t[2] * proj_matrix[2] + proj_matrix[3];
|
||||
out[1] = t[0] * proj_matrix[4] + t[1] * proj_matrix[5] + t[2] * proj_matrix[6] + proj_matrix[7];
|
||||
out[2] = t[0] * proj_matrix[8] + t[1] * proj_matrix[9] + t[2] * proj_matrix[10] + proj_matrix[11];
|
||||
out[3] = t[0] * proj_matrix[12] + t[1] * proj_matrix[13] + t[2] * proj_matrix[14] + proj_matrix[15];
|
||||
}
|
||||
|
||||
void VertexShaderManager::DoState(PointerWrap &p)
|
||||
{
|
||||
p.Do(g_fProjectionMatrix);
|
||||
@ -698,8 +717,19 @@ void VertexShaderManager::DoState(PointerWrap &p)
|
||||
p.Do(s_viewInvRotationMatrix);
|
||||
p.Do(s_fViewTranslationVector);
|
||||
p.Do(s_fViewRotation);
|
||||
|
||||
p.Do(nTransformMatricesChanged);
|
||||
p.Do(nNormalMatricesChanged);
|
||||
p.Do(nPostTransformMatricesChanged);
|
||||
p.Do(nLightsChanged);
|
||||
|
||||
p.Do(nMaterialsChanged);
|
||||
p.Do(bTexMatricesChanged);
|
||||
p.Do(bPosNormalMatrixChanged);
|
||||
p.Do(bProjectionChanged);
|
||||
p.Do(bViewportChanged);
|
||||
|
||||
p.Do(constants);
|
||||
p.Do(dirty);
|
||||
|
||||
if (p.GetMode() == PointerWrap::MODE_READ)
|
||||
{
|
||||
|
@ -34,6 +34,12 @@ public:
|
||||
static void RotateView(float x, float y);
|
||||
static void ResetView();
|
||||
|
||||
// data: 3 floats representing the X, Y and Z vertex model coordinates; the posmatrix index is passed separately as mtxIdx.
|
||||
// out: 4 floats which will be initialized with the corresponding clip space coordinates
|
||||
// NOTE: g_fProjectionMatrix must be up to date when this is called
|
||||
// (i.e. VertexShaderManager::SetConstants needs to be called before using this!)
|
||||
static void TransformToClipSpace(const float* data, float *out, u32 mtxIdx);
|
||||
|
||||
static VertexShaderConstants constants;
|
||||
static bool dirty;
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user