2009-07-28 21:32:10 +00:00
|
|
|
// Copyright (C) 2003 Dolphin Project.
|
2009-02-23 06:15:48 +00:00
|
|
|
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU General Public License as published by
|
|
|
|
// the Free Software Foundation, version 2.0.
|
|
|
|
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
|
|
|
|
// Official SVN repository and contact information can be found at
|
|
|
|
// http://code.google.com/p/dolphin-emu/
|
|
|
|
|
2010-01-17 17:44:09 +00:00
|
|
|
#include "Common.h"
|
|
|
|
#include "FileUtil.h"
|
|
|
|
#include "LinearDiskCache.h"
|
|
|
|
|
|
|
|
#include "Globals.h"
|
2009-02-23 06:15:48 +00:00
|
|
|
#include "D3DBase.h"
|
2009-02-28 22:10:38 +00:00
|
|
|
#include "D3DShader.h"
|
2009-02-23 06:15:48 +00:00
|
|
|
#include "Statistics.h"
|
|
|
|
#include "Utils.h"
|
2009-09-13 09:23:30 +00:00
|
|
|
#include "VideoConfig.h"
|
2009-02-28 22:10:38 +00:00
|
|
|
#include "PixelShaderGen.h"
|
|
|
|
#include "PixelShaderManager.h"
|
2009-02-23 06:15:48 +00:00
|
|
|
#include "PixelShaderCache.h"
|
|
|
|
#include "VertexLoader.h"
|
2009-06-22 09:31:30 +00:00
|
|
|
#include "BPMemory.h"
|
2009-02-23 06:15:48 +00:00
|
|
|
#include "XFMemory.h"
|
ok big changes here:
in videocommon little fix for the alpha test values, return to the original values as they are more accurate.
in D3D:
huge change in state management, now all the state management is centralized and redundant state changes are eliminated.
Fixed the overlapped viewport error in non ati cards:
the error was caused by this: when a viewport is defined larger than the current rendertarget, an error is thrown and the last valid viewport is used, this is the reference behavior, in ati cards if a larger viewport is defined, no eror is returned, the rendering is valid and is rendered using the projection defined by the viewport but limited to the rendertarget are, exactly like opengl or the GC hardware.
to solve this in reference drivers defined a large rendertarget (2x the size of the original) and proceed to render in a centered quad insithe the larger rendertarget, in this way larger viewports always falls inside a valid rendertarget size, the drawback of this is the waste of resources. it can be dynamized, depending or games or changed at runtime when a oversized viewport is detected, but i live that to future commits.
please test this and let me know the results.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4841 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-01-15 15:52:08 +00:00
|
|
|
#include "ImageWrite.h"
|
2009-02-23 06:15:48 +00:00
|
|
|
|
2010-01-17 17:44:09 +00:00
|
|
|
#include "Debugger/Debugger.h"
|
2009-09-02 06:33:41 +00:00
|
|
|
|
2009-02-28 22:10:38 +00:00
|
|
|
// Every pixel shader created so far, looked up by the PIXELSHADERUID it was built for.
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
|
2009-09-01 19:48:45 +00:00
|
|
|
// Cache entry most recently hit in SetShader() or stored by InsertByteCode();
// read back by GetCurrentShaderCode() in debug builds.
const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry;
|
2010-01-17 17:44:09 +00:00
|
|
|
|
|
|
|
// On-disk store of compiled shader bytecode keyed by shader UID: opened and
// replayed in Init(), appended to in SetShader(), synced and closed in Shutdown().
LinearDiskCache g_ps_disk_cache;
|
|
|
|
|
2009-09-10 03:36:32 +00:00
|
|
|
// Shadow copy of the float4 pixel shader constants last handed to the device.
// The SetPSConstant* functions compare against it and skip the device call
// when the values are unchanged; Clear() overwrites it with a sentinel value.
static float lastPSconstants[C_COLORMATRIX+16][4];
|
2009-02-23 06:15:48 +00:00
|
|
|
|
2010-02-08 23:23:04 +00:00
|
|
|
// Color conversion programs (sample, then apply cColMatrix). Init() fills
// slots 0..3 with the 1-, 2-, 4- and 9-sample variants.
static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[4];
|
|
|
|
// Color buffer copy/resolve programs. Init() fills slots 0..3 with the
// 1-, 2-, 4- and 9-sample variants.
static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[4];
|
|
|
|
// Depth copy programs (sample, re-encode the red channel, then apply
// cColMatrix). Init() fills slots 0..3 with the 1-, 2-, 4- and 9-sample variants.
static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[4];
|
2009-11-22 02:37:00 +00:00
|
|
|
// Program used for clearing the screen: it outputs the incoming COLOR0
// unchanged. Compiled in Init(), released in Shutdown().
static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0;
|
2009-11-10 12:45:03 +00:00
|
|
|
|
2009-11-08 20:35:11 +00:00
|
|
|
|
2009-11-10 12:45:03 +00:00
|
|
|
|
2010-02-08 23:23:04 +00:00
|
|
|
// Returns the color conversion program for the given SSAA mode.
// SSAAMode indexes s_ColorMatrixProgram directly (slots 0..3 are the 1-, 2-,
// 4- and 9-sample variants built in Init()); no range check is performed.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode)
{
	LPDIRECT3DPIXELSHADER9 program = s_ColorMatrixProgram[SSAAMode];
	return program;
}
|
|
|
|
|
2010-02-08 23:23:04 +00:00
|
|
|
// Returns the depth copy program for the given SSAA mode.
// SSAAMode indexes s_DepthMatrixProgram directly (slots 0..3 are the 1-, 2-,
// 4- and 9-sample variants built in Init()); no range check is performed.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode)
{
	LPDIRECT3DPIXELSHADER9 program = s_DepthMatrixProgram[SSAAMode];
	return program;
}
|
|
|
|
|
2010-02-08 23:23:04 +00:00
|
|
|
// Returns the color buffer copy/resolve program for the given SSAA mode.
// SSAAMode indexes s_ColorCopyProgram directly (slots 0..3 are the 1-, 2-,
// 4- and 9-sample variants built in Init()); no range check is performed.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode)
{
	LPDIRECT3DPIXELSHADER9 program = s_ColorCopyProgram[SSAAMode];
	return program;
}
|
|
|
|
|
2010-02-08 23:23:04 +00:00
|
|
|
// Returns the pass-through program compiled in Init() for clearing the screen
// (0 if it has not been created or has already been released).
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram()
{
	LPDIRECT3DPIXELSHADER9 program = s_ClearProgram;
	return program;
}
|
|
|
|
|
2009-02-28 22:10:38 +00:00
|
|
|
void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4)
|
2009-02-23 06:15:48 +00:00
|
|
|
{
|
2010-01-17 17:44:09 +00:00
|
|
|
if (lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 ||
|
2010-05-17 22:17:46 +00:00
|
|
|
lastPSconstants[const_number][2] != f3 || lastPSconstants[const_number][3] != f4)
|
2009-09-09 05:22:16 +00:00
|
|
|
{
|
2009-09-10 03:36:32 +00:00
|
|
|
lastPSconstants[const_number][0] = f1;
|
|
|
|
lastPSconstants[const_number][1] = f2;
|
|
|
|
lastPSconstants[const_number][2] = f3;
|
|
|
|
lastPSconstants[const_number][3] = f4;
|
2010-05-17 22:17:46 +00:00
|
|
|
D3D::dev->SetPixelShaderConstantF(const_number, lastPSconstants[const_number], 1);
|
|
|
|
|
|
|
|
}
|
2009-02-23 06:15:48 +00:00
|
|
|
}
|
|
|
|
|
2009-02-28 22:10:38 +00:00
|
|
|
void SetPSConstant4fv(int const_number, const float *f)
|
|
|
|
{
|
2010-05-17 22:17:46 +00:00
|
|
|
if (memcmp(&lastPSconstants[const_number], f, sizeof(float) * 4)) {
|
|
|
|
memcpy(&lastPSconstants[const_number], f, sizeof(float) * 4);
|
2009-09-09 05:22:16 +00:00
|
|
|
D3D::dev->SetPixelShaderConstantF(const_number, f, 1);
|
2010-05-17 22:17:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void SetMultiPSConstant4fv(int const_number, int count, const float *f)
|
|
|
|
{
|
|
|
|
if (memcmp(&lastPSconstants[const_number], f, count * sizeof(float) * 4)) {
|
|
|
|
memcpy(&lastPSconstants[const_number], f, count * sizeof(float) * 4);
|
|
|
|
D3D::dev->SetPixelShaderConstantF(const_number, f, count);
|
2009-09-09 05:22:16 +00:00
|
|
|
}
|
2009-02-28 22:10:38 +00:00
|
|
|
}
|
2009-02-23 06:15:48 +00:00
|
|
|
|
2010-01-17 17:44:09 +00:00
|
|
|
class PixelShaderCacheInserter : public LinearDiskCacheReader {
|
|
|
|
public:
|
|
|
|
void Read(const u8 *key, int key_size, const u8 *value, int value_size)
|
|
|
|
{
|
|
|
|
PIXELSHADERUID uid;
|
|
|
|
if (key_size != sizeof(uid)) {
|
|
|
|
ERROR_LOG(VIDEO, "Wrong key size in pixel shader cache");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
memcpy(&uid, key, key_size);
|
|
|
|
PixelShaderCache::InsertByteCode(uid, value, value_size, false);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2009-02-28 22:10:38 +00:00
|
|
|
void PixelShaderCache::Init()
|
2009-02-23 06:15:48 +00:00
|
|
|
{
|
2010-02-08 23:23:04 +00:00
|
|
|
//program used for clear screen
|
|
|
|
char pprog[3072];
|
2010-01-17 17:44:09 +00:00
|
|
|
sprintf(pprog, "void main(\n"
|
2009-11-08 20:35:11 +00:00
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
2010-02-03 22:19:00 +00:00
|
|
|
" in float4 incol0 : COLOR0){\n"
|
2009-11-23 14:08:08 +00:00
|
|
|
"ocol0 = incol0;\n"
|
|
|
|
"}\n");
|
2010-01-17 17:44:09 +00:00
|
|
|
s_ClearProgram = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
2009-11-23 14:08:08 +00:00
|
|
|
|
2010-02-08 23:23:04 +00:00
|
|
|
//Used for Copy/resolve the color buffer
|
|
|
|
//1 Sample
|
2010-01-17 17:44:09 +00:00
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
2009-11-08 20:35:11 +00:00
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
2010-02-08 23:23:04 +00:00
|
|
|
"in float2 uv0 : TEXCOORD0){\n"
|
|
|
|
"ocol0 = tex2D(samp0,uv0);\n"
|
2009-11-08 20:35:11 +00:00
|
|
|
"}\n");
|
2010-02-08 23:23:04 +00:00
|
|
|
s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
2009-11-23 14:08:08 +00:00
|
|
|
|
2010-02-08 23:23:04 +00:00
|
|
|
//2 samples
|
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
|
|
|
"in float4 uv0 : TEXCOORD0,\n"
|
|
|
|
"in float4 uv1 : TEXCOORD1,\n"
|
|
|
|
"in float4 uv2 : TEXCOORD2){\n"
|
|
|
|
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv2.xy))*0.5f;\n"
|
|
|
|
"}\n");
|
|
|
|
s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
|
|
|
|
|
|
|
//4 Samples
|
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
|
|
|
"in float4 uv0 : TEXCOORD0,\n"
|
|
|
|
"in float4 uv1 : TEXCOORD1,\n"
|
|
|
|
"in float4 uv2 : TEXCOORD2,\n"
|
|
|
|
"in float4 uv3 : TEXCOORD3,\n"
|
|
|
|
"in float4 uv4 : TEXCOORD4){\n"
|
|
|
|
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv4.xy))*0.25f;\n"
|
|
|
|
"}\n");
|
|
|
|
s_ColorCopyProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
|
|
|
|
|
|
|
//9 Samples
|
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
|
|
|
"in float4 uv0 : TEXCOORD0,\n"
|
|
|
|
"in float4 uv1 : TEXCOORD1,\n"
|
|
|
|
"in float4 uv2 : TEXCOORD2,\n"
|
|
|
|
"in float4 uv3 : TEXCOORD3,\n"
|
|
|
|
"in float4 uv4 : TEXCOORD4){\n"
|
|
|
|
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv3.wz) + tex2D(samp0,uv4.xy) + tex2D(samp0,uv4.wz) + tex2D(samp0,uv0.xy))/9.0f;\n"
|
|
|
|
"}\n");
|
|
|
|
s_ColorCopyProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
|
|
|
|
|
|
|
//Color conversion Programs
|
|
|
|
//1 sample
|
2010-01-17 17:44:09 +00:00
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
2009-11-23 14:08:08 +00:00
|
|
|
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
|
|
|
"void main(\n"
|
2009-11-22 02:37:00 +00:00
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
2010-02-08 23:23:04 +00:00
|
|
|
" in float2 uv0 : TEXCOORD0){\n"
|
|
|
|
"float4 texcol = tex2D(samp0,uv0);\n"
|
|
|
|
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
2009-11-23 14:08:08 +00:00
|
|
|
"}\n",C_COLORMATRIX);
|
2010-02-08 23:23:04 +00:00
|
|
|
s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
|
|
|
|
|
|
|
//2 samples
|
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
|
|
|
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
|
|
|
"in float4 uv0 : TEXCOORD0,\n"
|
|
|
|
"in float4 uv1 : TEXCOORD1,\n"
|
|
|
|
"in float4 uv2 : TEXCOORD2,\n"
|
|
|
|
"in float4 uv3 : TEXCOORD3,\n"
|
|
|
|
"in float4 uv4 : TEXCOORD4,\n"
|
|
|
|
"in float4 uv5 : TEXCOORD5){\n"
|
|
|
|
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w)))) * 0.5f;\n"
|
|
|
|
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
|
|
|
"}\n",C_COLORMATRIX);
|
|
|
|
s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
|
|
|
|
|
|
|
//4 samples
|
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
|
|
|
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
|
|
|
"in float4 uv0 : TEXCOORD0,\n"
|
|
|
|
"in float4 uv1 : TEXCOORD1,\n"
|
|
|
|
"in float4 uv2 : TEXCOORD2,\n"
|
|
|
|
"in float4 uv3 : TEXCOORD3,\n"
|
|
|
|
"in float4 uv4 : TEXCOORD4,\n"
|
|
|
|
"in float4 uv5 : TEXCOORD5){\n"
|
|
|
|
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n"
|
|
|
|
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
|
|
|
"}\n",C_COLORMATRIX);
|
|
|
|
s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
|
|
|
|
|
|
|
//9 samples
|
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
|
|
|
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
|
|
|
"in float4 uv0 : TEXCOORD0,\n"
|
|
|
|
"in float4 uv1 : TEXCOORD1,\n"
|
|
|
|
"in float4 uv2 : TEXCOORD2,\n"
|
|
|
|
"in float4 uv3 : TEXCOORD3,\n"
|
|
|
|
"in float4 uv4 : TEXCOORD4,\n"
|
|
|
|
"in float4 uv5 : TEXCOORD5){\n"
|
|
|
|
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n"
|
|
|
|
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
|
|
|
"}\n",C_COLORMATRIX);
|
|
|
|
s_ColorMatrixProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
2009-11-22 02:37:00 +00:00
|
|
|
|
2010-02-08 23:23:04 +00:00
|
|
|
//Depth copy programs
|
|
|
|
//1 sample
|
2010-01-17 17:44:09 +00:00
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
2009-11-10 12:45:03 +00:00
|
|
|
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
2010-02-08 23:23:04 +00:00
|
|
|
" in float2 uv0 : TEXCOORD0){\n"
|
|
|
|
"float4 texcol = tex2D(samp0,uv0);\n"
|
2009-11-22 02:37:00 +00:00
|
|
|
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
|
|
|
|
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
|
2009-11-10 12:45:03 +00:00
|
|
|
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
|
|
|
"}\n",C_COLORMATRIX);
|
2010-02-08 23:23:04 +00:00
|
|
|
s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
|
|
|
|
|
|
|
//2 sample
|
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
|
|
|
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
|
|
|
"in float4 uv0 : TEXCOORD0,\n"
|
|
|
|
"in float4 uv1 : TEXCOORD1,\n"
|
|
|
|
"in float4 uv2 : TEXCOORD2,\n"
|
|
|
|
"in float4 uv3 : TEXCOORD3,\n"
|
|
|
|
"in float4 uv4 : TEXCOORD4,\n"
|
|
|
|
"in float4 uv5 : TEXCOORD5){\n"
|
|
|
|
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w)))) * 0.5f;\n"
|
|
|
|
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
|
|
|
|
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
|
|
|
|
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
|
|
|
"}\n",C_COLORMATRIX);
|
|
|
|
s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
2010-02-03 03:52:50 +00:00
|
|
|
|
2010-02-08 23:23:04 +00:00
|
|
|
//4 sample
|
2010-02-04 22:25:09 +00:00
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
2010-02-08 23:23:04 +00:00
|
|
|
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
2010-02-03 03:52:50 +00:00
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
2010-02-08 23:23:04 +00:00
|
|
|
"in float4 uv0 : TEXCOORD0,\n"
|
|
|
|
"in float4 uv1 : TEXCOORD1,\n"
|
|
|
|
"in float4 uv2 : TEXCOORD2,\n"
|
|
|
|
"in float4 uv3 : TEXCOORD3,\n"
|
|
|
|
"in float4 uv4 : TEXCOORD4,\n"
|
|
|
|
"in float4 uv5 : TEXCOORD5){\n"
|
|
|
|
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n"
|
|
|
|
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
|
|
|
|
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
|
|
|
|
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
|
|
|
"}\n",C_COLORMATRIX);
|
|
|
|
s_DepthMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
2010-02-03 03:52:50 +00:00
|
|
|
|
2010-02-08 23:23:04 +00:00
|
|
|
//9 sample
|
2010-02-04 22:25:09 +00:00
|
|
|
sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
|
2010-02-03 03:52:50 +00:00
|
|
|
"uniform float4 cColMatrix[5] : register(c%d);\n"
|
|
|
|
"void main(\n"
|
|
|
|
"out float4 ocol0 : COLOR0,\n"
|
2010-02-08 23:23:04 +00:00
|
|
|
"in float4 uv0 : TEXCOORD0,\n"
|
|
|
|
"in float4 uv1 : TEXCOORD1,\n"
|
|
|
|
"in float4 uv2 : TEXCOORD2,\n"
|
|
|
|
"in float4 uv3 : TEXCOORD3,\n"
|
|
|
|
"in float4 uv4 : TEXCOORD4,\n"
|
|
|
|
"in float4 uv5 : TEXCOORD5){\n"
|
|
|
|
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n"
|
|
|
|
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
|
|
|
|
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
|
|
|
|
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
|
2010-02-03 03:52:50 +00:00
|
|
|
"}\n",C_COLORMATRIX);
|
2010-02-08 23:23:04 +00:00
|
|
|
s_DepthMatrixProgram[3] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
|
2010-02-03 03:52:50 +00:00
|
|
|
|
2009-09-13 17:46:33 +00:00
|
|
|
Clear();
|
2010-01-17 17:44:09 +00:00
|
|
|
|
2010-02-02 21:56:29 +00:00
|
|
|
if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
|
|
|
|
File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));
|
2010-01-17 17:44:09 +00:00
|
|
|
|
|
|
|
char cache_filename[MAX_PATH];
|
2010-02-25 06:12:35 +00:00
|
|
|
sprintf(cache_filename, "%s%s-ps.cache", File::GetUserPath(D_SHADERCACHE_IDX), globals->unique_id);
|
2010-01-17 17:44:09 +00:00
|
|
|
PixelShaderCacheInserter inserter;
|
|
|
|
int read_items = g_ps_disk_cache.OpenAndRead(cache_filename, &inserter);
|
2009-02-23 06:15:48 +00:00
|
|
|
}
|
|
|
|
|
2010-01-17 17:44:09 +00:00
|
|
|
// ONLY to be used during shutdown.
|
2009-09-13 17:46:33 +00:00
|
|
|
void PixelShaderCache::Clear()
|
2009-02-23 06:15:48 +00:00
|
|
|
{
|
2009-02-28 22:10:38 +00:00
|
|
|
PSCache::iterator iter = PixelShaders.begin();
|
|
|
|
for (; iter != PixelShaders.end(); iter++)
|
2009-02-23 06:15:48 +00:00
|
|
|
iter->second.Destroy();
|
2009-02-28 22:10:38 +00:00
|
|
|
PixelShaders.clear();
|
2009-09-13 17:46:33 +00:00
|
|
|
|
|
|
|
for (int i = 0; i < (C_COLORMATRIX + 16) * 4; i++)
|
2010-01-17 17:44:09 +00:00
|
|
|
lastPSconstants[i / 4][i % 4] = -100000000.0f;
|
2009-09-13 17:46:33 +00:00
|
|
|
memset(&last_pixel_shader_uid, 0xFF, sizeof(last_pixel_shader_uid));
|
|
|
|
}
|
|
|
|
|
|
|
|
void PixelShaderCache::Shutdown()
|
|
|
|
{
|
2010-02-08 23:23:04 +00:00
|
|
|
for(int i = 0;i<4;i++)
|
|
|
|
{
|
|
|
|
if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release();
|
|
|
|
s_ColorMatrixProgram[i] = NULL;
|
|
|
|
if (s_ColorCopyProgram[i]) s_ColorCopyProgram[i]->Release();
|
|
|
|
s_ColorCopyProgram[i] = NULL;
|
|
|
|
if (s_DepthMatrixProgram[i]) s_DepthMatrixProgram[i]->Release();
|
|
|
|
s_DepthMatrixProgram[i] = NULL;
|
|
|
|
}
|
2010-01-17 17:44:09 +00:00
|
|
|
if (s_ClearProgram) s_ClearProgram->Release();
|
|
|
|
s_ClearProgram = NULL;
|
2010-02-08 23:23:04 +00:00
|
|
|
|
2009-09-13 17:46:33 +00:00
|
|
|
Clear();
|
2010-01-17 17:44:09 +00:00
|
|
|
g_ps_disk_cache.Sync();
|
|
|
|
g_ps_disk_cache.Close();
|
2009-02-23 06:15:48 +00:00
|
|
|
}
|
|
|
|
|
2009-09-02 21:19:35 +00:00
|
|
|
bool PixelShaderCache::SetShader(bool dstAlpha)
|
2009-02-23 06:15:48 +00:00
|
|
|
{
|
2009-02-28 22:10:38 +00:00
|
|
|
PIXELSHADERUID uid;
|
2010-01-17 17:44:09 +00:00
|
|
|
GetPixelShaderId(&uid, PixelShaderManager::GetTextureMask(), dstAlpha);
|
|
|
|
|
|
|
|
// Is the shader already set?
|
2009-09-16 05:31:19 +00:00
|
|
|
if (uid == last_pixel_shader_uid && PixelShaders[uid].frameCount == frameCount)
|
2009-09-09 05:22:16 +00:00
|
|
|
{
|
2009-09-13 17:46:33 +00:00
|
|
|
PSCache::const_iterator iter = PixelShaders.find(uid);
|
|
|
|
if (iter != PixelShaders.end() && iter->second.shader)
|
2010-01-17 17:44:09 +00:00
|
|
|
return true; // Sure, we're done.
|
2009-09-09 05:22:16 +00:00
|
|
|
else
|
2010-01-17 17:44:09 +00:00
|
|
|
return false; // ?? something is wrong.
|
2009-09-09 05:22:16 +00:00
|
|
|
}
|
2009-02-23 06:15:48 +00:00
|
|
|
|
2009-09-10 03:36:32 +00:00
|
|
|
memcpy(&last_pixel_shader_uid, &uid, sizeof(PIXELSHADERUID));
|
2009-09-09 05:22:16 +00:00
|
|
|
|
2010-01-17 17:44:09 +00:00
|
|
|
// Is the shader already in the cache?
|
2009-02-23 06:15:48 +00:00
|
|
|
PSCache::iterator iter;
|
2009-02-28 22:10:38 +00:00
|
|
|
iter = PixelShaders.find(uid);
|
|
|
|
if (iter != PixelShaders.end())
|
2009-02-23 06:15:48 +00:00
|
|
|
{
|
|
|
|
iter->second.frameCount = frameCount;
|
2009-09-01 19:48:45 +00:00
|
|
|
const PSCacheEntry &entry = iter->second;
|
|
|
|
last_entry = &entry;
|
2009-09-09 05:22:16 +00:00
|
|
|
|
2009-09-18 02:03:56 +00:00
|
|
|
DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
|
2009-09-03 02:21:14 +00:00
|
|
|
|
|
|
|
if (entry.shader)
|
|
|
|
{
|
ok big changes here:
in videocommon little fix for the alpha test values, return to the original values as they are more accurate.
in D3D:
huge change in state management, now all the state management is centralized and redundant state changes are eliminated.
Fixed the overlapped viewport error in non ati cards:
the error was caused by this: when a viewport is defined larger than the current rendertarget, an error is thrown and the last valid viewport is used, this is the reference behavior, in ati cards if a larger viewport is defined, no eror is returned, the rendering is valid and is rendered using the projection defined by the viewport but limited to the rendertarget are, exactly like opengl or the GC hardware.
to solve this in reference drivers defined a large rendertarget (2x the size of the original) and proceed to render in a centered quad insithe the larger rendertarget, in this way larger viewports always falls inside a valid rendertarget size, the drawback of this is the waste of resources. it can be dynamized, depending or games or changed at runtime when a oversized viewport is detected, but i live that to future commits.
please test this and let me know the results.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4841 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-01-15 15:52:08 +00:00
|
|
|
D3D::SetPixelShader(entry.shader);
|
2009-09-03 02:21:14 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return false;
|
2009-02-23 06:15:48 +00:00
|
|
|
}
|
|
|
|
|
2010-01-17 17:44:09 +00:00
|
|
|
// OK, need to generate and compile it.
|
|
|
|
const char *code = GeneratePixelShaderCode(PixelShaderManager::GetTextureMask(), dstAlpha, 2);
|
ok big changes here:
in videocommon little fix for the alpha test values, return to the original values as they are more accurate.
in D3D:
huge change in state management, now all the state management is centralized and redundant state changes are eliminated.
Fixed the overlapped viewport error in non ati cards:
the error was caused by this: when a viewport is defined larger than the current rendertarget, an error is thrown and the last valid viewport is used, this is the reference behavior, in ati cards if a larger viewport is defined, no eror is returned, the rendering is valid and is rendered using the projection defined by the viewport but limited to the rendertarget are, exactly like opengl or the GC hardware.
to solve this in reference drivers defined a large rendertarget (2x the size of the original) and proceed to render in a centered quad insithe the larger rendertarget, in this way larger viewports always falls inside a valid rendertarget size, the drawback of this is the waste of resources. it can be dynamized, depending or games or changed at runtime when a oversized viewport is detected, but i live that to future commits.
please test this and let me know the results.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4841 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-01-15 15:52:08 +00:00
|
|
|
#if defined(_DEBUG) || defined(DEBUGFAST)
|
|
|
|
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
|
|
|
|
static int counter = 0;
|
|
|
|
char szTemp[MAX_PATH];
|
2010-02-02 21:56:29 +00:00
|
|
|
sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX), counter++);
|
ok big changes here:
in videocommon little fix for the alpha test values, return to the original values as they are more accurate.
in D3D:
huge change in state management, now all the state management is centralized and redundant state changes are eliminated.
Fixed the overlapped viewport error in non ati cards:
the error was caused by this: when a viewport is defined larger than the current rendertarget, an error is thrown and the last valid viewport is used, this is the reference behavior, in ati cards if a larger viewport is defined, no eror is returned, the rendering is valid and is rendered using the projection defined by the viewport but limited to the rendertarget are, exactly like opengl or the GC hardware.
to solve this in reference drivers defined a large rendertarget (2x the size of the original) and proceed to render in a centered quad insithe the larger rendertarget, in this way larger viewports always falls inside a valid rendertarget size, the drawback of this is the waste of resources. it can be dynamized, depending or games or changed at runtime when a oversized viewport is detected, but i live that to future commits.
please test this and let me know the results.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4841 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-01-15 15:52:08 +00:00
|
|
|
|
|
|
|
SaveData(szTemp, code);
|
|
|
|
}
|
|
|
|
#endif
|
2010-01-17 17:44:09 +00:00
|
|
|
|
|
|
|
u8 *bytecode = 0;
|
|
|
|
int bytecodelen = 0;
|
|
|
|
if (!D3D::CompilePixelShader(code, (int)strlen(code), &bytecode, &bytecodelen)) {
|
|
|
|
if (g_ActiveConfig.bShowShaderErrors)
|
|
|
|
{
|
|
|
|
PanicAlert("Failed to compile Pixel Shader:\n\n%s", code);
|
2010-05-19 03:15:36 +00:00
|
|
|
static int counter = 0;
|
|
|
|
char szTemp[MAX_PATH];
|
|
|
|
sprintf(szTemp, "%sBADps_%04i.txt", File::GetUserPath(D_DUMP_IDX), counter++);
|
|
|
|
SaveData(szTemp, code);
|
2010-01-17 17:44:09 +00:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Here we have the UID and the byte code. Insert it into the disk cache.
|
|
|
|
g_ps_disk_cache.Append((u8 *)&uid, sizeof(uid), bytecode, bytecodelen);
|
|
|
|
g_ps_disk_cache.Sync();
|
|
|
|
|
|
|
|
// And insert it into the shader cache.
|
|
|
|
bool result = InsertByteCode(uid, bytecode, bytecodelen, true);
|
|
|
|
delete [] bytecode;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) {
|
|
|
|
LPDIRECT3DPIXELSHADER9 shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
|
2009-09-02 04:10:40 +00:00
|
|
|
|
2009-09-03 02:21:14 +00:00
|
|
|
// Make an entry in the table
|
|
|
|
PSCacheEntry newentry;
|
|
|
|
newentry.shader = shader;
|
|
|
|
newentry.frameCount = frameCount;
|
|
|
|
PixelShaders[uid] = newentry;
|
|
|
|
last_entry = &PixelShaders[uid];
|
2009-02-28 22:10:38 +00:00
|
|
|
|
2010-01-17 17:44:09 +00:00
|
|
|
if (!shader) {
|
|
|
|
// INCSTAT(stats.numPixelShadersFailed);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-09-03 02:21:14 +00:00
|
|
|
INCSTAT(stats.numPixelShadersCreated);
|
|
|
|
SETSTAT(stats.numPixelShadersAlive, (int)PixelShaders.size());
|
2010-01-17 17:44:09 +00:00
|
|
|
if (activate)
|
2009-09-03 02:21:14 +00:00
|
|
|
{
|
ok big changes here:
in videocommon little fix for the alpha test values, return to the original values as they are more accurate.
in D3D:
huge change in state management, now all the state management is centralized and redundant state changes are eliminated.
Fixed the overlapped viewport error in non ati cards:
the error was caused by this: when a viewport is defined larger than the current rendertarget, an error is thrown and the last valid viewport is used, this is the reference behavior, in ati cards if a larger viewport is defined, no eror is returned, the rendering is valid and is rendered using the projection defined by the viewport but limited to the rendertarget are, exactly like opengl or the GC hardware.
to solve this in reference drivers defined a large rendertarget (2x the size of the original) and proceed to render in a centered quad insithe the larger rendertarget, in this way larger viewports always falls inside a valid rendertarget size, the drawback of this is the waste of resources. it can be dynamized, depending or games or changed at runtime when a oversized viewport is detected, but i live that to future commits.
please test this and let me know the results.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4841 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-01-15 15:52:08 +00:00
|
|
|
D3D::SetPixelShader(shader);
|
2009-03-08 19:36:00 +00:00
|
|
|
}
|
2010-01-17 17:44:09 +00:00
|
|
|
return true;
|
2009-03-01 01:09:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-09-02 21:19:35 +00:00
|
|
|
#if defined(_DEBUG) || defined(DEBUGFAST)
// Debug builds only: returns the `code` member of the cache entry recorded in
// last_entry, or the placeholder text "(no shader)\n" when no entry has been
// recorded.
std::string PixelShaderCache::GetCurrentShaderCode()
{
	if (!last_entry)
		return "(no shader)\n";

	return last_entry->code;
}
#endif
|