2009-07-28 21:32:10 +00:00
// Copyright (C) 2003 Dolphin Project.
2009-02-23 06:15:48 +00:00
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
2010-06-20 22:23:34 +00:00
# include <map>
# include <set>
2010-01-17 17:44:09 +00:00
# include "Common.h"
2010-06-20 22:23:34 +00:00
# include "Hash.h"
2010-01-17 17:44:09 +00:00
# include "FileUtil.h"
# include "LinearDiskCache.h"
# include "Globals.h"
2009-02-23 06:15:48 +00:00
# include "D3DBase.h"
2009-02-28 22:10:38 +00:00
# include "D3DShader.h"
2009-02-23 06:15:48 +00:00
# include "Statistics.h"
2009-09-13 09:23:30 +00:00
# include "VideoConfig.h"
2009-02-28 22:10:38 +00:00
# include "PixelShaderGen.h"
# include "PixelShaderManager.h"
2009-02-23 06:15:48 +00:00
# include "PixelShaderCache.h"
# include "VertexLoader.h"
2009-06-22 09:31:30 +00:00
# include "BPMemory.h"
2009-02-23 06:15:48 +00:00
# include "XFMemory.h"
OK, big changes here:
In VideoCommon: a small fix for the alpha test values, returning to the original values as they are more accurate.
In D3D:
A huge change in state management: all state management is now centralized and redundant state changes are eliminated.
Fixed the overlapped viewport error on non-ATI cards:
The error was caused by this: when a viewport is defined larger than the current render target, an error is thrown and the last valid viewport is used; this is the reference behavior. On ATI cards, if a larger viewport is defined, no error is returned; the rendering is valid and uses the projection defined by the viewport but limited to the render target area, exactly like OpenGL or the GC hardware.
To solve this on reference drivers, a large render target (2x the size of the original) is defined and rendering proceeds in a centered quad inside the larger render target; this way larger viewports always fall inside a valid render target size. The drawback is the waste of resources. It could be made dynamic, depending on the game or changed at runtime when an oversized viewport is detected, but I leave that to future commits.
Please test this and let me know the results.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4841 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-01-15 15:52:08 +00:00
# include "ImageWrite.h"
2010-12-05 14:15:36 +00:00
# include "Debugger.h"
2011-01-31 01:28:32 +00:00
# include "ConfigManager.h"
2009-09-02 06:33:41 +00:00
2011-01-29 20:16:51 +00:00
namespace DX9
{
2009-02-28 22:10:38 +00:00
// In-memory cache of pixel shader entries, looked up by PixelShaderUid.
PixelShaderCache : : PSCache PixelShaderCache : : PixelShaders ;
2009-09-01 19:48:45 +00:00
// Entry chosen by the most recent lookup/insert; SetShader compares it together
// with last_uid to skip the map lookup when the same shader is requested again.
const PixelShaderCache : : PSCacheEntry * PixelShaderCache : : last_entry ;
2013-03-26 23:35:14 +01:00
// Uid recorded by the most recent SetShader call that got past the fast path.
PixelShaderUid PixelShaderCache : : last_uid ;
2010-01-17 17:44:09 +00:00
2013-03-26 23:35:14 +01:00
// On-disk store of compiled bytecode: opened and read in Init, appended to in
// SetShader, synced and closed in Shutdown.
static LinearDiskCache < PixelShaderUid , u8 > g_ps_disk_cache ;
2010-06-20 22:23:34 +00:00
// Adler-32 hashes of generated shader source; only filled while shader debugging
// is enabled and used for the numUniquePixelShaders statistic.
static std : : set < u32 > unique_shaders ;
2010-01-17 17:44:09 +00:00
2010-07-02 17:09:53 +00:00
// Number of SSAA variants per copy shader; SSAA modes are reduced modulo this value.
# define MAX_SSAA_SHADERS 3
2010-12-19 20:59:23 +00:00
// First index of s_CopyProgram: plain copy or copy through the colour matrix.
enum
{
COPY_TYPE_DIRECT ,
COPY_TYPE_MATRIXCOLOR ,
NUM_COPY_TYPES
} ;
2011-01-06 02:24:03 +00:00
// Second index of s_CopyProgram: whether the shader re-encodes depth.
enum
{
DEPTH_CONVERSION_TYPE_NONE ,
DEPTH_CONVERSION_TYPE_ON ,
NUM_DEPTH_CONVERSION_TYPES
} ;
2010-07-02 17:09:53 +00:00
2011-01-06 02:24:03 +00:00
// Copy/convert shaders indexed by [copy type][depth conversion][SSAA variant].
// Filled in Init (entries may be NULL when creation failed) and released in Shutdown.
static LPDIRECT3DPIXELSHADER9 s_CopyProgram [ NUM_COPY_TYPES ] [ NUM_DEPTH_CONVERSION_TYPES ] [ MAX_SSAA_SHADERS ] ;
2010-12-27 21:56:20 +00:00
// Pass-through shader created in Init and returned by GetClearProgram.
static LPDIRECT3DPIXELSHADER9 s_ClearProgram = NULL ;
// Format reinterpretation shaders, created on first use by the Reinterp* functions.
static LPDIRECT3DPIXELSHADER9 s_rgba6_to_rgb8 = NULL ;
static LPDIRECT3DPIXELSHADER9 s_rgb8_to_rgba6 = NULL ;
2009-11-10 12:45:03 +00:00
2013-03-26 23:35:14 +01:00
// Reader handed to g_ps_disk_cache.OpenAndRead in Init: each record it receives is
// registered in the in-memory shader cache without being bound to the device.
class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
{
public:
	// key        - uid the bytecode was stored under
	// value      - compiled shader bytecode
	// value_size - number of bytes in value
	void Read(const PixelShaderUid &key, const u8 *value, u32 value_size)
	{
		const bool bindNow = false;
		PixelShaderCache::InsertByteCode(key, value, value_size, bindNow);
	}
};
2010-02-08 23:23:04 +00:00
// Returns the colour-matrix copy shader without depth conversion for the SSAA
// variant selected by SSAAMode modulo MAX_SSAA_SHADERS. The result is NULL when
// that variant could not be created in Init.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode)
{
	const int variant = SSAAMode % MAX_SSAA_SHADERS;
	return s_CopyProgram[COPY_TYPE_MATRIXCOLOR][DEPTH_CONVERSION_TYPE_NONE][variant];
}
2011-01-06 02:24:03 +00:00
// Returns the colour-matrix copy shader used for depth copies. depthConversion
// selects the variant that re-encodes depth; SSAAMode is reduced modulo
// MAX_SSAA_SHADERS. The result is NULL when that variant was not created in Init.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode, bool depthConversion)
{
	int depthType = DEPTH_CONVERSION_TYPE_NONE;
	if (depthConversion)
		depthType = DEPTH_CONVERSION_TYPE_ON;

	const int variant = SSAAMode % MAX_SSAA_SHADERS;
	return s_CopyProgram[COPY_TYPE_MATRIXCOLOR][depthType][variant];
}
2010-02-08 23:23:04 +00:00
// Returns the direct (no colour matrix, no depth conversion) copy shader for the
// SSAA variant selected by SSAAMode modulo MAX_SSAA_SHADERS. The result is NULL
// when that variant could not be created in Init.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode)
{
	const int variant = SSAAMode % MAX_SSAA_SHADERS;
	return s_CopyProgram[COPY_TYPE_DIRECT][DEPTH_CONVERSION_TYPE_NONE][variant];
}
2010-02-08 23:23:04 +00:00
// Returns the pass-through shader compiled in Init for clearing the screen
// (NULL before Init, after Shutdown, or if compilation failed).
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram()
{
	LPDIRECT3DPIXELSHADER9 const clearShader = s_ClearProgram;
	return clearShader;
}
2010-12-27 21:56:20 +00:00
// NOTE(review): these two handles are not referenced anywhere in the visible part
// of this file; the Reinterp* functions use s_rgba6_to_rgb8 / s_rgb8_to_rgba6.
static LPDIRECT3DPIXELSHADER9 s_rgb8 = NULL;
static LPDIRECT3DPIXELSHADER9 s_rgba6 = NULL;

// Returns the shader that reinterprets an RGBA6 pixel as RGB8, compiling it on the
// first call and reusing it afterwards. The shader rebuilds the 8-bit channels from
// the 6-bit ones as
//   dst8r = (src6r<<2)|(src6g>>4)
//   dst8g = ((src6g&0xF)<<4)|(src6b>>2)
//   dst8b = ((src6b&0x3)<<6)|src6a
// and forces alpha to full.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::ReinterpRGBA6ToRGB8()
{
	if (s_rgba6_to_rgb8)
		return s_rgba6_to_rgb8;

	const char shaderSource[] =
		" uniform sampler samp0 : register(s0); \n "
		" void main( \n "
		" out float4 ocol0 : COLOR0, \n "
		" in float2 uv0 : TEXCOORD0){ \n "
		" ocol0 = tex2D(samp0,uv0); \n "
		" float4 src6 = round(ocol0 * 63.f); \n "
		" ocol0.r = floor(src6.r*4.f) + floor(src6.g/16.f); \n "
		" ocol0.g = frac(src6.g/16.f)*16.f*16.f + floor(src6.b/4.f); \n "
		" ocol0.b = frac(src6.b/4.f)*4.f*64.f + src6.a; \n "
		" ocol0.a = 255.f; \n "
		" ocol0 /= 255.f; \n "
		" } \n ";

	const int sourceLength = (int)strlen(shaderSource);
	s_rgba6_to_rgb8 = D3D::CompileAndCreatePixelShader(shaderSource, sourceLength);
	return s_rgba6_to_rgb8;
}
// Returns the shader that reinterprets an RGB8 pixel as RGBA6, compiling it on the
// first call and reusing it afterwards.
//
// The per-channel mapping (taken from the earlier scalar version of this shader) is
//   dst6r = src8r>>2
//   dst6g = ((src8r&0x3)<<4)|(src8g>>4)
//   dst6b = ((src8g&0xF)<<2)|(src8b>>6)
//   dst6a = src8b&0x3F
// The current shader computes all four channels at once: temp1 holds the per-channel
// divisors, and the fractional part of each scaled channel (swizzled one lane over)
// supplies the high bits of the next channel.
LPDIRECT3DPIXELSHADER9 PixelShaderCache::ReinterpRGB8ToRGBA6()
{
	if (s_rgb8_to_rgba6)
		return s_rgb8_to_rgba6;

	const char shaderSource[] =
		" uniform sampler samp0 : register(s0); \n "
		" void main( \n "
		" out float4 ocol0 : COLOR0, \n "
		" in float2 uv0 : TEXCOORD0){ \n "
		" float4 temp1 = float4(1.0f/4.0f,1.0f/16.0f,1.0f/64.0f,0.0f); \n "
		" float4 temp2 = float4(1.0f,64.0f,255.0f,1.0f/63.0f); \n "
		" float4 src8 = round(tex2D(samp0,uv0)*temp2.z) * temp1; \n "
		" ocol0 = (frac(src8.wxyz) * temp2.xyyy + floor(src8)) * temp2.w; \n "
		" } \n ";

	const int sourceLength = (int)strlen(shaderSource);
	s_rgb8_to_rgba6 = D3D::CompileAndCreatePixelShader(shaderSource, sourceLength);
	return s_rgb8_to_rgba6;
}
2010-12-19 20:59:23 +00:00
# define WRITE p+=sprintf
2009-11-23 14:08:08 +00:00
2010-12-19 20:59:23 +00:00
static LPDIRECT3DPIXELSHADER9 CreateCopyShader ( int copyMatrixType , int depthConversionType , int SSAAMode )
{
2010-02-08 23:23:04 +00:00
//Used for Copy/resolve the color buffer
2010-12-19 20:59:23 +00:00
//Color conversion Programs
//Depth copy programs
// this should create the same shaders as before (plus some extras added for DF16), just... more manageably than listing the full program for each combination
char text [ 3072 ] ;
2010-02-08 23:23:04 +00:00
2010-12-19 20:59:23 +00:00
setlocale ( LC_NUMERIC , " C " ) ; // Reset locale for compilation
text [ sizeof ( text ) - 1 ] = 0x7C ; // canary
2010-07-02 17:09:53 +00:00
2010-12-19 20:59:23 +00:00
char * p = text ;
WRITE ( p , " // Copy/Color Matrix/Depth Matrix shader (matrix=%d, depth=%d, ssaa=%d) \n " , copyMatrixType , depthConversionType , SSAAMode ) ;
2010-07-02 17:09:53 +00:00
2010-12-19 20:59:23 +00:00
WRITE ( p , " uniform sampler samp0 : register(s0); \n " ) ;
if ( copyMatrixType = = COPY_TYPE_MATRIXCOLOR )
2011-01-07 19:23:57 +00:00
WRITE ( p , " uniform float4 cColMatrix[7] : register(c%d); \n " , C_COLORMATRIX ) ;
2010-12-19 20:59:23 +00:00
WRITE ( p , " void main( \n "
" out float4 ocol0 : COLOR0, \n " ) ;
2009-11-22 02:37:00 +00:00
2010-12-19 20:59:23 +00:00
switch ( SSAAMode % MAX_SSAA_SHADERS )
{
case 0 : // 1 Sample
2010-12-27 03:18:01 +00:00
WRITE ( p , " in float2 uv0 : TEXCOORD0, \n "
" in float uv1 : TEXCOORD1){ \n "
" float4 texcol = tex2D(samp0,uv0.xy); \n " ) ;
2010-12-19 20:59:23 +00:00
break ;
case 1 : // 1 Samples SSAA
2011-04-30 12:58:03 +00:00
WRITE ( p , " in float2 uv0 : TEXCOORD0, \n "
2010-12-27 03:18:01 +00:00
" in float uv1 : TEXCOORD1){ \n "
2010-12-19 20:59:23 +00:00
" float4 texcol = tex2D(samp0,uv0.xy); \n " ) ;
break ;
case 2 : // 4 Samples SSAA
WRITE ( p , " in float4 uv0 : TEXCOORD0, \n "
2010-12-27 03:18:01 +00:00
" in float uv1 : TEXCOORD1, \n "
2010-12-19 20:59:23 +00:00
" in float4 uv2 : TEXCOORD2, \n "
" in float4 uv3 : TEXCOORD3){ \n "
2010-12-27 03:18:01 +00:00
" float4 texcol = (tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv3.wz))*0.25f; \n " ) ;
2010-12-19 20:59:23 +00:00
break ;
}
2011-01-06 02:24:03 +00:00
if ( depthConversionType ! = DEPTH_CONVERSION_TYPE_NONE )
2010-12-19 20:59:23 +00:00
{
2011-02-24 15:04:24 +00:00
// Watch out for the fire fumes effect in Metroid it's really sensitive to this,
// the lighting in RE0 is also way beyond sensitive since the "good value" is hardcoded and Dolphin is almost always off.
WRITE ( p , " float4 EncodedDepth = frac(texcol.r * (16777215.f/16777216.f) * float4(1.0f,256.0f,256.0f*256.0f,1.0f)); \n "
" texcol = floor(EncodedDepth * float4(256.f,256.f,256.f,15.0f)) / float4(255.0f,255.0f,255.0f,15.0f); \n " ) ;
2010-12-19 20:59:23 +00:00
}
2011-01-06 02:24:03 +00:00
else
2010-12-27 03:18:01 +00:00
{
2011-01-06 02:24:03 +00:00
//Apply Gamma Correction
2011-06-04 19:56:18 +00:00
WRITE ( p , " texcol = pow(texcol,uv1.xxxx); \n " ) ;
2010-12-27 03:18:01 +00:00
}
2010-12-19 20:59:23 +00:00
if ( copyMatrixType = = COPY_TYPE_MATRIXCOLOR )
2010-12-27 03:18:01 +00:00
{
2011-01-07 19:23:57 +00:00
if ( depthConversionType = = DEPTH_CONVERSION_TYPE_NONE )
2011-06-04 19:56:18 +00:00
WRITE ( p , " texcol = round(texcol * cColMatrix[5])*cColMatrix[6]; \n " ) ;
2011-01-07 19:23:57 +00:00
2011-06-04 19:56:18 +00:00
WRITE ( p , " ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4]; \n " ) ;
2010-12-27 03:18:01 +00:00
}
2010-12-19 20:59:23 +00:00
else
WRITE ( p , " ocol0 = texcol; \n " ) ;
WRITE ( p , " } \n " ) ;
if ( text [ sizeof ( text ) - 1 ] ! = 0x7C )
PanicAlert ( " PixelShaderCache copy shader generator - buffer too small, canary has been eaten! " ) ;
setlocale ( LC_NUMERIC , " " ) ; // restore locale
2011-06-04 19:56:18 +00:00
return D3D : : CompileAndCreatePixelShader ( text , ( int ) strlen ( text ) ) ;
2010-12-19 20:59:23 +00:00
}
void PixelShaderCache : : Init ( )
{
2011-09-29 22:54:52 +02:00
last_entry = NULL ;
2010-12-19 20:59:23 +00:00
//program used for clear screen
{
char pprog [ 3072 ] ;
sprintf ( pprog , " void main( \n "
" out float4 ocol0 : COLOR0, \n "
" in float4 incol0 : COLOR0){ \n "
" ocol0 = incol0; \n "
" } \n " ) ;
2011-06-04 19:56:18 +00:00
s_ClearProgram = D3D : : CompileAndCreatePixelShader ( pprog , ( int ) strlen ( pprog ) ) ;
2010-12-19 20:59:23 +00:00
}
int shaderModel = ( ( D3D : : GetCaps ( ) . PixelShaderVersion > > 8 ) & 0xFF ) ;
2011-06-04 19:56:18 +00:00
int maxConstants = ( shaderModel < 3 ) ? 32 : ( ( shaderModel < 4 ) ? 224 : 65536 ) ;
2010-12-19 20:59:23 +00:00
// other screen copy/convert programs
for ( int copyMatrixType = 0 ; copyMatrixType < NUM_COPY_TYPES ; copyMatrixType + + )
{
for ( int depthType = 0 ; depthType < NUM_DEPTH_CONVERSION_TYPES ; depthType + + )
{
for ( int ssaaMode = 0 ; ssaaMode < MAX_SSAA_SHADERS ; ssaaMode + + )
{
if ( ssaaMode & & ! s_CopyProgram [ copyMatrixType ] [ depthType ] [ ssaaMode - 1 ]
| | depthType & & ! s_CopyProgram [ copyMatrixType ] [ depthType - 1 ] [ ssaaMode ]
| | copyMatrixType & & ! s_CopyProgram [ copyMatrixType - 1 ] [ depthType ] [ ssaaMode ] )
{
// if it failed at a lower setting, it's going to fail here for the same reason it did there,
// so skip this attempt to avoid duplicate error messages.
s_CopyProgram [ copyMatrixType ] [ depthType ] [ ssaaMode ] = NULL ;
}
else
{
s_CopyProgram [ copyMatrixType ] [ depthType ] [ ssaaMode ] = CreateCopyShader ( copyMatrixType , depthType , ssaaMode ) ;
}
}
}
}
2010-02-03 03:52:50 +00:00
2009-09-13 17:46:33 +00:00
Clear ( ) ;
2010-01-17 17:44:09 +00:00
2011-03-01 03:06:14 +00:00
if ( ! File : : Exists ( File : : GetUserPath ( D_SHADERCACHE_IDX ) ) )
2011-02-28 20:40:15 +00:00
File : : CreateDir ( File : : GetUserPath ( D_SHADERCACHE_IDX ) . c_str ( ) ) ;
2010-01-17 17:44:09 +00:00
2010-06-19 16:22:24 +00:00
SETSTAT ( stats . numPixelShadersCreated , 0 ) ;
SETSTAT ( stats . numPixelShadersAlive , 0 ) ;
2010-01-17 17:44:09 +00:00
char cache_filename [ MAX_PATH ] ;
2011-02-28 20:40:15 +00:00
sprintf ( cache_filename , " %sdx9-%s-ps.cache " , File : : GetUserPath ( D_SHADERCACHE_IDX ) . c_str ( ) ,
SConfig : : GetInstance ( ) . m_LocalCoreStartupParameter . m_strUniqueID . c_str ( ) ) ;
2011-06-11 19:37:21 +00:00
PixelShaderCacheInserter inserter ;
g_ps_disk_cache . OpenAndRead ( cache_filename , inserter ) ;
2011-09-10 03:10:28 +02:00
2011-09-29 23:32:05 +02:00
if ( g_Config . bEnableShaderDebugging )
2011-09-10 03:10:28 +02:00
Clear ( ) ;
2009-02-23 06:15:48 +00:00
}
2010-01-17 17:44:09 +00:00
// ONLY to be used during shutdown.
2009-09-13 17:46:33 +00:00
void PixelShaderCache : : Clear ( )
2009-02-23 06:15:48 +00:00
{
2010-09-28 02:15:02 +00:00
for ( PSCache : : iterator iter = PixelShaders . begin ( ) ; iter ! = PixelShaders . end ( ) ; iter + + )
2009-02-23 06:15:48 +00:00
iter - > second . Destroy ( ) ;
2011-06-04 19:56:18 +00:00
PixelShaders . clear ( ) ;
2009-09-13 17:46:33 +00:00
2011-09-29 22:54:52 +02:00
last_entry = NULL ;
2009-09-13 17:46:33 +00:00
}
void PixelShaderCache : : Shutdown ( )
{
2010-12-19 20:59:23 +00:00
for ( int copyMatrixType = 0 ; copyMatrixType < NUM_COPY_TYPES ; copyMatrixType + + )
for ( int depthType = 0 ; depthType < NUM_DEPTH_CONVERSION_TYPES ; depthType + + )
for ( int ssaaMode = 0 ; ssaaMode < MAX_SSAA_SHADERS ; ssaaMode + + )
if ( s_CopyProgram [ copyMatrixType ] [ depthType ] [ ssaaMode ]
2010-12-27 21:56:20 +00:00
& & ( copyMatrixType = = 0 | | s_CopyProgram [ copyMatrixType ] [ depthType ] [ ssaaMode ] ! = s_CopyProgram [ copyMatrixType - 1 ] [ depthType ] [ ssaaMode ] ) )
2010-12-19 20:59:23 +00:00
s_CopyProgram [ copyMatrixType ] [ depthType ] [ ssaaMode ] - > Release ( ) ;
for ( int copyMatrixType = 0 ; copyMatrixType < NUM_COPY_TYPES ; copyMatrixType + + )
for ( int depthType = 0 ; depthType < NUM_DEPTH_CONVERSION_TYPES ; depthType + + )
for ( int ssaaMode = 0 ; ssaaMode < MAX_SSAA_SHADERS ; ssaaMode + + )
s_CopyProgram [ copyMatrixType ] [ depthType ] [ ssaaMode ] = NULL ;
2010-12-27 21:56:20 +00:00
if ( s_ClearProgram ) s_ClearProgram - > Release ( ) ;
s_ClearProgram = NULL ;
if ( s_rgb8_to_rgba6 ) s_rgb8_to_rgba6 - > Release ( ) ;
s_rgb8_to_rgba6 = NULL ;
if ( s_rgba6_to_rgb8 ) s_rgba6_to_rgb8 - > Release ( ) ;
s_rgba6_to_rgb8 = NULL ;
2011-06-04 19:56:18 +00:00
2009-09-13 17:46:33 +00:00
Clear ( ) ;
2010-01-17 17:44:09 +00:00
g_ps_disk_cache . Sync ( ) ;
g_ps_disk_cache . Close ( ) ;
2010-06-20 22:23:34 +00:00
unique_shaders . clear ( ) ;
2009-02-23 06:15:48 +00:00
}
2012-08-10 18:57:37 +02:00
bool PixelShaderCache : : SetShader ( DSTALPHA_MODE dstAlphaMode , u32 components )
2009-02-23 06:15:48 +00:00
{
2011-09-10 03:10:28 +02:00
const API_TYPE api = ( ( D3D : : GetCaps ( ) . PixelShaderVersion > > 8 ) & 0xFF ) < 3 ? API_D3D9_SM20 : API_D3D9_SM30 ;
2013-03-26 23:35:14 +01:00
PixelShaderUid uid ;
GetPixelShaderUid ( uid , dstAlphaMode , components ) ;
2010-01-17 17:44:09 +00:00
2010-09-28 02:15:02 +00:00
// Check if the shader is already set
2011-09-29 22:54:52 +02:00
if ( last_entry )
2009-09-09 05:22:16 +00:00
{
2011-09-29 22:54:52 +02:00
if ( uid = = last_uid )
{
GFX_DEBUGGER_PAUSE_AT ( NEXT_PIXEL_SHADER_CHANGE , true ) ;
return last_entry - > shader ! = NULL ;
}
2009-09-09 05:22:16 +00:00
}
2009-02-23 06:15:48 +00:00
2011-09-29 22:54:52 +02:00
last_uid = uid ;
2010-09-28 02:15:02 +00:00
// Check if the shader is already in the cache
2009-02-23 06:15:48 +00:00
PSCache : : iterator iter ;
2009-02-28 22:10:38 +00:00
iter = PixelShaders . find ( uid ) ;
if ( iter ! = PixelShaders . end ( ) )
2009-02-23 06:15:48 +00:00
{
2009-09-01 19:48:45 +00:00
const PSCacheEntry & entry = iter - > second ;
last_entry = & entry ;
2011-06-04 19:56:18 +00:00
2010-12-05 14:15:36 +00:00
if ( entry . shader ) D3D : : SetPixelShader ( entry . shader ) ;
GFX_DEBUGGER_PAUSE_AT ( NEXT_PIXEL_SHADER_CHANGE , true ) ;
return ( entry . shader ! = NULL ) ;
2009-02-23 06:15:48 +00:00
}
2011-06-04 19:56:18 +00:00
2010-09-28 02:15:02 +00:00
// Need to compile a new shader
2013-03-26 23:35:14 +01:00
PixelShaderCode code ;
GeneratePixelShaderCode ( code , dstAlphaMode , api , components ) ;
2010-06-20 22:23:34 +00:00
2011-09-09 21:34:46 +02:00
if ( g_ActiveConfig . bEnableShaderDebugging )
{
2013-03-26 23:35:14 +01:00
u32 code_hash = HashAdler32 ( ( const u8 * ) code . GetBuffer ( ) , strlen ( code . GetBuffer ( ) ) ) ;
2011-09-09 21:34:46 +02:00
unique_shaders . insert ( code_hash ) ;
SETSTAT ( stats . numUniquePixelShaders , unique_shaders . size ( ) ) ;
}
2010-06-20 22:23:34 +00:00
2010-09-28 02:15:02 +00:00
# if defined(_DEBUG) || defined(DEBUGFAST)
2013-03-26 23:35:14 +01:00
if ( g_ActiveConfig . iLog & CONF_SAVESHADERS ) {
ok big changes here:
in videocommon little fix for the alpha test values, return to the original values as they are more accurate.
in D3D:
huge change in state management, now all the state management is centralized and redundant state changes are eliminated.
Fixed the overlapped viewport error in non ati cards:
the error was caused by this: when a viewport is defined larger than the current rendertarget, an error is thrown and the last valid viewport is used, this is the reference behavior, in ati cards if a larger viewport is defined, no eror is returned, the rendering is valid and is rendered using the projection defined by the viewport but limited to the rendertarget are, exactly like opengl or the GC hardware.
to solve this in reference drivers defined a large rendertarget (2x the size of the original) and proceed to render in a centered quad insithe the larger rendertarget, in this way larger viewports always falls inside a valid rendertarget size, the drawback of this is the waste of resources. it can be dynamized, depending or games or changed at runtime when a oversized viewport is detected, but i live that to future commits.
please test this and let me know the results.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4841 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-01-15 15:52:08 +00:00
static int counter = 0 ;
char szTemp [ MAX_PATH ] ;
2011-02-28 20:40:15 +00:00
sprintf ( szTemp , " %sps_%04i.txt " , File : : GetUserPath ( D_DUMP_IDX ) . c_str ( ) , counter + + ) ;
2011-06-04 19:56:18 +00:00
2013-03-26 23:35:14 +01:00
SaveData ( szTemp , code . GetBuffer ( ) ) ;
ok big changes here:
in videocommon little fix for the alpha test values, return to the original values as they are more accurate.
in D3D:
huge change in state management, now all the state management is centralized and redundant state changes are eliminated.
Fixed the overlapped viewport error in non ati cards:
the error was caused by this: when a viewport is defined larger than the current rendertarget, an error is thrown and the last valid viewport is used, this is the reference behavior, in ati cards if a larger viewport is defined, no eror is returned, the rendering is valid and is rendered using the projection defined by the viewport but limited to the rendertarget are, exactly like opengl or the GC hardware.
to solve this in reference drivers defined a large rendertarget (2x the size of the original) and proceed to render in a centered quad insithe the larger rendertarget, in this way larger viewports always falls inside a valid rendertarget size, the drawback of this is the waste of resources. it can be dynamized, depending or games or changed at runtime when a oversized viewport is detected, but i live that to future commits.
please test this and let me know the results.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4841 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-01-15 15:52:08 +00:00
}
2010-09-28 02:15:02 +00:00
# endif
2010-01-17 17:44:09 +00:00
u8 * bytecode = 0 ;
int bytecodelen = 0 ;
2013-03-26 23:35:14 +01:00
if ( ! D3D : : CompilePixelShader ( code . GetBuffer ( ) , ( int ) strlen ( code . GetBuffer ( ) ) , & bytecode , & bytecodelen ) ) {
2010-12-05 14:15:36 +00:00
GFX_DEBUGGER_PAUSE_AT ( NEXT_ERROR , true ) ;
2010-01-17 17:44:09 +00:00
return false ;
}
2010-09-28 02:15:02 +00:00
// Insert the bytecode into the caches
2010-11-15 05:22:03 +00:00
g_ps_disk_cache . Append ( uid , bytecode , bytecodelen ) ;
2010-01-17 17:44:09 +00:00
// And insert it into the shader cache.
2011-09-09 00:32:04 +02:00
bool success = InsertByteCode ( uid , bytecode , bytecodelen , true ) ;
2010-01-17 17:44:09 +00:00
delete [ ] bytecode ;
2010-11-29 16:16:48 +00:00
2011-09-09 21:34:46 +02:00
if ( g_ActiveConfig . bEnableShaderDebugging & & success )
2011-09-09 00:32:04 +02:00
{
2013-03-26 23:35:14 +01:00
PixelShaders [ uid ] . code = code . GetBuffer ( ) ;
2011-09-09 00:32:04 +02:00
}
2010-12-05 14:15:36 +00:00
GFX_DEBUGGER_PAUSE_AT ( NEXT_PIXEL_SHADER_CHANGE , true ) ;
2011-09-09 00:32:04 +02:00
return success ;
2010-01-17 17:44:09 +00:00
}
2013-03-26 23:35:14 +01:00
bool PixelShaderCache : : InsertByteCode ( const PixelShaderUid & uid , const u8 * bytecode , int bytecodelen , bool activate )
2010-09-28 02:15:02 +00:00
{
2010-01-17 17:44:09 +00:00
LPDIRECT3DPIXELSHADER9 shader = D3D : : CreatePixelShaderFromByteCode ( bytecode , bytecodelen ) ;
2009-09-02 04:10:40 +00:00
2009-09-03 02:21:14 +00:00
// Make an entry in the table
PSCacheEntry newentry ;
newentry . shader = shader ;
PixelShaders [ uid ] = newentry ;
last_entry = & PixelShaders [ uid ] ;
2009-02-28 22:10:38 +00:00
2010-01-17 17:44:09 +00:00
if ( ! shader ) {
// INCSTAT(stats.numPixelShadersFailed);
return false ;
}
2009-09-03 02:21:14 +00:00
INCSTAT ( stats . numPixelShadersCreated ) ;
2010-09-28 02:15:02 +00:00
SETSTAT ( stats . numPixelShadersAlive , PixelShaders . size ( ) ) ;
2010-01-17 17:44:09 +00:00
if ( activate )
2009-09-03 02:21:14 +00:00
{
ok big changes here:
in videocommon little fix for the alpha test values, return to the original values as they are more accurate.
in D3D:
huge change in state management, now all the state management is centralized and redundant state changes are eliminated.
Fixed the overlapped viewport error in non ati cards:
the error was caused by this: when a viewport is defined larger than the current rendertarget, an error is thrown and the last valid viewport is used, this is the reference behavior, in ati cards if a larger viewport is defined, no eror is returned, the rendering is valid and is rendered using the projection defined by the viewport but limited to the rendertarget are, exactly like opengl or the GC hardware.
to solve this in reference drivers defined a large rendertarget (2x the size of the original) and proceed to render in a centered quad insithe the larger rendertarget, in this way larger viewports always falls inside a valid rendertarget size, the drawback of this is the waste of resources. it can be dynamized, depending or games or changed at runtime when a oversized viewport is detected, but i live that to future commits.
please test this and let me know the results.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4841 8ced0084-cf51-0410-be5f-012b33b47a6e
2010-01-15 15:52:08 +00:00
D3D : : SetPixelShader ( shader ) ;
2009-03-08 19:36:00 +00:00
}
2010-01-17 17:44:09 +00:00
return true ;
2009-03-01 01:09:32 +00:00
}
2011-01-29 20:16:51 +00:00
2011-01-31 01:28:32 +00:00
void Renderer : : SetPSConstant4f ( unsigned int const_number , float f1 , float f2 , float f3 , float f4 )
2011-01-29 20:16:51 +00:00
{
float f [ 4 ] = { f1 , f2 , f3 , f4 } ;
DX9 : : D3D : : dev - > SetPixelShaderConstantF ( const_number , f , 1 ) ;
}
2011-01-31 01:28:32 +00:00
void Renderer : : SetPSConstant4fv ( unsigned int const_number , const float * f )
2011-01-29 20:16:51 +00:00
{
DX9 : : D3D : : dev - > SetPixelShaderConstantF ( const_number , f , 1 ) ;
}
2011-01-31 01:28:32 +00:00
void Renderer : : SetMultiPSConstant4fv ( unsigned int const_number , unsigned int count , const float * f )
2011-01-29 20:16:51 +00:00
{
DX9 : : D3D : : dev - > SetPixelShaderConstantF ( const_number , f , count ) ;
}
2011-01-31 01:28:32 +00:00
} // namespace DX9