this is a pure optimization commit:

return to the  old values in constants in pixelshader, the old values give mi 3 or 4 fps more,(maybe some kind of compiler optimization) in some games and with the current algorithm i notice no difference with this values, please report any problem.
optimizes SSAA to make it a little faster and, the quality should be the same but with a little speedup.
change the way the frame is processed depending is xfb is enabled or not to make this a little faster.
please test and report any problem

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5820 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado
2010-07-02 17:09:53 +00:00
parent 6cacc52ebf
commit 12a676c273
14 changed files with 312 additions and 243 deletions

View File

@ -22,7 +22,7 @@ static const char ID[4] = {'D', 'C', 'A', 'C'};
// Update this to the current SVN revision every time you change shader generation code. // Update this to the current SVN revision every time you change shader generation code.
// We don't automatically get this from SVN_REV because that would mean regenerating the // We don't automatically get this from SVN_REV because that would mean regenerating the
// shader cache for every revision, graphics-related or not, which is simply annoying. // shader cache for every revision, graphics-related or not, which is simply annoying.
const int version = 5813; const int version = 5820;
LinearDiskCache::LinearDiskCache() LinearDiskCache::LinearDiskCache()
: file_(NULL), num_entries_(0) { : file_(NULL), num_entries_(0) {

View File

@ -152,18 +152,16 @@ static void SampleTexture(char *&p, const char *destination, const char *texcoor
static bool WriteAlphaTest(char *&p, API_TYPE ApiType); static bool WriteAlphaTest(char *&p, API_TYPE ApiType);
static void WriteFog(char *&p); static void WriteFog(char *&p);
const float epsilon8bit = 1.0f / 255.0f;
static const char *tevKSelTableC[] = // KCSEL static const char *tevKSelTableC[] = // KCSEL
{ {
"1.0f,1.0f,1.0f", // 1 = 0x00 "1.0f,1.0f,1.0f", // 1 = 0x00
"(223.0f/255.0f),(223.0f/255.0f),(223.0f/255.0f)", // 7_8 = 0x01 "0.875f,0.875f,0.875f", // 7_8 = 0x01
"(191.0f/255.0f),(191.0f/255.0f),(191.0f/255.0f)", // 3_4 = 0x02 "0.75f,0.75f,0.75f", // 3_4 = 0x02
"(159.0f/255.0f),(159.0f/255.0f),(159.0f/255.0f)", // 5_8 = 0x03 "0.625f,0.625f,0.625f", // 5_8 = 0x03
"(127.0f/255.0f),(127.0f/255.0f),(127.0f/255.0f)", // 1_2 = 0x04 "0.5f,0.5f,0.5f", // 1_2 = 0x04
"(95.0f/255.0f),(95.0f/255.0f),(95.0f/255.0f)", // 3_8 = 0x05 "0.375f,0.375f,0.375f", // 3_8 = 0x05
"(63.0f/255.0f),(63.0f/255.0f),(63.0f/255.0f)", // 1_4 = 0x06 "0.25f,0.25f,0.25f", // 1_4 = 0x06
"(31.0f/255.0f),(31.0f/255.0f),(31.0f/255.0f)", // 1_8 = 0x07 "0.125f,0.125f,0.125f", // 1_8 = 0x07
"ERROR", // 0x08 "ERROR", // 0x08
"ERROR", // 0x09 "ERROR", // 0x09
"ERROR", // 0x0a "ERROR", // 0x0a
@ -193,13 +191,13 @@ static const char *tevKSelTableC[] = // KCSEL
static const char *tevKSelTableA[] = // KASEL static const char *tevKSelTableA[] = // KASEL
{ {
"1.0f", // 1 = 0x00 "1.0f", // 1 = 0x00
"(223.0f/255.0f)",// 7_8 = 0x01 "0.875f",// 7_8 = 0x01
"(191.0f/255.0f)", // 3_4 = 0x02 "0.75f", // 3_4 = 0x02
"(159.0f/255.0f)",// 5_8 = 0x03 "0.625f",// 5_8 = 0x03
"(127.0f/255.0f)", // 1_2 = 0x04 "0.5f", // 1_2 = 0x04
"(95.0f/255.0f)",// 3_8 = 0x05 "0.375f",// 3_8 = 0x05
"(63.0f/255.0f)", // 1_4 = 0x06 "0.25f", // 1_4 = 0x06
"(31.0f/255.0f)",// 1_8 = 0x07 "0.125f",// 1_8 = 0x07
"ERROR", // 0x08 "ERROR", // 0x08
"ERROR", // 0x09 "ERROR", // 0x09
"ERROR", // 0x0a "ERROR", // 0x0a
@ -237,8 +235,8 @@ static const char *tevScaleTable[] = // CS
static const char *tevBiasTable[] = // TB static const char *tevBiasTable[] = // TB
{ {
"", // ZERO, "", // ZERO,
"+(127.0f/255.0f)", // ADDHALF, "+0.5f", // ADDHALF,
"-(127.0f/255.0f)", // SUBHALF, "-0.5f", // SUBHALF,
"", "",
}; };
@ -262,7 +260,7 @@ static const char *tevCInputTable[] = // CC
"(rastemp.rgb)", // RASC, "(rastemp.rgb)", // RASC,
"(rastemp.aaa)", // RASA, "(rastemp.aaa)", // RASA,
"float3(1.0f,1.0f,1.0f)", // ONE "float3(1.0f,1.0f,1.0f)", // ONE
"float3((127.0f/255.0f),(127.0f/255.0f),(127.0f/255.0f))", // HALF "float3(0.5f,0.5f,0.5f)", // HALF
"(konsttemp.rgb)", //"konsttemp.rgb", // KONST "(konsttemp.rgb)", //"konsttemp.rgb", // KONST
"float3(0.0f,0.0f,0.0f)", // ZERO "float3(0.0f,0.0f,0.0f)", // ZERO
///aded extra values to map clamped values ///aded extra values to map clamped values
@ -279,7 +277,7 @@ static const char *tevCInputTable[] = // CC
"(rastemp.rgb)", // RASC, "(rastemp.rgb)", // RASC,
"(rastemp.aaa)", // RASA, "(rastemp.aaa)", // RASA,
"float3(1.0f,1.0f,1.0f)", // ONE "float3(1.0f,1.0f,1.0f)", // ONE
"float3((127.0f/255.0f),(127.0f/255.0f),(127.0f/255.0f))", // HALF "float3(0.5f,0.5f,0.5f)", // HALF
"(konsttemp.rgb)", //"konsttemp.rgb", // KONST "(konsttemp.rgb)", //"konsttemp.rgb", // KONST
"float3(0.0f,0.0f,0.0f)", // ZERO "float3(0.0f,0.0f,0.0f)", // ZERO
"PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR", "PADERROR",

View File

@ -95,7 +95,7 @@ public:
static void RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc); static void RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc);
// Finish up the current frame, print some stats // Finish up the current frame, print some stats
static void Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight); static void Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,const EFBRectangle& rc);
}; };
void UpdateViewport(); void UpdateViewport();

View File

@ -516,14 +516,16 @@ void Renderer::RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRect
return; return;
VideoFifo_CheckEFBAccess(); VideoFifo_CheckEFBAccess();
VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight); VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight);
FBManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc);
XFBWrited = true; XFBWrited = true;
// XXX: Without the VI, how would we know what kind of field this is? So // XXX: Without the VI, how would we know what kind of field this is? So
// just use progressive. // just use progressive.
if (!g_ActiveConfig.bUseXFB) if (g_ActiveConfig.bUseXFB)
{ {
Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight); FBManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc);
}
else
{
Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight,sourceRc);
Common::AtomicStoreRelease(s_swapRequested, FALSE); Common::AtomicStoreRelease(s_swapRequested, FALSE);
} }
} }
@ -801,7 +803,7 @@ void Renderer::SetBlendMode(bool forceUpdate)
} }
} }
void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,const EFBRectangle& rc)
{ {
if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.bUseRealXFB) || !fbWidth || !fbHeight) if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.bUseRealXFB) || !fbWidth || !fbHeight)
{ {
@ -815,7 +817,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
if (field == FIELD_LOWER) xfbAddr -= fbWidth * 2; if (field == FIELD_LOWER) xfbAddr -= fbWidth * 2;
u32 xfbCount = 0; u32 xfbCount = 0;
const XFBSource** xfbSourceList = FBManager.GetXFBSource(xfbAddr, fbWidth, fbHeight, xfbCount); const XFBSource** xfbSourceList = FBManager.GetXFBSource(xfbAddr, fbWidth, fbHeight, xfbCount);
if (!xfbSourceList || xfbCount == 0) if ((!xfbSourceList || xfbCount == 0) && g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB)
{ {
g_VideoInitialize.pCopiedToXFB(false); g_VideoInitialize.pCopiedToXFB(false);
return; return;
@ -849,27 +851,65 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
D3D::context->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV(), NULL); D3D::context->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV(), NULL);
// TODO: Enable linear filtering here // TODO: Enable linear filtering here
// draw each xfb source if(g_ActiveConfig.bUseXFB)
for (u32 i = 0; i < xfbCount; ++i)
{ {
const XFBSource* xfbSource = xfbSourceList[i]; const XFBSource* xfbSource;
MathUtil::Rectangle<float> sourceRc; // draw each xfb source
sourceRc.left = 0; for (u32 i = 0; i < xfbCount; ++i)
sourceRc.top = 0; {
sourceRc.right = xfbSource->texWidth; xfbSource = xfbSourceList[i];
sourceRc.bottom = xfbSource->texHeight; MathUtil::Rectangle<float> sourceRc;
sourceRc.left = 0;
sourceRc.top = 0;
sourceRc.right = xfbSource->texWidth;
sourceRc.bottom = xfbSource->texHeight;
MathUtil::Rectangle<float> drawRc; MathUtil::Rectangle<float> drawRc;
drawRc.top = -1;
drawRc.bottom = 1;
drawRc.left = -1;
drawRc.right = 1;
D3D::drawShadedTexSubQuad(xfbSource->tex->GetSRV(), &sourceRc, xfbSource->texWidth, xfbSource->texHeight, &drawRc, PixelShaderCache::GetColorCopyProgram(),VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout()); if (g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB)
{
// use virtual xfb with offset
int xfbHeight = xfbSource->srcHeight;
int xfbWidth = xfbSource->srcWidth;
int hOffset = ((s32)xfbSource->srcAddr - (s32)xfbAddr) / ((s32)fbWidth * 2);
drawRc.bottom = 1.0f - 2.0f * ((hOffset) / (float)fbHeight);
drawRc.top = 1.0f - 2.0f * ((hOffset + xfbHeight) / (float)fbHeight);
drawRc.left = -(xfbWidth / (float)fbWidth);
drawRc.right = (xfbWidth / (float)fbWidth);
if (!g_ActiveConfig.bAutoScale)
{
// scale draw area for a 1 to 1 pixel mapping with the draw target
float vScale = (float)fbHeight / (float)s_backbuffer_height;
float hScale = (float)fbWidth / (float)s_backbuffer_width;
drawRc.top *= vScale;
drawRc.bottom *= vScale;
drawRc.left *= hScale;
drawRc.right *= hScale;
}
}
else
{
drawRc.top = -1;
drawRc.bottom = 1;
drawRc.left = -1;
drawRc.right = 1;
}
D3D::drawShadedTexSubQuad(xfbSource->tex->GetSRV(), &sourceRc, xfbSource->texWidth, xfbSource->texHeight, &drawRc, PixelShaderCache::GetColorCopyProgram(),VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout());
}
}
else
{
TargetRectangle targetRc = Renderer::ConvertEFBRectangle(rc);
D3DTexture2D* read_texture = FBManager.GetEFBColorTexture();
D3D::drawShadedTexQuad(read_texture->GetSRV(), targetRc.AsRECT(), Renderer::GetFullTargetWidth(), Renderer::GetFullTargetHeight(), PixelShaderCache::GetColorCopyProgram(),VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout());
} }
// done with drawing the game stuff, good moment to save a screenshot // done with drawing the game stuff, good moment to save a screenshot
if (s_bScreenshot) if (s_bScreenshot)
{ {

View File

@ -312,7 +312,8 @@ void VideoFifo_CheckSwapRequest()
{ {
if (Common::AtomicLoadAcquire(s_swapRequested)) if (Common::AtomicLoadAcquire(s_swapRequested))
{ {
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight); EFBRectangle rc;
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight,rc);
Common::AtomicStoreRelease(s_swapRequested, FALSE); Common::AtomicStoreRelease(s_swapRequested, FALSE);
} }
} }

View File

@ -311,7 +311,6 @@ void GFXConfigDialogDX::CreateGUIControls()
sbDebuggingTools = new wxStaticBoxSizer( new wxStaticBox( m_PageAdvanced, wxID_ANY, wxT("Debugging tools") ), wxVERTICAL ); sbDebuggingTools = new wxStaticBoxSizer( new wxStaticBox( m_PageAdvanced, wxID_ANY, wxT("Debugging tools") ), wxVERTICAL );
m_OverlayStats = new wxCheckBox( m_PageAdvanced, ID_OVERLAYSTATS, wxT("Overlay Some Statics"), wxDefaultPosition, wxDefaultSize, 0 ); m_OverlayStats = new wxCheckBox( m_PageAdvanced, ID_OVERLAYSTATS, wxT("Overlay Some Statics"), wxDefaultPosition, wxDefaultSize, 0 );
m_ShaderErrors = new wxCheckBox( m_PageAdvanced, ID_SHADERERRORS, wxT("Show Shader Compilation Errors"), wxDefaultPosition, wxDefaultSize, 0 ); m_ShaderErrors = new wxCheckBox( m_PageAdvanced, ID_SHADERERRORS, wxT("Show Shader Compilation Errors"), wxDefaultPosition, wxDefaultSize, 0 );
m_ShaderErrors->Enable( false );
m_TexfmtOverlay = new wxCheckBox( m_PageAdvanced, ID_TEXFMT_OVERLAY, wxT("Enable TexFmt Overlay"), wxDefaultPosition, wxDefaultSize, 0 ); m_TexfmtOverlay = new wxCheckBox( m_PageAdvanced, ID_TEXFMT_OVERLAY, wxT("Enable TexFmt Overlay"), wxDefaultPosition, wxDefaultSize, 0 );
m_TexfmtCenter = new wxCheckBox( m_PageAdvanced, ID_TEXFMT_CENTER, wxT("Centered"), wxDefaultPosition, wxDefaultSize, 0 ); m_TexfmtCenter = new wxCheckBox( m_PageAdvanced, ID_TEXFMT_CENTER, wxT("Centered"), wxDefaultPosition, wxDefaultSize, 0 );
m_ProjStats = new wxCheckBox( m_PageAdvanced, wxID_ANY, wxT("Overlay Projection Stats"), wxDefaultPosition, wxDefaultSize, 0 ); m_ProjStats = new wxCheckBox( m_PageAdvanced, wxID_ANY, wxT("Overlay Projection Stats"), wxDefaultPosition, wxDefaultSize, 0 );

View File

@ -332,7 +332,6 @@ void FramebufferManager::copyToVirtualXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight
--it; --it;
} }
float SuperSampleCompensation = 1.0f;
float scaleX = Renderer::GetXFBScaleX(); float scaleX = Renderer::GetXFBScaleX();
float scaleY = Renderer::GetXFBScaleY(); float scaleY = Renderer::GetXFBScaleY();
TargetRectangle targetSource,efbSource; TargetRectangle targetSource,efbSource;

View File

@ -46,24 +46,26 @@ static std::set<u32> unique_shaders;
static float lastPSconstants[C_COLORMATRIX+16][4]; static float lastPSconstants[C_COLORMATRIX+16][4];
static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[3]; #define MAX_SSAA_SHADERS 3
static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[3];
static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[3]; static LPDIRECT3DPIXELSHADER9 s_ColorMatrixProgram[MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_ColorCopyProgram[MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_DepthMatrixProgram[MAX_SSAA_SHADERS];
static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0; static LPDIRECT3DPIXELSHADER9 s_ClearProgram = 0;
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode) LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorMatrixProgram(int SSAAMode)
{ {
return s_ColorMatrixProgram[SSAAMode % 3]; return s_ColorMatrixProgram[SSAAMode % MAX_SSAA_SHADERS];
} }
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode) LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetDepthMatrixProgram(int SSAAMode)
{ {
return s_DepthMatrixProgram[SSAAMode % 3]; return s_DepthMatrixProgram[SSAAMode % MAX_SSAA_SHADERS];
} }
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode) LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetColorCopyProgram(int SSAAMode)
{ {
return s_ColorCopyProgram[SSAAMode % 3]; return s_ColorCopyProgram[SSAAMode % MAX_SSAA_SHADERS];
} }
LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram() LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram()
@ -134,34 +136,32 @@ void PixelShaderCache::Init()
"in float2 uv0 : TEXCOORD0){\n" "in float2 uv0 : TEXCOORD0){\n"
"ocol0 = tex2D(samp0,uv0);\n" "ocol0 = tex2D(samp0,uv0);\n"
"}\n"); "}\n");
s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 Samples //1 Samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n" sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : COLOR0,\n" "out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n" "in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n" "in float4 uv1 : TEXCOORD1){\n"
"in float4 uv2 : TEXCOORD2,\n" "ocol0 = tex2D(samp0,uv0.xy);\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4){\n"
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv4.xy))*0.25f;\n"
"}\n"); "}\n");
s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//9 Samples //4 Samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n" sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : COLOR0,\n" "out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n" "in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n" "in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n" "in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n" "in float4 uv3 : TEXCOORD3){\n"
"in float4 uv4 : TEXCOORD4){\n" "ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25;\n"
"ocol0 = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz) + tex2D(samp0,uv3.xy) + tex2D(samp0,uv3.wz) + tex2D(samp0,uv4.xy) + tex2D(samp0,uv4.wz) + tex2D(samp0,uv0.xy))/9.0f;\n"
"}\n"); "}\n");
s_ColorCopyProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); s_ColorCopyProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//Color conversion Programs //Color conversion Programs
//1 sample //1 sample
sprintf(pprog, "uniform sampler samp0 : register(s0);\n" sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
@ -172,25 +172,21 @@ void PixelShaderCache::Init()
"float4 texcol = tex2D(samp0,uv0);\n" "float4 texcol = tex2D(samp0,uv0);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX); "}\n",C_COLORMATRIX);
s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 samples //1 samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n" sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n" "uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : COLOR0,\n" "out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n" "in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n" "in float4 uv1 : TEXCOORD1){\n"
"in float4 uv2 : TEXCOORD2,\n" "float4 texcol = tex2D(samp0,uv0.xy);\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4,\n"
"in float4 uv5 : TEXCOORD5){\n"
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX); "}\n",C_COLORMATRIX);
s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//9 samples //4 samples SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n" sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n" "uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n" "void main(\n"
@ -198,10 +194,8 @@ void PixelShaderCache::Init()
"in float4 uv0 : TEXCOORD0,\n" "in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n" "in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n" "in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n" "in float4 uv3 : TEXCOORD3){\n"
"in float4 uv4 : TEXCOORD4,\n" "float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n"
"in float4 uv5 : TEXCOORD5){\n"
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX); "}\n",C_COLORMATRIX);
s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); s_ColorMatrixProgram[2] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
@ -216,29 +210,25 @@ void PixelShaderCache::Init()
"float4 texcol = tex2D(samp0,uv0);\n" "float4 texcol = tex2D(samp0,uv0);\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX); "}\n",C_COLORMATRIX);
s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//4 sample //1 sample SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n" sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n" "uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : COLOR0,\n" "out float4 ocol0 : COLOR0,\n"
"in float4 uv0 : TEXCOORD0,\n" "in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n" "in float4 uv1 : TEXCOORD1){\n"
"in float4 uv2 : TEXCOORD2,\n" "float4 texcol = tex2D(samp0,uv0.xy);\n"
"in float4 uv3 : TEXCOORD3,\n"
"in float4 uv4 : TEXCOORD4,\n"
"in float4 uv5 : TEXCOORD5){\n"
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))))*0.25f;\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n",C_COLORMATRIX); "}\n",C_COLORMATRIX);
s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog)); s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(pprog, (int)strlen(pprog));
//9 sample //4 sample SSAA
sprintf(pprog, "uniform sampler samp0 : register(s0);\n" sprintf(pprog, "uniform sampler samp0 : register(s0);\n"
"uniform float4 cColMatrix[5] : register(c%d);\n" "uniform float4 cColMatrix[5] : register(c%d);\n"
"void main(\n" "void main(\n"
@ -246,10 +236,8 @@ void PixelShaderCache::Init()
"in float4 uv0 : TEXCOORD0,\n" "in float4 uv0 : TEXCOORD0,\n"
"in float4 uv1 : TEXCOORD1,\n" "in float4 uv1 : TEXCOORD1,\n"
"in float4 uv2 : TEXCOORD2,\n" "in float4 uv2 : TEXCOORD2,\n"
"in float4 uv3 : TEXCOORD3,\n" "in float4 uv3 : TEXCOORD3){\n"
"in float4 uv4 : TEXCOORD4,\n" "float4 texcol = (tex2D(samp0,uv1.xy) + tex2D(samp0,uv1.wz) + tex2D(samp0,uv2.xy) + tex2D(samp0,uv2.wz))*0.25f;\n"
"in float4 uv5 : TEXCOORD5){\n"
"float4 texcol = (tex2D(samp0,float2(clamp(uv1.x,uv5.x,uv5.z),clamp(uv1.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv1.w,uv5.x,uv5.z),clamp(uv1.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.x,uv5.x,uv5.z),clamp(uv2.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv2.w,uv5.x,uv5.z),clamp(uv2.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.x,uv5.x,uv5.z),clamp(uv3.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv3.w,uv5.x,uv5.z),clamp(uv3.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.x,uv5.x,uv5.z),clamp(uv4.y,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv4.w,uv5.x,uv5.z),clamp(uv4.z,uv5.y,uv5.w))) + tex2D(samp0,float2(clamp(uv0.x,uv5.x,uv5.z),clamp(uv0.y,uv5.y,uv5.w))))/9;\n"
"float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n" "float4 EncodedDepth = frac((texcol.r * (16777215.0f/16777216.0f)) * float4(1.0f,255.0f,255.0f*255.0f,255.0f*255.0f*255.0f));\n"
"texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n" "texcol = float4((EncodedDepth.rgb * (16777216.0f/16777215.0f)),1.0f);\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
@ -285,7 +273,7 @@ void PixelShaderCache::Clear()
void PixelShaderCache::Shutdown() void PixelShaderCache::Shutdown()
{ {
for(int i = 0;i<3;i++) for(int i = 0;i < MAX_SSAA_SHADERS; i++)
{ {
if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release(); if (s_ColorMatrixProgram[i]) s_ColorMatrixProgram[i]->Release();
s_ColorMatrixProgram[i] = NULL; s_ColorMatrixProgram[i] = NULL;

View File

@ -543,14 +543,17 @@ void Renderer::RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRect
if(!fbWidth || !fbHeight) if(!fbWidth || !fbHeight)
return; return;
VideoFifo_CheckEFBAccess(); VideoFifo_CheckEFBAccess();
VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight); VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight);
FBManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc);
XFBWrited = true; XFBWrited = true;
// XXX: Without the VI, how would we know what kind of field this is? So // XXX: Without the VI, how would we know what kind of field this is? So
// just use progressive. // just use progressive.
if (!g_ActiveConfig.bUseXFB) if (g_ActiveConfig.bUseXFB)
{ {
Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight); FBManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc);
}
else
{
Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight,sourceRc);
Common::AtomicStoreRelease(s_swapRequested, FALSE); Common::AtomicStoreRelease(s_swapRequested, FALSE);
} }
} }
@ -953,7 +956,7 @@ void Renderer::SetBlendMode(bool forceUpdate)
void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,const EFBRectangle& rc)
{ {
if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.bUseRealXFB) || !fbWidth || !fbHeight) if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.bUseRealXFB) || !fbWidth || !fbHeight)
{ {
@ -967,7 +970,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
if (field == FIELD_LOWER) xfbAddr -= fbWidth * 2; if (field == FIELD_LOWER) xfbAddr -= fbWidth * 2;
u32 xfbCount = 0; u32 xfbCount = 0;
const XFBSource** xfbSourceList = FBManager.GetXFBSource(xfbAddr, fbWidth, fbHeight, xfbCount); const XFBSource** xfbSourceList = FBManager.GetXFBSource(xfbAddr, fbWidth, fbHeight, xfbCount);
if (!xfbSourceList || xfbCount == 0) if ((!xfbSourceList || xfbCount == 0) && g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB)
{ {
g_VideoInitialize.pCopiedToXFB(false); g_VideoInitialize.pCopiedToXFB(false);
return; return;
@ -1015,57 +1018,65 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
const XFBSource* xfbSource; if(g_ActiveConfig.bUseXFB)
// draw each xfb source
for (u32 i = 0; i < xfbCount; ++i)
{ {
xfbSource = xfbSourceList[i]; const XFBSource* xfbSource;
MathUtil::Rectangle<float> sourceRc;
sourceRc.left = 0;
sourceRc.top = 0;
sourceRc.right = xfbSource->texWidth;
sourceRc.bottom = xfbSource->texHeight;
MathUtil::Rectangle<float> drawRc; // draw each xfb source
for (u32 i = 0; i < xfbCount; ++i)
if (g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB)
{ {
// use virtual xfb with offset xfbSource = xfbSourceList[i];
int xfbHeight = xfbSource->srcHeight; MathUtil::Rectangle<float> sourceRc;
int xfbWidth = xfbSource->srcWidth;
int hOffset = ((s32)xfbSource->srcAddr - (s32)xfbAddr) / ((s32)fbWidth * 2);
drawRc.bottom = 1.0f - 2.0f * ((hOffset) / (float)fbHeight);
drawRc.top = 1.0f - 2.0f * ((hOffset + xfbHeight) / (float)fbHeight);
drawRc.left = -(xfbWidth / (float)fbWidth);
drawRc.right = (xfbWidth / (float)fbWidth);
sourceRc.left = 0;
sourceRc.top = 0;
sourceRc.right = xfbSource->texWidth;
sourceRc.bottom = xfbSource->texHeight;
if (!g_ActiveConfig.bAutoScale) MathUtil::Rectangle<float> drawRc;
if (g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB)
{ {
// scale draw area for a 1 to 1 pixel mapping with the draw target // use virtual xfb with offset
float vScale = (float)fbHeight / (float)s_backbuffer_height; int xfbHeight = xfbSource->srcHeight;
float hScale = (float)fbWidth / (float)s_backbuffer_width; int xfbWidth = xfbSource->srcWidth;
int hOffset = ((s32)xfbSource->srcAddr - (s32)xfbAddr) / ((s32)fbWidth * 2);
drawRc.top *= vScale; drawRc.bottom = 1.0f - 2.0f * ((hOffset) / (float)fbHeight);
drawRc.bottom *= vScale; drawRc.top = 1.0f - 2.0f * ((hOffset + xfbHeight) / (float)fbHeight);
drawRc.left *= hScale; drawRc.left = -(xfbWidth / (float)fbWidth);
drawRc.right *= hScale; drawRc.right = (xfbWidth / (float)fbWidth);
if (!g_ActiveConfig.bAutoScale)
{
// scale draw area for a 1 to 1 pixel mapping with the draw target
float vScale = (float)fbHeight / (float)s_backbuffer_height;
float hScale = (float)fbWidth / (float)s_backbuffer_width;
drawRc.top *= vScale;
drawRc.bottom *= vScale;
drawRc.left *= hScale;
drawRc.right *= hScale;
}
}
else
{
drawRc.top = -1;
drawRc.bottom = 1;
drawRc.left = -1;
drawRc.right = 1;
} }
}
else
{
drawRc.top = -1;
drawRc.bottom = 1;
drawRc.left = -1;
drawRc.right = 1;
}
D3D::drawShadedTexSubQuad(xfbSource->texture,&sourceRc,xfbSource->texWidth,xfbSource->texHeight,&drawRc,Width,Height,PixelShaderCache::GetColorCopyProgram(0),VertexShaderCache::GetSimpleVertexShader(0)); D3D::drawShadedTexSubQuad(xfbSource->texture,&sourceRc,xfbSource->texWidth,xfbSource->texHeight,&drawRc,Width,Height,PixelShaderCache::GetColorCopyProgram(0),VertexShaderCache::GetSimpleVertexShader(0));
}
}
else
{
TargetRectangle targetRc = Renderer::ConvertEFBRectangle(rc);
LPDIRECT3DTEXTURE9 read_texture = FBManager.GetEFBColorTexture(rc);
D3D::drawShadedTexQuad(read_texture,targetRc.AsRECT(),Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),Width,Height,PixelShaderCache::GetColorCopyProgram(g_Config.iMultisampleMode),VertexShaderCache::GetSimpleVertexShader(g_Config.iMultisampleMode));
} }
D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);
D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER);
vp.X = 0; vp.X = 0;
@ -1203,7 +1214,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
yScale = (float)(dst_rect.bottom - dst_rect.top) / (float)s_XFB_height; yScale = (float)(dst_rect.bottom - dst_rect.top) / (float)s_XFB_height;
} }
float SupersampleCoeficient = s_LastAA + 1; float SupersampleCoeficient = s_LastAA + 1;
switch(s_LastEFBScale) switch(s_LastEFBScale)
{ {
case 0: case 0:
@ -1373,7 +1384,8 @@ void Renderer::SetSamplerState(int stage, int texindex)
D3D::SetSamplerState(stage, D3DSAMP_ADDRESSU, d3dClamps[tm0.wrap_s]); D3D::SetSamplerState(stage, D3DSAMP_ADDRESSU, d3dClamps[tm0.wrap_s]);
D3D::SetSamplerState(stage, D3DSAMP_ADDRESSV, d3dClamps[tm0.wrap_t]); D3D::SetSamplerState(stage, D3DSAMP_ADDRESSV, d3dClamps[tm0.wrap_t]);
float lodbias = tm0.lod_bias / 32.0f; //float SuperSampleCoeficient = (s_LastAA < 3)? s_LastAA + 1 : s_LastAA - 1;// uncoment this changes to conserve detail when incresing ssaa level
float lodbias = (tm0.lod_bias / 32.0f);// + (s_LastAA)?(log(SuperSampleCoeficient) / log(2.0f)):0;
D3D::SetSamplerState(stage,D3DSAMP_MIPMAPLODBIAS,*(DWORD*)&lodbias); D3D::SetSamplerState(stage,D3DSAMP_MIPMAPLODBIAS,*(DWORD*)&lodbias);
D3D::SetSamplerState(stage,D3DSAMP_MAXMIPLEVEL,tm1.min_lod>>4); D3D::SetSamplerState(stage,D3DSAMP_MAXMIPLEVEL,tm1.min_lod>>4);
} }

View File

@ -551,20 +551,29 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, boo
sourcerect.right = targetSource.right; sourcerect.right = targetSource.right;
sourcerect.top = targetSource.top; sourcerect.top = targetSource.top;
if(bScaleByHalf)
if(bFromZBuffer)
{
if(bScaleByHalf || g_ActiveConfig.iMultisampleMode)
{
D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
}
else
{
D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
}
}
else
{ {
D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
} }
else
{
D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
D3D::ChangeSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
}
D3DFORMAT bformat = FBManager.GetEFBDepthRTSurfaceFormat(); D3DFORMAT bformat = FBManager.GetEFBDepthRTSurfaceFormat();
int SSAAMode = ( g_ActiveConfig.iMultisampleMode > 3 )? 0 : g_ActiveConfig.iMultisampleMode; int SSAAMode = g_ActiveConfig.iMultisampleMode;
D3D::drawShadedTexQuad( D3D::drawShadedTexQuad(
read_texture, read_texture,
&sourcerect, &sourcerect,

View File

@ -38,15 +38,16 @@ VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry; const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
static float GC_ALIGNED16(lastVSconstants[C_FOGPARAMS + 8][4]); static float GC_ALIGNED16(lastVSconstants[C_FOGPARAMS + 8][4]);
#define MAX_SSAA_SHADERS 3
static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[3]; static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[MAX_SSAA_SHADERS];
static LPDIRECT3DVERTEXSHADER9 ClearVertexShader; static LPDIRECT3DVERTEXSHADER9 ClearVertexShader;
LinearDiskCache g_vs_disk_cache; LinearDiskCache g_vs_disk_cache;
LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader(int level) LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader(int level)
{ {
return SimpleVertexShader[level % 3]; return SimpleVertexShader[level % MAX_SSAA_SHADERS];
} }
LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetClearVertexShader() LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetClearVertexShader()
@ -163,22 +164,14 @@ void VertexShaderCache::Init()
"{\n" "{\n"
"float4 vPosition : POSITION;\n" "float4 vPosition : POSITION;\n"
"float4 vTexCoord : TEXCOORD0;\n" "float4 vTexCoord : TEXCOORD0;\n"
"float4 vTexCoord1 : TEXCOORD1;\n" "float4 vTexCoord1 : TEXCOORD1;\n"
"float4 vTexCoord2 : TEXCOORD2;\n"
"float4 vTexCoord3 : TEXCOORD3;\n"
"float4 vTexCoord4 : TEXCOORD4;\n"
"float4 vTexCoord5 : TEXCOORD5;\n"
"};\n" "};\n"
"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float4 inTEX2 : TEXCOORD2)\n" "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float4 inTEX2 : TEXCOORD2)\n"
"{\n" "{\n"
"VSOUTPUT OUT;" "VSOUTPUT OUT;"
"OUT.vPosition = inPosition;\n" "OUT.vPosition = inPosition;\n"
"OUT.vTexCoord = inTEX0.xyyx;\n" "OUT.vTexCoord = inTEX0.xyyx;\n"
"OUT.vTexCoord1 = inTEX0.xyyx + (float4(-0.5f,-0.5f,-0.5f,-0.5f) * inTEX1.xyyx);\n" "OUT.vTexCoord1 = inTEX2;\n"
"OUT.vTexCoord2 = inTEX0.xyyx + (float4(-0.5f, 0.5f, 0.5f,-0.5f) * inTEX1.xyyx);\n"
"OUT.vTexCoord3 = inTEX0.xyyx + (float4( 0.5f,-0.5f,-0.5f, 0.5f) * inTEX1.xyyx);\n"
"OUT.vTexCoord4 = inTEX0.xyyx + (float4( 0.5f, 0.5f, 0.5f, 0.5f) * inTEX1.xyyx);\n"
"OUT.vTexCoord5 = inTEX2;\n"
"return OUT;\n" "return OUT;\n"
"}\n"); "}\n");
SimpleVertexShader[1] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg)); SimpleVertexShader[1] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));
@ -189,20 +182,16 @@ void VertexShaderCache::Init()
"float4 vTexCoord : TEXCOORD0;\n" "float4 vTexCoord : TEXCOORD0;\n"
"float4 vTexCoord1 : TEXCOORD1;\n" "float4 vTexCoord1 : TEXCOORD1;\n"
"float4 vTexCoord2 : TEXCOORD2;\n" "float4 vTexCoord2 : TEXCOORD2;\n"
"float4 vTexCoord3 : TEXCOORD3;\n" "float4 vTexCoord3 : TEXCOORD3;\n"
"float4 vTexCoord4 : TEXCOORD4;\n"
"float4 vTexCoord5 : TEXCOORD5;\n"
"};\n" "};\n"
"VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float4 inTEX2 : TEXCOORD2)\n" "VSOUTPUT main(float4 inPosition : POSITION,float2 inTEX0 : TEXCOORD0,float2 inTEX1 : TEXCOORD1,float4 inTEX2 : TEXCOORD2)\n"
"{\n" "{\n"
"VSOUTPUT OUT;" "VSOUTPUT OUT;"
"OUT.vPosition = inPosition;\n" "OUT.vPosition = inPosition;\n"
"OUT.vTexCoord = inTEX0.xyyx;\n" "OUT.vTexCoord = inTEX0.xyyx;\n"
"OUT.vTexCoord1 = inTEX0.xyyx + (float4(-1.0f,-1.0f,-1.0f, 1.0f) * inTEX1.xyyx);\n" "OUT.vTexCoord1 = inTEX0.xyyx + (float4(-1.0f,-0.5f, 1.0f,-0.5f) * inTEX1.xyyx);\n"
"OUT.vTexCoord2 = inTEX0.xyyx + (float4(-1.0f, 0.0f, 0.0f, 1.0f) * inTEX1.xyyx);\n" "OUT.vTexCoord2 = inTEX0.xyyx + (float4( 1.0f, 0.5f,-1.0f, 0.5f) * inTEX1.xyyx);\n"
"OUT.vTexCoord3 = inTEX0.xyyx + (float4(-1.0f, 1.0f, 1.0f, 1.0f) * inTEX1.xyyx);\n" "OUT.vTexCoord3 = inTEX2;\n"
"OUT.vTexCoord4 = inTEX0.xyyx + (float4( 0.0f, 1.0f,-1.0f, 0.0f) * inTEX1.xyyx);\n"
"OUT.vTexCoord5 = inTEX2;\n"
"return OUT;\n" "return OUT;\n"
"}\n"); "}\n");
SimpleVertexShader[2] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg)); SimpleVertexShader[2] = D3D::CompileAndCreateVertexShader(vProg, (int)strlen(vProg));
@ -236,7 +225,7 @@ void VertexShaderCache::Clear()
void VertexShaderCache::Shutdown() void VertexShaderCache::Shutdown()
{ {
for (int i = 0; i < 3; i++) for (int i = 0; i < MAX_SSAA_SHADERS; i++)
{ {
if (SimpleVertexShader[i]) if (SimpleVertexShader[i])
SimpleVertexShader[i]->Release(); SimpleVertexShader[i]->Release();

View File

@ -347,7 +347,8 @@ void VideoFifo_CheckSwapRequest()
{ {
if (Common::AtomicLoadAcquire(s_swapRequested)) if (Common::AtomicLoadAcquire(s_swapRequested))
{ {
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight); EFBRectangle rc;
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight,rc);
Common::AtomicStoreRelease(s_swapRequested, FALSE); Common::AtomicStoreRelease(s_swapRequested, FALSE);
} }
} }

View File

@ -824,19 +824,22 @@ void Renderer::RenderToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRect
s_skipSwap = g_bSkipCurrentFrame; s_skipSwap = g_bSkipCurrentFrame;
VideoFifo_CheckEFBAccess(); VideoFifo_CheckEFBAccess();
VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight); VideoFifo_CheckSwapRequestAt(xfbAddr, fbWidth, fbHeight);
g_framebufferManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc);
XFBWrited = true; XFBWrited = true;
// XXX: Without the VI, how would we know what kind of field this is? So // XXX: Without the VI, how would we know what kind of field this is? So
// just use progressive. // just use progressive.
if (!g_ActiveConfig.bUseXFB) if (g_ActiveConfig.bUseXFB)
{ {
Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight); g_framebufferManager.CopyToXFB(xfbAddr, fbWidth, fbHeight, sourceRc);
}
else
{
Renderer::Swap(xfbAddr, FIELD_PROGRESSIVE, fbWidth, fbHeight,sourceRc);
Common::AtomicStoreRelease(s_swapRequested, FALSE); Common::AtomicStoreRelease(s_swapRequested, FALSE);
} }
} }
// This function has the final picture. We adjust the aspect ratio here. // This function has the final picture. We adjust the aspect ratio here.
void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,const EFBRectangle& Rc)
{ {
if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.bUseRealXFB) || !fbWidth || !fbHeight) if (g_bSkipCurrentFrame || (!XFBWrited && !g_ActiveConfig.bUseRealXFB) || !fbWidth || !fbHeight)
{ {
@ -846,7 +849,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
if (field == FIELD_LOWER) xfbAddr -= fbWidth * 2; if (field == FIELD_LOWER) xfbAddr -= fbWidth * 2;
u32 xfbCount = 0; u32 xfbCount = 0;
const XFBSource** xfbSourceList = g_framebufferManager.GetXFBSource(xfbAddr, fbWidth, fbHeight, xfbCount); const XFBSource** xfbSourceList = g_framebufferManager.GetXFBSource(xfbAddr, fbWidth, fbHeight, xfbCount);
if (!xfbSourceList) if ((!xfbSourceList || xfbCount == 0) && g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB)
{ {
g_VideoInitialize.pCopiedToXFB(false); g_VideoInitialize.pCopiedToXFB(false);
WARN_LOG(VIDEO, "Failed to get video for this frame"); WARN_LOG(VIDEO, "Failed to get video for this frame");
@ -892,90 +895,119 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
const XFBSource* xfbSource = NULL; const XFBSource* xfbSource = NULL;
// draw each xfb source if(g_ActiveConfig.bUseXFB)
for (u32 i = 0; i < xfbCount; ++i)
{ {
xfbSource = xfbSourceList[i]; // draw each xfb source
for (u32 i = 0; i < xfbCount; ++i)
TargetRectangle sourceRc;
if (g_ActiveConfig.bAutoScale || g_ActiveConfig.bUseXFB)
{ {
sourceRc = xfbSource->sourceRc; xfbSource = xfbSourceList[i];
}
else
{
sourceRc.left = 0;
sourceRc.top = xfbSource->texHeight;
sourceRc.right = xfbSource->texWidth;
sourceRc.bottom = 0;
}
MathUtil::Rectangle<float> drawRc; TargetRectangle sourceRc;
if (g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB) if (g_ActiveConfig.bAutoScale || g_ActiveConfig.bUseXFB)
{
// use virtual xfb with offset
int xfbHeight = xfbSource->srcHeight;
int xfbWidth = xfbSource->srcWidth;
int hOffset = ((s32)xfbSource->srcAddr - (s32)xfbAddr) / ((s32)fbWidth * 2);
drawRc.top = 1.0f - (2.0f * (hOffset) / (float)fbHeight);
drawRc.bottom = 1.0f - (2.0f * (hOffset + xfbHeight) / (float)fbHeight);
drawRc.left = -(xfbWidth / (float)fbWidth);
drawRc.right = (xfbWidth / (float)fbWidth);
if (!g_ActiveConfig.bAutoScale)
{ {
// scale draw area for a 1 to 1 pixel mapping with the draw target sourceRc = xfbSource->sourceRc;
float vScale = (float)fbHeight / (float)back_rc.GetHeight(); }
float hScale = (float)fbWidth / (float)back_rc.GetWidth(); else
{
drawRc.top *= vScale; sourceRc.left = 0;
drawRc.bottom *= vScale; sourceRc.top = xfbSource->texHeight;
drawRc.left *= hScale; sourceRc.right = xfbSource->texWidth;
drawRc.right *= hScale; sourceRc.bottom = 0;
} }
}
else
{
drawRc.top = 1;
drawRc.bottom = -1;
drawRc.left = -1;
drawRc.right = 1;
}
// Tell the OSD Menu about the current internal resolution
OSDInternalW = xfbSource->sourceRc.GetWidth(); OSDInternalH = xfbSource->sourceRc.GetHeight();
// Texture map xfbSource->texture onto the main buffer MathUtil::Rectangle<float> drawRc;
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, xfbSource->texture);
// We must call ApplyShader here even if no post proc is selected - it takes if (g_ActiveConfig.bUseXFB && !g_ActiveConfig.bUseRealXFB)
// care of disabling it in that case. It returns false in case of no post processing. {
// use virtual xfb with offset
int xfbHeight = xfbSource->srcHeight;
int xfbWidth = xfbSource->srcWidth;
int hOffset = ((s32)xfbSource->srcAddr - (s32)xfbAddr) / ((s32)fbWidth * 2);
drawRc.top = 1.0f - (2.0f * (hOffset) / (float)fbHeight);
drawRc.bottom = 1.0f - (2.0f * (hOffset + xfbHeight) / (float)fbHeight);
drawRc.left = -(xfbWidth / (float)fbWidth);
drawRc.right = (xfbWidth / (float)fbWidth);
if (!g_ActiveConfig.bAutoScale)
{
// scale draw area for a 1 to 1 pixel mapping with the draw target
float vScale = (float)fbHeight / (float)back_rc.GetHeight();
float hScale = (float)fbWidth / (float)back_rc.GetWidth();
drawRc.top *= vScale;
drawRc.bottom *= vScale;
drawRc.left *= hScale;
drawRc.right *= hScale;
}
}
else
{
drawRc.top = 1;
drawRc.bottom = -1;
drawRc.left = -1;
drawRc.right = 1;
}
// Tell the OSD Menu about the current internal resolution
OSDInternalW = xfbSource->sourceRc.GetWidth(); OSDInternalH = xfbSource->sourceRc.GetHeight();
// Texture map xfbSource->texture onto the main buffer
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, xfbSource->texture);
// We must call ApplyShader here even if no post proc is selected - it takes
// care of disabling it in that case. It returns false in case of no post processing.
if (applyShader)
{
glBegin(GL_QUADS);
glTexCoord2f(sourceRc.left, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 0); glVertex2f(drawRc.left, drawRc.bottom);
glTexCoord2f(sourceRc.left, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 1); glVertex2f(drawRc.left, drawRc.top);
glTexCoord2f(sourceRc.right, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1); glVertex2f(drawRc.right, drawRc.top);
glTexCoord2f(sourceRc.right, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0); glVertex2f(drawRc.right, drawRc.bottom);
glEnd();
PixelShaderCache::DisableShader();
}
else
{
glBegin(GL_QUADS);
glTexCoord2f(sourceRc.left, sourceRc.bottom); glVertex2f(drawRc.left, drawRc.bottom);
glTexCoord2f(sourceRc.left, sourceRc.top); glVertex2f(drawRc.left, drawRc.top);
glTexCoord2f(sourceRc.right, sourceRc.top); glVertex2f(drawRc.right, drawRc.top);
glTexCoord2f(sourceRc.right, sourceRc.bottom); glVertex2f(drawRc.right, drawRc.bottom);
glEnd();
}
GL_REPORT_ERRORD();
}
}
else
{
TargetRectangle targetRc = Renderer::ConvertEFBRectangle(Rc);
GLuint read_texture = g_framebufferManager.ResolveAndGetRenderTarget(Rc);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, read_texture);
if (applyShader) if (applyShader)
{ {
glBegin(GL_QUADS); glBegin(GL_QUADS);
glTexCoord2f(sourceRc.left, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 0); glVertex2f(drawRc.left, drawRc.bottom); glTexCoord2f(targetRc.left, targetRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 0); glVertex2f(-1, -1);
glTexCoord2f(sourceRc.left, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 1); glVertex2f(drawRc.left, drawRc.top); glTexCoord2f(targetRc.left, targetRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 0, 1); glVertex2f(-1, 1);
glTexCoord2f(sourceRc.right, sourceRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1); glVertex2f(drawRc.right, drawRc.top); glTexCoord2f(targetRc.right, targetRc.top); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 1); glVertex2f( 1, 1);
glTexCoord2f(sourceRc.right, sourceRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0); glVertex2f(drawRc.right, drawRc.bottom); glTexCoord2f(targetRc.right, targetRc.bottom); glMultiTexCoord2fARB(GL_TEXTURE1, 1, 0); glVertex2f( 1, -1);
glEnd(); glEnd();
PixelShaderCache::DisableShader(); PixelShaderCache::DisableShader();
} }
else else
{ {
glBegin(GL_QUADS); glBegin(GL_QUADS);
glTexCoord2f(sourceRc.left, sourceRc.bottom); glVertex2f(drawRc.left, drawRc.bottom); glTexCoord2f(targetRc.left, targetRc.bottom); glVertex2f(-1, -1);
glTexCoord2f(sourceRc.left, sourceRc.top); glVertex2f(drawRc.left, drawRc.top); glTexCoord2f(targetRc.left, targetRc.top); glVertex2f(-1, 1);
glTexCoord2f(sourceRc.right, sourceRc.top); glVertex2f(drawRc.right, drawRc.top); glTexCoord2f(targetRc.right, targetRc.top); glVertex2f( 1, 1);
glTexCoord2f(sourceRc.right, sourceRc.bottom); glVertex2f(drawRc.right, drawRc.bottom); glTexCoord2f(targetRc.right, targetRc.bottom); glVertex2f( 1, -1);
glEnd(); glEnd();
} }
GL_REPORT_ERRORD();
} }
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0); glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
TextureMngr::DisableStage(0); TextureMngr::DisableStage(0);

View File

@ -370,7 +370,8 @@ void VideoFifo_CheckSwapRequest()
{ {
if (Common::AtomicLoadAcquire(s_swapRequested)) if (Common::AtomicLoadAcquire(s_swapRequested))
{ {
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight); EFBRectangle rc;
Renderer::Swap(s_beginFieldArgs.xfbAddr, s_beginFieldArgs.field, s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight,rc);
Common::AtomicStoreRelease(s_swapRequested, FALSE); Common::AtomicStoreRelease(s_swapRequested, FALSE);
} }
} }