mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-12 00:59:11 +01:00
d02426a8e9
in D3D and Opengl: fixed one nasty bug in texture loading where if a dynamic texture keeps his format but the tlut format is changed, the try or reloading the texture in the same texture could cause a hang if the size of the resulting texture is different than the original (size in bytes) Applied a ugly temporal hack to the texture conversor to solve efb to ram misalignments and effect distortions. in D3D: Pseudo implementation of logic ops using basic blending: the first 8 operations are "good approximations", the remaining 8 are bullshit :) if someone have a better approximation to emulate this logic please let me know. please test if i don't break anything in the process and test Mario kart wee you will get a nice surprise.:) git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4656 8ced0084-cf51-0410-be5f-012b33b47a6e
464 lines
14 KiB
C++
464 lines
14 KiB
C++
// Copyright (C) 2003 Dolphin Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official SVN repository and contact information can be found at
|
|
// http://code.google.com/p/dolphin-emu/
|
|
|
|
// Fast image conversion using Direct3D 9 pixel shaders.
|
|
// This kind of stuff would be a LOT nicer with OpenCL.
|
|
|
|
#include "TextureConverter.h"
|
|
#include "TextureConversionShader.h"
|
|
#include "PixelShaderCache.h"
|
|
#include "VertexShaderManager.h"
|
|
#include "VertexShaderCache.h"
|
|
#include "FramebufferManager.h"
|
|
#include "Globals.h"
|
|
#include "VideoConfig.h"
|
|
#include "ImageWrite.h"
|
|
#include "Render.h"
|
|
#include "D3DBase.h"
|
|
#include "D3DTexture.h"
|
|
#include "D3DUtil.h"
|
|
#include "D3DShader.h"
|
|
#include "TextureCache.h"
|
|
#include "Math.h"
|
|
|
|
namespace TextureConverter
|
|
{
|
|
struct TransformBuffer
|
|
{
|
|
LPDIRECT3DTEXTURE9 FBTexture;
|
|
LPDIRECT3DSURFACE9 RenderSurface;
|
|
LPDIRECT3DSURFACE9 ReadSurface;
|
|
int Width;
|
|
int Height;
|
|
};
|
|
const u32 NUM_TRANSFORM_BUFFERS = 16;
|
|
static TransformBuffer TrnBuffers[NUM_TRANSFORM_BUFFERS];
|
|
static u32 WorkingBuffers = 0;
|
|
|
|
static LPDIRECT3DPIXELSHADER9 s_rgbToYuyvProgram = NULL;
|
|
static LPDIRECT3DPIXELSHADER9 s_yuyvToRgbProgram = NULL;
|
|
|
|
// Not all slots are taken - but who cares.
|
|
const u32 NUM_ENCODING_PROGRAMS = 64;
|
|
static LPDIRECT3DPIXELSHADER9 s_encodingPrograms[NUM_ENCODING_PROGRAMS];
|
|
|
|
void CreateRgbToYuyvProgram()
|
|
{
|
|
// Output is BGRA because that is slightly faster than RGBA.
|
|
const char *FProgram =
|
|
"uniform sampler samp0 : register(s0);\n"
|
|
"void main(\n"
|
|
" out float4 ocol0 : COLOR0,\n"
|
|
" in float2 uv0 : TEXCOORD0)\n"
|
|
"{\n"
|
|
" float2 uv1 = float2(uv0.x + 1.0f, uv0.y);\n"
|
|
" float3 c0 = tex2D(samp0, uv0).rgb;\n"
|
|
" float3 c1 = tex2D(samp0, uv1).rgb;\n"
|
|
" float3 y_const = float3(0.257f,0.504f,0.098f);\n"
|
|
" float3 u_const = float3(-0.148f,-0.291f,0.439f);\n"
|
|
" float3 v_const = float3(0.439f,-0.368f,-0.071f);\n"
|
|
" float4 const3 = float4(0.0625f,0.5f,0.0625f,0.5f);\n"
|
|
" float3 c01 = (c0 + c1) * 0.5f;\n"
|
|
" ocol0 = float4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n"
|
|
"}\n";
|
|
s_rgbToYuyvProgram = D3D::CompilePixelShader(FProgram, (int)strlen(FProgram));
|
|
if (!s_rgbToYuyvProgram) {
|
|
ERROR_LOG(VIDEO, "Failed to create RGB to YUYV fragment program");
|
|
}
|
|
}
|
|
|
|
void CreateYuyvToRgbProgram()
|
|
{
|
|
const char *FProgram =
|
|
"uniform sampler samp0 : register(s0);\n"
|
|
"void main(\n"
|
|
" out float4 ocol0 : COLOR0,\n"
|
|
" in float2 uv0 : TEXCOORD0)\n"
|
|
"{\n"
|
|
" float4 c0 = tex2D(samp0, uv0).rgba;\n"
|
|
|
|
" float f = step(0.5, frac(uv0.x));\n"
|
|
" float y = lerp(c0.b, c0.r, f);\n"
|
|
" float yComp = 1.164f * (y - 0.0625f);\n"
|
|
" float uComp = c0.g - 0.5f;\n"
|
|
" float vComp = c0.a - 0.5f;\n"
|
|
|
|
" ocol0 = float4(yComp + (1.596f * vComp),\n"
|
|
" yComp - (0.813f * vComp) - (0.391f * uComp),\n"
|
|
" yComp + (2.018f * uComp),\n"
|
|
" 1.0f);\n"
|
|
"}\n";
|
|
s_yuyvToRgbProgram = D3D::CompilePixelShader(FProgram, (int)strlen(FProgram));
|
|
if (!s_yuyvToRgbProgram) {
|
|
ERROR_LOG(VIDEO, "Failed to create YUYV to RGB fragment program");
|
|
}
|
|
}
|
|
|
|
// Returns the pixel shader that encodes to texture copy format `format`.
// The shader is generated and compiled the first time a format is requested
// and cached in s_encodingPrograms afterwards.
//
// format: index into s_encodingPrograms; must be below NUM_ENCODING_PROGRAMS.
//
// Returns the cached or newly compiled shader. An out-of-range format raises
// a PanicAlert and yields whatever is cached in slot 0 (possibly NULL). NULL
// is also returned when the shader source could not be generated or compiled;
// nothing is cached in that case, so the next call tries again.
LPDIRECT3DPIXELSHADER9 GetOrCreateEncodingShader(u32 format)
{
	// Valid slots are 0 .. NUM_ENCODING_PROGRAMS - 1, so the upper bound itself
	// has to be rejected as well.
	if (format >= NUM_ENCODING_PROGRAMS)
	{
		PanicAlert("Unknown texture copy format: 0x%x\n", format);
		return s_encodingPrograms[0];
	}

	if (!s_encodingPrograms[format])
	{
		const char* shader = TextureConversionShader::GenerateEncodingShader(format,true);

		// Without source text there is nothing to dump or compile; strlen()
		// below must never see a NULL pointer.
		if (!shader)
		{
			ERROR_LOG(VIDEO, "Failed to generate encoding fragment program");
			return NULL;
		}

#if defined(_DEBUG) || defined(DEBUGFAST)
		if (g_ActiveConfig.iLog & CONF_SAVESHADERS) {
			static int counter = 0;
			char szTemp[MAX_PATH];
			sprintf(szTemp, "%s/enc_%04i.txt", FULL_DUMP_DIR, counter++);

			SaveData(szTemp, shader);
		}
#endif
		s_encodingPrograms[format] = D3D::CompilePixelShader(shader, (int)strlen(shader));
		if (!s_encodingPrograms[format]) {
			ERROR_LOG(VIDEO, "Failed to create encoding fragment program");
		}
	}
	return s_encodingPrograms[format];
}
|
|
|
|
void Init()
|
|
{
|
|
for (unsigned int i = 0; i < NUM_ENCODING_PROGRAMS; i++)
|
|
{
|
|
s_encodingPrograms[i] = NULL;
|
|
}
|
|
for (unsigned int i = 0; i < NUM_TRANSFORM_BUFFERS; i++)
|
|
{
|
|
TrnBuffers[i].FBTexture = NULL;
|
|
TrnBuffers[i].RenderSurface = NULL;
|
|
TrnBuffers[i].ReadSurface = NULL;
|
|
TrnBuffers[i].Width = 0;
|
|
TrnBuffers[i].Height = 0;
|
|
}
|
|
CreateRgbToYuyvProgram();
|
|
CreateYuyvToRgbProgram();
|
|
|
|
}
|
|
|
|
void Shutdown()
|
|
{
|
|
if(s_rgbToYuyvProgram)
|
|
s_rgbToYuyvProgram->Release();
|
|
s_rgbToYuyvProgram = NULL;
|
|
if(s_yuyvToRgbProgram)
|
|
s_yuyvToRgbProgram->Release();
|
|
s_yuyvToRgbProgram=NULL;
|
|
|
|
for (unsigned int i = 0; i < NUM_ENCODING_PROGRAMS; i++)
|
|
{
|
|
if(s_encodingPrograms[i])
|
|
s_encodingPrograms[i]->Release();
|
|
s_encodingPrograms[i] = NULL;
|
|
}
|
|
for (unsigned int i = 0; i < NUM_TRANSFORM_BUFFERS; i++)
|
|
{
|
|
if(TrnBuffers[i].RenderSurface != NULL)
|
|
TrnBuffers[i].RenderSurface->Release();
|
|
TrnBuffers[i].RenderSurface = NULL;
|
|
|
|
if(TrnBuffers[i].ReadSurface != NULL)
|
|
TrnBuffers[i].ReadSurface->Release();
|
|
TrnBuffers[i].ReadSurface = NULL;
|
|
|
|
if(TrnBuffers[i].FBTexture != NULL)
|
|
TrnBuffers[i].FBTexture->Release();
|
|
TrnBuffers[i].FBTexture = NULL;
|
|
|
|
TrnBuffers[i].Width = 0;
|
|
TrnBuffers[i].Height = 0;
|
|
}
|
|
WorkingBuffers = 0;
|
|
}
|
|
|
|
void EncodeToRamUsingShader(LPDIRECT3DPIXELSHADER9 shader, LPDIRECT3DTEXTURE9 srcTexture, const TargetRectangle& sourceRc,
|
|
u8* destAddr, int dstWidth, int dstHeight, int readStride, bool toTexture, bool linearFilter)
|
|
{
|
|
HRESULT hr;
|
|
Renderer::ResetAPIState();
|
|
u32 index =0;
|
|
while(index < WorkingBuffers && (TrnBuffers[index].Width != dstWidth || TrnBuffers[index].Height != dstHeight))
|
|
index++;
|
|
|
|
LPDIRECT3DSURFACE9 s_texConvReadSurface = NULL;
|
|
LPDIRECT3DSURFACE9 Rendersurf = NULL;
|
|
|
|
if(index >= WorkingBuffers)
|
|
{
|
|
if(WorkingBuffers < NUM_TRANSFORM_BUFFERS)
|
|
WorkingBuffers++;
|
|
if(index >= WorkingBuffers)
|
|
index--;
|
|
if(TrnBuffers[index].RenderSurface != NULL)
|
|
{
|
|
TrnBuffers[index].RenderSurface->Release();
|
|
TrnBuffers[index].RenderSurface = NULL;
|
|
}
|
|
if(TrnBuffers[index].ReadSurface != NULL)
|
|
{
|
|
TrnBuffers[index].ReadSurface->Release();
|
|
TrnBuffers[index].ReadSurface = NULL;
|
|
}
|
|
if(TrnBuffers[index].FBTexture != NULL)
|
|
{
|
|
TrnBuffers[index].FBTexture->Release();
|
|
TrnBuffers[index].FBTexture = NULL;
|
|
}
|
|
TrnBuffers[index].Width = dstWidth;
|
|
TrnBuffers[index].Height = dstHeight;
|
|
D3D::dev->CreateTexture(dstWidth, dstHeight, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8,
|
|
D3DPOOL_DEFAULT, &TrnBuffers[index].FBTexture, NULL);
|
|
TrnBuffers[index].FBTexture->GetSurfaceLevel(0,&TrnBuffers[index].RenderSurface);
|
|
D3D::dev->CreateOffscreenPlainSurface(dstWidth, dstHeight, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &TrnBuffers[index].ReadSurface, NULL );
|
|
}
|
|
|
|
s_texConvReadSurface = TrnBuffers[index].ReadSurface;
|
|
Rendersurf = TrnBuffers[index].RenderSurface;
|
|
|
|
hr = D3D::dev->SetDepthStencilSurface(NULL);
|
|
hr = D3D::dev->SetRenderTarget(0, Rendersurf);
|
|
|
|
if (linearFilter)
|
|
{
|
|
D3D::dev->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
|
|
}
|
|
else
|
|
{
|
|
D3D::dev->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
|
|
}
|
|
|
|
D3DVIEWPORT9 vp;
|
|
vp.X = 0;
|
|
vp.Y = 0;
|
|
vp.Width = dstWidth;
|
|
vp.Height = dstHeight;
|
|
vp.MinZ = 0.0f;
|
|
vp.MaxZ = 1.0f;
|
|
hr = D3D::dev->SetViewport(&vp);
|
|
RECT SrcRect;
|
|
SrcRect.top = sourceRc.top;
|
|
SrcRect.left = sourceRc.left;
|
|
SrcRect.right = sourceRc.right;
|
|
SrcRect.bottom = sourceRc.bottom;
|
|
RECT DstRect;
|
|
DstRect.top = 0;
|
|
DstRect.left = 0;
|
|
DstRect.right = dstWidth;
|
|
DstRect.bottom = dstHeight;
|
|
|
|
|
|
// Draw...
|
|
D3D::drawShadedTexQuad(srcTexture,&SrcRect,1,1,&DstRect,shader,VertexShaderCache::GetSimpleVertexShader());
|
|
hr = D3D::dev->SetRenderTarget(0, FBManager::GetEFBColorRTSurface());
|
|
hr = D3D::dev->SetDepthStencilSurface(FBManager::GetEFBDepthRTSurface());
|
|
Renderer::RestoreAPIState();
|
|
D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);
|
|
// .. and then readback the results.
|
|
// TODO: make this less slow.
|
|
|
|
D3DLOCKED_RECT drect;
|
|
|
|
|
|
hr = D3D::dev->GetRenderTargetData(Rendersurf,s_texConvReadSurface);
|
|
if((hr = s_texConvReadSurface->LockRect(&drect, &DstRect, D3DLOCK_READONLY)) != D3D_OK)
|
|
{
|
|
PanicAlert("ERROR: %s", hr == D3DERR_WASSTILLDRAWING ? "Still drawing" :
|
|
hr == D3DERR_INVALIDCALL ? "Invalid call" : "w00t");
|
|
|
|
}
|
|
else
|
|
{
|
|
int writeStride = bpmem.copyMipMapStrideChannels * 32;
|
|
|
|
if (writeStride != readStride && toTexture)
|
|
{
|
|
// writing to a texture of a different size
|
|
|
|
int readHeight = readStride / dstWidth;
|
|
|
|
int readStart = 0;
|
|
int readLoops = dstHeight / (readHeight/4); // 4 bytes per pixel
|
|
u8 *Source = (u8*)drect.pBits;
|
|
for (int i = 0; i < readLoops; i++)
|
|
{
|
|
int readDist = dstWidth*readHeight;
|
|
memcpy(destAddr,Source,readDist);
|
|
Source += readDist;
|
|
destAddr += writeStride;
|
|
}
|
|
}
|
|
else
|
|
memcpy(destAddr,drect.pBits,dstWidth*dstHeight*4);// 4 bytes per pixel
|
|
|
|
hr = s_texConvReadSurface->UnlockRect();
|
|
}
|
|
}
|
|
|
|
// Encodes a rectangle of the EFB (colour or depth) into emulated memory at
// `address` using the encoding shader for the requested copy format.
//
// address         - destination address, resolved through Memory_GetPtr.
// bFromZBuffer    - read from the EFB depth texture instead of the colour one.
// bIsIntensityFmt - consulted when deciding whether the _GX_TF_CTF flag is
//                   added to a colour copy format.
// copyfmt         - raw copy format; combined with the _GX_TF_ZTF/_GX_TF_CTF
//                   flags to form the encoder format.
// bScaleByHalf    - non-zero halves width and height, doubles the sample
//                   stride and enables linear filtering.
// source          - EFB rectangle to copy.
//
// Returns without writing anything when no encoding shader is available or
// when `address` does not resolve to a valid pointer.
void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source)
{
	u32 format = copyfmt;

	if (bFromZBuffer)
	{
		format |= _GX_TF_ZTF;
		if (copyfmt == 11)
			format = GX_TF_Z16;
		else if (format < GX_TF_Z8 || format > GX_TF_Z24X8)
			format |= _GX_TF_CTF;
	}
	else if (copyfmt > GX_TF_RGBA8 || (copyfmt < GX_TF_RGB565 && !bIsIntensityFmt))
	{
		format |= _GX_TF_CTF;
	}

	LPDIRECT3DPIXELSHADER9 texconv_shader = GetOrCreateEncodingShader(format);
	if (!texconv_shader)
		return;

	u8 *dest_ptr = Memory_GetPtr(address);
	if (!dest_ptr)
	{
		// The encoded data is memcpy'd to this pointer further down.
		ERROR_LOG(VIDEO, "Tried to encode to invalid memory address 0x%08x", address);
		return;
	}

	LPDIRECT3DTEXTURE9 source_texture = bFromZBuffer ? FBManager::GetEFBDepthTexture(source) : FBManager::GetEFBColorTexture(source);
	int width = (source.right - source.left) >> bScaleByHalf;
	int height = (source.bottom - source.top) >> bScaleByHalf;

	int size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, format);

	// Invalidate any existing texture covering this memory range.
	// TODO - don't delete the texture if it already exists, just replace the contents.
	TextureCache::InvalidateRange(address, size_in_bytes);

	const int blockWidth = TexDecoder_GetBlockWidthInTexels(format);
	u16 blkW = blockWidth - 1;
	u16 blkH = TexDecoder_GetBlockHeightInTexels(format) - 1;
	u16 samples = TextureConversionShader::GetEncodedSampleCount(format);

	// only copy on cache line boundaries
	// extra pixels are copied but not displayed in the resulting texture
	s32 expandedWidth = (width + blkW) & (~blkW);
	s32 expandedHeight = (height + blkH) & (~blkH);

	float MValueX = Renderer::GetTargetScaleX();
	float MValueY = Renderer::GetTargetScaleY();

	float sampleStride = bScaleByHalf?2.0f:1.0f;

	TextureConversionShader::SetShaderParameters(
		(float)expandedWidth,
		expandedHeight * MValueY,
		source.left * MValueX,
		source.top * MValueY,
		sampleStride * MValueX,
		sampleStride * MValueY,
		(float)Renderer::GetTargetWidth(),
		(float)Renderer::GetTargetHeight());

	// Each output pixel packs `samples` texels, so the target is narrower
	// than the expanded source width by that factor.
	TargetRectangle scaledSource;
	scaledSource.top = 0;
	scaledSource.bottom = expandedHeight;
	scaledSource.left = 0;
	scaledSource.right = expandedWidth / samples;

	// Formats whose low nibble is 6 use 64 bytes per block instead of 32.
	int cacheBytes = 32;
	if ((format & 0x0f) == 6)
		cacheBytes = 64;

	int readStride = (expandedWidth * cacheBytes) / blockWidth;
	EncodeToRamUsingShader(texconv_shader, source_texture, scaledSource, dest_ptr, expandedWidth / samples, expandedHeight,readStride, true, bScaleByHalf > 0);
}
|
|
|
|
/*void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc,
|
|
u8* destAddr, int dstWidth, int dstHeight)
|
|
{
|
|
EncodeToRamUsingShader(s_rgbToYuyvProgram, srcTexture, sourceRc, destAddr, dstWidth / 2, dstHeight, 0, false, false);
|
|
}
|
|
|
|
|
|
// Should be scale free.
|
|
void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTexture)
|
|
{
|
|
u8* srcAddr = Memory_GetPtr(xfbAddr);
|
|
if (!srcAddr)
|
|
{
|
|
WARN_LOG(VIDEO, "Tried to decode from invalid memory address");
|
|
return;
|
|
}
|
|
|
|
Renderer::ResetAPIState();
|
|
|
|
float srcFormatFactor = 0.5f;
|
|
float srcFmtWidth = srcWidth * srcFormatFactor;
|
|
|
|
// switch to texture converter frame buffer
|
|
// attach destTexture as color destination
|
|
g_framebufferManager.SetFramebuffer(s_texConvFrameBuffer);
|
|
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, destTexture);
|
|
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, destTexture, 0);
|
|
|
|
GL_REPORT_FBO_ERROR();
|
|
|
|
for (int i = 1; i < 8; ++i)
|
|
TextureMngr::DisableStage(i);
|
|
|
|
// activate source texture
|
|
// set srcAddr as data for source texture
|
|
glActiveTexture(GL_TEXTURE0);
|
|
glEnable(GL_TEXTURE_RECTANGLE_ARB);
|
|
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, s_srcTexture);
|
|
|
|
// TODO: make this less slow. (How?)
|
|
if(s_srcTextureWidth == (GLsizei)srcFmtWidth && s_srcTextureHeight == (GLsizei)srcHeight)
|
|
{
|
|
glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,0,0,s_srcTextureWidth, s_srcTextureHeight, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
|
|
}
|
|
else
|
|
{
|
|
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, (GLsizei)srcFmtWidth, (GLsizei)srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
|
|
s_srcTextureWidth = (GLsizei)srcFmtWidth;
|
|
s_srcTextureHeight = (GLsizei)srcHeight;
|
|
}
|
|
|
|
glViewport(0, 0, srcWidth, srcHeight);
|
|
|
|
PixelShaderCache::EnableShader(s_yuyvToRgbProgram.glprogid);
|
|
|
|
GL_REPORT_ERRORD();
|
|
|
|
glBegin(GL_QUADS);
|
|
glTexCoord2f(srcFmtWidth, (float)srcHeight); glVertex2f(1,-1);
|
|
glTexCoord2f(srcFmtWidth, 0); glVertex2f(1,1);
|
|
glTexCoord2f(0, 0); glVertex2f(-1,1);
|
|
glTexCoord2f(0, (float)srcHeight); glVertex2f(-1,-1);
|
|
glEnd();
|
|
|
|
// reset state
|
|
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
|
|
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_RECTANGLE_ARB, 0, 0);
|
|
TextureMngr::DisableStage(0);
|
|
|
|
VertexShaderManager::SetViewportChanged();
|
|
|
|
g_framebufferManager.SetFramebuffer(0);
|
|
|
|
Renderer::RestoreAPIState();
|
|
GL_REPORT_ERRORD();
|
|
}
|
|
*/
|
|
} // namespace
|