VideoSoftware: Added Real XFB support.

Massively increases the accuracy of VideoSoftware; if it's going to be slow, it
might as well be accurate.

 * Fixes flickering issues in games which double render to get Hardware AA, like the Peach's Castle tech demo.
 * Rogue Squadron 2's Cantina Intro doesn't flicker anymore, but it duplicates the top half of the screen onto the bottom.
 * Any games which need RealXFB support should now work in Video Software
This commit is contained in:
Scott Mansell 2013-08-20 23:51:39 +12:00
parent fa8a4cdbb5
commit d52e241cdf
6 changed files with 250 additions and 47 deletions

View File

@ -15,20 +15,26 @@
#include "HW/Memmap.h" #include "HW/Memmap.h"
#include "Core.h" #include "Core.h"
// Gamma values an EFB copy can select; CopyEfb() indexes this table with
// bpmem.triggerEFBCopy.gamma and passes the result on to CopyToXfb().
static const float s_gammaLUT[] = { 1.0f, 1.7f, 2.2f, 1.0f };
namespace EfbCopy namespace EfbCopy
{ {
void CopyToXfb() void CopyToXfb(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma)
{ {
GLInterface->Update(); // just updates the render window position and the backbuffer size
if (!g_SWVideoConfig.bHwRasterizer) if (!g_SWVideoConfig.bHwRasterizer)
{ {
// copy to open gl for rendering INFO_LOG(VIDEO, "xfbaddr: %x, fbwidth: %i, fbheight: %i, source: (%i, %i, %i, %i), Gamma %f",
EfbInterface::UpdateColorTexture(); xfbAddr, fbWidth, fbHeight, sourceRc.top, sourceRc.left, sourceRc.bottom, sourceRc.right, Gamma);
SWRenderer::DrawTexture(EfbInterface::efbColorTexture, EFB_WIDTH, EFB_HEIGHT); EfbInterface::yuv422_packed* xfb_in_ram = (EfbInterface::yuv422_packed *) Memory::GetPointer(xfbAddr);
}
SWRenderer::SwapBuffer(); EfbInterface::CopyToXFB(xfb_in_ram, fbWidth, fbHeight, sourceRc, Gamma);
}
} }
void CopyToRam() void CopyToRam()
@ -47,8 +53,8 @@ namespace EfbCopy
int left = bpmem.copyTexSrcXY.x; int left = bpmem.copyTexSrcXY.x;
int top = bpmem.copyTexSrcXY.y; int top = bpmem.copyTexSrcXY.y;
int right = left + bpmem.copyTexSrcWH.x; int right = left + bpmem.copyTexSrcWH.x + 1;
int bottom = top + bpmem.copyTexSrcWH.y; int bottom = top + bpmem.copyTexSrcWH.y + 1;
for (u16 y = top; y <= bottom; y++) for (u16 y = top; y <= bottom; y++)
{ {
@ -62,21 +68,47 @@ namespace EfbCopy
void CopyEfb() void CopyEfb()
{ {
if (bpmem.triggerEFBCopy.copy_to_xfb) EFBRectangle rc;
DebugUtil::OnFrameEnd(); rc.left = (int)bpmem.copyTexSrcXY.x;
rc.top = (int)bpmem.copyTexSrcXY.y;
// Here Width+1 like Height, otherwise some textures are corrupted already since the native resolution.
rc.right = (int)(bpmem.copyTexSrcXY.x + bpmem.copyTexSrcWH.x + 1);
rc.bottom = (int)(bpmem.copyTexSrcXY.y + bpmem.copyTexSrcWH.y + 1);
//if (bpmem.triggerEFBCopy.copy_to_xfb)
// DebugUtil::OnFrameEnd(); // FIXME: not actually frame end
if (!g_bSkipCurrentFrame) if (!g_bSkipCurrentFrame)
{ {
if (bpmem.triggerEFBCopy.copy_to_xfb) if (bpmem.triggerEFBCopy.copy_to_xfb)
{ {
CopyToXfb(); float yScale;
Core::Callback_VideoCopiedToXFB(true); if (bpmem.triggerEFBCopy.scale_invert)
yScale = 256.0f / (float)bpmem.dispcopyyscale;
else
yScale = (float)bpmem.dispcopyyscale / 256.0f;
swstats.frameCount++; float xfbLines = ((bpmem.copyTexSrcWH.y + 1.0f) * yScale);
if (yScale != 1.0)
WARN_LOG(VIDEO, "yScale of %f is currently unsupported", yScale);
if ((u32)xfbLines > MAX_XFB_HEIGHT)
{
INFO_LOG(VIDEO, "Tried to scale EFB to too many XFB lines (%f)", xfbLines);
xfbLines = MAX_XFB_HEIGHT;
}
CopyToXfb(bpmem.copyTexDest << 5,
bpmem.copyMipMapStrideChannels << 4,
(u32)xfbLines,
rc,
s_gammaLUT[bpmem.triggerEFBCopy.gamma]);
} }
else else
{ {
CopyToRam(); CopyToRam(); // FIXME: should use the rectangle we have already created above
} }
if (bpmem.triggerEFBCopy.clear) if (bpmem.triggerEFBCopy.clear)
@ -87,13 +119,5 @@ namespace EfbCopy
ClearEfb(); ClearEfb();
} }
} }
else
{
if (bpmem.triggerEFBCopy.copy_to_xfb)
{
// no frame rendered but tell that a frame has finished for frame skip counter
Core::Callback_VideoCopiedToXFB(false);
}
}
} }
} }

View File

@ -8,16 +8,13 @@
#include "BPMemLoader.h" #include "BPMemLoader.h"
#include "LookUpTables.h" #include "LookUpTables.h"
#include "SWPixelEngine.h" #include "SWPixelEngine.h"
#include "HW/Memmap.h"
u8 efb[EFB_WIDTH*EFB_HEIGHT*6]; u8 efb[EFB_WIDTH*EFB_HEIGHT*6];
namespace EfbInterface namespace EfbInterface
{ {
u8 efbColorTexture[EFB_WIDTH*EFB_HEIGHT*4];
inline u32 GetColorOffset(u16 x, u16 y) inline u32 GetColorOffset(u16 x, u16 y)
{ {
return (x + y * EFB_WIDTH) * 3; return (x + y * EFB_WIDTH) * 3;
@ -31,7 +28,6 @@ namespace EfbInterface
void DoState(PointerWrap &p) void DoState(PointerWrap &p)
{ {
p.DoArray(efb, EFB_WIDTH*EFB_HEIGHT*6); p.DoArray(efb, EFB_WIDTH*EFB_HEIGHT*6);
p.DoArray(efbColorTexture, EFB_WIDTH*EFB_HEIGHT*4);
} }
void SetPixelAlphaOnly(u32 offset, u8 a) void SetPixelAlphaOnly(u32 offset, u8 a)
@ -469,6 +465,19 @@ namespace EfbInterface
GetPixelColor(offset, color); GetPixelColor(offset, color);
} }
// For internal use only: reads the EFB color at (x, y) and stores it in *out as YCbCr.
// The result is deliberately left non-normalized (no offset is added to Y, U or V),
// which saves work later.
void GetColorYUV(u16 x, u16 y, yuv444 *out)
{
	u8 pixel[4];
	GetColor(x, y, pixel);

	const u8 red   = pixel[RED_C];
	const u8 green = pixel[GRN_C];
	const u8 blue  = pixel[BLU_C];

	// GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
	// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
	out->Y =  0.257f * red +  0.504f * green +  0.098f * blue;
	out->U = -0.148f * red + -0.291f * green +  0.439f * blue;
	out->V =  0.439f * red + -0.368f * green + -0.071f * blue;
}
u32 GetDepth(u16 x, u16 y) u32 GetDepth(u16 x, u16 y)
{ {
u32 offset = GetDepthOffset(x, y); u32 offset = GetDepthOffset(x, y);
@ -482,22 +491,56 @@ namespace EfbInterface
return &efb[GetColorOffset(x, y)]; return &efb[GetColorOffset(x, y)];
} }
void UpdateColorTexture() void CopyToXFB(yuv422_packed* xfb_in_ram, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma) {
{ // FIXME: We should do Gamma correction
u32 color;
u8* colorPtr = (u8*)&color;
u32* texturePtr = (u32*)efbColorTexture;
u32 textureAddress = 0;
u32 efbOffset = 0;
for (u16 y = 0; y < EFB_HEIGHT; y++) if (!xfb_in_ram)
{ {
for (u16 x = 0; x < EFB_WIDTH; x++) WARN_LOG(VIDEO, "Tried to copy to invalid XFB address");
return;
}
int left = sourceRc.left;
int right = sourceRc.right;
// this assumes copies will always start on an even (YU) pixel and the
// copy always has an even width, which might not be true.
if (left & 1 || right & 1) {
WARN_LOG(VIDEO, "Trying to copy XFB to from unaligned EFB source");
// this will show up as wrongly encoded
}
// Scanline buffer, leave room for borders
yuv444 scanline[640+2];
// our internal yuv444 type is not normalized, so black is {0, 0, 0} instead of {16, 128, 128}
scanline[0] = {0, 0, 0}; // black border at start
scanline[right+1] = {0, 0, 0}; // black border at end
for (u16 y = sourceRc.top; y < sourceRc.bottom; y++)
{
// Get a scanline of YUV pixels in 4:4:4 format
for (int i = 1, x = left; x < right; i++, x++)
{ {
GetPixelColor(efbOffset, colorPtr); GetColorYUV(x, y, &scanline[i]);
efbOffset += 3;
texturePtr[textureAddress++] = Common::swap32(color); // ABGR->RGBA
} }
// And Downsample them to 4:2:2
for (int i = 1, x = left; x < right; i+=2, x+=2)
{
// YU pixel
xfb_in_ram[x].Y = scanline[i].Y;
// U[i] = 1/4 * U[i-1] + 1/2 * U[i] + 1/4 U[i+1]
// we add in 10 bit space so it will round more accurately
xfb_in_ram[x].UV = 128 + ((scanline[i-1].U + (scanline[i].U << 1) + scanline[i+1].U) >> 2);
// YV pixel
xfb_in_ram[x+1].Y = scanline[i+1].Y;
// V[i] = 1/4 * V[i-1] + 1/2 * V[i] + 1/4 V[i+1]
xfb_in_ram[x+1].UV = 128 + ((scanline[i].V + (scanline[i+1].V << 1) + scanline[i+2].V) >> 2);
}
xfb_in_ram += 640;
} }
} }

View File

@ -11,9 +11,21 @@ namespace EfbInterface
{ {
const int DEPTH_BUFFER_START = EFB_WIDTH * EFB_HEIGHT * 3; const int DEPTH_BUFFER_START = EFB_WIDTH * EFB_HEIGHT * 3;
// color order is ABGR in order to emulate RGBA on little-endian hardware
enum { ALP_C, BLU_C, GRN_C, RED_C }; enum { ALP_C, BLU_C, GRN_C, RED_C };
// color order is ABGR in order to emulate RGBA on little-endian hardware // packed so the compiler doesn't mess with alignment
// One XFB entry: a luma sample plus one chroma sample. CopyToXFB() writes U into the
// even entry of each pair and V into the odd one. Declared packed so the compiler
// adds no padding.
struct __attribute__ ((packed)) yuv422_packed
{
	u8 Y;
	u8 UV;
};
// Full-resolution YUV sample produced by GetColorYUV(). It is only used internally,
// so the layout does not have to match anything in emulated memory; it is aligned to
// 4 bytes. U and V are signed.
struct __attribute__ ((aligned (4))) yuv444
{
	u8 Y;
	s8 U;
	s8 V;
};
// does full blending of an incoming pixel // does full blending of an incoming pixel
void BlendTev(u16 x, u16 y, u8 *color); void BlendTev(u16 x, u16 y, u8 *color);
@ -28,12 +40,13 @@ namespace EfbInterface
void SetDepth(u16 x, u16 y, u32 depth); void SetDepth(u16 x, u16 y, u32 depth);
void GetColor(u16 x, u16 y, u8 *color); void GetColor(u16 x, u16 y, u8 *color);
void GetColorYUV(u16 x, u16 y, yuv444 *color);
u32 GetDepth(u16 x, u16 y); u32 GetDepth(u16 x, u16 y);
u8* GetPixelPointer(u16 x, u16 y, bool depth); u8* GetPixelPointer(u16 x, u16 y, bool depth);
void UpdateColorTexture(); void CopyToXFB(yuv422_packed* xfb_in_ram, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma);
extern u8 efbColorTexture[EFB_WIDTH*EFB_HEIGHT*4]; // RGBA format
void DoState(PointerWrap &p); void DoState(PointerWrap &p);
} }

View File

@ -5,10 +5,12 @@
#include "Common.h" #include "Common.h"
#include <math.h> #include <math.h>
#include "Core.h"
#include "../../Plugin_VideoOGL/Src/GLUtil.h" #include "../../Plugin_VideoOGL/Src/GLUtil.h"
#include "RasterFont.h" #include "RasterFont.h"
#include "SWRenderer.h" #include "SWRenderer.h"
#include "SWStatistics.h" #include "SWStatistics.h"
#include "SWCommandProcessor.h"
#include "OnScreenDisplay.h" #include "OnScreenDisplay.h"
@ -18,6 +20,9 @@ static GLint attr_pos = -1, attr_tex = -1;
static GLint uni_tex = -1; static GLint uni_tex = -1;
static GLuint program; static GLuint program;
// Two RGBA textures. SWRenderer::UpdateColorTexture() fills the one that is NOT current
// and then flips s_currentColorTexture; SWRenderer::Swap() draws the current one.
static u8 s_xfbColorTexture[2][EFB_WIDTH*EFB_HEIGHT*4];
// Index (0 or 1) into s_xfbColorTexture of the texture that Swap() draws.
static int s_currentColorTexture = 0;
// Rasterfont isn't compatible with GLES // Rasterfont isn't compatible with GLES
// degasus: I think it does, but I can't test it // degasus: I think it does, but I can't test it
#ifndef USE_GLES #ifndef USE_GLES
@ -26,6 +31,7 @@ RasterFont* s_pfont = NULL;
// Sets the GL backbuffer dimensions to the EFB size (EFB_WIDTH x EFB_HEIGHT).
void SWRenderer::Init() void SWRenderer::Init()
{ {
GLInterface->SetBackBufferDimensions(EFB_WIDTH, EFB_HEIGHT);
} }
void SWRenderer::Shutdown() void SWRenderer::Shutdown()
@ -68,6 +74,9 @@ void CreateShaders()
void SWRenderer::Prepare() void SWRenderer::Prepare()
{ {
memset(s_xfbColorTexture, 0, sizeof(s_xfbColorTexture));
s_currentColorTexture = 0;
glClearColor(0.0f, 0.0f, 0.0f, 0.0f); glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment
glGenTextures(1, &s_RenderTarget); glGenTextures(1, &s_RenderTarget);
@ -162,11 +171,59 @@ void DrawButton(GLuint tex, float *coords)
glBindTexture(TEX2D, 0); glBindTexture(TEX2D, 0);
} }
#endif #endif
// Converts an XFB image (packed YUV 4:2:2, read as EFB_HEIGHT lines of EFB_WIDTH entries)
// to RGBA, writes it into the s_xfbColorTexture buffer that is not currently selected and
// then makes that buffer the current one.
//
// xfb: start of the XFB image. If it is NULL nothing is converted and the current
//      texture stays selected.
void SWRenderer::UpdateColorTexture(EfbInterface::yuv422_packed *xfb)
{
	// The pointer comes from Memory::GetPointer(); EfbInterface::CopyToXFB() treats such a
	// pointer as possibly NULL, so do the same here instead of dereferencing it blindly.
	if (!xfb)
	{
		WARN_LOG(VIDEO, "Tried to read from invalid XFB address");
		return;
	}

	u32 offset = 0;
	u8 *TexturePointer = s_xfbColorTexture[!s_currentColorTexture];

	for (u16 y = 0; y < EFB_HEIGHT; y++)
	{
		for (u16 x = 0; x < EFB_WIDTH; x+=2)
		{
			// We do this one color sample (aka 2 RGB pixels) at a time:
			// the even entry carries U, the odd entry carries V, and both pixels share them.
			int Y1 = xfb[x].Y - 16;
			int Y2 = xfb[x+1].Y - 16;
			int U  = int(xfb[x].UV) - 128;
			int V  = int(xfb[x+1].UV) - 128;

			// We do the inverse BT.601 conversion for YCbCr to RGB
			// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
			// Each channel is clamped to [0, 255]; alpha is always opaque.
			TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y1              + 1.596f * V));
			TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y1 - 0.392f * U - 0.813f * V));
			TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y1 + 2.017f * U             ));
			TexturePointer[offset++] = 255;

			TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y2              + 1.596f * V));
			TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y2 - 0.392f * U - 0.813f * V));
			TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y2 + 2.017f * U             ));
			TexturePointer[offset++] = 255;
		}
		xfb += EFB_WIDTH;
	}

	// Publish the freshly written texture.
	s_currentColorTexture = !s_currentColorTexture;
}
// Presents a frame. With the dual core option this is called on the GPU thread (from
// VideoFifo_CheckSwapRequest); otherwise Video_EndField() calls it directly.
void SWRenderer::Swap(u32 fbWidth, u32 fbHeight)
{
	// just updates the render window position and the backbuffer size
	GLInterface->Update();

	const bool softwareRasterizer = !g_SWVideoConfig.bHwRasterizer;
	if (softwareRasterizer)
	{
		u8 *currentTexture = s_xfbColorTexture[s_currentColorTexture];
		SWRenderer::DrawTexture(currentTexture, fbWidth, fbHeight);
	}

	swstats.frameCount++;

	SWRenderer::SwapBuffer();

	// FIXME: should this function be called FrameRendered?
	Core::Callback_VideoCopiedToXFB(true);
}
void SWRenderer::DrawTexture(u8 *texture, int width, int height) void SWRenderer::DrawTexture(u8 *texture, int width, int height)
{ {
// FIXME: This should add black bars when the game has set the VI to render less than the full xfb.
GLsizei glWidth = (GLsizei)GLInterface->GetBackBufferWidth(); GLsizei glWidth = (GLsizei)GLInterface->GetBackBufferWidth();
GLsizei glHeight = (GLsizei)GLInterface->GetBackBufferHeight(); GLsizei glHeight = (GLsizei)GLInterface->GetBackBufferHeight();
// Update GLViewPort // Update GLViewPort
glViewport(0, 0, glWidth, glHeight); glViewport(0, 0, glWidth, glHeight);
glScissor(0, 0, glWidth, glHeight); glScissor(0, 0, glWidth, glHeight);

View File

@ -6,6 +6,7 @@
#define _RENDERER_H_ #define _RENDERER_H_
#include "CommonTypes.h" #include "CommonTypes.h"
#include "EfbInterface.h"
namespace SWRenderer namespace SWRenderer
{ {
@ -16,8 +17,10 @@ namespace SWRenderer
void RenderText(const char* pstr, int left, int top, u32 color); void RenderText(const char* pstr, int left, int top, u32 color);
void DrawDebugText(); void DrawDebugText();
void UpdateColorTexture(EfbInterface::yuv422_packed *xfb);
void DrawTexture(u8 *texture, int width, int height); void DrawTexture(u8 *texture, int width, int height);
void Swap(u32 fbWidth, u32 fbHeight);
void SwapBuffer(); void SwapBuffer();
} }

View File

@ -9,7 +9,7 @@
#include "VideoConfigDialog.h" #include "VideoConfigDialog.h"
#endif // HAVE_WX #endif // HAVE_WX
#include "Atomic.h"
#include "SWCommandProcessor.h" #include "SWCommandProcessor.h"
#include "OpcodeDecoder.h" #include "OpcodeDecoder.h"
#include "SWVideoConfig.h" #include "SWVideoConfig.h"
@ -29,10 +29,22 @@
#include "OpcodeDecoder.h" #include "OpcodeDecoder.h"
#include "SWVertexLoader.h" #include "SWVertexLoader.h"
#include "SWStatistics.h" #include "SWStatistics.h"
#include "HW/VideoInterface.h"
#include "HW/Memmap.h"
#include "OnScreenDisplay.h" #include "OnScreenDisplay.h"
#define VSYNC_ENABLED 0 #define VSYNC_ENABLED 0
// Swap request flag. Video_EndField() sets it with AtomicStoreRelease when the dual core
// (bCPUThread) option is enabled; VideoFifo_CheckSwapRequest() polls it with
// AtomicLoadAcquire, calls SWRenderer::Swap() and then clears it.
static volatile u32 s_swapRequested = false;
// Arguments of the most recent Video_BeginField() call. Video_EndField() checks xfbAddr,
// and fbWidth/fbHeight are passed on to SWRenderer::Swap().
static volatile struct
{
u32 xfbAddr;
FieldType field;
u32 fbWidth;
u32 fbHeight;
} s_beginFieldArgs;
namespace SW namespace SW
{ {
@ -192,11 +204,50 @@ void VideoSoftware::Video_Prepare()
// Run from the CPU thread (from VideoInterface.cpp) // Run from the CPU thread (from VideoInterface.cpp)
// Records the field parameters in s_beginFieldArgs; nothing else is done here.
void VideoSoftware::Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) void VideoSoftware::Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight)
{ {
s_beginFieldArgs.xfbAddr = xfbAddr;
s_beginFieldArgs.field = field;
s_beginFieldArgs.fbWidth = fbWidth;
s_beginFieldArgs.fbHeight = fbHeight;
} }
// Run from the CPU thread (from VideoInterface.cpp) // Run from the CPU thread (from VideoInterface.cpp)
// Converts the XFB to the display texture (software rasterizer only) and then either
// swaps immediately or, with the dual core option, requests a swap via s_swapRequested.
// When the frame is skipped or no XFB address was recorded, only the statistics and the
// Core callback are updated.
void VideoSoftware::Video_EndField() void VideoSoftware::Video_EndField()
{ {
// Technically the XFB is continually rendered out scanline by scanline between
// BeginField and EndField. We could possibly get away with copying out the whole thing
// at BeginField for less lag, but for the safest emulation we run it here.
if (g_bSkipCurrentFrame || s_beginFieldArgs.xfbAddr == 0 ) {
// Nothing to show: count the frame and report that no frame was rendered.
swstats.frameCount++;
swstats.ResetFrame();
Core::Callback_VideoCopiedToXFB(false);
return;
}
if (!g_SWVideoConfig.bHwRasterizer) {
// Force Progressive: always read from the top field address rather than the
// address recorded by Video_BeginField().
u32 xfbAddr = VideoInterface::GetXFBAddressTop();
// All drivers make an assumption that the two fields are interleaved in the framebuffer.
// Give a warning if this isn't true.
if (xfbAddr + 1280 != VideoInterface::GetXFBAddressBottom()) {
WARN_LOG(VIDEO, "Feilds are not interleaved in XFB as expected.");
}
EfbInterface::yuv422_packed *xfb = (EfbInterface::yuv422_packed *) Memory::GetPointer(xfbAddr);
SWRenderer::UpdateColorTexture(xfb);
}
// Ideally we would just move all the OpenGL context stuff to the CPU thread, but this gets
// messy when the Hardware Rasterizer is enabled.
// And Neobrain loves his Hardware Rasterizer.
// If we are running dual core, signal the GPU thread about the new colour texture.
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
Common::AtomicStoreRelease(s_swapRequested, true);
else
SWRenderer::Swap(s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight);
} }
u32 VideoSoftware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputData) u32 VideoSoftware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputData)
@ -242,6 +293,16 @@ bool VideoSoftware::Video_Screenshot(const char *_szFilename)
return false; return false;
} }
// Run from the graphics thread.
// Performs the swap requested by Video_EndField(), if any, and then clears the request.
static void VideoFifo_CheckSwapRequest()
{
	const bool swapPending = Common::AtomicLoadAcquire(s_swapRequested);
	if (!swapPending)
		return;

	SWRenderer::Swap(s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight);
	Common::AtomicStoreRelease(s_swapRequested, false);
}
// ------------------------------- // -------------------------------
// Enter and exit the video loop // Enter and exit the video loop
// ------------------------------- // -------------------------------
@ -252,6 +313,7 @@ void VideoSoftware::Video_EnterLoop()
while (fifoStateRun) while (fifoStateRun)
{ {
VideoFifo_CheckSwapRequest();
g_video_backend->PeekMessages(); g_video_backend->PeekMessages();
if (!SWCommandProcessor::RunBuffer()) if (!SWCommandProcessor::RunBuffer())
@ -262,6 +324,7 @@ void VideoSoftware::Video_EnterLoop()
while (!emuRunningState && fifoStateRun) while (!emuRunningState && fifoStateRun)
{ {
g_video_backend->PeekMessages(); g_video_backend->PeekMessages();
VideoFifo_CheckSwapRequest();
m_csSWVidOccupied.unlock(); m_csSWVidOccupied.unlock();
Common::SleepCurrentThread(1); Common::SleepCurrentThread(1);
m_csSWVidOccupied.lock(); m_csSWVidOccupied.lock();