diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/EfbCopy.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/EfbCopy.cpp index 303d5a06ad..3959942f83 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/EfbCopy.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/EfbCopy.cpp @@ -15,20 +15,26 @@ #include "HW/Memmap.h" #include "Core.h" +static const float s_gammaLUT[] = +{ + 1.0f, + 1.7f, + 2.2f, + 1.0f +}; + namespace EfbCopy { - void CopyToXfb() + void CopyToXfb(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma) { - GLInterface->Update(); // just updates the render window position and the backbuffer size - if (!g_SWVideoConfig.bHwRasterizer) { - // copy to open gl for rendering - EfbInterface::UpdateColorTexture(); - SWRenderer::DrawTexture(EfbInterface::efbColorTexture, EFB_WIDTH, EFB_HEIGHT); - } + INFO_LOG(VIDEO, "xfbaddr: %x, fbwidth: %i, fbheight: %i, source: (%i, %i, %i, %i), Gamma %f", + xfbAddr, fbWidth, fbHeight, sourceRc.top, sourceRc.left, sourceRc.bottom, sourceRc.right, Gamma); + EfbInterface::yuv422_packed* xfb_in_ram = (EfbInterface::yuv422_packed *) Memory::GetPointer(xfbAddr); - SWRenderer::SwapBuffer(); + EfbInterface::CopyToXFB(xfb_in_ram, fbWidth, fbHeight, sourceRc, Gamma); + } } void CopyToRam() @@ -47,8 +53,8 @@ namespace EfbCopy int left = bpmem.copyTexSrcXY.x; int top = bpmem.copyTexSrcXY.y; - int right = left + bpmem.copyTexSrcWH.x; - int bottom = top + bpmem.copyTexSrcWH.y; + int right = left + bpmem.copyTexSrcWH.x + 1; + int bottom = top + bpmem.copyTexSrcWH.y + 1; for (u16 y = top; y <= bottom; y++) { @@ -62,21 +68,47 @@ namespace EfbCopy void CopyEfb() { - if (bpmem.triggerEFBCopy.copy_to_xfb) - DebugUtil::OnFrameEnd(); + EFBRectangle rc; + rc.left = (int)bpmem.copyTexSrcXY.x; + rc.top = (int)bpmem.copyTexSrcXY.y; + + // Add 1 to the width as well as the height; otherwise some textures are already corrupted at native resolution. 
+ rc.right = (int)(bpmem.copyTexSrcXY.x + bpmem.copyTexSrcWH.x + 1); + rc.bottom = (int)(bpmem.copyTexSrcXY.y + bpmem.copyTexSrcWH.y + 1); + + //if (bpmem.triggerEFBCopy.copy_to_xfb) + // DebugUtil::OnFrameEnd(); // FIXME: not actually frame end if (!g_bSkipCurrentFrame) { if (bpmem.triggerEFBCopy.copy_to_xfb) { - CopyToXfb(); - Core::Callback_VideoCopiedToXFB(true); + float yScale; + if (bpmem.triggerEFBCopy.scale_invert) + yScale = 256.0f / (float)bpmem.dispcopyyscale; + else + yScale = (float)bpmem.dispcopyyscale / 256.0f; - swstats.frameCount++; + float xfbLines = ((bpmem.copyTexSrcWH.y + 1.0f) * yScale); + + if (yScale != 1.0) + WARN_LOG(VIDEO, "yScale of %f is currently unsupported", yScale); + + if ((u32)xfbLines > MAX_XFB_HEIGHT) + { + INFO_LOG(VIDEO, "Tried to scale EFB to too many XFB lines (%f)", xfbLines); + xfbLines = MAX_XFB_HEIGHT; + } + + CopyToXfb(bpmem.copyTexDest << 5, + bpmem.copyMipMapStrideChannels << 4, + (u32)xfbLines, + rc, + s_gammaLUT[bpmem.triggerEFBCopy.gamma]); } else { - CopyToRam(); + CopyToRam(); // FIXME: should use the rectangle we have already created above } if (bpmem.triggerEFBCopy.clear) @@ -87,13 +119,5 @@ namespace EfbCopy ClearEfb(); } } - else - { - if (bpmem.triggerEFBCopy.copy_to_xfb) - { - // no frame rendered but tell that a frame has finished for frame skip counter - Core::Callback_VideoCopiedToXFB(false); - } - } } } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/EfbInterface.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/EfbInterface.cpp index 3d8ea0f9f1..b6aa9349ac 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/EfbInterface.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/EfbInterface.cpp @@ -8,16 +8,13 @@ #include "BPMemLoader.h" #include "LookUpTables.h" #include "SWPixelEngine.h" +#include "HW/Memmap.h" u8 efb[EFB_WIDTH*EFB_HEIGHT*6]; - namespace EfbInterface { - - u8 efbColorTexture[EFB_WIDTH*EFB_HEIGHT*4]; - inline u32 GetColorOffset(u16 x, u16 y) { return (x + y * EFB_WIDTH) * 3; @@ -31,7 +28,6 @@ 
namespace EfbInterface void DoState(PointerWrap &p) { p.DoArray(efb, EFB_WIDTH*EFB_HEIGHT*6); - p.DoArray(efbColorTexture, EFB_WIDTH*EFB_HEIGHT*4); } void SetPixelAlphaOnly(u32 offset, u8 a) @@ -469,6 +465,19 @@ namespace EfbInterface GetPixelColor(offset, color); } + // For internal use only; returns a non-normalized value (without the +16/+128 offsets), which saves work later. + void GetColorYUV(u16 x, u16 y, yuv444 *out) + { + u8 color[4]; + GetColor(x, y, color); + + // GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see + // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion + out->Y = 0.257f * color[RED_C] + 0.504f * color[GRN_C] + 0.098f * color[BLU_C]; + out->U = -0.148f * color[RED_C] + -0.291f * color[GRN_C] + 0.439f * color[BLU_C]; + out->V = 0.439f * color[RED_C] + -0.368f * color[GRN_C] + -0.071f * color[BLU_C]; + } + u32 GetDepth(u16 x, u16 y) { u32 offset = GetDepthOffset(x, y); @@ -482,22 +491,56 @@ namespace EfbInterface return &efb[GetColorOffset(x, y)]; } - void UpdateColorTexture() - { - u32 color; - u8* colorPtr = (u8*)&color; - u32* texturePtr = (u32*)efbColorTexture; - u32 textureAddress = 0; - u32 efbOffset = 0; + void CopyToXFB(yuv422_packed* xfb_in_ram, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma) { + // FIXME: We should do Gamma correction (the Gamma argument is currently unused) - for (u16 y = 0; y < EFB_HEIGHT; y++) + if (!xfb_in_ram) { - for (u16 x = 0; x < EFB_WIDTH; x++) + WARN_LOG(VIDEO, "Tried to copy to invalid XFB address"); + return; + } + + int left = sourceRc.left; + int right = sourceRc.right; + + // this assumes copies will always start on an even (YU) pixel and the + // copy always has an even width, which might not be true. 
+ if (left & 1 || right & 1) { + WARN_LOG(VIDEO, "Trying to copy XFB to from unaligned EFB source"); + // this will show up as wrongly encoded + } + + // Scanline buffer, leave room for borders + yuv444 scanline[640+2]; + + // our internal yuv444 type is not normalized, so black is {0, 0, 0} instead of {16, 128, 128} + scanline[0] = {0, 0, 0}; // black border at start + scanline[right-left+1] = {0, 0, 0}; // black border at end, one past the last copied pixel (the scanline is indexed from 1, not from left) + + for (u16 y = sourceRc.top; y < sourceRc.bottom; y++) + { + // Get a scanline of YUV pixels in 4:4:4 format + + for (int i = 1, x = left; x < right; i++, x++) { - GetPixelColor(efbOffset, colorPtr); - efbOffset += 3; - texturePtr[textureAddress++] = Common::swap32(color); // ABGR->RGBA + GetColorYUV(x, y, &scanline[i]); } + + // And Downsample them to 4:2:2 + for (int i = 1, x = left; x < right; i+=2, x+=2) + { + // YU pixel (add the +16 luma offset that the non-normalized yuv444 type omits) + xfb_in_ram[x].Y = scanline[i].Y + 16; + // U[i] = 1/4 * U[i-1] + 1/2 * U[i] + 1/4 * U[i+1] + // we add in 10 bit space so it will round more accurately + xfb_in_ram[x].UV = 128 + ((scanline[i-1].U + (scanline[i].U << 1) + scanline[i+1].U) >> 2); + + // YV pixel (same +16 luma offset as above) + xfb_in_ram[x+1].Y = scanline[i+1].Y + 16; + // V[i] = 1/4 * V[i-1] + 1/2 * V[i] + 1/4 * V[i+1] + xfb_in_ram[x+1].UV = 128 + ((scanline[i].V + (scanline[i+1].V << 1) + scanline[i+2].V) >> 2); + } + xfb_in_ram += 640; } } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/EfbInterface.h b/Source/Plugins/Plugin_VideoSoftware/Src/EfbInterface.h index 987c243bfd..dcd5752267 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/EfbInterface.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/EfbInterface.h @@ -11,9 +11,21 @@ namespace EfbInterface { const int DEPTH_BUFFER_START = EFB_WIDTH * EFB_HEIGHT * 3; + // color order is ABGR in order to emulate RGBA on little-endian hardware enum { ALP_C, BLU_C, GRN_C, RED_C }; - // color order is ABGR in order to emulate RGBA on little-endian hardware + // packed so the compiler doesn't mess with alignment + typedef struct __attribute__ 
((packed)) { + u8 Y; + u8 UV; + } yuv422_packed; + + // But this one is only used internally, so we can let the compiler pack it however it likes. + typedef struct __attribute__ ((aligned (4))){ + u8 Y; + s8 U; + s8 V; + } yuv444; // does full blending of an incoming pixel void BlendTev(u16 x, u16 y, u8 *color); @@ -28,12 +40,13 @@ namespace EfbInterface void SetDepth(u16 x, u16 y, u32 depth); void GetColor(u16 x, u16 y, u8 *color); + void GetColorYUV(u16 x, u16 y, yuv444 *color); u32 GetDepth(u16 x, u16 y); u8* GetPixelPointer(u16 x, u16 y, bool depth); - void UpdateColorTexture(); - extern u8 efbColorTexture[EFB_WIDTH*EFB_HEIGHT*4]; // RGBA format + void CopyToXFB(yuv422_packed* xfb_in_ram, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc, float Gamma); + void DoState(PointerWrap &p); } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWRenderer.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SWRenderer.cpp index a395025808..2ca459e886 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWRenderer.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWRenderer.cpp @@ -5,10 +5,12 @@ #include "Common.h" #include +#include "Core.h" #include "../../Plugin_VideoOGL/Src/GLUtil.h" #include "RasterFont.h" #include "SWRenderer.h" #include "SWStatistics.h" +#include "SWCommandProcessor.h" #include "OnScreenDisplay.h" @@ -18,6 +20,9 @@ static GLint attr_pos = -1, attr_tex = -1; static GLint uni_tex = -1; static GLuint program; +static u8 s_xfbColorTexture[2][EFB_WIDTH*EFB_HEIGHT*4]; +static int s_currentColorTexture = 0; + // Rasterfont isn't compatible with GLES // degasus: I think it does, but I can't test it #ifndef USE_GLES @@ -26,6 +31,7 @@ RasterFont* s_pfont = NULL; void SWRenderer::Init() { + GLInterface->SetBackBufferDimensions(EFB_WIDTH, EFB_HEIGHT); } void SWRenderer::Shutdown() @@ -68,6 +74,9 @@ void CreateShaders() void SWRenderer::Prepare() { + memset(s_xfbColorTexture, 0, sizeof(s_xfbColorTexture)); + s_currentColorTexture = 0; + glClearColor(0.0f, 
0.0f, 0.0f, 0.0f); glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment glGenTextures(1, &s_RenderTarget); @@ -162,11 +171,59 @@ void DrawButton(GLuint tex, float *coords) glBindTexture(TEX2D, 0); } #endif + +void SWRenderer::UpdateColorTexture(EfbInterface::yuv422_packed *xfb) +{ + u32 offset = 0; + u8 *TexturePointer = s_xfbColorTexture[!s_currentColorTexture]; + + for (u16 y = 0; y < EFB_HEIGHT; y++) + { + for (u16 x = 0; x < EFB_WIDTH; x+=2) + { + // We do this one color sample (aka 2 RGB pixels) at a time + int Y1 = xfb[x].Y - 16; + int Y2 = xfb[x+1].Y - 16; + int U = int(xfb[x].UV) - 128; + int V = int(xfb[x+1].UV) - 128; + + // We do the inverse BT.601 conversion for YCbCr to RGB + // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion + TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y1 + 1.596f * V)); + TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y1 - 0.392f * U - 0.813f * V)); + TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y1 + 2.017f * U )); + TexturePointer[offset++] = 255; + + TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y2 + 1.596f * V)); + TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y2 - 0.392f * U - 0.813f * V)); + TexturePointer[offset++] = min(255.0f, max(0.0f, 1.164f * Y2 + 2.017f * U )); + TexturePointer[offset++] = 255; + } + xfb += EFB_WIDTH; + } + s_currentColorTexture = !s_currentColorTexture; +} + +// Called on the GPU thread in dual-core mode, or directly from Video_EndField in single-core mode +void SWRenderer::Swap(u32 fbWidth, u32 fbHeight) +{ + GLInterface->Update(); // just updates the render window position and the backbuffer size + if (!g_SWVideoConfig.bHwRasterizer) + SWRenderer::DrawTexture(s_xfbColorTexture[s_currentColorTexture], fbWidth, fbHeight); + + swstats.frameCount++; + SWRenderer::SwapBuffer(); + Core::Callback_VideoCopiedToXFB(true); // FIXME: should this function be called FrameRendered? 
+} + void SWRenderer::DrawTexture(u8 *texture, int width, int height) { + // FIXME: This should add black bars when the game has set the VI to render less than the full xfb. + GLsizei glWidth = (GLsizei)GLInterface->GetBackBufferWidth(); GLsizei glHeight = (GLsizei)GLInterface->GetBackBufferHeight(); + // Update GLViewPort glViewport(0, 0, glWidth, glHeight); glScissor(0, 0, glWidth, glHeight); diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWRenderer.h b/Source/Plugins/Plugin_VideoSoftware/Src/SWRenderer.h index ba856936cb..44fc111c9e 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWRenderer.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWRenderer.h @@ -6,6 +6,7 @@ #define _RENDERER_H_ #include "CommonTypes.h" +#include "EfbInterface.h" namespace SWRenderer { @@ -16,8 +17,10 @@ namespace SWRenderer void RenderText(const char* pstr, int left, int top, u32 color); void DrawDebugText(); + void UpdateColorTexture(EfbInterface::yuv422_packed *xfb); void DrawTexture(u8 *texture, int width, int height); + void Swap(u32 fbWidth, u32 fbHeight); void SwapBuffer(); } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWmain.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SWmain.cpp index c63ad2e446..cd6a3b156c 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWmain.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWmain.cpp @@ -9,7 +9,7 @@ #include "VideoConfigDialog.h" #endif // HAVE_WX - +#include "Atomic.h" #include "SWCommandProcessor.h" #include "OpcodeDecoder.h" #include "SWVideoConfig.h" @@ -29,10 +29,22 @@ #include "OpcodeDecoder.h" #include "SWVertexLoader.h" #include "SWStatistics.h" +#include "HW/VideoInterface.h" +#include "HW/Memmap.h" #include "OnScreenDisplay.h" #define VSYNC_ENABLED 0 +static volatile u32 s_swapRequested = false; + +static volatile struct +{ + u32 xfbAddr; + FieldType field; + u32 fbWidth; + u32 fbHeight; +} s_beginFieldArgs; + namespace SW { @@ -191,12 +203,51 @@ void VideoSoftware::Video_Prepare() // Run from the CPU 
thread (from VideoInterface.cpp) void VideoSoftware::Video_BeginField(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight) -{ +{ + s_beginFieldArgs.xfbAddr = xfbAddr; + s_beginFieldArgs.field = field; + s_beginFieldArgs.fbWidth = fbWidth; + s_beginFieldArgs.fbHeight = fbHeight; } // Run from the CPU thread (from VideoInterface.cpp) void VideoSoftware::Video_EndField() { + // Technically the XFB is continually rendered out scanline by scanline between + // BeginField and EndField. We could possibly get away with copying out the whole thing + // at BeginField for less lag, but for the safest emulation we run it here. + + if (g_bSkipCurrentFrame || s_beginFieldArgs.xfbAddr == 0 ) { + swstats.frameCount++; + swstats.ResetFrame(); + Core::Callback_VideoCopiedToXFB(false); + return; + } + if (!g_SWVideoConfig.bHwRasterizer) { + + // Force Progressive + u32 xfbAddr = VideoInterface::GetXFBAddressTop(); + + // All drivers make an assumption that the two fields are interleaved in the framebuffer + // Give a warning if this isn't true. + if (xfbAddr + 1280 != VideoInterface::GetXFBAddressBottom()) { + WARN_LOG(VIDEO, "Feilds are not interleaved in XFB as expected."); + } + + EfbInterface::yuv422_packed *xfb = (EfbInterface::yuv422_packed *) Memory::GetPointer(xfbAddr); + + SWRenderer::UpdateColorTexture(xfb); + } + + // Ideally we would just move all the OpenGL context stuff to the CPU thread, but this gets + // messy when the Hardware Rasterizer is enabled. + // And Neobrain loves his Hardware Rasterizer + + // If we are running dual core, signal the GPU thread about the new colour texture. 
+ if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread) + Common::AtomicStoreRelease(s_swapRequested, true); + else + SWRenderer::Swap(s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight); } u32 VideoSoftware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputData) @@ -242,6 +293,16 @@ bool VideoSoftware::Video_Screenshot(const char *_szFilename) return false; } +// Run from the graphics thread: if a swap has been requested, performs it and then clears the request flag +static void VideoFifo_CheckSwapRequest() +{ + if (Common::AtomicLoadAcquire(s_swapRequested)) + { + SWRenderer::Swap(s_beginFieldArgs.fbWidth, s_beginFieldArgs.fbHeight); + Common::AtomicStoreRelease(s_swapRequested, false); + } +} + // ------------------------------- // Enter and exit the video loop // ------------------------------- @@ -252,6 +313,7 @@ void VideoSoftware::Video_EnterLoop() while (fifoStateRun) { + VideoFifo_CheckSwapRequest(); g_video_backend->PeekMessages(); if (!SWCommandProcessor::RunBuffer()) @@ -262,11 +324,12 @@ void VideoSoftware::Video_EnterLoop() while (!emuRunningState && fifoStateRun) { g_video_backend->PeekMessages(); + VideoFifo_CheckSwapRequest(); m_csSWVidOccupied.unlock(); Common::SleepCurrentThread(1); m_csSWVidOccupied.lock(); } - } + } } void VideoSoftware::Video_ExitLoop()