mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 15:31:17 +01:00
2b9389202e
This header doesn't actually make use of MathUtil.h within itself, so this can be removed. Many other source files used VideoCommon.h as an indirect include to include MathUtil.h, so these includes can also be adjusted. While we're at it, we can also migrate valid inclusions of VideoCommon.h into cpp files where it can feasibly be done to minimize propagating it via other headers.
714 lines
18 KiB
C++
714 lines
18 KiB
C++
// Copyright 2009 Dolphin Emulator Project
|
|
// Licensed under GPLv2+
|
|
// Refer to the license.txt file included.
|
|
|
|
#include "VideoBackends/Software/EfbInterface.h"
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <cstddef>
|
|
#include <cstring>
|
|
#include <vector>
|
|
|
|
#include "Common/CommonTypes.h"
|
|
#include "Common/Logging/Log.h"
|
|
|
|
#include "VideoBackends/Software/CopyRegion.h"
|
|
#include "VideoCommon/BPMemory.h"
|
|
#include "VideoCommon/LookUpTables.h"
|
|
#include "VideoCommon/PerfQueryBase.h"
|
|
#include "VideoCommon/VideoCommon.h"
|
|
|
|
namespace EfbInterface
|
|
{
|
|
static std::array<u8, EFB_WIDTH * EFB_HEIGHT * 6> efb;
|
|
|
|
static std::array<u32, PQ_NUM_MEMBERS> perf_values;
|
|
|
|
static inline u32 GetColorOffset(u16 x, u16 y)
|
|
{
|
|
return (x + y * EFB_WIDTH) * 3;
|
|
}
|
|
|
|
static inline u32 GetDepthOffset(u16 x, u16 y)
|
|
{
|
|
constexpr u32 depth_buffer_start = EFB_WIDTH * EFB_HEIGHT * 3;
|
|
|
|
return (x + y * EFB_WIDTH) * 3 + depth_buffer_start;
|
|
}
|
|
|
|
static void SetPixelAlphaOnly(u32 offset, u8 a)
|
|
{
|
|
switch (bpmem.zcontrol.pixel_format)
|
|
{
|
|
case PEControl::RGB8_Z24:
|
|
case PEControl::Z24:
|
|
case PEControl::RGB565_Z16:
|
|
// do nothing
|
|
break;
|
|
case PEControl::RGBA6_Z24:
|
|
{
|
|
u32 a32 = a;
|
|
u32* dst = (u32*)&efb[offset];
|
|
u32 val = *dst & 0xffffffc0;
|
|
val |= (a32 >> 2) & 0x0000003f;
|
|
*dst = val;
|
|
}
|
|
break;
|
|
default:
|
|
ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
|
|
}
|
|
}
|
|
|
|
static void SetPixelColorOnly(u32 offset, u8* rgb)
|
|
{
|
|
switch (bpmem.zcontrol.pixel_format)
|
|
{
|
|
case PEControl::RGB8_Z24:
|
|
case PEControl::Z24:
|
|
{
|
|
u32 src = *(u32*)rgb;
|
|
u32* dst = (u32*)&efb[offset];
|
|
u32 val = *dst & 0xff000000;
|
|
val |= src >> 8;
|
|
*dst = val;
|
|
}
|
|
break;
|
|
case PEControl::RGBA6_Z24:
|
|
{
|
|
u32 src = *(u32*)rgb;
|
|
u32* dst = (u32*)&efb[offset];
|
|
u32 val = *dst & 0xff00003f;
|
|
val |= (src >> 4) & 0x00000fc0; // blue
|
|
val |= (src >> 6) & 0x0003f000; // green
|
|
val |= (src >> 8) & 0x00fc0000; // red
|
|
*dst = val;
|
|
}
|
|
break;
|
|
case PEControl::RGB565_Z16:
|
|
{
|
|
INFO_LOG(VIDEO, "RGB565_Z16 is not supported correctly yet");
|
|
u32 src = *(u32*)rgb;
|
|
u32* dst = (u32*)&efb[offset];
|
|
u32 val = *dst & 0xff000000;
|
|
val |= src >> 8;
|
|
*dst = val;
|
|
}
|
|
break;
|
|
default:
|
|
ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
|
|
}
|
|
}
|
|
|
|
static void SetPixelAlphaColor(u32 offset, u8* color)
|
|
{
|
|
switch (bpmem.zcontrol.pixel_format)
|
|
{
|
|
case PEControl::RGB8_Z24:
|
|
case PEControl::Z24:
|
|
{
|
|
u32 src = *(u32*)color;
|
|
u32* dst = (u32*)&efb[offset];
|
|
u32 val = *dst & 0xff000000;
|
|
val |= src >> 8;
|
|
*dst = val;
|
|
}
|
|
break;
|
|
case PEControl::RGBA6_Z24:
|
|
{
|
|
u32 src = *(u32*)color;
|
|
u32* dst = (u32*)&efb[offset];
|
|
u32 val = *dst & 0xff000000;
|
|
val |= (src >> 2) & 0x0000003f; // alpha
|
|
val |= (src >> 4) & 0x00000fc0; // blue
|
|
val |= (src >> 6) & 0x0003f000; // green
|
|
val |= (src >> 8) & 0x00fc0000; // red
|
|
*dst = val;
|
|
}
|
|
break;
|
|
case PEControl::RGB565_Z16:
|
|
{
|
|
INFO_LOG(VIDEO, "RGB565_Z16 is not supported correctly yet");
|
|
u32 src = *(u32*)color;
|
|
u32* dst = (u32*)&efb[offset];
|
|
u32 val = *dst & 0xff000000;
|
|
val |= src >> 8;
|
|
*dst = val;
|
|
}
|
|
break;
|
|
default:
|
|
ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
|
|
}
|
|
}
|
|
|
|
static u32 GetPixelColor(u32 offset)
|
|
{
|
|
u32 src;
|
|
std::memcpy(&src, &efb[offset], sizeof(u32));
|
|
|
|
switch (bpmem.zcontrol.pixel_format)
|
|
{
|
|
case PEControl::RGB8_Z24:
|
|
case PEControl::Z24:
|
|
return 0xff | ((src & 0x00ffffff) << 8);
|
|
|
|
case PEControl::RGBA6_Z24:
|
|
return Convert6To8(src & 0x3f) | // Alpha
|
|
Convert6To8((src >> 6) & 0x3f) << 8 | // Blue
|
|
Convert6To8((src >> 12) & 0x3f) << 16 | // Green
|
|
Convert6To8((src >> 18) & 0x3f) << 24; // Red
|
|
|
|
case PEControl::RGB565_Z16:
|
|
INFO_LOG(VIDEO, "RGB565_Z16 is not supported correctly yet");
|
|
return 0xff | ((src & 0x00ffffff) << 8);
|
|
|
|
default:
|
|
ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
static void SetPixelDepth(u32 offset, u32 depth)
|
|
{
|
|
switch (bpmem.zcontrol.pixel_format)
|
|
{
|
|
case PEControl::RGB8_Z24:
|
|
case PEControl::RGBA6_Z24:
|
|
case PEControl::Z24:
|
|
{
|
|
u32* dst = (u32*)&efb[offset];
|
|
u32 val = *dst & 0xff000000;
|
|
val |= depth & 0x00ffffff;
|
|
*dst = val;
|
|
}
|
|
break;
|
|
case PEControl::RGB565_Z16:
|
|
{
|
|
INFO_LOG(VIDEO, "RGB565_Z16 is not supported correctly yet");
|
|
u32* dst = (u32*)&efb[offset];
|
|
u32 val = *dst & 0xff000000;
|
|
val |= depth & 0x00ffffff;
|
|
*dst = val;
|
|
}
|
|
break;
|
|
default:
|
|
ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
|
|
}
|
|
}
|
|
|
|
static u32 GetPixelDepth(u32 offset)
|
|
{
|
|
u32 depth = 0;
|
|
|
|
switch (bpmem.zcontrol.pixel_format)
|
|
{
|
|
case PEControl::RGB8_Z24:
|
|
case PEControl::RGBA6_Z24:
|
|
case PEControl::Z24:
|
|
{
|
|
depth = (*(u32*)&efb[offset]) & 0x00ffffff;
|
|
}
|
|
break;
|
|
case PEControl::RGB565_Z16:
|
|
{
|
|
INFO_LOG(VIDEO, "RGB565_Z16 is not supported correctly yet");
|
|
depth = (*(u32*)&efb[offset]) & 0x00ffffff;
|
|
}
|
|
break;
|
|
default:
|
|
ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
|
|
}
|
|
|
|
return depth;
|
|
}
|
|
|
|
static u32 GetSourceFactor(u8* srcClr, u8* dstClr, BlendMode::BlendFactor mode)
|
|
{
|
|
switch (mode)
|
|
{
|
|
case BlendMode::ZERO:
|
|
return 0;
|
|
case BlendMode::ONE:
|
|
return 0xffffffff;
|
|
case BlendMode::DSTCLR:
|
|
return *(u32*)dstClr;
|
|
case BlendMode::INVDSTCLR:
|
|
return 0xffffffff - *(u32*)dstClr;
|
|
case BlendMode::SRCALPHA:
|
|
{
|
|
u8 alpha = srcClr[ALP_C];
|
|
u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
|
|
return factor;
|
|
}
|
|
case BlendMode::INVSRCALPHA:
|
|
{
|
|
u8 alpha = 0xff - srcClr[ALP_C];
|
|
u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
|
|
return factor;
|
|
}
|
|
case BlendMode::DSTALPHA:
|
|
{
|
|
u8 alpha = dstClr[ALP_C];
|
|
u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
|
|
return factor;
|
|
}
|
|
case BlendMode::INVDSTALPHA:
|
|
{
|
|
u8 alpha = 0xff - dstClr[ALP_C];
|
|
u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
|
|
return factor;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static u32 GetDestinationFactor(u8* srcClr, u8* dstClr, BlendMode::BlendFactor mode)
|
|
{
|
|
switch (mode)
|
|
{
|
|
case BlendMode::ZERO:
|
|
return 0;
|
|
case BlendMode::ONE:
|
|
return 0xffffffff;
|
|
case BlendMode::SRCCLR:
|
|
return *(u32*)srcClr;
|
|
case BlendMode::INVSRCCLR:
|
|
return 0xffffffff - *(u32*)srcClr;
|
|
case BlendMode::SRCALPHA:
|
|
{
|
|
u8 alpha = srcClr[ALP_C];
|
|
u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
|
|
return factor;
|
|
}
|
|
case BlendMode::INVSRCALPHA:
|
|
{
|
|
u8 alpha = 0xff - srcClr[ALP_C];
|
|
u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
|
|
return factor;
|
|
}
|
|
case BlendMode::DSTALPHA:
|
|
{
|
|
u8 alpha = dstClr[ALP_C] & 0xff;
|
|
u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
|
|
return factor;
|
|
}
|
|
case BlendMode::INVDSTALPHA:
|
|
{
|
|
u8 alpha = 0xff - dstClr[ALP_C];
|
|
u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
|
|
return factor;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void BlendColor(u8* srcClr, u8* dstClr)
|
|
{
|
|
u32 srcFactor = GetSourceFactor(srcClr, dstClr, bpmem.blendmode.srcfactor);
|
|
u32 dstFactor = GetDestinationFactor(srcClr, dstClr, bpmem.blendmode.dstfactor);
|
|
|
|
for (int i = 0; i < 4; i++)
|
|
{
|
|
// add MSB of factors to make their range 0 -> 256
|
|
u32 sf = (srcFactor & 0xff);
|
|
sf += sf >> 7;
|
|
|
|
u32 df = (dstFactor & 0xff);
|
|
df += df >> 7;
|
|
|
|
u32 color = (srcClr[i] * sf + dstClr[i] * df) >> 8;
|
|
dstClr[i] = (color > 255) ? 255 : color;
|
|
|
|
dstFactor >>= 8;
|
|
srcFactor >>= 8;
|
|
}
|
|
}
|
|
|
|
static void LogicBlend(u32 srcClr, u32* dstClr, BlendMode::LogicOp op)
|
|
{
|
|
switch (op)
|
|
{
|
|
case BlendMode::CLEAR:
|
|
*dstClr = 0;
|
|
break;
|
|
case BlendMode::AND:
|
|
*dstClr = srcClr & *dstClr;
|
|
break;
|
|
case BlendMode::AND_REVERSE:
|
|
*dstClr = srcClr & (~*dstClr);
|
|
break;
|
|
case BlendMode::COPY:
|
|
*dstClr = srcClr;
|
|
break;
|
|
case BlendMode::AND_INVERTED:
|
|
*dstClr = (~srcClr) & *dstClr;
|
|
break;
|
|
case BlendMode::NOOP:
|
|
// Do nothing
|
|
break;
|
|
case BlendMode::XOR:
|
|
*dstClr = srcClr ^ *dstClr;
|
|
break;
|
|
case BlendMode::OR:
|
|
*dstClr = srcClr | *dstClr;
|
|
break;
|
|
case BlendMode::NOR:
|
|
*dstClr = ~(srcClr | *dstClr);
|
|
break;
|
|
case BlendMode::EQUIV:
|
|
*dstClr = ~(srcClr ^ *dstClr);
|
|
break;
|
|
case BlendMode::INVERT:
|
|
*dstClr = ~*dstClr;
|
|
break;
|
|
case BlendMode::OR_REVERSE:
|
|
*dstClr = srcClr | (~*dstClr);
|
|
break;
|
|
case BlendMode::COPY_INVERTED:
|
|
*dstClr = ~srcClr;
|
|
break;
|
|
case BlendMode::OR_INVERTED:
|
|
*dstClr = (~srcClr) | *dstClr;
|
|
break;
|
|
case BlendMode::NAND:
|
|
*dstClr = ~(srcClr & *dstClr);
|
|
break;
|
|
case BlendMode::SET:
|
|
*dstClr = 0xffffffff;
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void SubtractBlend(u8* srcClr, u8* dstClr)
|
|
{
|
|
for (int i = 0; i < 4; i++)
|
|
{
|
|
int c = (int)dstClr[i] - (int)srcClr[i];
|
|
dstClr[i] = (c < 0) ? 0 : c;
|
|
}
|
|
}
|
|
|
|
static void Dither(u16 x, u16 y, u8* color)
|
|
{
|
|
// No blending for RGB8 mode
|
|
if (!bpmem.blendmode.dither || bpmem.zcontrol.pixel_format != PEControl::PixelFormat::RGBA6_Z24)
|
|
return;
|
|
|
|
// Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering
|
|
static const u8 dither[2][2] = {{0, 2}, {3, 1}};
|
|
|
|
// Only the color channels are dithered?
|
|
for (int i = BLU_C; i <= RED_C; i++)
|
|
color[i] = ((color[i] - (color[i] >> 6)) + dither[y & 1][x & 1]) & 0xfc;
|
|
}
|
|
|
|
void BlendTev(u16 x, u16 y, u8* color)
|
|
{
|
|
const u32 offset = GetColorOffset(x, y);
|
|
u32 dstClr = GetPixelColor(offset);
|
|
|
|
u8* dstClrPtr = (u8*)&dstClr;
|
|
|
|
if (bpmem.blendmode.blendenable)
|
|
{
|
|
if (bpmem.blendmode.subtract)
|
|
SubtractBlend(color, dstClrPtr);
|
|
else
|
|
BlendColor(color, dstClrPtr);
|
|
}
|
|
else if (bpmem.blendmode.logicopenable)
|
|
{
|
|
LogicBlend(*((u32*)color), &dstClr, bpmem.blendmode.logicmode);
|
|
}
|
|
else
|
|
{
|
|
dstClrPtr = color;
|
|
}
|
|
|
|
if (bpmem.dstalpha.enable)
|
|
dstClrPtr[ALP_C] = bpmem.dstalpha.alpha;
|
|
|
|
if (bpmem.blendmode.colorupdate)
|
|
{
|
|
Dither(x, y, dstClrPtr);
|
|
if (bpmem.blendmode.alphaupdate)
|
|
SetPixelAlphaColor(offset, dstClrPtr);
|
|
else
|
|
SetPixelColorOnly(offset, dstClrPtr);
|
|
}
|
|
else if (bpmem.blendmode.alphaupdate)
|
|
{
|
|
SetPixelAlphaOnly(offset, dstClrPtr[ALP_C]);
|
|
}
|
|
}
|
|
|
|
void SetColor(u16 x, u16 y, u8* color)
|
|
{
|
|
u32 offset = GetColorOffset(x, y);
|
|
if (bpmem.blendmode.colorupdate)
|
|
{
|
|
if (bpmem.blendmode.alphaupdate)
|
|
SetPixelAlphaColor(offset, color);
|
|
else
|
|
SetPixelColorOnly(offset, color);
|
|
}
|
|
else if (bpmem.blendmode.alphaupdate)
|
|
{
|
|
SetPixelAlphaOnly(offset, color[ALP_C]);
|
|
}
|
|
}
|
|
|
|
void SetDepth(u16 x, u16 y, u32 depth)
|
|
{
|
|
if (bpmem.zmode.updateenable)
|
|
SetPixelDepth(GetDepthOffset(x, y), depth);
|
|
}
|
|
|
|
u32 GetColor(u16 x, u16 y)
|
|
{
|
|
u32 offset = GetColorOffset(x, y);
|
|
return GetPixelColor(offset);
|
|
}
|
|
|
|
static u32 VerticalFilter(const std::array<u32, 3>& colors,
|
|
const std::array<u8, 7>& filterCoefficients)
|
|
{
|
|
u8 in_colors[3][4];
|
|
std::memcpy(&in_colors, colors.data(), sizeof(in_colors));
|
|
|
|
// Alpha channel is not used
|
|
u8 out_color[4];
|
|
out_color[ALP_C] = 0;
|
|
|
|
// All Coefficients should sum to 64, otherwise the total brightness will change, which many games
|
|
// do on purpose to implement a brightness filter across the whole copy.
|
|
for (int i = BLU_C; i <= RED_C; i++)
|
|
{
|
|
// TODO: implement support for multisampling.
|
|
// In non-multisampling mode:
|
|
// * Coefficients 2, 3 and 4 sample from the current pixel.
|
|
// * Coefficients 0 and 1 sample from the pixel above this one
|
|
// * Coefficients 5 and 6 sample from the pixel below this one
|
|
int sum =
|
|
in_colors[0][i] * (filterCoefficients[0] + filterCoefficients[1]) +
|
|
in_colors[1][i] * (filterCoefficients[2] + filterCoefficients[3] + filterCoefficients[4]) +
|
|
in_colors[2][i] * (filterCoefficients[5] + filterCoefficients[6]);
|
|
|
|
// TODO: this clamping behavior appears to be correct, but isn't confirmed on hardware.
|
|
out_color[i] = std::min(255, sum >> 6); // clamp larger values to 255
|
|
}
|
|
|
|
u32 out_color32;
|
|
std::memcpy(&out_color32, out_color, sizeof(out_color32));
|
|
return out_color32;
|
|
}
|
|
|
|
static u32 GammaCorrection(u32 color, const float gamma_rcp)
|
|
{
|
|
u8 in_colors[4];
|
|
std::memcpy(&in_colors, &color, sizeof(in_colors));
|
|
|
|
u8 out_color[4];
|
|
for (int i = BLU_C; i <= RED_C; i++)
|
|
{
|
|
out_color[i] = static_cast<u8>(
|
|
std::clamp(std::pow(in_colors[i] / 255.0f, gamma_rcp) * 255.0f, 0.0f, 255.0f));
|
|
}
|
|
|
|
u32 out_color32;
|
|
std::memcpy(&out_color32, out_color, sizeof(out_color32));
|
|
return out_color32;
|
|
}
|
|
|
|
// For internal used only, return a non-normalized value, which saves work later.
|
|
static yuv444 ConvertColorToYUV(u32 color)
|
|
{
|
|
const u8 red = static_cast<u8>(color >> 24);
|
|
const u8 green = static_cast<u8>(color >> 16);
|
|
const u8 blue = static_cast<u8>(color >> 8);
|
|
|
|
// GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
|
|
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
|
|
return {static_cast<u8>(0.257f * red + 0.504f * green + 0.098f * blue),
|
|
static_cast<s8>(-0.148f * red + -0.291f * green + 0.439f * blue),
|
|
static_cast<s8>(0.439f * red + -0.368f * green + -0.071f * blue)};
|
|
}
|
|
|
|
u32 GetDepth(u16 x, u16 y)
|
|
{
|
|
u32 offset = GetDepthOffset(x, y);
|
|
return GetPixelDepth(offset);
|
|
}
|
|
|
|
u8* GetPixelPointer(u16 x, u16 y, bool depth)
|
|
{
|
|
if (depth)
|
|
return &efb[GetDepthOffset(x, y)];
|
|
return &efb[GetColorOffset(x, y)];
|
|
}
|
|
|
|
void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const MathUtil::Rectangle<int>& source_rect,
|
|
float y_scale, float gamma)
|
|
{
|
|
if (!xfb_in_ram)
|
|
{
|
|
WARN_LOG(VIDEO, "Tried to copy to invalid XFB address");
|
|
return;
|
|
}
|
|
|
|
const int left = source_rect.left;
|
|
const int right = source_rect.right;
|
|
const bool clamp_top = bpmem.triggerEFBCopy.clamp_top;
|
|
const bool clamp_bottom = bpmem.triggerEFBCopy.clamp_bottom;
|
|
const float gamma_rcp = 1.0f / gamma;
|
|
const auto filter_coefficients = bpmem.copyfilter.GetCoefficients();
|
|
|
|
// this assumes copies will always start on an even (YU) pixel and the
|
|
// copy always has an even width, which might not be true.
|
|
if (left & 1 || right & 1)
|
|
{
|
|
WARN_LOG(VIDEO, "Trying to copy XFB to from unaligned EFB source");
|
|
// this will show up as wrongly encoded
|
|
}
|
|
|
|
// Scanline buffer, leave room for borders
|
|
yuv444 scanline[EFB_WIDTH + 2];
|
|
|
|
static std::vector<yuv422_packed> source;
|
|
source.resize(EFB_WIDTH * EFB_HEIGHT);
|
|
yuv422_packed* src_ptr = &source[0];
|
|
|
|
for (int y = source_rect.top; y < source_rect.bottom; y++)
|
|
{
|
|
// Clamping behavior
|
|
// NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
|
|
// which returns random garbage from the empty bus (confirmed by hardware tests).
|
|
//
|
|
// In our implementation, the garbage just so happens to be the top or bottom row.
|
|
// Statistically, that could happen.
|
|
const u16 y_prev = static_cast<u16>(std::max(clamp_top ? source_rect.top : 0, y - 1));
|
|
const u16 y_next =
|
|
static_cast<u16>(std::min<int>(clamp_bottom ? source_rect.bottom : EFB_HEIGHT, y + 1));
|
|
|
|
// Get a scanline of YUV pixels in 4:4:4 format
|
|
for (int i = 1, x = left; x < right; i++, x++)
|
|
{
|
|
// Get RGB colors
|
|
std::array<u32, 3> colors = {{GetColor(x, y_prev), GetColor(x, y), GetColor(x, y_next)}};
|
|
|
|
// Vertical Filter (Multisampling resolve, deflicker, brightness)
|
|
u32 filtered = VerticalFilter(colors, filter_coefficients);
|
|
|
|
// Gamma correction happens here.
|
|
filtered = GammaCorrection(filtered, gamma_rcp);
|
|
|
|
scanline[i] = ConvertColorToYUV(filtered);
|
|
}
|
|
|
|
// Flipper clamps the border colors
|
|
scanline[0] = scanline[1];
|
|
scanline[right + 1] = scanline[right];
|
|
|
|
// And Downsample them to 4:2:2
|
|
for (int i = 1, x = left; x < right; i += 2, x += 2)
|
|
{
|
|
// YU pixel
|
|
src_ptr[x].Y = scanline[i].Y + 16;
|
|
// we mix our color differences in 10 bit space so it will round more accurately
|
|
// U[i] = 1/4 * U[i-1] + 1/2 * U[i] + 1/4 * U[i+1]
|
|
src_ptr[x].UV = 128 + ((scanline[i - 1].U + (scanline[i].U << 1) + scanline[i + 1].U) >> 2);
|
|
|
|
// YV pixel
|
|
src_ptr[x + 1].Y = scanline[i + 1].Y + 16;
|
|
// V[i] = 1/4 * V[i-1] + 1/2 * V[i] + 1/4 * V[i+1]
|
|
src_ptr[x + 1].UV =
|
|
128 + ((scanline[i - 1].V + (scanline[i].V << 1) + scanline[i + 1].V) >> 2);
|
|
}
|
|
src_ptr += memory_stride;
|
|
}
|
|
|
|
auto dest_rect =
|
|
MathUtil::Rectangle<int>{source_rect.left, source_rect.top, source_rect.right,
|
|
static_cast<int>(static_cast<float>(source_rect.bottom) * y_scale)};
|
|
|
|
const std::size_t destination_size = dest_rect.GetWidth() * dest_rect.GetHeight() * 2;
|
|
static std::vector<yuv422_packed> destination;
|
|
destination.resize(dest_rect.GetWidth() * dest_rect.GetHeight());
|
|
|
|
SW::CopyRegion(source.data(), source_rect, destination.data(), dest_rect);
|
|
|
|
memcpy(xfb_in_ram, destination.data(), destination_size);
|
|
}
|
|
|
|
bool ZCompare(u16 x, u16 y, u32 z)
|
|
{
|
|
u32 offset = GetDepthOffset(x, y);
|
|
u32 depth = GetPixelDepth(offset);
|
|
|
|
bool pass;
|
|
|
|
switch (bpmem.zmode.func)
|
|
{
|
|
case ZMode::NEVER:
|
|
pass = false;
|
|
break;
|
|
case ZMode::LESS:
|
|
pass = z < depth;
|
|
break;
|
|
case ZMode::EQUAL:
|
|
pass = z == depth;
|
|
break;
|
|
case ZMode::LEQUAL:
|
|
pass = z <= depth;
|
|
break;
|
|
case ZMode::GREATER:
|
|
pass = z > depth;
|
|
break;
|
|
case ZMode::NEQUAL:
|
|
pass = z != depth;
|
|
break;
|
|
case ZMode::GEQUAL:
|
|
pass = z >= depth;
|
|
break;
|
|
case ZMode::ALWAYS:
|
|
pass = true;
|
|
break;
|
|
default:
|
|
pass = false;
|
|
ERROR_LOG(VIDEO, "Bad Z compare mode %i", (int)bpmem.zmode.func);
|
|
}
|
|
|
|
if (pass && bpmem.zmode.updateenable)
|
|
{
|
|
SetPixelDepth(offset, z);
|
|
}
|
|
|
|
return pass;
|
|
}
|
|
|
|
u32 GetPerfQueryResult(PerfQueryType type)
|
|
{
|
|
return perf_values[type];
|
|
}
|
|
|
|
void ResetPerfQuery()
|
|
{
|
|
perf_values = {};
|
|
}
|
|
|
|
void IncPerfCounterQuadCount(PerfQueryType type)
|
|
{
|
|
// NOTE: hardware doesn't process individual pixels but quads instead.
|
|
// Current software renderer architecture works on pixels though, so
|
|
// we have this "quad" hack here to only increment the registers on
|
|
// every fourth rendered pixel
|
|
static u32 quad[PQ_NUM_MEMBERS];
|
|
if (++quad[type] != 3)
|
|
return;
|
|
quad[type] = 0;
|
|
++perf_values[type];
|
|
}
|
|
} // namespace EfbInterface
|