Merge pull request #2957 from phire/unify_efbcopy

Clean up and unify efb copy implementations into VideoCommon
This commit is contained in:
Scott Mansell 2015-09-07 00:10:42 +12:00
commit c08a83a5aa
17 changed files with 180 additions and 285 deletions

View File

@ -87,55 +87,21 @@ void PSTextureEncoder::Shutdown()
SAFE_RELEASE(m_out);
}
size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat,
void PSTextureEncoder::Encode(u8* dst, const TextureCache::TCacheEntryBase *texture_entry,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf)
{
if (!m_ready) // Make sure we initialized OK
return 0;
// Clamp srcRect to 640x528. BPS: The Strike tries to encode an 800x600
// texture, which is invalid.
EFBRectangle correctSrc = srcRect;
correctSrc.ClampUL(0, 0, EFB_WIDTH, EFB_HEIGHT);
// Validate source rect size
if (correctSrc.GetWidth() <= 0 || correctSrc.GetHeight() <= 0)
return 0;
return;
HRESULT hr;
unsigned int blockW = BLOCK_WIDTHS[dstFormat];
unsigned int blockH = BLOCK_HEIGHTS[dstFormat];
// Round up source dims to multiple of block size
unsigned int actualWidth = correctSrc.GetWidth() / (scaleByHalf ? 2 : 1);
actualWidth = (actualWidth + blockW-1) & ~(blockW-1);
unsigned int actualHeight = correctSrc.GetHeight() / (scaleByHalf ? 2 : 1);
actualHeight = (actualHeight + blockH-1) & ~(blockH-1);
unsigned int numBlocksX = actualWidth/blockW;
unsigned int numBlocksY = actualHeight/blockH;
unsigned int cacheLinesPerRow;
if (dstFormat == 0x6) // RGBA takes two cache lines per block; all others take one
cacheLinesPerRow = numBlocksX*2;
else
cacheLinesPerRow = numBlocksX;
_assert_msg_(VIDEO, cacheLinesPerRow*32 <= MAX_BYTES_PER_BLOCK_ROW, "cache lines per row sanity check");
unsigned int totalCacheLines = cacheLinesPerRow * numBlocksY;
_assert_msg_(VIDEO, totalCacheLines*32 <= MAX_BYTES_PER_ENCODE, "total encode size sanity check");
size_t encodeSize = 0;
// Reset API
g_renderer->ResetAPIState();
// Set up all the state for EFB encoding
{
D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(cacheLinesPerRow * 8), FLOAT(numBlocksY));
D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(texture_entry->CacheLinesPerRow() * 8), FLOAT(texture_entry->NumBlocksY()));
D3D::context->RSSetViewports(1, &vp);
EFBRectangle fullSrcRect;
@ -155,9 +121,9 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat,
FramebufferManager::GetResolvedEFBColorTexture()->GetSRV();
EFBEncodeParams params;
params.SrcLeft = correctSrc.left;
params.SrcTop = correctSrc.top;
params.DestWidth = actualWidth;
params.SrcLeft = srcRect.left;
params.SrcTop = srcRect.top;
params.DestWidth = texture_entry->native_width;
params.ScaleFactor = scaleByHalf ? 2 : 1;
D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, &params, 0, 0);
D3D::stateman->SetPixelConstants(m_encodeParams);
@ -172,12 +138,12 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat,
targetRect.AsRECT(),
Renderer::GetTargetWidth(),
Renderer::GetTargetHeight(),
SetStaticShader(dstFormat, srcFormat, isIntensity, scaleByHalf),
SetStaticShader(texture_entry->format, srcFormat, isIntensity, scaleByHalf),
VertexShaderCache::GetSimpleVertexShader(),
VertexShaderCache::GetSimpleInputLayout());
// Copy to staging buffer
D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, cacheLinesPerRow * 8, numBlocksY, 1);
D3D11_BOX srcBox = CD3D11_BOX(0, 0, 0, texture_entry->CacheLinesPerRow() * 8, texture_entry->NumBlocksY(), 1);
D3D::context->CopySubresourceRegion(m_outStage, 0, 0, 0, 0, m_out, 0, &srcBox);
// Transfer staging buffer to GameCube/Wii RAM
@ -186,16 +152,14 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat,
CHECK(SUCCEEDED(hr), "map staging buffer (0x%x)", hr);
u8* src = (u8*)map.pData;
for (unsigned int y = 0; y < numBlocksY; ++y)
for (unsigned int y = 0; y < texture_entry->NumBlocksY(); ++y)
{
memcpy(dst, src, cacheLinesPerRow*32);
dst += bpmem.copyMipMapStrideChannels*32;
memcpy(dst, src, texture_entry->CacheLinesPerRow() * 32);
dst += texture_entry->memory_stride;
src += map.RowPitch;
}
D3D::context->Unmap(m_outStage, 0);
encodeSize = bpmem.copyMipMapStrideChannels*32 * numBlocksY;
}
// Restore API
@ -203,8 +167,6 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat,
D3D::context->OMSetRenderTargets(1,
&FramebufferManager::GetEFBColorTexture()->GetRTV(),
FramebufferManager::GetEFBDepthTexture()->GetDSV());
return encodeSize;
}
ID3D11PixelShader* PSTextureEncoder::SetStaticShader(unsigned int dstFormat, PEControl::PixelFormat srcFormat,

View File

@ -6,6 +6,8 @@
#include "VideoBackends/D3D/TextureEncoder.h"
#include "VideoCommon/TextureCacheBase.h"
struct ID3D11Texture2D;
struct ID3D11RenderTargetView;
struct ID3D11Buffer;
@ -29,7 +31,7 @@ public:
void Init();
void Shutdown();
size_t Encode(u8* dst, unsigned int dstFormat,
void Encode(u8* dst, const TextureCache::TCacheEntryBase *texture_entry,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf);

View File

@ -2,7 +2,6 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Core/HW/Memmap.h"
#include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DShader.h"
#include "VideoBackends/D3D/D3DState.h"
@ -185,7 +184,7 @@ TextureCache::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConf
}
}
void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFormat,
void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, unsigned int dstFormat, u32 dstStride,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf, unsigned int cbufid,
const float *colmat)
@ -226,10 +225,13 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo
// Create texture copy
D3D::drawShadedTexQuad(
(srcFormat == PEControl::Z24) ? FramebufferManager::GetEFBDepthTexture()->GetSRV() : FramebufferManager::GetEFBColorTexture()->GetSRV(),
&sourcerect, Renderer::GetTargetWidth(), Renderer::GetTargetHeight(),
(srcFormat == PEControl::Z24) ? PixelShaderCache::GetDepthMatrixProgram(true) : PixelShaderCache::GetColorMatrixProgram(true),
VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader());
(srcFormat == PEControl::Z24 ? FramebufferManager::GetEFBDepthTexture() : FramebufferManager::GetEFBColorTexture())->GetSRV(),
&sourcerect, Renderer::GetTargetWidth(),
Renderer::GetTargetHeight(),
srcFormat == PEControl::Z24 ? PixelShaderCache::GetDepthMatrixProgram(true) : PixelShaderCache::GetColorMatrixProgram(true),
VertexShaderCache::GetSimpleVertexShader(),
VertexShaderCache::GetSimpleInputLayout(),
GeometryShaderCache::GetCopyGeometryShader());
D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV());
@ -237,14 +239,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo
if (!g_ActiveConfig.bSkipEFBCopyToRam)
{
u8* dst = Memory::GetPointer(dstAddr);
size_t encoded_size = g_encoder->Encode(dst, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf);
size_in_bytes = (u32)encoded_size;
TextureCache::MakeRangeDynamic(dstAddr, (u32)encoded_size);
this->hash = GetHash64(dst, (int)encoded_size, g_ActiveConfig.iSafeTextureCache_ColorSamples);
g_encoder->Encode(dst, this, srcFormat, srcRect, isIntensity, scaleByHalf);
}
}
@ -342,7 +337,7 @@ void TextureCache::ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* uncon
D3D::stateman->SetTexture(1, palette_buf_srv);
// TODO: Add support for C14X2 format. (Different multiplier, more palette entries.)
float params[4] = { unconverted->format == 0 ? 15.f : 255.f };
float params[4] = { (unconverted->format & 0xf) == 0 ? 15.f : 255.f };
D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, &params, 0, 0);
D3D::stateman->SetPixelConstants(palette_uniform);

View File

@ -34,7 +34,7 @@ private:
void Load(unsigned int width, unsigned int height,
unsigned int expanded_width, unsigned int levels) override;
void FromRenderTarget(u32 dstAddr, unsigned int dstFormat,
void FromRenderTarget(u8* dst, unsigned int dstFormat, u32 dstStride,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf, unsigned int cbufid,
const float *colmat) override;

View File

@ -5,96 +5,11 @@
#pragma once
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VideoCommon.h"
namespace DX11
{
// 4-bit format: 8x8 texels / cache line
// 8-bit format: 8x4 texels / cache line
// 16-bit format: 4x4 texels / cache line
// 32-bit format: 4x4 texels / 2 cache lines
// Compressed format: 8x8 texels / cache line
// Document EFB encoding formats here with examples of where they are used.
// Format: 0 - R4
// Used in The Legend of Zelda: The Wind Waker for character shadows (srcFormat 1,
// isIntensity 1, scaleByHalf 1).
// Format: 1 - R8
// FIXME: Unseen. May or may not be a duplicate of format 8.
// Format: 2 - A4 R4
// FIXME: Unseen.
// Format: 3 - A8 R8
// FIXME: Unseen.
// Format: 4 - R5 G6 B5
// Used in Wind Waker for most render-to-texture effects like heat shimmer and
// depth-of-field.
// Format: 5 - 1 R5 G5 B5 or 0 A3 R4 G4 B4
// Used in Twilight Princess for character shadows.
// Format: 6 - A8 R8 A8 R8 | G8 B8 G8 B8
// Used in Twilight Princess for bloom effect.
// Format: 7 - A8
// Used in Metroid Prime 2 for the scan visor.
// Format: 8 - R8
// Used in Twilight Princess for the map.
// Format: 9 - G8
// FIXME: Unseen.
// Format: A - B8
// Used in Metroid Prime 2 for the scan visor.
// Format: B - G8 R8
// Used in Wind Waker for depth-of-field. Usually used with srcFormat 3 to
// render depth textures. The bytes are swapped, so games have to correct it
// in RAM before using it as a texture.
// Format: C - B8 G8
// FIXME: Unseen.
const unsigned int BLOCK_WIDTHS[16] = {
8, // R4
8, // R8 (FIXME: duplicate of R8 below?)
8, // A4 R4
4, // A8 R8
4, // R5 G6 B5
4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4
4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines)
8, // A8
8, // R8 (FIXME: duplicate of R8 above?)
8, // G8
8, // B8
4, // G8 R8
4, // B8 G8
0, 0, 0 // Unknown formats
};
const unsigned int BLOCK_HEIGHTS[16] = {
8, // R4
4, // R8 (FIXME: duplicate of R8 below?)
4, // A4 R4
4, // A8 R8
4, // R5 G6 B5
4, // 1 R5 G5 B5 or 0 A3 R4 G4 B4
4, // A8 R8 A8 R8 | G8 B8 G8 B8 (two cache lines)
4, // A8
4, // R8 (FIXME: duplicate of R8 above?)
4, // G8
4, // B8
4, // G8 R8
4, // B8 G8
0, 0, 0 // Unknown formats
};
// Maximum number of bytes that can occur in a texture block-row generated by
// the encoder
static const UINT MAX_BYTES_PER_BLOCK_ROW = (EFB_WIDTH/4)*64;
@ -111,7 +26,7 @@ public:
virtual void Init() = 0;
virtual void Shutdown() = 0;
// Returns size in bytes of encoded block of memory
virtual size_t Encode(u8* dst, unsigned int dstFormat,
virtual void Encode(u8* dst, const TextureCache::TCacheEntryBase *texture_entry,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf) = 0;

View File

@ -213,7 +213,7 @@ void TextureCache::TCacheEntry::Load(unsigned int width, unsigned int height,
TextureCache::SetStage();
}
void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFormat,
void TextureCache::TCacheEntry::FromRenderTarget(u8* dstPointer, unsigned int dstFormat, u32 dstStride,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf, unsigned int cbufid,
const float *colmat)
@ -264,24 +264,14 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo
if (!g_ActiveConfig.bSkipEFBCopyToRam)
{
int encoded_size = TextureConverter::EncodeToRamFromTexture(
dstAddr,
TextureConverter::EncodeToRamFromTexture(
dstPointer,
this,
read_texture,
srcFormat == PEControl::Z24,
isIntensity,
dstFormat,
scaleByHalf,
srcRect,
copyMipMapStrideChannels * 32);
u8* dst = Memory::GetPointer(dstAddr);
u64 const new_hash = GetHash64(dst,encoded_size,g_ActiveConfig.iSafeTextureCache_ColorSamples);
size_in_bytes = (u32)encoded_size;
TextureCache::MakeRangeDynamic(dstAddr, encoded_size);
hash = new_hash;
srcRect);
}
FramebufferManager::SetFramebuffer(0);
@ -570,7 +560,7 @@ void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unc
memcpy(buffer.first, palette, size);
s_palette_stream_buffer->Unmap(size);
glUniform1i(s_palette_buffer_offset_uniform[format], buffer.second / 2);
glUniform1f(s_palette_multiplier_uniform[format], unconverted->format == 0 ? 15.0f : 255.0f);
glUniform1f(s_palette_multiplier_uniform[format], (unconverted->format & 0xf) == 0 ? 15.0f : 255.0f);
glUniform4f(s_palette_copy_position_uniform[format], 0.0f, 0.0f, (float)unconverted->config.width, (float)unconverted->config.height);
glActiveTexture(GL_TEXTURE10);

View File

@ -41,7 +41,7 @@ private:
void Load(unsigned int width, unsigned int height,
unsigned int expanded_width, unsigned int level) override;
void FromRenderTarget(u32 dstAddr, unsigned int dstFormat,
void FromRenderTarget(u8 *dst, unsigned int dstFormat, u32 dstStride,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf, unsigned int cbufid,
const float *colmat) override;

View File

@ -268,59 +268,18 @@ static void EncodeToRamUsingShader(GLuint srcTexture,
}
}
int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source, u32 writeStride)
void EncodeToRamFromTexture(u8 *dest_ptr, const TextureCache::TCacheEntryBase *texture_entry,
GLuint source_texture, bool bFromZBuffer, bool bIsIntensityFmt, int bScaleByHalf, const EFBRectangle& source)
{
u32 format = copyfmt;
if (bFromZBuffer)
{
format |= _GX_TF_ZTF;
if (copyfmt == 11)
format = GX_TF_Z16;
else if (format < GX_TF_Z8 || format > GX_TF_Z24X8)
format |= _GX_TF_CTF;
}
else
{
if (copyfmt > GX_TF_RGBA8 || (copyfmt < GX_TF_RGB565 && !bIsIntensityFmt))
format |= _GX_TF_CTF;
}
SHADER& texconv_shader = GetOrCreateEncodingShader(format);
u8 *dest_ptr = Memory::GetPointer(address);
int width = (source.right - source.left) >> bScaleByHalf;
int height = (source.bottom - source.top) >> bScaleByHalf;
int size_in_bytes = TexDecoder_GetTextureSizeInBytes(width, height, format);
u16 blkW = TexDecoder_GetBlockWidthInTexels(format) - 1;
u16 blkH = TexDecoder_GetBlockHeightInTexels(format) - 1;
// only copy on cache line boundaries
// extra pixels are copied but not displayed in the resulting texture
s32 expandedWidth = (width + blkW) & (~blkW);
s32 expandedHeight = (height + blkH) & (~blkH);
SHADER& texconv_shader = GetOrCreateEncodingShader(texture_entry->format);
texconv_shader.Bind();
glUniform4i(s_encodingUniforms[format],
source.left, source.top,
expandedWidth, bScaleByHalf ? 2 : 1);
unsigned int numBlocksX = expandedWidth / TexDecoder_GetBlockWidthInTexels(format);
unsigned int numBlocksY = expandedHeight / TexDecoder_GetBlockHeightInTexels(format);
unsigned int cacheLinesPerRow;
if ((format & 0x0f) == 6)
cacheLinesPerRow = numBlocksX * 2;
else
cacheLinesPerRow = numBlocksX;
glUniform4i(s_encodingUniforms[texture_entry->format],
source.left, source.top, texture_entry->native_width, bScaleByHalf ? 2 : 1);
EncodeToRamUsingShader(source_texture,
dest_ptr, cacheLinesPerRow * 32, numBlocksY,
writeStride, bScaleByHalf > 0 && !bFromZBuffer);
return size_in_bytes; // TODO: D3D11 is calculating this value differently!
dest_ptr, texture_entry->CacheLinesPerRow() * 32, texture_entry->NumBlocksY(),
texture_entry->memory_stride, bScaleByHalf > 0 && !bFromZBuffer);
}
void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* destAddr, u32 dstWidth, u32 dstStride, u32 dstHeight)

View File

@ -5,6 +5,7 @@
#pragma once
#include "VideoBackends/OGL/GLUtil.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VideoCommon.h"
namespace OGL
@ -24,7 +25,8 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc,
void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTexture);
// returns size of the encoded data (in bytes)
int EncodeToRamFromTexture(u32 address, GLuint source_texture, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, int bScaleByHalf, const EFBRectangle& source, u32 writeStride);
void EncodeToRamFromTexture(u8 *dest_ptr, const TextureCache::TCacheEntryBase *texture_entry,
GLuint source_texture, bool bFromZBuffer, bool bIsIntensityFmt, int bScaleByHalf, const EFBRectangle& source);
}

View File

@ -9,7 +9,6 @@
#include "VideoCommon/BPFunctions.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
@ -85,15 +84,6 @@ void SetColorMask()
g_renderer->SetColorMask();
}
// Forwards an EFB copy request to the texture cache.
// srcFormat is PEControl::Z24 when the game wants to copy from the Z-buffer
// (the Z-buffer uses a 24-bit format).
void CopyEFB(u32 dstAddr, const EFBRectangle& srcRect,
	unsigned int dstFormat, PEControl::PixelFormat srcFormat,
	bool isIntensity, bool scaleByHalf)
{
	TextureCache::CopyRenderTargetToTexture(
		dstAddr,
		dstFormat,
		srcFormat,
		srcRect,
		isIntensity,
		scaleByHalf);
}
/* Explanation of the magic behind ClearScreen:
There's numerous possible formats for the pixel data in the EFB.
However, in the HW accelerated backends we're always using RGBA8

View File

@ -23,9 +23,6 @@ void SetBlendMode();
void SetDitherMode();
void SetLogicOpMode();
void SetColorMask();
void CopyEFB(u32 dstAddr, const EFBRectangle& srcRect,
unsigned int dstFormat, PEControl::PixelFormat srcFormat,
bool isIntensity, bool scaleByHalf);
void ClearScreen(const EFBRectangle &rc);
void OnPixelFormatChange();
void SetInterlacingMode(const BPCmd &bp);

View File

@ -20,6 +20,7 @@
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoCommon.h"
@ -205,6 +206,7 @@ static void BPWritten(const BPCmd& bp)
// The values in bpmem.copyTexSrcXY and bpmem.copyTexSrcWH are updated in case 0x49 and 0x4a in this function
u32 destAddr = bpmem.copyTexDest << 5;
u32 destStride = bpmem.copyMipMapStrideChannels << 5;
EFBRectangle srcRect;
srcRect.left = (int)bpmem.copyTexSrcXY.x;
@ -223,8 +225,9 @@ static void BPWritten(const BPCmd& bp)
if (g_ActiveConfig.bShowEFBCopyRegions)
stats.efb_regions.push_back(srcRect);
CopyEFB(destAddr, srcRect,
PE_copy.tp_realFormat(), bpmem.zcontrol.pixel_format,
// bpmem.zcontrol.pixel_format to PEControl::Z24 is when the game wants to copy from ZBuffer (Zbuffer uses 24-bit Format)
TextureCache::CopyRenderTargetToTexture(destAddr, PE_copy.tp_realFormat(), destStride,
bpmem.zcontrol.pixel_format, srcRect,
!!PE_copy.intensity_fmt, !!PE_copy.half_scale);
}
else
@ -251,10 +254,9 @@ static void BPWritten(const BPCmd& bp)
height = MAX_XFB_HEIGHT;
}
u32 stride = bpmem.copyMipMapStrideChannels << 5;
DEBUG_LOG(VIDEO, "RenderToXFB: destAddr: %08x | srcRect {%d %d %d %d} | fbWidth: %u | fbStride: %u | fbHeight: %u",
destAddr, srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, bpmem.copyTexSrcWH.x + 1, stride, height);
Renderer::RenderToXFB(destAddr, srcRect, stride, height, s_gammaLUT[PE_copy.gamma]);
destAddr, srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, bpmem.copyTexSrcWH.x + 1, destStride, height);
Renderer::RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]);
}
// Clear the rectangular region after copying it.

View File

@ -182,24 +182,6 @@ void TextureCache::Cleanup(int _frameCount)
}
}
// Frees every cached texture that OverlapsMemoryRange() reports as overlapping
// the range described by start_address and size.
void TextureCache::MakeRangeDynamic(u32 start_address, u32 size)
{
	for (TexCache::iterator it = textures_by_address.begin(); it != textures_by_address.end(); )
	{
		if (!it->second->OverlapsMemoryRange(start_address, size))
		{
			// Entry is unaffected, move on to the next one.
			++it;
			continue;
		}

		// FreeTexture hands back the iterator to continue from, so the
		// loop must not advance on its own after a removal.
		it = FreeTexture(it);
	}
}
bool TextureCache::TCacheEntryBase::OverlapsMemoryRange(u32 range_address, u32 range_size) const
{
if (addr + size_in_bytes <= range_address)
@ -243,7 +225,7 @@ TextureCache::TCacheEntryBase* TextureCache::DoPartialTextureUpdates(TexCache::i
&& entry_to_update->addr <= entry->addr
&& entry->addr + entry->size_in_bytes <= entry_to_update->addr + entry_to_update->size_in_bytes
&& entry->frameCount == FRAMECOUNT_INVALID
&& entry->copyMipMapStrideChannels * 32 == numBlocksX * block_size)
&& entry->memory_stride == numBlocksX * block_size)
{
u32 block_offset = (entry->addr - entry_to_update->addr) / block_size;
u32 block_x = block_offset % numBlocksX;
@ -372,11 +354,11 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage)
return nullptr;
// TexelSizeInNibbles(format) * width * height / 16;
const unsigned int bsw = TexDecoder_GetBlockWidthInTexels(texformat) - 1;
const unsigned int bsh = TexDecoder_GetBlockHeightInTexels(texformat) - 1;
const unsigned int bsw = TexDecoder_GetBlockWidthInTexels(texformat);
const unsigned int bsh = TexDecoder_GetBlockHeightInTexels(texformat);
unsigned int expandedWidth = (width + bsw) & (~bsw);
unsigned int expandedHeight = (height + bsh) & (~bsh);
unsigned int expandedWidth = ROUND_UP(width, bsw);
unsigned int expandedHeight = ROUND_UP(height, bsh);
const unsigned int nativeW = width;
const unsigned int nativeH = height;
@ -668,8 +650,8 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage)
{
const u32 mip_width = CalculateLevelSize(width, level);
const u32 mip_height = CalculateLevelSize(height, level);
const u32 expanded_mip_width = (mip_width + bsw) & (~bsw);
const u32 expanded_mip_height = (mip_height + bsh) & (~bsh);
const u32 expanded_mip_width = ROUND_UP(mip_width, bsw);
const u32 expanded_mip_height = ROUND_UP(mip_height, bsh);
const u8*& mip_src_data = from_tmem
? ((level % 2) ? ptr_odd : ptr_even)
@ -693,7 +675,7 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage)
return ReturnEntry(stage, entry);
}
void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, PEControl::PixelFormat srcFormat,
void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride, PEControl::PixelFormat srcFormat,
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf)
{
// Emulation methods:
@ -753,9 +735,11 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
case 0: // Z4
colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
cbufid = 0;
dstFormat |= _GX_TF_CTF;
break;
case 8: // Z8H
dstFormat |= _GX_TF_CTF;
case 1: // Z8
case 8: // Z8
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
cbufid = 1;
break;
@ -768,6 +752,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
case 11: // Z16 (reverse order)
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
cbufid = 3;
dstFormat |= _GX_TF_CTF;
break;
case 6: // Z24X8
@ -778,11 +763,13 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
case 9: // Z8M
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
cbufid = 5;
dstFormat |= _GX_TF_CTF;
break;
case 10: // Z8L
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
cbufid = 6;
dstFormat |= _GX_TF_CTF;
break;
case 12: // Z16L - copy lower 16 depth bits
@ -790,6 +777,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
// Used e.g. in Zelda: Skyward Sword
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
cbufid = 7;
dstFormat |= _GX_TF_CTF;
break;
default:
@ -798,6 +786,8 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
cbufid = 8;
break;
}
dstFormat |= _GX_TF_ZTF;
}
else if (isIntensity)
{
@ -862,11 +852,13 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
ColorMask[0] = 15.0f;
ColorMask[4] = 1.0f / 15.0f;
cbufid = 14;
dstFormat |= _GX_TF_CTF;
break;
case 1: // R8
case 8: // R8
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
cbufid = 15;
dstFormat |= _GX_TF_CTF;
break;
case 2: // RA4
@ -881,6 +873,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
fConstAdd[3] = 1.0f;
cbufid = 17;
}
dstFormat |= _GX_TF_CTF;
break;
case 3: // RA8
colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
@ -892,6 +885,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
fConstAdd[3] = 1.0f;
cbufid = 19;
}
dstFormat |= _GX_TF_CTF;
break;
case 7: // A8
@ -907,25 +901,30 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
fConstAdd[3] = 1.0f;
cbufid = 21;
}
dstFormat |= _GX_TF_CTF;
break;
case 9: // G8
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
cbufid = 22;
dstFormat |= _GX_TF_CTF;
break;
case 10: // B8
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
cbufid = 23;
dstFormat |= _GX_TF_CTF;
break;
case 11: // RG8
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
cbufid = 24;
dstFormat |= _GX_TF_CTF;
break;
case 12: // GB8
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
cbufid = 25;
dstFormat |= _GX_TF_CTF;
break;
case 4: // RGB565
@ -973,6 +972,13 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
}
}
u8* dst = Memory::GetPointer(dstAddr);
if (dst == nullptr)
{
ERROR_LOG(VIDEO, "Trying to copy from EFB to invalid address 0x%8x", dstAddr);
return;
}
const unsigned int tex_w = scaleByHalf ? srcRect.GetWidth() / 2 : srcRect.GetWidth();
const unsigned int tex_h = scaleByHalf ? srcRect.GetHeight() / 2 : srcRect.GetHeight();
@ -996,17 +1002,36 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat
TCacheEntryBase* entry = AllocateTexture(config);
// TODO: Using the wrong dstFormat, dumb...
entry->SetGeneralParameters(dstAddr, 0, dstFormat);
entry->SetDimensions(tex_w, tex_h, 1);
entry->SetHashes(TEXHASH_INVALID);
entry->frameCount = FRAMECOUNT_INVALID;
entry->is_efb_copy = true;
entry->SetEfbCopy(dstStride);
entry->is_custom_tex = false;
entry->copyMipMapStrideChannels = bpmem.copyMipMapStrideChannels;
entry->FromRenderTarget(dstAddr, dstFormat, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat);
entry->FromRenderTarget(dst, dstFormat, dstStride, srcFormat, srcRect, isIntensity, scaleByHalf, cbufid, colmat);
if (!g_ActiveConfig.bSkipEFBCopyToRam)
{
entry->hash = GetHash64(dst, (int)entry->size_in_bytes, g_ActiveConfig.iSafeTextureCache_ColorSamples);
// Invalidate all textures that overlap the range of our texture
TexCache::iterator
iter = textures_by_address.begin();
while (iter != textures_by_address.end())
{
if (iter->second->OverlapsMemoryRange(dstAddr, entry->size_in_bytes))
{
iter = FreeTexture(iter);
}
else
{
++iter;
}
}
}
if (g_ActiveConfig.bDumpEFBTarget)
{
@ -1052,3 +1077,36 @@ TextureCache::TexCache::iterator TextureCache::FreeTexture(TexCache::iterator it
return textures_by_address.erase(iter);
}
// Returns how many 32-byte cache lines one row of blocks of this texture
// occupies, based on the entry's format and native_width.
u32 TextureCache::TCacheEntryBase::CacheLinesPerRow() const
{
	u32 blockW = TexDecoder_GetBlockWidthInTexels(format);

	// Round up source width to multiple of block size
	u32 actualWidth = ROUND_UP(native_width, blockW);

	u32 numBlocksX = actualWidth / blockW;

	// 32-bit texel formats take two cache lines per block; all others take one.
	// format may carry the _GX_TF_ZTF / _GX_TF_CTF flag bits (GX_TF_Z24X8 is
	// 0x6 | _GX_TF_ZTF), so only the low nibble is compared. Comparing the
	// whole value would treat Z24X8 copies as one cache line per block.
	if ((format & 0xf) == GX_TF_RGBA8)
		numBlocksX = numBlocksX * 2;

	return numBlocksX;
}
// Returns the number of block rows needed to cover native_height for this
// entry's format.
u32 TextureCache::TCacheEntryBase::NumBlocksY() const
{
	const u32 block_height = TexDecoder_GetBlockHeightInTexels(format);

	// Pad the height up to a whole number of blocks, then count the blocks.
	const u32 padded_height = ROUND_UP(native_height, block_height);
	return padded_height / block_height;
}
// Marks this entry as an EFB copy and records the destination stride.
// stride: distance in bytes between consecutive block rows in memory.
// Also derives size_in_bytes from the stride and the number of block rows.
void TextureCache::TCacheEntryBase::SetEfbCopy(u32 stride)
{
	is_efb_copy = true;
	memory_stride = stride;

	// memory_stride is measured in bytes while CacheLinesPerRow() counts
	// 32-byte cache lines, so convert before comparing the two.
	_assert_msg_(VIDEO, memory_stride >= CacheLinesPerRow() * 32, "Memory stride is too small");

	size_in_bytes = memory_stride * NumBlocksY();
}

View File

@ -53,7 +53,7 @@ public:
u32 format;
bool is_efb_copy;
bool is_custom_tex;
u32 copyMipMapStrideChannels;
u32 memory_stride;
unsigned int native_width, native_height; // Texture dimensions from the GameCube's point of view
unsigned int native_levels;
@ -76,6 +76,7 @@ public:
native_width = _native_width;
native_height = _native_height;
native_levels = _native_levels;
memory_stride = _native_width;
}
void SetHashes(u64 _hash)
@ -83,6 +84,8 @@ public:
hash = _hash;
}
void SetEfbCopy(u32 stride);
TCacheEntryBase(const TCacheEntryConfig& c) : config(c) {}
virtual ~TCacheEntryBase();
@ -96,7 +99,7 @@ public:
virtual void Load(unsigned int width, unsigned int height,
unsigned int expanded_width, unsigned int level) = 0;
virtual void FromRenderTarget(u32 dstAddr, unsigned int dstFormat,
virtual void FromRenderTarget(u8* dst, unsigned int dstFormat, u32 dstStride,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect,
bool isIntensity, bool scaleByHalf, unsigned int cbufid,
const float *colmat) = 0;
@ -104,6 +107,11 @@ public:
bool OverlapsMemoryRange(u32 range_address, u32 range_size) const;
bool IsEfbCopy() const { return is_efb_copy; }
u32 NumBlocksY() const;
u32 CacheLinesPerRow() const;
void Memset(u8* ptr, u32 tag);
};
virtual ~TextureCache(); // needs virtual for DX11 dtor
@ -115,7 +123,6 @@ public:
static void Cleanup(int _frameCount);
static void Invalidate();
static void MakeRangeDynamic(u32 start_address, u32 size);
virtual TCacheEntryBase* CreateTexture(const TCacheEntryConfig& config) = 0;
@ -125,8 +132,8 @@ public:
static TCacheEntryBase* Load(const u32 stage);
static void UnbindTextures();
static void BindTextures();
static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, PEControl::PixelFormat srcFormat,
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf);
static void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride,
PEControl::PixelFormat srcFormat, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf);
static void RequestInvalidateTextureCache();

View File

@ -650,9 +650,11 @@ const char *GenerateEncodingShader(u32 format,API_TYPE ApiType)
case GX_CTF_GB8:
WriteCC8Encoder(p, "gb", ApiType);
break;
case GX_CTF_Z8H:
case GX_TF_Z8:
WriteC8Encoder(p, "r", ApiType);
break;
case GX_CTF_Z16R:
case GX_TF_Z16:
WriteZ16Encoder(p, ApiType);
break;

View File

@ -16,6 +16,7 @@ extern GC_ALIGNED16(u8 texMem[TMEM_SIZE]);
enum TextureFormat
{
// These are the texture formats that can be read by the texture mapper.
GX_TF_I4 = 0x0,
GX_TF_I8 = 0x1,
GX_TF_IA4 = 0x2,
@ -28,14 +29,21 @@ enum TextureFormat
GX_TF_C14X2 = 0xA,
GX_TF_CMPR = 0xE,
_GX_TF_CTF = 0x20, // copy-texture-format only (simply means linear?)
_GX_TF_ZTF = 0x10, // Z-texture-format
_GX_TF_ZTF = 0x10, // flag for Z texture formats (used internally by dolphin)
// these formats are also valid when copying targets
// Depth texture formats (which directly map to the equivalent colour format above.)
GX_TF_Z8 = 0x1 | _GX_TF_ZTF,
GX_TF_Z16 = 0x3 | _GX_TF_ZTF,
GX_TF_Z24X8 = 0x6 | _GX_TF_ZTF,
_GX_TF_CTF = 0x20, // flag for copy-texture-format only (used internally by dolphin)
// These are extra formats that can be used when copying from efb,
// they use one of texel formats from above, but pack diffrent data into them.
GX_CTF_R4 = 0x0 | _GX_TF_CTF,
GX_CTF_RA4 = 0x2 | _GX_TF_CTF,
GX_CTF_RA8 = 0x3 | _GX_TF_CTF,
GX_CTF_YUVA8 = 0x6 | _GX_TF_CTF,
GX_CTF_YUVA8 = 0x6 | _GX_TF_CTF, // YUV 4:4:4 - Dolphin doesn't implement this format as no commercial games use it
GX_CTF_A8 = 0x7 | _GX_TF_CTF,
GX_CTF_R8 = 0x8 | _GX_TF_CTF,
GX_CTF_G8 = 0x9 | _GX_TF_CTF,
@ -43,13 +51,12 @@ enum TextureFormat
GX_CTF_RG8 = 0xB | _GX_TF_CTF,
GX_CTF_GB8 = 0xC | _GX_TF_CTF,
GX_TF_Z8 = 0x1 | _GX_TF_ZTF,
GX_TF_Z16 = 0x3 | _GX_TF_ZTF,
GX_TF_Z24X8 = 0x6 | _GX_TF_ZTF,
// extra depth texture formats that can be used for efb copies.
GX_CTF_Z4 = 0x0 | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z8H = 0x8 | _GX_TF_ZTF | _GX_TF_CTF, // This produces an identical result to to GX_TF_Z8
GX_CTF_Z8M = 0x9 | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z8L = 0xA | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z16R = 0xB | _GX_TF_ZTF | _GX_TF_CTF, // Reversed version of GX_TF_Z16
GX_CTF_Z16L = 0xC | _GX_TF_ZTF | _GX_TF_CTF,
};

View File

@ -35,7 +35,6 @@ int TexDecoder_GetTexelSizeInNibbles(int format)
case GX_CTF_R4: return 1;
case GX_CTF_RA4: return 2;
case GX_CTF_RA8: return 4;
case GX_CTF_YUVA8: return 8;
case GX_CTF_A8: return 2;
case GX_CTF_R8: return 2;
case GX_CTF_G8: return 2;
@ -48,10 +47,14 @@ int TexDecoder_GetTexelSizeInNibbles(int format)
case GX_TF_Z24X8: return 8;
case GX_CTF_Z4: return 1;
case GX_CTF_Z8H: return 2;
case GX_CTF_Z8M: return 2;
case GX_CTF_Z8L: return 2;
case GX_CTF_Z16R: return 4;
case GX_CTF_Z16L: return 4;
default: return 1;
default:
PanicAlert("Unsupported Texture Format (%08x)! (GetTexelSizeInNibbles)", format);
return 1;
}
}
@ -88,11 +91,13 @@ int TexDecoder_GetBlockWidthInTexels(u32 format)
case GX_TF_Z16: return 4;
case GX_TF_Z24X8: return 4;
case GX_CTF_Z4: return 8;
case GX_CTF_Z8H: return 8;
case GX_CTF_Z8M: return 8;
case GX_CTF_Z8L: return 8;
case GX_CTF_Z16R: return 4;
case GX_CTF_Z16L: return 4;
default:
ERROR_LOG(VIDEO, "Unsupported Texture Format (%08x)! (GetBlockWidthInTexels)", format);
PanicAlert("Unsupported Texture Format (%08x)! (GetBlockWidthInTexels)", format);
return 8;
}
}
@ -125,11 +130,13 @@ int TexDecoder_GetBlockHeightInTexels(u32 format)
case GX_TF_Z16: return 4;
case GX_TF_Z24X8: return 4;
case GX_CTF_Z4: return 8;
case GX_CTF_Z8H: return 4;
case GX_CTF_Z8M: return 4;
case GX_CTF_Z8L: return 4;
case GX_CTF_Z16R: return 4;
case GX_CTF_Z16L: return 4;
default:
ERROR_LOG(VIDEO, "Unsupported Texture Format (%08x)! (GetBlockHeightInTexels)", format);
PanicAlert("Unsupported Texture Format (%08x)! (GetBlockHeightInTexels)", format);
return 4;
}
}