mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-23 12:41:50 +01:00
Handle OOB blits by adding to the texture base offset
The previous method caused out-of-bounds (OOB) reads on the last row to be clamped, and adding an extra row could run into unmapped memory. Instead, use this technique, which is based on how Ryu does it.
This commit is contained in:
parent
6aef7fdd1e
commit
7d0b7f0b71
@ -13,7 +13,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
using IOVA = soc::gm20b::IOVA;
|
using IOVA = soc::gm20b::IOVA;
|
||||||
using MemoryLayout = skyline::soc::gm20b::engine::fermi2d::type::MemoryLayout;
|
using MemoryLayout = skyline::soc::gm20b::engine::fermi2d::type::MemoryLayout;
|
||||||
|
|
||||||
gpu::GuestTexture Fermi2D::GetGuestTexture(const Surface &surface) {
|
std::pair<gpu::GuestTexture, bool> Fermi2D::GetGuestTexture(const Surface &surface, u32 oobReadStart, u32 oobReadWidth) {
|
||||||
auto determineFormat = [&](Surface::SurfaceFormat format) -> skyline::gpu::texture::Format {
|
auto determineFormat = [&](Surface::SurfaceFormat format) -> skyline::gpu::texture::Format {
|
||||||
#define FORMAT_CASE(fermiFmt, skFmt, fmtType) \
|
#define FORMAT_CASE(fermiFmt, skFmt, fmtType) \
|
||||||
case Surface::SurfaceFormat::fermiFmt ## fmtType: \
|
case Surface::SurfaceFormat::fermiFmt ## fmtType: \
|
||||||
@ -84,8 +84,15 @@ namespace skyline::gpu::interconnect {
|
|||||||
texture.layerCount = 1;
|
texture.layerCount = 1;
|
||||||
texture.viewType = vk::ImageViewType::e2D;
|
texture.viewType = vk::ImageViewType::e2D;
|
||||||
|
|
||||||
|
|
||||||
|
u64 addressOffset{};
|
||||||
if (surface.memoryLayout == MemoryLayout::Pitch) {
|
if (surface.memoryLayout == MemoryLayout::Pitch) {
|
||||||
texture.dimensions = gpu::texture::Dimensions{surface.stride / texture.format->bpb, surface.height, 1};
|
texture.dimensions = gpu::texture::Dimensions{surface.stride / texture.format->bpb, surface.height, 1};
|
||||||
|
|
||||||
|
// OpenGL games rely on reads wrapping around to the next line when reading out of bounds, emulate this behaviour by offsetting the address
|
||||||
|
if (oobReadStart && surface.width == (oobReadWidth + oobReadStart) && (oobReadWidth + oobReadStart) > texture.dimensions.width)
|
||||||
|
addressOffset += oobReadStart * texture.format->bpb;
|
||||||
|
|
||||||
texture.tileConfig = gpu::texture::TileConfig{
|
texture.tileConfig = gpu::texture::TileConfig{
|
||||||
.mode = gpu::texture::TileMode::Pitch,
|
.mode = gpu::texture::TileMode::Pitch,
|
||||||
.pitch = surface.stride
|
.pitch = surface.stride
|
||||||
@ -99,11 +106,11 @@ namespace skyline::gpu::interconnect {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
IOVA iova{surface.address};
|
u64 iova{u64{surface.address} + addressOffset};
|
||||||
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(iova, texture.GetSize())};
|
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(iova, texture.GetSize())};
|
||||||
texture.mappings.assign(mappings.begin(), mappings.end());
|
texture.mappings.assign(mappings.begin(), mappings.end());
|
||||||
|
|
||||||
return texture;
|
return {texture, addressOffset != 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
Fermi2D::Fermi2D(GPU &gpu, soc::gm20b::ChannelContext &channelCtx)
|
Fermi2D::Fermi2D(GPU &gpu, soc::gm20b::ChannelContext &channelCtx)
|
||||||
@ -114,10 +121,19 @@ namespace skyline::gpu::interconnect {
|
|||||||
void Fermi2D::Blit(const Surface &srcSurface, const Surface &dstSurface, float srcRectX, float srcRectY, u32 dstRectWidth, u32 dstRectHeight, u32 dstRectX, u32 dstRectY, float duDx, float dvDy, SampleModeOrigin sampleOrigin, bool resolve, SampleModeFilter filter) {
|
void Fermi2D::Blit(const Surface &srcSurface, const Surface &dstSurface, float srcRectX, float srcRectY, u32 dstRectWidth, u32 dstRectHeight, u32 dstRectX, u32 dstRectY, float duDx, float dvDy, SampleModeOrigin sampleOrigin, bool resolve, SampleModeFilter filter) {
|
||||||
TRACE_EVENT("gpu", "Fermi2D::Blit");
|
TRACE_EVENT("gpu", "Fermi2D::Blit");
|
||||||
|
|
||||||
// TODO: When we support MSAA perform a resolve operation rather than blit when the `resolve` flag is set.
|
// Blit shader always samples from centre so adjust if necessary
|
||||||
auto srcGuestTexture{GetGuestTexture(srcSurface)};
|
float centredSrcRectX{sampleOrigin == SampleModeOrigin::Corner ? srcRectX - 0.5f : srcRectX};
|
||||||
auto dstGuestTexture{GetGuestTexture(dstSurface)};
|
float centredSrcRectY{sampleOrigin == SampleModeOrigin::Corner ? srcRectY - 0.5f : srcRectY};
|
||||||
|
|
||||||
|
u32 oobReadStart{static_cast<u32>(centredSrcRectX)};
|
||||||
|
u32 oobReadWidth{static_cast<u32>(duDx * static_cast<float>(dstRectWidth))};
|
||||||
|
|
||||||
|
// TODO: When we support MSAA perform a resolve operation rather than blit when the `resolve` flag is set.
|
||||||
|
auto [srcGuestTexture, srcWentOob]{GetGuestTexture(srcSurface, oobReadStart, oobReadWidth)};
|
||||||
|
if (srcWentOob)
|
||||||
|
centredSrcRectX = 0.0f;
|
||||||
|
|
||||||
|
auto [dstGuestTexture, dstWentOob]{GetGuestTexture(dstSurface)};
|
||||||
auto srcTextureView{gpu.texture.FindOrCreate(srcGuestTexture, executor.tag)};
|
auto srcTextureView{gpu.texture.FindOrCreate(srcGuestTexture, executor.tag)};
|
||||||
executor.AttachDependency(srcTextureView);
|
executor.AttachDependency(srcTextureView);
|
||||||
executor.AttachTexture(srcTextureView.get());
|
executor.AttachTexture(srcTextureView.get());
|
||||||
@ -127,10 +143,6 @@ namespace skyline::gpu::interconnect {
|
|||||||
executor.AttachTexture(dstTextureView.get());
|
executor.AttachTexture(dstTextureView.get());
|
||||||
dstTextureView->texture->MarkGpuDirty(executor.usageTracker);
|
dstTextureView->texture->MarkGpuDirty(executor.usageTracker);
|
||||||
|
|
||||||
// Blit shader always samples from centre so adjust if necessary
|
|
||||||
float centredSrcRectX{sampleOrigin == SampleModeOrigin::Corner ? srcRectX - 0.5f : srcRectX};
|
|
||||||
float centredSrcRectY{sampleOrigin == SampleModeOrigin::Corner ? srcRectY - 0.5f : srcRectY};
|
|
||||||
|
|
||||||
executor.AddCheckpoint("Before blit");
|
executor.AddCheckpoint("Before blit");
|
||||||
gpu.helperShaders.blitHelperShader.Blit(
|
gpu.helperShaders.blitHelperShader.Blit(
|
||||||
gpu,
|
gpu,
|
||||||
|
@ -33,7 +33,7 @@ namespace skyline::gpu::interconnect {
|
|||||||
soc::gm20b::ChannelContext &channelCtx;
|
soc::gm20b::ChannelContext &channelCtx;
|
||||||
gpu::interconnect::CommandExecutor &executor;
|
gpu::interconnect::CommandExecutor &executor;
|
||||||
|
|
||||||
gpu::GuestTexture GetGuestTexture(const Surface &surface);
|
std::pair<gpu::GuestTexture, bool> GetGuestTexture(const Surface &surface, u32 oobReadStart = 0, u32 oobReadWidth = 0);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Fermi2D(GPU &gpu, soc::gm20b::ChannelContext &channelCtx);
|
Fermi2D(GPU &gpu, soc::gm20b::ChannelContext &channelCtx);
|
||||||
|
@ -175,7 +175,6 @@ namespace skyline::gpu {
|
|||||||
struct FragmentPushConstantLayout {
|
struct FragmentPushConstantLayout {
|
||||||
glsl::Vec2 srcOriginUV;
|
glsl::Vec2 srcOriginUV;
|
||||||
glsl::Vec2 dstSrcScaleFactor;
|
glsl::Vec2 dstSrcScaleFactor;
|
||||||
float srcHeightRecip;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr static std::array<vk::PushConstantRange, 2> PushConstantRanges{
|
constexpr static std::array<vk::PushConstantRange, 2> PushConstantRanges{
|
||||||
@ -254,7 +253,6 @@ namespace skyline::gpu {
|
|||||||
}, blit::FragmentPushConstantLayout{
|
}, blit::FragmentPushConstantLayout{
|
||||||
.srcOriginUV = {srcRect.x / srcImageDimensions.width, srcRect.y / srcImageDimensions.height},
|
.srcOriginUV = {srcRect.x / srcImageDimensions.width, srcRect.y / srcImageDimensions.height},
|
||||||
.dstSrcScaleFactor = {dstSrcScaleFactorX * (srcRect.width / srcImageDimensions.width), dstSrcScaleFactorY * (srcRect.height / srcImageDimensions.height)},
|
.dstSrcScaleFactor = {dstSrcScaleFactorX * (srcRect.width / srcImageDimensions.width), dstSrcScaleFactorY * (srcRect.height / srcImageDimensions.height)},
|
||||||
.srcHeightRecip = 1.0f / srcImageDimensions.height
|
|
||||||
},
|
},
|
||||||
GetPipeline(gpu,
|
GetPipeline(gpu,
|
||||||
{dstImageView->format->vkFormat,
|
{dstImageView->format->vkFormat,
|
||||||
|
@ -8,14 +8,10 @@ layout (push_constant) uniform constants {
|
|||||||
layout (offset = 16)
|
layout (offset = 16)
|
||||||
vec2 srcOriginUV;
|
vec2 srcOriginUV;
|
||||||
vec2 dstSrcScaleFactor;
|
vec2 dstSrcScaleFactor;
|
||||||
float srcHeightRecip;
|
|
||||||
} PC;
|
} PC;
|
||||||
|
|
||||||
void main()
|
void main()
|
||||||
{
|
{
|
||||||
vec2 srcUV = dstUV * PC.dstSrcScaleFactor + PC.srcOriginUV;
|
vec2 srcUV = dstUV * PC.dstSrcScaleFactor + PC.srcOriginUV;
|
||||||
// Account for out of bounds blits by moving to the next line of the source texture for the copy
|
|
||||||
srcUV.y += floor(srcUV.x) * PC.srcHeightRecip;
|
|
||||||
srcUV.x = srcUV.x - floor(srcUV.x);
|
|
||||||
colour.rgba = texture(src, srcUV);
|
colour.rgba = texture(src, srcUV);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user