mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-06 04:15:07 +01:00
Handle OOB blits by adding to the texture base offset
The previous method caused OOB reads on the last row to clamp, and adding an extra row could run into unmapped memory. Instead, offset the texture base address, following the approach Ryu uses.
This commit is contained in:
parent
6aef7fdd1e
commit
7d0b7f0b71
@ -13,7 +13,7 @@ namespace skyline::gpu::interconnect {
|
||||
using IOVA = soc::gm20b::IOVA;
|
||||
using MemoryLayout = skyline::soc::gm20b::engine::fermi2d::type::MemoryLayout;
|
||||
|
||||
gpu::GuestTexture Fermi2D::GetGuestTexture(const Surface &surface) {
|
||||
std::pair<gpu::GuestTexture, bool> Fermi2D::GetGuestTexture(const Surface &surface, u32 oobReadStart, u32 oobReadWidth) {
|
||||
auto determineFormat = [&](Surface::SurfaceFormat format) -> skyline::gpu::texture::Format {
|
||||
#define FORMAT_CASE(fermiFmt, skFmt, fmtType) \
|
||||
case Surface::SurfaceFormat::fermiFmt ## fmtType: \
|
||||
@ -84,8 +84,15 @@ namespace skyline::gpu::interconnect {
|
||||
texture.layerCount = 1;
|
||||
texture.viewType = vk::ImageViewType::e2D;
|
||||
|
||||
|
||||
u64 addressOffset{};
|
||||
if (surface.memoryLayout == MemoryLayout::Pitch) {
|
||||
texture.dimensions = gpu::texture::Dimensions{surface.stride / texture.format->bpb, surface.height, 1};
|
||||
|
||||
// OpenGL games rely on reads wrapping around to the next line when reading out of bounds, emulate this behaviour by offsetting the address
|
||||
if (oobReadStart && surface.width == (oobReadWidth + oobReadStart) && (oobReadWidth + oobReadStart) > texture.dimensions.width)
|
||||
addressOffset += oobReadStart * texture.format->bpb;
|
||||
|
||||
texture.tileConfig = gpu::texture::TileConfig{
|
||||
.mode = gpu::texture::TileMode::Pitch,
|
||||
.pitch = surface.stride
|
||||
@ -99,11 +106,11 @@ namespace skyline::gpu::interconnect {
|
||||
};
|
||||
}
|
||||
|
||||
IOVA iova{surface.address};
|
||||
u64 iova{u64{surface.address} + addressOffset};
|
||||
auto mappings{channelCtx.asCtx->gmmu.TranslateRange(iova, texture.GetSize())};
|
||||
texture.mappings.assign(mappings.begin(), mappings.end());
|
||||
|
||||
return texture;
|
||||
return {texture, addressOffset != 0};
|
||||
}
|
||||
|
||||
Fermi2D::Fermi2D(GPU &gpu, soc::gm20b::ChannelContext &channelCtx)
|
||||
@ -114,10 +121,19 @@ namespace skyline::gpu::interconnect {
|
||||
void Fermi2D::Blit(const Surface &srcSurface, const Surface &dstSurface, float srcRectX, float srcRectY, u32 dstRectWidth, u32 dstRectHeight, u32 dstRectX, u32 dstRectY, float duDx, float dvDy, SampleModeOrigin sampleOrigin, bool resolve, SampleModeFilter filter) {
|
||||
TRACE_EVENT("gpu", "Fermi2D::Blit");
|
||||
|
||||
// TODO: When we support MSAA perform a resolve operation rather than blit when the `resolve` flag is set.
|
||||
auto srcGuestTexture{GetGuestTexture(srcSurface)};
|
||||
auto dstGuestTexture{GetGuestTexture(dstSurface)};
|
||||
// Blit shader always samples from centre so adjust if necessary
|
||||
float centredSrcRectX{sampleOrigin == SampleModeOrigin::Corner ? srcRectX - 0.5f : srcRectX};
|
||||
float centredSrcRectY{sampleOrigin == SampleModeOrigin::Corner ? srcRectY - 0.5f : srcRectY};
|
||||
|
||||
u32 oobReadStart{static_cast<u32>(centredSrcRectX)};
|
||||
u32 oobReadWidth{static_cast<u32>(duDx * static_cast<float>(dstRectWidth))};
|
||||
|
||||
// TODO: When we support MSAA perform a resolve operation rather than blit when the `resolve` flag is set.
|
||||
auto [srcGuestTexture, srcWentOob]{GetGuestTexture(srcSurface, oobReadStart, oobReadWidth)};
|
||||
if (srcWentOob)
|
||||
centredSrcRectX = 0.0f;
|
||||
|
||||
auto [dstGuestTexture, dstWentOob]{GetGuestTexture(dstSurface)};
|
||||
auto srcTextureView{gpu.texture.FindOrCreate(srcGuestTexture, executor.tag)};
|
||||
executor.AttachDependency(srcTextureView);
|
||||
executor.AttachTexture(srcTextureView.get());
|
||||
@ -127,10 +143,6 @@ namespace skyline::gpu::interconnect {
|
||||
executor.AttachTexture(dstTextureView.get());
|
||||
dstTextureView->texture->MarkGpuDirty(executor.usageTracker);
|
||||
|
||||
// Blit shader always samples from centre so adjust if necessary
|
||||
float centredSrcRectX{sampleOrigin == SampleModeOrigin::Corner ? srcRectX - 0.5f : srcRectX};
|
||||
float centredSrcRectY{sampleOrigin == SampleModeOrigin::Corner ? srcRectY - 0.5f : srcRectY};
|
||||
|
||||
executor.AddCheckpoint("Before blit");
|
||||
gpu.helperShaders.blitHelperShader.Blit(
|
||||
gpu,
|
||||
|
@ -33,7 +33,7 @@ namespace skyline::gpu::interconnect {
|
||||
soc::gm20b::ChannelContext &channelCtx;
|
||||
gpu::interconnect::CommandExecutor &executor;
|
||||
|
||||
gpu::GuestTexture GetGuestTexture(const Surface &surface);
|
||||
std::pair<gpu::GuestTexture, bool> GetGuestTexture(const Surface &surface, u32 oobReadStart = 0, u32 oobReadWidth = 0);
|
||||
|
||||
public:
|
||||
Fermi2D(GPU &gpu, soc::gm20b::ChannelContext &channelCtx);
|
||||
|
@ -175,7 +175,6 @@ namespace skyline::gpu {
|
||||
struct FragmentPushConstantLayout {
|
||||
glsl::Vec2 srcOriginUV;
|
||||
glsl::Vec2 dstSrcScaleFactor;
|
||||
float srcHeightRecip;
|
||||
};
|
||||
|
||||
constexpr static std::array<vk::PushConstantRange, 2> PushConstantRanges{
|
||||
@ -254,7 +253,6 @@ namespace skyline::gpu {
|
||||
}, blit::FragmentPushConstantLayout{
|
||||
.srcOriginUV = {srcRect.x / srcImageDimensions.width, srcRect.y / srcImageDimensions.height},
|
||||
.dstSrcScaleFactor = {dstSrcScaleFactorX * (srcRect.width / srcImageDimensions.width), dstSrcScaleFactorY * (srcRect.height / srcImageDimensions.height)},
|
||||
.srcHeightRecip = 1.0f / srcImageDimensions.height
|
||||
},
|
||||
GetPipeline(gpu,
|
||||
{dstImageView->format->vkFormat,
|
||||
|
@ -8,14 +8,10 @@ layout (push_constant) uniform constants {
|
||||
layout (offset = 16)
|
||||
vec2 srcOriginUV;
|
||||
vec2 dstSrcScaleFactor;
|
||||
float srcHeightRecip;
|
||||
} PC;
|
||||
|
||||
void main()
|
||||
{
|
||||
vec2 srcUV = dstUV * PC.dstSrcScaleFactor + PC.srcOriginUV;
|
||||
// Account for out of bounds blits by moving to the next line of the source texture for the copy
|
||||
srcUV.y += floor(srcUV.x) * PC.srcHeightRecip;
|
||||
srcUV.x = srcUV.x - floor(srcUV.x);
|
||||
colour.rgba = texture(src, srcUV);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user