Implement primitive Linear->Block Linear DMA engine copies

Slightly inaccurate and misses some features but good enough for most games, should be revisited later.
This commit is contained in:
Billy Laws 2022-04-09 17:56:06 +01:00 committed by PixelyIon
parent 3c26921d54
commit 3e4e8de1d2
4 changed files with 126 additions and 13 deletions

View File

@ -150,4 +150,22 @@ namespace skyline::gpu::format {
#undef FORMAT_NORM_INT_FLOAT
// @fmt:on
/**
 * @brief Looks up a format for a given per-pixel byte count
 * @param bytesPerPixel The byte count to look up; 1, 2, 4, 8 and 16 are recognised
 * @return R8Uint, R8G8Uint, R8G8B8A8Uint, R16G16B16A16Uint or R32G32B32A32Uint respectively
 * @note Any other value logs an error and yields R8Uint rather than failing
 */
inline const gpu::texture::FormatBase &GetFormatForBpp(u32 bytesPerPixel) {
    if (bytesPerPixel == 1)
        return R8Uint;
    if (bytesPerPixel == 2)
        return R8G8Uint;
    if (bytesPerPixel == 4)
        return R8G8B8A8Uint;
    if (bytesPerPixel == 8)
        return R16G16B16A16Uint;
    if (bytesPerPixel == 16)
        return R32G32B32A32Uint;

    // No recognised size matched: report it and hand back the single-byte format
    Logger::Error("Couldn't convert bytes per pixel: {}", bytesPerPixel);
    return R8Uint;
}
}

View File

@ -12,7 +12,7 @@ namespace skyline::gpu::texture {
constexpr u8 GobWidth{64}; // The width of a GOB in bytes
constexpr u8 GobHeight{8}; // The height of a GOB in lines
size_t GetBlockLinearLayerSize(const GuestTexture &guest) {
inline size_t GetBlockLinearLayerSize(const GuestTexture &guest) {
u32 blockHeight{guest.tileConfig.blockHeight}; //!< The height of the blocks in GOBs
u32 robHeight{GobHeight * blockHeight}; //!< The height of a single ROB (Row of Blocks) in lines
u32 surfaceHeightLines{util::DivideCeil(guest.dimensions.height, u32{guest.format->blockHeight})}; //!< The height of the surface in lines
@ -27,7 +27,7 @@ namespace skyline::gpu::texture {
/**
* @brief Copies pixel data between a linear and blocklinear texture
*/
template <typename CopyFunction>
template<typename CopyFunction>
void CopyBlockLinearInternal(const GuestTexture &guest, u8 *blockLinear, u8 *linear, CopyFunction copyFunction) {
u32 blockHeight{guest.tileConfig.blockHeight};
u32 robHeight{GobHeight * blockHeight};
@ -99,15 +99,15 @@ namespace skyline::gpu::texture {
/**
* @brief Copies the contents of a blocklinear guest texture to a linear output buffer
*/
void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
inline void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
    // std::memcpy is wrapped in a lambda instead of being passed by address: standard library functions
    // are not designated addressable, so forming a pointer to one is not guaranteed to be well-formed.
    // The callback receives the same (first, second, size) arguments std::memcpy did and copies second -> first.
    CopyBlockLinearInternal(guest, guestInput, linearOutput, [](u8 *dst, u8 *src, size_t size) {
        std::memcpy(dst, src, size);
    });
}
/**
* @brief Copies the contents of a linear input buffer to a blocklinear guest texture
*/
void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
CopyBlockLinearInternal(guest, guestOutput, linearInput, [](u8* src, u8* dst, size_t size) {
inline void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
    // The callback copies from its first pointer into its second, i.e. the reverse of std::memcpy's
    // argument order, so the same internal routine can be driven in the linear -> blocklinear direction
    auto copyFirstIntoSecond{[](u8 *from, u8 *to, size_t length) {
        std::memcpy(to, from, length);
    }};

    CopyBlockLinearInternal(guest, guestOutput, linearInput, copyFirstIntoSecond);
}
@ -115,7 +115,7 @@ namespace skyline::gpu::texture {
/**
* @brief Copies the contents of a pitch-linear guest texture to a linear output buffer
*/
void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
inline void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data
auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data
@ -132,7 +132,7 @@ namespace skyline::gpu::texture {
/**
* @brief Copies the contents of a linear buffer to a pitch-linear guest texture
*/
void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
inline void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data
auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data

View File

@ -1,6 +1,9 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2022 yuzu Emulator Project (https://github.com/yuzu-emu/yuzu/)
#include <gpu/texture/format.h>
#include <gpu/texture/layout.h>
#include <soc.h>
#include <soc/gm20b/channel.h>
#include <soc/gm20b/gmmu.h>
@ -27,9 +30,18 @@ namespace skyline::soc::gm20b::engine {
if (*registers.lineLengthIn == 0)
return; // Nothing to copy
if (registers.launchDma->remapEnable) {
Logger::Warn("DMA remapping is unimplemented!");
return;
}
if (registers.launchDma->multiLineEnable) {
// 2D/3D copy
Logger::Warn("2D/3D DMA engine copies are unimplemented");
if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch &&
registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear)
CopyPitchToBlockLinear();
else
Logger::Warn("Unimplemented multi-line copy type: {} -> {}!",
static_cast<u8>(registers.launchDma->srcMemoryLayout), static_cast<u8>(registers.launchDma->dstMemoryLayout));
} else {
// 1D buffer copy
// TODO: implement swizzled 1D copies based on VMM 'kind'
@ -38,6 +50,64 @@ namespace skyline::soc::gm20b::engine {
}
}
/**
 * @brief Performs a multi-line DMA copy from a pitch-linear source into a single layer of a block-linear destination surface
 * @note Only a restricted subset of copies is handled, every unsupported configuration logs a warning and returns without copying:
 *       - the destination must not be 3D (block depth and surface depth of at most 1)
 *       - the destination block width must be 1
 *       - the source must be tightly packed (pitch equal to the line length multiplied by the pixel size)
 *       - the destination origin must be (0, 0)
 *       - the source must supply at least as many lines as the destination is tall
 *       - the source and the destination layer must each translate to a single contiguous mapping
 */
void MaxwellDma::CopyPitchToBlockLinear() {
    if (registers.dstSurface->blockSize.Depth() > 1 || registers.dstSurface->depth > 1) {
        Logger::Warn("3D DMA engine copies are unimplemented!");
        return;
    }

    if (registers.dstSurface->blockSize.Width() != 1) {
        Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented!");
        return;
    }

    // NOTE(review): LaunchDma returns early when remapEnable is set, so this path only runs with remapping disabled;
    // confirm that deriving the pixel size from the remap component registers is intended in that mode
    u32 bytesPerPixel{static_cast<u32>(registers.remapComponents->ComponentSize() * registers.remapComponents->NumSrcComponents())};
    if (bytesPerPixel * *registers.lineLengthIn != *registers.pitchIn) {
        Logger::Warn("Non-linear DMA source textures are not implemented!");
        return;
    }

    if (registers.dstSurface->origin.x || registers.dstSurface->origin.y) {
        Logger::Warn("Non-zero origin DMA copies are not implemented!");
        return;
    }

    // The copy below walks every line of the destination texture while the source mapping only spans lineCount lines,
    // a shorter source would therefore be read past the end of the range that was translated for it
    if (*registers.lineCount < registers.dstSurface->height) {
        Logger::Warn("DMA copies with fewer source lines than the destination height are unimplemented: src: {} dst: {}", *registers.lineCount, registers.dstSurface->height);
        return;
    }

    gpu::GuestTexture srcTexture{span<u8>{},
                                 gpu::texture::Dimensions{*registers.lineLengthIn, *registers.lineCount, 1},
                                 gpu::format::GetFormatForBpp(bytesPerPixel),
                                 gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear },
                                 gpu::texture::TextureType::e2D};

    auto srcLayerSize{srcTexture.GetLayerSize()};
    if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcLayerSize)}; mappings.size() == 1) {
        srcTexture.mappings[0] = mappings[0];
    } else {
        Logger::Warn("DMA for split textures is unimplemented!");
        return;
    }

    // A width mismatch is only reported, the copy still proceeds using the source line length as the destination width
    if (*registers.lineLengthIn != registers.dstSurface->width)
        Logger::Warn("DMA copy width mismatch: src: {} dst: {}", *registers.lineLengthIn, registers.dstSurface->width);

    // This represents a single layer view into a potentially multi-layer texture
    gpu::GuestTexture dstTexture{span<u8>{},
                                 gpu::texture::Dimensions{*registers.lineLengthIn, registers.dstSurface->height, 1},
                                 gpu::format::GetFormatForBpp(bytesPerPixel),
                                 gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.dstSurface->blockSize.Height(), .blockDepth = 1 },
                                 gpu::texture::TextureType::e2D};

    // Layers are addressed as consecutive layer-sized slices starting at the output offset
    auto dstLayerSize{dstTexture.GetLayerSize()};
    u64 dstLayerAddress{*registers.offsetOut + dstLayerSize * registers.dstSurface->layer};
    if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstLayerSize)}; mappings.size() == 1) {
        dstTexture.mappings[0] = mappings[0];
    } else {
        Logger::Warn("DMA for split textures is unimplemented!");
        return;
    }

    Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcTexture.dimensions.width, srcTexture.dimensions.height, *registers.offsetIn, dstTexture.dimensions.width, dstTexture.dimensions.height, dstLayerAddress);

    gpu::texture::CopyLinearToBlockLinear(dstTexture, srcTexture.mappings.front().data(), dstTexture.mappings.front().data());
}
void MaxwellDma::CallMethodBatchNonInc(u32 method, span<u32> arguments) {
for (u32 argument : arguments)
HandleMethod(method, argument);

View File

@ -22,6 +22,8 @@ namespace skyline::soc::gm20b::engine {
void LaunchDma();
void CopyPitchToBlockLinear();
public:
/**
* @url https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
@ -187,19 +189,42 @@ namespace skyline::soc::gm20b::engine {
u8 _pad5_ : 2;
u8 numDstComponentsMinusOne : 2;
u8 _pad6_ : 6;
/**
 * @return The component size, decoded from its minus-one encoded register field
 */
u8 ComponentSize() {
    return static_cast<u8>(componentSizeMinusOne + 1);
}
/**
 * @return The number of source components, decoded from its minus-one encoded register field
 */
u8 NumSrcComponents() {
    return static_cast<u8>(numSrcComponentsMinusOne + 1);
}
/**
 * @return The number of destination components, decoded from its minus-one encoded register field
 */
u8 NumDstComponents() {
    return static_cast<u8>(numDstComponentsMinusOne + 1);
}
};
static_assert(sizeof(RemapComponents) == 0xC);
Register<0x1C2, RemapComponents> remapComponents;
struct Surface {
// Nvidia's docs here differ from other emus and deko3d so go with what they say
struct {
u8 width : 4;
u8 height : 4;
u8 depth : 4;
u8 widthLog2 : 4;
u8 heightLog2 : 4;
u8 depthLog2 : 4;
u8 gobHeight : 4;
u16 _pad_;
/**
 * @return The block width, expanded from its log2 encoded field (2 to the power of widthLog2)
 * @note The result is truncated to a u8, so a field value of 8 or above does not fit
 */
u8 Width() {
    return static_cast<u8>(1U << widthLog2);
}
/**
 * @return The block height, expanded from its log2 encoded field (2 to the power of heightLog2)
 * @note The result is truncated to a u8, so a field value of 8 or above does not fit
 */
u8 Height() {
    return static_cast<u8>(1U << heightLog2);
}
/**
 * @return The block depth, expanded from its log2 encoded field (2 to the power of depthLog2)
 * @note The result is truncated to a u8, so a field value of 8 or above does not fit
 */
u8 Depth() {
    return static_cast<u8>(1U << depthLog2);
}
} blockSize;
u32 width;
u32 height;