mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-04 23:55:08 +01:00
Implement primitive Linear->Block Linear DMA engine copies
Slightly inaccurate and misses some features but good enough for most games; should be revisited later.
This commit is contained in:
parent
3c26921d54
commit
3e4e8de1d2
@ -150,4 +150,22 @@ namespace skyline::gpu::format {
|
||||
#undef FORMAT_NORM_INT_FLOAT
|
||||
|
||||
// @fmt:on
|
||||
|
||||
/**
 * @brief Picks one of the unsigned-integer formats based on a per-pixel byte count
 * @param bytesPerPixel The number of bytes occupied by a single pixel
 * @return R8Uint, R8G8Uint, R8G8B8A8Uint, R16G16B16A16Uint or R32G32B32A32Uint for 1, 2, 4, 8 or 16 bytes respectively
 * @note Any other byte count is reported through Logger::Error and R8Uint is returned as a fallback
 */
inline const gpu::texture::FormatBase &GetFormatForBpp(u32 bytesPerPixel) {
    if (bytesPerPixel == 1)
        return R8Uint;

    if (bytesPerPixel == 2)
        return R8G8Uint;

    if (bytesPerPixel == 4)
        return R8G8B8A8Uint;

    if (bytesPerPixel == 8)
        return R16G16B16A16Uint;

    if (bytesPerPixel == 16)
        return R32G32B32A32Uint;

    // No format is listed for this size, report it and hand back the smallest format
    Logger::Error("Couldn't convert bytes per pixel: {}", bytesPerPixel);
    return R8Uint;
}
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ namespace skyline::gpu::texture {
|
||||
constexpr u8 GobWidth{64}; // The width of a GOB in bytes
|
||||
constexpr u8 GobHeight{8}; // The height of a GOB in lines
|
||||
|
||||
size_t GetBlockLinearLayerSize(const GuestTexture &guest) {
|
||||
inline size_t GetBlockLinearLayerSize(const GuestTexture &guest) {
|
||||
u32 blockHeight{guest.tileConfig.blockHeight}; //!< The height of the blocks in GOBs
|
||||
u32 robHeight{GobHeight * blockHeight}; //!< The height of a single ROB (Row of Blocks) in lines
|
||||
u32 surfaceHeightLines{util::DivideCeil(guest.dimensions.height, u32{guest.format->blockHeight})}; //!< The height of the surface in lines
|
||||
@ -27,7 +27,7 @@ namespace skyline::gpu::texture {
|
||||
/**
|
||||
* @brief Copies pixel data between a linear and blocklinear texture
|
||||
*/
|
||||
template <typename CopyFunction>
|
||||
template<typename CopyFunction>
|
||||
void CopyBlockLinearInternal(const GuestTexture &guest, u8 *blockLinear, u8 *linear, CopyFunction copyFunction) {
|
||||
u32 blockHeight{guest.tileConfig.blockHeight};
|
||||
u32 robHeight{GobHeight * blockHeight};
|
||||
@ -99,15 +99,15 @@ namespace skyline::gpu::texture {
|
||||
/**
|
||||
* @brief Copies the contents of a blocklinear guest texture to a linear output buffer
|
||||
*/
|
||||
void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
|
||||
inline void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
|
||||
CopyBlockLinearInternal(guest, guestInput, linearOutput, std::memcpy);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Copies the contents of a linear buffer to a blocklinear guest texture
|
||||
*/
|
||||
void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
|
||||
CopyBlockLinearInternal(guest, guestOutput, linearInput, [](u8* src, u8* dst, size_t size) {
|
||||
inline void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
|
||||
CopyBlockLinearInternal(guest, guestOutput, linearInput, [](u8 *src, u8 *dst, size_t size) {
|
||||
std::memcpy(dst, src, size);
|
||||
});
|
||||
}
|
||||
@ -115,7 +115,7 @@ namespace skyline::gpu::texture {
|
||||
/**
|
||||
* @brief Copies the contents of a pitch-linear guest texture to a linear output buffer
|
||||
*/
|
||||
void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
|
||||
inline void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
|
||||
auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data
|
||||
auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data
|
||||
|
||||
@ -132,7 +132,7 @@ namespace skyline::gpu::texture {
|
||||
/**
|
||||
* @brief Copies the contents of a linear buffer to a pitch-linear guest texture
|
||||
*/
|
||||
void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
|
||||
inline void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
|
||||
auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data
|
||||
auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data
|
||||
|
||||
|
@ -1,6 +1,9 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
// Copyright © 2022 yuzu Emulator Project (https://github.com/yuzu-emu/yuzu/)
|
||||
|
||||
#include <gpu/texture/format.h>
|
||||
#include <gpu/texture/layout.h>
|
||||
#include <soc.h>
|
||||
#include <soc/gm20b/channel.h>
|
||||
#include <soc/gm20b/gmmu.h>
|
||||
@ -27,9 +30,18 @@ namespace skyline::soc::gm20b::engine {
|
||||
if (*registers.lineLengthIn == 0)
|
||||
return; // Nothing to copy
|
||||
|
||||
if (registers.launchDma->remapEnable) {
|
||||
Logger::Warn("DMA remapping is unimplemented!");
|
||||
return;
|
||||
}
|
||||
|
||||
if (registers.launchDma->multiLineEnable) {
|
||||
// 2D/3D copy
|
||||
Logger::Warn("2D/3D DMA engine copies are unimplemented");
|
||||
if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch &&
|
||||
registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear)
|
||||
CopyPitchToBlockLinear();
|
||||
else
|
||||
Logger::Warn("Unimplemented multi-line copy type: {} -> {}!",
|
||||
static_cast<u8>(registers.launchDma->srcMemoryLayout), static_cast<u8>(registers.launchDma->dstMemoryLayout));
|
||||
} else {
|
||||
// 1D buffer copy
|
||||
// TODO: implement swizzled 1D copies based on VMM 'kind'
|
||||
@ -38,6 +50,64 @@ namespace skyline::soc::gm20b::engine {
|
||||
}
|
||||
}
|
||||
|
||||
void MaxwellDma::CopyPitchToBlockLinear() {
|
||||
if (registers.dstSurface->blockSize.Depth() > 1 || registers.dstSurface->depth > 1) {
|
||||
Logger::Warn("3D DMA engine copies are unimplemented!");
|
||||
return;
|
||||
}
|
||||
|
||||
if (registers.dstSurface->blockSize.Width() != 1) {
|
||||
Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented!");
|
||||
return;
|
||||
}
|
||||
|
||||
u32 bytesPerPixel{static_cast<u32>(registers.remapComponents->ComponentSize() * registers.remapComponents->NumSrcComponents())};
|
||||
if (bytesPerPixel * *registers.lineLengthIn != *registers.pitchIn) {
|
||||
Logger::Warn("Non-linear DMA source textures are not implemented!");
|
||||
return;
|
||||
}
|
||||
|
||||
if (registers.dstSurface->origin.x || registers.dstSurface->origin.y) {
|
||||
Logger::Warn("Non-zero origin DMA copies are not implemented!");
|
||||
return;
|
||||
}
|
||||
|
||||
gpu::GuestTexture srcTexture{span<u8>{},
|
||||
gpu::texture::Dimensions{*registers.lineLengthIn, *registers.lineCount, 1},
|
||||
gpu::format::GetFormatForBpp(bytesPerPixel),
|
||||
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear },
|
||||
gpu::texture::TextureType::e2D};
|
||||
|
||||
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerSize())}; mappings.size() == 1) {
|
||||
srcTexture.mappings[0] = mappings[0];
|
||||
} else {
|
||||
Logger::Warn("DMA for split textures is unimplemented!");
|
||||
return;
|
||||
}
|
||||
|
||||
if (*registers.lineLengthIn != registers.dstSurface->width)
|
||||
Logger::Warn("DMA copy width mismatch: src: {} dst: {}", *registers.lineLengthIn, registers.dstSurface->width);
|
||||
|
||||
// This represents a single layer view into a potentially multi-layer texture
|
||||
gpu::GuestTexture dstTexture{span<u8>{},
|
||||
gpu::texture::Dimensions{*registers.lineLengthIn, registers.dstSurface->height, 1},
|
||||
gpu::format::GetFormatForBpp(bytesPerPixel),
|
||||
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.dstSurface->blockSize.Height(), .blockDepth = 1 },
|
||||
gpu::texture::TextureType::e2D};
|
||||
|
||||
u64 dstLayerAddress{*registers.offsetOut + dstTexture.GetLayerSize() * registers.dstSurface->layer};
|
||||
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstTexture.GetLayerSize())}; mappings.size() == 1) {
|
||||
dstTexture.mappings[0] = mappings[0];
|
||||
} else {
|
||||
Logger::Warn("DMA for split textures is unimplemented!");
|
||||
return;
|
||||
}
|
||||
|
||||
Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcTexture.dimensions.width, srcTexture.dimensions.height, *registers.offsetIn, dstTexture.dimensions.width, dstTexture.dimensions.height, dstLayerAddress);
|
||||
|
||||
gpu::texture::CopyLinearToBlockLinear(dstTexture, srcTexture.mappings.front().data(), dstTexture.mappings.front().data());
|
||||
}
|
||||
|
||||
void MaxwellDma::CallMethodBatchNonInc(u32 method, span<u32> arguments) {
|
||||
for (u32 argument : arguments)
|
||||
HandleMethod(method, argument);
|
||||
|
@ -22,6 +22,8 @@ namespace skyline::soc::gm20b::engine {
|
||||
|
||||
void LaunchDma();
|
||||
|
||||
void CopyPitchToBlockLinear();
|
||||
|
||||
public:
|
||||
/**
|
||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
|
||||
@ -187,19 +189,42 @@ namespace skyline::soc::gm20b::engine {
|
||||
u8 _pad5_ : 2;
|
||||
u8 numDstComponentsMinusOne : 2;
|
||||
u8 _pad6_ : 6;
|
||||
|
||||
/**
 * @return The value of the componentSizeMinusOne field plus one
 */
u8 ComponentSize() const {
    return static_cast<u8>(componentSizeMinusOne + 1);
}
|
||||
|
||||
/**
 * @return The value of the numSrcComponentsMinusOne field plus one
 */
u8 NumSrcComponents() const {
    return static_cast<u8>(numSrcComponentsMinusOne + 1);
}
|
||||
|
||||
/**
 * @return The value of the numDstComponentsMinusOne field plus one
 */
u8 NumDstComponents() const {
    return static_cast<u8>(numDstComponentsMinusOne + 1);
}
|
||||
};
|
||||
static_assert(sizeof(RemapComponents) == 0xC);
|
||||
|
||||
Register<0x1C2, RemapComponents> remapComponents;
|
||||
|
||||
struct Surface {
|
||||
// Nvidia's docs here differ from other emus and deko3d so go with what they say
|
||||
struct {
|
||||
u8 width : 4;
|
||||
u8 height : 4;
|
||||
u8 depth : 4;
|
||||
u8 widthLog2 : 4;
|
||||
u8 heightLog2 : 4;
|
||||
u8 depthLog2 : 4;
|
||||
u8 gobHeight : 4;
|
||||
u16 _pad_;
|
||||
|
||||
/**
 * @return Two raised to the power of the widthLog2 field
 * @note The result is truncated to 8 bits, so a widthLog2 of 8 or more yields 0
 */
u8 Width() const {
    return static_cast<u8>(1 << widthLog2);
}
|
||||
|
||||
/**
 * @return Two raised to the power of the heightLog2 field
 * @note The result is truncated to 8 bits, so a heightLog2 of 8 or more yields 0
 */
u8 Height() const {
    return static_cast<u8>(1 << heightLog2);
}
|
||||
|
||||
/**
 * @return Two raised to the power of the depthLog2 field
 * @note The result is truncated to 8 bits, so a depthLog2 of 8 or more yields 0
 */
u8 Depth() const {
    return static_cast<u8>(1 << depthLog2);
}
|
||||
} blockSize;
|
||||
u32 width;
|
||||
u32 height;
|
||||
|
Loading…
Reference in New Issue
Block a user