mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-27 02:04:22 +01:00
Implement primitive Linear->Block Linear DMA engine copies
Slightly inaccurate and misses some features but good enough for most games, should be revisited later.
This commit is contained in:
parent
3c26921d54
commit
3e4e8de1d2
@ -150,4 +150,22 @@ namespace skyline::gpu::format {
|
|||||||
#undef FORMAT_NORM_INT_FLOAT
|
#undef FORMAT_NORM_INT_FLOAT
|
||||||
|
|
||||||
// @fmt:on
|
// @fmt:on
|
||||||
|
|
||||||
|
/**
 * @brief Looks up a format whose texel size equals the supplied byte count
 * @param bytesPerPixel The size of one texel in bytes, sizes of 1, 2, 4, 8 and 16 bytes are recognised
 * @return R8Uint, R8G8Uint, R8G8B8A8Uint, R16G16B16A16Uint or R32G32B32A32Uint for the respective sizes
 * @note Any other size is reported through Logger::Error and R8Uint is returned as a fallback
 */
inline const gpu::texture::FormatBase &GetFormatForBpp(u32 bytesPerPixel) {
    if (bytesPerPixel == 1)
        return R8Uint;

    if (bytesPerPixel == 2)
        return R8G8Uint;

    if (bytesPerPixel == 4)
        return R8G8B8A8Uint;

    if (bytesPerPixel == 8)
        return R16G16B16A16Uint;

    if (bytesPerPixel == 16)
        return R32G32B32A32Uint;

    // No format of a matching size is known, report it and fall back to the single byte format
    Logger::Error("Couldn't convert bytes per pixel: {}", bytesPerPixel);
    return R8Uint;
}
|
||||||
}
|
}
|
||||||
|
@ -12,7 +12,7 @@ namespace skyline::gpu::texture {
|
|||||||
constexpr u8 GobWidth{64}; // The width of a GOB in bytes
|
constexpr u8 GobWidth{64}; // The width of a GOB in bytes
|
||||||
constexpr u8 GobHeight{8}; // The height of a GOB in lines
|
constexpr u8 GobHeight{8}; // The height of a GOB in lines
|
||||||
|
|
||||||
size_t GetBlockLinearLayerSize(const GuestTexture &guest) {
|
inline size_t GetBlockLinearLayerSize(const GuestTexture &guest) {
|
||||||
u32 blockHeight{guest.tileConfig.blockHeight}; //!< The height of the blocks in GOBs
|
u32 blockHeight{guest.tileConfig.blockHeight}; //!< The height of the blocks in GOBs
|
||||||
u32 robHeight{GobHeight * blockHeight}; //!< The height of a single ROB (Row of Blocks) in lines
|
u32 robHeight{GobHeight * blockHeight}; //!< The height of a single ROB (Row of Blocks) in lines
|
||||||
u32 surfaceHeightLines{util::DivideCeil(guest.dimensions.height, u32{guest.format->blockHeight})}; //!< The height of the surface in lines
|
u32 surfaceHeightLines{util::DivideCeil(guest.dimensions.height, u32{guest.format->blockHeight})}; //!< The height of the surface in lines
|
||||||
@ -27,7 +27,7 @@ namespace skyline::gpu::texture {
|
|||||||
/**
|
/**
|
||||||
* @brief Copies pixel data between a linear and blocklinear texture
|
* @brief Copies pixel data between a linear and blocklinear texture
|
||||||
*/
|
*/
|
||||||
template <typename CopyFunction>
|
template<typename CopyFunction>
|
||||||
void CopyBlockLinearInternal(const GuestTexture &guest, u8 *blockLinear, u8 *linear, CopyFunction copyFunction) {
|
void CopyBlockLinearInternal(const GuestTexture &guest, u8 *blockLinear, u8 *linear, CopyFunction copyFunction) {
|
||||||
u32 blockHeight{guest.tileConfig.blockHeight};
|
u32 blockHeight{guest.tileConfig.blockHeight};
|
||||||
u32 robHeight{GobHeight * blockHeight};
|
u32 robHeight{GobHeight * blockHeight};
|
||||||
@ -99,15 +99,15 @@ namespace skyline::gpu::texture {
|
|||||||
/**
|
/**
|
||||||
* @brief Copies the contents of a blocklinear guest texture to a linear output buffer
|
* @brief Copies the contents of a blocklinear guest texture to a linear output buffer
|
||||||
*/
|
*/
|
||||||
void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
|
inline void CopyBlockLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
|
||||||
CopyBlockLinearInternal(guest, guestInput, linearOutput, std::memcpy);
|
CopyBlockLinearInternal(guest, guestInput, linearOutput, std::memcpy);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Copies the contents of a linear input buffer to a blocklinear guest texture
|
* @brief Copies the contents of a linear input buffer to a blocklinear guest texture
|
||||||
*/
|
*/
|
||||||
void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
|
inline void CopyLinearToBlockLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
|
||||||
CopyBlockLinearInternal(guest, guestOutput, linearInput, [](u8* src, u8* dst, size_t size) {
|
CopyBlockLinearInternal(guest, guestOutput, linearInput, [](u8 *src, u8 *dst, size_t size) {
|
||||||
std::memcpy(dst, src, size);
|
std::memcpy(dst, src, size);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -115,7 +115,7 @@ namespace skyline::gpu::texture {
|
|||||||
/**
|
/**
|
||||||
* @brief Copies the contents of a pitch-linear guest texture to a linear output buffer
|
* @brief Copies the contents of a pitch-linear guest texture to a linear output buffer
|
||||||
*/
|
*/
|
||||||
void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
|
inline void CopyPitchLinearToLinear(const GuestTexture &guest, u8 *guestInput, u8 *linearOutput) {
|
||||||
auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data
|
auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data
|
||||||
auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data
|
auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data
|
||||||
|
|
||||||
@ -132,7 +132,7 @@ namespace skyline::gpu::texture {
|
|||||||
/**
|
/**
|
||||||
* @brief Copies the contents of a linear buffer to a pitch-linear guest texture
|
* @brief Copies the contents of a linear buffer to a pitch-linear guest texture
|
||||||
*/
|
*/
|
||||||
void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
|
inline void CopyLinearToPitchLinear(const GuestTexture &guest, u8 *linearInput, u8 *guestOutput) {
|
||||||
auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data
|
auto sizeLine{guest.format->GetSize(guest.dimensions.width, 1)}; //!< The size of a single line of pixel data
|
||||||
auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data
|
auto sizeStride{guest.tileConfig.pitch}; //!< The size of a single stride of pixel data
|
||||||
|
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
// SPDX-License-Identifier: MPL-2.0
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||||
|
// Copyright © 2022 yuzu Emulator Project (https://github.com/yuzu-emu/yuzu/)
|
||||||
|
|
||||||
|
#include <gpu/texture/format.h>
|
||||||
|
#include <gpu/texture/layout.h>
|
||||||
#include <soc.h>
|
#include <soc.h>
|
||||||
#include <soc/gm20b/channel.h>
|
#include <soc/gm20b/channel.h>
|
||||||
#include <soc/gm20b/gmmu.h>
|
#include <soc/gm20b/gmmu.h>
|
||||||
@ -27,9 +30,18 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
if (*registers.lineLengthIn == 0)
|
if (*registers.lineLengthIn == 0)
|
||||||
return; // Nothing to copy
|
return; // Nothing to copy
|
||||||
|
|
||||||
|
if (registers.launchDma->remapEnable) {
|
||||||
|
Logger::Warn("DMA remapping is unimplemented!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (registers.launchDma->multiLineEnable) {
|
if (registers.launchDma->multiLineEnable) {
|
||||||
// 2D/3D copy
|
if (registers.launchDma->srcMemoryLayout == Registers::LaunchDma::MemoryLayout::Pitch &&
|
||||||
Logger::Warn("2D/3D DMA engine copies are unimplemented");
|
registers.launchDma->dstMemoryLayout == Registers::LaunchDma::MemoryLayout::BlockLinear)
|
||||||
|
CopyPitchToBlockLinear();
|
||||||
|
else
|
||||||
|
Logger::Warn("Unimplemented multi-line copy type: {} -> {}!",
|
||||||
|
static_cast<u8>(registers.launchDma->srcMemoryLayout), static_cast<u8>(registers.launchDma->dstMemoryLayout));
|
||||||
} else {
|
} else {
|
||||||
// 1D buffer copy
|
// 1D buffer copy
|
||||||
// TODO: implement swizzled 1D copies based on VMM 'kind'
|
// TODO: implement swizzled 1D copies based on VMM 'kind'
|
||||||
@ -38,6 +50,64 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MaxwellDma::CopyPitchToBlockLinear() {
|
||||||
|
if (registers.dstSurface->blockSize.Depth() > 1 || registers.dstSurface->depth > 1) {
|
||||||
|
Logger::Warn("3D DMA engine copies are unimplemented!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (registers.dstSurface->blockSize.Width() != 1) {
|
||||||
|
Logger::Warn("DMA engine copies with block widths other than 1 are unimplemented!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 bytesPerPixel{static_cast<u32>(registers.remapComponents->ComponentSize() * registers.remapComponents->NumSrcComponents())};
|
||||||
|
if (bytesPerPixel * *registers.lineLengthIn != *registers.pitchIn) {
|
||||||
|
Logger::Warn("Non-linear DMA source textures are not implemented!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (registers.dstSurface->origin.x || registers.dstSurface->origin.y) {
|
||||||
|
Logger::Warn("Non-zero origin DMA copies are not implemented!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
gpu::GuestTexture srcTexture{span<u8>{},
|
||||||
|
gpu::texture::Dimensions{*registers.lineLengthIn, *registers.lineCount, 1},
|
||||||
|
gpu::format::GetFormatForBpp(bytesPerPixel),
|
||||||
|
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Linear },
|
||||||
|
gpu::texture::TextureType::e2D};
|
||||||
|
|
||||||
|
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(*registers.offsetIn, srcTexture.GetLayerSize())}; mappings.size() == 1) {
|
||||||
|
srcTexture.mappings[0] = mappings[0];
|
||||||
|
} else {
|
||||||
|
Logger::Warn("DMA for split textures is unimplemented!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*registers.lineLengthIn != registers.dstSurface->width)
|
||||||
|
Logger::Warn("DMA copy width mismatch: src: {} dst: {}", *registers.lineLengthIn, registers.dstSurface->width);
|
||||||
|
|
||||||
|
// This represents a single layer view into a potentially multi-layer texture
|
||||||
|
gpu::GuestTexture dstTexture{span<u8>{},
|
||||||
|
gpu::texture::Dimensions{*registers.lineLengthIn, registers.dstSurface->height, 1},
|
||||||
|
gpu::format::GetFormatForBpp(bytesPerPixel),
|
||||||
|
gpu::texture::TileConfig{ .mode = gpu::texture::TileMode::Block, .blockHeight = registers.dstSurface->blockSize.Height(), .blockDepth = 1 },
|
||||||
|
gpu::texture::TextureType::e2D};
|
||||||
|
|
||||||
|
u64 dstLayerAddress{*registers.offsetOut + dstTexture.GetLayerSize() * registers.dstSurface->layer};
|
||||||
|
if (auto mappings{channelCtx.asCtx->gmmu.TranslateRange(dstLayerAddress, dstTexture.GetLayerSize())}; mappings.size() == 1) {
|
||||||
|
dstTexture.mappings[0] = mappings[0];
|
||||||
|
} else {
|
||||||
|
Logger::Warn("DMA for split textures is unimplemented!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Logger::Debug("{}x{}@0x{:X} -> {}x{}@0x{:X}", srcTexture.dimensions.width, srcTexture.dimensions.height, *registers.offsetIn, dstTexture.dimensions.width, dstTexture.dimensions.height, dstLayerAddress);
|
||||||
|
|
||||||
|
gpu::texture::CopyLinearToBlockLinear(dstTexture, srcTexture.mappings.front().data(), dstTexture.mappings.front().data());
|
||||||
|
}
|
||||||
|
|
||||||
void MaxwellDma::CallMethodBatchNonInc(u32 method, span<u32> arguments) {
|
void MaxwellDma::CallMethodBatchNonInc(u32 method, span<u32> arguments) {
|
||||||
for (u32 argument : arguments)
|
for (u32 argument : arguments)
|
||||||
HandleMethod(method, argument);
|
HandleMethod(method, argument);
|
||||||
|
@ -22,6 +22,8 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
|
|
||||||
void LaunchDma();
|
void LaunchDma();
|
||||||
|
|
||||||
|
void CopyPitchToBlockLinear();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* @url https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
|
* @url https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
|
||||||
@ -187,19 +189,42 @@ namespace skyline::soc::gm20b::engine {
|
|||||||
u8 _pad5_ : 2;
|
u8 _pad5_ : 2;
|
||||||
u8 numDstComponentsMinusOne : 2;
|
u8 numDstComponentsMinusOne : 2;
|
||||||
u8 _pad6_ : 6;
|
u8 _pad6_ : 6;
|
||||||
|
|
||||||
|
/**
 * @return The value of the componentSizeMinusOne field with the implicit bias of one added back
 */
u8 ComponentSize() const {
    return static_cast<u8>(componentSizeMinusOne + 1);
}
|
||||||
|
|
||||||
|
/**
 * @return The value of the numSrcComponentsMinusOne field with the implicit bias of one added back
 */
u8 NumSrcComponents() const {
    return static_cast<u8>(numSrcComponentsMinusOne + 1);
}
|
||||||
|
|
||||||
|
/**
 * @return The value of the numDstComponentsMinusOne field with the implicit bias of one added back
 */
u8 NumDstComponents() const {
    return static_cast<u8>(numDstComponentsMinusOne + 1);
}
|
||||||
};
|
};
|
||||||
static_assert(sizeof(RemapComponents) == 0xC);
|
static_assert(sizeof(RemapComponents) == 0xC);
|
||||||
|
|
||||||
Register<0x1C2, RemapComponents> remapComponents;
|
Register<0x1C2, RemapComponents> remapComponents;
|
||||||
|
|
||||||
struct Surface {
|
struct Surface {
|
||||||
// Nvidias docs here differ from other emus and deko3d so go with what they say
|
|
||||||
struct {
|
struct {
|
||||||
u8 width : 4;
|
u8 widthLog2 : 4;
|
||||||
u8 height : 4;
|
u8 heightLog2 : 4;
|
||||||
u8 depth : 4;
|
u8 depthLog2 : 4;
|
||||||
u8 gobHeight : 4;
|
u8 gobHeight : 4;
|
||||||
u16 _pad_;
|
u16 _pad_;
|
||||||
|
|
||||||
|
/**
 * @return The block width decoded from its log2 representation (1 << widthLog2)
 * @note widthLog2 is a 4-bit field, values of 8 and above do not fit into the u8 result and are truncated to 0 by the cast
 */
u8 Width() const {
    return static_cast<u8>(1 << widthLog2);
}
|
||||||
|
|
||||||
|
/**
 * @return The block height decoded from its log2 representation (1 << heightLog2)
 * @note heightLog2 is a 4-bit field, values of 8 and above do not fit into the u8 result and are truncated to 0 by the cast
 */
u8 Height() const {
    return static_cast<u8>(1 << heightLog2);
}
|
||||||
|
|
||||||
|
/**
 * @return The block depth decoded from its log2 representation (1 << depthLog2)
 * @note depthLog2 is a 4-bit field, values of 8 and above do not fit into the u8 result and are truncated to 0 by the cast
 */
u8 Depth() const {
    return static_cast<u8>(1 << depthLog2);
}
|
||||||
} blockSize;
|
} blockSize;
|
||||||
u32 width;
|
u32 width;
|
||||||
u32 height;
|
u32 height;
|
||||||
|
Loading…
Reference in New Issue
Block a user