Initial Texture Manager Implementation + Maxwell3D Render Target

Implement the groundwork for the texture manager to be able to report basic overlaps and be extended to support more in the future. The Maxwell3D registers `RenderTargetControl`, `RenderTarget` and a stub for `ClearBuffers` were implemented. 

Many changes were also made to `GuestTexture`/`Texture` to support mipmapping and multiple array layers. In addition, `GuestTexture` was significantly rearchitected: it is now effectively disconnected from `Texture` and is no longer its parent, but rather an object that can be used to create a `Texture` object.

Note: Support for fragmented CPU mappings hasn't been added for texture synchronization yet
This commit is contained in:
PixelyIon 2021-08-21 01:47:13 +05:30
parent 8cba1edf6d
commit 270f2db1d2
17 changed files with 757 additions and 298 deletions

View File

@ -101,6 +101,7 @@ add_library(skyline SHARED
${source_DIR}/skyline/audio/adpcm_decoder.cpp ${source_DIR}/skyline/audio/adpcm_decoder.cpp
${source_DIR}/skyline/gpu.cpp ${source_DIR}/skyline/gpu.cpp
${source_DIR}/skyline/gpu/memory_manager.cpp ${source_DIR}/skyline/gpu/memory_manager.cpp
${source_DIR}/skyline/gpu/texture_manager.cpp
${source_DIR}/skyline/gpu/command_scheduler.cpp ${source_DIR}/skyline/gpu/command_scheduler.cpp
${source_DIR}/skyline/gpu/texture/texture.cpp ${source_DIR}/skyline/gpu/texture/texture.cpp
${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp

View File

@ -49,8 +49,8 @@ namespace skyline {
DeviceState::DeviceState(kernel::OS *os, std::shared_ptr<JvmManager> jvmManager, std::shared_ptr<Settings> settings, std::shared_ptr<Logger> logger) DeviceState::DeviceState(kernel::OS *os, std::shared_ptr<JvmManager> jvmManager, std::shared_ptr<Settings> settings, std::shared_ptr<Logger> logger)
: os(os), jvm(std::move(jvmManager)), settings(std::move(settings)), logger(std::move(logger)) { : os(os), jvm(std::move(jvmManager)), settings(std::move(settings)), logger(std::move(logger)) {
// We assign these later as they use the state in their constructor and we don't want null pointers // We assign these later as they use the state in their constructor and we don't want null pointers
soc = std::make_shared<soc::SOC>(*this);
gpu = std::make_shared<gpu::GPU>(*this); gpu = std::make_shared<gpu::GPU>(*this);
soc = std::make_shared<soc::SOC>(*this);
audio = std::make_shared<audio::Audio>(*this); audio = std::make_shared<audio::Audio>(*this);
nce = std::make_shared<nce::NCE>(*this); nce = std::make_shared<nce::NCE>(*this);
scheduler = std::make_shared<kernel::Scheduler>(*this); scheduler = std::make_shared<kernel::Scheduler>(*this);

View File

@ -439,6 +439,31 @@ namespace skyline {
copy_from(span<typename std::add_const<typename In::value_type>::type>(in), amount); copy_from(span<typename std::add_const<typename In::value_type>::type>(in), amount);
} }
/**
 * @return If a supplied span is located entirely inside this span and is effectively a subspan
 * @note The bounds are compared through the raw data pointers of both spans: the supplied span has to start at or after the start of this span and has to end at or before the end of this span
 */
constexpr bool contains(const span<T, Extent> &other) const {
    const auto thisStart{this->data()};
    const auto otherStart{other.data()};
    // Both edges need to be checked, comparing only the sizes would accept spans that start inside this span but run past its end
    return thisStart <= otherStart && (otherStart + other.size()) <= (thisStart + this->size());
}
/** Comparison operators for equality and binary searches **/
/**
 * @return If both spans refer to the exact same region, that is they share the same data pointer and the same size
 * @note This compares the identity of the viewed region rather than the contents of the elements
 */
constexpr bool operator==(const span<T, Extent> &other) const {
    if (this->data() != other.data())
        return false;
    return this->size() == other.size();
}
/**
 * @return If the data pointer of this span is located before the data pointer of the supplied span, sizes are not taken into account
 */
constexpr bool operator<(const span<T, Extent> &other) const {
    const auto otherStart{other.data()};
    return otherStart > this->data();
}
/**
 * @return If the data pointer of this span is located before the supplied pointer
 */
constexpr bool operator<(T *pointer) const {
    const auto start{this->data()};
    return pointer > start;
}
/**
 * @return If the beginning iterator of this span is ordered before the supplied iterator
 */
constexpr bool operator<(typename std::span<T, Extent>::const_iterator it) const {
    const auto first{this->begin()};
    return first < it;
}
/** Base Class Functions that return an instance of it, we upcast them **/ /** Base Class Functions that return an instance of it, we upcast them **/
template<size_t Count> template<size_t Count>
constexpr span<T, Count> first() const noexcept { constexpr span<T, Count> first() const noexcept {
@ -643,12 +668,12 @@ namespace skyline {
struct ThreadContext; struct ThreadContext;
} }
class JvmManager; class JvmManager;
namespace soc {
class SOC;
}
namespace gpu { namespace gpu {
class GPU; class GPU;
} }
namespace soc {
class SOC;
}
namespace kernel { namespace kernel {
namespace type { namespace type {
class KProcess; class KProcess;
@ -678,8 +703,8 @@ namespace skyline {
std::shared_ptr<Settings> settings; std::shared_ptr<Settings> settings;
std::shared_ptr<Logger> logger; std::shared_ptr<Logger> logger;
std::shared_ptr<loader::Loader> loader; std::shared_ptr<loader::Loader> loader;
std::shared_ptr<soc::SOC> soc;
std::shared_ptr<gpu::GPU> gpu; std::shared_ptr<gpu::GPU> gpu;
std::shared_ptr<soc::SOC> soc;
std::shared_ptr<audio::Audio> audio; std::shared_ptr<audio::Audio> audio;
std::shared_ptr<nce::NCE> nce; std::shared_ptr<nce::NCE> nce;
std::shared_ptr<kernel::Scheduler> scheduler; std::shared_ptr<kernel::Scheduler> scheduler;

View File

@ -145,5 +145,5 @@ namespace skyline::gpu {
}); });
} }
GPU::GPU(const DeviceState &state) : vkInstance(CreateInstance(state, vkContext)), vkDebugReportCallback(CreateDebugReportCallback(state, vkInstance)), vkPhysicalDevice(CreatePhysicalDevice(state, vkInstance)), vkDevice(CreateDevice(state, vkPhysicalDevice, vkQueueFamilyIndex)), vkQueue(vkDevice, vkQueueFamilyIndex, 0), memory(*this), scheduler(*this), presentation(state, *this) {} GPU::GPU(const DeviceState &state) : vkInstance(CreateInstance(state, vkContext)), vkDebugReportCallback(CreateDebugReportCallback(state, vkInstance)), vkPhysicalDevice(CreatePhysicalDevice(state, vkInstance)), vkDevice(CreateDevice(state, vkPhysicalDevice, vkQueueFamilyIndex)), vkQueue(vkDevice, vkQueueFamilyIndex, 0), memory(*this), scheduler(*this), presentation(state, *this), texture(*this) {}
} }

View File

@ -6,6 +6,7 @@
#include "gpu/memory_manager.h" #include "gpu/memory_manager.h"
#include "gpu/command_scheduler.h" #include "gpu/command_scheduler.h"
#include "gpu/presentation_engine.h" #include "gpu/presentation_engine.h"
#include "gpu/texture_manager.h"
namespace skyline::gpu { namespace skyline::gpu {
/** /**
@ -39,6 +40,8 @@ namespace skyline::gpu {
CommandScheduler scheduler; CommandScheduler scheduler;
PresentationEngine presentation; PresentationEngine presentation;
TextureManager texture;
GPU(const DeviceState &state); GPU(const DeviceState &state);
}; };
} }

View File

@ -4,6 +4,8 @@
#pragma once #pragma once
#include <vulkan/vulkan_raii.hpp> #include <vulkan/vulkan_raii.hpp>
#include <gpu.h>
#include <gpu/texture/format.h>
#include <soc/gm20b/engines/maxwell/types.h> #include <soc/gm20b/engines/maxwell/types.h>
namespace skyline::gpu::context { namespace skyline::gpu::context {
@ -11,26 +13,141 @@ namespace skyline::gpu::context {
/** /**
* @brief Host-equivalent context for state of the Maxwell3D engine on the guest * @brief Host-equivalent context for state of the Maxwell3D engine on the guest
* @note This class is **NOT** thread-safe and should not be utilized by multiple threads concurrently
*/ */
class GraphicsContext { class GraphicsContext {
private: private:
GPU &gpu; GPU &gpu;
std::array<vk::Viewport, maxwell3d::ViewportCount> viewports; struct RenderTarget {
bool disabled{}; //!< If this RT has been disabled and will be an unbound attachment instead
union {
    u64 gpuAddress; //!< The combined 64-bit address, it aliases the two 32-bit halves below
    struct {
        // The low half has to be declared first: on a little-endian host the first member overlays the least significant 32 bits of gpuAddress
        u32 gpuAddressLow; //!< Written by SetRenderTargetAddressLow
        u32 gpuAddressHigh; //!< Written by SetRenderTargetAddressHigh
    };
};
GuestTexture guest;
std::optional<TextureView> view;
};
std::array<vk::Rect2D, maxwell3d::ViewportCount> scissors; std::array<RenderTarget, maxwell3d::RenderTargetCount> renderTargets{}; //!< The target textures to render into as color attachments
maxwell3d::RenderTargetControl renderTargetControl{};
std::array<vk::Viewport, maxwell3d::ViewportCount> viewports;
vk::ClearColorValue clearColorValue{}; //!< The value written to a color buffer being cleared
std::array<vk::Rect2D, maxwell3d::ViewportCount> scissors; //!< The scissors applied to viewports/render targets for masking writes during draws or clears
constexpr static vk::Rect2D DefaultScissor{ constexpr static vk::Rect2D DefaultScissor{
.extent = { .extent.height = std::numeric_limits<i32>::max(),
.height = std::numeric_limits<i32>::max(), .extent.width = std::numeric_limits<i32>::max(),
.width = std::numeric_limits<i32>::max(),
}
}; //!< A scissor which displays the entire viewport, utilized when the viewport scissor is disabled }; //!< A scissor which displays the entire viewport, utilized when the viewport scissor is disabled
public: public:
GraphicsContext(GPU &gpu) : gpu(gpu) { GraphicsContext(GPU &gpu) : gpu(gpu) {
scissors.fill(DefaultScissor); scissors.fill(DefaultScissor);
} }
/* Render Targets + Render Target Control */
void SetRenderTargetAddressHigh(size_t index, u32 high) {
auto &renderTarget{renderTargets.at(index)};
renderTarget.gpuAddressHigh = high;
renderTarget.guest.mappings.clear();
renderTarget.view.reset();
}
void SetRenderTargetAddressLow(size_t index, u32 low) {
auto &renderTarget{renderTargets.at(index)};
renderTarget.gpuAddressLow = low;
renderTarget.guest.mappings.clear();
renderTarget.view.reset();
}
void SetRenderTargetAddressWidth(size_t index, u32 value) {
auto &renderTarget{renderTargets.at(index)};
renderTarget.guest.dimensions.width = value;
renderTarget.view.reset();
}
void SetRenderTargetAddressHeight(size_t index, u32 value) {
auto &renderTarget{renderTargets.at(index)};
renderTarget.guest.dimensions.height = value;
renderTarget.view.reset();
}
void SetRenderTargetAddressFormat(size_t index, maxwell3d::RenderTarget::ColorFormat format) {
auto &renderTarget{renderTargets.at(index)};
renderTarget.guest.format = [&]() -> texture::Format {
switch (format) {
case maxwell3d::RenderTarget::ColorFormat::None:
return {};
case maxwell3d::RenderTarget::ColorFormat::R8G8B8A8Unorm:
return format::RGBA8888Unorm;
default:
throw exception("Cannot translate the supplied RT format: 0x{:X}", static_cast<u32>(format));
}
}();
renderTarget.disabled = !renderTarget.guest.format;
renderTarget.view.reset();
}
void SetRenderTargetTileMode(size_t index, maxwell3d::RenderTarget::TileMode mode) {
auto &renderTarget{renderTargets.at(index)};
auto &config{renderTarget.guest.tileConfig};
if (mode.isLinear) {
config.mode = texture::TileMode::Linear;
} else [[likely]] {
config = texture::TileConfig{
.mode = texture::TileMode::Block,
.blockHeight = static_cast<u8>(1U << mode.blockHeightLog2),
.blockDepth = static_cast<u8>(1U << mode.blockDepthLog2),
};
}
renderTarget.view.reset();
}
void SetRenderTargetArrayMode(size_t index, maxwell3d::RenderTarget::ArrayMode mode) {
auto &renderTarget{renderTargets.at(index)};
renderTarget.guest.layerCount = mode.layerCount;
if (mode.volume)
throw exception("RT Array Volumes are not supported (with layer count = {})", mode.layerCount);
renderTarget.view.reset();
}
void SetRenderTargetLayerStride(size_t index, u32 layerStrideLsr2) {
auto &renderTarget{renderTargets.at(index)};
renderTarget.guest.layerStride = layerStrideLsr2 << 2;
renderTarget.view.reset();
}
/**
 * @brief Updates the base array layer of the guest texture backing a render target and invalidates its cached view
 * @param index The index of the render target, an out-of-range index makes the array lookup throw
 * @param baseArrayLayer The index of the first array layer, values above the maximum of u16 are rejected with an exception
 * @note The value is validated prior to being stored so that a rejected value never ends up in the render target state
 */
void SetRenderTargetBaseLayer(size_t index, u32 baseArrayLayer) {
    auto &renderTarget{renderTargets.at(index)};
    if (baseArrayLayer > std::numeric_limits<u16>::max())
        throw exception("Base array layer ({}) exceeds the range of array count ({}) (with layer count = {})", baseArrayLayer, std::numeric_limits<u16>::max(), renderTarget.guest.layerCount);

    renderTarget.guest.baseArrayLayer = baseArrayLayer;
    renderTarget.view.reset();
}
/**
 * @brief Retrieves the texture view of a render target, the view is looked up through the texture manager and cached if it isn't already
 * @param index The index of the render target, an out-of-range index makes the array lookup throw
 * @return A pointer to the cached view or nullptr if the render target is disabled or has no guest mappings
 */
const TextureView *GetRenderTarget(size_t index) {
    auto &target{renderTargets.at(index)};
    if (target.disabled)
        return nullptr;

    if (!target.view) {
        if (target.guest.mappings.empty()) {
            // TODO: Fill in mappings
            return nullptr;
        }

        target.view = gpu.texture.FindOrCreate(target.guest);
    }

    return &*target.view;
}
/**
 * @brief Replaces the stored render target control state with the supplied one
 */
void UpdateRenderTargetControl(maxwell3d::RenderTargetControl control) {
    this->renderTargetControl = control;
}
/* Viewport Transforms */ /* Viewport Transforms */
/** /**
@ -55,6 +172,20 @@ namespace skyline::gpu::context {
viewport.maxDepth = scale + translate; // Counteract the subtraction of the maxDepth (p_z - o_z) by minDepth (o_z) for the host scale viewport.maxDepth = scale + translate; // Counteract the subtraction of the maxDepth (p_z - o_z) by minDepth (o_z) for the host scale
} }
/* Buffer Clears */
/**
 * @brief Updates a single component of the color value that is written to color buffers being cleared
 * @param index The index of the component inside the color value, an out-of-range index makes the array lookup throw
 */
void UpdateClearColorValue(size_t index, u32 value) {
    auto &component{clearColorValue.uint32.at(index)};
    component = value;
}
/**
 * @brief Stub for clearing a render target, it currently only resolves the target and locks its backing
 * @note The supplied render target ID is translated through the render target control state prior to the lookup
 */
void ClearBuffers(maxwell3d::ClearBuffers clear) {
    const auto targetIndex{renderTargetControl.Map(clear.renderTargetId)};
    auto target{GetRenderTarget(targetIndex)};
    if (!target)
        return; // There is nothing to clear without a resolved render target

    std::lock_guard lock(*target->backing);
    // TODO: Clear the buffer
}
/* Viewport Scissors */ /* Viewport Scissors */
void SetScissor(size_t index, std::optional<maxwell3d::Scissor> scissor) { void SetScissor(size_t index, std::optional<maxwell3d::Scissor> scissor) {

View File

@ -121,6 +121,6 @@ namespace skyline::gpu::memory {
VmaAllocationInfo allocationInfo; VmaAllocationInfo allocationInfo;
ThrowOnFail(vmaCreateImage(vmaAllocator, &static_cast<const VkImageCreateInfo &>(createInfo), &allocationCreateInfo, &image, &allocation, &allocationInfo)); ThrowOnFail(vmaCreateImage(vmaAllocator, &static_cast<const VkImageCreateInfo &>(createInfo), &allocationCreateInfo, &image, &allocation, &allocationInfo));
return Image(reinterpret_cast<u8 *>(allocationInfo.pMappedData), vmaAllocator, image, allocation); return Image(vmaAllocator, image, allocation);
} }
} }

View File

@ -114,8 +114,8 @@ namespace skyline::gpu {
if (swapchainFormat != format) { if (swapchainFormat != format) {
auto formats{gpu.vkPhysicalDevice.getSurfaceFormatsKHR(**vkSurface)}; auto formats{gpu.vkPhysicalDevice.getSurfaceFormatsKHR(**vkSurface)};
if (std::find(formats.begin(), formats.end(), vk::SurfaceFormatKHR{format, vk::ColorSpaceKHR::eSrgbNonlinear}) == formats.end()) if (std::find(formats.begin(), formats.end(), vk::SurfaceFormatKHR{*format, vk::ColorSpaceKHR::eSrgbNonlinear}) == formats.end())
throw exception("Surface doesn't support requested image format '{}' with colorspace '{}'", vk::to_string(format), vk::to_string(vk::ColorSpaceKHR::eSrgbNonlinear)); throw exception("Surface doesn't support requested image format '{}' with colorspace '{}'", vk::to_string(*format), vk::to_string(vk::ColorSpaceKHR::eSrgbNonlinear));
} }
constexpr vk::ImageUsageFlags presentUsage{vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst}; constexpr vk::ImageUsageFlags presentUsage{vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst};
@ -125,7 +125,7 @@ namespace skyline::gpu {
vkSwapchain.emplace(gpu.vkDevice, vk::SwapchainCreateInfoKHR{ vkSwapchain.emplace(gpu.vkDevice, vk::SwapchainCreateInfoKHR{
.surface = **vkSurface, .surface = **vkSurface,
.minImageCount = minImageCount, .minImageCount = minImageCount,
.imageFormat = format, .imageFormat = *format,
.imageColorSpace = vk::ColorSpaceKHR::eSrgbNonlinear, .imageColorSpace = vk::ColorSpaceKHR::eSrgbNonlinear,
.imageExtent = extent, .imageExtent = extent,
.imageArrayLayers = 1, .imageArrayLayers = 1,
@ -142,7 +142,7 @@ namespace skyline::gpu {
for (size_t index{}; index < vkImages.size(); index++) { for (size_t index{}; index < vkImages.size(); index++) {
auto &slot{images[index]}; auto &slot{images[index]};
slot = std::make_shared<Texture>(*state.gpu, vkImages[index], extent, format::GetFormat(format), vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal); slot = std::make_shared<Texture>(*state.gpu, vkImages[index], extent, format, vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal);
slot->TransitionLayout(vk::ImageLayout::ePresentSrcKHR); slot->TransitionLayout(vk::ImageLayout::ePresentSrcKHR);
} }
for (size_t index{vkImages.size()}; index < MaxSwapchainImageCount; index++) for (size_t index{vkImages.size()}; index < MaxSwapchainImageCount; index++)
@ -235,7 +235,11 @@ namespace skyline::gpu {
} }
std::ignore = gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits<u64>::max()); std::ignore = gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits<u64>::max());
images.at(nextImage.second)->CopyFrom(texture); images.at(nextImage.second)->CopyFrom(texture, vk::ImageSubresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
});
if (timestamp) { if (timestamp) {
// If the timestamp is specified, we need to convert it from the util::GetTimeNs base to the CLOCK_MONOTONIC one // If the timestamp is specified, we need to convert it from the util::GetTimeNs base to the CLOCK_MONOTONIC one

View File

@ -6,15 +6,17 @@
#include "texture.h" #include "texture.h"
namespace skyline::gpu::format { namespace skyline::gpu::format {
using Format = gpu::texture::Format; using Format = gpu::texture::FormatBase;
using vkf = vk::Format;
using vka = vk::ImageAspectFlagBits;
constexpr Format RGBA8888Unorm{sizeof(u8) * 4, 1, 1, vk::Format::eR8G8B8A8Unorm}; //!< 8-bits per channel 4-channel pixels constexpr Format RGBA8888Unorm{sizeof(u8) * 4, 1, 1, vkf::eR8G8B8A8Unorm, vka::eColor}; //!< 8-bits per channel 4-channel pixels
constexpr Format RGB565Unorm{sizeof(u8) * 2, 1, 1, vk::Format::eR5G6B5UnormPack16}; //!< Red channel: 5-bit, Green channel: 6-bit, Blue channel: 5-bit constexpr Format RGB565Unorm{sizeof(u8) * 2, 1, 1, vkf::eR5G6B5UnormPack16, vka::eColor}; //!< Red channel: 5-bit, Green channel: 6-bit, Blue channel: 5-bit
/** /**
* @brief Converts a Vulkan format to a Skyline format * @brief Converts a Vulkan format to a Skyline format
*/ */
constexpr const Format &GetFormat(vk::Format format) { constexpr gpu::texture::Format GetFormat(vk::Format format) {
switch (format) { switch (format) {
case vk::Format::eR8G8B8A8Unorm: case vk::Format::eR8G8B8A8Unorm:
return RGBA8888Unorm; return RGBA8888Unorm;

View File

@ -7,66 +7,48 @@
#include "texture.h" #include "texture.h"
namespace skyline::gpu { namespace skyline::gpu {
GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, const texture::Format &format, texture::TileMode tiling, texture::TileConfig layout) : state(state), pointer(pointer), dimensions(dimensions), format(format), tileMode(tiling), tileConfig(layout) {} Texture::Texture(GPU &gpu, BackingType &&backing, GuestTexture guest, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) : gpu(gpu), backing(std::move(backing)), layout(layout), guest(std::move(guest)), dimensions(dimensions), format(format), tiling(tiling), mipLevels(mipLevels), layerCount(layerCount), sampleCount(sampleCount) {
std::shared_ptr<Texture> GuestTexture::InitializeTexture(vk::Image backing, texture::Dimensions pDimensions, const texture::Format &pFormat, std::optional<vk::ImageTiling> tiling, vk::ImageLayout layout, texture::Swizzle swizzle) {
if (!host.expired())
throw exception("Trying to create multiple Texture objects from a single GuestTexture");
auto sharedHost{std::make_shared<Texture>(*state.gpu, backing, shared_from_this(), pDimensions ? pDimensions : dimensions, pFormat ? pFormat : format, layout, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)};
host = sharedHost;
return sharedHost;
}
std::shared_ptr<Texture> GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional<vk::ImageTiling> tiling, vk::ImageLayout layout, const texture::Format &pFormat, texture::Dimensions pDimensions, texture::Swizzle swizzle) {
if (!host.expired())
throw exception("Trying to create multiple Texture objects from a single GuestTexture");
auto sharedHost{std::make_shared<Texture>(*state.gpu, std::move(backing), shared_from_this(), pDimensions ? pDimensions : dimensions, pFormat ? pFormat : format, layout, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)};
host = sharedHost;
return sharedHost;
}
std::shared_ptr<Texture> GuestTexture::CreateTexture(vk::ImageUsageFlags usage, std::optional<vk::ImageTiling> pTiling, vk::ImageLayout initialLayout, const texture::Format &pFormat, texture::Dimensions pDimensions, texture::Swizzle swizzle) {
if (!host.expired())
throw exception("Trying to create multiple Texture objects from a single GuestTexture");
pDimensions = pDimensions ? pDimensions : dimensions;
const auto &lFormat{pFormat ? pFormat : format};
auto tiling{pTiling ? *pTiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear};
vk::ImageCreateInfo imageCreateInfo{
.imageType = pDimensions.GetType(),
.format = lFormat,
.extent = pDimensions,
.mipLevels = 1,
.arrayLayers = 1,
.samples = vk::SampleCountFlagBits::e1,
.tiling = tiling,
.usage = usage | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst,
.sharingMode = vk::SharingMode::eExclusive,
.queueFamilyIndexCount = 1,
.pQueueFamilyIndices = &state.gpu->vkQueueFamilyIndex,
.initialLayout = initialLayout,
};
auto sharedHost{std::make_shared<Texture>(*state.gpu, tiling != vk::ImageTiling::eLinear ? state.gpu->memory.AllocateImage(imageCreateInfo) : state.gpu->memory.AllocateMappedImage(imageCreateInfo), shared_from_this(), pDimensions, lFormat, initialLayout, tiling, swizzle)};
host = sharedHost;
return sharedHost;
}
Texture::Texture(GPU &gpu, BackingType &&backing, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), layout(layout), guest(std::move(guest)), dimensions(dimensions), format(format), tiling(tiling), mapping(mapping) {
if (GetBacking()) if (GetBacking())
SynchronizeHost(); SynchronizeHost();
} }
Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), guest(nullptr), dimensions(dimensions), format(format), layout(layout), tiling(tiling), mapping(mapping) {} Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) : gpu(gpu), backing(std::move(backing)), dimensions(dimensions), format(format), layout(layout), tiling(tiling), mipLevels(mipLevels), layerCount(layerCount), sampleCount(sampleCount) {}
Texture::Texture(GPU &gpu, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout initialLayout, vk::ImageUsageFlags usage, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), guest(nullptr), dimensions(dimensions), format(format), layout(initialLayout), tiling(tiling), mapping(mapping) { Texture::Texture(GPU &pGpu, GuestTexture pGuest)
// Creates a host texture from a guest texture: all host properties are derived from the guest texture and a new image is allocated as the backing
: gpu(pGpu),
// NOTE(review): The initializers below read `guest->...` after pGuest has been moved into `guest`, members are initialized in declaration order rather than the order written here so `guest` needs to be declared before all of them in the class — confirm against the header
guest(std::move(pGuest)),
dimensions(guest->dimensions),
format(guest->format),
layout(vk::ImageLayout::eGeneral),
tiling((guest->tileConfig.mode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear), // Only block-tiled guest textures use optimal tiling on the host, every other tile mode results in linear tiling
mipLevels(1),
layerCount(guest->layerCount),
sampleCount(vk::SampleCountFlagBits::e1) {
vk::ImageCreateInfo imageCreateInfo{
.imageType = guest->dimensions.GetType(),
.format = *guest->format,
.extent = guest->dimensions,
.mipLevels = 1,
.arrayLayers = guest->layerCount,
.samples = vk::SampleCountFlagBits::e1,
.tiling = tiling,
.usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, // The usage is fixed to a color attachment that can be a transfer source and destination
.sharingMode = vk::SharingMode::eExclusive,
.queueFamilyIndexCount = 1,
.pQueueFamilyIndices = &gpu.vkQueueFamilyIndex,
.initialLayout = layout, // NOTE(review): `layout` is eGeneral at this point while Vulkan only permits eUndefined or ePreinitialized as VkImageCreateInfo::initialLayout — confirm and consider creating the image as eUndefined followed by a transition
};
backing = tiling != vk::ImageTiling::eLinear ? gpu.memory.AllocateImage(imageCreateInfo) : gpu.memory.AllocateMappedImage(imageCreateInfo); // Linear images are allocated through AllocateMappedImage, presumably so that they can be accessed through a CPU mapping
}
Texture::Texture(GPU &gpu, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout initialLayout, vk::ImageUsageFlags usage, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) : gpu(gpu), dimensions(dimensions), format(format), layout(initialLayout), tiling(tiling), mipLevels(mipLevels), layerCount(layerCount), sampleCount(sampleCount) {
vk::ImageCreateInfo imageCreateInfo{ vk::ImageCreateInfo imageCreateInfo{
.imageType = dimensions.GetType(), .imageType = dimensions.GetType(),
.format = format, .format = *format,
.extent = dimensions, .extent = dimensions,
.mipLevels = 1, .mipLevels = mipLevels,
.arrayLayers = 1, .arrayLayers = layerCount,
.samples = vk::SampleCountFlagBits::e1, .samples = sampleCount,
.tiling = tiling, .tiling = tiling,
.usage = usage | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, .usage = usage | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst,
.sharingMode = vk::SharingMode::eExclusive, .sharingMode = vk::SharingMode::eExclusive,
@ -132,10 +114,14 @@ namespace skyline::gpu {
void Texture::SynchronizeHost() { void Texture::SynchronizeHost() {
if (!guest) if (!guest)
throw exception("Synchronization of host textures requires a valid guest texture to synchronize from"); throw exception("Synchronization of host textures requires a valid guest texture to synchronize from");
else if (guest->mappings.size() != 1)
throw exception("Synchronization of non-contigious textures is not supported");
else if (guest->dimensions != dimensions)
throw exception("Guest and host dimensions being different is not supported currently");
TRACE_EVENT("gpu", "Texture::SynchronizeHost"); TRACE_EVENT("gpu", "Texture::SynchronizeHost");
auto pointer{guest->pointer}; auto pointer{guest->mappings[0].data()};
auto size{format.GetSize(dimensions)}; auto size{format->GetSize(dimensions)};
u8 *bufferData; u8 *bufferData;
auto stagingBuffer{[&]() -> std::shared_ptr<memory::StagingBuffer> { auto stagingBuffer{[&]() -> std::shared_ptr<memory::StagingBuffer> {
@ -154,24 +140,24 @@ namespace skyline::gpu {
} }
}()}; }()};
if (guest->tileMode == texture::TileMode::Block) { if (guest->tileConfig.mode == texture::TileMode::Block) {
// Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32 // Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32
constexpr u8 SectorWidth{16}; // The width of a sector in bytes constexpr u8 SectorWidth{16}; // The width of a sector in bytes
constexpr u8 SectorHeight{2}; // The height of a sector in lines constexpr u8 SectorHeight{2}; // The height of a sector in lines
constexpr u8 GobWidth{64}; // The width of a GOB in bytes constexpr u8 GobWidth{64}; // The width of a GOB in bytes
constexpr u8 GobHeight{8}; // The height of a GOB in lines constexpr u8 GobHeight{8}; // The height of a GOB in lines
auto blockHeight{guest->tileConfig.blockHeight}; // The height of the blocks in GOBs auto blockHeight{guest->tileConfig.blockHeight}; //!< The height of the blocks in GOBs
auto robHeight{GobHeight * blockHeight}; // The height of a single ROB (Row of Blocks) in lines auto robHeight{GobHeight * blockHeight}; //!< The height of a single ROB (Row of Blocks) in lines
auto surfaceHeight{dimensions.height / guest->format.blockHeight}; // The height of the surface in lines auto surfaceHeight{guest->dimensions.height / guest->format->blockHeight}; //!< The height of the surface in lines
auto surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; // The height of the surface in ROBs (Row Of Blocks) auto surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; //!< The height of the surface in ROBs (Row Of Blocks)
auto robWidthBytes{util::AlignUp((guest->tileConfig.surfaceWidth / guest->format.blockWidth) * guest->format.bpb, GobWidth)}; // The width of a ROB in bytes auto robWidthBytes{util::AlignUp((guest->dimensions.width / guest->format->blockWidth) * guest->format->bpb, GobWidth)}; //!< The width of a ROB in bytes
auto robWidthBlocks{robWidthBytes / GobWidth}; // The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1) auto robWidthBlocks{robWidthBytes / GobWidth}; //!< The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1)
auto robBytes{robWidthBytes * robHeight}; // The size of a ROB in bytes auto robBytes{robWidthBytes * robHeight}; //!< The size of a ROB in bytes
auto gobYOffset{robWidthBytes * GobHeight}; // The offset of the next Y-axis GOB from the current one in linear space auto gobYOffset{robWidthBytes * GobHeight}; //!< The offset of the next Y-axis GOB from the current one in linear space
auto inputSector{pointer}; // The address of the input sector auto inputSector{pointer}; //!< The address of the input sector
auto outputRob{bufferData}; // The address of the output block auto outputRob{bufferData}; //!< The address of the output block
for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs
auto outputBlock{outputRob}; // We iterate through a block independently of the ROB auto outputBlock{outputRob}; // We iterate through a block independently of the ROB
@ -195,24 +181,24 @@ namespace skyline::gpu {
blockHeight = static_cast<u8>(std::min(static_cast<u32>(blockHeight), (surfaceHeight - y) / GobHeight)); // Calculate the amount of Y GOBs which aren't padding blockHeight = static_cast<u8>(std::min(static_cast<u32>(blockHeight), (surfaceHeight - y) / GobHeight)); // Calculate the amount of Y GOBs which aren't padding
paddingY = (guest->tileConfig.blockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight); // Calculate the amount of padding between contiguous sectors paddingY = (guest->tileConfig.blockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight); // Calculate the amount of padding between contiguous sectors
} }
} else if (guest->tileMode == texture::TileMode::Pitch) { } else if (guest->tileConfig.mode == texture::TileMode::Pitch) {
auto sizeLine{guest->format.GetSize(dimensions.width, 1)}; // The size of a single line of pixel data auto sizeLine{guest->format->GetSize(guest->dimensions.width, 1)}; //!< The size of a single line of pixel data
auto sizeStride{guest->format.GetSize(guest->tileConfig.pitch, 1)}; // The size of a single stride of pixel data auto sizeStride{guest->format->GetSize(guest->tileConfig.pitch, 1)}; //!< The size of a single stride of pixel data
auto inputLine{pointer}; // The address of the input line auto inputLine{pointer}; //!< The address of the input line
auto outputLine{bufferData}; // The address of the output line auto outputLine{bufferData}; //!< The address of the output line
for (u32 line{}; line < dimensions.height; line++) { for (u32 line{}; line < guest->dimensions.height; line++) {
std::memcpy(outputLine, inputLine, sizeLine); std::memcpy(outputLine, inputLine, sizeLine);
inputLine += sizeStride; inputLine += sizeStride;
outputLine += sizeLine; outputLine += sizeLine;
} }
} else if (guest->tileMode == texture::TileMode::Linear) { } else if (guest->tileConfig.mode == texture::TileMode::Linear) {
std::memcpy(bufferData, pointer, size); std::memcpy(bufferData, pointer, size);
} }
if (stagingBuffer) { if (stagingBuffer) {
if (WaitOnBacking() && size != format.GetSize(dimensions)) if (WaitOnBacking() && size != format->GetSize(dimensions))
throw exception("Backing properties changing during sync is not supported"); throw exception("Backing properties changing during sync is not supported");
WaitOnFence(); WaitOnFence();
@ -269,6 +255,8 @@ namespace skyline::gpu {
void Texture::SynchronizeGuest() { void Texture::SynchronizeGuest() {
if (!guest) if (!guest)
throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to"); throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to");
else if (guest->mappings.size() != 1)
throw exception("Synchronization of non-contigious textures is not supported");
WaitOnBacking(); WaitOnBacking();
WaitOnFence(); WaitOnFence();
@ -277,7 +265,7 @@ namespace skyline::gpu {
// TODO: Write Host -> Guest Synchronization // TODO: Write Host -> Guest Synchronization
} }
void Texture::CopyFrom(std::shared_ptr<Texture> source) { void Texture::CopyFrom(std::shared_ptr<Texture> source, const vk::ImageSubresourceRange &subresource) {
WaitOnBacking(); WaitOnBacking();
WaitOnFence(); WaitOnFence();
@ -302,11 +290,7 @@ namespace skyline::gpu {
.newLayout = vk::ImageLayout::eTransferSrcOptimal, .newLayout = vk::ImageLayout::eTransferSrcOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = { .subresourceRange = subresource,
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
},
}); });
} }
@ -320,26 +304,23 @@ namespace skyline::gpu {
.newLayout = vk::ImageLayout::eTransferDstOptimal, .newLayout = vk::ImageLayout::eTransferDstOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = { .subresourceRange = subresource,
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
},
}); });
if (layout == vk::ImageLayout::eUndefined) if (layout == vk::ImageLayout::eUndefined)
layout = vk::ImageLayout::eTransferDstOptimal; layout = vk::ImageLayout::eTransferDstOptimal;
} }
vk::ImageSubresourceLayers subresourceLayers{
.aspectMask = subresource.aspectMask,
.mipLevel = subresource.baseMipLevel,
.baseArrayLayer = subresource.baseArrayLayer,
.layerCount = subresource.layerCount == VK_REMAINING_ARRAY_LAYERS ? layerCount - subresource.baseArrayLayer : subresource.layerCount,
};
for (; subresourceLayers.mipLevel < (subresource.levelCount == VK_REMAINING_MIP_LEVELS ? mipLevels - subresource.baseMipLevel : subresource.levelCount); subresourceLayers.mipLevel++)
commandBuffer.copyImage(sourceBacking, vk::ImageLayout::eTransferSrcOptimal, destinationBacking, vk::ImageLayout::eTransferDstOptimal, vk::ImageCopy{ commandBuffer.copyImage(sourceBacking, vk::ImageLayout::eTransferSrcOptimal, destinationBacking, vk::ImageLayout::eTransferDstOptimal, vk::ImageCopy{
.srcSubresource = { .srcSubresource = subresourceLayers,
.aspectMask = vk::ImageAspectFlagBits::eColor, .dstSubresource = subresourceLayers,
.layerCount = 1,
},
.dstSubresource = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.layerCount = 1,
},
.extent = dimensions, .extent = dimensions,
}); });
@ -352,11 +333,7 @@ namespace skyline::gpu {
.newLayout = layout, .newLayout = layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = { .subresourceRange = subresource,
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
},
}); });
if (layout != vk::ImageLayout::eTransferSrcOptimal) if (layout != vk::ImageLayout::eTransferSrcOptimal)
@ -368,13 +345,38 @@ namespace skyline::gpu {
.newLayout = source->layout, .newLayout = source->layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = { .subresourceRange = subresource,
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
},
}); });
}); });
cycle->AttachObjects(source, shared_from_this()); cycle->AttachObjects(std::move(source), shared_from_this());
}
// Records the backing texture and the requested view properties (type, subresource range, format, component mapping); the constructor body is empty so no Vulkan image view is created here
TextureView::TextureView(std::shared_ptr<Texture> backing, vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format format, vk::ComponentMapping mapping) : backing(std::move(backing)), type(type), format(format), mapping(mapping), range(range) {}
vk::ImageView TextureView::GetView() {
/*
if (view)
return **view;
auto viewType{[&]() {
switch (backing->dimensions.GetType()) {
case vk::ImageType::e1D:
return range.layerCount > 1 ? vk::ImageViewType::e1DArray : vk::ImageViewType::e1D;
case vk::ImageType::e2D:
return range.layerCount > 1 ? vk::ImageViewType::e2DArray : vk::ImageViewType::e2D;
case vk::ImageType::e3D:
return vk::ImageViewType::e3D;
}
}()};
return *view.emplace(backing->gpu.vkDevice, vk::ImageViewCreateInfo{
.image = backing->GetBacking(),
.viewType = vk::ImageViewType::eCube,
.format = format ? *format : *backing->format,
.components = mapping,
.subresourceRange = range,
});
*/
throw exception("TODO: TextureView::GetView");
} }
} }

View File

@ -3,7 +3,7 @@
#pragma once #pragma once
#include <gpu/fence_cycle.h> #include <gpu/memory_manager.h>
namespace skyline::gpu { namespace skyline::gpu {
namespace texture { namespace texture {
@ -27,9 +27,9 @@ namespace skyline::gpu {
auto operator<=>(const Dimensions &) const = default; auto operator<=>(const Dimensions &) const = default;
constexpr vk::ImageType GetType() const { constexpr vk::ImageType GetType() const {
if (depth) if (depth > 1)
return vk::ImageType::e3D; return vk::ImageType::e3D;
else if (width) else if (height > 1)
return vk::ImageType::e2D; return vk::ImageType::e2D;
else else
return vk::ImageType::e1D; return vk::ImageType::e1D;
@ -61,11 +61,12 @@ namespace skyline::gpu {
/** /**
* @note Blocks refers to the atomic unit of a compressed format (IE: The minimum amount of data that can be decompressed) * @note Blocks refers to the atomic unit of a compressed format (IE: The minimum amount of data that can be decompressed)
*/ */
struct Format { struct FormatBase {
u8 bpb{}; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats u8 bpb{}; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats
u16 blockHeight{}; //!< The height of a block in pixels u16 blockHeight{}; //!< The height of a block in pixels
u16 blockWidth{}; //!< The width of a block in pixels u16 blockWidth{}; //!< The width of a block in pixels
vk::Format vkFormat{vk::Format::eUndefined}; vk::Format vkFormat{vk::Format::eUndefined};
vk::ImageAspectFlags vkAspect{vk::ImageAspectFlagBits::eColor};
constexpr bool IsCompressed() const { constexpr bool IsCompressed() const {
return (blockHeight != 1) || (blockWidth != 1); return (blockHeight != 1) || (blockWidth != 1);
@ -85,11 +86,11 @@ namespace skyline::gpu {
return GetSize(dimensions.width, dimensions.height, dimensions.depth); return GetSize(dimensions.width, dimensions.height, dimensions.depth);
} }
constexpr bool operator==(const Format &format) const { constexpr bool operator==(const FormatBase &format) const {
return vkFormat == format.vkFormat; return vkFormat == format.vkFormat;
} }
constexpr bool operator!=(const Format &format) const { constexpr bool operator!=(const FormatBase &format) const {
return vkFormat != format.vkFormat; return vkFormat != format.vkFormat;
} }
@ -103,6 +104,38 @@ namespace skyline::gpu {
constexpr operator bool() const { constexpr operator bool() const {
return bpb; return bpb;
} }
/**
* @return If the supplied format is texel-layout compatible with the current format
*/
constexpr bool IsCompatible(const FormatBase &other) const {
return bpb == other.bpb && blockHeight == other.blockHeight && blockWidth == other.blockWidth;
}
};
/**
* @brief A wrapper around a pointer to underlying format metadata to prevent redundant copies
*/
class Format {
private:
const FormatBase *base;
public:
// Wraps existing format metadata, only the address of `base` is stored so the referenced FormatBase has to outlive this wrapper
constexpr Format(const FormatBase &base) : base(&base) {}
// Creates an empty wrapper holding a null pointer, it converts to false through operator bool
constexpr Format() : base(nullptr) {}
// Member access into the wrapped metadata, the returned pointer is null for a default-constructed Format
constexpr const FormatBase *operator->() const {
return base;
}
// Dereferences the stored pointer without a null check, callers must not use this on a default-constructed Format
constexpr const FormatBase &operator*() const {
return *base;
}
// True when this wrapper points at format metadata, false for a default-constructed (null) Format
constexpr operator bool() const {
    return base != nullptr;
}
}; };
/** /**
@ -118,15 +151,28 @@ namespace skyline::gpu {
/** /**
* @brief The parameters of the tiling mode, covered in Table 76 in the Tegra X1 TRM * @brief The parameters of the tiling mode, covered in Table 76 in the Tegra X1 TRM
*/ */
union TileConfig { struct TileConfig {
TileMode mode;
union {
struct { struct {
u8 blockHeight; //!< The height of the blocks in GOBs u8 blockHeight; //!< The height of the blocks in GOBs
u8 blockDepth; //!< The depth of the blocks in GOBs u8 blockDepth; //!< The depth of the blocks in GOBs
u16 surfaceWidth; //!< The width of a surface in samples
}; };
u32 pitch; //!< The pitch of the texture if it's pitch linear u32 pitch; //!< The pitch of the texture if it's pitch linear
}; };
/**
 * @brief Compares two tiling configurations, only the fields that are meaningful for the shared tiling mode take part
 * @return True for equal modes with equal mode-specific parameters, false when the modes differ or the mode is not one of Linear/Pitch/Block
 */
constexpr bool operator==(const TileConfig &other) const {
    if (mode != other.mode)
        return false;

    switch (mode) {
        case TileMode::Linear:
            return true; // Linear has no extra parameters to compare
        case TileMode::Pitch:
            return pitch == other.pitch;
        case TileMode::Block:
            return blockHeight == other.blockHeight && blockDepth == other.blockDepth; // surfaceWidth is not part of the comparison
        default:
            return false;
    }
}
};
enum class SwizzleChannel : u8 { enum class SwizzleChannel : u8 {
Zero, //!< Write 0 to the channel Zero, //!< Write 0 to the channel
One, //!< Write 1 to the channel One, //!< Write 1 to the channel
@ -168,56 +214,73 @@ namespace skyline::gpu {
}; };
} }
}; };
/**
* @brief The type of a texture to determine the access patterns for it
* @note This is effectively the Tegra X1 texture types with the 1DBuffer + 2DNoMipmap removed as those are handled elsewhere
* @note We explicitly utilize Vulkan types here as it provides the most efficient conversion while not exposing Vulkan to the outer API
* @note Every enumerator takes the numeric value of the matching VK_IMAGE_VIEW_TYPE_* constant, which is what lets a value be static_cast to vk::ImageViewType
*/
enum class TextureType {
e1D = VK_IMAGE_VIEW_TYPE_1D, //!< Same value as VK_IMAGE_VIEW_TYPE_1D
e2D = VK_IMAGE_VIEW_TYPE_2D, //!< Same value as VK_IMAGE_VIEW_TYPE_2D
e3D = VK_IMAGE_VIEW_TYPE_3D, //!< Same value as VK_IMAGE_VIEW_TYPE_3D
eCube = VK_IMAGE_VIEW_TYPE_CUBE, //!< Same value as VK_IMAGE_VIEW_TYPE_CUBE
e1DArray = VK_IMAGE_VIEW_TYPE_1D_ARRAY, //!< Same value as VK_IMAGE_VIEW_TYPE_1D_ARRAY
e2DArray = VK_IMAGE_VIEW_TYPE_2D_ARRAY, //!< Same value as VK_IMAGE_VIEW_TYPE_2D_ARRAY
eCubeArray = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, //!< Same value as VK_IMAGE_VIEW_TYPE_CUBE_ARRAY
};
} }
class Texture; class Texture;
class PresentationEngine; //!< A forward declaration of PresentationEngine as we require it to be able to create a Texture object class PresentationEngine; //!< A forward declaration of PresentationEngine as we require it to be able to create a Texture object
/** /**
* @brief A texture present in guest memory, it can be used to create a corresponding Texture object for usage on the host * @brief A descriptor for a texture present in guest memory, it can be used to create a corresponding Texture object for usage on the host
*/ */
class GuestTexture : public std::enable_shared_from_this<GuestTexture> { struct GuestTexture {
private: using Mappings = boost::container::small_vector<span < u8>, 3>;
const DeviceState &state;
public: Mappings mappings; //!< Spans to CPU memory for the underlying data backing this texture
u8 *pointer; //!< The address of the texture in guest memory
std::weak_ptr<Texture> host; //!< A host texture (if any) that was created from this guest texture
texture::Dimensions dimensions; texture::Dimensions dimensions;
texture::Format format; texture::Format format;
texture::TileMode tileMode;
texture::TileConfig tileConfig; texture::TileConfig tileConfig;
texture::TextureType type;
u16 baseArrayLayer;
u16 layerCount;
u32 layerStride; //!< An optional hint regarding the size of a single layer, it will be set to 0 when not available
GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, const texture::Format &format, texture::TileMode tileMode = texture::TileMode::Linear, texture::TileConfig tileConfig = {}); GuestTexture() {}
constexpr size_t Size() { GuestTexture(Mappings mappings, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u16 baseArrayLayer = 0, u16 layerCount = 1, u32 layerStride = 0) : mappings(mappings), dimensions(dimensions), format(format), tileConfig(tileConfig), type(type), baseArrayLayer(baseArrayLayer), layerCount(layerCount), layerStride(layerStride) {}
return format.GetSize(dimensions);
} GuestTexture(span <u8> mapping, texture::Dimensions dimensions, texture::Format format, texture::TileConfig tileConfig, texture::TextureType type, u16 baseArrayLayer = 0, u16 layerCount = 1, u32 layerStride = 0) : mappings(1, mapping), dimensions(dimensions), format(format), tileConfig(tileConfig), type(type), baseArrayLayer(baseArrayLayer), layerCount(layerCount), layerStride(layerStride) {}
};
class TextureManager;
/** /**
* @brief Creates a corresponding host texture object for this guest texture * @brief A view into a specific subresource of a Texture
* @param backing The Vulkan Image that is used as the backing on the host, its lifetime is not managed by the host texture object
* @param dimensions The dimensions of the host texture (Defaults to the dimensions of the host texture)
* @param format The format of the host texture (Defaults to the format of the guest texture)
* @param tiling The tiling used by the image on host, this is the same as guest by default
* @param layout The initial layout of the Vulkan Image, this is used for efficient layout management
* @param swizzle The channel swizzle of the host texture (Defaults to no channel swizzling)
* @return A shared pointer to the host texture object
* @note There can only be one host texture for a corresponding guest texture
* @note If any of the supplied parameters do not match up with the backing then it's undefined behavior
*/ */
std::shared_ptr<Texture> InitializeTexture(vk::Image backing, texture::Dimensions dimensions = {}, const texture::Format &format = {}, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, texture::Swizzle swizzle = {}); class TextureView {
private:
vk::raii::ImageView *view{};
public:
std::shared_ptr<Texture> backing;
vk::ImageViewType type;
texture::Format format;
vk::ComponentMapping mapping;
vk::ImageSubresourceRange range;
/** /**
* @note As a RAII object is used here, the lifetime of the backing is handled by the host texture * @param format A compatible format for the texture view (Defaults to the format of the backing texture)
*/ */
std::shared_ptr<Texture> InitializeTexture(vk::raii::Image &&backing, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, const texture::Format &format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {}); TextureView(std::shared_ptr<Texture> backing, vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format format = {}, vk::ComponentMapping mapping = {});
/** /**
* @brief Similar to InitializeTexture but creation of the backing and allocation of memory for the backing is automatically performed by the function * @return A Vulkan Image View that corresponds to the properties of this view
* @param usage Usage flags that will applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory
*/ */
std::shared_ptr<Texture> CreateTexture(vk::ImageUsageFlags usage = {}, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, const texture::Format &format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {}); vk::ImageView GetView();
}; };
/** /**
@ -232,7 +295,30 @@ namespace skyline::gpu {
using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>; using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>;
BackingType backing; //!< The Vulkan image that backs this texture, it is nullable BackingType backing; //!< The Vulkan image that backs this texture, it is nullable
std::shared_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing std::shared_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing
friend TextureManager;
public:
std::optional<GuestTexture> guest;
texture::Dimensions dimensions;
texture::Format format;
vk::ImageLayout layout; vk::ImageLayout layout;
vk::ImageTiling tiling;
u32 mipLevels;
u32 layerCount; //!< The amount of array layers in the image, utilized for efficient binding (Not to be confused with the depth or faces in a cubemap)
vk::SampleCountFlagBits sampleCount;
Texture(GPU &gpu, BackingType &&backing, GuestTexture guest, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1);
Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1);
Texture(GPU &gpu, GuestTexture guest);
/**
* @brief Creates and allocates memory for the backing to create a texture object wrapping it
* @param usage Usage flags that will be applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory
*/
Texture(GPU &gpu, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, vk::ImageUsageFlags usage = {}, vk::ImageTiling tiling = vk::ImageTiling::eOptimal, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1);
/** /**
* @note The handle returned is nullable and the appropriate precautions should be taken * @note The handle returned is nullable and the appropriate precautions should be taken
@ -245,23 +331,6 @@ namespace skyline::gpu {
}, backing); }, backing);
} }
public:
std::shared_ptr<GuestTexture> guest; //!< The guest texture from which this was created, it's required for syncing
texture::Dimensions dimensions;
texture::Format format;
vk::ImageTiling tiling;
vk::ComponentMapping mapping;
Texture(GPU &gpu, BackingType &&backing, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping);
Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping = {});
/**
* @brief Creates and allocates memory for the backing to creates a texture object wrapping it
* @param usage Usage flags that will applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory
*/
Texture(GPU &gpu, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, vk::ImageUsageFlags usage = {}, vk::ImageTiling tiling = vk::ImageTiling::eOptimal, vk::ComponentMapping mapping = {});
/** /**
* @brief Acquires an exclusive lock on the texture for the calling thread * @brief Acquires an exclusive lock on the texture for the calling thread
* @note Naming is in accordance to the BasicLockable named requirement * @note Naming is in accordance to the BasicLockable named requirement
@ -311,28 +380,11 @@ namespace skyline::gpu {
*/ */
void TransitionLayout(vk::ImageLayout layout); void TransitionLayout(vk::ImageLayout layout);
/**
* @brief Convert this texture to the specified tiling mode
* @param tileMode The tiling mode to convert it to
* @param tileConfig The configuration for the tiling mode (Can be default argument for Linear)
*/
void ConvertTileMode(texture::TileMode tileMode, texture::TileConfig tileConfig = {});
/**
* @brief Converts the texture dimensions to the specified ones (As long as they are within the GuestTexture's range)
*/
void SetDimensions(texture::Dimensions dimensions);
/** /**
* @brief Converts the texture to have the specified format * @brief Converts the texture to have the specified format
*/ */
void SetFormat(texture::Format format); void SetFormat(texture::Format format);
/**
* @brief Change the texture channel swizzle to the specified one
*/
void SetSwizzle(texture::Swizzle swizzle);
/** /**
* @brief Synchronizes the host texture with the guest after it has been modified * @brief Synchronizes the host texture with the guest after it has been modified
* @note The texture **must** be locked prior to calling this * @note The texture **must** be locked prior to calling this
@ -350,6 +402,10 @@ namespace skyline::gpu {
/** /**
* @brief Copies the contents of the supplied source texture into the current texture * @brief Copies the contents of the supplied source texture into the current texture
*/ */
void CopyFrom(std::shared_ptr<Texture> source); void CopyFrom(std::shared_ptr<Texture> source, const vk::ImageSubresourceRange &subresource = vk::ImageSubresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = VK_REMAINING_MIP_LEVELS,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
});
}; };
} }

View File

@ -0,0 +1,84 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "texture_manager.h"
namespace skyline::gpu {
TextureManager::TextureManager(GPU &gpu) : gpu(gpu) {}
TextureView TextureManager::FindOrCreate(const GuestTexture &guestTexture) {
auto guestMapping{guestTexture.mappings.front()};
// Iterate over all textures that overlap with the first mapping of the guest texture and compare the mappings:
// 1) All mappings match up perfectly, we check that the rest of the supplied mappings correspond to mappings in the texture
// 1.1) If they match as well, we check for format/dimensions/tiling config matching the texture and return or move onto (3)
// 2) Only a contiguous range of mappings match, we check for if the overlap is meaningful with layout math, it can go two ways:
// 2.1) If there is a meaningful overlap, we check for format/dimensions/tiling config compatibility and return or move onto (3)
// 2.2) If there isn't, we move onto (3)
// 3) If there's another overlap we go back to (1) with it else we go to (4)
// 4) We check all the overlapping texture for if they're in the texture pool:
// 4.1) If they are, we do nothing to them
// 4.2) If they aren't, we delete them from the map
// 5) Create a new texture and insert it in the map then return it
std::shared_ptr<Texture> match{};
auto mappingEnd{std::upper_bound(textures.begin(), textures.end(), guestMapping)}, hostMapping{mappingEnd};
while (hostMapping != textures.begin() && std::prev(hostMapping)->end() > guestMapping.begin()) {
auto &hostMappings{hostMapping->texture->guest->mappings};
// We need to check that all corresponding mappings in the candidate texture and the guest texture match up
// Only the start of the first matched mapping and the end of the last mapping can not match up as this is the case for views
auto firstHostMapping{hostMapping->iterator};
auto lastGuestMapping{guestTexture.mappings.back()};
auto lastHostMapping{std::find_if(firstHostMapping, hostMappings.end(), [&lastGuestMapping](const span<u8> &it) {
return lastGuestMapping.begin() >= it.begin() && lastGuestMapping.size() <= it.size();
})};
bool mappingMatch{std::equal(firstHostMapping, lastHostMapping, guestTexture.mappings.begin(), guestTexture.mappings.end(), [](const span<u8> &lhs, const span<u8> &rhs) {
return lhs.end() == rhs.end(); // We check end() here to implicitly ignore any offset from the first mapping
})};
if (firstHostMapping == hostMappings.begin() && firstHostMapping->begin() == guestMapping.begin() && mappingMatch && lastHostMapping == std::prev(hostMappings.end()) && lastGuestMapping.end() == lastHostMapping->end()) {
// We've gotten a perfect 1:1 match for *all* mappings from the start to end, we just need to check for compatibility aside from this
auto &matchGuestTexture{*hostMapping->texture->guest};
if (matchGuestTexture.format->IsCompatible(*guestTexture.format) && matchGuestTexture.dimensions == guestTexture.dimensions && matchGuestTexture.tileConfig == guestTexture.tileConfig) {
auto &texture{hostMapping->texture};
return TextureView(texture, static_cast<vk::ImageViewType>(guestTexture.type), vk::ImageSubresourceRange{
.aspectMask = guestTexture.format->vkAspect,
.levelCount = texture->mipLevels,
.layerCount = texture->layerCount,
}, guestTexture.format);
}
} else if (mappingMatch) {
// We've gotten a partial match with a certain subset of contiguous mappings matching, we need to check if this is a meaningful overlap
if (false) {
// TODO: Layout Checks + Check match against Base Layer in TIC
auto &texture{hostMapping->texture};
return TextureView(texture, static_cast<vk::ImageViewType>(guestTexture.type), vk::ImageSubresourceRange{
.aspectMask = guestTexture.format->vkAspect,
.levelCount = texture->mipLevels,
.layerCount = texture->layerCount,
}, guestTexture.format);
}
}
}
// Create a texture as we cannot find one that matches
auto texture{std::make_shared<Texture>(gpu, guestTexture)};
auto it{texture->guest->mappings.begin()};
textures.emplace(mappingEnd, TextureMapping{texture, it, guestMapping});
while ((++it) != texture->guest->mappings.end()) {
guestMapping = *it;
mappingEnd = hostMapping = std::upper_bound(textures.begin(), textures.end(), guestMapping);
while (hostMapping != textures.begin() && std::prev(hostMapping)->end() > guestMapping.begin()) {
// TODO: Delete textures not in texture pool
}
textures.emplace(mappingEnd, TextureMapping{texture, it, guestMapping});
}
return TextureView(texture, static_cast<vk::ImageViewType>(guestTexture.type), vk::ImageSubresourceRange{
.aspectMask = guestTexture.format->vkAspect,
.levelCount = texture->mipLevels,
.layerCount = texture->layerCount,
}, guestTexture.format);
}
}

View File

@ -0,0 +1,42 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include "texture/texture.h"
#include <random>
namespace skyline::gpu {
/**
 * @brief The Texture Manager is responsible for maintaining a global view of textures being mapped from the guest to the host, any lookups and creation of host texture from equivalent guest textures alongside reconciliation of any overlaps with existing textures
 */
class TextureManager {
  private:
    /**
     * @brief A single contiguous mapping of a texture in the CPU address space
     * @note This derives from span<u8> so that an entry can be compared and searched by its address range directly
     */
    struct TextureMapping : span<u8> {
        std::shared_ptr<Texture> texture; //!< The host texture which owns this mapping
        GuestTexture::Mappings::iterator iterator; //!< An iterator to the mapping in the texture's GuestTexture corresponding to this mapping

        /**
         * @param args Forwarded as-is to the span<u8> constructor to describe the mapped range
         */
        template<typename... Args>
        TextureMapping(std::shared_ptr<Texture> texture, GuestTexture::Mappings::iterator iterator, Args &&... args) : span<u8>(std::forward<Args>(args)...), texture(std::move(texture)), iterator(iterator) {}
    };

    GPU &gpu;
    std::mutex mutex; //!< Synchronizes access to the texture mappings
    std::vector<TextureMapping> textures; //!< A sorted vector of all texture mappings

    /**
     * @return If both the dimensions and the tiling configurations are equal
     * @note This doesn't read any instance state which is why it is static
     */
    static bool IsSizeCompatible(texture::Dimensions lhsDimension, texture::TileConfig lhsConfig, texture::Dimensions rhsDimension, texture::TileConfig rhsConfig) {
        return lhsDimension == rhsDimension && lhsConfig == rhsConfig;
    }

  public:
    /**
     * @note This is explicit to prevent a GPU reference from silently converting into a manager
     */
    explicit TextureManager(GPU &gpu);

    /**
     * @return A pre-existing or newly created Texture object which matches the specified criteria
     */
    TextureView FindOrCreate(const GuestTexture &guestTexture);
};
}

View File

@ -69,12 +69,8 @@ namespace skyline::service::hosbinder {
for (auto &slot : queue) { for (auto &slot : queue) {
slot.state = BufferState::Free; slot.state = BufferState::Free;
slot.frameNumber = std::numeric_limits<u32>::max(); slot.frameNumber = std::numeric_limits<u32>::max();
if (std::exchange(slot.texture, {}))
if (slot.texture) {
slot.texture = {};
FreeGraphicBufferNvMap(*slot.graphicBuffer); FreeGraphicBufferNvMap(*slot.graphicBuffer);
}
slot.graphicBuffer = nullptr; slot.graphicBuffer = nullptr;
} }
} else if (preallocatedBufferCount < count) { } else if (preallocatedBufferCount < count) {
@ -172,12 +168,8 @@ namespace skyline::service::hosbinder {
bufferSlot.state = BufferState::Free; bufferSlot.state = BufferState::Free;
bufferSlot.frameNumber = std::numeric_limits<u32>::max(); bufferSlot.frameNumber = std::numeric_limits<u32>::max();
if (std::exchange(bufferSlot.texture, {}))
if (bufferSlot.texture) {
bufferSlot.texture = {};
FreeGraphicBufferNvMap(*bufferSlot.graphicBuffer); FreeGraphicBufferNvMap(*bufferSlot.graphicBuffer);
}
bufferSlot.graphicBuffer = nullptr; bufferSlot.graphicBuffer = nullptr;
bufferEvent->Signal(); bufferEvent->Signal();
@ -201,12 +193,8 @@ namespace skyline::service::hosbinder {
bufferSlot->state = BufferState::Free; bufferSlot->state = BufferState::Free;
bufferSlot->frameNumber = std::numeric_limits<u32>::max(); bufferSlot->frameNumber = std::numeric_limits<u32>::max();
if (std::exchange(bufferSlot->texture, {}))
if (bufferSlot->texture) {
bufferSlot->texture = {};
FreeGraphicBufferNvMap(*bufferSlot->graphicBuffer); FreeGraphicBufferNvMap(*bufferSlot->graphicBuffer);
}
graphicBuffer = *std::exchange(bufferSlot->graphicBuffer, nullptr); graphicBuffer = *std::exchange(bufferSlot->graphicBuffer, nullptr);
fence = AndroidFence{}; fence = AndroidFence{};
@ -226,11 +214,6 @@ namespace skyline::service::hosbinder {
} }
} }
if (bufferSlot->texture) {
bufferSlot->texture = {};
FreeGraphicBufferNvMap(*bufferSlot->graphicBuffer);
}
if (bufferSlot == queue.end()) { if (bufferSlot == queue.end()) {
state.logger->Warn("Could not find any free slots to attach the graphic buffer to"); state.logger->Warn("Could not find any free slots to attach the graphic buffer to");
return AndroidStatus::NoMemory; return AndroidStatus::NoMemory;
@ -258,6 +241,8 @@ namespace skyline::service::hosbinder {
bufferSlot->state = BufferState::Dequeued; bufferSlot->state = BufferState::Dequeued;
bufferSlot->wasBufferRequested = true; bufferSlot->wasBufferRequested = true;
bufferSlot->isPreallocated = false; bufferSlot->isPreallocated = false;
if (std::exchange(bufferSlot->texture, {}))
FreeGraphicBufferNvMap(*bufferSlot->graphicBuffer);
bufferSlot->graphicBuffer = std::make_unique<GraphicBuffer>(graphicBuffer); bufferSlot->graphicBuffer = std::make_unique<GraphicBuffer>(graphicBuffer);
slot = std::distance(queue.begin(), bufferSlot); slot = std::distance(queue.begin(), bufferSlot);
@ -342,26 +327,24 @@ namespace skyline::service::hosbinder {
if (surface.size > (nvMapHandleObj->origSize - surface.offset)) if (surface.size > (nvMapHandleObj->origSize - surface.offset))
throw exception("Surface doesn't fit into NvMap mapping of size 0x{:X} when mapped at 0x{:X} -> 0x{:X}", nvMapHandleObj->origSize, surface.offset, surface.offset + surface.size); throw exception("Surface doesn't fit into NvMap mapping of size 0x{:X} when mapped at 0x{:X} -> 0x{:X}", nvMapHandleObj->origSize, surface.offset, surface.offset + surface.size);
gpu::texture::TileMode tileMode;
gpu::texture::TileConfig tileConfig{}; gpu::texture::TileConfig tileConfig{};
if (surface.layout == NvSurfaceLayout::Blocklinear) { if (surface.layout == NvSurfaceLayout::Blocklinear) {
tileMode = gpu::texture::TileMode::Block;
tileConfig = { tileConfig = {
.surfaceWidth = static_cast<u16>(surface.width), .mode = gpu::texture::TileMode::Block,
.blockHeight = static_cast<u8>(1U << surface.blockHeightLog2), .blockHeight = static_cast<u8>(1U << surface.blockHeightLog2),
.blockDepth = 1, .blockDepth = 1,
}; };
} else if (surface.layout == NvSurfaceLayout::Pitch) { } else if (surface.layout == NvSurfaceLayout::Pitch) {
tileMode = gpu::texture::TileMode::Pitch;
tileConfig = { tileConfig = {
.mode = gpu::texture::TileMode::Pitch,
.pitch = surface.pitch, .pitch = surface.pitch,
}; };
} else if (surface.layout == NvSurfaceLayout::Tiled) { } else if (surface.layout == NvSurfaceLayout::Tiled) {
throw exception("Legacy 16Bx16 tiled surfaces are not supported"); throw exception("Legacy 16Bx16 tiled surfaces are not supported");
} }
auto guestTexture{std::make_shared<gpu::GuestTexture>(state, nvMapHandleObj->GetPointer() + surface.offset, gpu::texture::Dimensions(surface.width, surface.height), format, tileMode, tileConfig)}; gpu::GuestTexture guestTexture(span<u8>(nvMapHandleObj->GetPointer() + surface.offset, surface.size), gpu::texture::Dimensions(surface.width, surface.height), format, tileConfig, gpu::texture::TextureType::e2D);
buffer.texture = guestTexture->CreateTexture({}, vk::ImageTiling::eLinear); buffer.texture = state.gpu->texture.FindOrCreate(guestTexture).backing;
} }
switch (transform) { switch (transform) {
@ -545,12 +528,8 @@ namespace skyline::service::hosbinder {
for (auto &slot : queue) { for (auto &slot : queue) {
slot.state = BufferState::Free; slot.state = BufferState::Free;
slot.frameNumber = std::numeric_limits<u32>::max(); slot.frameNumber = std::numeric_limits<u32>::max();
if (std::exchange(slot.texture, {}))
if (slot.texture) {
slot.texture = {};
FreeGraphicBufferNvMap(*slot.graphicBuffer); FreeGraphicBufferNvMap(*slot.graphicBuffer);
}
slot.graphicBuffer = nullptr; slot.graphicBuffer = nullptr;
} }
@ -566,14 +545,11 @@ namespace skyline::service::hosbinder {
} }
auto &buffer{queue[slot]}; auto &buffer{queue[slot]};
if (buffer.texture) {
buffer.texture = {};
FreeGraphicBufferNvMap(*buffer.graphicBuffer);
}
buffer.state = BufferState::Free; buffer.state = BufferState::Free;
buffer.frameNumber = 0; buffer.frameNumber = 0;
buffer.wasBufferRequested = false; buffer.wasBufferRequested = false;
if (std::exchange(buffer.texture, {}))
FreeGraphicBufferNvMap(*buffer.graphicBuffer);
buffer.isPreallocated = graphicBuffer != nullptr; buffer.isPreallocated = graphicBuffer != nullptr;
buffer.graphicBuffer = graphicBuffer ? std::make_unique<GraphicBuffer>(*graphicBuffer) : nullptr; buffer.graphicBuffer = graphicBuffer ? std::make_unique<GraphicBuffer>(*graphicBuffer) : nullptr;

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: MPL-2.0 // SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d)
#pragma once #pragma once
@ -28,6 +29,44 @@ namespace skyline::soc::gm20b::engine::maxwell3d::type {
MethodReplay = 3, //!< Replays older tracked writes for any new writes to registers, discarding the contents of the new write MethodReplay = 3, //!< Replays older tracked writes for any new writes to registers, discarding the contents of the new write
}; };
constexpr static size_t RenderTargetCount{8}; //!< Maximum amount of render targets that can be bound at once on Maxwell 3D
/**
 * @brief The target image's metadata for any rendering operations
 * @note Any render target with ColorFormat::None as its format is effectively disabled
 * @note Every member maps onto a register word, the structure must span exactly 0x10 32-bit words which is enforced by the static_assert following it
 */
struct RenderTarget {
//! Location of the render target, its `high` and `low` members are written individually by the engine (presumably a guest GPU virtual address, confirm against the Address type)
Address address;
//! Width of the render target (NOTE(review): units are not visible here, presumably pixels, confirm)
u32 width;
//! Height of the render target (NOTE(review): units are not visible here, presumably pixels/rows, confirm)
u32 height;
//! The format of the render target, only the raw register values listed here are currently declared
enum class ColorFormat : u32 {
//! Marks the render target as disabled (see the note on the structure)
None = 0x0,
R8G8B8A8Unorm = 0xD5,
} format;
//! Bit-packed description of how the render target is laid out in memory, occupies a single 32-bit word (4 + 4 + 4 + 1 + 3 + 1 + 15 bits)
struct TileMode {
u8 blockWidthLog2 : 4; //!< The width of a block in GOBs with log2 encoding, this is always assumed to be 1 as it is the only configuration the X1 supports
u8 blockHeightLog2 : 4; //!< The height of a block in GOBs with log2 encoding
u8 blockDepthLog2 : 4; //!< The depth of a block in GOBs with log2 encoding
//! Presumably set when the surface is pitch-linear rather than block-linear, in which case the block dimensions above would not apply (TODO confirm)
bool isLinear : 1;
u8 _pad0_ : 3;
//! Presumably set when the surface is a 3D texture (TODO confirm)
bool is3d : 1;
u16 _pad1_ : 15;
} tileMode;
//! Description of the layers of the render target, occupies a single 32-bit word (16 + 1 + 15 bits)
struct ArrayMode {
//! Presumably the amount of array layers in the render target (TODO confirm)
u16 layerCount;
//! NOTE(review): likely marks the layers as slices of a volume (3D) surface, confirm against hardware documentation
bool volume : 1;
u16 _pad_ : 15;
} arrayMode;
u32 layerStrideLsr2; //!< The length of the stride of a layer shifted right by 2 bits
//! Presumably the index of the first layer which is rendered to (TODO confirm)
u32 baseLayer;
//! Unused words which pad the structure up to 0x10 words
u32 _pad_[0x7];
};
static_assert(sizeof(RenderTarget) == (0x10 * sizeof(u32)));
constexpr static size_t ViewportCount{16}; //!< Amount of viewports on Maxwell 3D, array size for any per-viewport parameter such as transform, scissors, etc constexpr static size_t ViewportCount{16}; //!< Amount of viewports on Maxwell 3D, array size for any per-viewport parameter such as transform, scissors, etc
/** /**
@ -120,7 +159,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d::type {
u16 minimum; //!< The lower bound of the masked region in a dimension u16 minimum; //!< The lower bound of the masked region in a dimension
u16 maximum; //!< The higher bound of the masked region in a dimension u16 maximum; //!< The higher bound of the masked region in a dimension
} horizontal, vertical; } horizontal, vertical;
u32 next; u32 _pad_;
}; };
static_assert(sizeof(Scissor) == (0x4 * sizeof(u32))); static_assert(sizeof(Scissor) == (0x4 * sizeof(u32)));
@ -168,6 +207,45 @@ namespace skyline::soc::gm20b::engine::maxwell3d::type {
}; };
static_assert(sizeof(VertexAttribute) == sizeof(u32)); static_assert(sizeof(VertexAttribute) == sizeof(u32));
/**
 * @brief A descriptor that controls how the RenderTarget array (at 0x200) will be interpreted
 * @note The bit-fields declare 28 bits (4 + 8 * 3) of a single 32-bit register, the size is enforced by the static_assert following the structure
 */
struct RenderTargetControl {
    u8 count : 4; //!< The amount of active render targets, doesn't necessarily mean bound
    u8 map0 : 3; //!< The index of the render target that maps to slot 0
    u8 map1 : 3; //!< The index of the render target that maps to slot 1
    u8 map2 : 3; //!< The index of the render target that maps to slot 2
    u8 map3 : 3; //!< The index of the render target that maps to slot 3
    u8 map4 : 3; //!< The index of the render target that maps to slot 4
    u8 map5 : 3; //!< The index of the render target that maps to slot 5
    u8 map6 : 3; //!< The index of the render target that maps to slot 6
    u8 map7 : 3; //!< The index of the render target that maps to slot 7

    /**
     * @brief Looks up the render target index that a slot is mapped to
     * @param index The slot to look up, it must be in the range [0, 7]
     * @return The value of the `mapN` bit-field that corresponds to the supplied slot
     * @throws exception If the supplied slot is outside of the range [0, 7]
     * @note Bit-fields cannot be indexed or addressed so the lookup has to be spelled out as a switch
     * @note This is const-qualified as it only reads the mapping, allowing it to be called on const descriptors
     */
    size_t Map(size_t index) const {
        switch (index) {
            case 0:
                return map0;
            case 1:
                return map1;
            case 2:
                return map2;
            case 3:
                return map3;
            case 4:
                return map4;
            case 5:
                return map5;
            case 6:
                return map6;
            case 7:
                return map7;
            default:
                throw exception("Invalid RT index is being mapped: {}", index);
        }
    }
};
static_assert(sizeof(RenderTargetControl) == sizeof(u32));
enum class CompareOp : u32 { enum class CompareOp : u32 {
Never = 1, Never = 1,
Less = 2, Less = 2,
@ -292,6 +370,22 @@ namespace skyline::soc::gm20b::engine::maxwell3d::type {
}; };
static_assert(sizeof(ColorWriteMask) == sizeof(u32)); static_assert(sizeof(ColorWriteMask) == sizeof(u32));
/**
 * @brief A method call which causes a layer of an RT to be cleared with a channel mask
 * @note The bit-fields are declared so that they cover all 32 bits of the register (6 + 4 + 11 + 11), the size is enforced by the static_assert following the structure
 */
struct ClearBuffers {
    bool depth : 1; //!< If the depth channel should be cleared
    bool stencil : 1; //!< If the stencil channel should be cleared
    bool red : 1; //!< If the red channel should be cleared
    bool green : 1; //!< If the green channel should be cleared
    bool blue : 1; //!< If the blue channel should be cleared
    bool alpha : 1; //!< If the alpha channel should be cleared
    u8 renderTargetId : 4; //!< The ID of the render target to clear
    u16 layerId : 11; //!< The index of the layer of the render target to clear (NOTE(review): the width of this field should be confirmed against the hardware class headers)
    u16 _pad_ : 11; //!< The remaining upper bits of the register, this was previously 10 bits wide which left the top bit undeclared
};
static_assert(sizeof(ClearBuffers) == sizeof(u32));
struct SemaphoreInfo { struct SemaphoreInfo {
enum class Op : u8 { enum class Op : u8 {
Release = 0, Release = 0,

View File

@ -1,6 +1,8 @@
// SPDX-License-Identifier: MPL-2.0 // SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d)
#include <boost/preprocessor/repeat.hpp>
#include <soc.h> #include <soc.h>
namespace skyline::soc::gm20b::engine::maxwell3d { namespace skyline::soc::gm20b::engine::maxwell3d {
@ -76,7 +78,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
state.logger->Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", method, argument); state.logger->Debug("Called method in Maxwell 3D: 0x{:X} args: 0x{:X}", method, argument);
// Methods that are greater than the register size are for macro control // Methods that are greater than the register size are for macro control
if (method > RegisterCount) [[unlikely]] { if (method >= RegisterCount) [[unlikely]] {
// Starting a new macro at index 'method - RegisterCount' // Starting a new macro at index 'method - RegisterCount'
if (!(method & 1)) { if (!(method & 1)) {
if (macroInvocation.index != -1) { if (macroInvocation.index != -1) {
@ -109,6 +111,12 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
else if (shadowRegisters.mme.shadowRamControl == type::MmeShadowRamControl::MethodReplay) else if (shadowRegisters.mme.shadowRamControl == type::MmeShadowRamControl::MethodReplay)
argument = shadowRegisters.raw[method]; argument = shadowRegisters.raw[method];
// Helpers which evaluate to the index of a Maxwell 3D register in units of 32-bit words, these are what incoming method IDs are matched against
// Every expansion is fully parenthesized and every term is in word units so that they can safely be combined with other arithmetic or be given compound index expressions
// Note: These are local to this function and are undefined again after the method switch

// The word offset of a top-level register (or a nested member denoted using dots)
#define MAXWELL3D_OFFSET(field) (U32_OFFSET(Registers, field))
// The word offset of a member inside a structure register, the member offset must be in words rather than bytes (as `offsetof` would provide) to match the other helpers
#define MAXWELL3D_STRUCT_OFFSET(field, member) (MAXWELL3D_OFFSET(field) + U32_OFFSET(typeof(Registers::field), member))
// The word offset of the element at the supplied index inside an array register
#define MAXWELL3D_ARRAY_OFFSET(field, index) (MAXWELL3D_OFFSET(field) + ((sizeof(typeof(Registers::field[0])) / sizeof(u32)) * (index)))
// The word offset of a member of the element at the supplied index inside an array of structures
#define MAXWELL3D_ARRAY_STRUCT_OFFSET(field, index, member) (MAXWELL3D_ARRAY_OFFSET(field, index) + U32_OFFSET(typeof(Registers::field[0]), member))
// The word offset of a submember of a structure member of the element at the supplied index inside an array of structures
#define MAXWELL3D_ARRAY_STRUCT_STRUCT_OFFSET(field, index, member, submember) (MAXWELL3D_ARRAY_STRUCT_OFFSET(field, index, member) + U32_OFFSET(typeof(Registers::field[0].member), submember))
switch (method) { switch (method) {
case MAXWELL3D_OFFSET(mme.instructionRamLoad): case MAXWELL3D_OFFSET(mme.instructionRamLoad):
if (registers.mme.instructionRamPointer >= macroCode.size()) if (registers.mme.instructionRamPointer >= macroCode.size())
@ -137,7 +145,43 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
state.soc->host1x.syncpoints.at(registers.syncpointAction.id).Increment(); state.soc->host1x.syncpoints.at(registers.syncpointAction.id).Increment();
break; break;
#define VIEWPORT_TRANSFORM_CALLBACKS(index) \ #define RENDER_TARGET_ARRAY(z, index, data) \
case MAXWELL3D_ARRAY_STRUCT_STRUCT_OFFSET(renderTargets, index, address, high): \
context.SetRenderTargetAddressHigh(index, argument); \
break; \
case MAXWELL3D_ARRAY_STRUCT_STRUCT_OFFSET(renderTargets, index, address, low): \
context.SetRenderTargetAddressLow(index, argument); \
break; \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(renderTargets, index, width): \
context.SetRenderTargetAddressWidth(index, argument); \
break; \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(renderTargets, index, height): \
context.SetRenderTargetAddressHeight(index, argument); \
break; \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(renderTargets, index, format): \
context.SetRenderTargetAddressFormat(index, \
static_cast<type::RenderTarget::ColorFormat>(argument)); \
break; \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(renderTargets, index, tileMode): \
context.SetRenderTargetTileMode(index, \
*reinterpret_cast<type::RenderTarget::TileMode*>(&argument)); \
break; \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(renderTargets, index, arrayMode): \
context.SetRenderTargetArrayMode(index, \
*reinterpret_cast<type::RenderTarget::ArrayMode*>(&argument)); \
break; \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(renderTargets, index, layerStrideLsr2): \
context.SetRenderTargetLayerStride(index, argument); \
break; \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(renderTargets, index, baseLayer): \
context.SetRenderTargetBaseLayer(index, argument); \
break;
BOOST_PP_REPEAT(8, RENDER_TARGET_ARRAY, 0)
static_assert(type::RenderTargetCount == 8 && type::RenderTargetCount < BOOST_PP_LIMIT_REPEAT);
#undef RENDER_TARGET_ARRAY
#define VIEWPORT_TRANSFORM_CALLBACKS(z, index, data) \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, scaleX): \ case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, scaleX): \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, translateX): \ case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, translateX): \
context.SetViewportX(index, registers.viewportTransforms[index].scaleX, registers.viewportTransforms[index].translateX); \ context.SetViewportX(index, registers.viewportTransforms[index].scaleX, registers.viewportTransforms[index].translateX); \
@ -149,29 +193,22 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, scaleZ): \ case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, scaleZ): \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, translateZ): \ case MAXWELL3D_ARRAY_STRUCT_OFFSET(viewportTransforms, index, translateZ): \
context.SetViewportZ(index, registers.viewportTransforms[index].scaleY, registers.viewportTransforms[index].translateY); \ context.SetViewportZ(index, registers.viewportTransforms[index].scaleY, registers.viewportTransforms[index].translateY); \
break break;
VIEWPORT_TRANSFORM_CALLBACKS(0); BOOST_PP_REPEAT(16, VIEWPORT_TRANSFORM_CALLBACKS, 0)
VIEWPORT_TRANSFORM_CALLBACKS(1); static_assert(type::ViewportCount == 16 && type::ViewportCount < BOOST_PP_LIMIT_REPEAT);
VIEWPORT_TRANSFORM_CALLBACKS(2);
VIEWPORT_TRANSFORM_CALLBACKS(3);
VIEWPORT_TRANSFORM_CALLBACKS(4);
VIEWPORT_TRANSFORM_CALLBACKS(5);
VIEWPORT_TRANSFORM_CALLBACKS(6);
VIEWPORT_TRANSFORM_CALLBACKS(7);
VIEWPORT_TRANSFORM_CALLBACKS(8);
VIEWPORT_TRANSFORM_CALLBACKS(9);
VIEWPORT_TRANSFORM_CALLBACKS(10);
VIEWPORT_TRANSFORM_CALLBACKS(11);
VIEWPORT_TRANSFORM_CALLBACKS(12);
VIEWPORT_TRANSFORM_CALLBACKS(13);
VIEWPORT_TRANSFORM_CALLBACKS(14);
VIEWPORT_TRANSFORM_CALLBACKS(15);
static_assert(type::ViewportCount == 16);
#undef VIEWPORT_TRANSFORM_CALLBACKS #undef VIEWPORT_TRANSFORM_CALLBACKS
#define SCISSOR_CALLBACKS(index) \ #define COLOR_CLEAR_CALLBACKS(z, index, data) \
case MAXWELL3D_ARRAY_OFFSET(clearColorValue, index): \
context.UpdateClearColorValue(index, argument); \
break;
BOOST_PP_REPEAT(4, COLOR_CLEAR_CALLBACKS, 0)
static_assert(4 < BOOST_PP_LIMIT_REPEAT);
#undef COLOR_CLEAR_CALLBACKS
#define SCISSOR_CALLBACKS(z, index, data) \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(scissors, index, enable): \ case MAXWELL3D_ARRAY_STRUCT_OFFSET(scissors, index, enable): \
context.SetScissor(index, argument ? registers.scissors[index] : std::optional<type::Scissor>{}); \ context.SetScissor(index, argument ? registers.scissors[index] : std::optional<type::Scissor>{}); \
break; \ break; \
@ -180,28 +217,20 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
break; \ break; \
case MAXWELL3D_ARRAY_STRUCT_OFFSET(scissors, index, vertical): \ case MAXWELL3D_ARRAY_STRUCT_OFFSET(scissors, index, vertical): \
context.SetScissorVertical(index, registers.scissors[index].vertical); \ context.SetScissorVertical(index, registers.scissors[index].vertical); \
break break;
SCISSOR_CALLBACKS(0); BOOST_PP_REPEAT(16, SCISSOR_CALLBACKS, 0)
SCISSOR_CALLBACKS(1); static_assert(type::ViewportCount == 16 && type::ViewportCount < BOOST_PP_LIMIT_REPEAT);
SCISSOR_CALLBACKS(2);
SCISSOR_CALLBACKS(3);
SCISSOR_CALLBACKS(4);
SCISSOR_CALLBACKS(5);
SCISSOR_CALLBACKS(6);
SCISSOR_CALLBACKS(7);
SCISSOR_CALLBACKS(8);
SCISSOR_CALLBACKS(9);
SCISSOR_CALLBACKS(10);
SCISSOR_CALLBACKS(11);
SCISSOR_CALLBACKS(12);
SCISSOR_CALLBACKS(13);
SCISSOR_CALLBACKS(14);
SCISSOR_CALLBACKS(15);
static_assert(type::ViewportCount == 16);
#undef SCISSOR_CALLBACKS #undef SCISSOR_CALLBACKS
case MAXWELL3D_OFFSET(renderTargetControl):
context.UpdateRenderTargetControl(registers.renderTargetControl);
break;
case MAXWELL3D_OFFSET(clearBuffers):
context.ClearBuffers(registers.clearBuffers);
break;
case MAXWELL3D_OFFSET(semaphore.info): case MAXWELL3D_OFFSET(semaphore.info):
switch (registers.semaphore.info.op) { switch (registers.semaphore.info.op) {
case type::SemaphoreInfo::Op::Release: case type::SemaphoreInfo::Op::Release:
@ -233,6 +262,12 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
default: default:
break; break;
} }
#undef MAXWELL3D_OFFSET
#undef MAXWELL3D_STRUCT_OFFSET
#undef MAXWELL3D_ARRAY_OFFSET
#undef MAXWELL3D_ARRAY_STRUCT_OFFSET
#undef MAXWELL3D_ARRAY_STRUCT_STRUCT_OFFSET
} }
void Maxwell3D::WriteSemaphoreResult(u64 result) { void Maxwell3D::WriteSemaphoreResult(u64 result) {

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: MPL-2.0 // SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d)
#pragma once #pragma once
@ -7,12 +8,6 @@
#include "engine.h" #include "engine.h"
#include "maxwell/macro_interpreter.h" #include "maxwell/macro_interpreter.h"
#define MAXWELL3D_OFFSET(field) U32_OFFSET(Registers, field)
#define MAXWELL3D_STRUCT_OFFSET(field, member) U32_OFFSET(Registers, field) + offsetof(typeof(Registers::field), member)
#define MAXWELL3D_ARRAY_OFFSET(field, index) U32_OFFSET(Registers, field) + ((sizeof(typeof(Registers::field[0])) / sizeof(u32)) * index)
#define MAXWELL3D_ARRAY_STRUCT_OFFSET(field, index, member) MAXWELL3D_ARRAY_OFFSET(field, index) + U32_OFFSET(typeof(Registers::field[0]), member)
#define MAXWELL3D_ARRAY_STRUCT_STRUCT_OFFSET(field, index, member, submember) MAXWELL3D_ARRAY_STRUCT_OFFSET(field, index, member) + U32_OFFSET(typeof(Registers::field[0].member), submember)
namespace skyline::gpu::context { namespace skyline::gpu::context {
class GraphicsContext; class GraphicsContext;
} }
@ -78,10 +73,16 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 _pad3_[0x2C]; // 0xB3 u32 _pad3_[0x2C]; // 0xB3
u32 rasterizerEnable; // 0xDF u32 rasterizerEnable; // 0xDF
u32 _pad4_[0x1A0]; // 0xE0 u32 _pad4_[0x120]; // 0xE0
std::array<type::RenderTarget, type::RenderTargetCount> renderTargets; // 0x200
std::array<type::ViewportTransform, type::ViewportCount> viewportTransforms; // 0x280 std::array<type::ViewportTransform, type::ViewportCount> viewportTransforms; // 0x280
std::array<type::Viewport, type::ViewportCount> viewports; // 0x300 std::array<type::Viewport, type::ViewportCount> viewports; // 0x300
u32 _pad5_[0x2B]; // 0x340 u32 _pad5_[0x20]; // 0x340
std::array<u32, 4> clearColorValue; // 0x360
u32 clearDepthValue; // 0x364
u32 _pad5_1_[0x6]; // 0x365
struct { struct {
type::PolygonMode front; // 0x36B type::PolygonMode front; // 0x36B
@ -89,9 +90,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
} polygonMode; } polygonMode;
u32 _pad6_[0x13]; // 0x36D u32 _pad6_[0x13]; // 0x36D
std::array<type::Scissor, type::ViewportCount> scissors; // 0x380 std::array<type::Scissor, type::ViewportCount> scissors; // 0x380
u32 _pad6_1_[0x15]; // 0x3C0 u32 _pad6_1_[0x15]; // 0x3C0
struct { struct {
@ -104,7 +103,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 rtSeparateFragData; // 0x3EB u32 rtSeparateFragData; // 0x3EB
u32 _pad8_[0x6C]; // 0x3EC u32 _pad8_[0x6C]; // 0x3EC
std::array<type::VertexAttribute, 0x20> vertexAttributeState; // 0x458 std::array<type::VertexAttribute, 0x20> vertexAttributeState; // 0x458
u32 _pad9_[0x4B]; // 0x478 u32 _pad9_[0xF]; // 0x478
type::RenderTargetControl renderTargetControl; // 0x487
u32 _pad9_1_[0x3B]; // 0x488
type::CompareOp depthTestFunc; // 0x4C3 type::CompareOp depthTestFunc; // 0x4C3
float alphaTestRef; // 0x4C4 float alphaTestRef; // 0x4C4
type::CompareOp alphaTestFunc; // 0x4C5 type::CompareOp alphaTestFunc; // 0x4C5
@ -217,8 +218,10 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
u32 pixelCentreImage; // 0x649 u32 pixelCentreImage; // 0x649
u32 _pad21_; // 0x64A u32 _pad21_; // 0x64A
u32 viewportTransformEnable; // 0x64B u32 viewportTransformEnable; // 0x64B
u32 _pad22_[0x34]; // 0x64A u32 _pad22_[0x28]; // 0x64C
std::array<type::ColorWriteMask, 8> colorMask; // 0x680 For each render target type::ClearBuffers clearBuffers; // 0x674
u32 _pad22_1_[0xB]; // 0x675
std::array<type::ColorWriteMask, type::RenderTargetCount> colorMask; // 0x680
u32 _pad23_[0x38]; // 0x688 u32 _pad23_[0x38]; // 0x688
struct { struct {
@ -228,12 +231,13 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
} semaphore; } semaphore;
u32 _pad24_[0xBC]; // 0x6C4 u32 _pad24_[0xBC]; // 0x6C4
std::array<type::Blend, 8> independentBlend; // 0x780 For each render target std::array<type::Blend, type::RenderTargetCount> independentBlend; // 0x780
u32 _pad25_[0x100]; // 0x7C0 u32 _pad25_[0x100]; // 0x7C0
u32 firmwareCall[0x20]; // 0x8C0 u32 firmwareCall[0x20]; // 0x8C0
}; };
}; };
static_assert(sizeof(Registers) == (RegisterCount * sizeof(u32))); static_assert(sizeof(Registers) == (RegisterCount * sizeof(u32)));
static_assert(U32_OFFSET(Registers, firmwareCall) == 0x8C0);
#pragma pack(pop) #pragma pack(pop)
Registers registers{}; Registers registers{};