Implement the Kepler compute engine

This can reuse a fair bit of the now-commonised Maxwell 3D code and mostly consists of compute-specific pipeline code which was deemed not suitable for being commonised (e.g. descriptor update code is somewhat duplicated). Of note is how compute lacks any active state at all due to its use of QMDs which bundle up all state into a single object in memory.
This commit is contained in:
Billy Laws 2022-11-18 21:51:00 +00:00
parent 4bc81f007f
commit bf03f945ee
13 changed files with 602 additions and 2 deletions

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
#include <tsl/robin_map.h>
#include "common.h" #include "common.h"
namespace skyline::gpu::interconnect { namespace skyline::gpu::interconnect {

View File

@ -0,0 +1,11 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <soc/gm20b/engines/kepler_compute/qmd.h>
#include <gpu/interconnect/common/common.h>
namespace skyline::gpu::interconnect::kepler_compute {
// Alias so interconnect code can name the engine's QMD type without spelling out the full soc::gm20b namespace path
using QMD = skyline::soc::gm20b::engine::kepler_compute::QMD;
}

View File

@ -0,0 +1,22 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "constant_buffers.h"
namespace skyline::gpu::interconnect::kepler_compute {
/**
 * @brief Rebinds every constant buffer slot that the QMD flags as valid
 * @note Slots whose valid bit is clear are skipped, so their previous binding is left untouched
 */
void ConstantBuffers::Update(InterconnectContext &ctx, const QMD &qmd) {
    for (u32 slot{}; slot < QMD::ConstantBufferCount; slot++) {
        // Skip any slot that isn't marked valid in the QMD's bitmask
        if (!(qmd.constantBufferValid & (1U << slot)))
            continue;

        const auto &descriptor{qmd.constantBuffer[slot]};
        auto &cachedView{cachedBuffers[slot]};

        cachedView.Update(ctx, descriptor.Address(), descriptor.size);
        boundConstantBuffers[slot] = {*cachedView};
    }
}
void ConstantBuffers::MarkAllDirty() {
for (auto &buffer : cachedBuffers)
buffer.PurgeCaches();
}
}

View File

@ -0,0 +1,26 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <soc/gm20b/engines/kepler_compute/qmd.h>
#include "common.h"
namespace skyline::gpu::interconnect::kepler_compute {
// One ConstantBuffer binding per constant buffer slot described by a QMD
using ConstantBufferSet = std::array<ConstantBuffer, QMD::ConstantBufferCount>;
/**
* @brief Abstracts out QMD constant buffer creation
* @note Update() only rebinds the slots that the QMD marks as valid, all other entries of boundConstantBuffers keep their previous contents
*/
struct ConstantBuffers {
private:
// Cached buffer views backing boundConstantBuffers, indexed by constant buffer slot
std::array<CachedMappedBufferView, QMD::ConstantBufferCount> cachedBuffers;
public:
ConstantBufferSet boundConstantBuffers{}; //!< The currently active set of constant buffers from the QMD
// Refreshes the cached view and binding of every slot flagged in qmd.constantBufferValid
void Update(InterconnectContext &ctx, const QMD &qmd);
// Calls PurgeCaches() on every cached buffer view
void MarkAllDirty();
};
}

View File

@ -0,0 +1,67 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/Ryujinx/)
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu/interconnect/command_executor.h>
#include <gpu/interconnect/common/state_updater.h>
#include <soc/gm20b/channel.h>
#include "pipeline_state.h"
#include "kepler_compute.h"
namespace skyline::gpu::interconnect::kepler_compute {
/**
 * @brief Sets up the interconnect context and all sub-state objects, then registers a flush callback on the executor which invalidates every piece of cached state
 */
KeplerCompute::KeplerCompute(GPU &gpu,
                             soc::gm20b::ChannelContext &channelCtx,
                             nce::NCE &nce,
                             kernel::MemoryManager &memoryManager,
                             DirtyManager &manager,
                             const EngineRegisterBundle &registerBundle)
    : ctx{channelCtx, channelCtx.executor, gpu, nce, memoryManager},
      pipelineState{manager, registerBundle.pipelineStateRegisters},
      samplers{manager, registerBundle.samplerPoolRegisters},
      textures{manager, registerBundle.texturePoolRegisters} {
    // Captures `this`, the callback therefore relies on this object staying at the same address for as long as it remains registered
    auto invalidateCachedState{[this] {
        pipelineState.PurgeCaches();
        constantBuffers.MarkAllDirty();
        samplers.MarkAllDirty();
        textures.MarkAllDirty();
    }};

    ctx.executor.AddFlushCallback(invalidateCachedState);
}
/**
 * @brief Records a compute dispatch for the given QMD into the executor
 * @param qmd The queue meta data describing the constant buffers, shader program and grid dimensions of the dispatch
 * @note Constant buffers, samplers and the pipeline are all synced from the QMD before the dispatch command is queued
 */
void KeplerCompute::Dispatch(const QMD &qmd) {
    StateUpdateBuilder builder{*ctx.executor.allocator};

    constantBuffers.Update(ctx, qmd);
    samplers.Update(ctx, qmd.samplerIndex == soc::gm20b::engine::kepler_compute::QMD::SamplerIndex::ViaHeaderIndex);
    auto *pipeline{pipelineState.Update(ctx, builder, textures, constantBuffers.boundConstantBuffers, qmd)};

    auto *descUpdateInfo{pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures)};
    builder.SetPipeline(*pipeline->compiledPipeline.pipeline, vk::PipelineBindPoint::eCompute);

    // SyncDescriptors returns nullptr when it produced no descriptor writes, in which case there is nothing to bind
    // Without this check the non-push path below would dereference the null pointer to fetch the set layout
    if (descUpdateInfo) {
        if (ctx.gpu.traits.supportsPushDescriptors) {
            builder.SetDescriptorSetWithPush(descUpdateInfo);
        } else {
            auto set{std::make_shared<DescriptorAllocator::ActiveDescriptorSet>(ctx.gpu.descriptor.AllocateSet(descUpdateInfo->descriptorSetLayout))};
            builder.SetDescriptorSetWithUpdate(descUpdateInfo, set.get(), nullptr);
            // Hand shared ownership of the set over to the executor since the recorded update refers to it via a raw pointer
            ctx.executor.AttachDependency(set);
        }
    }

    auto stateUpdater{builder.Build()};

    /**
     * @brief Struct that can be linearly allocated, holding all state for the dispatch to avoid a dynamic allocation with lambda captures
     */
    struct DrawParams {
        StateUpdater stateUpdater;
        std::array<u32, 3> dimensions;
    };
    auto *drawParams{ctx.executor.allocator->EmplaceUntracked<DrawParams>(DrawParams{stateUpdater, {qmd.ctaRasterWidth, qmd.ctaRasterHeight, qmd.ctaRasterDepth}})};

    ctx.executor.AddOutsideRpCommand([drawParams](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &gpu) {
        drawParams->stateUpdater.RecordAll(gpu, commandBuffer);
        commandBuffer.dispatch(drawParams->dimensions[0], drawParams->dimensions[1], drawParams->dimensions[2]);
    });
}
}

View File

@ -0,0 +1,47 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <gpu/descriptor_allocator.h>
#include <gpu/interconnect/common/samplers.h>
#include <gpu/interconnect/common/textures.h>
#include "constant_buffers.h"
#include "pipeline_state.h"
namespace skyline::gpu::interconnect::kepler_compute {
/**
* @brief The core Kepler Compute interconnect object, directly accessed by the engine code to perform compute dispatches
* @note The constructor registers a flush callback on the executor that captures `this`
*/
class KeplerCompute {
public:
/**
* @brief The full set of register state used by the GPU interconnect
*/
struct EngineRegisterBundle {
PipelineState::EngineRegisters pipelineStateRegisters;
SamplerPoolState::EngineRegisters samplerPoolRegisters;
TexturePoolState::EngineRegisters texturePoolRegisters;
};
private:
// Members are initialised in declaration order; constantBuffers is not in the constructor's initialiser list and is default-constructed
InterconnectContext ctx;
PipelineState pipelineState;
ConstantBuffers constantBuffers;
Samplers samplers;
Textures textures;
public:
KeplerCompute(GPU &gpu,
soc::gm20b::ChannelContext &channelCtx,
nce::NCE &nce,
kernel::MemoryManager &memoryManager,
DirtyManager &manager,
const EngineRegisterBundle &registerBundle);
/**
* @brief Performs a compute dispatch using the given QMD
* @note The grid dimensions passed to the dispatch command are the QMD's ctaRasterWidth/Height/Depth
*/
void Dispatch(const QMD &qmd);
};
}

View File

@ -0,0 +1,21 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline::gpu::interconnect::kepler_compute {
/**
* @brief Packed struct of pipeline state suitable for use as a map key
* @note NOTE(review): PipelineManager hashes this with util::ObjectHash, presumably over the raw object bytes - confirm before adding members that would introduce padding
*/
struct PackedPipelineState {
u64 shaderHash; // Hash of the compute shader binary
std::array<u32, 3> dimensions; // CTA thread dimensions 0-2 taken from the QMD
u32 localMemorySize; // Sum of the QMD's low and high shader local memory sizes
u32 sharedMemorySize; // Shared memory size taken from the QMD
u32 bindlessTextureConstantBufferSlotSelect; // Constant buffer slot select from the bindless texture register
// Member-wise equality, used by the pipeline map to compare keys
bool operator==(const PackedPipelineState &) const = default;
};
}

View File

@ -0,0 +1,206 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu/texture/texture.h>
#include <gpu/interconnect/command_executor.h>
#include <gpu/interconnect/common/pipeline.inc>
#include <gpu/shader_manager.h>
#include <gpu.h>
#include "pipeline_manager.h"
namespace skyline::gpu::interconnect::kepler_compute {
/**
 * @brief Parses and compiles the supplied compute shader binary into a shader stage
 * @param constantBuffers Read from by the parser callback when it requests constant buffer contents
 * @param textures Queried by the parser callback to resolve the type of a bindless texture
 * @return The compiled shader module together with the info produced for the parsed program
 */
static Pipeline::ShaderStage MakePipelineShader(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, const PackedPipelineState &packedState, const ShaderBinary &shaderBinary) {
    auto &shaderManager{ctx.gpu.shader};
    shaderManager.ResetPools();

    // Callback that reads a value out of the constant buffer at the given slot and offset
    auto readConstantBuffer{[&](u32 index, u32 offset) {
        return constantBuffers[index].Read<int>(ctx.executor, offset);
    }};

    // Callback that looks up the type of the texture referenced by a raw bindless handle
    auto lookupTextureType{[&](u32 index) {
        return textures.GetTextureType(ctx, BindlessHandle{ .raw = index }.textureIndex);
    }};

    auto program{shaderManager.ParseComputeShader(
        shaderBinary.binary, shaderBinary.baseOffset,
        packedState.bindlessTextureConstantBufferSlotSelect,
        packedState.localMemorySize, packedState.sharedMemorySize,
        packedState.dimensions,
        readConstantBuffer, lookupTextureType)};

    Shader::Backend::Bindings bindings{};
    return {shaderManager.CompileShader({}, program, bindings), program.info};
}
/**
 * @brief Builds the descriptor set layout bindings and descriptor counts for a compute shader stage
 * @note Bindings are numbered sequentially in this order: constant buffers, storage buffers, texture buffers, image buffers, textures, images
 */
static Pipeline::DescriptorInfo MakePipelineDescriptorInfo(const Pipeline::ShaderStage &stage) {
    Pipeline::DescriptorInfo info{};
    u32 nextBinding{};

    // Appends one layout binding per descriptor and adds each descriptor's array size onto the given running total
    auto appendBindings{[&](vk::DescriptorType type, const auto &descs, u32 &count) {
        info.totalWriteDescCount += descs.size();

        for (const auto &desc : descs) {
            count += desc.count;

            info.descriptorSetLayoutBindings.push_back(vk::DescriptorSetLayoutBinding{
                .binding = nextBinding++,
                .descriptorType = type,
                .descriptorCount = desc.count,
                .stageFlags = vk::ShaderStageFlagBits::eCompute,
            });
        }
    }};

    const auto &shaderInfo{stage.info};

    appendBindings(vk::DescriptorType::eUniformBuffer, shaderInfo.constant_buffer_descriptors, info.totalBufferDescCount);
    appendBindings(vk::DescriptorType::eStorageBuffer, shaderInfo.storage_buffers_descriptors, info.totalBufferDescCount);

    appendBindings(vk::DescriptorType::eUniformTexelBuffer, shaderInfo.texture_buffer_descriptors, info.totalTexelBufferDescCount);
    appendBindings(vk::DescriptorType::eStorageTexelBuffer, shaderInfo.image_buffer_descriptors, info.totalTexelBufferDescCount);
    if (info.totalTexelBufferDescCount > 0)
        Logger::Warn("Image buffer descriptors are not supported");

    appendBindings(vk::DescriptorType::eCombinedImageSampler, shaderInfo.texture_descriptors, info.totalImageDescCount);
    appendBindings(vk::DescriptorType::eStorageImage, shaderInfo.image_descriptors, info.totalImageDescCount);
    if (shaderInfo.image_descriptors.size() > 0)
        Logger::Warn("Image descriptors are not supported");

    return info;
}
/**
 * @brief Creates the Vulkan descriptor set layout, pipeline layout and compute pipeline for a shader stage
 * @param packedState Currently unused by this function
 * @param shaderStage The compiled shader stage whose module is used as the pipeline's compute stage
 * @param layoutBindings The descriptor set layout bindings of the single descriptor set used by the pipeline
 * @note The set layout is created with the push descriptor flag when the device supports push descriptors
 */
static Pipeline::CompiledPipeline MakeCompiledPipeline(InterconnectContext &ctx,
                                                       const PackedPipelineState &packedState,
                                                       const Pipeline::ShaderStage &shaderStage,
                                                       span<vk::DescriptorSetLayoutBinding> layoutBindings) {
    // Designated initialisers are kept in member declaration order as ISO C++20 requires
    vk::raii::DescriptorSetLayout descriptorSetLayout{ctx.gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{
        .flags = vk::DescriptorSetLayoutCreateFlags{ctx.gpu.traits.supportsPushDescriptors ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}},
        .bindingCount = static_cast<u32>(layoutBindings.size()),
        .pBindings = layoutBindings.data(),
    }};

    vk::raii::PipelineLayout pipelineLayout{ctx.gpu.vkDevice, vk::PipelineLayoutCreateInfo{
        .setLayoutCount = 1,
        .pSetLayouts = &*descriptorSetLayout,
    }};

    vk::PipelineShaderStageCreateInfo shaderStageInfo{
        .stage = vk::ShaderStageFlagBits::eCompute,
        .module = &*shaderStage.module,
        .pName = "main"
    };

    vk::ComputePipelineCreateInfo pipelineInfo{
        .stage = shaderStageInfo,
        .layout = *pipelineLayout,
    };

    vk::raii::Pipeline pipeline{ctx.gpu.vkDevice, nullptr, pipelineInfo};

    // Matches the member order of Pipeline::CompiledPipeline
    return Pipeline::CompiledPipeline{
        .descriptorSetLayout = std::move(descriptorSetLayout),
        .pipelineLayout = std::move(pipelineLayout),
        .pipeline = std::move(pipeline),
    };
}
/**
* @brief Compiles the shader, derives its descriptor layout info and creates the Vulkan pipeline objects
* @note Each initialiser depends on the members initialised before it: the descriptor info is derived from shaderStage, and the compiled pipeline from both
*/
Pipeline::Pipeline(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, const PackedPipelineState &packedState, const ShaderBinary &shaderBinary)
: shaderStage{MakePipelineShader(ctx, textures, constantBuffers, packedState, shaderBinary)},
descriptorInfo{MakePipelineDescriptorInfo(shaderStage)},
compiledPipeline{MakeCompiledPipeline(ctx, packedState, shaderStage, descriptorInfo.descriptorSetLayoutBindings)},
sourcePackedState{packedState} {
// One cached view per storage buffer descriptor declared by the shader
storageBufferViews.resize(shaderStage.info.storage_buffers_descriptors.size());
}
/**
 * @brief Purges all cached storage buffer views if the execution number differs from the one the pipeline was last used at
 * @param executionNumber The execution number to compare against the last recorded one
 */
void Pipeline::SyncCachedStorageBufferViews(u32 executionNumber) {
    // Nothing to do when the pipeline was already synced for this execution number
    if (lastExecutionNumber == executionNumber)
        return;

    for (auto &cachedView : storageBufferViews)
        cachedView.PurgeCaches();

    lastExecutionNumber = executionNumber;
}
/**
 * @brief Builds the descriptor set update for this pipeline from the supplied constant buffers, samplers and textures
 * @return A DescriptorUpdateInfo allocated from the executor's allocator, or nullptr when no descriptor writes were generated
 * @note Only constant buffer, storage buffer and combined image sampler descriptors are written; texel buffer and storage image descriptors are not
 */
DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures) {
    SyncCachedStorageBufferViews(ctx.executor.executionNumber);

    u32 writeIdx{};
    auto writes{ctx.executor.allocator->AllocateUntracked<vk::WriteDescriptorSet>(descriptorInfo.totalWriteDescCount)};

    u32 bufferIdx{};
    auto bufferDescs{ctx.executor.allocator->AllocateUntracked<vk::DescriptorBufferInfo>(descriptorInfo.totalBufferDescCount)};
    auto bufferDescDynamicBindings{ctx.executor.allocator->AllocateUntracked<DynamicBufferBinding>(descriptorInfo.totalBufferDescCount)};

    u32 imageIdx{};
    auto imageDescs{ctx.executor.allocator->AllocateUntracked<vk::DescriptorImageInfo>(descriptorInfo.totalImageDescCount)};

    u32 storageBufferIdx{};
    u32 bindingIdx{};

    /**
     * @brief Adds descriptor writes for a single Vulkan descriptor type that uses buffer descriptors
     * @param getBufferCb Called once per array element of every descriptor to obtain its dynamic buffer binding
     */
    auto writeBufferDescs{[&](vk::DescriptorType type, const auto &descs, auto getBufferCb) {
        // The underlying buffer bindings will be resolved from the dynamic ones during recording
        for (const auto &desc : descs) {
            writes[writeIdx++] = {
                .dstBinding = bindingIdx++,
                .descriptorCount = desc.count,
                .descriptorType = type,
                .pBufferInfo = &bufferDescs[bufferIdx],
            };

            for (u32 arrayIdx{}; arrayIdx < desc.count; arrayIdx++)
                bufferDescDynamicBindings[bufferIdx++] = getBufferCb(desc, arrayIdx);
        }
    }};

    /**
     * @brief Adds descriptor writes for a single Vulkan descriptor type that uses image descriptors
     * @param getTextureCb Called once per array element of every descriptor to obtain its image info
     */
    auto writeImageDescs{[&](vk::DescriptorType type, const auto &descs, auto getTextureCb) {
        for (const auto &desc : descs) {
            writes[writeIdx++] = {
                .dstBinding = bindingIdx++,
                .descriptorCount = desc.count,
                .descriptorType = type,
                .pImageInfo = &imageDescs[imageIdx],
            };

            for (u32 arrayIdx{}; arrayIdx < desc.count; arrayIdx++)
                imageDescs[imageIdx++] = getTextureCb(desc, arrayIdx);
        }
    }};

    writeBufferDescs(vk::DescriptorType::eUniformBuffer, shaderStage.info.constant_buffer_descriptors,
                     [&](const Shader::ConstantBufferDescriptor &desc, size_t arrayIdx) {
                         size_t cbufIdx{desc.index + arrayIdx};
                         return GetConstantBufferBinding(ctx, shaderStage.info, constantBuffers[cbufIdx].view, cbufIdx);
                     });

    writeBufferDescs(vk::DescriptorType::eStorageBuffer, shaderStage.info.storage_buffers_descriptors,
                     [&](const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) {
                         auto binding{GetStorageBufferBinding(ctx, desc, constantBuffers[desc.cbuf_index], storageBufferViews[storageBufferIdx])};
                         storageBufferIdx += arrayIdx ? 0 : 1;
                         return binding;
                     });

    // The set layout reserves one binding per texture buffer and image buffer descriptor ahead of the texture bindings
    // No writes are generated for those yet, so skip their bindings to keep the texture writes aligned with the layout
    bindingIdx += static_cast<u32>(shaderStage.info.texture_buffer_descriptors.size() + shaderStage.info.image_buffer_descriptors.size());

    writeImageDescs(vk::DescriptorType::eCombinedImageSampler, shaderStage.info.texture_descriptors,
                    [&](const Shader::TextureDescriptor &desc, size_t arrayIdx) {
                        BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers, desc, arrayIdx)};
                        auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)};
                        return binding.first;
                    });

    // Since we don't implement all descriptor types the number of writes might not match what's expected
    if (!writeIdx)
        return nullptr;

    return ctx.executor.allocator->EmplaceUntracked<DescriptorUpdateInfo>(DescriptorUpdateInfo{
        .writes = writes.first(writeIdx),
        .bufferDescs = bufferDescs.first(bufferIdx),
        .bufferDescDynamicBindings = bufferDescDynamicBindings.first(bufferIdx),
        .pipelineLayout = *compiledPipeline.pipelineLayout,
        .descriptorSetLayout = *compiledPipeline.descriptorSetLayout,
        .bindPoint = vk::PipelineBindPoint::eCompute,
        .descriptorSetIndex = 0,
    });
}
}

View File

@ -0,0 +1,75 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <tsl/robin_map.h>
#include <shader_compiler/frontend/ir/program.h>
#include <gpu/interconnect/common/samplers.h>
#include <gpu/interconnect/common/textures.h>
#include "packed_pipeline_state.h"
#include "constant_buffers.h"
namespace skyline::gpu {
// Forward declaration only, no definition of TextureView is provided by this header
class TextureView;
}
namespace skyline::gpu::interconnect::kepler_compute {
/**
* @brief A compiled compute pipeline together with the shader and descriptor info needed to build descriptor updates for it
*/
class Pipeline {
public:
// The compiled shader module and the shader info it was compiled with
struct ShaderStage {
vk::ShaderModule module;
Shader::Info info;
};
// Descriptor set layout bindings plus the descriptor counts used to size the allocations made by SyncDescriptors
struct DescriptorInfo {
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetLayoutBindings;
u32 totalWriteDescCount;
u32 totalBufferDescCount;
u32 totalTexelBufferDescCount;
u32 totalImageDescCount;
};
// Vulkan objects making up the pipeline; members are destroyed in reverse declaration order, so the pipeline goes before its layouts
struct CompiledPipeline {
vk::raii::DescriptorSetLayout descriptorSetLayout;
vk::raii::PipelineLayout pipelineLayout;
vk::raii::Pipeline pipeline;
};
private:
ShaderStage shaderStage;
DescriptorInfo descriptorInfo;
std::vector<CachedMappedBufferView> storageBufferViews; // Sized in the constructor to one view per storage buffer descriptor of the shader
u32 lastExecutionNumber{}; //!< The last execution number this pipeline was used at
// Purges all storage buffer views when executionNumber differs from lastExecutionNumber
void SyncCachedStorageBufferViews(u32 executionNumber);
public:
CompiledPipeline compiledPipeline;
PackedPipelineState sourcePackedState; // Copy of the packed state this pipeline was created from
Pipeline(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, const PackedPipelineState &packedState, const ShaderBinary &shaderBinary);
/**
* @brief Creates a descriptor set update from the current GPU state
* @return The descriptor update info, or nullptr if no descriptor writes were generated
*/
DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures);
};
/**
 * @brief Owns all compute pipelines, keyed by the packed pipeline state they were created from
 */
class PipelineManager {
  private:
    tsl::robin_map<PackedPipelineState, std::unique_ptr<Pipeline>, util::ObjectHash<PackedPipelineState>> map;

  public:
    /**
     * @brief Returns the pipeline matching the packed state, creating and storing a new one when none exists yet
     * @return A non-owning pointer to the pipeline, which remains owned by the manager
     */
    Pipeline *FindOrCreate(InterconnectContext &ctx, Textures &textures, ConstantBufferSet &constantBuffers, const PackedPipelineState &packedState, const ShaderBinary &shaderBinary) {
        if (auto existing{map.find(packedState)}; existing != map.end())
            return existing->second.get();

        // Only build the pipeline after the lookup has missed
        auto created{std::make_unique<Pipeline>(ctx, textures, constantBuffers, packedState, shaderBinary)};
        return map.emplace(packedState, std::move(created)).first->second.get();
    }
};
}

View File

@ -0,0 +1,47 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include "pipeline_state.h"
namespace skyline::gpu::interconnect::kepler_compute {
/* Pipeline Stage */
// Binds the dirty handle to the programRegion register through the dirty manager
void PipelineStageState::EngineRegisters::DirtyBind(DirtyManager &manager, dirty::Handle handle) const {
manager.Bind(handle, programRegion);
}
// Within the braces `engine` refers to the constructor parameter, which initialises the member of the same name
PipelineStageState::PipelineStageState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine)
: engine{manager, dirtyHandle, engine} {}
// Looks up the shader binary at programOffset within the program region via the shader cache and stores it in `binary`
void PipelineStageState::Flush(InterconnectContext &ctx, u32 programOffset) {
binary = cache.Lookup(ctx, engine->programRegion, programOffset);
}
// Forwards to the shader cache's Refresh for the same program region and offset and returns its result
bool PipelineStageState::Refresh(InterconnectContext &ctx, u32 programOffset) {
return cache.Refresh(ctx, engine->programRegion, programOffset);
}
// Forwards the purge to the shader cache
void PipelineStageState::PurgeCaches() {
cache.PurgeCaches();
}
/* Pipeline State */
// Sets up the dirty-tracked pipeline stage state and stores a reference to the bindless texture register
PipelineState::PipelineState(DirtyManager &manager, const EngineRegisters &engine)
: pipelineStage{manager, engine.pipelineStageRegisters},
bindlessTexture{engine.bindlessTexture} {}
/**
 * @brief Packs the pipeline-relevant state from the QMD and registers, then fetches the matching pipeline from the pipeline manager
 * @param builder Currently unused by this function
 * @return The pipeline for the packed state, created by the pipeline manager if it didn't exist yet
 */
Pipeline *PipelineState::Update(InterconnectContext &ctx, StateUpdateBuilder &builder, Textures &textures, ConstantBufferSet &constantBuffers, const QMD &qmd) {
    const auto &shaderBinary{pipelineStage.UpdateGet(ctx, qmd.programOffset).binary};

    packedState.bindlessTextureConstantBufferSlotSelect = bindlessTexture.constantBufferSlotSelect;
    packedState.sharedMemorySize = qmd.sharedMemorySize;
    // Local memory size is the sum of the low and high sizes in the QMD
    packedState.localMemorySize = qmd.shaderLocalMemoryLowSize + qmd.shaderLocalMemoryHighSize;
    packedState.dimensions = {qmd.ctaThreadDimension0, qmd.ctaThreadDimension1, qmd.ctaThreadDimension2};
    packedState.shaderHash = shaderBinary.hash;

    return pipelineManager.FindOrCreate(ctx, textures, constantBuffers, packedState, shaderBinary);
}
// Marks the pipeline stage state dirty, passing `true` to MarkDirty
// NOTE(review): presumably the flag requests a cache purge on the next update - confirm against dirty::ManualDirtyState
void PipelineState::PurgeCaches() {
pipelineStage.MarkDirty(true);
}
}

View File

@ -0,0 +1,61 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
#include <gpu/texture/texture.h>
#include <gpu/interconnect/common/shader_cache.h>
#include "common.h"
#include "packed_pipeline_state.h"
#include "pipeline_manager.h"
namespace skyline::gpu::interconnect::kepler_compute {
/**
* @brief Dirty-tracked state holding the compute shader binary fetched from the program region
*/
class PipelineStageState : dirty::RefreshableManualDirty, dirty::CachedManualDirty {
public:
struct EngineRegisters {
const soc::gm20b::engine::Address &programRegion;
// Binds the dirty handle to programRegion
void DirtyBind(DirtyManager &manager, dirty::Handle handle) const;
};
private:
dirty::BoundSubresource<EngineRegisters> engine;
ShaderCache cache;
public:
ShaderBinary binary; // The shader binary obtained by the most recent Flush()
PipelineStageState(dirty::Handle dirtyHandle, DirtyManager &manager, const EngineRegisters &engine);
// Looks up the binary at programOffset in the program region and stores it in `binary`
void Flush(InterconnectContext &ctx, u32 programOffset);
// Returns the result of the shader cache's Refresh for the same region and offset
bool Refresh(InterconnectContext &ctx, u32 programOffset);
// Purges the shader cache
void PurgeCaches();
};
/**
* @brief Tracks the state that selects a compute pipeline and owns the manager that caches the resulting pipelines
*/
class PipelineState {
public:
struct EngineRegisters {
PipelineStageState::EngineRegisters pipelineStageRegisters;
const engine_common::BindlessTexture &bindlessTexture;
};
private:
dirty::ManualDirtyState<PipelineStageState> pipelineStage;
const engine_common::BindlessTexture &bindlessTexture;
PackedPipelineState packedState{}; // Fully rewritten on every Update() call before being used as the lookup key
PipelineManager pipelineManager;
public:
PipelineState(DirtyManager &manager, const EngineRegisters &engine);
// Packs state from the QMD and registers, then returns the matching pipeline from the pipeline manager
Pipeline *Update(InterconnectContext &ctx, StateUpdateBuilder &builder, Textures &textures, ConstantBufferSet &constantBuffers, const QMD &qmd);
// Marks the pipeline stage state as dirty
void PurgeCaches();
};
}

View File

@ -7,8 +7,20 @@
#include "kepler_compute.h" #include "kepler_compute.h"
namespace skyline::soc::gm20b::engine { namespace skyline::soc::gm20b::engine {
// Builds the register bundle handed to the interconnect from the engine's registers
// The texture header pool register is passed to both the sampler pool and the texture pool register sets
static gpu::interconnect::kepler_compute::KeplerCompute::EngineRegisterBundle MakeEngineRegisters(const KeplerCompute::Registers &registers) {
return {
.pipelineStateRegisters = {*registers.programRegion, *registers.bindlessTexture},
.samplerPoolRegisters = {*registers.texSamplerPool, *registers.texHeaderPool},
.texturePoolRegisters = {*registers.texHeaderPool}
};
}
KeplerCompute::KeplerCompute(const DeviceState &state, ChannelContext &channelCtx) KeplerCompute::KeplerCompute(const DeviceState &state, ChannelContext &channelCtx)
: syncpoints{state.soc->host1x.syncpoints}, i2m{state, channelCtx} {} : syncpoints{state.soc->host1x.syncpoints},
channelCtx{channelCtx},
i2m{state, channelCtx},
dirtyManager{registers},
interconnect{*state.gpu, channelCtx, *state.nce, state.process->memory, dirtyManager, MakeEngineRegisters(registers)} {}
__attribute__((always_inline)) void KeplerCompute::CallMethod(u32 method, u32 argument) { __attribute__((always_inline)) void KeplerCompute::CallMethod(u32 method, u32 argument) {
Logger::Verbose("Called method in Kepler compute: 0x{:X} args: 0x{:X}", method, argument); Logger::Verbose("Called method in Kepler compute: 0x{:X} args: 0x{:X}", method, argument);
@ -27,7 +39,7 @@ namespace skyline::soc::gm20b::engine {
i2m.LoadInlineData(*registers.i2m, argument); i2m.LoadInlineData(*registers.i2m, argument);
}) })
ENGINE_CASE(sendSignalingPcasB, { ENGINE_CASE(sendSignalingPcasB, {
Logger::Warn("Attempted to execute compute kernel!"); interconnect.Dispatch(channelCtx.asCtx->gmmu.Read<kepler_compute::QMD>(registers.sendPcas->QmdAddress()));
}) })
ENGINE_STRUCT_CASE(reportSemaphore, action, { ENGINE_STRUCT_CASE(reportSemaphore, action, {
throw exception("Compute semaphores are unimplemented!"); throw exception("Compute semaphores are unimplemented!");

View File

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <gpu/interconnect/kepler_compute/kepler_compute.h>
#include "engine.h" #include "engine.h"
#include "inline2memory.h" #include "inline2memory.h"
@ -18,7 +19,10 @@ namespace skyline::soc::gm20b::engine {
class KeplerCompute { class KeplerCompute {
private: private:
host1x::SyncpointSet &syncpoints; host1x::SyncpointSet &syncpoints;
ChannelContext &channelCtx;
Inline2MemoryBackend i2m; Inline2MemoryBackend i2m;
gpu::interconnect::DirtyManager dirtyManager;
gpu::interconnect::kepler_compute::KeplerCompute interconnect;
void HandleMethod(u32 method, u32 argument); void HandleMethod(u32 method, u32 argument);