diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 97748fb5..e3e09a20 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -103,12 +103,12 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/texture/texture.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp - ${source_DIR}/skyline/soc/gm20b.cpp ${source_DIR}/skyline/soc/host1x/syncpoint.cpp + ${source_DIR}/skyline/soc/gm20b/channel.cpp ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp + ${source_DIR}/skyline/soc/gm20b/gmmu.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp ${source_DIR}/skyline/soc/gm20b/engines/maxwell/macro_interpreter.cpp - ${source_DIR}/skyline/soc/gm20b/gmmu.cpp ${source_DIR}/skyline/input/npad.cpp ${source_DIR}/skyline/input/npad_device.cpp ${source_DIR}/skyline/input/touch.cpp diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 9795a5fd..923c027a 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -4,8 +4,10 @@ #pragma once #include +#include #include #include + #include "command_executor.h" namespace skyline::gpu::interconnect { @@ -18,7 +20,7 @@ namespace skyline::gpu::interconnect { class GraphicsContext { private: GPU &gpu; - soc::gm20b::GMMU &gmmu; + soc::gm20b::ChannelContext &channelCtx; gpu::interconnect::CommandExecutor &executor; struct RenderTarget { @@ -50,7 +52,7 @@ namespace skyline::gpu::interconnect { public: - GraphicsContext(GPU &gpu, soc::gm20b::GMMU &gmmu, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), gmmu(gmmu), executor(executor) { + GraphicsContext(GPU &gpu, soc::gm20b::ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), channelCtx(channelCtx), executor(executor) { scissors.fill(DefaultScissor); } @@ -182,7 +184,7 @@ namespace skyline::gpu::interconnect { if (renderTarget.guest.mappings.empty()) { auto size{std::max(renderTarget.guest.layerStride * (renderTarget.guest.layerCount - renderTarget.guest.baseArrayLayer), renderTarget.guest.format->GetSize(renderTarget.guest.dimensions))}; - auto mappings{gmmu.TranslateRange(renderTarget.gpuAddress, size)}; + auto mappings{channelCtx.asCtx->gmmu.TranslateRange(renderTarget.gpuAddress, size)}; renderTarget.guest.mappings.assign(mappings.begin(), mappings.end()); } diff --git a/app/src/main/cpp/skyline/services/common/result.h b/app/src/main/cpp/skyline/services/common/result.h index 6e221450..54bef066 100644 --- a/app/src/main/cpp/skyline/services/common/result.h +++ b/app/src/main/cpp/skyline/services/common/result.h @@ -11,6 +11,7 @@ namespace skyline::service { NotPermitted = 1, // EPERM TryAgain = 11, // EAGAIN Busy = 16, // EBUSY + FileExists = 17, // EEXIST InvalidArgument = 22, // EINVAL InappropriateIoctlForDevice = 25, // ENOTTY FunctionNotImplemented = 38, // ENOSYS diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.cpp index 1c2aca70..6e42601f 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.cpp @@ -5,7 +5,11 @@ #include "nvdevice.h" namespace skyline::service::nvdrv::device { - NvDevice::NvDevice(const DeviceState &state, Core &core, const SessionContext &ctx) : state(state), core(core), ctx(ctx) {} + NvDevice::NvDevice(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : + state(state), + driver(driver), + core(core), + ctx(ctx) {} const std::string &NvDevice::GetName() { if (name.empty()) { diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.h index 60e0c87e..1a39b8f3 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvdevice.h @@ -11,6 +11,10 @@ #include "deserialisation/types.h" +namespace skyline::service::nvdrv { + class Driver; +} + namespace skyline::service::nvdrv::device { using namespace kernel; using namespace deserialisation; @@ -24,11 +28,12 @@ namespace skyline::service::nvdrv::device { protected: const DeviceState &state; + Driver &driver; Core &core; SessionContext ctx; public: - NvDevice(const DeviceState &state, Core &core, const SessionContext &ctx); + NvDevice(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); virtual ~NvDevice() = default; diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp index 3b294273..2ddd5d72 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.cpp @@ -3,7 +3,10 @@ #include #include +#include +#include #include +#include "gpu_channel.h" #include "as_gpu.h" namespace skyline { @@ -14,10 +17,31 @@ namespace skyline { namespace skyline::service::nvdrv::device::nvhost { using GMMU = soc::gm20b::GMMU; - AsGpu::AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} + AsGpu::AsGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : NvDevice(state, driver, core, ctx) {} PosixResult AsGpu::BindChannel(In channelFd) { - // TODO: support once multiple address spaces are supported + std::scoped_lock lock(mutex); + + if (!vm.initialised) + return PosixResult::InvalidArgument; + + try { + std::shared_lock gpuLock(driver.deviceMutex); + auto &gpuCh{dynamic_cast(*driver.devices.at(channelFd))}; + + std::scoped_lock channelLock(gpuCh.channelMutex); + + if (gpuCh.asCtx) { + state.logger->Warn("Attempting to bind multiple ASes to a single GPU channel"); + return PosixResult::InvalidArgument; + } + + gpuCh.asCtx = asCtx; + } catch (const std::out_of_range &e) { + state.logger->Warn("Attempting to bind AS to an invalid channel: {}", channelFd); + return PosixResult::InvalidArgument; + } + return PosixResult::Success; } @@ -53,7 +77,7 @@ namespace skyline::service::nvdrv::device::nvhost { u64 size{static_cast(pages) * pageSize}; if (flags.sparse) - state.soc->gm20b.gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), size, {true}); + asCtx->gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), size, {true}); allocationMap[offset] = { .size = size, @@ -77,9 +101,9 @@ namespace skyline::service::nvdrv::device::nvhost { // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state // Only FreeSpace can unmap them fully if (mapping->sparseAlloc) - state.soc->gm20b.gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true}); + asCtx->gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true}); else - state.soc->gm20b.gmmu.Unmap(offset, mapping->size); + asCtx->gmmu.Unmap(offset, mapping->size); mappingMap.erase(offset); } @@ -103,7 +127,7 @@ namespace skyline::service::nvdrv::device::nvhost { // Unset sparse flag if required if (allocation.sparse) - state.soc->gm20b.gmmu.Unmap(offset, allocation.size); + asCtx->gmmu.Unmap(offset, allocation.size); auto &allocator{pageSize == VM::PageSize ? vm.smallPageAllocator : vm.bigPageAllocator}; u32 pageSizeBits{pageSize == VM::PageSize ? VM::PageSizeBits : vm.bigPageSizeBits}; @@ -138,9 +162,9 @@ namespace skyline::service::nvdrv::device::nvhost { // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state // Only FreeSpace can unmap them fully if (mapping->sparseAlloc) - state.soc->gm20b.gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true}); + asCtx->gmmu.Map(offset, GMMU::SparsePlaceholderAddress(), mapping->size, {true}); else - state.soc->gm20b.gmmu.Unmap(offset, mapping->size); + asCtx->gmmu.Unmap(offset, mapping->size); mappingMap.erase(offset); } catch (const std::out_of_range &e) { @@ -172,7 +196,7 @@ namespace skyline::service::nvdrv::device::nvhost { u64 gpuAddress{offset + bufferOffset}; u8 *cpuPtr{mapping->ptr + bufferOffset}; - state.soc->gm20b.gmmu.Map(gpuAddress, cpuPtr, mappingSize); + asCtx->gmmu.Map(gpuAddress, cpuPtr, mappingSize); return PosixResult::Success; } catch (const std::out_of_range &e) { @@ -194,7 +218,7 @@ namespace skyline::service::nvdrv::device::nvhost { if (alloc-- == allocationMap.begin() || (offset - alloc->first) + size > alloc->second.size) throw exception("Cannot perform a fixed mapping into an unallocated region!"); - state.soc->gm20b.gmmu.Map(offset, cpuPtr, size); + asCtx->gmmu.Map(offset, cpuPtr, size); auto mapping{std::make_shared(cpuPtr, offset, size, true, false, alloc->second.sparse)}; alloc->second.mappings.push_back(mapping); @@ -214,7 +238,7 @@ namespace skyline::service::nvdrv::device::nvhost { u32 pageSizeBits{bigPage ? vm.bigPageSizeBits : VM::PageSizeBits}; offset = static_cast(allocator->Allocate(util::AlignUp(size, pageSize) >> pageSizeBits)) << pageSizeBits; - state.soc->gm20b.gmmu.Map(offset, cpuPtr, size); + asCtx->gmmu.Map(offset, cpuPtr, size); auto mapping{std::make_shared(cpuPtr, offset, size, false, bigPage, false)}; mappingMap[offset] = mapping; @@ -292,6 +316,7 @@ namespace skyline::service::nvdrv::device::nvhost { u64 endBigPages{(vm.vaRangeEnd - vm.vaRangeSplit) >> vm.bigPageSizeBits}; vm.bigPageAllocator = std::make_unique(startBigPages, endBigPages); + asCtx = std::make_shared(); vm.initialised = true; return PosixResult::Success; @@ -320,7 +345,7 @@ namespace skyline::service::nvdrv::device::nvhost { } if (!entry.handle) { - state.soc->gm20b.gmmu.Map(virtAddr, soc::gm20b::GMMU::SparsePlaceholderAddress(), size, {true}); + asCtx->gmmu.Map(virtAddr, GMMU::SparsePlaceholderAddress(), size, {true}); } else { auto h{core.nvMap.GetHandle(entry.handle)}; if (!h) @@ -328,7 +353,7 @@ namespace skyline::service::nvdrv::device::nvhost { u8 *cpuPtr{reinterpret_cast(h->address + (static_cast(entry.handleOffsetBigPages) << vm.bigPageSizeBits))}; - state.soc->gm20b.gmmu.Map(virtAddr, cpuPtr, size); + asCtx->gmmu.Map(virtAddr, cpuPtr, size); } } diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h index 00529fe7..26c7ff00 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/as_gpu.h @@ -4,7 +4,7 @@ #pragma once #include - +#include #include namespace skyline::service::nvdrv::device::nvhost { @@ -65,6 +65,8 @@ namespace skyline::service::nvdrv::device::nvhost { bool initialised{}; } vm; + std::shared_ptr asCtx; + void FreeMappingLocked(u64 offset); public: @@ -95,7 +97,7 @@ namespace skyline::service::nvdrv::device::nvhost { }; static_assert(sizeof(RemapEntry) == 0x14); - AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx); + AsGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); /** * @brief Binds this address space to a channel diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp index bb1798da..2d095534 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.cpp @@ -34,7 +34,7 @@ namespace skyline::service::nvdrv::device::nvhost { state == SyncpointEvent::State::Signalling; } - Ctrl::Ctrl(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} + Ctrl::Ctrl(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : NvDevice(state, driver, core, ctx) {} u32 Ctrl::FindFreeSyncpointEvent(u32 syncpointId) { u32 eventSlot{SyncpointEventCount}; //!< Holds the slot of the last populated event in the event array diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.h index 51c357a7..d596571f 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl.h @@ -96,7 +96,7 @@ namespace skyline::service::nvdrv::device::nvhost { PosixResult SyncpointFreeEventLocked(In slot); public: - Ctrl(const DeviceState &state, Core &core, const SessionContext &ctx); + Ctrl(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); /** * @brief Clears a syncpoint event diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp index f78810a1..09f93bf5 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.cpp @@ -5,8 +5,8 @@ #include "ctrl_gpu.h" namespace skyline::service::nvdrv::device::nvhost { - CtrlGpu::CtrlGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : - NvDevice(state, core, ctx), + CtrlGpu::CtrlGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : + NvDevice(state, driver, core, ctx), errorNotifierEvent(std::make_shared(state, false)), unknownEvent(std::make_shared(state, false)) {} diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.h index 2c202c6b..ff93b41b 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/ctrl_gpu.h @@ -73,7 +73,7 @@ namespace skyline::service::nvdrv::device::nvhost { u32 subregionCount{0x10}; }; - CtrlGpu(const DeviceState &state, Core &core, const SessionContext &ctx); + CtrlGpu(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); /** * @brief Returns the zcull context size diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp index 29b652fe..2bebb602 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.cpp @@ -6,8 +6,8 @@ #include "gpu_channel.h" namespace skyline::service::nvdrv::device::nvhost { - GpuChannel::GpuChannel(const DeviceState &state, Core &core, const SessionContext &ctx) : - NvDevice(state, core, ctx), + GpuChannel::GpuChannel(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : + NvDevice(state, driver, core, ctx), smExceptionBreakpointIntReportEvent(std::make_shared(state, false)), smExceptionBreakpointPauseReportEvent(std::make_shared(state, false)), errorNotifierEvent(std::make_shared(state, false)) { @@ -39,16 +39,20 @@ namespace skyline::service::nvdrv::device::nvhost { if (flags.incrementWithValue) return PosixResult::InvalidArgument; - if (core.syncpointManager.IsFenceSignalled(fence)) + if (!core.syncpointManager.IsFenceSignalled(fence)) throw exception("Waiting on a fence through SubmitGpfifo is unimplemented"); } - state.soc->gm20b.gpfifo.Push(gpEntries.subspan(0, numEntries)); + { + std::scoped_lock lock(channelMutex); - fence.id = channelSyncpoint; + channelCtx->gpfifo.Push(gpEntries.subspan(0, numEntries)); - u32 increment{(flags.fenceIncrement ? 2 : 0) + (flags.incrementWithValue ? fence.threshold : 0)}; - fence.threshold = core.syncpointManager.IncrementSyncpointMaxExt(channelSyncpoint, increment); + fence.id = channelSyncpoint; + + u32 increment{(flags.fenceIncrement ? 2 : 0) + (flags.incrementWithValue ? fence.threshold : 0)}; + fence.threshold = core.syncpointManager.IncrementSyncpointMaxExt(channelSyncpoint, increment); + } if (flags.fenceIncrement) throw exception("Incrementing a fence through SubmitGpfifo is unimplemented"); @@ -84,7 +88,19 @@ namespace skyline::service::nvdrv::device::nvhost { PosixResult GpuChannel::AllocGpfifoEx2(In numEntries, In numJobs, In flags, Out fence) { state.logger->Debug("numEntries: {}, numJobs: {}, flags: 0x{:X}", numEntries, numJobs, flags); - state.soc->gm20b.gpfifo.Initialize(numEntries); + + std::scoped_lock lock(channelMutex); + if (!asCtx) { + state.logger->Warn("Trying to allocate a channel without a bound address space"); + return PosixResult::InvalidArgument; + } + + if (channelCtx) { + state.logger->Warn("Trying to allocate a channel twice!"); + return PosixResult::FileExists; + } + + channelCtx = std::make_unique(state, asCtx, numEntries); fence = core.syncpointManager.GetSyncpointFence(channelSyncpoint); diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h index b9a807f3..a8c71bad 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvhost/gpu_channel.h @@ -3,23 +3,32 @@ #pragma once -#include #include +#include // TODO: remove +#include #include "services/nvdrv/devices/nvdevice.h" namespace skyline::service::nvdrv::device::nvhost { + class AsGpu; + /** * @brief nvhost::GpuChannel is used to create and submit commands to channels which are effectively GPU processes * @url https://switchbrew.org/wiki/NV_services#Channels */ class GpuChannel : public NvDevice { private: - u32 channelSyncpoint{}; + u32 channelSyncpoint{}; //!< The syncpoint for submissions allocated to this channel in `AllocGpfifo` u32 channelUserData{}; + std::mutex channelMutex; std::shared_ptr smExceptionBreakpointIntReportEvent; std::shared_ptr smExceptionBreakpointPauseReportEvent; std::shared_ptr errorNotifierEvent; + std::shared_ptr asCtx; + std::unique_ptr channelCtx; + + friend AsGpu; + public: /** * @brief A bitfield of the flags that can be supplied for a specific GPFIFO submission @@ -37,7 +46,7 @@ namespace skyline::service::nvdrv::device::nvhost { u32 raw; }; - GpuChannel(const DeviceState &state, Core &core, const SessionContext &ctx); + GpuChannel(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); /** * @brief Sets the nvmap handle id to be used for channel submits (does nothing for GPU channels) diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp b/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp index 7a0bfe47..274cf9ed 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.cpp @@ -6,7 +6,7 @@ #include "nvmap.h" namespace skyline::service::nvdrv::device { - NvMap::NvMap(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {} + NvMap::NvMap(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx) : NvDevice(state, driver, core, ctx) {} PosixResult NvMap::Create(In size, Out handle) { auto handleDesc{core.nvMap.CreateHandle(util::AlignUp(size, PAGE_SIZE))}; diff --git a/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.h b/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.h index 7846e317..74fe4557 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.h +++ b/app/src/main/cpp/skyline/services/nvdrv/devices/nvmap.h @@ -23,7 +23,7 @@ namespace skyline::service::nvdrv::device { IsSharedMemMapped = 6 }; - NvMap(const DeviceState &state, Core &core, const SessionContext &ctx); + NvMap(const DeviceState &state, Driver &driver, Core &core, const SessionContext &ctx); /** * @brief Creates an nvmap handle for the given size diff --git a/app/src/main/cpp/skyline/services/nvdrv/driver.cpp b/app/src/main/cpp/skyline/services/nvdrv/driver.cpp index 010c737f..0f551f44 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/driver.cpp +++ b/app/src/main/cpp/skyline/services/nvdrv/driver.cpp @@ -23,10 +23,13 @@ namespace skyline::service::nvdrv { break; \ } - #define DEVICE_CASE(path, object) \ - case util::Hash(path): \ - devices.emplace(fd, std::make_unique(state, core, ctx)); \ - return NvResult::Success; + #define DEVICE_CASE(path, object) \ + case util::Hash(path): \ + { \ + std::unique_lock lock(deviceMutex); \ + devices.emplace(fd, std::make_unique(state, *this, core, ctx)); \ + return NvResult::Success; \ + } DEVICE_SWITCH( DEVICE_CASE("/dev/nvmap", NvMap) @@ -69,13 +72,13 @@ namespace skyline::service::nvdrv { default: throw exception("Unhandled POSIX result: {}!", static_cast(result)); } - } NvResult Driver::Ioctl(u32 fd, IoctlDescriptor cmd, span buffer) { state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName()); try { + std::shared_lock lock(deviceMutex); return ConvertResult(devices.at(fd)->Ioctl(cmd, buffer)); } catch (const std::out_of_range &) { throw exception("Ioctl was called with invalid file descriptor: {}", fd); @@ -86,6 +89,7 @@ namespace skyline::service::nvdrv { state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName()); try { + std::shared_lock lock(deviceMutex); return ConvertResult(devices.at(fd)->Ioctl2(cmd, buffer, inlineBuffer)); } catch (const std::out_of_range &) { throw exception("Ioctl2 was called with invalid file descriptor: 0x{:X}", fd); @@ -96,6 +100,7 @@ namespace skyline::service::nvdrv { state.logger->Debug("fd: {}, cmd: 0x{:X}, device: {}", fd, cmd.raw, devices.at(fd)->GetName()); try { + std::shared_lock lock(deviceMutex); return ConvertResult(devices.at(fd)->Ioctl3(cmd, buffer, inlineBuffer)); } catch (const std::out_of_range &) { throw exception("Ioctl3 was called with invalid file descriptor: {}", fd); @@ -104,6 +109,7 @@ namespace skyline::service::nvdrv { void Driver::CloseDevice(u32 fd) { try { + std::unique_lock lock(deviceMutex); devices.erase(fd); } catch (const std::out_of_range &) { state.logger->Warn("Trying to close non-existent file descriptor: {}"); @@ -114,6 +120,7 @@ namespace skyline::service::nvdrv { state.logger->Debug("fd: {}, eventId: 0x{:X}, device: {}", fd, eventId, devices.at(fd)->GetName()); try { + std::shared_lock lock(deviceMutex); return devices.at(fd)->QueryEvent(eventId); } catch (const std::exception &) { throw exception("QueryEvent was called with invalid file descriptor: {}", fd); diff --git a/app/src/main/cpp/skyline/services/nvdrv/driver.h b/app/src/main/cpp/skyline/services/nvdrv/driver.h index 2e827fc3..c3d2a4bb 100644 --- a/app/src/main/cpp/skyline/services/nvdrv/driver.h +++ b/app/src/main/cpp/skyline/services/nvdrv/driver.h @@ -4,16 +4,27 @@ #pragma once #include +#include #include "types.h" -#include "devices/nvdevice.h" #include "core/core.h" +#include "devices/nvdevice.h" namespace skyline::service::nvdrv { + namespace device { + namespace nvhost { + class AsGpu; + } + } + class Driver { private: const DeviceState &state; + + std::shared_mutex deviceMutex; //!< Protects access to `devices` std::unordered_map> devices; + friend device::nvhost::AsGpu; // For channel address space binding + public: Core core; //!< The core global state object of nvdrv that is accessed by devices diff --git a/app/src/main/cpp/skyline/soc.h b/app/src/main/cpp/skyline/soc.h index 717bb321..32c13d59 100644 --- a/app/src/main/cpp/skyline/soc.h +++ b/app/src/main/cpp/skyline/soc.h @@ -4,7 +4,7 @@ #pragma once #include "soc/host1x.h" -#include "soc/gm20b.h" +#include "soc/gm20b/gpfifo.h" namespace skyline::soc { /** @@ -14,8 +14,7 @@ namespace skyline::soc { class SOC { public: host1x::Host1X host1x; - gm20b::GM20B gm20b; - SOC(const DeviceState &state) : gm20b(state) {} + SOC(const DeviceState &state) {} }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b.cpp b/app/src/main/cpp/skyline/soc/gm20b.cpp deleted file mode 100644 index 206b8e8a..00000000 --- a/app/src/main/cpp/skyline/soc/gm20b.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 -// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) - -#include "gm20b.h" - -namespace skyline::soc::gm20b { - GM20B::GM20B(const DeviceState &state) : - fermi2D(state), - keplerMemory(state), - maxwell3D(state, gmmu, executor), - maxwellCompute(state), - maxwellDma(state), - gpfifo(state), - executor(state) {} -} diff --git a/app/src/main/cpp/skyline/soc/gm20b/channel.cpp b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp new file mode 100644 index 00000000..73622af4 --- /dev/null +++ b/app/src/main/cpp/skyline/soc/gm20b/channel.cpp @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include "engines/maxwell_3d.h" //TODO: remove +#include "channel.h" + +namespace skyline::soc::gm20b { + ChannelContext::ChannelContext(const DeviceState &state, std::shared_ptr asCtx, size_t numEntries) : + fermi2D(state), + keplerMemory(state), + maxwell3D(std::make_unique(state, *this, executor)), + maxwellCompute(state), + maxwellDma(state), + gpfifo(state, *this, numEntries), + executor(state), + asCtx(std::move(asCtx)){} +} diff --git a/app/src/main/cpp/skyline/soc/gm20b.h b/app/src/main/cpp/skyline/soc/gm20b/channel.h similarity index 58% rename from app/src/main/cpp/skyline/soc/gm20b.h rename to app/src/main/cpp/skyline/soc/gm20b/channel.h index 73058738..e6d9b4db 100644 --- a/app/src/main/cpp/skyline/soc/gm20b.h +++ b/app/src/main/cpp/skyline/soc/gm20b/channel.h @@ -4,26 +4,30 @@ #pragma once #include -#include "gm20b/engines/maxwell_3d.h" -#include "gm20b/gpfifo.h" -#include "gm20b/gmmu.h" +#include "engines/engine.h" +#include "gpfifo.h" namespace skyline::soc::gm20b { + namespace engine::maxwell3d { + class Maxwell3D; + } + + struct AddressSpaceContext; + /** * @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations * @note We omit parts of components related to external access such as the grhost, all accesses to the external components are done directly */ - class GM20B { - public: - GMMU gmmu; + struct ChannelContext { + std::shared_ptr asCtx; gpu::interconnect::CommandExecutor executor; engine::Engine fermi2D; - engine::maxwell3d::Maxwell3D maxwell3D; + std::unique_ptr maxwell3D; //!< TODO: fix this once graphics context is moved into a cpp file engine::Engine maxwellCompute; engine::Engine maxwellDma; engine::Engine keplerMemory; - GPFIFO gpfifo; + ChannelGpfifo gpfifo; - GM20B(const DeviceState &state); + ChannelContext(const DeviceState &state, std::shared_ptr asCtx, size_t numEntries); }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index c869e86e..66884a15 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -3,10 +3,11 @@ // Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d) #include +#include "maxwell_3d.h" #include namespace skyline::soc::gm20b::engine::maxwell3d { - Maxwell3D::Maxwell3D(const DeviceState &state, GMMU &gmmu, gpu::interconnect::CommandExecutor &executor) : Engine(state), macroInterpreter(*this), context(*state.gpu, gmmu, executor) { + Maxwell3D::Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : Engine(state), macroInterpreter(*this), context(*state.gpu, channelCtx, executor), channelCtx(channelCtx) { ResetRegs(); } @@ -244,7 +245,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { MAXWELL3D_CASE(syncpointAction, { state.logger->Debug("Increment syncpoint: {}", static_cast(syncpointAction.id)); - state.soc->gm20b.executor.Execute(); + channelCtx.executor.Execute(); state.soc->host1x.syncpoints.at(syncpointAction.id).Increment(); }) @@ -307,7 +308,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { switch (registers.semaphore.info.structureSize) { case type::SemaphoreInfo::StructureSize::OneWord: - state.soc->gm20b.gmmu.Write(registers.semaphore.address.Pack(), static_cast(result)); + channelCtx.asCtx->gmmu.Write(registers.semaphore.address.Pack(), static_cast(result)); break; case type::SemaphoreInfo::StructureSize::FourWords: { @@ -318,7 +319,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { u64 nsTime{util::GetTimeNs()}; u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator}; - state.soc->gm20b.gmmu.Write(registers.semaphore.address.Pack(), FourWordResult{result, timestamp}); + channelCtx.asCtx->gmmu.Write(registers.semaphore.address.Pack(), FourWordResult{result, timestamp}); break; } } diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h index 6a43f637..01862c26 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h @@ -8,6 +8,10 @@ #include "engine.h" #include "maxwell/macro_interpreter.h" +namespace skyline::soc::gm20b { + struct ChannelContext; +} + namespace skyline::soc::gm20b::engine::maxwell3d { /** * @brief The Maxwell 3D engine handles processing 3D graphics @@ -245,9 +249,11 @@ namespace skyline::soc::gm20b::engine::maxwell3d { Registers registers{}; Registers shadowRegisters{}; //!< A shadow-copy of the registers, their function is controlled by the 'shadowRamControl' register + ChannelContext &channelCtx; + std::array macroCode{}; //!< Stores GPU macros, writes to it will wraparound on overflow - Maxwell3D(const DeviceState &state, GMMU &gmmu, gpu::interconnect::CommandExecutor &executor); + Maxwell3D(const DeviceState &state, ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor); /** * @brief Resets the Maxwell 3D registers to their default values diff --git a/app/src/main/cpp/skyline/soc/gm20b/gmmu.h b/app/src/main/cpp/skyline/soc/gm20b/gmmu.h index 00eb02a5..9eb3cadd 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gmmu.h +++ b/app/src/main/cpp/skyline/soc/gm20b/gmmu.h @@ -10,8 +10,12 @@ namespace skyline::soc::gm20b { /** * @brief The GMMU (Graphics Memory Management Unit) class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1 - * @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't emulate this abstraction at the moment + * @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't need to emulate this abstraction * @note The GMMU is implemented entirely as a template specialization over FlatMemoryManager */ using GMMU = FlatMemoryManager; + + struct AddressSpaceContext { + GMMU gmmu; + }; } diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp index bf32f1c4..2d895f5f 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "engines/maxwell_3d.h" namespace skyline::soc::gm20b { /** @@ -58,7 +59,14 @@ namespace skyline::soc::gm20b { }; static_assert(sizeof(PushBufferMethodHeader) == sizeof(u32)); - void GPFIFO::Send(u32 method, u32 argument, u32 subChannel, bool lastCall) { + ChannelGpfifo::ChannelGpfifo(const DeviceState &state, ChannelContext &channelCtx, size_t numEntries) : + state(state), + gpfifoEngine(state), + channelCtx(channelCtx), + gpEntries(numEntries), + thread(std::thread(&ChannelGpfifo::Run, this)) {} + + void ChannelGpfifo::Send(u32 method, u32 argument, u32 subChannel, bool lastCall) { constexpr u32 ThreeDSubChannel{0}; constexpr u32 ComputeSubChannel{1}; constexpr u32 Inline2MemorySubChannel{2}; @@ -72,19 +80,19 @@ namespace skyline::soc::gm20b { } else { switch (subChannel) { case ThreeDSubChannel: - state.soc->gm20b.maxwell3D.CallMethod(method, argument, lastCall); + channelCtx.maxwell3D->CallMethod(method, argument, lastCall); break; case ComputeSubChannel: - state.soc->gm20b.maxwellCompute.CallMethod(method, argument, lastCall); + channelCtx.maxwellCompute.CallMethod(method, argument, lastCall); break; case Inline2MemorySubChannel: - state.soc->gm20b.keplerMemory.CallMethod(method, argument, lastCall); + channelCtx.keplerMemory.CallMethod(method, argument, lastCall); break; case TwoDSubChannel: - state.soc->gm20b.fermi2D.CallMethod(method, argument, lastCall); + channelCtx.fermi2D.CallMethod(method, argument, lastCall); break; case CopySubChannel: - state.soc->gm20b.maxwellDma.CallMethod(method, argument, lastCall); + channelCtx.maxwellDma.CallMethod(method, argument, lastCall); break; default: throw exception("Tried to call into a software subchannel: {}!", subChannel); @@ -92,7 +100,7 @@ namespace skyline::soc::gm20b { } } - void GPFIFO::Process(GpEntry gpEntry) { + void ChannelGpfifo::Process(GpEntry gpEntry) { if (!gpEntry.size) { // This is a GPFIFO control entry, all control entries have a zero length and contain no pushbuffers switch (gpEntry.opcode) { @@ -105,7 +113,7 @@ namespace skyline::soc::gm20b { } pushBufferData.resize(gpEntry.size); - state.soc->gm20b.gmmu.Read(pushBufferData, gpEntry.Address()); + channelCtx.asCtx->gmmu.Read(pushBufferData, gpEntry.Address()); for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) { // An entry containing all zeroes is a NOP, skip over it @@ -142,18 +150,11 @@ namespace skyline::soc::gm20b { } } - void GPFIFO::Initialize(size_t numBuffers) { - if (pushBuffers) - throw exception("GPFIFO Initialization cannot be done multiple times"); - pushBuffers.emplace(numBuffers); - thread = std::thread(&GPFIFO::Run, this); - } - - void GPFIFO::Run() { + void ChannelGpfifo::Run() { pthread_setname_np(pthread_self(), "GPFIFO"); try { signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler); - pushBuffers->Process([this](GpEntry gpEntry) { + gpEntries.Process([this](GpEntry gpEntry) { state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address()); Process(gpEntry); }); @@ -170,11 +171,11 @@ namespace skyline::soc::gm20b { } } - void GPFIFO::Push(span entries) { - pushBuffers->Append(entries); + void ChannelGpfifo::Push(span entries) { + gpEntries.Append(entries); } - GPFIFO::~GPFIFO() { + ChannelGpfifo::~ChannelGpfifo() { if (thread.joinable()) { pthread_kill(thread.native_handle(), SIGINT); thread.join(); diff --git a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h index 318fed61..931c0e16 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h +++ b/app/src/main/cpp/skyline/soc/gm20b/gpfifo.h @@ -7,6 +7,8 @@ #include "engines/gpfifo.h" namespace skyline::soc::gm20b { + struct ChannelContext; + /** * @brief A GPFIFO entry as submitted through 'SubmitGpfifo' * @url https://nvidia.github.io/open-gpu-doc/manuals/volta/gv100/dev_pbdma.ref.txt @@ -73,15 +75,16 @@ namespace skyline::soc::gm20b { static_assert(sizeof(GpEntry) == sizeof(u64)); /** - * @brief The GPFIFO class handles creating pushbuffers from GP entries and then processing them + * @brief The ChannelGpfifo class handles creating pushbuffers from GP entries and then processing them for a single channel + * @note A single ChannelGpfifo thread exists per channel with a single shared mutex in `GPFIFO` to enforce that only one channel can run at a time * @note This class doesn't perfectly map to any particular hardware component on the X1, it does a mix of the GPU Host PBDMA (With and handling the GPFIFO entries * @url https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt#L62 */ - class GPFIFO { + class ChannelGpfifo { const DeviceState &state; + ChannelContext &channelCtx; engine::GPFIFO gpfifoEngine; //!< The engine for processing GPFIFO method calls - std::array subchannels; - std::optional> pushBuffers; + CircularQueue gpEntries; std::thread thread; //!< The thread that manages processing of pushbuffers std::vector pushBufferData; //!< Persistent vector storing pushbuffer data to avoid constant reallocations @@ -96,14 +99,12 @@ namespace skyline::soc::gm20b { void Process(GpEntry gpEntry); public: - GPFIFO(const DeviceState &state) : state(state), gpfifoEngine(state) {} - - ~GPFIFO(); - /** - * @param numBuffers The amount of push-buffers to allocate in the circular buffer + * @param numEntries The number of gpEntries to allocate space for in the FIFO */ - void Initialize(size_t numBuffers); + ChannelGpfifo(const DeviceState &state, ChannelContext &channelCtx, size_t numEntries); + + ~ChannelGpfifo(); /** * @brief Executes all pending entries in the FIFO