Implement a block-based GPU virtual memory manager

The GPU has its own address space, separate from the CPU's: it uses 40-bit
addresses and maps onto system memory. A sorted vector is used to store the
blocks, as insertions are infrequent.
Billy Laws 2020-07-14 15:15:28 +01:00 committed by PixelyIon
parent 80e7b82bad
commit b23779bda1
13 changed files with 402 additions and 20 deletions
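
At a glance, the new interface is consumed by the nvhost-as-gpu ioctls further down. A minimal usage sketch, using only names introduced in this commit; the surrounding function, the concrete sizes and the cpuAddress parameter are illustrative and not part of the diff:

    #include "gpu/memory_manager.h"

    // Illustrative only: drives the new vmm::MemoryManager the way the nvhost-as-gpu
    // handlers below do; sizes and the cpuAddress parameter are made up for the example.
    skyline::u64 ExampleUsage(skyline::gpu::vmm::MemoryManager &memoryManager, skyline::u64 cpuAddress) {
        // Reserve 2 MiB of GPU address space (sizes are rounded up to the 64 KiB GPU page size)
        skyline::u64 gpuAddress = memoryManager.AllocateSpace(0x200000);
        if (!gpuAddress)
            return 0; // 0 signals failure throughout the interface

        // Back the start of that reservation with CPU memory at a caller-chosen GPU address
        memoryManager.MapFixed(gpuAddress, cpuAddress, 0x10000);

        // Or let the manager pick a suitable previously allocated region itself
        return memoryManager.MapAllocated(cpuAddress, 0x10000);
    }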

View File

@@ -39,6 +39,7 @@ add_library(skyline SHARED
        ${source_DIR}/skyline/audio/resampler.cpp
        ${source_DIR}/skyline/audio/adpcm_decoder.cpp
        ${source_DIR}/skyline/gpu.cpp
+       ${source_DIR}/skyline/gpu/memory_manager.cpp
        ${source_DIR}/skyline/gpu/texture.cpp
        ${source_DIR}/skyline/os.cpp
        ${source_DIR}/skyline/loader/loader.cpp

View File

@@ -9,6 +9,7 @@
#include <kernel/types/KEvent.h>
#include <services/nvdrv/devices/nvmap.h>
#include "gpu/texture.h"
+#include "gpu/memory_manager.h"
namespace skyline::gpu {
    /**
@@ -28,6 +29,7 @@ namespace skyline::gpu {
        i32 format{}; //!< The format of the display window
        std::shared_ptr<kernel::type::KEvent> vsyncEvent; //!< This KEvent is triggered every time a frame is drawn
        std::shared_ptr<kernel::type::KEvent> bufferEvent; //!< This KEvent is triggered every time a buffer is freed
+        vmm::MemoryManager memoryManager; //!< The GPU Virtual Memory Manager
        /**
         * @param window The ANativeWindow to render to

View File

@@ -0,0 +1,122 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)

#include "memory_manager.h"

namespace skyline::gpu::vmm {
    MemoryManager::MemoryManager() {
        constexpr u64 GpuAddressSpaceSize = 1ul << 40; //!< The size of the GPU address space
        constexpr u64 GpuAddressSpaceBase = 0x100000; //!< The base of the GPU address space - must be non-zero

        // Create the initial chunk that will be split to create new chunks
        ChunkDescriptor baseChunk(GpuAddressSpaceBase, GpuAddressSpaceSize, 0, ChunkState::Unmapped);
        chunkList.push_back(baseChunk);
    }

    std::optional<ChunkDescriptor> MemoryManager::FindChunk(u64 size, ChunkState state) {
        auto chunk = std::find_if(chunkList.begin(), chunkList.end(), [size, state](const ChunkDescriptor &chunk) -> bool {
            return chunk.size > size && chunk.state == state;
        });

        if (chunk != chunkList.end())
            return *chunk;

        return std::nullopt;
    }

    u64 MemoryManager::InsertChunk(const ChunkDescriptor &newChunk) {
        auto chunkEnd = chunkList.end();
        for (auto chunk = chunkList.begin(); chunk != chunkEnd; chunk++) {
            if (chunk->CanContain(newChunk)) {
                auto oldChunk = *chunk;
                u64 newSize = newChunk.address - chunk->address;
                u64 extension = chunk->size - newSize - newChunk.size;

                if (newSize == 0) {
                    *chunk = newChunk;
                } else {
                    chunk->size = newSize;
                    chunk = chunkList.insert(std::next(chunk), newChunk);
                }

                if (extension)
                    chunkList.insert(std::next(chunk), ChunkDescriptor(newChunk.address + newChunk.size, extension, oldChunk.cpuAddress + newSize + newChunk.size, oldChunk.state));

                return newChunk.address;
            } else if (chunk->address + chunk->size >= newChunk.address) {
                chunk->size = (newChunk.address - chunk->address);

                // Delete all the chunks that lie wholly within the new chunk and split the final one
                auto tailChunk = std::next(chunk);
                while (tailChunk != chunkEnd) {
                    if (tailChunk->address + tailChunk->size >= newChunk.address + newChunk.size)
                        break;

                    tailChunk = chunkList.erase(tailChunk);
                    chunkEnd = chunkList.end();
                }

                // The given chunk is too large to fit into existing chunks
                if (tailChunk == chunkEnd)
                    break;

                u64 chunkSliceOffset = newChunk.address + newChunk.size - tailChunk->address;
                tailChunk->address += chunkSliceOffset;
                tailChunk->size -= chunkSliceOffset;
                if (tailChunk->state == ChunkState::Mapped)
                    tailChunk->cpuAddress += chunkSliceOffset;

                // Insert the new chunk between the shrunk head chunk and the sliced tail chunk
                chunkList.insert(std::next(chunk), newChunk);

                return newChunk.address;
            }
        }

        throw exception("Failed to insert chunk into GPU address space!");
    }

    u64 MemoryManager::AllocateSpace(u64 size) {
        size = util::AlignUp(size, constant::GpuPageSize);

        auto newChunk = FindChunk(size, ChunkState::Unmapped);
        if (!newChunk.has_value())
            return 0;

        auto chunk = newChunk.value();
        chunk.size = size;
        chunk.state = ChunkState::Allocated;

        return InsertChunk(chunk);
    }

    u64 MemoryManager::AllocateFixed(u64 address, u64 size) {
        if ((address & (constant::GpuPageSize - 1)) != 0)
            return 0;

        size = util::AlignUp(size, constant::GpuPageSize);

        return InsertChunk(ChunkDescriptor(address, size, 0, ChunkState::Allocated));
    }

    u64 MemoryManager::MapAllocated(u64 address, u64 size) {
        size = util::AlignUp(size, constant::GpuPageSize);

        auto mappedChunk = FindChunk(size, ChunkState::Allocated);
        if (!mappedChunk.has_value())
            return 0;

        auto chunk = mappedChunk.value();
        chunk.cpuAddress = address;
        chunk.size = size;
        chunk.state = ChunkState::Mapped;

        return InsertChunk(chunk);
    }

    u64 MemoryManager::MapFixed(u64 address, u64 cpuAddress, u64 size) {
        if ((address & (constant::GpuPageSize - 1)) != 0)
            return 0;

        size = util::AlignUp(size, constant::GpuPageSize);

        return InsertChunk(ChunkDescriptor(address, size, cpuAddress, ChunkState::Mapped));
    }
}
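
The splitting behaviour of InsertChunk is the core of the manager: a chunk placed in the middle of a larger one shrinks the original and spawns a trailing descriptor for the remainder. A rough walkthrough under the initial state created by the constructor; the concrete addresses are illustrative and the expected list is inferred from the code above rather than taken from a test in this commit:

    #include "gpu/memory_manager.h"

    // Illustrative only: exercises the first branch of InsertChunk via AllocateFixed.
    void SplitExample() {
        skyline::gpu::vmm::MemoryManager memoryManager;
        // Initial state (from the constructor): [0x100000, 0x100000 + 2^40) Unmapped

        // Carve a fixed 128 KiB allocation out of the middle of the unmapped chunk
        memoryManager.AllocateFixed(0x200000, 0x20000);

        // Expected chunk list afterwards:
        //   [0x100000, 0x200000)           Unmapped   (the original chunk, shrunk to newSize)
        //   [0x200000, 0x220000)           Allocated  (the inserted chunk)
        //   [0x220000, 0x100000 + 2^40)    Unmapped   (the `extension` descriptor)
    }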

View File

@@ -0,0 +1,101 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)

#pragma once

#include <common.h>

namespace skyline {
    namespace constant {
        constexpr u64 GpuPageSize = 1 << 16; //!< The page size of the GPU address space
    }

    namespace gpu::vmm {
        /**
         * @brief This enumerates the possible states of a memory chunk
         */
        enum ChunkState {
            Unmapped, //!< The chunk is unmapped
            Allocated, //!< The chunk is allocated but unmapped
            Mapped //!< The chunk is mapped and a CPU side address is present
        };

        /**
         * @brief This describes a chunk of memory and all of its attributes
         */
        struct ChunkDescriptor {
            u64 address; //!< The address of the chunk in the GPU address space
            u64 size; //!< The size of the chunk in bytes
            u64 cpuAddress; //!< The address of the chunk in the CPU address space (if mapped)
            ChunkState state; //!< The state of the chunk

            ChunkDescriptor(u64 address, u64 size, u64 cpuAddress, ChunkState state) : address(address), size(size), cpuAddress(cpuAddress), state(state) {}

            /**
             * @param chunk The chunk to check
             * @return If the given chunk can be contained wholly within this chunk
             */
            inline bool CanContain(const ChunkDescriptor &chunk) {
                return (chunk.address >= this->address) && ((this->size + this->address) >= (chunk.size + chunk.address));
            }
        };

        /**
         * @brief The MemoryManager class handles the mapping of the GPU address space
         */
        class MemoryManager {
          private:
            std::vector<ChunkDescriptor> chunkList; //!< This vector holds all the chunk descriptors

            /**
             * @brief This finds a chunk in the given state in the GPU address space that is larger than the given size
             * @param size The minimum size of the chunk to find
             * @param state The desired state of the chunk to find
             * @return The first chunk in the given state that fits the requested size
             */
            std::optional<ChunkDescriptor> FindChunk(u64 size, ChunkState state);

            /**
             * @brief This inserts a chunk into the chunk list, resizing and splitting as necessary
             * @param newChunk The chunk to insert
             * @return The base virtual GPU address of the inserted chunk
             */
            u64 InsertChunk(const ChunkDescriptor &newChunk);

          public:
            MemoryManager();

            /**
             * @brief This reserves a region of the GPU address space so it can later be used for mapping
             * @param size The size of the region to reserve
             * @return The base virtual GPU address of the reserved region
             */
            u64 AllocateSpace(u64 size);

            /**
             * @brief This reserves a fixed region of the GPU address space so it can later be used for mapping
             * @param address The virtual base address of the region to allocate
             * @param size The size of the region to allocate
             * @return The virtual address of the region base
             */
            u64 AllocateFixed(u64 address, u64 size);

            /**
             * @brief This maps a physical CPU memory region to an automatically chosen virtual memory region
             * @param address The physical CPU address of the region to be mapped into the GPU's address space
             * @param size The size of the region to map
             * @return The virtual address of the region base
             */
            u64 MapAllocated(u64 address, u64 size);

            /**
             * @brief This maps a physical CPU memory region to a fixed virtual memory region
             * @param address The target virtual address of the region
             * @param cpuAddress The physical CPU address of the region to be mapped into the GPU's address space
             * @param size The size of the region to map
             * @return The virtual address of the region base
             */
            u64 MapFixed(u64 address, u64 cpuAddress, u64 size);
        };
    }
}
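
One consequence of this header worth spelling out: every size is rounded up to constant::GpuPageSize and fixed addresses must be page-aligned, so even a 1-byte request consumes a full 64 KiB page. A small illustration; util::AlignUp is assumed from its use in memory_manager.cpp, and GpuPageSize is defined in this header:

    #include "gpu/memory_manager.h"

    // Assumes util::AlignUp(value, alignment) rounds value up to the next multiple of alignment,
    // matching its use in memory_manager.cpp.
    static_assert(skyline::constant::GpuPageSize == 0x10000, "GPU pages are 64 KiB");

    void AlignmentExample(skyline::gpu::vmm::MemoryManager &memoryManager) {
        memoryManager.AllocateSpace(0x1);               // reserves a full 0x10000 byte page
        memoryManager.AllocateSpace(0x10001);           // reserves 0x20000 bytes
        memoryManager.AllocateFixed(0x200001, 0x10000); // returns 0: the address is not page-aligned
    }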

View File

@@ -77,6 +77,10 @@ namespace skyline::service::nvdrv {
            auto cmd = request.Pop<u32>();
            state.logger->Debug("IOCTL on device: 0x{:X}, cmd: 0x{:X}", fd, cmd);
+           // Strip the permissions from the command leaving only the ID
+           cmd &= 0xffff;
            try {
                if (request.inputBuf.empty() || request.outputBuf.empty()) {
                    if (request.inputBuf.empty()) {
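
For context on the masking above: the full command word follows the standard Linux _IOC encoding (direction, argument size, type, number), so `cmd & 0xffff` keeps only the type and number, which is the 16-bit form the per-device tables in this commit now use as keys. A worked example using a value taken from the nvhost-ctrl-gpu table below:

    #include <cstdint>

    // Standard Linux ioctl layout: | dir:2 | size:14 | type:8 | nr:8 | (most to least significant)
    constexpr std::uint32_t oldCmd = 0xC0184706;  // value previously registered for NvHostCtrlGpu::GetTpcMasks
    constexpr std::uint32_t id = oldCmd & 0xFFFF; // direction (0x3) and size (0x18) are stripped
    static_assert(id == 0x4706, "only the type (0x47) and number (0x06) remain");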

View File

@@ -1,8 +1,110 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)

+#include <gpu.h>
+#include <os.h>
+#include <kernel/types/KProcess.h>
+#include <services/nvdrv/INvDrvServices.h>
+#include "nvmap.h"
#include "nvhost_as_gpu.h"

namespace skyline::service::nvdrv::device {
-    NvHostAsGpu::NvHostAsGpu(const DeviceState &state) : NvDevice(state, NvDeviceType::nvhost_as_gpu, {}) {}
+    NvHostAsGpu::NvHostAsGpu(const DeviceState &state) : NvDevice(state, NvDeviceType::nvhost_as_gpu, {
+        {0x4109, NFUNC(NvHostAsGpu::InitializeEx)},
+        {0x4108, NFUNC(NvHostAsGpu::GetVaRegions)},
+        {0x4102, NFUNC(NvHostAsGpu::AllocSpace)},
+        {0x4106, NFUNC(NvHostAsGpu::Modify)},
+        {0x4101, NFUNC(NvHostAsGpu::BindChannel)},
+        {0x4114, NFUNC(NvHostAsGpu::BindChannel)}
+    }) {}
+
+    void NvHostAsGpu::InitializeEx(IoctlData &buffer) {
+        struct Data {
+            u32 bigPageSize;
+            i32 asFd;
+            u32 flags;
+            u32 reserved;
+            u64 vaRangeStart;
+            u64 vaRangeEnd;
+            u64 vaRangeSplit;
+        } addressSpace = state.process->GetObject<Data>(buffer.input.at(0).address);
+    }
+
+    void NvHostAsGpu::GetVaRegions(IoctlData &buffer) {
+        struct Data {
+            u64 _pad0_;
+            u32 bufferSize;
+            u32 _pad1_;
+
+            struct {
+                u64 offset;
+                u32 page_size;
+                u32 pad;
+                u64 pages;
+            } regions[2];
+        } regionInfo = state.process->GetReference<Data>(buffer.input.at(0).address);
+
+        state.process->WriteMemory(regionInfo, buffer.output.at(0).address);
+    }
+
+    void NvHostAsGpu::BindChannel(IoctlData &buffer) {
+        struct Data {
+            u32 fd;
+        } channelInfo = state.process->GetReference<Data>(buffer.input.at(0).address);
+    }
+
+    void NvHostAsGpu::AllocSpace(IoctlData &buffer) {
+        struct Data {
+            u32 pages;
+            u32 pageSize;
+            u32 flags;
+            u32 _pad_;
+            u64 offset;
+        } region = state.process->GetObject<Data>(buffer.input.at(0).address);
+
+        u64 size = static_cast<u64>(region.pages) * static_cast<u64>(region.pageSize);
+
+        if (region.flags & 1)
+            region.offset = state.gpu->memoryManager.AllocateFixed(region.offset, size);
+        else
+            region.offset = state.gpu->memoryManager.AllocateSpace(size);
+
+        if (region.offset == 0) {
+            state.logger->Warn("Failed to allocate GPU address space region!");
+            buffer.status = NvStatus::BadParameter;
+        }
+
+        state.process->WriteMemory(region, buffer.output.at(0).address);
+    }
+
+    void NvHostAsGpu::Modify(IoctlData &buffer) {
+        struct Data {
+            u32 flags;
+            u32 kind;
+            u32 nvmapHandle;
+            u32 pageSize;
+            u64 bufferOffset;
+            u64 mappingSize;
+            u64 offset;
+        } region = state.process->GetObject<Data>(buffer.input.at(0).address);
+
+        if (!region.nvmapHandle)
+            return;
+
+        auto nvmap = state.os->serviceManager.GetService<nvdrv::INvDrvServices>(Service::nvdrv_INvDrvServices)->GetDevice<nvdrv::device::NvMap>(nvdrv::device::NvDeviceType::nvmap)->handleTable.at(region.nvmapHandle);
+
+        u64 mapPhysicalAddress = region.bufferOffset + nvmap->address;
+        u64 mapSize = region.mappingSize ? region.mappingSize : nvmap->size;
+
+        if (region.flags & 1)
+            region.offset = state.gpu->memoryManager.MapFixed(region.offset, mapPhysicalAddress, mapSize);
+        else
+            region.offset = state.gpu->memoryManager.MapAllocated(mapPhysicalAddress, mapSize);
+
+        if (region.offset == 0) {
+            state.logger->Warn("Failed to map GPU address space region!");
+            buffer.status = NvStatus::BadParameter;
+        }
+
+        state.process->WriteMemory(region, buffer.output.at(0).address);
+    }
}
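
The arithmetic in Modify is worth tracing once: the CPU address handed to the memory manager is the nvmap object's address plus the requested buffer offset, and a mapping size of zero means the whole object. A hedged sketch with made-up numbers; the nvmap handle lookup and DeviceState wiring are elided, mirroring the handler above:

    #include "gpu/memory_manager.h"

    // Illustrative only: the values mirror the fields of the Modify ioctl payload above.
    skyline::u64 ModifyExample(skyline::gpu::vmm::MemoryManager &memoryManager) {
        skyline::u64 nvmapAddress = 0x12340000; // nvmap->address of the handle being mapped (made up)
        skyline::u64 nvmapSize = 0x40000;       // nvmap->size (made up)

        skyline::u64 bufferOffset = 0x10000;    // region.bufferOffset from the payload
        skyline::u64 mappingSize = 0;           // 0 means "map the whole nvmap object"
        skyline::u64 flags = 0;                 // bit 0 clear: let the manager pick the GPU region

        skyline::u64 mapPhysicalAddress = bufferOffset + nvmapAddress;  // 0x12350000
        skyline::u64 mapSize = mappingSize ? mappingSize : nvmapSize;   // 0x40000

        // With bit 0 set, region.offset would instead be passed to MapFixed as the target GPU address;
        // MapAllocated needs a prior AllocSpace reservation to succeed, returning 0 otherwise.
        return (flags & 1)
            ? memoryManager.MapFixed(/* fixed GPU address */ 0x100000, mapPhysicalAddress, mapSize)
            : memoryManager.MapAllocated(mapPhysicalAddress, mapSize);
    }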

View File

@@ -12,5 +12,30 @@ namespace skyline::service::nvdrv::device {
    class NvHostAsGpu : public NvDevice {
      public:
        NvHostAsGpu(const DeviceState &state);
+
+        /**
+         * @brief This initializes the application's GPU address space (https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_INITIALIZE_EX)
+         */
+        void InitializeEx(IoctlData &buffer);
+
+        /**
+         * @brief This returns the regions of the application's GPU address space (https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_GET_VA_REGIONS)
+         */
+        void GetVaRegions(IoctlData &buffer);
+
+        /**
+         * @brief This reserves a region in the GPU address space (https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_ALLOC_SPACE)
+         */
+        void AllocSpace(IoctlData &buffer);
+
+        /**
+         * @brief This maps a region in the GPU address space (https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_MODIFY)
+         */
+        void Modify(IoctlData &buffer);
+
+        /**
+         * @brief This binds a channel to the address space (https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_BIND_CHANNEL)
+         */
+        void BindChannel(IoctlData &buffer);
    };
}

View File

@@ -6,17 +6,23 @@
namespace skyline::service::nvdrv::device {
    NvHostChannel::NvHostChannel(const DeviceState &state, NvDeviceType type) : NvDevice(state, type, {
-        {0x40044801, NFUNC(NvHostChannel::SetNvmapFd)},
-        {0xC0104809, NFUNC(NvHostChannel::AllocObjCtx)},
-        {0xC010480B, NFUNC(NvHostChannel::ZcullBind)},
-        {0xC018480C, NFUNC(NvHostChannel::SetErrorNotifier)},
-        {0x4004480D, NFUNC(NvHostChannel::SetPriority)},
-        {0xC020481A, NFUNC(NvHostChannel::AllocGpfifoEx2)},
-        {0x40084714, NFUNC(NvHostChannel::SetUserData)}
+        {0x4801, NFUNC(NvHostChannel::SetNvmapFd)},
+        {0x4803, NFUNC(NvHostChannel::SetSubmitTimeout)},
+        {0x4808, NFUNC(NvHostChannel::SubmitGpFifo)},
+        {0x4809, NFUNC(NvHostChannel::AllocObjCtx)},
+        {0x480B, NFUNC(NvHostChannel::ZcullBind)},
+        {0x480C, NFUNC(NvHostChannel::SetErrorNotifier)},
+        {0x480D, NFUNC(NvHostChannel::SetPriority)},
+        {0x481A, NFUNC(NvHostChannel::AllocGpfifoEx2)},
+        {0x4714, NFUNC(NvHostChannel::SetUserData)},
    }) {}

    void NvHostChannel::SetNvmapFd(IoctlData &buffer) {}
+
+    void NvHostChannel::SetSubmitTimeout(IoctlData &buffer) {}
+
+    void NvHostChannel::SubmitGpFifo(IoctlData &buffer) {}

    void NvHostChannel::AllocObjCtx(IoctlData &buffer) {}

    void NvHostChannel::ZcullBind(IoctlData &buffer) {}

View File

@@ -27,6 +27,16 @@ namespace skyline::service::nvdrv::device {
         */
        void SetNvmapFd(IoctlData &buffer);
+
+        /**
+         * @brief This sets the timeout for the channel (https://switchbrew.org/wiki/NV_services#NVHOST_IOCTL_CHANNEL_SET_SUBMIT_TIMEOUT)
+         */
+        void SetSubmitTimeout(IoctlData &buffer);
+
+        /**
+         * @brief This submits a command to the GPFIFO (https://switchbrew.org/wiki/NV_services#NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO)
+         */
+        void SubmitGpFifo(IoctlData &buffer);

        /**
         * @brief This allocates a graphic context object (https://switchbrew.org/wiki/NV_services#NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX)
         */

View File

@@ -4,5 +4,9 @@
#include "nvhost_ctrl.h"

namespace skyline::service::nvdrv::device {
-    NvHostCtrl::NvHostCtrl(const DeviceState &state) : NvDevice(state, NvDeviceType::nvhost_ctrl, {}) {}
+    NvHostCtrl::NvHostCtrl(const DeviceState &state) : NvDevice(state, NvDeviceType::nvhost_ctrl, {
+        {0x001F, NFUNC(NvHostCtrl::EventRegister)},
+    }) {}
+
+    void NvHostCtrl::EventRegister(IoctlData &buffer) {}
}

View File

@@ -12,5 +12,10 @@ namespace skyline::service::nvdrv::device {
    class NvHostCtrl : public NvDevice {
      public:
        NvHostCtrl(const DeviceState &state);
+
+        /**
+         * @brief This registers a GPU event (https://switchbrew.org/wiki/NV_services#NVHOST_IOCTL_CTRL_EVENT_REGISTER)
+         */
+        void EventRegister(IoctlData &buffer);
    };
}

View File

@@ -6,11 +6,11 @@
namespace skyline::service::nvdrv::device {
    NvHostCtrlGpu::NvHostCtrlGpu(const DeviceState &state) : NvDevice(state, NvDeviceType::nvhost_ctrl_gpu, {
-        {0x80044701, NFUNC(NvHostCtrlGpu::ZCullGetCtxSize)},
-        {0x80284702, NFUNC(NvHostCtrlGpu::ZCullGetInfo)},
-        {0xC0184706, NFUNC(NvHostCtrlGpu::GetTpcMasks)},
-        {0xC0B04705, NFUNC(NvHostCtrlGpu::GetCharacteristics)},
-        {0x80084714, NFUNC(NvHostCtrlGpu::GetActiveSlotMask)}
+        {0x4701, NFUNC(NvHostCtrlGpu::ZCullGetCtxSize)},
+        {0x4702, NFUNC(NvHostCtrlGpu::ZCullGetInfo)},
+        {0x4706, NFUNC(NvHostCtrlGpu::GetTpcMasks)},
+        {0x4705, NFUNC(NvHostCtrlGpu::GetCharacteristics)},
+        {0x4714, NFUNC(NvHostCtrlGpu::GetActiveSlotMask)}
    }) {}

    void NvHostCtrlGpu::ZCullGetCtxSize(IoctlData &buffer) {

View File

@@ -8,12 +8,12 @@ namespace skyline::service::nvdrv::device {
    NvMap::NvMapObject::NvMapObject(u32 id, u32 size) : id(id), size(size) {}

    NvMap::NvMap(const DeviceState &state) : NvDevice(state, NvDeviceType::nvmap, {
-        {0xC0080101, NFUNC(NvMap::Create)},
-        {0xC0080103, NFUNC(NvMap::FromId)},
-        {0xC0200104, NFUNC(NvMap::Alloc)},
-        {0xC0180105, NFUNC(NvMap::Free)},
-        {0xC00C0109, NFUNC(NvMap::Param)},
-        {0xC008010E, NFUNC(NvMap::GetId)}
+        {0x0101, NFUNC(NvMap::Create)},
+        {0x0103, NFUNC(NvMap::FromId)},
+        {0x0104, NFUNC(NvMap::Alloc)},
+        {0x0105, NFUNC(NvMap::Free)},
+        {0x0109, NFUNC(NvMap::Param)},
+        {0x010E, NFUNC(NvMap::GetId)}
    }) {}

    void NvMap::Create(IoctlData &buffer) {