mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-23 10:01:50 +01:00
NEEDS CLEANUP: Reimplement GPU VMM and rewrite nvdrv VM impl
This commit is contained in:
parent
b9098ac14a
commit
d094cc142d
@ -96,7 +96,7 @@ add_library(skyline SHARED
|
||||
${source_DIR}/skyline/gpu/command_scheduler.cpp
|
||||
${source_DIR}/skyline/gpu/texture/texture.cpp
|
||||
${source_DIR}/skyline/gpu/presentation_engine.cpp
|
||||
${source_DIR}/skyline/soc/gmmu.cpp
|
||||
${source_DIR}/skyline/soc/gm20b.cpp
|
||||
${source_DIR}/skyline/soc/host1x/syncpoint.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
|
||||
${source_DIR}/skyline/soc/gm20b/engines/maxwell_3d.cpp
|
||||
|
155
app/src/main/cpp/skyline/common/address_space.h
Normal file
155
app/src/main/cpp/skyline/common/address_space.h
Normal file
@ -0,0 +1,155 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <concepts>
|
||||
#include <common.h>
|
||||
|
||||
namespace skyline {
|
||||
template<typename VaType, size_t AddressSpaceBits>
|
||||
concept AddressSpaceValid = std::is_unsigned_v<VaType> && sizeof(VaType) * 8 >= AddressSpaceBits;
|
||||
|
||||
/**
|
||||
* @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector
|
||||
*/
|
||||
template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatAddressSpaceMap {
  private:
    /**
     * @brief Represents a block of memory in the AS
     * @note A block stores no size, it spans from its own VA up to the VA of the block that follows it in the sorted vector
     */
    struct Block {
        VaType virt{UnmappedVa}; //!< VA of the block
        PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block is encountered
        bool flag{}; //!< General purpose flag for use by derived classes

        Block() = default;

        Block(VaType virt, PaType phys, bool flag) : virt(virt), phys(phys), flag(flag) {}

        /**
         * @return If the VA of this block differs from the UnmappedVa sentinel
         */
        constexpr bool Valid() const {
            return virt != UnmappedVa;
        }

        /**
         * @return If this block has a PA other than the UnmappedPa sentinel
         */
        constexpr bool Mapped() const {
            return phys != UnmappedPa;
        }

        /**
         * @return If the PA of this block is the UnmappedPa sentinel
         */
        constexpr bool Unmapped() const {
            return phys == UnmappedPa;
        }

        /**
         * @brief Orders a block against a bare VA, this is what allows std::lower_bound to search the block vector by address
         */
        bool operator<(const VaType &pVirt) const {
            return virt < pVirt;
        }
    };

  protected:
    std::mutex blockMutex; //!< Guards blocks, it must be held by callers of the *Locked member functions
    std::vector<Block> blocks{Block{}}; //!< Blocks sorted by VA, initially a single default-constructed (unmapped) block

    /**
     * @brief Maps a PA range into the given AS region, optionally setting the flag
     * @note blockMutex MUST be locked when calling this
     */
    void MapLocked(VaType virt, PaType phys, VaType size, bool flag = {});

    /**
     * @brief Unmaps the given range and merges it with other unmapped regions
     * @note blockMutex MUST be locked when calling this
     */
    void UnmapLocked(VaType virt, VaType size);

  public:
    // Computed as two halves (2^(N-1) + (2^(N-1) - 1)) so the shift never reaches the full width of the 64-bit literal when AddressSpaceBits is 64
    static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) + ((1ULL << (AddressSpaceBits - 1)) - 1)}; //!< The maximum VA that this AS can technically reach

    VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS

    /**
     * @param pVaLimit The soft VA limit for the AS, the out-of-line definition is expected to reject values above VaMaximum
     */
    FlatAddressSpaceMap(VaType pVaLimit);

    FlatAddressSpaceMap() = default;

    /**
     * @brief Acquires blockMutex and then performs the same operation as MapLocked
     */
    void Map(VaType virt, PaType phys, VaType size, bool flag = {});

    /**
     * @brief Acquires blockMutex and then performs the same operation as UnmapLocked
     */
    void Unmap(VaType virt, VaType size);
};
|
||||
|
||||
/**
|
||||
* @brief FlatMemoryManager specialises FlatAddressSpaceMap to focus on pointers as PAs, adding read/write functions
|
||||
*/
|
||||
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatMemoryManager : public FlatAddressSpaceMap<VaType, UnmappedVa, u8 *, nullptr, true, AddressSpaceBits> {
  public:
    /**
     * @return A placeholder address for sparse mapped regions, this means nothing
     * @note The value is a fixed marker (0xCAFEBABE) rather than a pointer to real storage
     */
    static u8 *SparsePlaceholderAddress() {
        return reinterpret_cast<u8 *>(0xCAFEBABE);
    }

    /**
     * @brief Copies size bytes starting at the supplied VA into destination
     */
    void Read(u8 *destination, VaType virt, VaType size);

    /**
     * @brief Fills the entire supplied span with data read from the supplied VA
     */
    template<typename T>
    void Read(span <T> destination, VaType virt) {
        auto *rawDestination{reinterpret_cast<u8 *>(destination.data())};
        Read(rawDestination, virt, destination.size_bytes());
    }

    /**
     * @return An object of type T whose bytes were read from the supplied VA
     */
    template<typename T>
    T Read(VaType virt) {
        T value;
        auto *rawValue{reinterpret_cast<u8 *>(&value)};
        Read(rawValue, virt, sizeof(T));
        return value;
    }

    /**
     * @brief Copies size bytes from source to the supplied VA
     */
    void Write(VaType virt, u8 *source, VaType size);

    /**
     * @brief Writes the entire contents of the supplied span to the supplied VA
     */
    template<typename T>
    void Write(VaType virt, span <T> source) {
        auto *rawSource{reinterpret_cast<u8 *>(source.data())};
        Write(virt, rawSource, source.size_bytes());
    }

    /**
     * @brief Writes the bytes of the supplied object (taken by value) to the supplied VA
     */
    template<typename T>
    void Write(VaType virt, T source) {
        auto *rawSource{reinterpret_cast<u8 *>(&source)};
        Write(virt, rawSource, sizeof(T));
    }
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief FlatAllocator specialises FlatAddressSpaceMap to work as an allocator, with an initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block
|
||||
*/
|
||||
template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> requires AddressSpaceValid<VaType, AddressSpaceBits>
class FlatAllocator : public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> {
  private:
    // The PA type of the underlying map is a bool, a block is either allocated (true) or free (false)
    using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;

    VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass, once this reaches the AS limit the slower allocation path will be used

  public:
    VaType vaStart; //!< The base VA of the allocator, no allocations will be below this

    /**
     * @param vaStart The VA at which the linear allocation pass begins
     * @param vaLimit The soft VA limit that is forwarded to the underlying map
     */
    FlatAllocator(VaType vaStart, VaType vaLimit);

    /**
     * @brief Allocates a region in the AS of the given size and returns its address
     */
    VaType Allocate(VaType size);

    /**
     * @brief Marks the given region in the AS as allocated
     */
    void AllocateFixed(VaType virt, VaType size);

    /**
     * @brief Frees an AS region so it can be used again
     */
    void Free(VaType virt, VaType size);
};
|
||||
}
|
354
app/src/main/cpp/skyline/common/address_space.inc
Normal file
354
app/src/main/cpp/skyline/common/address_space.inc
Normal file
@ -0,0 +1,354 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <common/trace.h>
|
||||
#include <kernel/types/KProcess.h>
|
||||
#include "address_space.h"
|
||||
|
||||
// Each macro expands to the template header and the qualified class name required to define a member of the
// respective class template out-of-line, it is used as `MACRO(returnType)::Member(...)` (or `MACRO()` for constructors)

#define MAP_MEMBER(returnType) \
    template<typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, bool PaContigSplit, size_t AddressSpaceBits> \
    requires AddressSpaceValid<VaType, AddressSpaceBits> \
    returnType FlatAddressSpaceMap<VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits>

#define MM_MEMBER(returnType) \
    template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
    requires AddressSpaceValid<VaType, AddressSpaceBits> \
    returnType FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>

#define ALLOC_MEMBER(returnType) \
    template<typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
    requires AddressSpaceValid<VaType, AddressSpaceBits> \
    returnType FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
|
||||
|
||||
namespace skyline {
|
||||
MAP_MEMBER()::FlatAddressSpaceMap(VaType pVaLimit) : vaLimit(pVaLimit) {
|
||||
if (pVaLimit > VaMaximum)
|
||||
throw exception("Invalid VA limit!");
|
||||
}
|
||||
|
||||
/**
 * @brief Maps [virt, virt + size) to phys, overwriting whatever the range previously contained while leaving blocks outside of it intact
 * @param flag The general purpose flag to store on the block describing the new mapping
 * @throws exception If the range ends past vaLimit or the block vector is in an inconsistent state
 * @note blockMutex MUST be locked when calling this
 * @note NOTE(review): the walk below relies on a block with a VA lower than virt existing, this appears to assume callers never map at the lowest VA - confirm
 */
MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, bool flag) {
    TRACE_EVENT("containers", "FlatAddressSpaceMap::Map");

    VaType virtEnd{virt + size};

    if (virtEnd > vaLimit)
        throw exception("Trying to map a block past the VA limit!");

    // First block that starts at or after the end of the new mapping
    auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
    if (blockEndSuccessor == blocks.begin())
        throw exception("Unexpected Memory Manager state!");

    // Last block that starts before the end of the new mapping
    auto blockEndPredecessor{std::prev(blockEndSuccessor)};

    if (blockEndSuccessor != blocks.end()) {
        // We have blocks in front of us, if one is directly in front then we don't have to add a tail
        if (blockEndSuccessor->virt != virtEnd) {
            // The tail describes what remains of the end predecessor past virtEnd
            PaType tailPhys{[&]() -> PaType {
                if (!PaContigSplit || blockEndPredecessor->Unmapped())
                    return blockEndPredecessor->phys; // Always propagate unmapped regions
                else
                    return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
            }()};

            if (blockEndPredecessor->virt >= virt) {
                // If this block's start would be overlapped by the map then reuse it as a tail block, its flag is deliberately left as is
                blockEndPredecessor->virt = virtEnd;
                blockEndPredecessor->phys = tailPhys;
            } else {
                // Else insert a new one and we're done
                blocks.insert(blockEndSuccessor, {Block(virt, phys, flag), Block(virtEnd, tailPhys, blockEndPredecessor->flag)});
                return;
            }
        }
    } else {
        // blockEndPredecessor will always be unmapped as blocks has to be terminated by an unmapped chunk
        if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) {
            // Move the start of the terminating unmapped block to the end of the new mapping
            blockEndPredecessor->virt = virtEnd;
        } else {
            // Else insert a new one and we're done
            blocks.insert(blockEndSuccessor, {Block(virt, phys, flag), Block(virtEnd, UnmappedPa, false)});
            return;
        }
    }

    // The block which starts exactly at virtEnd: the end predecessor if it was repurposed as the tail above, otherwise the
    // successor that was already directly in front (it cannot be end() as the end() case above always repurposes or returns)
    auto tailBlock{blockEndPredecessor->virt == virtEnd ? blockEndPredecessor : blockEndSuccessor};

    // Walk the block vector backwards from the tail to find the first block inside the mapped range as this is more
    // efficient than another binary search in most scenarios
    auto blockStartSuccessor{tailBlock};
    while (std::prev(blockStartSuccessor)->virt >= virt)
        --blockStartSuccessor;

    if (blockStartSuccessor->virt > virtEnd)
        throw exception("Unexpected Memory Manager state!");

    if (blockStartSuccessor->virt == virtEnd) {
        // We need to create a new block as there are none spare that we would overwrite
        blocks.insert(blockStartSuccessor, Block(virt, phys, flag));
    } else {
        // Reuse the first overlapped block as the head of the new mapping
        blockStartSuccessor->virt = virt;
        blockStartSuccessor->phys = phys;
        blockStartSuccessor->flag = flag;

        // Erase every other overwritten block up to the tail, this range is empty when only the head was overlapped and
        // erasing an empty range is a no-op
        blocks.erase(std::next(blockStartSuccessor), tailBlock);
    }
}
|
||||
|
||||
/**
 * @brief Unmaps [virt, virt + size) and merges the result with any adjacent unmapped regions
 * @throws exception If the range ends past vaLimit or the block vector is in an inconsistent state
 * @note blockMutex MUST be locked when calling this
 */
MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
    TRACE_EVENT("containers", "FlatAddressSpaceMap::Unmap");

    VaType virtEnd{virt + size};

    if (virtEnd > vaLimit)
        throw exception("Trying to unmap a block past the VA limit!");

    // First block that starts at or after the end of the unmapped range
    auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
    if (blockEndSuccessor == blocks.begin())
        throw exception("Unexpected Memory Manager state!");

    // Last block that starts before the end of the unmapped range
    auto blockEndPredecessor{std::prev(blockEndSuccessor)};

    // Walks backwards from the supplied block to the last block that starts before virt
    auto walkBackToPredecessor{[&](auto iter) {
        while (iter->virt >= virt)
            std::advance(iter, -1);

        return iter;
    }};

    // Erases the blocks inside the range when the range is terminated by the supplied unmapped block
    auto eraseBlocksWithEndUnmapped{[&] (auto unmappedEnd) {
        auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)};
        auto blockStartSuccessor{std::next(blockStartPredecessor)};

        auto eraseEnd{[&]() {
            if (blockStartPredecessor->Unmapped()) {
                // If the start predecessor is unmapped then we can erase everything in our region and be done
                return std::next(unmappedEnd);
            } else {
                // Else reuse the end predecessor as the start of our unmapped region then erase all up to it
                unmappedEnd->virt = virt;
                return unmappedEnd;
            }
        }()};

        // We can't have two unmapped regions after each other, eraseEnd may be end() when the terminating block is erased so
        // it must not be dereferenced in that case
        if (eraseEnd != blocks.end() && blockStartPredecessor->Unmapped() && eraseEnd->Unmapped())
            throw exception("Unexpected Memory Manager state!");

        // The range is empty when only the tail of a single mapped block was unmapped, erasing it is then a no-op
        blocks.erase(blockStartSuccessor, eraseEnd);
    }};

    // We can avoid any splitting logic if these are the case
    if (blockEndPredecessor->Unmapped()) {
        if (blockEndPredecessor->virt > virt)
            eraseBlocksWithEndUnmapped(blockEndPredecessor);

        return; // The region is unmapped, bail out early
    } else if (blockEndSuccessor == blocks.end()) {
        // This should never happen as the end should always follow an unmapped block, it is checked before
        // blockEndSuccessor is dereferenced below
        throw exception("Unexpected Memory Manager state!");
    } else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) {
        eraseBlocksWithEndUnmapped(blockEndSuccessor);
        return; // The region is unmapped here and doesn't need splitting, bail out early
    } else if (blockEndSuccessor->virt != virtEnd) {
        // No block is directly in front of the range so a tail is required (if one was then we wouldn't have to add one)

        // The previous block is mapped so we will need to add a tail with an offset
        PaType tailPhys{[&]() {
            if constexpr (PaContigSplit)
                return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
            else
                return blockEndPredecessor->phys;
        }()};

        if (blockEndPredecessor->virt >= virt) {
            // If this block's start would be overlapped by the unmap then reuse it as a tail block
            blockEndPredecessor->virt = virtEnd;
            blockEndPredecessor->phys = tailPhys;
        } else {
            blocks.insert(blockEndSuccessor, {Block(virt, UnmappedPa, false), Block(virtEnd, tailPhys, blockEndPredecessor->flag)});
            return; // The range was strictly inside the previous mapped block, the unmapped head and the tail were both inserted so we're done
        }
    }

    // The block which starts exactly at virtEnd: the end predecessor if it was repurposed as the tail above, otherwise the
    // successor that was already directly in front
    auto tailBlock{blockEndPredecessor->virt == virtEnd ? blockEndPredecessor : blockEndSuccessor};

    // Walk the block vector to find the start predecessor as this is more efficient than another binary search in most scenarios
    auto blockStartPredecessor{walkBackToPredecessor(blockEndPredecessor)};
    auto blockStartSuccessor{std::next(blockStartPredecessor)};

    if (blockStartSuccessor->virt > virtEnd)
        throw exception("Unexpected Memory Manager state!");

    if (blockStartSuccessor->virt == virtEnd) {
        // There are no blocks between the start and the end that would let us skip inserting a new one for head

        // The previous block may be unmapped, if so we don't need to insert any unmaps after it
        if (blockStartPredecessor->Mapped())
            blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, false));
    } else if (blockStartPredecessor->Unmapped()) {
        // If the previous block is unmapped then it already covers the range, erase everything up to the tail
        blocks.erase(blockStartSuccessor, tailBlock);
    } else {
        // Add in the unmapped block header, the flag is cleared to match the headers that are inserted above
        blockStartSuccessor->virt = virt;
        blockStartSuccessor->phys = UnmappedPa;
        blockStartSuccessor->flag = false;

        // Erase overwritten blocks up to the tail, skipping the first one as we have written the unmapped start block there,
        // this range is empty when only a single block was overlapped and erasing an empty range is a no-op
        blocks.erase(std::next(blockStartSuccessor), tailBlock);
    }
}
|
||||
|
||||
/**
 * @brief Holds blockMutex for the duration of a MapLocked call with the same arguments
 */
MAP_MEMBER(void)::Map(VaType virt, PaType phys, VaType size, bool flag) {
    std::scoped_lock guard{blockMutex};
    MapLocked(virt, phys, size, flag);
}
|
||||
|
||||
/**
 * @brief Holds blockMutex for the duration of an UnmapLocked call with the same arguments
 */
MAP_MEMBER(void)::Unmap(VaType virt, VaType size) {
    std::scoped_lock guard{blockMutex};
    UnmapLocked(virt, size);
}
|
||||
|
||||
/**
 * @brief Copies size bytes starting at the supplied VA into destination, walking over as many blocks as the range spans
 * @throws exception If any part of the range lies in a block that is neither mapped nor flagged as sparse
 * @note Blocks with their flag set are treated as sparse and read as zeroes, their PA is never dereferenced since sparse
 *       regions are mapped with a placeholder address rather than real storage
 */
MM_MEMBER(void)::Read(u8 *destination, VaType virt, VaType size) {
    std::scoped_lock lock(this->blockMutex);

    TRACE_EVENT("containers", "FlatMemoryManager::Read");

    // First block that starts after virt, the block containing virt is the one directly before it
    auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) {
        return virt < block.virt;
    })};

    auto predecessor{std::prev(successor)};

    // Amount of the remaining read that lies in the current block when starting at `from`, the final block has no successor
    // to bound it so end() must not be dereferenced
    auto sizeInBlock{[&](VaType from) -> VaType {
        if (successor == this->blocks.end())
            return size;

        return std::min(static_cast<VaType>(successor->virt - from), size);
    }};

    VaType blockOffset{virt - predecessor->virt}; // Only the first block can be entered part-way through
    VaType blockReadSize{sizeInBlock(virt)};

    while (size) {
        if (predecessor->flag) {
            // Sparse mapping
            std::memset(destination, 0, blockReadSize);
        } else if (predecessor->phys == nullptr) {
            throw exception("Page fault at: 0x{:X}", predecessor->virt);
        } else {
            std::memcpy(destination, predecessor->phys + blockOffset, blockReadSize);
        }

        destination += blockReadSize;
        size -= blockReadSize;

        if (size) {
            predecessor = successor++;
            blockOffset = 0;
            blockReadSize = sizeInBlock(predecessor->virt);
        }
    }
}
|
||||
|
||||
/**
 * @brief Copies size bytes from source to the supplied VA, walking over as many blocks as the range spans
 * @throws exception If any part of the range lies in a block that is neither mapped nor flagged as sparse
 * @note Blocks with their flag set are treated as sparse and writes to them are discarded, their PA is never dereferenced
 *       since sparse regions are mapped with a placeholder address rather than real storage
 */
MM_MEMBER(void)::Write(VaType virt, u8 *source, VaType size) {
    std::scoped_lock lock(this->blockMutex);

    TRACE_EVENT("containers", "FlatMemoryManager::Write");

    // First block that starts after virt, the block containing virt is the one directly before it
    auto successor{std::upper_bound(this->blocks.begin(), this->blocks.end(), virt, [] (auto virt, const auto &block) {
        return virt < block.virt;
    })};

    auto predecessor{std::prev(successor)};

    // Amount of the remaining write that lies in the current block when starting at `from`, the final block has no successor
    // to bound it so end() must not be dereferenced
    auto sizeInBlock{[&](VaType from) -> VaType {
        if (successor == this->blocks.end())
            return size;

        return std::min(static_cast<VaType>(successor->virt - from), size);
    }};

    VaType blockOffset{virt - predecessor->virt}; // Only the first block can be entered part-way through
    VaType blockWriteSize{sizeInBlock(virt)};

    while (size) {
        if (predecessor->flag) {
            // Sparse mappings allow unmapped writes, the data is simply dropped
        } else if (predecessor->phys == nullptr) {
            throw exception("Page fault at: 0x{:X}", predecessor->virt);
        } else {
            std::memcpy(predecessor->phys + blockOffset, source, blockWriteSize);
        }

        source += blockWriteSize;
        size -= blockWriteSize;

        if (size) {
            predecessor = successor++;
            blockOffset = 0;
            blockWriteSize = sizeInBlock(predecessor->virt);
        }
    }
}
|
||||
|
||||
// Member initialisers are written in declaration order (currentLinearAllocEnd is declared before vaStart) as that is the
// order they are run in, both are initialised from the vaStart parameter so the linear pass begins at the base of the allocator
ALLOC_MEMBER()::FlatAllocator(VaType vaStart, VaType vaLimit) : Base(vaLimit), currentLinearAllocEnd(vaStart), vaStart(vaStart) {}
|
||||
|
||||
/**
 * @brief Allocates a region in the AS of the given size and returns its address
 * @details A linear pass that places the region directly after the previous linear allocation (skipping forward over any
 *          blocks in the way) is attempted first, if that would overflow the AS then every block is searched for a gap
 * @throws exception If no free region that is large enough could be found
 * @note NOTE(review): the gap search starts from the very first block so it does not appear to honour vaStart - confirm
 */
ALLOC_MEMBER(VaType)::Allocate(VaType size) {
    std::scoped_lock lock(this->blockMutex);

    TRACE_EVENT("containers", "FlatAllocator::Allocate");

    VaType allocStart{UnmappedVa};
    VaType allocEnd{currentLinearAllocEnd + size};

    // Only attempt the linear pass if the end didn't wrap around and is within the AS limit
    if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) {
        auto allocEndSuccessor{std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)};
        if (allocEndSuccessor == this->blocks.begin())
            throw exception("Unexpected allocator state!");

        auto allocEndPredecessor{std::prev(allocEndSuccessor)};
        if (allocEndPredecessor->virt <= currentLinearAllocEnd && allocEndPredecessor->Unmapped()) {
            // The entire candidate region lies inside a single free block
            allocStart = currentLinearAllocEnd;
        } else {
            // Skip over any blocks in front of us until a free one that is large enough is found
            while (allocEndSuccessor != this->blocks.end()) {
                if (allocEndSuccessor->virt - allocEndPredecessor->virt >= size && allocEndPredecessor->Unmapped()) {
                    allocStart = allocEndPredecessor->virt;
                    break;
                }

                allocEndPredecessor = allocEndSuccessor++;

                // The final block has no successor to measure against so use the VA limit to check if we fit in it
                if (allocEndSuccessor == this->blocks.end()) {
                    allocEnd = allocEndPredecessor->virt + size;

                    if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit)
                        allocStart = allocEndPredecessor->virt;
                }
            }
        }
    }

    if (allocStart != UnmappedVa) {
        currentLinearAllocEnd = allocStart + size;
    } else { // If linear allocation overflows the AS then find a gap
        if (this->blocks.size() <= 2)
            throw exception("Unexpected allocator state!");

        auto searchPredecessor{this->blocks.begin()};
        auto searchSuccessor{std::next(searchPredecessor)};

        // Advance while the current block is allocated or too small to hold the region
        while (searchSuccessor != this->blocks.end() &&
            (searchSuccessor->virt - searchPredecessor->virt < size || searchPredecessor->Mapped())) {
            searchPredecessor = searchSuccessor++;
        }

        if (searchSuccessor != this->blocks.end())
            allocStart = searchPredecessor->virt;
        else
            throw exception("Unexpected allocator state!");
    }

    this->MapLocked(allocStart, true, size);
    return allocStart;
}
|
||||
|
||||
/**
 * @brief Marks the given region in the AS as allocated
 * @note This goes through Map rather than MapLocked as blockMutex is not held here and MapLocked requires it to be
 */
ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
    this->Map(virt, true, size);
}
|
||||
|
||||
/**
 * @brief Frees an AS region so it can be used again
 * @note This goes through Unmap rather than UnmapLocked as blockMutex is not held here and UnmapLocked requires it to be
 */
ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
    this->Unmap(virt, size);
}
|
||||
}
|
@ -3,6 +3,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common/trace.h>
|
||||
#include <common.h>
|
||||
|
||||
namespace skyline {
|
||||
@ -51,10 +52,15 @@ namespace skyline {
|
||||
*/
|
||||
template<typename F>
|
||||
[[noreturn]] void Process(F function) {
|
||||
TRACE_EVENT_BEGIN("containers", "CircularQueue::Process");
|
||||
|
||||
while (true) {
|
||||
if (start == end) {
|
||||
std::unique_lock lock(productionMutex);
|
||||
|
||||
TRACE_EVENT_END("containers");
|
||||
produceCondition.wait(lock, [this]() { return start != end; });
|
||||
TRACE_EVENT_BEGIN("containers", "CircularQueue::Process");
|
||||
}
|
||||
|
||||
while (start != end) {
|
||||
|
@ -183,8 +183,6 @@ namespace skyline::signal {
|
||||
std::call_once(signalHandlerOnce[signal], [signal, &action]() {
|
||||
struct sigaction oldAction;
|
||||
Sigaction(signal, &action, &oldAction);
|
||||
if (oldAction.sa_flags && oldAction.sa_flags != action.sa_flags)
|
||||
throw exception("Old sigaction flags aren't equivalent to the replaced signal: {:#b} | {:#b}", oldAction.sa_flags, action.sa_flags);
|
||||
|
||||
DefaultSignalHandlers.at(signal).function = (oldAction.sa_flags & SA_SIGINFO) ? oldAction.sa_sigaction : reinterpret_cast<void (*)(int, struct siginfo *, void *)>(oldAction.sa_handler);
|
||||
});
|
||||
|
@ -13,7 +13,8 @@ PERFETTO_DEFINE_CATEGORIES(
|
||||
perfetto::Category("kernel").SetDescription("Events from parts of the HLE kernel"),
|
||||
perfetto::Category("guest").SetDescription("Events relating to guest code"),
|
||||
perfetto::Category("gpu").SetDescription("Events from the emulated GPU"),
|
||||
perfetto::Category("service").SetDescription("Events from the HLE sysmodule implementations")
|
||||
perfetto::Category("service").SetDescription("Events from the HLE sysmodule implementations"),
|
||||
perfetto::Category("containers").SetDescription("Events from custom container implementations")
|
||||
);
|
||||
|
||||
namespace skyline::trace {
|
||||
|
@ -13,6 +13,7 @@ namespace skyline::service {
|
||||
Busy = 16, // EBUSY
|
||||
InvalidArgument = 22, // EINVAL
|
||||
InappropriateIoctlForDevice = 25, // ENOTTY
|
||||
FunctionNotImplemented = 38, // ENOSYS
|
||||
NotSupported = 95, // EOPNOTSUPP, ENOTSUP
|
||||
TimedOut = 110, // ETIMEDOUT
|
||||
|
||||
|
@ -1,10 +1,16 @@
|
||||
// SPDX-License-Identifier: MIT OR MPL-2.0
|
||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <common/address_space.inc>
|
||||
#include <soc.h>
|
||||
#include <services/nvdrv/devices/deserialisation/deserialisation.h>
|
||||
#include "as_gpu.h"
|
||||
|
||||
namespace skyline {
|
||||
template class FlatAddressSpaceMap<u32, 0, bool, false, false, 32>;
|
||||
template class FlatAllocator<u32, 0, 32>;
|
||||
}
|
||||
|
||||
namespace skyline::service::nvdrv::device::nvhost {
|
||||
AsGpu::AsGpu(const DeviceState &state, Core &core, const SessionContext &ctx) : NvDevice(state, core, ctx) {}
|
||||
|
||||
@ -14,38 +20,66 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
}
|
||||
|
||||
PosixResult AsGpu::AllocSpace(In<u32> pages, In<u32> pageSize, In<MappingFlags> flags, InOut<u64> offset) {
|
||||
// TODO: track this on the nvdrv side and have the gmmu only do virt -> phys
|
||||
// Also fix error codes
|
||||
u64 size{static_cast<u64>(pages) * static_cast<u64>(pageSize)};
|
||||
state.logger->Debug("pages: 0x{:X}, pageSize: 0x{:X}, flags: ( fixed: {}, sparse: {} ), offset: 0x{:X}", pages, pageSize, flags.fixed, flags.sparse, offset);
|
||||
|
||||
if (pageSize != VM::PageSize && pageSize != vm.bigPageSize)
|
||||
return PosixResult::InvalidArgument;
|
||||
|
||||
if (pageSize != vm.bigPageSize && flags.sparse)
|
||||
return PosixResult::FunctionNotImplemented;
|
||||
|
||||
u32 pageSizeBits{pageSize == VM::PageSize ? VM::PageSizeBits : vm.bigPageSizeBits};
|
||||
|
||||
auto &allocator{[&] () -> auto & {
|
||||
if (pageSize == VM::PageSize)
|
||||
return vm.smallPageAllocator;
|
||||
else
|
||||
return vm.bigPageAllocator;
|
||||
}()};
|
||||
|
||||
if (flags.fixed)
|
||||
offset = state.soc->gmmu.ReserveFixed(offset, size);
|
||||
allocator->AllocateFixed(offset >> pageSizeBits, pages);
|
||||
else
|
||||
offset = state.soc->gmmu.ReserveSpace(size, offset); // offset contains the input alignment
|
||||
offset = static_cast<u64>(allocator->Allocate(pages)) << pageSizeBits;
|
||||
|
||||
if (offset == 0) {
|
||||
state.logger->Warn("Failed to allocate GPU address space region!");
|
||||
return PosixResult::InvalidArgument;
|
||||
}
|
||||
u64 size{static_cast<u64>(pages) * static_cast<u64>(pageSize)};
|
||||
|
||||
if (flags.sparse)
|
||||
state.soc->gm20b.gmmu.Map(offset, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), size, true);
|
||||
|
||||
allocationMap[offset] = {
|
||||
.size = size,
|
||||
.pageSize = pageSize,
|
||||
.sparse = flags.sparse
|
||||
};
|
||||
|
||||
return PosixResult::Success;
|
||||
}
|
||||
|
||||
PosixResult AsGpu::FreeSpace(In<u64> offset, In<u32> pages, In<u32> pageSize) {
|
||||
// TODO: implement this when we add nvdrv side address space allocation
|
||||
// TODO: implement after UNMAP
|
||||
return PosixResult::Success;
|
||||
}
|
||||
|
||||
PosixResult AsGpu::UnmapBuffer(In<u64> offset) {
|
||||
state.logger->Debug("offset: 0x{:X}", offset);
|
||||
|
||||
try {
|
||||
auto region{regionMap.at(offset)};
|
||||
auto mapping{mappingMap.at(offset)};
|
||||
|
||||
// Non-fixed regions are unmapped so that they can be used by future non-fixed mappings
|
||||
if (!region.fixed)
|
||||
if (!state.soc->gmmu.Unmap(offset, region.size))
|
||||
state.logger->Warn("Failed to unmap region at 0x{:X}", offset);
|
||||
if (!mapping->fixed) {
|
||||
auto &allocator{mapping->bigPage ? vm.bigPageAllocator : vm.smallPageAllocator};
|
||||
u32 pageSizeBits{mapping->bigPage ? vm.bigPageSizeBits : VM::PageSizeBits};
|
||||
|
||||
regionMap.erase(offset);
|
||||
allocator->Free(mapping->offset >> pageSizeBits, mapping->size >> pageSizeBits);
|
||||
}
|
||||
|
||||
if (mapping->sparseAlloc)
|
||||
state.soc->gm20b.gmmu.Map(offset, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), mapping->size, true);
|
||||
else
|
||||
state.soc->gm20b.gmmu.Unmap(offset, mapping->size);
|
||||
|
||||
mappingMap.erase(offset);
|
||||
} catch (const std::out_of_range &e) {
|
||||
state.logger->Warn("Couldn't find region to unmap at 0x{:X}", offset);
|
||||
}
|
||||
@ -53,62 +87,94 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
return PosixResult::Success;
|
||||
}
|
||||
|
||||
PosixResult AsGpu::MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, InOut<u32> pageSize, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset) {
|
||||
state.logger->Debug("flags: ( fixed: {}, remap: {} ), kind: {}, handle: {}, pageSize: 0x{:X}, bufferOffset: 0x{:X}, mappingSize: 0x{:X}, offset: 0x{:X}", flags.fixed, flags.remap, kind, handle, pageSize, bufferOffset, mappingSize, offset);
|
||||
PosixResult AsGpu::MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset) {
|
||||
if (!vm.initialised)
|
||||
return PosixResult::InvalidArgument;
|
||||
|
||||
state.logger->Debug("flags: ( fixed: {}, remap: {} ), kind: {}, handle: {}, bufferOffset: 0x{:X}, mappingSize: 0x{:X}, offset: 0x{:X}", flags.fixed, flags.remap, kind, handle, bufferOffset, mappingSize, offset);
|
||||
|
||||
if (flags.remap) {
|
||||
auto region{regionMap.lower_bound(offset)};
|
||||
if (region == regionMap.end()) {
|
||||
try {
|
||||
auto mapping{mappingMap.at(offset)};
|
||||
|
||||
if (mapping->size < mappingSize) {
|
||||
state.logger->Warn("Cannot remap a partially mapped GPU address space region: 0x{:X}", offset);
|
||||
return PosixResult::InvalidArgument;
|
||||
}
|
||||
|
||||
u64 gpuAddress{offset + bufferOffset};
|
||||
u8 *cpuPtr{mapping->ptr + bufferOffset};
|
||||
|
||||
state.soc->gm20b.gmmu.Map(gpuAddress, cpuPtr, mappingSize);
|
||||
|
||||
return PosixResult::Success;
|
||||
} catch (const std::out_of_range &e) {
|
||||
state.logger->Warn("Cannot remap an unmapped GPU address space region: 0x{:X}", offset);
|
||||
return PosixResult::InvalidArgument;
|
||||
}
|
||||
|
||||
if (region->second.size < mappingSize) {
|
||||
state.logger->Warn("Cannot remap an partially mapped GPU address space region: 0x{:X}", offset);
|
||||
return PosixResult::InvalidArgument;
|
||||
}
|
||||
|
||||
u64 gpuAddress{offset + bufferOffset};
|
||||
u8 *cpuPtr{region->second.ptr + bufferOffset};
|
||||
|
||||
if (!state.soc->gmmu.MapFixed(gpuAddress, cpuPtr, mappingSize)) {
|
||||
state.logger->Warn("Failed to remap GPU address space region: 0x{:X}", gpuAddress);
|
||||
return PosixResult::InvalidArgument;
|
||||
}
|
||||
|
||||
return PosixResult::Success;
|
||||
}
|
||||
|
||||
auto h{core.nvMap.GetHandle(handle)};
|
||||
if (!h)
|
||||
return PosixResult::InvalidArgument;
|
||||
|
||||
if (auto err{h->Duplicate(ctx.internalSession)}; err != PosixResult::Success)
|
||||
return err;
|
||||
|
||||
u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + bufferOffset)};
|
||||
u64 size{mappingSize ? mappingSize : h->origSize};
|
||||
|
||||
if (flags.fixed)
|
||||
offset = state.soc->gmmu.MapFixed(offset, cpuPtr, size);
|
||||
else
|
||||
offset = state.soc->gmmu.MapAllocate(cpuPtr, size);
|
||||
if (flags.fixed) {
|
||||
auto alloc{allocationMap.upper_bound(offset)};
|
||||
|
||||
if (offset == 0) {
|
||||
state.logger->Warn("Failed to map GPU address space region!");
|
||||
return PosixResult::InvalidArgument;
|
||||
if (alloc-- == allocationMap.begin() || (offset - alloc->first) + size > alloc->second.size)
|
||||
throw exception("Cannot perform a fixed mapping into an unallocated region!");
|
||||
|
||||
state.soc->gm20b.gmmu.Map(offset, cpuPtr, size);
|
||||
|
||||
auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, true, false, alloc->second.sparse)};
|
||||
alloc->second.mappings.push_back(mapping);
|
||||
mappingMap[offset] = mapping;
|
||||
} else {
|
||||
bool bigPage{[&] () {
|
||||
if (util::IsAligned(h->align, vm.bigPageSize))
|
||||
return true;
|
||||
else if (util::IsAligned(h->align, VM::PageSize))
|
||||
return false;
|
||||
else
|
||||
throw exception("Invalid handle alignment: 0x{:X}", h->align);
|
||||
}()};
|
||||
|
||||
auto &allocator{bigPage ? vm.bigPageAllocator : vm.smallPageAllocator};
|
||||
u32 pageSize{bigPage ? vm.bigPageSize : VM::PageSize};
|
||||
u32 pageSizeBits{bigPage ? vm.bigPageSizeBits : VM::PageSizeBits};
|
||||
|
||||
offset = static_cast<u64>(allocator->Allocate(util::AlignUp(size, pageSize) >> pageSizeBits)) << pageSizeBits;
|
||||
state.soc->gm20b.gmmu.Map(offset, cpuPtr, size);
|
||||
|
||||
auto mapping{std::make_shared<Mapping>(cpuPtr, offset, size, false, bigPage, false)};
|
||||
mappingMap[offset] = mapping;
|
||||
}
|
||||
|
||||
state.logger->Debug("Mapped to 0x{:X}", offset);
|
||||
|
||||
regionMap[offset] = {cpuPtr, size, flags.fixed};
|
||||
|
||||
return PosixResult::Success;
|
||||
}
|
||||
|
||||
PosixResult AsGpu::GetVaRegions(In<u64> bufAddr, InOut<u32> bufSize, Out<std::array<VaRegion, 2>> vaRegions) {
|
||||
// TODO: impl when we move allocator to nvdrv
|
||||
if (!vm.initialised)
|
||||
return PosixResult::InvalidArgument;
|
||||
|
||||
vaRegions = std::array<VaRegion, 2> {
|
||||
VaRegion{
|
||||
.pageSize = VM::PageSize,
|
||||
.pages = vm.smallPageAllocator->vaLimit - vm.smallPageAllocator->vaStart,
|
||||
.offset = vm.smallPageAllocator->vaStart << VM::PageSizeBits,
|
||||
},
|
||||
VaRegion{
|
||||
.pageSize = vm.bigPageSize,
|
||||
.pages = vm.bigPageAllocator->vaLimit - vm.bigPageAllocator->vaStart,
|
||||
.offset = vm.bigPageAllocator->vaStart << vm.bigPageSizeBits,
|
||||
}
|
||||
};
|
||||
|
||||
return PosixResult::Success;
|
||||
}
|
||||
|
||||
@ -116,30 +182,83 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
return GetVaRegions(bufAddr, bufSize, vaRegions);
|
||||
}
|
||||
|
||||
PosixResult AsGpu::AllocAsEx(In<u32> bigPageSize, In<FileDescriptor> asFd, In<u32> flags, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit) {
|
||||
// TODO: create the allocator here
|
||||
PosixResult AsGpu::AllocAsEx(In<u32> flags, In<FileDescriptor> asFd, In<u32> bigPageSize, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit) {
|
||||
if (vm.initialised)
|
||||
throw exception("Cannot initialise an address space twice!");
|
||||
|
||||
state.logger->Debug("bigPageSize: 0x{:X}, asFd: {}, flags: 0x{:X}, vaRangeStart: 0x{:X}, vaRangeEnd: 0x{:X}, vaRangeSplit: 0x{:X}",
|
||||
bigPageSize, asFd, flags, vaRangeStart, vaRangeEnd, vaRangeSplit);
|
||||
|
||||
if (bigPageSize) {
|
||||
if (!std::ispow2(bigPageSize)) {
|
||||
state.logger->Error("Non power-of-2 big page size: 0x{:X}!", bigPageSize);
|
||||
return PosixResult::InvalidArgument;
|
||||
}
|
||||
|
||||
if (!(bigPageSize & VM::SupportedBigPageSizes)) {
|
||||
state.logger->Error("Unsupported big page size: 0x{:X}!", bigPageSize);
|
||||
return PosixResult::InvalidArgument;
|
||||
}
|
||||
|
||||
vm.bigPageSize = bigPageSize;
|
||||
vm.bigPageSizeBits = std::countr_zero(bigPageSize);
|
||||
|
||||
vm.vaRangeStart = bigPageSize << VM::VaStartShift;
|
||||
}
|
||||
|
||||
if (vaRangeStart) {
|
||||
vm.vaRangeStart = vaRangeStart;
|
||||
vm.vaRangeSplit = vaRangeSplit;
|
||||
vm.vaRangeEnd = vaRangeEnd;
|
||||
}
|
||||
|
||||
u64 startPages{vm.vaRangeStart >> VM::PageSizeBits};
|
||||
u64 endPages{vm.vaRangeSplit >> VM::PageSizeBits};
|
||||
vm.smallPageAllocator = std::make_unique<VM::Allocator>(startPages, endPages);
|
||||
|
||||
u64 startBigPages{vm.vaRangeSplit >> vm.bigPageSizeBits};
|
||||
u64 endBigPages{(vm.vaRangeEnd - vm.vaRangeSplit) >> vm.bigPageSizeBits};
|
||||
vm.bigPageAllocator = std::make_unique<VM::Allocator>(startBigPages, endBigPages);
|
||||
|
||||
vm.initialised = true;
|
||||
|
||||
return PosixResult::Success;
|
||||
}
|
||||
|
||||
PosixResult AsGpu::Remap(span<RemapEntry> entries) {
|
||||
constexpr u32 BigPageSize{0x10}; //!< The big page size of the GPU
|
||||
|
||||
for (const auto &entry : entries) {
|
||||
auto h{core.nvMap.GetHandle(entry.handle)};
|
||||
if (!h)
|
||||
u64 virtAddr{static_cast<u64>(entry.asOffsetBigPages) << vm.bigPageSizeBits};
|
||||
u64 size{static_cast<u64>(entry.bigPages) << vm.bigPageSizeBits};
|
||||
|
||||
auto alloc{allocationMap.upper_bound(virtAddr)};
|
||||
|
||||
if (alloc-- == allocationMap.begin() || (virtAddr - alloc->first) + size > alloc->second.size) {
|
||||
state.logger->Warn("Cannot remap into an unallocated region!");
|
||||
return PosixResult::InvalidArgument;
|
||||
}
|
||||
|
||||
u64 virtAddr{static_cast<u64>(entry.asOffsetBigPages) << BigPageSize};
|
||||
u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + (static_cast<u64>(entry.handleOffsetBigPages) << BigPageSize))};
|
||||
u64 size{static_cast<u64>(entry.bigPages) << BigPageSize};
|
||||
if (!alloc->second.sparse) {
|
||||
state.logger->Warn("Cannot remap a non-sparse mapping!");
|
||||
return PosixResult::InvalidArgument;
|
||||
}
|
||||
|
||||
state.soc->gmmu.MapFixed(virtAddr, cpuPtr, size);
|
||||
if (!entry.handle) {
|
||||
state.soc->gm20b.gmmu.Map(virtAddr, soc::gm20b::GM20B::GMMU::SparsePlaceholderAddress(), size, true);
|
||||
} else {
|
||||
auto h{core.nvMap.GetHandle(entry.handle)};
|
||||
if (!h)
|
||||
return PosixResult::InvalidArgument;
|
||||
|
||||
u8 *cpuPtr{reinterpret_cast<u8 *>(h->address + (static_cast<u64>(entry.handleOffsetBigPages) << vm.bigPageSizeBits))};
|
||||
|
||||
state.soc->gm20b.gmmu.Map(virtAddr, cpuPtr, size);
|
||||
}
|
||||
}
|
||||
|
||||
return PosixResult::Success;
|
||||
}
|
||||
|
||||
#include <services/nvdrv/devices/deserialisation/macro_def.h>
|
||||
#include <services/nvdrv/devices/deserialisation/macro_def.inc>
|
||||
static constexpr u32 AsGpuMagic{0x41};
|
||||
|
||||
VARIABLE_IOCTL_HANDLER_FUNC(AsGpu, ({
|
||||
@ -152,7 +271,7 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
IOCTL_CASE_ARGS(INOUT, SIZE(0x8), MAGIC(AsGpuMagic), FUNC(0x5),
|
||||
UnmapBuffer, ARGS(In<u64>))
|
||||
IOCTL_CASE_ARGS(INOUT, SIZE(0x28), MAGIC(AsGpuMagic), FUNC(0x6),
|
||||
MapBufferEx, ARGS(In<MappingFlags>, In<u32>, In<core::NvMap::Handle::Id>, InOut<u32>, In<u64>, In<u64>, InOut<u64>))
|
||||
MapBufferEx, ARGS(In<MappingFlags>, In<u32>, In<core::NvMap::Handle::Id>, Pad<u32>, In<u64>, In<u64>, InOut<u64>))
|
||||
IOCTL_CASE_ARGS(INOUT, SIZE(0x40), MAGIC(AsGpuMagic), FUNC(0x8),
|
||||
GetVaRegions, ARGS(In<u64>, InOut<u32>, Pad<u32>, Out<std::array<VaRegion, 2>>))
|
||||
IOCTL_CASE_ARGS(IN, SIZE(0x28), MAGIC(AsGpuMagic), FUNC(0x9),
|
||||
@ -166,5 +285,5 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
INLINE_IOCTL_CASE_ARGS(INOUT, SIZE(0x40), MAGIC(AsGpuMagic), FUNC(0x8),
|
||||
GetVaRegions3, ARGS(In<u64>, InOut<u32>, Pad<u32>, Out<std::array<VaRegion, 2>>))
|
||||
}))
|
||||
#include <services/nvdrv/devices/deserialisation/macro_undef.h>
|
||||
#include <services/nvdrv/devices/deserialisation/macro_undef.inc>
|
||||
}
|
||||
|
@ -3,6 +3,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common/address_space.h>
|
||||
|
||||
#include <services/nvdrv/devices/nvdevice.h>
|
||||
|
||||
namespace skyline::service::nvdrv::device::nvhost {
|
||||
@ -12,18 +14,64 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
*/
|
||||
class AsGpu : public NvDevice {
|
||||
private:
|
||||
struct AddressSpaceRegion {
|
||||
struct Mapping {
|
||||
u8 *ptr;
|
||||
u64 offset;
|
||||
u64 size;
|
||||
bool fixed;
|
||||
bool bigPage; // Only valid if fixed == false
|
||||
bool sparseAlloc;
|
||||
|
||||
Mapping(u8 *ptr, u64 offset, u64 size, bool fixed, bool bigPage, bool sparseAlloc) : ptr(ptr),
|
||||
offset(offset),
|
||||
size(size),
|
||||
fixed(fixed),
|
||||
bigPage(bigPage),
|
||||
sparseAlloc(sparseAlloc) {}
|
||||
};
|
||||
|
||||
std::map<u64, AddressSpaceRegion> regionMap; //!< This maps the base addresses of mapped buffers to their total sizes and mapping type, this is needed as what was originally a single buffer may have been split into multiple GPU side buffers with the remap flag.
|
||||
struct Allocation {
|
||||
u64 size;
|
||||
std::list<std::shared_ptr<Mapping>> mappings;
|
||||
u32 pageSize;
|
||||
bool sparse;
|
||||
};
|
||||
|
||||
std::map<u64, std::shared_ptr<Mapping>> mappingMap; //!< This maps the base addresses of mapped buffers to their total sizes and mapping type, this is needed as what was originally a single buffer may have been split into multiple GPU side buffers with the remap flag.
|
||||
|
||||
std::map<u64, Allocation> allocationMap;
|
||||
|
||||
|
||||
struct VM {
|
||||
static constexpr u32 PageSize{0x1000};
|
||||
static constexpr u32 PageSizeBits{std::countr_zero(PageSize)};
|
||||
|
||||
static constexpr u32 SupportedBigPageSizes{0x30000};
|
||||
static constexpr u32 DefaultBigPageSize{0x20000};
|
||||
u32 bigPageSize{DefaultBigPageSize};
|
||||
u32 bigPageSizeBits{std::countr_zero(DefaultBigPageSize)};
|
||||
|
||||
static constexpr u32 VaStartShift{10};
|
||||
static constexpr u64 DefaultVaSplit{1ULL << 34};
|
||||
static constexpr u64 DefaultVaRange{1ULL << 37};
|
||||
u64 vaRangeStart{DefaultBigPageSize << VaStartShift};
|
||||
u64 vaRangeSplit{DefaultVaSplit};
|
||||
u64 vaRangeEnd{DefaultVaRange};
|
||||
|
||||
using Allocator = FlatAllocator<u32, 0, 32>;
|
||||
|
||||
std::unique_ptr<Allocator> bigPageAllocator{};
|
||||
std::unique_ptr<Allocator> smallPageAllocator{};
|
||||
|
||||
bool initialised{};
|
||||
} vm;
|
||||
|
||||
|
||||
public:
|
||||
struct MappingFlags {
|
||||
bool fixed : 1;
|
||||
u8 _pad0_ : 7;
|
||||
bool sparse : 1;
|
||||
u8 _pad0_ : 6;
|
||||
bool remap : 1;
|
||||
u32 _pad1_ : 23;
|
||||
};
|
||||
@ -77,7 +125,7 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
* @brief Maps a region into this address space with extra parameters
|
||||
* @url https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_MAP_BUFFER_EX
|
||||
*/
|
||||
PosixResult MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, InOut<u32> pageSize, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset);
|
||||
PosixResult MapBufferEx(In<MappingFlags> flags, In<u32> kind, In<core::NvMap::Handle::Id> handle, In<u64> bufferOffset, In<u64> mappingSize, InOut<u64> offset);
|
||||
|
||||
/**
|
||||
* @brief Returns info about the address space and its page sizes
|
||||
@ -94,7 +142,7 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
* @brief Allocates this address space with the given parameters
|
||||
* @url https://switchbrew.org/wiki/NV_services#NVGPU_AS_IOCTL_ALLOC_AS_EX
|
||||
*/
|
||||
PosixResult AllocAsEx(In<u32> bigPageSize, In<FileDescriptor> asFd, In<u32> flags, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit);
|
||||
PosixResult AllocAsEx(In<u32> flags, In<FileDescriptor> asFd, In<u32> bigPageSize, In<u64> vaRangeStart, In<u64> vaRangeEnd, In<u64> vaRangeSplit);
|
||||
|
||||
/**
|
||||
* @brief Remaps a region of the GPU address space
|
||||
|
@ -234,7 +234,7 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#include <services/nvdrv/devices/deserialisation/macro_def.h>
|
||||
#include <services/nvdrv/devices/deserialisation/macro_def.inc>
|
||||
static constexpr u32 CtrlMagic{0};
|
||||
|
||||
IOCTL_HANDLER_FUNC(Ctrl, ({
|
||||
@ -254,5 +254,5 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
IOCTL_CASE_RESULT(INOUT, SIZE(0x183), MAGIC(CtrlMagic), FUNC(0x1B),
|
||||
PosixResult::InvalidArgument) // GetConfig isn't available in production
|
||||
}))
|
||||
#include <services/nvdrv/devices/deserialisation/macro_undef.h>
|
||||
#include <services/nvdrv/devices/deserialisation/macro_undef.inc>
|
||||
}
|
||||
|
@ -62,7 +62,7 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
}
|
||||
}
|
||||
|
||||
#include <services/nvdrv/devices/deserialisation/macro_def.h>
|
||||
#include <services/nvdrv/devices/deserialisation/macro_def.inc>
|
||||
static constexpr u32 CtrlGpuMagic{0x47};
|
||||
|
||||
IOCTL_HANDLER_FUNC(CtrlGpu, ({
|
||||
@ -77,5 +77,5 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
IOCTL_CASE_ARGS(OUT, SIZE(0x8), MAGIC(CtrlGpuMagic), FUNC(0x14),
|
||||
GetActiveSlotMask, ARGS(Out<u32>, Out<u32>))
|
||||
}))
|
||||
#include <services/nvdrv/devices/deserialisation/macro_undef.h>
|
||||
#include <services/nvdrv/devices/deserialisation/macro_undef.inc>
|
||||
}
|
||||
|
@ -104,7 +104,7 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
}
|
||||
}
|
||||
|
||||
#include <services/nvdrv/devices/deserialisation/macro_def.h>
|
||||
#include <services/nvdrv/devices/deserialisation/macro_def.inc>
|
||||
static constexpr u32 GpuChannelUserMagic{0x47};
|
||||
static constexpr u32 GpuChannelMagic{0x48};
|
||||
|
||||
@ -138,5 +138,5 @@ namespace skyline::service::nvdrv::device::nvhost {
|
||||
INLINE_IOCTL_CASE_ARGS(INOUT, SIZE(0x18), MAGIC(GpuChannelMagic), FUNC(0x1B),
|
||||
SubmitGpfifo2, ARGS(In<u64>, In<u32>, InOut<SubmitGpfifoFlags>, InOut<Fence>))
|
||||
}))
|
||||
#include <services/nvdrv/devices/deserialisation/macro_undef.h>
|
||||
#include <services/nvdrv/devices/deserialisation/macro_undef.inc>
|
||||
}
|
||||
|
@ -115,7 +115,7 @@ namespace skyline::service::nvdrv::device {
|
||||
return PosixResult::Success;
|
||||
}
|
||||
|
||||
#include "deserialisation/macro_def.h"
|
||||
#include "deserialisation/macro_def.inc"
|
||||
static constexpr u32 NvMapMagic{1};
|
||||
|
||||
IOCTL_HANDLER_FUNC(NvMap, ({
|
||||
@ -132,6 +132,6 @@ namespace skyline::service::nvdrv::device {
|
||||
IOCTL_CASE_ARGS(INOUT, SIZE(0x8), MAGIC(NvMapMagic), FUNC(0xE),
|
||||
GetId, ARGS(Out<NvMapCore::Handle::Id>, In<NvMapCore::Handle::Id>))
|
||||
}))
|
||||
#include "deserialisation/macro_undef.h"
|
||||
#include "deserialisation/macro_undef.inc"
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "soc/gmmu.h"
|
||||
#include "soc/host1x.h"
|
||||
#include "soc/gm20b.h"
|
||||
|
||||
@ -14,10 +13,9 @@ namespace skyline::soc {
|
||||
*/
|
||||
class SOC {
|
||||
public:
|
||||
gmmu::GraphicsMemoryManager gmmu;
|
||||
host1x::Host1X host1x;
|
||||
gm20b::GM20B gm20b;
|
||||
|
||||
SOC(const DeviceState &state) : gmmu(state), gm20b(state) {}
|
||||
SOC(const DeviceState &state) : gm20b(state) {}
|
||||
};
|
||||
}
|
||||
|
20
app/src/main/cpp/skyline/soc/gm20b.cpp
Normal file
20
app/src/main/cpp/skyline/soc/gm20b.cpp
Normal file
@ -0,0 +1,20 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <common/address_space.inc>
|
||||
#include "gm20b.h"
|
||||
|
||||
namespace skyline {
|
||||
template class FlatAddressSpaceMap<u64, 0, u8 *, nullptr, true, soc::gm20b::GM20B::AddressSpaceBits>;
|
||||
template class FlatMemoryManager<u64, 0, soc::gm20b::GM20B::AddressSpaceBits>;
|
||||
}
|
||||
|
||||
namespace skyline::soc::gm20b {
|
||||
GM20B::GM20B(const DeviceState &state) :
|
||||
fermi2D(state),
|
||||
keplerMemory(state),
|
||||
maxwell3D(state),
|
||||
maxwellCompute(state),
|
||||
maxwellDma(state),
|
||||
gpfifo(state) {}
|
||||
}
|
@ -3,23 +3,28 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common/address_space.h>
|
||||
#include "gm20b/engines/maxwell_3d.h"
|
||||
#include "gm20b/gpfifo.h"
|
||||
|
||||
namespace skyline::soc::gm20b {
|
||||
/**
|
||||
* @brief The GPU block in the X1, it contains all GPU engines required for accelerating graphics operations
|
||||
* @note We omit parts of components related to external access such as the GM20B Host, all accesses to the external components are done directly
|
||||
* @note We omit parts of components related to external access such as the grhost, all accesses to the external components are done directly
|
||||
*/
|
||||
class GM20B {
|
||||
public:
|
||||
static constexpr u8 AddressSpaceBits{40}; //!< The width of the GMMU AS
|
||||
using GMMU = FlatMemoryManager<u64, 0, AddressSpaceBits>;
|
||||
|
||||
engine::Engine fermi2D;
|
||||
engine::maxwell3d::Maxwell3D maxwell3D;
|
||||
engine::Engine maxwellCompute;
|
||||
engine::Engine maxwellDma;
|
||||
engine::Engine keplerMemory;
|
||||
GPFIFO gpfifo;
|
||||
GMMU gmmu;
|
||||
|
||||
GM20B(const DeviceState &state) : fermi2D(state), keplerMemory(state), maxwell3D(state), maxwellCompute(state), maxwellDma(state), gpfifo(state) {}
|
||||
GM20B(const DeviceState &state);
|
||||
};
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <soc/gmmu.h>
|
||||
#include <common/address_space.h>
|
||||
#include <soc/gm20b/engines/maxwell_3d.h>
|
||||
|
||||
namespace skyline::soc::gm20b::engine::maxwell3d {
|
||||
|
@ -157,7 +157,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
||||
|
||||
switch (registers.semaphore.info.structureSize) {
|
||||
case Registers::SemaphoreInfo::StructureSize::OneWord:
|
||||
state.soc->gmmu.Write<u32>(static_cast<u32>(result), registers.semaphore.address.Pack());
|
||||
state.soc->gm20b.gmmu.Write<u32>(registers.semaphore.address.Pack(), static_cast<u32>(result));
|
||||
break;
|
||||
case Registers::SemaphoreInfo::StructureSize::FourWords: {
|
||||
// Convert the current nanosecond time to GPU ticks
|
||||
@ -167,7 +167,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
|
||||
u64 nsTime{util::GetTimeNs()};
|
||||
u64 timestamp{(nsTime / NsToTickDenominator) * NsToTickNumerator + ((nsTime % NsToTickDenominator) * NsToTickNumerator) / NsToTickDenominator};
|
||||
|
||||
state.soc->gmmu.Write<FourWordResult>(FourWordResult{result, timestamp}, registers.semaphore.address.Pack());
|
||||
state.soc->gm20b.gmmu.Write<FourWordResult>(registers.semaphore.address.Pack(), FourWordResult{result, timestamp});
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -56,7 +56,7 @@ namespace skyline::soc::gm20b {
|
||||
}
|
||||
|
||||
pushBufferData.resize(gpEntry.size);
|
||||
state.soc->gmmu.Read<u32>(pushBufferData, gpEntry.Address());
|
||||
state.soc->gm20b.gmmu.Read<u32>(pushBufferData, gpEntry.Address());
|
||||
|
||||
for (auto entry{pushBufferData.begin()}; entry != pushBufferData.end(); entry++) {
|
||||
// An entry containing all zeroes is a NOP, skip over it
|
||||
@ -88,8 +88,7 @@ namespace skyline::soc::gm20b {
|
||||
return;
|
||||
|
||||
default:
|
||||
state.logger->Warn("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
|
||||
break;
|
||||
throw exception("Unsupported pushbuffer method SecOp: {}", static_cast<u8>(methodHeader.secOp));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -106,7 +105,7 @@ namespace skyline::soc::gm20b {
|
||||
try {
|
||||
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
|
||||
pushBuffers->Process([this](GpEntry gpEntry) {
|
||||
state.logger->Debug("Processing pushbuffer: 0x{:X}", gpEntry.Address());
|
||||
state.logger->Warn("Processing pushbuffer: 0x{:X}", gpEntry.Address());
|
||||
Process(gpEntry);
|
||||
});
|
||||
} catch (const signal::SignalException &e) {
|
||||
|
@ -1,214 +0,0 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <kernel/types/KProcess.h>
|
||||
#include "gmmu.h"
|
||||
|
||||
namespace skyline::soc::gmmu {
|
||||
constexpr u64 GpuPageSize{1 << 16}; //!< The page size of the GPU address space
|
||||
|
||||
GraphicsMemoryManager::GraphicsMemoryManager(const DeviceState &state) : state(state) {
|
||||
constexpr u64 gpuAddressSpaceSize{1UL << 40}; //!< The size of the GPU address space
|
||||
constexpr u64 gpuAddressSpaceBase{0x100000}; //!< The base of the GPU address space - must be non-zero
|
||||
|
||||
// Create the initial chunk that will be split to create new chunks
|
||||
ChunkDescriptor baseChunk(gpuAddressSpaceBase, gpuAddressSpaceSize, nullptr, ChunkState::Unmapped);
|
||||
chunks.push_back(baseChunk);
|
||||
}
|
||||
|
||||
std::optional<ChunkDescriptor> GraphicsMemoryManager::FindChunk(ChunkState desiredState, u64 size, u64 alignment) {
|
||||
auto chunk{std::find_if(chunks.begin(), chunks.end(), [desiredState, size, alignment](const ChunkDescriptor &chunk) -> bool {
|
||||
return (alignment ? util::IsAligned(chunk.virtualAddress, alignment) : true) && chunk.size > size && chunk.state == desiredState;
|
||||
})};
|
||||
|
||||
if (chunk != chunks.end())
|
||||
return *chunk;
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
u64 GraphicsMemoryManager::InsertChunk(const ChunkDescriptor &newChunk) {
|
||||
auto chunkEnd{chunks.end()};
|
||||
for (auto chunk{chunks.begin()}; chunk != chunkEnd; chunk++) {
|
||||
if (chunk->CanContain(newChunk)) {
|
||||
auto oldChunk{*chunk};
|
||||
u64 newSize{newChunk.virtualAddress - chunk->virtualAddress};
|
||||
u64 extension{chunk->size - newSize - newChunk.size};
|
||||
|
||||
if (newSize == 0) {
|
||||
*chunk = newChunk;
|
||||
} else {
|
||||
chunk->size = newSize;
|
||||
chunk = chunks.insert(std::next(chunk), newChunk);
|
||||
}
|
||||
|
||||
if (extension)
|
||||
chunks.insert(std::next(chunk), ChunkDescriptor(newChunk.virtualAddress + newChunk.size, extension, (oldChunk.state == ChunkState::Mapped) ? (oldChunk.cpuPtr + newSize + newChunk.size) : nullptr, oldChunk.state));
|
||||
|
||||
return newChunk.virtualAddress;
|
||||
} else if (chunk->virtualAddress + chunk->size > newChunk.virtualAddress) {
|
||||
chunk->size = newChunk.virtualAddress - chunk->virtualAddress;
|
||||
|
||||
// Deletes all chunks that are within the chunk being inserted and split the final one
|
||||
auto tailChunk{std::next(chunk)};
|
||||
while (tailChunk != chunkEnd) {
|
||||
if (tailChunk->virtualAddress + tailChunk->size >= newChunk.virtualAddress + newChunk.size)
|
||||
break;
|
||||
|
||||
tailChunk = chunks.erase(tailChunk);
|
||||
chunkEnd = chunks.end();
|
||||
}
|
||||
|
||||
// The given chunk is too large to fit into existing chunks
|
||||
if (tailChunk == chunkEnd)
|
||||
break;
|
||||
|
||||
u64 chunkSliceOffset{newChunk.virtualAddress + newChunk.size - tailChunk->virtualAddress};
|
||||
tailChunk->virtualAddress += chunkSliceOffset;
|
||||
tailChunk->size -= chunkSliceOffset;
|
||||
if (tailChunk->state == ChunkState::Mapped)
|
||||
tailChunk->cpuPtr += chunkSliceOffset;
|
||||
|
||||
// If the size of the head chunk is zero then we can directly replace it with our new one rather than inserting it
|
||||
auto headChunk{std::prev(tailChunk)};
|
||||
if (headChunk->size == 0)
|
||||
*headChunk = newChunk;
|
||||
else
|
||||
chunks.insert(std::next(headChunk), newChunk);
|
||||
|
||||
return newChunk.virtualAddress;
|
||||
}
|
||||
}
|
||||
|
||||
throw exception("Failed to insert chunk into GPU address space!");
|
||||
}
|
||||
|
||||
u64 GraphicsMemoryManager::ReserveSpace(u64 size, u64 alignment) {
|
||||
size = util::AlignUp(size, GpuPageSize);
|
||||
|
||||
std::unique_lock lock(mutex);
|
||||
auto newChunk{FindChunk(ChunkState::Unmapped, size, alignment)};
|
||||
if (!newChunk) [[unlikely]]
|
||||
return 0;
|
||||
|
||||
auto chunk{*newChunk};
|
||||
chunk.size = size;
|
||||
chunk.state = ChunkState::Reserved;
|
||||
|
||||
return InsertChunk(chunk);
|
||||
}
|
||||
|
||||
u64 GraphicsMemoryManager::ReserveFixed(u64 virtualAddress, u64 size) {
|
||||
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
|
||||
return 0;
|
||||
|
||||
size = util::AlignUp(size, GpuPageSize);
|
||||
|
||||
std::unique_lock lock(mutex);
|
||||
return InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Reserved));
|
||||
}
|
||||
|
||||
u64 GraphicsMemoryManager::MapAllocate(u8 *cpuPtr, u64 size) {
|
||||
size = util::AlignUp(size, GpuPageSize);
|
||||
|
||||
std::unique_lock lock(mutex);
|
||||
auto mappedChunk{FindChunk(ChunkState::Unmapped, size)};
|
||||
if (!mappedChunk) [[unlikely]]
|
||||
return 0;
|
||||
|
||||
auto chunk{*mappedChunk};
|
||||
chunk.cpuPtr = cpuPtr;
|
||||
chunk.size = size;
|
||||
chunk.state = ChunkState::Mapped;
|
||||
|
||||
return InsertChunk(chunk);
|
||||
}
|
||||
|
||||
u64 GraphicsMemoryManager::MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size) {
|
||||
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
|
||||
return 0;
|
||||
|
||||
size = util::AlignUp(size, GpuPageSize);
|
||||
|
||||
std::unique_lock lock(mutex);
|
||||
return InsertChunk(ChunkDescriptor(virtualAddress, size, cpuPtr, ChunkState::Mapped));
|
||||
}
|
||||
|
||||
bool GraphicsMemoryManager::Unmap(u64 virtualAddress, u64 size) {
|
||||
if (!util::IsAligned(virtualAddress, GpuPageSize)) [[unlikely]]
|
||||
return false;
|
||||
|
||||
try {
|
||||
std::unique_lock lock(mutex);
|
||||
InsertChunk(ChunkDescriptor(virtualAddress, size, nullptr, ChunkState::Unmapped));
|
||||
} catch (const std::exception &e) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void GraphicsMemoryManager::Read(u8 *destination, u64 virtualAddress, u64 size) {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
|
||||
return address < chunk.virtualAddress;
|
||||
})};
|
||||
|
||||
if (chunk == chunks.end() || chunk->state != ChunkState::Mapped)
|
||||
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
|
||||
|
||||
chunk--;
|
||||
|
||||
u64 initialSize{size};
|
||||
u64 chunkOffset{virtualAddress - chunk->virtualAddress};
|
||||
u8 *source{chunk->cpuPtr + chunkOffset};
|
||||
u64 sourceSize{std::min(chunk->size - chunkOffset, size)};
|
||||
|
||||
// A continuous region in the GPU address space may be made up of several discontinuous regions in physical memory so we have to iterate over all chunks
|
||||
while (size) {
|
||||
std::memcpy(destination + (initialSize - size), source, sourceSize);
|
||||
|
||||
size -= sourceSize;
|
||||
if (size) {
|
||||
if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
|
||||
throw exception("Failed to read region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
|
||||
|
||||
source = chunk->cpuPtr;
|
||||
sourceSize = std::min(chunk->size, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GraphicsMemoryManager::Write(u8 *source, u64 virtualAddress, u64 size) {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
auto chunk{std::upper_bound(chunks.begin(), chunks.end(), virtualAddress, [](const u64 address, const ChunkDescriptor &chunk) -> bool {
|
||||
return address < chunk.virtualAddress;
|
||||
})};
|
||||
|
||||
if (chunk == chunks.end() || chunk->state != ChunkState::Mapped)
|
||||
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
|
||||
|
||||
chunk--;
|
||||
|
||||
u64 initialSize{size};
|
||||
u64 chunkOffset{virtualAddress - chunk->virtualAddress};
|
||||
u8 *destination{chunk->cpuPtr + chunkOffset};
|
||||
u64 destinationSize{std::min(chunk->size - chunkOffset, size)};
|
||||
|
||||
// A continuous region in the GPU address space may be made up of several discontinuous regions in physical memory so we have to iterate over all chunks
|
||||
while (size) {
|
||||
std::memcpy(destination, source + (initialSize - size), destinationSize);
|
||||
|
||||
size -= destinationSize;
|
||||
if (size) {
|
||||
if (++chunk == chunks.end() || chunk->state != ChunkState::Mapped)
|
||||
throw exception("Failed to write region in GPU address space: Address: 0x{:X}, Size: 0x{:X}", virtualAddress, size);
|
||||
|
||||
destination = chunk->cpuPtr;
|
||||
destinationSize = std::min(chunk->size, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,140 +0,0 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common.h>
|
||||
|
||||
namespace skyline::soc::gmmu {
|
||||
enum class ChunkState {
|
||||
Unmapped, //!< The chunk is unmapped
|
||||
Reserved, //!< The chunk is reserved
|
||||
Mapped //!< The chunk is mapped and a CPU side address is present
|
||||
};
|
||||
|
||||
struct ChunkDescriptor {
|
||||
u64 virtualAddress; //!< The address of the chunk in the virtual address space
|
||||
u64 size; //!< The size of the chunk in bytes
|
||||
u8 *cpuPtr; //!< A pointer to the chunk in the application's address space (if mapped)
|
||||
ChunkState state;
|
||||
|
||||
ChunkDescriptor(u64 virtualAddress, u64 size, u8 *cpuPtr, ChunkState state) : virtualAddress(virtualAddress), size(size), cpuPtr(cpuPtr), state(state) {}
|
||||
|
||||
/**
|
||||
* @return If the given chunk can be contained wholly within this chunk
|
||||
*/
|
||||
inline bool CanContain(const ChunkDescriptor &chunk) {
|
||||
return (chunk.virtualAddress >= virtualAddress) && ((size + virtualAddress) >= (chunk.size + chunk.virtualAddress));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief The GraphicsMemoryManager class handles mapping between a Maxwell GPU virtual address space and an application's address space and is meant to roughly emulate the GMMU on the X1
|
||||
* @note This is not accurate to the X1 as it would have an SMMU between the GMMU and physical memory but we don't emulate this abstraction at the moment
|
||||
*/
|
||||
class GraphicsMemoryManager {
|
||||
private:
|
||||
const DeviceState &state;
|
||||
std::vector<ChunkDescriptor> chunks;
|
||||
std::shared_mutex mutex;
|
||||
|
||||
/**
|
||||
* @brief Finds a chunk in the virtual address space that is larger than meets the given requirements
|
||||
* @note vmmMutex MUST be locked when calling this
|
||||
* @param desiredState The state of the chunk to find
|
||||
* @param size The minimum size of the chunk to find
|
||||
* @param alignment The minimum alignment of the chunk to find
|
||||
* @return The first applicable chunk
|
||||
*/
|
||||
std::optional<ChunkDescriptor> FindChunk(ChunkState desiredState, u64 size, u64 alignment = 0);
|
||||
|
||||
/**
|
||||
* @brief Inserts a chunk into the chunk list, resizing and splitting as necessary
|
||||
* @note vmmMutex MUST be locked when calling this
|
||||
* @param newChunk The chunk to insert
|
||||
* @return The base virtual address of the inserted chunk
|
||||
*/
|
||||
u64 InsertChunk(const ChunkDescriptor &newChunk);
|
||||
|
||||
public:
|
||||
GraphicsMemoryManager(const DeviceState &state);
|
||||
|
||||
/**
|
||||
* @brief Reserves a region of the virtual address space so it will not be chosen automatically when mapping
|
||||
* @param size The size of the region to reserve
|
||||
* @param alignment The alignment of the region to reserve
|
||||
* @return The base virtual address of the reserved region
|
||||
*/
|
||||
u64 ReserveSpace(u64 size, u64 alignment);
|
||||
|
||||
/**
|
||||
* @brief Reserves a fixed region of the virtual address space so it will not be chosen automatically when mapping
|
||||
* @param virtualAddress The virtual base address of the region to allocate
|
||||
* @param size The size of the region to allocate
|
||||
* @return The base virtual address of the reserved region
|
||||
*/
|
||||
u64 ReserveFixed(u64 virtualAddress, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Maps a CPU memory region into an automatically chosen region of the virtual address space
|
||||
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
|
||||
* @param size The size of the region to map
|
||||
* @return The base virtual address of the mapped region
|
||||
*/
|
||||
u64 MapAllocate(u8 *cpuPtr, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Maps a CPU memory region to a fixed region in the virtual address space
|
||||
* @param virtualAddress The target virtual address of the region
|
||||
* @param cpuPtr A pointer to the region to be mapped into the virtual address space
|
||||
* @param size The size of the region to map
|
||||
* @return The base virtual address of the mapped region
|
||||
*/
|
||||
u64 MapFixed(u64 virtualAddress, u8 *cpuPtr, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Unmaps all chunks in the given region from the virtual address space
|
||||
* @return Whether the operation succeeded
|
||||
*/
|
||||
bool Unmap(u64 virtualAddress, u64 size);
|
||||
|
||||
void Read(u8 *destination, u64 virtualAddress, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Reads in a span from a region of the virtual address space
|
||||
*/
|
||||
template<typename T>
|
||||
void Read(span <T> destination, u64 virtualAddress) {
|
||||
Read(reinterpret_cast<u8 *>(destination.data()), virtualAddress, destination.size_bytes());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Reads in an object from a region of the virtual address space
|
||||
* @tparam T The type of object to return
|
||||
*/
|
||||
template<typename T>
|
||||
T Read(u64 virtualAddress) {
|
||||
T obj;
|
||||
Read(reinterpret_cast<u8 *>(&obj), virtualAddress, sizeof(T));
|
||||
return obj;
|
||||
}
|
||||
|
||||
void Write(u8 *source, u64 virtualAddress, u64 size);
|
||||
|
||||
/**
|
||||
* @brief Writes out a span to a region of the virtual address space
|
||||
*/
|
||||
template<typename T>
|
||||
void Write(span <T> source, u64 virtualAddress) {
|
||||
Write(reinterpret_cast<u8 *>(source.data()), virtualAddress, source.size_bytes());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Reads in an object from a region of the virtual address space
|
||||
*/
|
||||
template<typename T>
|
||||
void Write(T source, u64 virtualAddress) {
|
||||
Write(reinterpret_cast<u8 *>(&source), virtualAddress, sizeof(T));
|
||||
}
|
||||
};
|
||||
}
|
Loading…
Reference in New Issue
Block a user