Implement the Host1x command FIFO together with barebones Host1x classes

The Host1x block of the TX1 supports 14 separate channels to which commands can be issued. These all run asynchronously, so they are emulated the same way as GPU channels, with one FIFO emulation thread each. The command FIFO itself is very similar to the GPFIFO found in the GPU; however, there are some differences, mainly the introduction of classes (similar to engines) and the Mask opcode (which allows writing to a specific set of offsets much more efficiently).

There is an internal Host1x class which functions similarly to the GPFIFO class in the GPU, handling general operations such as syncpoint waits; this is accessed via the simple method interface. Other channels such as NVDEC and VIC are behind the 'Tegra Host Interface' (THI) in HW, which abstracts out the class's internal details and provides a uniform method interface on top of the Host1x method one. We emulate the THI as a templated wrapper for the underlying class.

Syncpoint increments in Host1x are different from those in the GPU: the THI allows submitting increment requests that will be queued up and only applied after a specific condition in the associated engine is met; however, the option for immediate increments is also available.
This commit is contained in:
Billy Laws 2021-10-30 18:57:53 +01:00 committed by PixelyIon
parent 2494cafee8
commit baefb0fe93
14 changed files with 482 additions and 2 deletions

View File

@ -137,6 +137,10 @@ add_library(skyline SHARED
${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp
${source_DIR}/skyline/soc/smmu.cpp
${source_DIR}/skyline/soc/host1x/syncpoint.cpp
${source_DIR}/skyline/soc/host1x/command_fifo.cpp
${source_DIR}/skyline/soc/host1x/classes/host1x.cpp
${source_DIR}/skyline/soc/host1x/classes/vic.cpp
${source_DIR}/skyline/soc/host1x/classes/nvdec.cpp
${source_DIR}/skyline/soc/gm20b/channel.cpp
${source_DIR}/skyline/soc/gm20b/gpfifo.cpp
${source_DIR}/skyline/soc/gm20b/gmmu.cpp

View File

@ -17,6 +17,6 @@ namespace skyline::soc {
SMMU smmu;
host1x::Host1X host1x;
SOC(const DeviceState &state) {}
SOC(const DeviceState &state) : host1x(state) {}
};
}

View File

@ -4,14 +4,20 @@
#pragma once
#include "host1x/syncpoint.h"
#include "host1x/command_fifo.h"
namespace skyline::soc::host1x {
constexpr static size_t ChannelCount{14}; //!< The number of channels within host1x
/**
* @brief An abstraction for the graphics host, this handles DMA on behalf of the CPU when communicating to its clients alongside handling syncpts
* @note This is different from the GM20B Host, it serves a similar function and has an interface for accessing host1x syncpts
*/
class Host1X {
public:
std::array<Syncpoint, SyncpointCount> syncpoints{};
SyncpointSet syncpoints;
std::array<ChannelCommandFifo, ChannelCount> channels;
Host1X(const DeviceState &state) : channels{util::MakeFilledArray<ChannelCommandFifo, ChannelCount>(state, syncpoints)} {}
};
}

View File

@ -0,0 +1,42 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline::soc::host1x {
/**
* @brief IDs of the Host1x classes that commands submitted to a channel can be directed at
* @note See '14.4.10 Class IDs' in the TRM
*/
enum class ClassId : u16 {
Host1x = 0x1, //!< The class internal to Host1x itself
VIC = 0x5D, //!< The VIC class
NvJpg = 0xC0, //!< NOTE(review): presumably the NVJPG engine, no implementation of this class is visible here
NvDec = 0xF0 //!< The NVDEC class
};
constexpr static u32 IncrementSyncpointMethodId{0}; //!< See below
/**
* @brief The argument of the syncpoint increment method, viewable either as the raw word or as its individual fields
* @note This method is common between all classes
* @note This is derived from '14.10.1 NV_CLASS_HOST_INCR_SYNCPT_0' in the TRM
*/
union IncrementSyncpointMethod {
/**
* @brief The condition attached to an increment request
*/
enum class Condition : u8 {
Immediate = 0,
OpDone = 1,
RdDone = 2,
RegWRSafe = 3
};
u32 raw; //!< The entire method argument
struct {
u8 index; //!< Index of the syncpoint that should be incremented
Condition condition; //!< The condition requested for this increment
u16 _pad_; //!< Unused, pads the fields out to the size of the raw argument
};
};
static_assert(sizeof(IncrementSyncpointMethod) == sizeof(u32));
}

View File

@ -0,0 +1,39 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <common.h>
#include "class.h"
#include "host1x.h"
namespace skyline::soc::host1x {
// Stores references to the device state and to the syncpoint set that this class increments and waits on
Host1XClass::Host1XClass(const DeviceState &state, SyncpointSet &syncpoints) : state(state), syncpoints(syncpoints) {}
/**
 * @brief Executes a single method of the internal Host1x class
 * @param method The ID of the method to execute
 * @param argument The raw argument word supplied alongside the method
 * @note Methods that aren't recognised are logged as an error and otherwise ignored
 */
void Host1XClass::CallMethod(u32 method, u32 argument) {
    constexpr static u32 LoadSyncpointPayload32MethodId{0x4E}; //!< See '14.3.2.12 32-Bit Sync Point Comparison Methods' in TRM
    constexpr static u32 WaitSyncpoint32MethodId{0x50}; //!< As above

    if (method == IncrementSyncpointMethodId) {
        // The condition field of the argument is ignored as it doesn't matter for Host1x class increments
        IncrementSyncpointMethod request{.raw = argument};
        state.logger->Debug("Increment syncpoint: {}", request.index);
        syncpoints.at(request.index).Increment();
        return;
    }

    if (method == LoadSyncpointPayload32MethodId) {
        // Latch the threshold that a later wait method will compare against
        syncpointPayload = argument;
        return;
    }

    if (method == WaitSyncpoint32MethodId) {
        // Only the low byte of the argument is used as the syncpoint ID
        u32 waitTarget{static_cast<u8>(argument)};
        state.logger->Debug("Wait syncpoint: {}, thresh: {}", waitTarget, syncpointPayload);
        syncpoints.at(waitTarget).Wait(syncpointPayload, std::chrono::steady_clock::duration::max());
        return;
    }

    state.logger->Error("Unknown host1x class method called: 0x{:X}", method);
}
}

View File

@ -0,0 +1,24 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
#include <soc/host1x/syncpoint.h>
namespace skyline::soc::host1x {
/**
* @brief A class internal to Host1x, used for performing syncpoint waits and other general operations
*/
class Host1XClass {
private:
const DeviceState &state;
SyncpointSet &syncpoints; //!< The syncpoints that this class increments and waits on
u32 syncpointPayload{}; //!< Holds the current payload for the 32-bit syncpoint comparison methods
public:
Host1XClass(const DeviceState &state, SyncpointSet &syncpoints);
/**
* @brief Executes the supplied method with the given argument
*/
void CallMethod(u32 method, u32 argument);
};
}

View File

@ -0,0 +1,14 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "nvdec.h"
namespace skyline::soc::host1x {
NvDecClass::NvDecClass(const DeviceState &state, std::function<void()> opDoneCallback)
: state(state),
opDoneCallback(std::move(opDoneCallback)) {}
// No NVDEC methods are implemented here, every call is only logged as a warning together with its method ID and argument
void NvDecClass::CallMethod(u32 method, u32 argument) {
state.logger->Warn("Unknown NVDEC class method called: 0x{:X} argument: 0x{:X}", method, argument);
}
}

View File

@ -0,0 +1,22 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline::soc::host1x {
/**
* @brief The NVDEC Host1x class implements hardware accelerated video decoding for the VP9/VP8/H264/VC1 codecs
*/
class NvDecClass {
private:
const DeviceState &state;
std::function<void()> opDoneCallback; //!< Callback supplied at construction, nothing in the visible implementation invokes it yet
public:
NvDecClass(const DeviceState &state, std::function<void()> opDoneCallback);
/**
* @brief Handles a method call directed at this class
*/
void CallMethod(u32 method, u32 argument);
};
}

View File

@ -0,0 +1,14 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include "vic.h"
namespace skyline::soc::host1x {
VicClass::VicClass(const DeviceState &state, std::function<void()> opDoneCallback)
: state(state),
opDoneCallback(std::move(opDoneCallback)) {}
// No VIC methods are implemented here, every call is only logged as a warning together with its method ID and argument
void VicClass::CallMethod(u32 method, u32 argument) {
state.logger->Warn("Unknown VIC class method called: 0x{:X} argument: 0x{:X}", method, argument);
}
}

View File

@ -0,0 +1,22 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
namespace skyline::soc::host1x {
/**
* @brief The VIC Host1x class implements hardware accelerated image operations
*/
class VicClass {
private:
const DeviceState &state;
std::function<void()> opDoneCallback; //!< Callback supplied at construction, nothing in the visible implementation invokes it yet
public:
VicClass(const DeviceState &state, std::function<void()> opDoneCallback);
/**
* @brief Handles a method call directed at this class
*/
void CallMethod(u32 method, u32 argument);
};
}

View File

@ -0,0 +1,147 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <common/signal.h>
#include <loader/loader.h>
#include <kernel/types/KProcess.h>
#include <soc.h>
#include "command_fifo.h"
namespace skyline::soc::host1x {
/**
* @brief The opcodes that can be held in the header word of a command FIFO entry
* @note Only SetClass, Incr, NonIncr, Mask and Imm are handled by ChannelCommandFifo::Process, any other opcode results in an exception
* @url https://github.com/torvalds/linux/blob/477f70cd2a67904e04c2c2b9bd0fa2e95222f2f6/drivers/gpu/host1x/hw/debug_hw.c#L16
*/
enum class Host1xOpcode : u8 {
SetClass = 0x00, //!< Selects the target class and optionally writes a masked set of methods
Incr = 0x01, //!< Writes consecutive arguments to consecutive method addresses
NonIncr = 0x02, //!< Writes consecutive arguments to the same method address
Mask = 0x03, //!< Writes arguments to the method addresses selected by a 16-bit offset mask
Imm = 0x04, //!< Writes the immediate data held in the header to a single method
Restart = 0x05,
Gather = 0x06,
SetStrmId = 0x07,
SetAppId = 0x08,
SetPlyd = 0x09,
IncrW = 0x0a,
NonIncrW = 0x0b,
GatherW = 0x0c,
RestartW = 0x0d,
Extend = 0x0e,
};
/**
* @brief The header word of a command FIFO entry, every member is a different view of the same 32-bit word
* @note Which of the views is meaningful depends on the opcode
* @note NOTE(review): the exact bit positions rely on the compiler's bit-field layout, only the total size is asserted below
*/
union ChannelCommandFifoMethodHeader {
u32 raw{}; //!< The entire header word
u16 immdData : 12; //!< The immediate argument, read for the Imm opcode
u16 methodCount; //!< The amount of arguments following the header, read for the Incr and NonIncr opcodes
u16 offsetMask; //!< A mask of method offsets that have an argument following the header, read for the Mask opcode
struct {
u8 classMethodMask : 6; //!< A mask of method offsets that have an argument following the header, read for the SetClass opcode
ClassId classId : 10; //!< The class to direct subsequent methods at, read for the SetClass opcode
u16 methodAddress : 12; //!< The (base) method address that the opcode operates on
Host1xOpcode opcode : 4; //!< The opcode that determines how the rest of the header is interpreted
};
};
static_assert(sizeof(ChannelCommandFifoMethodHeader) == sizeof(u32));
// Sizes the gather queue and constructs every class of the channel with the shared syncpoint set
ChannelCommandFifo::ChannelCommandFifo(const DeviceState &state, SyncpointSet &syncpoints)
    : state(state),
      gatherQueue(GatherQueueSize),
      host1XClass(state, syncpoints),
      nvDecClass(state, syncpoints),
      vicClass(state, syncpoints) {}
// Logs the call and forwards it to the class matching targetClass, classes without an implementation are logged as an error and the call is dropped
void ChannelCommandFifo::Send(ClassId targetClass, u32 method, u32 argument) {
    state.logger->Verbose("Calling method in class: 0x{:X}, method: 0x{:X}, argument: 0x{:X}", targetClass, method, argument);

    if (targetClass == ClassId::Host1x)
        host1XClass.CallMethod(method, argument);
    else if (targetClass == ClassId::NvDec)
        nvDecClass.CallMethod(method, argument);
    else if (targetClass == ClassId::VIC)
        vicClass.CallMethod(method, argument);
    else
        state.logger->Error("Sending method to unimplemented class: 0x{:X}", targetClass);
}
/**
 * @brief Decodes every command in the gather and sends the resulting method calls to their target class
 * @param gather The words of the pushbuffer, each command is a header word optionally followed by argument words
 * @note The target class starts out as the Host1x class for each gather and is changed by SetClass
 * @throws exception If an opcode that isn't implemented is encountered or if the gather ends in the middle of a command
 */
void ChannelCommandFifo::Process(span<u32> gather) {
    constexpr u32 ClassMethodMaskBits{6}; //!< The width of ChannelCommandFifoMethodHeader::classMethodMask
    constexpr u32 OffsetMaskBits{std::numeric_limits<u16>::digits}; //!< The width of ChannelCommandFifoMethodHeader::offsetMask

    ClassId targetClass{ClassId::Host1x};
    auto entry{gather.begin()};

    // Advances to the next word of the gather and returns it, a truncated command throws rather than reading past the end of the gather
    auto nextArgument{[&]() -> u32 {
        if (++entry == gather.end())
            throw exception("Host1x command FIFO gather ended in the middle of a command");
        return *entry;
    }};

    for (; entry != gather.end(); entry++) {
        ChannelCommandFifoMethodHeader methodHeader{.raw = *entry};

        switch (methodHeader.opcode) {
            case Host1xOpcode::SetClass:
                targetClass = methodHeader.classId;

                // Only iterate over the bits that exist in the mask, shifting further than that is meaningless and shifting by 32 or more is undefined behaviour
                for (u32 i{}; i < ClassMethodMaskBits; i++)
                    if (methodHeader.classMethodMask & (1U << i))
                        Send(targetClass, methodHeader.methodAddress + i, nextArgument());

                break;

            case Host1xOpcode::Incr:
                for (u32 i{}; i < methodHeader.methodCount; i++)
                    Send(targetClass, methodHeader.methodAddress + i, nextArgument());

                break;

            case Host1xOpcode::NonIncr:
                for (u32 i{}; i < methodHeader.methodCount; i++)
                    Send(targetClass, methodHeader.methodAddress, nextArgument());

                break;

            case Host1xOpcode::Mask:
                for (u32 i{}; i < OffsetMaskBits; i++)
                    if (methodHeader.offsetMask & (1U << i))
                        Send(targetClass, methodHeader.methodAddress + i, nextArgument());

                break;

            case Host1xOpcode::Imm:
                // The argument is held within the header itself so no further words are consumed
                Send(targetClass, methodHeader.methodAddress, methodHeader.immdData);
                break;

            default:
                throw exception("Unimplemented Host1x command FIFO opcode: 0x{:X}", static_cast<u8>(methodHeader.opcode));
        }
    }
}
// Spawns the processing thread unless one is already running, the mutex stops concurrent callers from both spawning a thread
void ChannelCommandFifo::Start() {
    std::scoped_lock startGuard(threadStartMutex);

    if (thread.joinable())
        return;

    thread = std::thread(&ChannelCommandFifo::Run, this);
}
// Entry point of the processing thread: names the thread, installs the signal handler and then hands every queued gather to Process()
void ChannelCommandFifo::Run() {
pthread_setname_np(pthread_self(), "ChannelCommandFifo");
try {
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
// NOTE(review): presumably this blocks for as long as the queue is alive, only leaving via an exception - confirm against CircularQueue
gatherQueue.Process([this](span<u32> gather) {
state.logger->Debug("Processing pushbuffer: 0x{:X}, size: 0x{:X}", gather.data(), gather.size());
Process(gather);
});
} catch (const signal::SignalException &e) {
// SIGINT is what the destructor sends to stop this thread so it leads to a silent exit, any other signal is logged and kills the process
if (e.signal != SIGINT) {
state.logger->Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));
signal::BlockSignal({SIGINT});
state.process->Kill(false);
}
} catch (const std::exception &e) {
// Any other exception (such as one thrown while processing a gather) is logged and kills the process
state.logger->Error(e.what());
signal::BlockSignal({SIGINT});
state.process->Kill(false);
}
}
// Queues the gather for the processing thread, only the span is queued so the memory it refers to isn't copied here
void ChannelCommandFifo::Push(span<u32> gather) {
gatherQueue.Push(gather);
}
// Stops the processing thread (if it was ever started) by signalling it with SIGINT and then waits for it to exit
ChannelCommandFifo::~ChannelCommandFifo() {
    if (!thread.joinable())
        return;

    pthread_kill(thread.native_handle(), SIGINT);
    thread.join();
}
}

View File

@ -0,0 +1,63 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <common.h>
#include <common/circular_queue.h>
#include "syncpoint.h"
#include "classes/class.h"
#include "classes/host1x.h"
#include "classes/nvdec.h"
#include "classes/vic.h"
#include "tegra_host_interface.h"
namespace skyline::soc::host1x {
/**
* @brief Represents the command FIFO block of the Host1x controller, with one per each channel allowing them to run asynchronously
* @note A "gather" is equivalent to a GpEntry except we don't need to emulate them directly as they will always be contiguous across CPU memory, hence a regular span is sufficient
*/
class ChannelCommandFifo {
private:
const DeviceState &state;
static constexpr size_t GatherQueueSize{0x1000}; //!< Maximum size of the gather queue, this value is arbitrary
CircularQueue<span<u32>> gatherQueue; //!< Gathers that have been pushed but are yet to be processed
std::thread thread; //!< The thread that manages processing of pushbuffers within gathers
std::mutex threadStartMutex; //!< Protects the thread from being started multiple times
Host1XClass host1XClass; //!< The internal Host1x class, used for performing syncpoint waits and other general operations
TegraHostInterface<NvDecClass> nvDecClass; //!< The THI wrapped NVDEC class for video decoding
TegraHostInterface<VicClass> vicClass; //!< The THI wrapped VIC class for acceleration of image operations
/**
* @brief Sends a method call to the target class
*/
void Send(ClassId targetClass, u32 method, u32 argument);
/**
* @brief Processes the pushbuffer contained within the given gather, calling methods as needed
*/
void Process(span<u32> gather);
/**
* @brief Executes all pending gathers in the FIFO and polls for more
*/
void Run();
public:
ChannelCommandFifo(const DeviceState &state, SyncpointSet &syncpoints);
/**
* @brief Signals the processing thread to stop and joins it if it was started
*/
~ChannelCommandFifo();
/**
* @brief Starts the pushbuffer processing thread if it hasn't already been started
*/
void Start();
/**
* @brief Pushes a single gather into the fifo to be processed asynchronously
*/
void Push(span<u32> gather);
};
}

View File

@ -61,4 +61,6 @@ namespace skyline::soc::host1x {
*/
bool Wait(u32 threshold, std::chrono::steady_clock::duration timeout);
};
using SyncpointSet = std::array<Syncpoint, SyncpointCount>;
}

View File

@ -0,0 +1,81 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <queue>
#include <common.h>
#include "syncpoint.h"
#include "classes/class.h"
namespace skyline::soc::host1x {
/**
 * @brief The 'Tegra Host Interface' or THI sits in between the Host1x and the class falcons, implementing syncpoint queueing and a method interface
 * @tparam ClassType The device class behind the THI, it must be constructible from (const DeviceState &, std::function<void()>) and provide CallMethod(u32, u32)
 */
template<typename ClassType>
class TegraHostInterface {
  private:
    const DeviceState &state;
    SyncpointSet &syncpoints;
    ClassType deviceClass; //!< The device class behind the THI, such as NVDEC or VIC
    u32 storedMethod{}; //!< Method that will be used for deviceClass.CallMethod, set using Method0
    std::queue<u32> incrQueue; //!< Queue of syncpoint IDs to be incremented when a device operation is finished, the same syncpoint may be held multiple times within the queue
    std::mutex incrMutex; //!< Guards all accesses to incrQueue

    /**
     * @brief Increments every syncpoint held in the queue in FIFO order, leaving the queue empty
     * @note This is handed to the device class as its operation-done callback
     */
    void SubmitPendingIncrs() {
        std::scoped_lock lock(incrMutex);

        while (!incrQueue.empty()) {
            u32 syncpointId{incrQueue.front()};
            incrQueue.pop();

            state.logger->Debug("Increment syncpoint: {}", syncpointId);
            syncpoints.at(syncpointId).Increment();
        }
    }

  public:
    /**
     * @note The initialisers are listed in member declaration order as that is the order they are run in regardless of how they are written
     */
    TegraHostInterface(const DeviceState &state, SyncpointSet &syncpoints)
        : state(state),
          syncpoints(syncpoints),
          deviceClass(state, [this] { SubmitPendingIncrs(); }) {}

    /**
     * @brief Handles a method call made through the THI, either servicing it directly or forwarding it to the device class
     * @param method The ID of the THI method
     * @param argument The raw argument word supplied alongside the method
     */
    void CallMethod(u32 method, u32 argument) {
        constexpr u32 Method0MethodId{0x10}; //!< Sets the method to be called on the device class upon a call to Method1, see TRM '15.5.6 NV_PVIC_THI_METHOD0'
        constexpr u32 Method1MethodId{0x11}; //!< Calls the method set by Method0 with the supplied argument, see TRM '15.5.7 NV_PVIC_THI_METHOD1'

        switch (method) {
            case IncrementSyncpointMethodId: {
                IncrementSyncpointMethod incrSyncpoint{.raw = argument};

                switch (incrSyncpoint.condition) {
                    case IncrementSyncpointMethod::Condition::Immediate:
                        state.logger->Debug("Increment syncpoint: {}", incrSyncpoint.index);
                        syncpoints.at(incrSyncpoint.index).Increment();
                        break;

                    case IncrementSyncpointMethod::Condition::OpDone:
                        state.logger->Debug("Queue syncpoint for OpDone: {}", incrSyncpoint.index);

                        {
                            // The queue is drained under this mutex so it has to be held while pushing as well, it is released before draining below as the mutex isn't recursive
                            std::scoped_lock lock(incrMutex);
                            incrQueue.push(incrSyncpoint.index);
                        }

                        SubmitPendingIncrs(); // FIXME: immediately submit the incrs as classes are not yet implemented
                        break; // Without this the handled case would fall through and be reported as unimplemented

                    default:
                        state.logger->Warn("Unimplemented syncpoint condition: {}", static_cast<u8>(incrSyncpoint.condition));
                        break;
                }

                break;
            }

            case Method0MethodId:
                storedMethod = argument;
                break;

            case Method1MethodId:
                deviceClass.CallMethod(storedMethod, argument);
                break;

            default:
                state.logger->Error("Unknown THI method called: 0x{:X}, argument: 0x{:X}", method, argument);
                break;
        }
    }
};
}