Merge branch 'cemu-project:main' into metal

This commit is contained in:
SamoZ256 2024-07-29 15:13:39 +02:00 committed by GitHub
commit 89a2c23dd7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 835 additions and 676 deletions

View File

@ -239,7 +239,17 @@ jobs:
- name: "Install system dependencies" - name: "Install system dependencies"
run: | run: |
brew update brew update
brew install llvm@15 ninja nasm molten-vk automake libtool brew install llvm@15 ninja nasm automake libtool
brew install cmake python3 ninja
- name: "Build and install molten-vk"
run: |
git clone https://github.com/KhronosGroup/MoltenVK.git
cd MoltenVK
git checkout bf097edc74ec3b6dfafdcd5a38d3ce14b11952d6
./fetchDependencies --macos
make macos
make install
- name: "Setup cmake" - name: "Setup cmake"
uses: jwlawson/actions-setup-cmake@v2 uses: jwlawson/actions-setup-cmake@v2

View File

@ -377,7 +377,9 @@ add_library(CemuCafe
OS/libs/gx2/GX2_Texture.h OS/libs/gx2/GX2_Texture.h
OS/libs/gx2/GX2_TilingAperture.cpp OS/libs/gx2/GX2_TilingAperture.cpp
OS/libs/h264_avc/H264Dec.cpp OS/libs/h264_avc/H264Dec.cpp
OS/libs/h264_avc/H264DecBackendAVC.cpp
OS/libs/h264_avc/h264dec.h OS/libs/h264_avc/h264dec.h
OS/libs/h264_avc/H264DecInternal.h
OS/libs/h264_avc/parser OS/libs/h264_avc/parser
OS/libs/h264_avc/parser/H264Parser.cpp OS/libs/h264_avc/parser/H264Parser.cpp
OS/libs/h264_avc/parser/H264Parser.h OS/libs/h264_avc/parser/H264Parser.h

View File

@ -501,8 +501,6 @@ void debugger_createPPCStateSnapshot(PPCInterpreter_t* hCPU)
debuggerState.debugSession.ppcSnapshot.cr[i] = hCPU->cr[i]; debuggerState.debugSession.ppcSnapshot.cr[i] = hCPU->cr[i];
} }
void DebugLogStackTrace(OSThread_t* thread, MPTR sp);
void debugger_enterTW(PPCInterpreter_t* hCPU) void debugger_enterTW(PPCInterpreter_t* hCPU)
{ {
// handle logging points // handle logging points

View File

@ -212,11 +212,12 @@ static void PPCInterpreter_SUBF(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_SUBFO(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_SUBFO(PPCInterpreter_t* hCPU, uint32 opcode)
{ {
// untested (Don't Starve Giant Edition uses this) // Seen in Don't Starve Giant Edition and Teslagrad
// also used by DS Virtual Console (Super Mario 64 DS) // also used by DS Virtual Console (Super Mario 64 DS)
PPC_OPC_TEMPL3_XO(); PPC_OPC_TEMPL3_XO();
hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; uint32 result = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1;
PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~hCPU->gpr[rA], hCPU->gpr[rB], hCPU->gpr[rD])); PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~hCPU->gpr[rA], hCPU->gpr[rB], result));
hCPU->gpr[rD] = result;
if (opHasRC()) if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]); ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU); PPCInterpreter_nextInstruction(hCPU);

View File

@ -1,6 +1,6 @@
#include "Cafe/OS/common/OSCommon.h" #include "Cafe/OS/common/OSCommon.h"
#include "Common/SysAllocator.h" #include "Common/SysAllocator.h"
#include "Cafe/OS/RPL/rpl.h" #include "Cafe/OS/RPL/rpl_symbol_storage.h"
#include "Cafe/OS/libs/coreinit/coreinit_Misc.h" #include "Cafe/OS/libs/coreinit/coreinit_Misc.h"
@ -69,7 +69,7 @@ sint32 ScoreStackTrace(OSThread_t* thread, MPTR sp)
return score; return score;
} }
void DebugLogStackTrace(OSThread_t* thread, MPTR sp) void DebugLogStackTrace(OSThread_t* thread, MPTR sp, bool printSymbols)
{ {
// sp might not point to a valid stackframe // sp might not point to a valid stackframe
// scan stack and evaluate which sp is most likely the beginning of the stackframe // scan stack and evaluate which sp is most likely the beginning of the stackframe
@ -107,7 +107,15 @@ void DebugLogStackTrace(OSThread_t* thread, MPTR sp)
uint32 returnAddress = 0; uint32 returnAddress = 0;
returnAddress = memory_readU32(nextStackPtr + 4); returnAddress = memory_readU32(nextStackPtr + 4);
cemuLog_log(LogType::Force, fmt::format("SP {0:08x} ReturnAddr {1:08x}", nextStackPtr, returnAddress));
RPLStoredSymbol* symbol = nullptr;
if(printSymbols)
symbol = rplSymbolStorage_getByClosestAddress(returnAddress);
if(symbol)
cemuLog_log(LogType::Force, fmt::format("SP {:08x} ReturnAddr {:08x} ({}.{}+0x{:x})", nextStackPtr, returnAddress, (const char*)symbol->libName, (const char*)symbol->symbolName, returnAddress - symbol->address));
else
cemuLog_log(LogType::Force, fmt::format("SP {:08x} ReturnAddr {:08x}", nextStackPtr, returnAddress));
currentStackPtr = nextStackPtr; currentStackPtr = nextStackPtr;
} }

View File

@ -2,8 +2,6 @@
#include "Cafe/HW/Espresso/PPCCallback.h" #include "Cafe/HW/Espresso/PPCCallback.h"
#include "Cafe/OS/libs/coreinit/coreinit_MEM_ExpHeap.h" #include "Cafe/OS/libs/coreinit/coreinit_MEM_ExpHeap.h"
void DebugLogStackTrace(OSThread_t* thread, MPTR sp);
#define EXP_HEAP_GET_FROM_FREE_BLOCKCHAIN(__blockchain__) (MEMExpHeapHead2*)((uintptr_t)__blockchain__ - offsetof(MEMExpHeapHead2, expHeapHead) - offsetof(MEMExpHeapHead40_t, chainFreeBlocks)) #define EXP_HEAP_GET_FROM_FREE_BLOCKCHAIN(__blockchain__) (MEMExpHeapHead2*)((uintptr_t)__blockchain__ - offsetof(MEMExpHeapHead2, expHeapHead) - offsetof(MEMExpHeapHead40_t, chainFreeBlocks))
namespace coreinit namespace coreinit

View File

@ -14,13 +14,10 @@ namespace coreinit
return coreinit::MEMAllocFromExpHeapEx(_sysHeapHandle, size, alignment); return coreinit::MEMAllocFromExpHeapEx(_sysHeapHandle, size, alignment);
} }
void export_OSAllocFromSystem(PPCInterpreter_t* hCPU) void OSFreeToSystem(void* ptr)
{ {
ppcDefineParamU32(size, 0); _sysHeapFreeCounter++;
ppcDefineParamS32(alignment, 1); coreinit::MEMFreeToExpHeap(_sysHeapHandle, ptr);
MEMPTR<void> mem = OSAllocFromSystem(size, alignment);
cemuLog_logDebug(LogType::Force, "OSAllocFromSystem(0x{:x}, {}) -> 0x{:08x}", size, alignment, mem.GetMPTR());
osLib_returnFromFunction(hCPU, mem.GetMPTR());
} }
void InitSysHeap() void InitSysHeap()
@ -34,7 +31,8 @@ namespace coreinit
void InitializeSysHeap() void InitializeSysHeap()
{ {
osLib_addFunction("coreinit", "OSAllocFromSystem", export_OSAllocFromSystem); cafeExportRegister("h264", OSAllocFromSystem, LogType::CoreinitMem);
cafeExportRegister("h264", OSFreeToSystem, LogType::CoreinitMem);
} }
} }

View File

@ -4,5 +4,8 @@ namespace coreinit
{ {
void InitSysHeap(); void InitSysHeap();
void* OSAllocFromSystem(uint32 size, uint32 alignment);
void OSFreeToSystem(void* ptr);
void InitializeSysHeap(); void InitializeSysHeap();
} }

View File

@ -1,17 +1,12 @@
#include "Cafe/OS/common/OSCommon.h" #include "Cafe/OS/common/OSCommon.h"
#include "Cafe/HW/Espresso/PPCCallback.h" #include "Cafe/HW/Espresso/PPCCallback.h"
#include "Cafe/OS/libs/h264_avc/parser/H264Parser.h" #include "Cafe/OS/libs/h264_avc/parser/H264Parser.h"
#include "Cafe/OS/libs/h264_avc/H264DecInternal.h"
#include "util/highresolutiontimer/HighResolutionTimer.h" #include "util/highresolutiontimer/HighResolutionTimer.h"
#include "Cafe/CafeSystem.h" #include "Cafe/CafeSystem.h"
#include "h264dec.h" #include "h264dec.h"
extern "C"
{
#include "../dependencies/ih264d/common/ih264_typedefs.h"
#include "../dependencies/ih264d/decoder/ih264d.h"
};
enum class H264DEC_STATUS : uint32 enum class H264DEC_STATUS : uint32
{ {
SUCCESS = 0x0, SUCCESS = 0x0,
@ -33,10 +28,35 @@ namespace H264
return false; return false;
} }
struct H264Context
{
struct
{
MEMPTR<void> ptr{ nullptr };
uint32be length{ 0 };
float64be timestamp;
}BitStream;
struct
{
MEMPTR<void> outputFunc{ nullptr };
uint8be outputPerFrame{ 0 }; // whats the default?
MEMPTR<void> userMemoryParam{ nullptr };
}Param;
// misc
uint32be sessionHandle;
// decoder state
struct
{
uint32 numFramesInFlight{0};
}decoderState;
};
uint32 H264DECMemoryRequirement(uint32 codecProfile, uint32 codecLevel, uint32 width, uint32 height, uint32be* sizeRequirementOut) uint32 H264DECMemoryRequirement(uint32 codecProfile, uint32 codecLevel, uint32 width, uint32 height, uint32be* sizeRequirementOut)
{ {
if (H264_IsBotW()) if (H264_IsBotW())
{ {
static_assert(sizeof(H264Context) < 256);
*sizeRequirementOut = 256; *sizeRequirementOut = 256;
return 0; return 0;
} }
@ -169,591 +189,48 @@ namespace H264
return H264DEC_STATUS::BAD_STREAM; return H264DEC_STATUS::BAD_STREAM;
} }
struct H264Context
{
struct
{
MEMPTR<void> ptr{ nullptr };
uint32be length{ 0 };
float64be timestamp;
}BitStream;
struct
{
MEMPTR<void> outputFunc{ nullptr };
uint8be outputPerFrame{ 0 }; // whats the default?
MEMPTR<void> userMemoryParam{ nullptr };
}Param;
// misc
uint32be sessionHandle;
};
class H264AVCDecoder
{
static void* ivd_aligned_malloc(void* ctxt, WORD32 alignment, WORD32 size)
{
#ifdef _WIN32
return _aligned_malloc(size, alignment);
#else
// alignment is atleast sizeof(void*)
alignment = std::max<WORD32>(alignment, sizeof(void*));
//smallest multiple of 2 at least as large as alignment
alignment--;
alignment |= alignment << 1;
alignment |= alignment >> 1;
alignment |= alignment >> 2;
alignment |= alignment >> 4;
alignment |= alignment >> 8;
alignment |= alignment >> 16;
alignment ^= (alignment >> 1);
void* temp;
posix_memalign(&temp, (size_t)alignment, (size_t)size);
return temp;
#endif
}
static void ivd_aligned_free(void* ctxt, void* buf)
{
#ifdef _WIN32
_aligned_free(buf);
#else
free(buf);
#endif
return;
}
public:
struct DecodeResult
{
bool frameReady{ false };
double timestamp;
void* imageOutput;
ivd_video_decode_op_t decodeOutput;
};
void Init(bool isBufferedMode)
{
ih264d_create_ip_t s_create_ip{ 0 };
ih264d_create_op_t s_create_op{ 0 };
s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t);
s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE;
s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 1; // shared display buffer mode -> We give the decoder a list of buffers that it will use (?)
s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t);
s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV;
s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc;
s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free;
s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL;
WORD32 status = ih264d_api_function(m_codecCtx, &s_create_ip, &s_create_op);
cemu_assert(!status);
m_codecCtx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle;
m_codecCtx->pv_fxns = (void*)&ih264d_api_function;
m_codecCtx->u4_size = sizeof(iv_obj_t);
SetDecoderCoreCount(1);
m_isBufferedMode = isBufferedMode;
UpdateParameters(false);
m_bufferedResults.clear();
m_numDecodedFrames = 0;
m_hasBufferSizeInfo = false;
m_timestampIndex = 0;
}
void Destroy()
{
if (!m_codecCtx)
return;
ih264d_delete_ip_t s_delete_ip{ 0 };
ih264d_delete_op_t s_delete_op{ 0 };
s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t);
s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE;
s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t);
WORD32 status = ih264d_api_function(m_codecCtx, &s_delete_ip, &s_delete_op);
cemu_assert_debug(!status);
m_codecCtx = nullptr;
}
void SetDecoderCoreCount(uint32 coreCount)
{
ih264d_ctl_set_num_cores_ip_t s_set_cores_ip;
ih264d_ctl_set_num_cores_op_t s_set_cores_op;
s_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
s_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES;
s_set_cores_ip.u4_num_cores = coreCount; // valid numbers are 1-4
s_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
s_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);
IV_API_CALL_STATUS_T status = ih264d_api_function(m_codecCtx, (void *)&s_set_cores_ip, (void *)&s_set_cores_op);
cemu_assert(status == IV_SUCCESS);
}
static bool GetImageInfo(uint8* stream, uint32 length, uint32& imageWidth, uint32& imageHeight)
{
// create temporary decoder
ih264d_create_ip_t s_create_ip{ 0 };
ih264d_create_op_t s_create_op{ 0 };
s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t);
s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE;
s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 0;
s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t);
s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV;
s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc;
s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free;
s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL;
iv_obj_t* ctx = nullptr;
WORD32 status = ih264d_api_function(ctx, &s_create_ip, &s_create_op);
cemu_assert_debug(!status);
if (status != IV_SUCCESS)
return false;
ctx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle;
ctx->pv_fxns = (void*)&ih264d_api_function;
ctx->u4_size = sizeof(iv_obj_t);
// set header-only mode
ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 };
ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 };
ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t;
ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t;
ps_ctl_ip->u4_disp_wd = 0;
ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE;
ps_ctl_ip->e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
ps_ctl_ip->e_vid_dec_mode = IVD_DECODE_HEADER;
ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL;
ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t);
ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t);
status = ih264d_api_function(ctx, &s_h264d_ctl_ip, &s_h264d_ctl_op);
cemu_assert(!status);
// decode stream
ivd_video_decode_ip_t s_dec_ip{ 0 };
ivd_video_decode_op_t s_dec_op{ 0 };
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
s_dec_ip.pv_stream_buffer = stream;
s_dec_ip.u4_num_Bytes = length;
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
s_dec_op.u4_raw_wd = 0;
s_dec_op.u4_raw_ht = 0;
status = ih264d_api_function(ctx, &s_dec_ip, &s_dec_op);
//cemu_assert(status == 0); -> This errors when not both the headers are present, but it will still set the parameters we need
bool isValid = false;
if (true)//status == 0)
{
imageWidth = s_dec_op.u4_raw_wd;
imageHeight = s_dec_op.u4_raw_ht;
cemu_assert_debug(imageWidth != 0 && imageHeight != 0);
isValid = true;
}
// destroy decoder
ih264d_delete_ip_t s_delete_ip{ 0 };
ih264d_delete_op_t s_delete_op{ 0 };
s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t);
s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE;
s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t);
status = ih264d_api_function(ctx, &s_delete_ip, &s_delete_op);
cemu_assert_debug(!status);
return isValid;
}
void Decode(void* data, uint32 length, double timestamp, void* imageOutput, DecodeResult& decodeResult)
{
if (!m_hasBufferSizeInfo)
{
uint32 numByteConsumed = 0;
if (!DetermineBufferSizes(data, length, numByteConsumed))
{
cemuLog_log(LogType::Force, "H264: Unable to determine picture size. Ignoring decode input");
decodeResult.frameReady = false;
return;
}
length -= numByteConsumed;
data = (uint8*)data + numByteConsumed;
m_hasBufferSizeInfo = true;
}
ivd_video_decode_ip_t s_dec_ip{ 0 };
ivd_video_decode_op_t s_dec_op{ 0 };
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
// remember timestamp and associated output buffer
m_timestamps[m_timestampIndex] = timestamp;
m_imageBuffers[m_timestampIndex] = imageOutput;
s_dec_ip.u4_ts = m_timestampIndex;
m_timestampIndex = (m_timestampIndex + 1) % 64;
s_dec_ip.pv_stream_buffer = (uint8*)data;
s_dec_ip.u4_num_Bytes = length;
s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0;
s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0;
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
BenchmarkTimer bt;
bt.Start();
WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
if (status != 0 && (s_dec_op.u4_error_code&0xFF) == IVD_RES_CHANGED)
{
// resolution change
ResetDecoder();
m_hasBufferSizeInfo = false;
Decode(data, length, timestamp, imageOutput, decodeResult);
return;
}
else if (status != 0)
{
cemuLog_log(LogType::Force, "H264: Failed to decode frame (error 0x{:08x})", status);
decodeResult.frameReady = false;
return;
}
bt.Stop();
double decodeTime = bt.GetElapsedMilliseconds();
cemu_assert(s_dec_op.u4_frame_decoded_flag);
cemu_assert_debug(s_dec_op.u4_num_bytes_consumed == length);
cemu_assert_debug(m_isBufferedMode || s_dec_op.u4_output_present); // if buffered mode is disabled, then every input should output a frame (except for partial slices?)
if (s_dec_op.u4_output_present)
{
cemu_assert(s_dec_op.e_output_format == IV_YUV_420SP_UV);
if (H264_IsBotW())
{
if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088)
s_dec_op.s_disp_frm_buf.u4_y_ht = 1080;
}
DecodeResult tmpResult;
tmpResult.frameReady = s_dec_op.u4_output_present != 0;
tmpResult.timestamp = m_timestamps[s_dec_op.u4_ts];
tmpResult.imageOutput = m_imageBuffers[s_dec_op.u4_ts];
tmpResult.decodeOutput = s_dec_op;
AddBufferedResult(tmpResult);
// transfer image to PPC output buffer and also correct stride
bt.Start();
CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_imageBuffers[s_dec_op.u4_ts], s_dec_op);
bt.Stop();
double copyTime = bt.GetElapsedMilliseconds();
// release buffer
sint32 bufferId = -1;
for (size_t i = 0; i < m_displayBuf.size(); i++)
{
if (s_dec_op.s_disp_frm_buf.pv_y_buf >= m_displayBuf[i].data() && s_dec_op.s_disp_frm_buf.pv_y_buf < (m_displayBuf[i].data() + m_displayBuf[i].size()))
{
bufferId = (sint32)i;
break;
}
}
cemu_assert_debug(bufferId == s_dec_op.u4_disp_buf_id);
cemu_assert(bufferId >= 0);
ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 };
ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 };
s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME;
s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t);
s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t);
s_video_rel_disp_ip.u4_disp_buf_id = bufferId;
status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op);
cemu_assert(!status);
cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms CopyTime {}ms", decodeTime, copyTime);
}
else
{
cemuLog_log(LogType::H264, "H264Bench | DecodeTime{}ms", decodeTime);
}
if (s_dec_op.u4_frame_decoded_flag)
m_numDecodedFrames++;
if (m_isBufferedMode)
{
// in buffered mode, always buffer 5 frames regardless of actual reordering and decoder latency
if (m_numDecodedFrames > 5)
GetCurrentBufferedResult(decodeResult);
}
else if(m_numDecodedFrames > 0)
GetCurrentBufferedResult(decodeResult);
// get VUI
//ih264d_ctl_get_vui_params_ip_t s_ctl_get_vui_params_ip;
//ih264d_ctl_get_vui_params_op_t s_ctl_get_vui_params_op;
//s_ctl_get_vui_params_ip.e_cmd = IVD_CMD_VIDEO_CTL;
//s_ctl_get_vui_params_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_GET_VUI_PARAMS;
//s_ctl_get_vui_params_ip.u4_size = sizeof(ih264d_ctl_get_vui_params_ip_t);
//s_ctl_get_vui_params_op.u4_size = sizeof(ih264d_ctl_get_vui_params_op_t);
//status = ih264d_api_function(mCodecCtx, &s_ctl_get_vui_params_ip, &s_ctl_get_vui_params_op);
//cemu_assert(status == 0);
}
std::vector<DecodeResult> Flush()
{
std::vector<DecodeResult> results;
// set flush mode
ivd_ctl_flush_ip_t s_video_flush_ip{ 0 };
ivd_ctl_flush_op_t s_video_flush_op{ 0 };
s_video_flush_ip.e_cmd = IVD_CMD_VIDEO_CTL;
s_video_flush_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH;
s_video_flush_ip.u4_size = sizeof(ivd_ctl_flush_ip_t);
s_video_flush_op.u4_size = sizeof(ivd_ctl_flush_op_t);
WORD32 status = ih264d_api_function(m_codecCtx, &s_video_flush_ip, &s_video_flush_op);
if (status != 0)
cemuLog_log(LogType::Force, "H264Dec: Unexpected error during flush ({})", status);
// get all frames from the codec
while (true)
{
ivd_video_decode_ip_t s_dec_ip{ 0 };
ivd_video_decode_op_t s_dec_op{ 0 };
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
s_dec_ip.pv_stream_buffer = NULL;
s_dec_ip.u4_num_Bytes = 0;
s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0;
s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0;
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
if (status != 0)
break;
cemu_assert_debug(s_dec_op.u4_output_present != 0); // should never be zero?
if(s_dec_op.u4_output_present == 0)
continue;
if (H264_IsBotW())
{
if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088)
s_dec_op.s_disp_frm_buf.u4_y_ht = 1080;
}
DecodeResult tmpResult;
tmpResult.frameReady = s_dec_op.u4_output_present != 0;
tmpResult.timestamp = m_timestamps[s_dec_op.u4_ts];
tmpResult.imageOutput = m_imageBuffers[s_dec_op.u4_ts];
tmpResult.decodeOutput = s_dec_op;
AddBufferedResult(tmpResult);
CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_imageBuffers[s_dec_op.u4_ts], s_dec_op);
}
results = std::move(m_bufferedResults);
return results;
}
void CopyImageToResultBuffer(uint8* yIn, uint8* uvIn, uint8* bufOut, ivd_video_decode_op_t& decodeInfo)
{
uint32 imageWidth = decodeInfo.s_disp_frm_buf.u4_y_wd;
uint32 imageHeight = decodeInfo.s_disp_frm_buf.u4_y_ht;
size_t inputStride = decodeInfo.s_disp_frm_buf.u4_y_strd;
size_t outputStride = (imageWidth + 0xFF) & ~0xFF;
// copy Y
uint8* yOut = bufOut;
for (uint32 row = 0; row < imageHeight; row++)
{
memcpy(yOut, yIn, imageWidth);
yIn += inputStride;
yOut += outputStride;
}
// copy UV
uint8* uvOut = bufOut + outputStride * imageHeight;
for (uint32 row = 0; row < imageHeight/2; row++)
{
memcpy(uvOut, uvIn, imageWidth);
uvIn += inputStride;
uvOut += outputStride;
}
}
private:
bool DetermineBufferSizes(void* data, uint32 length, uint32& numByteConsumed)
{
numByteConsumed = 0;
UpdateParameters(true);
ivd_video_decode_ip_t s_dec_ip{ 0 };
ivd_video_decode_op_t s_dec_op{ 0 };
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
s_dec_ip.pv_stream_buffer = (uint8*)data;
s_dec_ip.u4_num_Bytes = length;
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
if (status != 0)
{
cemuLog_log(LogType::Force, "H264: Unable to determine buffer sizes for stream");
return false;
}
numByteConsumed = s_dec_op.u4_num_bytes_consumed;
cemu_assert(status == 0);
if (s_dec_op.u4_pic_wd == 0 || s_dec_op.u4_pic_ht == 0)
return false;
UpdateParameters(false);
ReinitBuffers();
return true;
}
void ReinitBuffers()
{
ivd_ctl_getbufinfo_ip_t s_ctl_ip{ 0 };
ivd_ctl_getbufinfo_op_t s_ctl_op{ 0 };
WORD32 outlen = 0;
s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO;
s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t);
s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t);
WORD32 status = ih264d_api_function(m_codecCtx, &s_ctl_ip, &s_ctl_op);
cemu_assert(!status);
// allocate
for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
{
m_displayBuf.emplace_back().resize(s_ctl_op.u4_min_out_buf_size[0] + s_ctl_op.u4_min_out_buf_size[1]);
}
// set
ivd_set_display_frame_ip_t s_set_display_frame_ip{ 0 }; // make sure to zero-initialize this. The codec seems to check the first 3 pointers/sizes per frame, regardless of the value of u4_num_bufs
ivd_set_display_frame_op_t s_set_display_frame_op{ 0 };
s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME;
s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t);
s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t);
cemu_assert_debug(s_ctl_op.u4_min_num_out_bufs == 2);
cemu_assert_debug(s_ctl_op.u4_min_out_buf_size[0] != 0 && s_ctl_op.u4_min_out_buf_size[1] != 0);
s_set_display_frame_ip.num_disp_bufs = s_ctl_op.u4_num_disp_bufs;
for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
{
s_set_display_frame_ip.s_disp_buffer[i].u4_num_bufs = 2;
s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[0] = s_ctl_op.u4_min_out_buf_size[0];
s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[1] = s_ctl_op.u4_min_out_buf_size[1];
s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[0] = m_displayBuf[i].data() + 0;
s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[1] = m_displayBuf[i].data() + s_ctl_op.u4_min_out_buf_size[0];
}
status = ih264d_api_function(m_codecCtx, &s_set_display_frame_ip, &s_set_display_frame_op);
cemu_assert(!status);
// mark all as released (available)
for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
{
ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 };
ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 };
s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME;
s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t);
s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t);
s_video_rel_disp_ip.u4_disp_buf_id = i;
status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op);
cemu_assert(!status);
}
}
void ResetDecoder()
{
ivd_ctl_reset_ip_t s_ctl_ip;
ivd_ctl_reset_op_t s_ctl_op;
s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET;
s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t);
s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t);
WORD32 status = ih264d_api_function(m_codecCtx, (void*)&s_ctl_ip, (void*)&s_ctl_op);
cemu_assert_debug(status == 0);
}
void UpdateParameters(bool headerDecodeOnly)
{
ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 };
ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 };
ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t;
ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t;
ps_ctl_ip->u4_disp_wd = 0;
ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE;
ps_ctl_ip->e_frm_out_mode = m_isBufferedMode ? IVD_DISPLAY_FRAME_OUT : IVD_DECODE_FRAME_OUT;
ps_ctl_ip->e_vid_dec_mode = headerDecodeOnly ? IVD_DECODE_HEADER : IVD_DECODE_FRAME;
ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL;
ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t);
ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t);
WORD32 status = ih264d_api_function(m_codecCtx, &s_h264d_ctl_ip, &s_h264d_ctl_op);
cemu_assert(status == 0);
}
/* In non-flush mode we have a delay of (at least?) 5 frames */
void AddBufferedResult(DecodeResult& decodeResult)
{
if (decodeResult.frameReady)
m_bufferedResults.emplace_back(decodeResult);
}
void GetCurrentBufferedResult(DecodeResult& decodeResult)
{
cemu_assert(!m_bufferedResults.empty());
if (m_bufferedResults.empty())
{
decodeResult.frameReady = false;
return;
}
decodeResult = m_bufferedResults.front();
m_bufferedResults.erase(m_bufferedResults.begin());
}
private:
iv_obj_t* m_codecCtx{nullptr};
bool m_hasBufferSizeInfo{ false };
bool m_isBufferedMode{ false };
double m_timestamps[64];
void* m_imageBuffers[64];
uint32 m_timestampIndex{0};
std::vector<DecodeResult> m_bufferedResults;
uint32 m_numDecodedFrames{0};
std::vector<std::vector<uint8>> m_displayBuf;
};
H264DEC_STATUS H264DECGetImageSize(uint8* stream, uint32 length, uint32 offset, uint32be* outputWidth, uint32be* outputHeight) H264DEC_STATUS H264DECGetImageSize(uint8* stream, uint32 length, uint32 offset, uint32be* outputWidth, uint32be* outputHeight)
{ {
cemu_assert(offset <= length); if(!stream || length < 4 || !outputWidth || !outputHeight)
return H264DEC_STATUS::INVALID_PARAM;
uint32 imageWidth, imageHeight; if( (offset+4) > length )
return H264DEC_STATUS::INVALID_PARAM;
if (H264AVCDecoder::GetImageInfo(stream, length, imageWidth, imageHeight)) uint8* cur = stream + offset;
uint8* end = stream + length;
cur += 2; // we access cur[-2] and cur[-1] so we need to start at offset 2
while(cur < end-2)
{ {
if (H264_IsBotW()) // check for start code
if(*cur != 1)
{ {
if (imageWidth == 1920 && imageHeight == 1088) cur++;
imageHeight = 1080; continue;
} }
*outputWidth = imageWidth; // check if this is a valid NAL header
*outputHeight = imageHeight; if(cur[-2] != 0 || cur[-1] != 0 || cur[0] != 1)
}
else
{ {
*outputWidth = 0; cur++;
*outputHeight = 0; continue;
}
uint8 nalHeader = cur[1];
if((nalHeader & 0x1F) != 7)
{
cur++;
continue;
}
h264State_seq_parameter_set_t psp;
bool r = h264Parser_ParseSPS(cur+2, end-cur-2, psp);
if(!r)
{
cemu_assert_suspicious(); // should not happen
return H264DEC_STATUS::BAD_STREAM; return H264DEC_STATUS::BAD_STREAM;
} }
*outputWidth = (psp.pic_width_in_mbs_minus1 + 1) * 16;
*outputHeight = (psp.pic_height_in_map_units_minus1 + 1) * 16; // affected by frame_mbs_only_flag?
return H264DEC_STATUS::SUCCESS; return H264DEC_STATUS::SUCCESS;
} }
return H264DEC_STATUS::BAD_STREAM;
}
uint32 H264DECInitParam(uint32 workMemorySize, void* workMemory) uint32 H264DECInitParam(uint32 workMemorySize, void* workMemory)
{ {
@ -762,26 +239,28 @@ namespace H264
return 0; return 0;
} }
std::unordered_map<uint32, H264AVCDecoder*> sDecoderSessions; std::unordered_map<uint32, H264DecoderBackend*> sDecoderSessions;
std::mutex sDecoderSessionsMutex; std::mutex sDecoderSessionsMutex;
std::atomic_uint32_t sCurrentSessionHandle{ 1 }; std::atomic_uint32_t sCurrentSessionHandle{ 1 };
static H264AVCDecoder* _CreateDecoderSession(uint32& handleOut) H264DecoderBackend* CreateAVCDecoder();
static H264DecoderBackend* _CreateDecoderSession(uint32& handleOut)
{ {
std::unique_lock _lock(sDecoderSessionsMutex); std::unique_lock _lock(sDecoderSessionsMutex);
handleOut = sCurrentSessionHandle.fetch_add(1); handleOut = sCurrentSessionHandle.fetch_add(1);
H264AVCDecoder* session = new H264AVCDecoder(); H264DecoderBackend* session = CreateAVCDecoder();
sDecoderSessions.try_emplace(handleOut, session); sDecoderSessions.try_emplace(handleOut, session);
return session; return session;
} }
static H264AVCDecoder* _AcquireDecoderSession(uint32 handle) static H264DecoderBackend* _AcquireDecoderSession(uint32 handle)
{ {
std::unique_lock _lock(sDecoderSessionsMutex); std::unique_lock _lock(sDecoderSessionsMutex);
auto it = sDecoderSessions.find(handle); auto it = sDecoderSessions.find(handle);
if (it == sDecoderSessions.end()) if (it == sDecoderSessions.end())
return nullptr; return nullptr;
H264AVCDecoder* session = it->second; H264DecoderBackend* session = it->second;
if (sDecoderSessions.size() >= 5) if (sDecoderSessions.size() >= 5)
{ {
cemuLog_log(LogType::Force, "H264: Warning - more than 5 active sessions"); cemuLog_log(LogType::Force, "H264: Warning - more than 5 active sessions");
@ -790,7 +269,7 @@ namespace H264
return session; return session;
} }
static void _ReleaseDecoderSession(H264AVCDecoder* session) static void _ReleaseDecoderSession(H264DecoderBackend* session)
{ {
std::unique_lock _lock(sDecoderSessionsMutex); std::unique_lock _lock(sDecoderSessionsMutex);
@ -802,7 +281,7 @@ namespace H264
auto it = sDecoderSessions.find(handle); auto it = sDecoderSessions.find(handle);
if (it == sDecoderSessions.end()) if (it == sDecoderSessions.end())
return; return;
H264AVCDecoder* session = it->second; H264DecoderBackend* session = it->second;
session->Destroy(); session->Destroy();
delete session; delete session;
sDecoderSessions.erase(it); sDecoderSessions.erase(it);
@ -830,45 +309,44 @@ namespace H264
uint32 H264DECBegin(void* workMemory) uint32 H264DECBegin(void* workMemory)
{ {
H264Context* ctx = (H264Context*)workMemory; H264Context* ctx = (H264Context*)workMemory;
H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle); H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle);
if (!session) if (!session)
{ {
cemuLog_log(LogType::Force, "H264DECBegin(): Invalid session"); cemuLog_log(LogType::Force, "H264DECBegin(): Invalid session");
return 0; return 0;
} }
session->Init(ctx->Param.outputPerFrame == 0); session->Init(ctx->Param.outputPerFrame == 0);
ctx->decoderState.numFramesInFlight = 0;
_ReleaseDecoderSession(session); _ReleaseDecoderSession(session);
return 0; return 0;
} }
void H264DoFrameOutputCallback(H264Context* ctx, H264AVCDecoder::DecodeResult& decodeResult); void H264DoFrameOutputCallback(H264Context* ctx, H264DecoderBackend::DecodeResult& decodeResult);
void _async_H264DECEnd(coreinit::OSEvent* executeDoneEvent, H264AVCDecoder* session, H264Context* ctx, std::vector<H264AVCDecoder::DecodeResult>* decodeResultsOut)
{
*decodeResultsOut = session->Flush();
coreinit::OSSignalEvent(executeDoneEvent);
}
H264DEC_STATUS H264DECEnd(void* workMemory) H264DEC_STATUS H264DECEnd(void* workMemory)
{ {
H264Context* ctx = (H264Context*)workMemory; H264Context* ctx = (H264Context*)workMemory;
H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle); H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle);
if (!session) if (!session)
{ {
cemuLog_log(LogType::Force, "H264DECEnd(): Invalid session"); cemuLog_log(LogType::Force, "H264DECEnd(): Invalid session");
return H264DEC_STATUS::SUCCESS; return H264DEC_STATUS::SUCCESS;
} }
StackAllocator<coreinit::OSEvent> executeDoneEvent; coreinit::OSEvent* flushEvt = &session->GetFlushEvent();
coreinit::OSInitEvent(&executeDoneEvent, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_MANUAL); coreinit::OSResetEvent(flushEvt);
std::vector<H264AVCDecoder::DecodeResult> results; session->QueueFlush();
auto asyncTask = std::async(std::launch::async, _async_H264DECEnd, executeDoneEvent.GetPointer(), session, ctx, &results); coreinit::OSWaitEvent(flushEvt);
coreinit::OSWaitEvent(&executeDoneEvent); while(true)
_ReleaseDecoderSession(session);
if (!results.empty())
{ {
for (auto& itr : results) H264DecoderBackend::DecodeResult decodeResult;
H264DoFrameOutputCallback(ctx, itr); if( !session->GetFrameOutputIfReady(decodeResult) )
break;
// todo - output all frames in a single callback?
H264DoFrameOutputCallback(ctx, decodeResult);
ctx->decoderState.numFramesInFlight--;
} }
cemu_assert_debug(ctx->decoderState.numFramesInFlight == 0); // no frames should be in flight anymore. Exact behavior is not well understood but we may have to output dummy frames if necessary
_ReleaseDecoderSession(session);
return H264DEC_STATUS::SUCCESS; return H264DEC_STATUS::SUCCESS;
} }
@ -930,7 +408,6 @@ namespace H264
return 0; return 0;
} }
struct H264DECFrameOutput struct H264DECFrameOutput
{ {
/* +0x00 */ uint32be result; /* +0x00 */ uint32be result;
@ -967,7 +444,7 @@ namespace H264
static_assert(sizeof(H264OutputCBStruct) == 12); static_assert(sizeof(H264OutputCBStruct) == 12);
void H264DoFrameOutputCallback(H264Context* ctx, H264AVCDecoder::DecodeResult& decodeResult) void H264DoFrameOutputCallback(H264Context* ctx, H264DecoderBackend::DecodeResult& decodeResult)
{ {
sint32 outputFrameCount = 1; sint32 outputFrameCount = 1;
@ -984,14 +461,14 @@ namespace H264
frameOutput->imagePtr = (uint8*)decodeResult.imageOutput; frameOutput->imagePtr = (uint8*)decodeResult.imageOutput;
frameOutput->result = 100; frameOutput->result = 100;
frameOutput->timestamp = decodeResult.timestamp; frameOutput->timestamp = decodeResult.timestamp;
frameOutput->frameWidth = decodeResult.decodeOutput.u4_pic_wd; frameOutput->frameWidth = decodeResult.frameWidth;
frameOutput->frameHeight = decodeResult.decodeOutput.u4_pic_ht; frameOutput->frameHeight = decodeResult.frameHeight;
frameOutput->bytesPerRow = (decodeResult.decodeOutput.u4_pic_wd + 0xFF) & ~0xFF; frameOutput->bytesPerRow = decodeResult.bytesPerRow;
frameOutput->cropEnable = decodeResult.decodeOutput.u1_frame_cropping_flag; frameOutput->cropEnable = decodeResult.cropEnable;
frameOutput->cropTop = decodeResult.decodeOutput.u1_frame_cropping_rect_top_ofst; frameOutput->cropTop = decodeResult.cropTop;
frameOutput->cropBottom = decodeResult.decodeOutput.u1_frame_cropping_rect_bottom_ofst; frameOutput->cropBottom = decodeResult.cropBottom;
frameOutput->cropLeft = decodeResult.decodeOutput.u1_frame_cropping_rect_left_ofst; frameOutput->cropLeft = decodeResult.cropLeft;
frameOutput->cropRight = decodeResult.decodeOutput.u1_frame_cropping_rect_right_ofst; frameOutput->cropRight = decodeResult.cropRight;
StackAllocator<H264OutputCBStruct> stack_fptrOutputData; StackAllocator<H264OutputCBStruct> stack_fptrOutputData;
stack_fptrOutputData->frameCount = outputFrameCount; stack_fptrOutputData->frameCount = outputFrameCount;
@ -1006,29 +483,41 @@ namespace H264
} }
} }
void _async_H264DECExecute(coreinit::OSEvent* executeDoneEvent, H264AVCDecoder* session, H264Context* ctx, void* imageOutput, H264AVCDecoder::DecodeResult* decodeResult)
{
session->Decode(ctx->BitStream.ptr.GetPtr(), ctx->BitStream.length, ctx->BitStream.timestamp, imageOutput, *decodeResult);
coreinit::OSSignalEvent(executeDoneEvent);
}
uint32 H264DECExecute(void* workMemory, void* imageOutput) uint32 H264DECExecute(void* workMemory, void* imageOutput)
{ {
BenchmarkTimer bt;
bt.Start();
H264Context* ctx = (H264Context*)workMemory; H264Context* ctx = (H264Context*)workMemory;
H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle); H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle);
if (!session) if (!session)
{ {
cemuLog_log(LogType::Force, "H264DECExecute(): Invalid session"); cemuLog_log(LogType::Force, "H264DECExecute(): Invalid session");
return 0; return 0;
} }
StackAllocator<coreinit::OSEvent> executeDoneEvent; // feed data to backend
coreinit::OSInitEvent(&executeDoneEvent, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_MANUAL); session->QueueForDecode((uint8*)ctx->BitStream.ptr.GetPtr(), ctx->BitStream.length, ctx->BitStream.timestamp, imageOutput);
H264AVCDecoder::DecodeResult decodeResult; ctx->decoderState.numFramesInFlight++;
auto asyncTask = std::async(std::launch::async, _async_H264DECExecute, &executeDoneEvent, session, ctx, imageOutput , &decodeResult); // H264DECExecute is synchronous and will return a frame after either every call (non-buffered) or after 6 calls (buffered)
coreinit::OSWaitEvent(&executeDoneEvent); // normally frame decoding happens only during H264DECExecute, but in order to hide the latency of our CPU decoder we will decode asynchronously in buffered mode
_ReleaseDecoderSession(session); uint32 numFramesToBuffer = (ctx->Param.outputPerFrame == 0) ? 5 : 0;
if(decodeResult.frameReady) if(ctx->decoderState.numFramesInFlight > numFramesToBuffer)
{
ctx->decoderState.numFramesInFlight--;
while(true)
{
coreinit::OSEvent& evt = session->GetFrameOutputEvent();
coreinit::OSWaitEvent(&evt);
H264DecoderBackend::DecodeResult decodeResult;
if( !session->GetFrameOutputIfReady(decodeResult) )
continue;
H264DoFrameOutputCallback(ctx, decodeResult); H264DoFrameOutputCallback(ctx, decodeResult);
break;
}
}
_ReleaseDecoderSession(session);
bt.Stop();
double callTime = bt.GetElapsedMilliseconds();
cemuLog_log(LogType::H264, "H264Bench | H264DECExecute took {}ms", callTime);
return 0x80 | 100; return 0x80 | 100;
} }

View File

@ -0,0 +1,502 @@
#include "H264DecInternal.h"
#include "util/highresolutiontimer/HighResolutionTimer.h"
extern "C"
{
#include "../dependencies/ih264d/common/ih264_typedefs.h"
#include "../dependencies/ih264d/decoder/ih264d.h"
};
namespace H264
{
bool H264_IsBotW();
class H264AVCDecoder : public H264DecoderBackend
{
// Allocation callback handed to ih264d through pf_aligned_alloc.
// ctxt: memory context supplied by the codec (pv_mem_ctxt); not used here.
// alignment: requested alignment in bytes.
// size: number of bytes to allocate.
// Returns the aligned block, or nullptr if the allocation failed.
static void* ivd_aligned_malloc(void* ctxt, WORD32 alignment, WORD32 size)
{
#ifdef _WIN32
	return _aligned_malloc(size, alignment);
#else
	// alignment is atleast sizeof(void*)
	alignment = std::max<WORD32>(alignment, sizeof(void*));
	// round up to the smallest power of two that is at least as large as alignment
	alignment--;
	alignment |= alignment << 1;
	alignment |= alignment >> 1;
	alignment |= alignment >> 2;
	alignment |= alignment >> 4;
	alignment |= alignment >> 8;
	alignment |= alignment >> 16;
	alignment ^= (alignment >> 1);
	// posix_memalign reports failure through its return value and does not promise
	// to write the output pointer in that case, so never return it unchecked
	void* temp = nullptr;
	if (posix_memalign(&temp, (size_t)alignment, (size_t)size) != 0)
		return nullptr;
	return temp;
#endif
}
// Release callback handed to ih264d through pf_aligned_free.
// ctxt: memory context supplied by the codec; not used here.
// buf: block previously returned by ivd_aligned_malloc.
static void ivd_aligned_free(void* ctxt, void* buf)
{
#ifndef _WIN32
	free(buf);
#else
	_aligned_free(buf);
#endif
}
public:
H264AVCDecoder()
{
m_decoderThread = std::thread(&H264AVCDecoder::DecoderThread, this);
}
// Requests the worker thread to stop, wakes it up and waits for it to finish.
~H264AVCDecoder()
{
	m_threadShouldExit.store(true);
	// the worker waits on the semaphore at the top of its loop; bump it once so the exit flag gets checked
	m_decodeSem.increment();
	if (!m_decoderThread.joinable())
		return;
	m_decoderThread.join();
}
void Init(bool isBufferedMode)
{
ih264d_create_ip_t s_create_ip{ 0 };
ih264d_create_op_t s_create_op{ 0 };
s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t);
s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE;
s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 1; // shared display buffer mode -> We give the decoder a list of buffers that it will use (?)
s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t);
s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV;
s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc;
s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free;
s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL;
WORD32 status = ih264d_api_function(m_codecCtx, &s_create_ip, &s_create_op);
cemu_assert(!status);
m_codecCtx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle;
m_codecCtx->pv_fxns = (void*)&ih264d_api_function;
m_codecCtx->u4_size = sizeof(iv_obj_t);
SetDecoderCoreCount(1);
m_isBufferedMode = isBufferedMode;
UpdateParameters(false);
m_numDecodedFrames = 0;
m_hasBufferSizeInfo = false;
}
void Destroy()
{
if (!m_codecCtx)
return;
ih264d_delete_ip_t s_delete_ip{ 0 };
ih264d_delete_op_t s_delete_op{ 0 };
s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t);
s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE;
s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t);
WORD32 status = ih264d_api_function(m_codecCtx, &s_delete_ip, &s_delete_op);
cemu_assert_debug(!status);
m_codecCtx = nullptr;
}
// Copies a frame produced by the codec into the guest-provided image buffer of the
// matching slice entry, fills in the frame metadata and appends the slice to the display queue.
// s_dec_op: output struct of the decode call that produced the frame; u4_ts is used as the
//           index into m_decodedSliceArray (Decode() passes the slice index in as the timestamp).
void PushDecodedFrame(ivd_video_decode_op_t& s_dec_op)
{
	// validate the index before it is used for any array access below
	cemu_assert(s_dec_op.u4_ts < m_decodedSliceArray.size());
	// copy image data outside of lock since its an expensive operation
	CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_decodedSliceArray[s_dec_op.u4_ts].result.imageOutput, s_dec_op);

	std::unique_lock _l(m_decodeQueueMtx);
	auto& result = m_decodedSliceArray[s_dec_op.u4_ts];
	cemu_assert_debug(result.isUsed);
	cemu_assert_debug(s_dec_op.u4_output_present != 0);

	result.result.isDecoded = true;
	result.result.hasFrame = s_dec_op.u4_output_present != 0;
	result.result.frameWidth = s_dec_op.u4_pic_wd;
	result.result.frameHeight = s_dec_op.u4_pic_ht;
	// row pitch of the output image: width rounded up to a multiple of 256 (matches CopyImageToResultBuffer)
	result.result.bytesPerRow = (s_dec_op.u4_pic_wd + 0xFF) & ~0xFF;
	result.result.cropEnable = s_dec_op.u1_frame_cropping_flag;
	result.result.cropTop = s_dec_op.u1_frame_cropping_rect_top_ofst;
	result.result.cropBottom = s_dec_op.u1_frame_cropping_rect_bottom_ofst;
	result.result.cropLeft = s_dec_op.u1_frame_cropping_rect_left_ofst;
	result.result.cropRight = s_dec_op.u1_frame_cropping_rect_right_ofst;

	m_displayQueue.push_back(s_dec_op.u4_ts);

	// release the lock before waking the consumer
	_l.unlock();
	coreinit::OSSignalEvent(m_displayQueueEvt);
}
// called from async worker thread
// Feeds one queued slice to ih264d. On the first slice (or after a resolution change) the
// stream header is parsed first to size the display buffers. If the codec outputs a frame it
// is pushed to the display queue and the codec's display buffer is released again.
// decodedSlice: entry of m_decodedSliceArray; its index is passed to the codec as the timestamp.
void Decode(DecodedSlice& decodedSlice)
{
	if (!m_hasBufferSizeInfo)
	{
		uint32 numByteConsumed = 0;
		if (!DetermineBufferSizes(decodedSlice.dataToDecode.m_data, decodedSlice.dataToDecode.m_length, numByteConsumed))
		{
			cemuLog_log(LogType::Force, "H264AVC: Unable to determine picture size. Ignoring decode input");
			// NOTE(review): the slice is marked decoded and the event is signalled, but it is not appended
			// to m_displayQueue and isUsed stays set - confirm that consumers cope with this
			std::unique_lock _l(m_decodeQueueMtx);
			decodedSlice.result.isDecoded = true;
			decodedSlice.result.hasFrame = false;
			coreinit::OSSignalEvent(m_displayQueueEvt);
			return;
		}
		// skip the bytes already consumed by the header parse
		decodedSlice.dataToDecode.m_length -= numByteConsumed;
		decodedSlice.dataToDecode.m_data = (uint8*)decodedSlice.dataToDecode.m_data + numByteConsumed;
		m_hasBufferSizeInfo = true;
	}

	ivd_video_decode_ip_t s_dec_ip{ 0 };
	ivd_video_decode_op_t s_dec_op{ 0 };
	s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
	s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);

	s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;

	// the timestamp field carries the slice index so that output frames can be matched to their slice entry
	s_dec_ip.u4_ts = std::distance(m_decodedSliceArray.data(), &decodedSlice);
	cemu_assert_debug(s_dec_ip.u4_ts < m_decodedSliceArray.size());
	s_dec_ip.pv_stream_buffer = (uint8*)decodedSlice.dataToDecode.m_data;
	s_dec_ip.u4_num_Bytes = decodedSlice.dataToDecode.m_length;

	s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0;
	s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0;
	s_dec_ip.s_out_buffer.u4_num_bufs = 0;

	BenchmarkTimer bt;
	bt.Start();
	WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
	if (status != 0 && (s_dec_op.u4_error_code&0xFF) == IVD_RES_CHANGED)
	{
		// resolution change
		ResetDecoder();
		m_hasBufferSizeInfo = false;
		Decode(decodedSlice);
		return;
	}
	else if (status != 0)
	{
		// report the codec's error code; the call status itself only says that the call failed
		cemuLog_log(LogType::Force, "H264: Failed to decode frame (error 0x{:08x})", s_dec_op.u4_error_code);
		decodedSlice.result.hasFrame = false;
		cemu_assert_unimplemented();
		return;
	}

	bt.Stop();
	double decodeTime = bt.GetElapsedMilliseconds();

	cemu_assert(s_dec_op.u4_frame_decoded_flag);
	cemu_assert_debug(s_dec_op.u4_num_bytes_consumed == decodedSlice.dataToDecode.m_length);

	cemu_assert_debug(m_isBufferedMode || s_dec_op.u4_output_present); // if buffered mode is disabled, then every input should output a frame (except for partial slices?)

	if (s_dec_op.u4_output_present)
	{
		cemu_assert(s_dec_op.e_output_format == IV_YUV_420SP_UV);
		if (H264_IsBotW())
		{
			// title-specific override: report 1920x1088 frames as 1080 rows high
			if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088)
				s_dec_op.s_disp_frm_buf.u4_y_ht = 1080;
		}
		bt.Start();
		PushDecodedFrame(s_dec_op);
		bt.Stop();
		double copyTime = bt.GetElapsedMilliseconds();

		// release buffer
		// find which of our display buffers the output pointer belongs to
		sint32 bufferId = -1;
		for (size_t i = 0; i < m_displayBuf.size(); i++)
		{
			if (s_dec_op.s_disp_frm_buf.pv_y_buf >= m_displayBuf[i].data() && s_dec_op.s_disp_frm_buf.pv_y_buf < (m_displayBuf[i].data() + m_displayBuf[i].size()))
			{
				bufferId = (sint32)i;
				break;
			}
		}
		cemu_assert_debug(bufferId == s_dec_op.u4_disp_buf_id);
		cemu_assert(bufferId >= 0);
		ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 };
		ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 };
		s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME;
		s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t);
		s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t);
		s_video_rel_disp_ip.u4_disp_buf_id = bufferId;
		status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op);
		cemu_assert(!status);

		cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms CopyTime {}ms", decodeTime, copyTime);
	}
	else
	{
		cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms (no frame output)", decodeTime);
	}

	if (s_dec_op.u4_frame_decoded_flag)
		m_numDecodedFrames++;
	// get VUI
	//ih264d_ctl_get_vui_params_ip_t s_ctl_get_vui_params_ip;
	//ih264d_ctl_get_vui_params_op_t s_ctl_get_vui_params_op;
	//s_ctl_get_vui_params_ip.e_cmd = IVD_CMD_VIDEO_CTL;
	//s_ctl_get_vui_params_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_GET_VUI_PARAMS;
	//s_ctl_get_vui_params_ip.u4_size = sizeof(ih264d_ctl_get_vui_params_ip_t);
	//s_ctl_get_vui_params_op.u4_size = sizeof(ih264d_ctl_get_vui_params_op_t);
	//status = ih264d_api_function(mCodecCtx, &s_ctl_get_vui_params_ip, &s_ctl_get_vui_params_op);
	//cemu_assert(status == 0);
}
void Flush()
{
// set flush mode
ivd_ctl_flush_ip_t s_video_flush_ip{ 0 };
ivd_ctl_flush_op_t s_video_flush_op{ 0 };
s_video_flush_ip.e_cmd = IVD_CMD_VIDEO_CTL;
s_video_flush_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH;
s_video_flush_ip.u4_size = sizeof(ivd_ctl_flush_ip_t);
s_video_flush_op.u4_size = sizeof(ivd_ctl_flush_op_t);
WORD32 status = ih264d_api_function(m_codecCtx, &s_video_flush_ip, &s_video_flush_op);
if (status != 0)
cemuLog_log(LogType::Force, "H264Dec: Unexpected error during flush ({})", status);
// get all frames from the decoder
while (true)
{
ivd_video_decode_ip_t s_dec_ip{ 0 };
ivd_video_decode_op_t s_dec_op{ 0 };
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
s_dec_ip.pv_stream_buffer = NULL;
s_dec_ip.u4_num_Bytes = 0;
s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0;
s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0;
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
if (status != 0)
break;
cemu_assert_debug(s_dec_op.u4_output_present != 0); // should never be false?
if(s_dec_op.u4_output_present == 0)
continue;
if (H264_IsBotW())
{
if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088)
s_dec_op.s_disp_frm_buf.u4_y_ht = 1080;
}
PushDecodedFrame(s_dec_op);
}
}
// Copies a decoded image (Y plane followed by the interleaved UV plane) into bufOut.
// yIn / uvIn: source planes, read with the stride reported in decodeInfo.
// bufOut: destination; rows are written with a pitch of the image width rounded up to a multiple of 256.
// decodeInfo: supplies width, height and source stride of the display frame.
void CopyImageToResultBuffer(uint8* yIn, uint8* uvIn, uint8* bufOut, ivd_video_decode_op_t& decodeInfo)
{
	const uint32 width = decodeInfo.s_disp_frm_buf.u4_y_wd;
	const uint32 height = decodeInfo.s_disp_frm_buf.u4_y_ht;
	const size_t srcPitch = decodeInfo.s_disp_frm_buf.u4_y_strd;
	const size_t dstPitch = (width + 0xFF) & ~0xFF;

	// copy Y
	for (uint32 row = 0; row < height; row++)
		memcpy(bufOut + row * dstPitch, yIn + row * srcPitch, width);

	// copy UV
	// the UV plane starts right after the Y plane in the output and has half as many rows
	uint8* const uvDst = bufOut + dstPitch * height;
	const uint32 uvRows = height / 2;
	for (uint32 row = 0; row < uvRows; row++)
		memcpy(uvDst + row * dstPitch, uvIn + row * srcPitch, width);
}
private:
// Tells the codec how many cores it may use for decoding.
// coreCount: number of cores; valid numbers are 1-4.
void SetDecoderCoreCount(uint32 coreCount)
{
	// zero-initialize the request/response structs like every other codec call in this class,
	// so no indeterminate bytes are handed to the C API
	ih264d_ctl_set_num_cores_ip_t s_set_cores_ip{};
	ih264d_ctl_set_num_cores_op_t s_set_cores_op{};
	s_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
	s_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES;
	s_set_cores_ip.u4_num_cores = coreCount; // valid numbers are 1-4
	s_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
	s_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);
	IV_API_CALL_STATUS_T status = ih264d_api_function(m_codecCtx, (void *)&s_set_cores_ip, (void *)&s_set_cores_op);
	cemu_assert(status == IV_SUCCESS);
}
bool DetermineBufferSizes(void* data, uint32 length, uint32& numByteConsumed)
{
numByteConsumed = 0;
UpdateParameters(true);
ivd_video_decode_ip_t s_dec_ip{ 0 };
ivd_video_decode_op_t s_dec_op{ 0 };
s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
s_dec_ip.pv_stream_buffer = (uint8*)data;
s_dec_ip.u4_num_Bytes = length;
s_dec_ip.s_out_buffer.u4_num_bufs = 0;
WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op);
if (status != 0)
{
cemuLog_log(LogType::Force, "H264: Unable to determine buffer sizes for stream");
return false;
}
numByteConsumed = s_dec_op.u4_num_bytes_consumed;
cemu_assert(status == 0);
if (s_dec_op.u4_pic_wd == 0 || s_dec_op.u4_pic_ht == 0)
return false;
UpdateParameters(false);
ReinitBuffers();
return true;
}
// Queries the codec for its display buffer requirements, allocates a fresh set of
// display buffers, registers them with the codec and marks all of them as available.
void ReinitBuffers()
{
	ivd_ctl_getbufinfo_ip_t s_ctl_ip{ 0 };
	ivd_ctl_getbufinfo_op_t s_ctl_op{ 0 };
	s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
	s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO;
	s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t);
	s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t);
	WORD32 status = ih264d_api_function(m_codecCtx, &s_ctl_ip, &s_ctl_op);
	cemu_assert(!status);

	// allocate
	// drop buffers from a previous call first: the registration below indexes m_displayBuf from 0,
	// so stale entries (possibly sized for another resolution) would otherwise be handed to the codec
	m_displayBuf.clear();
	for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
	{
		// one allocation per display buffer holding both planes back to back
		m_displayBuf.emplace_back().resize(s_ctl_op.u4_min_out_buf_size[0] + s_ctl_op.u4_min_out_buf_size[1]);
	}
	// set
	ivd_set_display_frame_ip_t s_set_display_frame_ip{ 0 }; // make sure to zero-initialize this. The codec seems to check the first 3 pointers/sizes per frame, regardless of the value of u4_num_bufs
	ivd_set_display_frame_op_t s_set_display_frame_op{ 0 };

	s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME;
	s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t);
	s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t);

	cemu_assert_debug(s_ctl_op.u4_min_num_out_bufs == 2);
	cemu_assert_debug(s_ctl_op.u4_min_out_buf_size[0] != 0 && s_ctl_op.u4_min_out_buf_size[1] != 0);

	s_set_display_frame_ip.num_disp_bufs = s_ctl_op.u4_num_disp_bufs;

	for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
	{
		s_set_display_frame_ip.s_disp_buffer[i].u4_num_bufs = 2;
		s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[0] = s_ctl_op.u4_min_out_buf_size[0];
		s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[1] = s_ctl_op.u4_min_out_buf_size[1];
		// second plane starts directly after the first one inside the same allocation
		s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[0] = m_displayBuf[i].data() + 0;
		s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[1] = m_displayBuf[i].data() + s_ctl_op.u4_min_out_buf_size[0];
	}

	status = ih264d_api_function(m_codecCtx, &s_set_display_frame_ip, &s_set_display_frame_op);
	cemu_assert(!status);

	// mark all as released (available)
	for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
	{
		ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 };
		ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 };

		s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME;
		s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t);
		s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t);
		s_video_rel_disp_ip.u4_disp_buf_id = i;

		status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op);
		cemu_assert(!status);
	}
}
// Issues a reset control command to the codec. Used by Decode() when the stream resolution changes.
void ResetDecoder()
{
	// zero-initialize the request/response structs like every other codec call in this class,
	// so no indeterminate bytes are handed to the C API
	ivd_ctl_reset_ip_t s_ctl_ip{};
	ivd_ctl_reset_op_t s_ctl_op{};

	s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
	s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET;
	s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t);
	s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t);

	WORD32 status = ih264d_api_function(m_codecCtx, (void*)&s_ctl_ip, (void*)&s_ctl_op);
	cemu_assert_debug(status == 0);
}
void UpdateParameters(bool headerDecodeOnly)
{
ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 };
ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 };
ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t;
ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t;
ps_ctl_ip->u4_disp_wd = 0;
ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE;
ps_ctl_ip->e_frm_out_mode = m_isBufferedMode ? IVD_DISPLAY_FRAME_OUT : IVD_DECODE_FRAME_OUT;
ps_ctl_ip->e_vid_dec_mode = headerDecodeOnly ? IVD_DECODE_HEADER : IVD_DECODE_FRAME;
ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL;
ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t);
ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t);
WORD32 status = ih264d_api_function(m_codecCtx, &s_h264d_ctl_ip, &s_h264d_ctl_op);
cemu_assert(status == 0);
}
private:
void DecoderThread()
{
while(!m_threadShouldExit)
{
m_decodeSem.decrementWithWait();
std::unique_lock _l(m_decodeQueueMtx);
if (m_decodeQueue.empty())
continue;
uint32 decodeIndex = m_decodeQueue.front();
m_decodeQueue.erase(m_decodeQueue.begin());
_l.unlock();
if(decodeIndex == CMD_FLUSH)
{
Flush();
_l.lock();
cemu_assert_debug(m_decodeQueue.empty()); // after flushing the queue should be empty since the sender is waiting for the flush to complete
_l.unlock();
coreinit::OSSignalEvent(m_flushEvt);
}
else
{
auto& decodedSlice = m_decodedSliceArray[decodeIndex];
Decode(decodedSlice);
}
}
}
iv_obj_t* m_codecCtx{nullptr};
bool m_hasBufferSizeInfo{ false };
bool m_isBufferedMode{ false };
uint32 m_numDecodedFrames{0};
std::vector<std::vector<uint8>> m_displayBuf;
std::thread m_decoderThread;
std::atomic_bool m_threadShouldExit{false};
};
// Factory for the ih264d-based decoder backend.
// Returns a heap-allocated H264AVCDecoder through its base class pointer; the object is
// created with plain new, so the caller is responsible for deleting it.
H264DecoderBackend* CreateAVCDecoder()
{
return new H264AVCDecoder();
}
};

View File

@ -0,0 +1,139 @@
#pragma once
#include "util/helpers/Semaphore.h"
#include "Cafe/OS/libs/coreinit/coreinit_Thread.h"
#include "Cafe/OS/libs/coreinit/coreinit_SysHeap.h"
#include "Cafe/OS/libs/h264_avc/parser/H264Parser.h"
namespace H264
{
class H264DecoderBackend
{
protected:
// Input side of a queued slice: a private copy of the bitstream plus a cursor into it.
struct DataToDecode
{
	uint8* m_data{nullptr}; // current read position; QueueForDecode() points it at m_buffer
	uint32 m_length{0}; // number of bytes remaining at m_data
	std::vector<uint8> m_buffer; // owns the copied bitstream bytes
};
static constexpr uint32 CMD_FLUSH = 0xFFFFFFFF;
public:
// Output side of a queued slice. Handed to the caller by GetFrameOutputIfReady().
struct DecodeResult
{
// set by the backend once it has finished processing the slice
bool isDecoded{false};
bool hasFrame{false}; // set to true if a full frame was successfully decoded
// timestamp passed to QueueForDecode() for this slice
double timestamp{};
// destination image buffer passed to QueueForDecode(); the backend writes the frame into it
void* imageOutput{nullptr};
// picture dimensions as reported by the backend
sint32 frameWidth{0};
sint32 frameHeight{0};
// row pitch of the image written to imageOutput
uint32 bytesPerRow{0};
// cropping information as reported by the backend
bool cropEnable{false};
sint32 cropTop{0};
sint32 cropBottom{0};
sint32 cropLeft{0};
sint32 cropRight{0};
};
// One slot of m_decodedSliceArray: pairs the queued input with its result.
struct DecodedSlice
{
// true while the slot is handed out; set by GetFreeDecodedSliceEntry(), cleared by GetFrameOutputIfReady()
bool isUsed{false};
DecodeResult result;
DataToDecode dataToDecode;
};
H264DecoderBackend()
{
m_displayQueueEvt = (coreinit::OSEvent*)coreinit::OSAllocFromSystem(sizeof(coreinit::OSEvent), 4);
coreinit::OSInitEvent(m_displayQueueEvt, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO);
m_flushEvt = (coreinit::OSEvent*)coreinit::OSAllocFromSystem(sizeof(coreinit::OSEvent), 4);
coreinit::OSInitEvent(m_flushEvt, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO);
};
// Returns the two event objects allocated in the constructor to the system allocator.
virtual ~H264DecoderBackend()
{
coreinit::OSFreeToSystem(m_displayQueueEvt);
coreinit::OSFreeToSystem(m_flushEvt);
};
virtual void Init(bool isBufferedMode) = 0;
virtual void Destroy() = 0;
// Copies the given bitstream into a free slice slot, appends the slot to the decode queue
// and wakes the decoder via the semaphore.
// data / length: bitstream bytes to decode (copied, so the caller's buffer is not referenced afterwards).
// timestamp: stored in the slot's result.
// imagePtr: destination buffer for the decoded image, stored in the slot's result.
void QueueForDecode(uint8* data, uint32 length, double timestamp, void* imagePtr)
{
	std::unique_lock queueLock(m_decodeQueueMtx);
	DecodedSlice& slot = GetFreeDecodedSliceEntry();

	DataToDecode& input = slot.dataToDecode;
	input.m_buffer.assign(data, data + length);
	input.m_data = input.m_buffer.data();
	input.m_length = length;

	DecodeResult& output = slot.result;
	output.isDecoded = false;
	output.imageOutput = imagePtr;
	output.timestamp = timestamp;

	// queue entries are indices into m_decodedSliceArray
	m_decodeQueue.push_back(std::distance(m_decodedSliceArray.data(), &slot));
	m_decodeSem.increment();
}
void QueueFlush()
{
std::unique_lock _l(m_decodeQueueMtx);
m_decodeQueue.push_back(CMD_FLUSH);
m_decodeSem.increment();
}
// Pops the oldest entry from the display queue, if there is one.
// result: receives the slice's result (swapped with the slot's stored result).
// Returns true if an entry was popped, false if the display queue was empty.
bool GetFrameOutputIfReady(DecodeResult& result)
{
	std::unique_lock queueLock(m_decodeQueueMtx);
	if (!m_displayQueue.empty())
	{
		const uint32 slotIndex = m_displayQueue.front();
		DecodedSlice& slot = m_decodedSliceArray[slotIndex];
		cemu_assert_debug(slot.result.isDecoded);
		std::swap(result, slot.result);
		// the slot can be handed out again from here on
		slot.isUsed = false;
		m_displayQueue.erase(m_displayQueue.begin());
		return true;
	}
	return false;
}
// Returns the event stored in m_displayQueueEvt (signalled when a new frame is ready for display).
coreinit::OSEvent& GetFrameOutputEvent()
{
return *m_displayQueueEvt;
}
// Returns the event stored in m_flushEvt (signalled after a flush operation has finished).
coreinit::OSEvent& GetFlushEvent()
{
return *m_flushEvt;
}
protected:
// Finds the first unused slot in m_decodedSliceArray, marks it as used and returns it.
// If every slot is in use, this is flagged as suspicious and slot 0 is returned.
DecodedSlice& GetFreeDecodedSliceEntry()
{
	for (size_t i = 0; i < m_decodedSliceArray.size(); i++)
	{
		DecodedSlice& candidate = m_decodedSliceArray[i];
		if (candidate.isUsed)
			continue;
		candidate.isUsed = true;
		return candidate;
	}
	cemu_assert_suspicious();
	return m_decodedSliceArray[0];
}
std::mutex m_decodeQueueMtx;
std::vector<uint32> m_decodeQueue; // indices into m_decodedSliceArray, in order of decode input
CounterSemaphore m_decodeSem;
std::vector<uint32> m_displayQueue; // indices into m_decodedSliceArray, in order of frame display output
coreinit::OSEvent* m_displayQueueEvt; // signalled when a new frame is ready for display
coreinit::OSEvent* m_flushEvt; // signalled after flush operation finished and all queued slices are decoded
// frame output queue
std::mutex m_frameOutputMtx;
std::array<DecodedSlice, 32> m_decodedSliceArray;
};
}

View File

@ -319,6 +319,17 @@ bool parseNAL_pic_parameter_set_rbsp(h264ParserState_t* h264ParserState, h264Par
return true; return true;
} }
// Parses a sequence parameter set from the given RBSP data.
// data / length: bytes handed to the RBSP bitstream reader.
// sps: receives the parsed SPS on success; left untouched on failure.
// Returns true only if parsing succeeded and the parser state reports an SPS.
bool h264Parser_ParseSPS(uint8* data, uint32 length, h264State_seq_parameter_set_t& sps)
{
	h264ParserState_t state;
	RBSPInputBitstream bitstream(data, length);
	if (!parseNAL_seq_parameter_set_rbsp(&state, nullptr, bitstream))
		return false;
	if (!state.hasSPS)
		return false;
	sps = state.sps;
	return true;
}
void parseNAL_ref_pic_list_modification(const h264State_seq_parameter_set_t& sps, const h264State_pic_parameter_set_t& pps, RBSPInputBitstream& nalStream, nal_slice_header_t* sliceHeader) void parseNAL_ref_pic_list_modification(const h264State_seq_parameter_set_t& sps, const h264State_pic_parameter_set_t& pps, RBSPInputBitstream& nalStream, nal_slice_header_t* sliceHeader)
{ {
if (!sliceHeader->slice_type.isSliceTypeI() && !sliceHeader->slice_type.isSliceTypeSI()) if (!sliceHeader->slice_type.isSliceTypeI() && !sliceHeader->slice_type.isSliceTypeSI())
@ -688,9 +699,8 @@ void _calculateFrameOrder(h264ParserState_t* h264ParserState, const h264State_se
else if (sps.pic_order_cnt_type == 2) else if (sps.pic_order_cnt_type == 2)
{ {
// display order matches decode order // display order matches decode order
uint32 prevFrameNum = h264ParserState->picture_order.prevFrameNum; uint32 prevFrameNum = h264ParserState->picture_order.prevFrameNum;
;
uint32 FrameNumOffset; uint32 FrameNumOffset;
if (sliceHeader->IdrPicFlag) if (sliceHeader->IdrPicFlag)
{ {
@ -706,9 +716,6 @@ void _calculateFrameOrder(h264ParserState_t* h264ParserState, const h264State_se
FrameNumOffset = prevFrameNumOffset + sps.getMaxFrameNum(); FrameNumOffset = prevFrameNumOffset + sps.getMaxFrameNum();
else else
FrameNumOffset = prevFrameNumOffset; FrameNumOffset = prevFrameNumOffset;
} }
uint32 tempPicOrderCnt; uint32 tempPicOrderCnt;

View File

@ -513,6 +513,8 @@ typedef struct
void h264Parse(h264ParserState_t* h264ParserState, h264ParserOutput_t* output, uint8* data, uint32 length, bool parseSlices = true); void h264Parse(h264ParserState_t* h264ParserState, h264ParserOutput_t* output, uint8* data, uint32 length, bool parseSlices = true);
sint32 h264GetUnitLength(h264ParserState_t* h264ParserState, uint8* data, uint32 length); sint32 h264GetUnitLength(h264ParserState_t* h264ParserState, uint8* data, uint32 length);
bool h264Parser_ParseSPS(uint8* data, uint32 length, h264State_seq_parameter_set_t& sps);
void h264Parser_getScalingMatrix4x4(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix4x4); void h264Parser_getScalingMatrix4x4(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix4x4);
void h264Parser_getScalingMatrix8x8(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix8x8); void h264Parser_getScalingMatrix8x8(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix8x8);

View File

@ -978,7 +978,7 @@ namespace nsyshid
{ {
for (const auto& it : GetListSkylanders()) for (const auto& it : GetListSkylanders())
{ {
if(it.first.first == skyId && it.first.second == skyVar) if (it.first.first == skyId && it.first.second == skyVar)
{ {
return it.second; return it.second;
} }

View File

@ -50,7 +50,7 @@ namespace nsyshid
std::unique_ptr<FileStream> skyFile; std::unique_ptr<FileStream> skyFile;
uint8 status = 0; uint8 status = 0;
std::queue<uint8> queuedStatus; std::queue<uint8> queuedStatus;
std::array<uint8, SKY_BLOCK_SIZE> data{}; std::array<uint8, SKY_FIGURE_SIZE> data{};
uint32 lastId = 0; uint32 lastId = 0;
void Save(); void Save();

View File

@ -107,6 +107,7 @@ CurlRequestHelper::CurlRequestHelper()
curl_easy_setopt(m_curl, CURLOPT_FOLLOWLOCATION, 1); curl_easy_setopt(m_curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(m_curl, CURLOPT_MAXREDIRS, 2); curl_easy_setopt(m_curl, CURLOPT_MAXREDIRS, 2);
curl_easy_setopt(m_curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
if(GetConfig().proxy_server.GetValue() != "") if(GetConfig().proxy_server.GetValue() != "")
{ {
@ -263,6 +264,7 @@ CurlSOAPHelper::CurlSOAPHelper(NetworkService service)
m_curl = curl_easy_init(); m_curl = curl_easy_init();
curl_easy_setopt(m_curl, CURLOPT_WRITEFUNCTION, __curlWriteCallback); curl_easy_setopt(m_curl, CURLOPT_WRITEFUNCTION, __curlWriteCallback);
curl_easy_setopt(m_curl, CURLOPT_WRITEDATA, this); curl_easy_setopt(m_curl, CURLOPT_WRITEDATA, this);
curl_easy_setopt(m_curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
// SSL // SSL
if (!IsNetworkServiceSSLDisabled(service)) if (!IsNetworkServiceSSLDisabled(service))

View File

@ -6,8 +6,6 @@
#include "Cafe/HW/Espresso/Debugger/GDBStub.h" #include "Cafe/HW/Espresso/Debugger/GDBStub.h"
#include "ExceptionHandler.h" #include "ExceptionHandler.h"
void DebugLogStackTrace(OSThread_t* thread, MPTR sp);
bool crashLogCreated = false; bool crashLogCreated = false;
bool CrashLog_Create() bool CrashLog_Create()
@ -97,7 +95,7 @@ void ExceptionHandler_LogGeneralInfo()
MPTR currentStackVAddr = hCPU->gpr[1]; MPTR currentStackVAddr = hCPU->gpr[1];
CrashLog_WriteLine(""); CrashLog_WriteLine("");
CrashLog_WriteHeader("PPC stack trace"); CrashLog_WriteHeader("PPC stack trace");
DebugLogStackTrace(currentThread, currentStackVAddr); DebugLogStackTrace(currentThread, currentStackVAddr, true);
// stack dump // stack dump
CrashLog_WriteLine(""); CrashLog_WriteLine("");

View File

@ -552,6 +552,9 @@ inline uint32 GetTitleIdLow(uint64 titleId)
#include "Cafe/HW/Espresso/PPCState.h" #include "Cafe/HW/Espresso/PPCState.h"
#include "Cafe/HW/Espresso/PPCCallback.h" #include "Cafe/HW/Espresso/PPCCallback.h"
// PPC stack trace printer
void DebugLogStackTrace(struct OSThread_t* thread, MPTR sp, bool printSymbols = false);
// generic formatter for enums (to underlying) // generic formatter for enums (to underlying)
template <typename Enum> template <typename Enum>
requires std::is_enum_v<Enum> requires std::is_enum_v<Enum>

View File

@ -520,7 +520,7 @@ struct CemuConfig
struct struct
{ {
ConfigValue<bool> emulate_skylander_portal{false}; ConfigValue<bool> emulate_skylander_portal{false};
ConfigValue<bool> emulate_infinity_base{true}; ConfigValue<bool> emulate_infinity_base{false};
}emulated_usb_devices{}; }emulated_usb_devices{};
private: private:

View File

@ -398,7 +398,7 @@ CreateInfinityFigureDialog::CreateInfinityFigureDialog(wxWindow* parent, uint8 s
{ {
wxMessageDialog idError(this, "Error Converting Figure Number!", "Number Entered is Invalid"); wxMessageDialog idError(this, "Error Converting Figure Number!", "Number Entered is Invalid");
idError.ShowModal(); idError.ShowModal();
this->EndModal(0);; this->EndModal(0);
} }
uint32 figNum = longFigNum & 0xFFFFFFFF; uint32 figNum = longFigNum & 0xFFFFFFFF;
auto figure = nsyshid::g_infinitybase.FindFigure(figNum); auto figure = nsyshid::g_infinitybase.FindFigure(figNum);
@ -408,7 +408,7 @@ CreateInfinityFigureDialog::CreateInfinityFigureDialog(wxWindow* parent, uint8 s
"BIN files (*.bin)|*.bin", wxFD_SAVE | wxFD_OVERWRITE_PROMPT); "BIN files (*.bin)|*.bin", wxFD_SAVE | wxFD_OVERWRITE_PROMPT);
if (saveFileDialog.ShowModal() == wxID_CANCEL) if (saveFileDialog.ShowModal() == wxID_CANCEL)
this->EndModal(0);; this->EndModal(0);
m_filePath = saveFileDialog.GetPath(); m_filePath = saveFileDialog.GetPath();

View File

@ -46,25 +46,25 @@ SymbolListCtrl::SymbolListCtrl(wxWindow* parent, const wxWindowID& id, const wxP
void SymbolListCtrl::OnGameLoaded() void SymbolListCtrl::OnGameLoaded()
{ {
m_data.clear(); m_data.clear();
long itemId = 0;
const auto symbol_map = rplSymbolStorage_lockSymbolMap(); const auto symbol_map = rplSymbolStorage_lockSymbolMap();
for (auto const& [address, symbol_info] : symbol_map) for (auto const& [address, symbol_info] : symbol_map)
{ {
if (symbol_info == nullptr || symbol_info->symbolName == nullptr) if (symbol_info == nullptr || symbol_info->symbolName == nullptr)
continue; continue;
wxString libNameWX = wxString::FromAscii((const char*)symbol_info->libName);
wxString symbolNameWX = wxString::FromAscii((const char*)symbol_info->symbolName);
wxString searchNameWX = libNameWX + symbolNameWX;
searchNameWX.MakeLower();
auto new_entry = m_data.try_emplace( auto new_entry = m_data.try_emplace(
symbol_info->address, symbol_info->address,
(char*)(symbol_info->symbolName), symbolNameWX,
(char*)(symbol_info->libName), libNameWX,
"", searchNameWX,
false false
); );
new_entry.first->second.searchName += new_entry.first->second.name;
new_entry.first->second.searchName += new_entry.first->second.libName;
new_entry.first->second.searchName.MakeLower();
if (m_list_filter.IsEmpty()) if (m_list_filter.IsEmpty())
new_entry.first->second.visible = true; new_entry.first->second.visible = true;
else if (new_entry.first->second.searchName.Contains(m_list_filter)) else if (new_entry.first->second.searchName.Contains(m_list_filter))

View File

@ -277,12 +277,10 @@ void DebugPPCThreadsWindow::RefreshThreadList()
m_thread_list->SetScrollPos(0, scrollPos, true); m_thread_list->SetScrollPos(0, scrollPos, true);
} }
void DebugLogStackTrace(OSThread_t* thread, MPTR sp);
void DebugPPCThreadsWindow::DumpStackTrace(OSThread_t* thread) void DebugPPCThreadsWindow::DumpStackTrace(OSThread_t* thread)
{ {
cemuLog_log(LogType::Force, "Dumping stack trace for thread {0:08x} LR: {1:08x}", memory_getVirtualOffsetFromPointer(thread), _swapEndianU32(thread->context.lr)); cemuLog_log(LogType::Force, "Dumping stack trace for thread {0:08x} LR: {1:08x}", memory_getVirtualOffsetFromPointer(thread), _swapEndianU32(thread->context.lr));
DebugLogStackTrace(thread, _swapEndianU32(thread->context.gpr[1])); DebugLogStackTrace(thread, _swapEndianU32(thread->context.gpr[1]), true);
} }
void DebugPPCThreadsWindow::PresentProfileResults(OSThread_t* thread, const std::unordered_map<VAddr, uint32>& samples) void DebugPPCThreadsWindow::PresentProfileResults(OSThread_t* thread, const std::unordered_map<VAddr, uint32>& samples)

View File

@ -1,4 +1,4 @@
.rodata .section .rodata,"",%progbits
.global g_fontawesome_data, g_fontawesome_size .global g_fontawesome_data, g_fontawesome_size
g_fontawesome_data: g_fontawesome_data:
@ -6,3 +6,4 @@ g_fontawesome_data:
g_fontawesome_size: g_fontawesome_size:
.int g_fontawesome_size - g_fontawesome_data .int g_fontawesome_size - g_fontawesome_data
.section .note.GNU-stack,"",%progbits