diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a2342c27..28efa833 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -239,7 +239,17 @@ jobs: - name: "Install system dependencies" run: | brew update - brew install llvm@15 ninja nasm molten-vk automake libtool + brew install llvm@15 ninja nasm automake libtool + brew install cmake python3 ninja + + - name: "Build and install molten-vk" + run: | + git clone https://github.com/KhronosGroup/MoltenVK.git + cd MoltenVK + git checkout bf097edc74ec3b6dfafdcd5a38d3ce14b11952d6 + ./fetchDependencies --macos + make macos + make install - name: "Setup cmake" uses: jwlawson/actions-setup-cmake@v2 diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 49238c62..82a1989f 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -377,7 +377,9 @@ add_library(CemuCafe OS/libs/gx2/GX2_Texture.h OS/libs/gx2/GX2_TilingAperture.cpp OS/libs/h264_avc/H264Dec.cpp + OS/libs/h264_avc/H264DecBackendAVC.cpp OS/libs/h264_avc/h264dec.h + OS/libs/h264_avc/H264DecInternal.h OS/libs/h264_avc/parser OS/libs/h264_avc/parser/H264Parser.cpp OS/libs/h264_avc/parser/H264Parser.h diff --git a/src/Cafe/HW/Espresso/Debugger/Debugger.cpp b/src/Cafe/HW/Espresso/Debugger/Debugger.cpp index 62a5d592..e7369af6 100644 --- a/src/Cafe/HW/Espresso/Debugger/Debugger.cpp +++ b/src/Cafe/HW/Espresso/Debugger/Debugger.cpp @@ -501,8 +501,6 @@ void debugger_createPPCStateSnapshot(PPCInterpreter_t* hCPU) debuggerState.debugSession.ppcSnapshot.cr[i] = hCPU->cr[i]; } -void DebugLogStackTrace(OSThread_t* thread, MPTR sp); - void debugger_enterTW(PPCInterpreter_t* hCPU) { // handle logging points diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp index ed97288d..fe9316f0 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp @@ -212,11 +212,12 @@ static void PPCInterpreter_SUBF(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_SUBFO(PPCInterpreter_t* hCPU, uint32 opcode) { - // untested (Don't Starve Giant Edition uses this) + // Seen in Don't Starve Giant Edition and Teslagrad // also used by DS Virtual Console (Super Mario 64 DS) PPC_OPC_TEMPL3_XO(); - hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; - PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~hCPU->gpr[rA], hCPU->gpr[rB], hCPU->gpr[rD])); + uint32 result = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; + PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~hCPU->gpr[rA], hCPU->gpr[rB], result)); + hCPU->gpr[rD] = result; if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); diff --git a/src/Cafe/OS/libs/coreinit/coreinit.cpp b/src/Cafe/OS/libs/coreinit/coreinit.cpp index 49d232f8..00327a97 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit.cpp @@ -1,6 +1,6 @@ #include "Cafe/OS/common/OSCommon.h" #include "Common/SysAllocator.h" -#include "Cafe/OS/RPL/rpl.h" +#include "Cafe/OS/RPL/rpl_symbol_storage.h" #include "Cafe/OS/libs/coreinit/coreinit_Misc.h" @@ -69,7 +69,7 @@ sint32 ScoreStackTrace(OSThread_t* thread, MPTR sp) return score; } -void DebugLogStackTrace(OSThread_t* thread, MPTR sp) +void DebugLogStackTrace(OSThread_t* thread, MPTR sp, bool printSymbols) { // sp might not point to a valid stackframe // scan stack and evaluate which sp is most likely the beginning of the stackframe @@ -107,7 +107,15 @@ void DebugLogStackTrace(OSThread_t* thread, MPTR sp) uint32 returnAddress = 0; returnAddress = memory_readU32(nextStackPtr + 4); - cemuLog_log(LogType::Force, fmt::format("SP {0:08x} ReturnAddr {1:08x}", nextStackPtr, returnAddress)); + + RPLStoredSymbol* symbol = nullptr; + if(printSymbols) + symbol = rplSymbolStorage_getByClosestAddress(returnAddress); + + if(symbol) + cemuLog_log(LogType::Force, fmt::format("SP {:08x} ReturnAddr {:08x} ({}.{}+0x{:x})", nextStackPtr, returnAddress, (const char*)symbol->libName, (const char*)symbol->symbolName, returnAddress - symbol->address)); + else + cemuLog_log(LogType::Force, fmt::format("SP {:08x} ReturnAddr {:08x}", nextStackPtr, returnAddress)); currentStackPtr = nextStackPtr; } diff --git a/src/Cafe/OS/libs/coreinit/coreinit_MEM_ExpHeap.cpp b/src/Cafe/OS/libs/coreinit/coreinit_MEM_ExpHeap.cpp index 7ddadcf1..552a610f 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_MEM_ExpHeap.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit_MEM_ExpHeap.cpp @@ -2,8 +2,6 @@ #include "Cafe/HW/Espresso/PPCCallback.h" #include "Cafe/OS/libs/coreinit/coreinit_MEM_ExpHeap.h" -void DebugLogStackTrace(OSThread_t* thread, MPTR sp); - #define EXP_HEAP_GET_FROM_FREE_BLOCKCHAIN(__blockchain__) (MEMExpHeapHead2*)((uintptr_t)__blockchain__ - offsetof(MEMExpHeapHead2, expHeapHead) - offsetof(MEMExpHeapHead40_t, chainFreeBlocks)) namespace coreinit diff --git a/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.cpp b/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.cpp index e37949d7..2f819c50 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.cpp @@ -14,13 +14,10 @@ namespace coreinit return coreinit::MEMAllocFromExpHeapEx(_sysHeapHandle, size, alignment); } - void export_OSAllocFromSystem(PPCInterpreter_t* hCPU) + void OSFreeToSystem(void* ptr) { - ppcDefineParamU32(size, 0); - ppcDefineParamS32(alignment, 1); - MEMPTR mem = OSAllocFromSystem(size, alignment); - cemuLog_logDebug(LogType::Force, "OSAllocFromSystem(0x{:x}, {}) -> 0x{:08x}", size, alignment, mem.GetMPTR()); - osLib_returnFromFunction(hCPU, mem.GetMPTR()); + _sysHeapFreeCounter++; + coreinit::MEMFreeToExpHeap(_sysHeapHandle, ptr); } void InitSysHeap() @@ -34,7 +31,8 @@ namespace coreinit void InitializeSysHeap() { - osLib_addFunction("coreinit", "OSAllocFromSystem", export_OSAllocFromSystem); + cafeExportRegister("h264", OSAllocFromSystem, LogType::CoreinitMem); + cafeExportRegister("h264", OSFreeToSystem, LogType::CoreinitMem); } } diff --git a/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.h b/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.h index 428224af..ad115754 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.h +++ b/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.h @@ -4,5 +4,8 @@ namespace coreinit { void InitSysHeap(); + void* OSAllocFromSystem(uint32 size, uint32 alignment); + void OSFreeToSystem(void* ptr); + void InitializeSysHeap(); } \ No newline at end of file diff --git a/src/Cafe/OS/libs/h264_avc/H264Dec.cpp b/src/Cafe/OS/libs/h264_avc/H264Dec.cpp index 024965fd..82db039b 100644 --- a/src/Cafe/OS/libs/h264_avc/H264Dec.cpp +++ b/src/Cafe/OS/libs/h264_avc/H264Dec.cpp @@ -1,17 +1,12 @@ #include "Cafe/OS/common/OSCommon.h" #include "Cafe/HW/Espresso/PPCCallback.h" #include "Cafe/OS/libs/h264_avc/parser/H264Parser.h" +#include "Cafe/OS/libs/h264_avc/H264DecInternal.h" #include "util/highresolutiontimer/HighResolutionTimer.h" #include "Cafe/CafeSystem.h" #include "h264dec.h" -extern "C" -{ -#include "../dependencies/ih264d/common/ih264_typedefs.h" -#include "../dependencies/ih264d/decoder/ih264d.h" -}; - enum class H264DEC_STATUS : uint32 { SUCCESS = 0x0, @@ -33,10 +28,35 @@ namespace H264 return false; } + struct H264Context + { + struct + { + MEMPTR ptr{ nullptr }; + uint32be length{ 0 }; + float64be timestamp; + }BitStream; + struct + { + MEMPTR outputFunc{ nullptr }; + uint8be outputPerFrame{ 0 }; // whats the default? + MEMPTR userMemoryParam{ nullptr }; + }Param; + // misc + uint32be sessionHandle; + + // decoder state + struct + { + uint32 numFramesInFlight{0}; + }decoderState; + }; + uint32 H264DECMemoryRequirement(uint32 codecProfile, uint32 codecLevel, uint32 width, uint32 height, uint32be* sizeRequirementOut) { if (H264_IsBotW()) { + static_assert(sizeof(H264Context) < 256); *sizeRequirementOut = 256; return 0; } @@ -169,590 +189,47 @@ namespace H264 return H264DEC_STATUS::BAD_STREAM; } - struct H264Context - { - struct - { - MEMPTR ptr{ nullptr }; - uint32be length{ 0 }; - float64be timestamp; - }BitStream; - struct - { - MEMPTR outputFunc{ nullptr }; - uint8be outputPerFrame{ 0 }; // whats the default? - MEMPTR userMemoryParam{ nullptr }; - }Param; - // misc - uint32be sessionHandle; - }; - - class H264AVCDecoder - { - static void* ivd_aligned_malloc(void* ctxt, WORD32 alignment, WORD32 size) - { -#ifdef _WIN32 - return _aligned_malloc(size, alignment); -#else - // alignment is atleast sizeof(void*) - alignment = std::max(alignment, sizeof(void*)); - - //smallest multiple of 2 at least as large as alignment - alignment--; - alignment |= alignment << 1; - alignment |= alignment >> 1; - alignment |= alignment >> 2; - alignment |= alignment >> 4; - alignment |= alignment >> 8; - alignment |= alignment >> 16; - alignment ^= (alignment >> 1); - - void* temp; - posix_memalign(&temp, (size_t)alignment, (size_t)size); - return temp; -#endif - } - - static void ivd_aligned_free(void* ctxt, void* buf) - { -#ifdef _WIN32 - _aligned_free(buf); -#else - free(buf); -#endif - return; - } - - public: - struct DecodeResult - { - bool frameReady{ false }; - double timestamp; - void* imageOutput; - ivd_video_decode_op_t decodeOutput; - }; - - void Init(bool isBufferedMode) - { - ih264d_create_ip_t s_create_ip{ 0 }; - ih264d_create_op_t s_create_op{ 0 }; - - s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t); - s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE; - s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 1; // shared display buffer mode -> We give the decoder a list of buffers that it will use (?) - - s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t); - s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV; - s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc; - s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free; - s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL; - - WORD32 status = ih264d_api_function(m_codecCtx, &s_create_ip, &s_create_op); - cemu_assert(!status); - - m_codecCtx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle; - m_codecCtx->pv_fxns = (void*)&ih264d_api_function; - m_codecCtx->u4_size = sizeof(iv_obj_t); - - SetDecoderCoreCount(1); - - m_isBufferedMode = isBufferedMode; - - UpdateParameters(false); - - m_bufferedResults.clear(); - m_numDecodedFrames = 0; - m_hasBufferSizeInfo = false; - m_timestampIndex = 0; - } - - void Destroy() - { - if (!m_codecCtx) - return; - ih264d_delete_ip_t s_delete_ip{ 0 }; - ih264d_delete_op_t s_delete_op{ 0 }; - s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t); - s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE; - s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t); - WORD32 status = ih264d_api_function(m_codecCtx, &s_delete_ip, &s_delete_op); - cemu_assert_debug(!status); - m_codecCtx = nullptr; - } - - void SetDecoderCoreCount(uint32 coreCount) - { - ih264d_ctl_set_num_cores_ip_t s_set_cores_ip; - ih264d_ctl_set_num_cores_op_t s_set_cores_op; - s_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL; - s_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES; - s_set_cores_ip.u4_num_cores = coreCount; // valid numbers are 1-4 - s_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t); - s_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t); - IV_API_CALL_STATUS_T status = ih264d_api_function(m_codecCtx, (void *)&s_set_cores_ip, (void *)&s_set_cores_op); - cemu_assert(status == IV_SUCCESS); - } - - static bool GetImageInfo(uint8* stream, uint32 length, uint32& imageWidth, uint32& imageHeight) - { - // create temporary decoder - ih264d_create_ip_t s_create_ip{ 0 }; - ih264d_create_op_t s_create_op{ 0 }; - s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t); - s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE; - s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 0; - s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t); - s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV; - s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc; - s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free; - s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL; - iv_obj_t* ctx = nullptr; - WORD32 status = ih264d_api_function(ctx, &s_create_ip, &s_create_op); - cemu_assert_debug(!status); - if (status != IV_SUCCESS) - return false; - ctx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle; - ctx->pv_fxns = (void*)&ih264d_api_function; - ctx->u4_size = sizeof(iv_obj_t); - // set header-only mode - ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 }; - ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 }; - ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t; - ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t; - ps_ctl_ip->u4_disp_wd = 0; - ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE; - ps_ctl_ip->e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; - ps_ctl_ip->e_vid_dec_mode = IVD_DECODE_HEADER; - ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL; - ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS; - ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t); - ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t); - status = ih264d_api_function(ctx, &s_h264d_ctl_ip, &s_h264d_ctl_op); - cemu_assert(!status); - // decode stream - ivd_video_decode_ip_t s_dec_ip{ 0 }; - ivd_video_decode_op_t s_dec_op{ 0 }; - s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); - s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); - s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; - s_dec_ip.pv_stream_buffer = stream; - s_dec_ip.u4_num_Bytes = length; - s_dec_ip.s_out_buffer.u4_num_bufs = 0; - - s_dec_op.u4_raw_wd = 0; - s_dec_op.u4_raw_ht = 0; - - status = ih264d_api_function(ctx, &s_dec_ip, &s_dec_op); - //cemu_assert(status == 0); -> This errors when not both the headers are present, but it will still set the parameters we need - bool isValid = false; - if (true)//status == 0) - { - imageWidth = s_dec_op.u4_raw_wd; - imageHeight = s_dec_op.u4_raw_ht; - cemu_assert_debug(imageWidth != 0 && imageHeight != 0); - isValid = true; - } - // destroy decoder - ih264d_delete_ip_t s_delete_ip{ 0 }; - ih264d_delete_op_t s_delete_op{ 0 }; - s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t); - s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE; - s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t); - status = ih264d_api_function(ctx, &s_delete_ip, &s_delete_op); - cemu_assert_debug(!status); - return isValid; - } - - void Decode(void* data, uint32 length, double timestamp, void* imageOutput, DecodeResult& decodeResult) - { - if (!m_hasBufferSizeInfo) - { - uint32 numByteConsumed = 0; - if (!DetermineBufferSizes(data, length, numByteConsumed)) - { - cemuLog_log(LogType::Force, "H264: Unable to determine picture size. Ignoring decode input"); - decodeResult.frameReady = false; - return; - } - length -= numByteConsumed; - data = (uint8*)data + numByteConsumed; - m_hasBufferSizeInfo = true; - } - - ivd_video_decode_ip_t s_dec_ip{ 0 }; - ivd_video_decode_op_t s_dec_op{ 0 }; - s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); - s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); - - s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; - - // remember timestamp and associated output buffer - m_timestamps[m_timestampIndex] = timestamp; - m_imageBuffers[m_timestampIndex] = imageOutput; - s_dec_ip.u4_ts = m_timestampIndex; - m_timestampIndex = (m_timestampIndex + 1) % 64; - - s_dec_ip.pv_stream_buffer = (uint8*)data; - s_dec_ip.u4_num_Bytes = length; - - s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0; - s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0; - s_dec_ip.s_out_buffer.u4_num_bufs = 0; - - BenchmarkTimer bt; - bt.Start(); - WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); - if (status != 0 && (s_dec_op.u4_error_code&0xFF) == IVD_RES_CHANGED) - { - // resolution change - ResetDecoder(); - m_hasBufferSizeInfo = false; - Decode(data, length, timestamp, imageOutput, decodeResult); - return; - } - else if (status != 0) - { - cemuLog_log(LogType::Force, "H264: Failed to decode frame (error 0x{:08x})", status); - decodeResult.frameReady = false; - return; - } - - bt.Stop(); - double decodeTime = bt.GetElapsedMilliseconds(); - - cemu_assert(s_dec_op.u4_frame_decoded_flag); - cemu_assert_debug(s_dec_op.u4_num_bytes_consumed == length); - - cemu_assert_debug(m_isBufferedMode || s_dec_op.u4_output_present); // if buffered mode is disabled, then every input should output a frame (except for partial slices?) - - if (s_dec_op.u4_output_present) - { - cemu_assert(s_dec_op.e_output_format == IV_YUV_420SP_UV); - if (H264_IsBotW()) - { - if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088) - s_dec_op.s_disp_frm_buf.u4_y_ht = 1080; - } - DecodeResult tmpResult; - tmpResult.frameReady = s_dec_op.u4_output_present != 0; - tmpResult.timestamp = m_timestamps[s_dec_op.u4_ts]; - tmpResult.imageOutput = m_imageBuffers[s_dec_op.u4_ts]; - tmpResult.decodeOutput = s_dec_op; - AddBufferedResult(tmpResult); - // transfer image to PPC output buffer and also correct stride - bt.Start(); - CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_imageBuffers[s_dec_op.u4_ts], s_dec_op); - bt.Stop(); - double copyTime = bt.GetElapsedMilliseconds(); - // release buffer - sint32 bufferId = -1; - for (size_t i = 0; i < m_displayBuf.size(); i++) - { - if (s_dec_op.s_disp_frm_buf.pv_y_buf >= m_displayBuf[i].data() && s_dec_op.s_disp_frm_buf.pv_y_buf < (m_displayBuf[i].data() + m_displayBuf[i].size())) - { - bufferId = (sint32)i; - break; - } - } - cemu_assert_debug(bufferId == s_dec_op.u4_disp_buf_id); - cemu_assert(bufferId >= 0); - ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 }; - ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 }; - s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME; - s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t); - s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t); - s_video_rel_disp_ip.u4_disp_buf_id = bufferId; - status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op); - cemu_assert(!status); - - cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms CopyTime {}ms", decodeTime, copyTime); - } - else - { - cemuLog_log(LogType::H264, "H264Bench | DecodeTime{}ms", decodeTime); - } - - if (s_dec_op.u4_frame_decoded_flag) - m_numDecodedFrames++; - - if (m_isBufferedMode) - { - // in buffered mode, always buffer 5 frames regardless of actual reordering and decoder latency - if (m_numDecodedFrames > 5) - GetCurrentBufferedResult(decodeResult); - } - else if(m_numDecodedFrames > 0) - GetCurrentBufferedResult(decodeResult); - - // get VUI - //ih264d_ctl_get_vui_params_ip_t s_ctl_get_vui_params_ip; - //ih264d_ctl_get_vui_params_op_t s_ctl_get_vui_params_op; - - //s_ctl_get_vui_params_ip.e_cmd = IVD_CMD_VIDEO_CTL; - //s_ctl_get_vui_params_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_GET_VUI_PARAMS; - //s_ctl_get_vui_params_ip.u4_size = sizeof(ih264d_ctl_get_vui_params_ip_t); - //s_ctl_get_vui_params_op.u4_size = sizeof(ih264d_ctl_get_vui_params_op_t); - - //status = ih264d_api_function(mCodecCtx, &s_ctl_get_vui_params_ip, &s_ctl_get_vui_params_op); - //cemu_assert(status == 0); - } - - std::vector Flush() - { - std::vector results; - // set flush mode - ivd_ctl_flush_ip_t s_video_flush_ip{ 0 }; - ivd_ctl_flush_op_t s_video_flush_op{ 0 }; - s_video_flush_ip.e_cmd = IVD_CMD_VIDEO_CTL; - s_video_flush_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH; - s_video_flush_ip.u4_size = sizeof(ivd_ctl_flush_ip_t); - s_video_flush_op.u4_size = sizeof(ivd_ctl_flush_op_t); - WORD32 status = ih264d_api_function(m_codecCtx, &s_video_flush_ip, &s_video_flush_op); - if (status != 0) - cemuLog_log(LogType::Force, "H264Dec: Unexpected error during flush ({})", status); - // get all frames from the codec - while (true) - { - ivd_video_decode_ip_t s_dec_ip{ 0 }; - ivd_video_decode_op_t s_dec_op{ 0 }; - s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); - s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); - s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; - s_dec_ip.pv_stream_buffer = NULL; - s_dec_ip.u4_num_Bytes = 0; - s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0; - s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0; - s_dec_ip.s_out_buffer.u4_num_bufs = 0; - status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); - if (status != 0) - break; - cemu_assert_debug(s_dec_op.u4_output_present != 0); // should never be zero? - if(s_dec_op.u4_output_present == 0) - continue; - if (H264_IsBotW()) - { - if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088) - s_dec_op.s_disp_frm_buf.u4_y_ht = 1080; - } - DecodeResult tmpResult; - tmpResult.frameReady = s_dec_op.u4_output_present != 0; - tmpResult.timestamp = m_timestamps[s_dec_op.u4_ts]; - tmpResult.imageOutput = m_imageBuffers[s_dec_op.u4_ts]; - tmpResult.decodeOutput = s_dec_op; - AddBufferedResult(tmpResult); - CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_imageBuffers[s_dec_op.u4_ts], s_dec_op); - } - results = std::move(m_bufferedResults); - return results; - } - - void CopyImageToResultBuffer(uint8* yIn, uint8* uvIn, uint8* bufOut, ivd_video_decode_op_t& decodeInfo) - { - uint32 imageWidth = decodeInfo.s_disp_frm_buf.u4_y_wd; - uint32 imageHeight = decodeInfo.s_disp_frm_buf.u4_y_ht; - - size_t inputStride = decodeInfo.s_disp_frm_buf.u4_y_strd; - size_t outputStride = (imageWidth + 0xFF) & ~0xFF; - - // copy Y - uint8* yOut = bufOut; - for (uint32 row = 0; row < imageHeight; row++) - { - memcpy(yOut, yIn, imageWidth); - yIn += inputStride; - yOut += outputStride; - } - - // copy UV - uint8* uvOut = bufOut + outputStride * imageHeight; - for (uint32 row = 0; row < imageHeight/2; row++) - { - memcpy(uvOut, uvIn, imageWidth); - uvIn += inputStride; - uvOut += outputStride; - } - } - - private: - - bool DetermineBufferSizes(void* data, uint32 length, uint32& numByteConsumed) - { - numByteConsumed = 0; - UpdateParameters(true); - - ivd_video_decode_ip_t s_dec_ip{ 0 }; - ivd_video_decode_op_t s_dec_op{ 0 }; - s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); - s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); - - s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; - s_dec_ip.pv_stream_buffer = (uint8*)data; - s_dec_ip.u4_num_Bytes = length; - s_dec_ip.s_out_buffer.u4_num_bufs = 0; - WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); - if (status != 0) - { - cemuLog_log(LogType::Force, "H264: Unable to determine buffer sizes for stream"); - return false; - } - numByteConsumed = s_dec_op.u4_num_bytes_consumed; - cemu_assert(status == 0); - if (s_dec_op.u4_pic_wd == 0 || s_dec_op.u4_pic_ht == 0) - return false; - UpdateParameters(false); - ReinitBuffers(); - return true; - } - - void ReinitBuffers() - { - ivd_ctl_getbufinfo_ip_t s_ctl_ip{ 0 }; - ivd_ctl_getbufinfo_op_t s_ctl_op{ 0 }; - WORD32 outlen = 0; - - s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; - s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO; - s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t); - s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t); - - WORD32 status = ih264d_api_function(m_codecCtx, &s_ctl_ip, &s_ctl_op); - cemu_assert(!status); - - // allocate - for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) - { - m_displayBuf.emplace_back().resize(s_ctl_op.u4_min_out_buf_size[0] + s_ctl_op.u4_min_out_buf_size[1]); - } - // set - ivd_set_display_frame_ip_t s_set_display_frame_ip{ 0 }; // make sure to zero-initialize this. The codec seems to check the first 3 pointers/sizes per frame, regardless of the value of u4_num_bufs - ivd_set_display_frame_op_t s_set_display_frame_op{ 0 }; - - s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME; - s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t); - s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t); - - cemu_assert_debug(s_ctl_op.u4_min_num_out_bufs == 2); - cemu_assert_debug(s_ctl_op.u4_min_out_buf_size[0] != 0 && s_ctl_op.u4_min_out_buf_size[1] != 0); - - s_set_display_frame_ip.num_disp_bufs = s_ctl_op.u4_num_disp_bufs; - - for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) - { - s_set_display_frame_ip.s_disp_buffer[i].u4_num_bufs = 2; - s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[0] = s_ctl_op.u4_min_out_buf_size[0]; - s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[1] = s_ctl_op.u4_min_out_buf_size[1]; - s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[0] = m_displayBuf[i].data() + 0; - s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[1] = m_displayBuf[i].data() + s_ctl_op.u4_min_out_buf_size[0]; - } - - status = ih264d_api_function(m_codecCtx, &s_set_display_frame_ip, &s_set_display_frame_op); - cemu_assert(!status); - - - // mark all as released (available) - for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) - { - ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 }; - ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 }; - - s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME; - s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t); - s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t); - s_video_rel_disp_ip.u4_disp_buf_id = i; - - status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op); - cemu_assert(!status); - } - } - - void ResetDecoder() - { - ivd_ctl_reset_ip_t s_ctl_ip; - ivd_ctl_reset_op_t s_ctl_op; - - s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; - s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET; - s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t); - s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t); - - WORD32 status = ih264d_api_function(m_codecCtx, (void*)&s_ctl_ip, (void*)&s_ctl_op); - cemu_assert_debug(status == 0); - } - - void UpdateParameters(bool headerDecodeOnly) - { - ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 }; - ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 }; - ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t; - ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t; - - ps_ctl_ip->u4_disp_wd = 0; - ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE; - ps_ctl_ip->e_frm_out_mode = m_isBufferedMode ? IVD_DISPLAY_FRAME_OUT : IVD_DECODE_FRAME_OUT; - ps_ctl_ip->e_vid_dec_mode = headerDecodeOnly ? IVD_DECODE_HEADER : IVD_DECODE_FRAME; - ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL; - ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS; - ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t); - ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t); - - WORD32 status = ih264d_api_function(m_codecCtx, &s_h264d_ctl_ip, &s_h264d_ctl_op); - cemu_assert(status == 0); - } - - /* In non-flush mode we have a delay of (at least?) 5 frames */ - void AddBufferedResult(DecodeResult& decodeResult) - { - if (decodeResult.frameReady) - m_bufferedResults.emplace_back(decodeResult); - } - - void GetCurrentBufferedResult(DecodeResult& decodeResult) - { - cemu_assert(!m_bufferedResults.empty()); - if (m_bufferedResults.empty()) - { - decodeResult.frameReady = false; - return; - } - decodeResult = m_bufferedResults.front(); - m_bufferedResults.erase(m_bufferedResults.begin()); - } - private: - iv_obj_t* m_codecCtx{nullptr}; - bool m_hasBufferSizeInfo{ false }; - bool m_isBufferedMode{ false }; - double m_timestamps[64]; - void* m_imageBuffers[64]; - uint32 m_timestampIndex{0}; - std::vector m_bufferedResults; - uint32 m_numDecodedFrames{0}; - std::vector> m_displayBuf; - }; - H264DEC_STATUS H264DECGetImageSize(uint8* stream, uint32 length, uint32 offset, uint32be* outputWidth, uint32be* outputHeight) { - cemu_assert(offset <= length); - - uint32 imageWidth, imageHeight; - - if (H264AVCDecoder::GetImageInfo(stream, length, imageWidth, imageHeight)) + if(!stream || length < 4 || !outputWidth || !outputHeight) + return H264DEC_STATUS::INVALID_PARAM; + if( (offset+4) > length ) + return H264DEC_STATUS::INVALID_PARAM; + uint8* cur = stream + offset; + uint8* end = stream + length; + cur += 2; // we access cur[-2] and cur[-1] so we need to start at offset 2 + while(cur < end-2) { - if (H264_IsBotW()) + // check for start code + if(*cur != 1) { - if (imageWidth == 1920 && imageHeight == 1088) - imageHeight = 1080; + cur++; + continue; } - *outputWidth = imageWidth; - *outputHeight = imageHeight; + // check if this is a valid NAL header + if(cur[-2] != 0 || cur[-1] != 0 || cur[0] != 1) + { + cur++; + continue; + } + uint8 nalHeader = cur[1]; + if((nalHeader & 0x1F) != 7) + { + cur++; + continue; + } + h264State_seq_parameter_set_t psp; + bool r = h264Parser_ParseSPS(cur+2, end-cur-2, psp); + if(!r) + { + cemu_assert_suspicious(); // should not happen + return H264DEC_STATUS::BAD_STREAM; + } + *outputWidth = (psp.pic_width_in_mbs_minus1 + 1) * 16; + *outputHeight = (psp.pic_height_in_map_units_minus1 + 1) * 16; // affected by frame_mbs_only_flag? + return H264DEC_STATUS::SUCCESS; } - else - { - *outputWidth = 0; - *outputHeight = 0; - return H264DEC_STATUS::BAD_STREAM; - } - - return H264DEC_STATUS::SUCCESS; + return H264DEC_STATUS::BAD_STREAM; } uint32 H264DECInitParam(uint32 workMemorySize, void* workMemory) @@ -762,26 +239,28 @@ namespace H264 return 0; } - std::unordered_map sDecoderSessions; + std::unordered_map sDecoderSessions; std::mutex sDecoderSessionsMutex; std::atomic_uint32_t sCurrentSessionHandle{ 1 }; - static H264AVCDecoder* _CreateDecoderSession(uint32& handleOut) + H264DecoderBackend* CreateAVCDecoder(); + + static H264DecoderBackend* _CreateDecoderSession(uint32& handleOut) { std::unique_lock _lock(sDecoderSessionsMutex); handleOut = sCurrentSessionHandle.fetch_add(1); - H264AVCDecoder* session = new H264AVCDecoder(); + H264DecoderBackend* session = CreateAVCDecoder(); sDecoderSessions.try_emplace(handleOut, session); return session; } - static H264AVCDecoder* _AcquireDecoderSession(uint32 handle) + static H264DecoderBackend* _AcquireDecoderSession(uint32 handle) { std::unique_lock _lock(sDecoderSessionsMutex); auto it = sDecoderSessions.find(handle); if (it == sDecoderSessions.end()) return nullptr; - H264AVCDecoder* session = it->second; + H264DecoderBackend* session = it->second; if (sDecoderSessions.size() >= 5) { cemuLog_log(LogType::Force, "H264: Warning - more than 5 active sessions"); @@ -790,7 +269,7 @@ namespace H264 return session; } - static void _ReleaseDecoderSession(H264AVCDecoder* session) + static void _ReleaseDecoderSession(H264DecoderBackend* session) { std::unique_lock _lock(sDecoderSessionsMutex); @@ -802,7 +281,7 @@ namespace H264 auto it = sDecoderSessions.find(handle); if (it == sDecoderSessions.end()) return; - H264AVCDecoder* session = it->second; + H264DecoderBackend* session = it->second; session->Destroy(); delete session; sDecoderSessions.erase(it); @@ -830,45 +309,44 @@ namespace H264 uint32 H264DECBegin(void* workMemory) { H264Context* ctx = (H264Context*)workMemory; - H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle); + H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle); if (!session) { cemuLog_log(LogType::Force, "H264DECBegin(): Invalid session"); return 0; } session->Init(ctx->Param.outputPerFrame == 0); + ctx->decoderState.numFramesInFlight = 0; _ReleaseDecoderSession(session); return 0; } - void H264DoFrameOutputCallback(H264Context* ctx, H264AVCDecoder::DecodeResult& decodeResult); - - void _async_H264DECEnd(coreinit::OSEvent* executeDoneEvent, H264AVCDecoder* session, H264Context* ctx, std::vector* decodeResultsOut) - { - *decodeResultsOut = session->Flush(); - coreinit::OSSignalEvent(executeDoneEvent); - } + void H264DoFrameOutputCallback(H264Context* ctx, H264DecoderBackend::DecodeResult& decodeResult); H264DEC_STATUS H264DECEnd(void* workMemory) { H264Context* ctx = (H264Context*)workMemory; - H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle); + H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle); if (!session) { cemuLog_log(LogType::Force, "H264DECEnd(): Invalid session"); return H264DEC_STATUS::SUCCESS; } - StackAllocator executeDoneEvent; - coreinit::OSInitEvent(&executeDoneEvent, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_MANUAL); - std::vector results; - auto asyncTask = std::async(std::launch::async, _async_H264DECEnd, executeDoneEvent.GetPointer(), session, ctx, &results); - coreinit::OSWaitEvent(&executeDoneEvent); - _ReleaseDecoderSession(session); - if (!results.empty()) + coreinit::OSEvent* flushEvt = &session->GetFlushEvent(); + coreinit::OSResetEvent(flushEvt); + session->QueueFlush(); + coreinit::OSWaitEvent(flushEvt); + while(true) { - for (auto& itr : results) - H264DoFrameOutputCallback(ctx, itr); + H264DecoderBackend::DecodeResult decodeResult; + if( !session->GetFrameOutputIfReady(decodeResult) ) + break; + // todo - output all frames in a single callback? + H264DoFrameOutputCallback(ctx, decodeResult); + ctx->decoderState.numFramesInFlight--; } + cemu_assert_debug(ctx->decoderState.numFramesInFlight == 0); // no frames should be in flight anymore. Exact behavior is not well understood but we may have to output dummy frames if necessary + _ReleaseDecoderSession(session); return H264DEC_STATUS::SUCCESS; } @@ -930,7 +408,6 @@ namespace H264 return 0; } - struct H264DECFrameOutput { /* +0x00 */ uint32be result; @@ -967,7 +444,7 @@ namespace H264 static_assert(sizeof(H264OutputCBStruct) == 12); - void H264DoFrameOutputCallback(H264Context* ctx, H264AVCDecoder::DecodeResult& decodeResult) + void H264DoFrameOutputCallback(H264Context* ctx, H264DecoderBackend::DecodeResult& decodeResult) { sint32 outputFrameCount = 1; @@ -984,14 +461,14 @@ namespace H264 frameOutput->imagePtr = (uint8*)decodeResult.imageOutput; frameOutput->result = 100; frameOutput->timestamp = decodeResult.timestamp; - frameOutput->frameWidth = decodeResult.decodeOutput.u4_pic_wd; - frameOutput->frameHeight = decodeResult.decodeOutput.u4_pic_ht; - frameOutput->bytesPerRow = (decodeResult.decodeOutput.u4_pic_wd + 0xFF) & ~0xFF; - frameOutput->cropEnable = decodeResult.decodeOutput.u1_frame_cropping_flag; - frameOutput->cropTop = decodeResult.decodeOutput.u1_frame_cropping_rect_top_ofst; - frameOutput->cropBottom = decodeResult.decodeOutput.u1_frame_cropping_rect_bottom_ofst; - frameOutput->cropLeft = decodeResult.decodeOutput.u1_frame_cropping_rect_left_ofst; - frameOutput->cropRight = decodeResult.decodeOutput.u1_frame_cropping_rect_right_ofst; + frameOutput->frameWidth = decodeResult.frameWidth; + frameOutput->frameHeight = decodeResult.frameHeight; + frameOutput->bytesPerRow = decodeResult.bytesPerRow; + frameOutput->cropEnable = decodeResult.cropEnable; + frameOutput->cropTop = decodeResult.cropTop; + frameOutput->cropBottom = decodeResult.cropBottom; + frameOutput->cropLeft = decodeResult.cropLeft; + frameOutput->cropRight = decodeResult.cropRight; StackAllocator stack_fptrOutputData; stack_fptrOutputData->frameCount = outputFrameCount; @@ -1006,29 +483,41 @@ namespace H264 } } - void _async_H264DECExecute(coreinit::OSEvent* executeDoneEvent, H264AVCDecoder* session, H264Context* ctx, void* imageOutput, H264AVCDecoder::DecodeResult* decodeResult) - { - session->Decode(ctx->BitStream.ptr.GetPtr(), ctx->BitStream.length, ctx->BitStream.timestamp, imageOutput, *decodeResult); - coreinit::OSSignalEvent(executeDoneEvent); - } - uint32 H264DECExecute(void* workMemory, void* imageOutput) { + BenchmarkTimer bt; + bt.Start(); H264Context* ctx = (H264Context*)workMemory; - H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle); + H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle); if (!session) { cemuLog_log(LogType::Force, "H264DECExecute(): Invalid session"); return 0; } - StackAllocator executeDoneEvent; - coreinit::OSInitEvent(&executeDoneEvent, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_MANUAL); - H264AVCDecoder::DecodeResult decodeResult; - auto asyncTask = std::async(std::launch::async, _async_H264DECExecute, &executeDoneEvent, session, ctx, imageOutput , &decodeResult); - coreinit::OSWaitEvent(&executeDoneEvent); + // feed data to backend + session->QueueForDecode((uint8*)ctx->BitStream.ptr.GetPtr(), ctx->BitStream.length, ctx->BitStream.timestamp, imageOutput); + ctx->decoderState.numFramesInFlight++; + // H264DECExecute is synchronous and will return a frame after either every call (non-buffered) or after 6 calls (buffered) + // normally frame decoding happens only during H264DECExecute, but in order to hide the latency of our CPU decoder we will decode asynchronously in buffered mode + uint32 numFramesToBuffer = (ctx->Param.outputPerFrame == 0) ? 5 : 0; + if(ctx->decoderState.numFramesInFlight > numFramesToBuffer) + { + ctx->decoderState.numFramesInFlight--; + while(true) + { + coreinit::OSEvent& evt = session->GetFrameOutputEvent(); + coreinit::OSWaitEvent(&evt); + H264DecoderBackend::DecodeResult decodeResult; + if( !session->GetFrameOutputIfReady(decodeResult) ) + continue; + H264DoFrameOutputCallback(ctx, decodeResult); + break; + } + } _ReleaseDecoderSession(session); - if(decodeResult.frameReady) - H264DoFrameOutputCallback(ctx, decodeResult); + bt.Stop(); + double callTime = bt.GetElapsedMilliseconds(); + cemuLog_log(LogType::H264, "H264Bench | H264DECExecute took {}ms", callTime); return 0x80 | 100; } diff --git a/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp b/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp new file mode 100644 index 00000000..228f65a5 --- /dev/null +++ b/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp @@ -0,0 +1,502 @@ +#include "H264DecInternal.h" +#include "util/highresolutiontimer/HighResolutionTimer.h" + +extern "C" +{ +#include "../dependencies/ih264d/common/ih264_typedefs.h" +#include "../dependencies/ih264d/decoder/ih264d.h" +}; + +namespace H264 +{ + bool H264_IsBotW(); + + class H264AVCDecoder : public H264DecoderBackend + { + static void* ivd_aligned_malloc(void* ctxt, WORD32 alignment, WORD32 size) + { +#ifdef _WIN32 + return _aligned_malloc(size, alignment); +#else + // alignment is atleast sizeof(void*) + alignment = std::max(alignment, sizeof(void*)); + + //smallest multiple of 2 at least as large as alignment + alignment--; + alignment |= alignment << 1; + alignment |= alignment >> 1; + alignment |= alignment >> 2; + alignment |= alignment >> 4; + alignment |= alignment >> 8; + alignment |= alignment >> 16; + alignment ^= (alignment >> 1); + + void* temp; + posix_memalign(&temp, (size_t)alignment, (size_t)size); + return temp; +#endif + } + + static void ivd_aligned_free(void* ctxt, void* buf) + { +#ifdef _WIN32 + _aligned_free(buf); +#else + free(buf); +#endif + } + + public: + H264AVCDecoder() + { + m_decoderThread = std::thread(&H264AVCDecoder::DecoderThread, this); + } + + ~H264AVCDecoder() + { + m_threadShouldExit = true; + m_decodeSem.increment(); + if (m_decoderThread.joinable()) + m_decoderThread.join(); + } + + void Init(bool isBufferedMode) + { + ih264d_create_ip_t s_create_ip{ 0 }; + ih264d_create_op_t s_create_op{ 0 }; + + s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t); + s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE; + s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 1; // shared display buffer mode -> We give the decoder a list of buffers that it will use (?) + + s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t); + s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV; + s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc; + s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free; + s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL; + + WORD32 status = ih264d_api_function(m_codecCtx, &s_create_ip, &s_create_op); + cemu_assert(!status); + + m_codecCtx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle; + m_codecCtx->pv_fxns = (void*)&ih264d_api_function; + m_codecCtx->u4_size = sizeof(iv_obj_t); + + SetDecoderCoreCount(1); + + m_isBufferedMode = isBufferedMode; + + UpdateParameters(false); + + m_numDecodedFrames = 0; + m_hasBufferSizeInfo = false; + } + + void Destroy() + { + if (!m_codecCtx) + return; + ih264d_delete_ip_t s_delete_ip{ 0 }; + ih264d_delete_op_t s_delete_op{ 0 }; + s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t); + s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE; + s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t); + WORD32 status = ih264d_api_function(m_codecCtx, &s_delete_ip, &s_delete_op); + cemu_assert_debug(!status); + m_codecCtx = nullptr; + } + + void PushDecodedFrame(ivd_video_decode_op_t& s_dec_op) + { + // copy image data outside of lock since its an expensive operation + CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_decodedSliceArray[s_dec_op.u4_ts].result.imageOutput, s_dec_op); + + std::unique_lock _l(m_decodeQueueMtx); + cemu_assert(s_dec_op.u4_ts < m_decodedSliceArray.size()); + auto& result = m_decodedSliceArray[s_dec_op.u4_ts]; + cemu_assert_debug(result.isUsed); + cemu_assert_debug(s_dec_op.u4_output_present != 0); + + result.result.isDecoded = true; + result.result.hasFrame = s_dec_op.u4_output_present != 0; + result.result.frameWidth = s_dec_op.u4_pic_wd; + result.result.frameHeight = s_dec_op.u4_pic_ht; + result.result.bytesPerRow = (s_dec_op.u4_pic_wd + 0xFF) & ~0xFF; + result.result.cropEnable = s_dec_op.u1_frame_cropping_flag; + result.result.cropTop = s_dec_op.u1_frame_cropping_rect_top_ofst; + result.result.cropBottom = s_dec_op.u1_frame_cropping_rect_bottom_ofst; + result.result.cropLeft = s_dec_op.u1_frame_cropping_rect_left_ofst; + result.result.cropRight = s_dec_op.u1_frame_cropping_rect_right_ofst; + + m_displayQueue.push_back(s_dec_op.u4_ts); + + _l.unlock(); + coreinit::OSSignalEvent(m_displayQueueEvt); + } + + // called from async worker thread + void Decode(DecodedSlice& decodedSlice) + { + if (!m_hasBufferSizeInfo) + { + uint32 numByteConsumed = 0; + if (!DetermineBufferSizes(decodedSlice.dataToDecode.m_data, decodedSlice.dataToDecode.m_length, numByteConsumed)) + { + cemuLog_log(LogType::Force, "H264AVC: Unable to determine picture size. Ignoring decode input"); + std::unique_lock _l(m_decodeQueueMtx); + decodedSlice.result.isDecoded = true; + decodedSlice.result.hasFrame = false; + coreinit::OSSignalEvent(m_displayQueueEvt); + return; + } + decodedSlice.dataToDecode.m_length -= numByteConsumed; + decodedSlice.dataToDecode.m_data = (uint8*)decodedSlice.dataToDecode.m_data + numByteConsumed; + m_hasBufferSizeInfo = true; + } + + ivd_video_decode_ip_t s_dec_ip{ 0 }; + ivd_video_decode_op_t s_dec_op{ 0 }; + s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); + + s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + + s_dec_ip.u4_ts = std::distance(m_decodedSliceArray.data(), &decodedSlice); + cemu_assert_debug(s_dec_ip.u4_ts < m_decodedSliceArray.size()); + + s_dec_ip.pv_stream_buffer = (uint8*)decodedSlice.dataToDecode.m_data; + s_dec_ip.u4_num_Bytes = decodedSlice.dataToDecode.m_length; + + s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0; + s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0; + s_dec_ip.s_out_buffer.u4_num_bufs = 0; + + BenchmarkTimer bt; + bt.Start(); + WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); + if (status != 0 && (s_dec_op.u4_error_code&0xFF) == IVD_RES_CHANGED) + { + // resolution change + ResetDecoder(); + m_hasBufferSizeInfo = false; + Decode(decodedSlice); + return; + } + else if (status != 0) + { + cemuLog_log(LogType::Force, "H264: Failed to decode frame (error 0x{:08x})", status); + decodedSlice.result.hasFrame = false; + cemu_assert_unimplemented(); + return; + } + + bt.Stop(); + double decodeTime = bt.GetElapsedMilliseconds(); + + cemu_assert(s_dec_op.u4_frame_decoded_flag); + cemu_assert_debug(s_dec_op.u4_num_bytes_consumed == decodedSlice.dataToDecode.m_length); + + cemu_assert_debug(m_isBufferedMode || s_dec_op.u4_output_present); // if buffered mode is disabled, then every input should output a frame (except for partial slices?) + + if (s_dec_op.u4_output_present) + { + cemu_assert(s_dec_op.e_output_format == IV_YUV_420SP_UV); + if (H264_IsBotW()) + { + if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088) + s_dec_op.s_disp_frm_buf.u4_y_ht = 1080; + } + bt.Start(); + PushDecodedFrame(s_dec_op); + bt.Stop(); + double copyTime = bt.GetElapsedMilliseconds(); + // release buffer + sint32 bufferId = -1; + for (size_t i = 0; i < m_displayBuf.size(); i++) + { + if (s_dec_op.s_disp_frm_buf.pv_y_buf >= m_displayBuf[i].data() && s_dec_op.s_disp_frm_buf.pv_y_buf < (m_displayBuf[i].data() + m_displayBuf[i].size())) + { + bufferId = (sint32)i; + break; + } + } + cemu_assert_debug(bufferId == s_dec_op.u4_disp_buf_id); + cemu_assert(bufferId >= 0); + ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 }; + ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 }; + s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME; + s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t); + s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t); + s_video_rel_disp_ip.u4_disp_buf_id = bufferId; + status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op); + cemu_assert(!status); + + cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms CopyTime {}ms", decodeTime, copyTime); + } + else + { + cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms (no frame output)", decodeTime); + } + + if (s_dec_op.u4_frame_decoded_flag) + m_numDecodedFrames++; + // get VUI + //ih264d_ctl_get_vui_params_ip_t s_ctl_get_vui_params_ip; + //ih264d_ctl_get_vui_params_op_t s_ctl_get_vui_params_op; + + //s_ctl_get_vui_params_ip.e_cmd = IVD_CMD_VIDEO_CTL; + //s_ctl_get_vui_params_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_GET_VUI_PARAMS; + //s_ctl_get_vui_params_ip.u4_size = sizeof(ih264d_ctl_get_vui_params_ip_t); + //s_ctl_get_vui_params_op.u4_size = sizeof(ih264d_ctl_get_vui_params_op_t); + + //status = ih264d_api_function(mCodecCtx, &s_ctl_get_vui_params_ip, &s_ctl_get_vui_params_op); + //cemu_assert(status == 0); + } + + void Flush() + { + // set flush mode + ivd_ctl_flush_ip_t s_video_flush_ip{ 0 }; + ivd_ctl_flush_op_t s_video_flush_op{ 0 }; + s_video_flush_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_video_flush_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH; + s_video_flush_ip.u4_size = sizeof(ivd_ctl_flush_ip_t); + s_video_flush_op.u4_size = sizeof(ivd_ctl_flush_op_t); + WORD32 status = ih264d_api_function(m_codecCtx, &s_video_flush_ip, &s_video_flush_op); + if (status != 0) + cemuLog_log(LogType::Force, "H264Dec: Unexpected error during flush ({})", status); + // get all frames from the decoder + while (true) + { + ivd_video_decode_ip_t s_dec_ip{ 0 }; + ivd_video_decode_op_t s_dec_op{ 0 }; + s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); + s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + s_dec_ip.pv_stream_buffer = NULL; + s_dec_ip.u4_num_Bytes = 0; + s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0; + s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0; + s_dec_ip.s_out_buffer.u4_num_bufs = 0; + status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); + if (status != 0) + break; + cemu_assert_debug(s_dec_op.u4_output_present != 0); // should never be false? + if(s_dec_op.u4_output_present == 0) + continue; + if (H264_IsBotW()) + { + if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088) + s_dec_op.s_disp_frm_buf.u4_y_ht = 1080; + } + PushDecodedFrame(s_dec_op); + } + } + + void CopyImageToResultBuffer(uint8* yIn, uint8* uvIn, uint8* bufOut, ivd_video_decode_op_t& decodeInfo) + { + uint32 imageWidth = decodeInfo.s_disp_frm_buf.u4_y_wd; + uint32 imageHeight = decodeInfo.s_disp_frm_buf.u4_y_ht; + + size_t inputStride = decodeInfo.s_disp_frm_buf.u4_y_strd; + size_t outputStride = (imageWidth + 0xFF) & ~0xFF; + + // copy Y + uint8* yOut = bufOut; + for (uint32 row = 0; row < imageHeight; row++) + { + memcpy(yOut, yIn, imageWidth); + yIn += inputStride; + yOut += outputStride; + } + + // copy UV + uint8* uvOut = bufOut + outputStride * imageHeight; + for (uint32 row = 0; row < imageHeight/2; row++) + { + memcpy(uvOut, uvIn, imageWidth); + uvIn += inputStride; + uvOut += outputStride; + } + } + private: + void SetDecoderCoreCount(uint32 coreCount) + { + ih264d_ctl_set_num_cores_ip_t s_set_cores_ip; + ih264d_ctl_set_num_cores_op_t s_set_cores_op; + s_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES; + s_set_cores_ip.u4_num_cores = coreCount; // valid numbers are 1-4 + s_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t); + s_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t); + IV_API_CALL_STATUS_T status = ih264d_api_function(m_codecCtx, (void *)&s_set_cores_ip, (void *)&s_set_cores_op); + cemu_assert(status == IV_SUCCESS); + } + + bool DetermineBufferSizes(void* data, uint32 length, uint32& numByteConsumed) + { + numByteConsumed = 0; + UpdateParameters(true); + + ivd_video_decode_ip_t s_dec_ip{ 0 }; + ivd_video_decode_op_t s_dec_op{ 0 }; + s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); + + s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + s_dec_ip.pv_stream_buffer = (uint8*)data; + s_dec_ip.u4_num_Bytes = length; + s_dec_ip.s_out_buffer.u4_num_bufs = 0; + WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); + if (status != 0) + { + cemuLog_log(LogType::Force, "H264: Unable to determine buffer sizes for stream"); + return false; + } + numByteConsumed = s_dec_op.u4_num_bytes_consumed; + cemu_assert(status == 0); + if (s_dec_op.u4_pic_wd == 0 || s_dec_op.u4_pic_ht == 0) + return false; + UpdateParameters(false); + ReinitBuffers(); + return true; + } + + void ReinitBuffers() + { + ivd_ctl_getbufinfo_ip_t s_ctl_ip{ 0 }; + ivd_ctl_getbufinfo_op_t s_ctl_op{ 0 }; + WORD32 outlen = 0; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO; + s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t); + + WORD32 status = ih264d_api_function(m_codecCtx, &s_ctl_ip, &s_ctl_op); + cemu_assert(!status); + + // allocate + for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + { + m_displayBuf.emplace_back().resize(s_ctl_op.u4_min_out_buf_size[0] + s_ctl_op.u4_min_out_buf_size[1]); + } + // set + ivd_set_display_frame_ip_t s_set_display_frame_ip{ 0 }; // make sure to zero-initialize this. The codec seems to check the first 3 pointers/sizes per frame, regardless of the value of u4_num_bufs + ivd_set_display_frame_op_t s_set_display_frame_op{ 0 }; + + s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME; + s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t); + s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t); + + cemu_assert_debug(s_ctl_op.u4_min_num_out_bufs == 2); + cemu_assert_debug(s_ctl_op.u4_min_out_buf_size[0] != 0 && s_ctl_op.u4_min_out_buf_size[1] != 0); + + s_set_display_frame_ip.num_disp_bufs = s_ctl_op.u4_num_disp_bufs; + + for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + { + s_set_display_frame_ip.s_disp_buffer[i].u4_num_bufs = 2; + s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[0] = s_ctl_op.u4_min_out_buf_size[0]; + s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[1] = s_ctl_op.u4_min_out_buf_size[1]; + s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[0] = m_displayBuf[i].data() + 0; + s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[1] = m_displayBuf[i].data() + s_ctl_op.u4_min_out_buf_size[0]; + } + + status = ih264d_api_function(m_codecCtx, &s_set_display_frame_ip, &s_set_display_frame_op); + cemu_assert(!status); + + + // mark all as released (available) + for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + { + ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 }; + ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 }; + + s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME; + s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t); + s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t); + s_video_rel_disp_ip.u4_disp_buf_id = i; + + status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op); + cemu_assert(!status); + } + } + + void ResetDecoder() + { + ivd_ctl_reset_ip_t s_ctl_ip; + ivd_ctl_reset_op_t s_ctl_op; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET; + s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t); + + WORD32 status = ih264d_api_function(m_codecCtx, (void*)&s_ctl_ip, (void*)&s_ctl_op); + cemu_assert_debug(status == 0); + } + + void UpdateParameters(bool headerDecodeOnly) + { + ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 }; + ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 }; + ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t; + ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t; + + ps_ctl_ip->u4_disp_wd = 0; + ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE; + ps_ctl_ip->e_frm_out_mode = m_isBufferedMode ? IVD_DISPLAY_FRAME_OUT : IVD_DECODE_FRAME_OUT; + ps_ctl_ip->e_vid_dec_mode = headerDecodeOnly ? IVD_DECODE_HEADER : IVD_DECODE_FRAME; + ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL; + ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t); + ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t); + + WORD32 status = ih264d_api_function(m_codecCtx, &s_h264d_ctl_ip, &s_h264d_ctl_op); + cemu_assert(status == 0); + } + + private: + void DecoderThread() + { + while(!m_threadShouldExit) + { + m_decodeSem.decrementWithWait(); + std::unique_lock _l(m_decodeQueueMtx); + if (m_decodeQueue.empty()) + continue; + uint32 decodeIndex = m_decodeQueue.front(); + m_decodeQueue.erase(m_decodeQueue.begin()); + _l.unlock(); + if(decodeIndex == CMD_FLUSH) + { + Flush(); + _l.lock(); + cemu_assert_debug(m_decodeQueue.empty()); // after flushing the queue should be empty since the sender is waiting for the flush to complete + _l.unlock(); + coreinit::OSSignalEvent(m_flushEvt); + } + else + { + auto& decodedSlice = m_decodedSliceArray[decodeIndex]; + Decode(decodedSlice); + } + } + } + + iv_obj_t* m_codecCtx{nullptr}; + bool m_hasBufferSizeInfo{ false }; + bool m_isBufferedMode{ false }; + uint32 m_numDecodedFrames{0}; + std::vector> m_displayBuf; + + std::thread m_decoderThread; + std::atomic_bool m_threadShouldExit{false}; + }; + + H264DecoderBackend* CreateAVCDecoder() + { + return new H264AVCDecoder(); + } +}; diff --git a/src/Cafe/OS/libs/h264_avc/H264DecInternal.h b/src/Cafe/OS/libs/h264_avc/H264DecInternal.h new file mode 100644 index 00000000..498cccfa --- /dev/null +++ b/src/Cafe/OS/libs/h264_avc/H264DecInternal.h @@ -0,0 +1,139 @@ +#pragma once + +#include "util/helpers/Semaphore.h" +#include "Cafe/OS/libs/coreinit/coreinit_Thread.h" +#include "Cafe/OS/libs/coreinit/coreinit_SysHeap.h" + +#include "Cafe/OS/libs/h264_avc/parser/H264Parser.h" + +namespace H264 +{ + class H264DecoderBackend + { + protected: + struct DataToDecode + { + uint8* m_data; + uint32 m_length; + std::vector m_buffer; + }; + + static constexpr uint32 CMD_FLUSH = 0xFFFFFFFF; + + public: + struct DecodeResult + { + bool isDecoded{false}; + bool hasFrame{false}; // set to true if a full frame was successfully decoded + double timestamp{}; + void* imageOutput{nullptr}; + sint32 frameWidth{0}; + sint32 frameHeight{0}; + uint32 bytesPerRow{0}; + bool cropEnable{false}; + sint32 cropTop{0}; + sint32 cropBottom{0}; + sint32 cropLeft{0}; + sint32 cropRight{0}; + }; + + struct DecodedSlice + { + bool isUsed{false}; + DecodeResult result; + DataToDecode dataToDecode; + }; + + H264DecoderBackend() + { + m_displayQueueEvt = (coreinit::OSEvent*)coreinit::OSAllocFromSystem(sizeof(coreinit::OSEvent), 4); + coreinit::OSInitEvent(m_displayQueueEvt, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO); + m_flushEvt = (coreinit::OSEvent*)coreinit::OSAllocFromSystem(sizeof(coreinit::OSEvent), 4); + coreinit::OSInitEvent(m_flushEvt, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO); + }; + + virtual ~H264DecoderBackend() + { + coreinit::OSFreeToSystem(m_displayQueueEvt); + coreinit::OSFreeToSystem(m_flushEvt); + }; + + virtual void Init(bool isBufferedMode) = 0; + virtual void Destroy() = 0; + + void QueueForDecode(uint8* data, uint32 length, double timestamp, void* imagePtr) + { + std::unique_lock _l(m_decodeQueueMtx); + + DecodedSlice& ds = GetFreeDecodedSliceEntry(); + + ds.dataToDecode.m_buffer.assign(data, data + length); + ds.dataToDecode.m_data = ds.dataToDecode.m_buffer.data(); + ds.dataToDecode.m_length = length; + + ds.result.isDecoded = false; + ds.result.imageOutput = imagePtr; + ds.result.timestamp = timestamp; + + m_decodeQueue.push_back(std::distance(m_decodedSliceArray.data(), &ds)); + m_decodeSem.increment(); + } + + void QueueFlush() + { + std::unique_lock _l(m_decodeQueueMtx); + m_decodeQueue.push_back(CMD_FLUSH); + m_decodeSem.increment(); + } + + bool GetFrameOutputIfReady(DecodeResult& result) + { + std::unique_lock _l(m_decodeQueueMtx); + if(m_displayQueue.empty()) + return false; + uint32 sliceIndex = m_displayQueue.front(); + DecodedSlice& ds = m_decodedSliceArray[sliceIndex]; + cemu_assert_debug(ds.result.isDecoded); + std::swap(result, ds.result); + ds.isUsed = false; + m_displayQueue.erase(m_displayQueue.begin()); + return true; + } + + coreinit::OSEvent& GetFrameOutputEvent() + { + return *m_displayQueueEvt; + } + + coreinit::OSEvent& GetFlushEvent() + { + return *m_flushEvt; + } + + protected: + DecodedSlice& GetFreeDecodedSliceEntry() + { + for (auto& slice : m_decodedSliceArray) + { + if (!slice.isUsed) + { + slice.isUsed = true; + return slice; + } + } + cemu_assert_suspicious(); + return m_decodedSliceArray[0]; + } + + std::mutex m_decodeQueueMtx; + std::vector m_decodeQueue; // indices into m_decodedSliceArray, in order of decode input + CounterSemaphore m_decodeSem; + std::vector m_displayQueue; // indices into m_decodedSliceArray, in order of frame display output + coreinit::OSEvent* m_displayQueueEvt; // signalled when a new frame is ready for display + coreinit::OSEvent* m_flushEvt; // signalled after flush operation finished and all queued slices are decoded + + // frame output queue + std::mutex m_frameOutputMtx; + std::array m_decodedSliceArray; + }; +} \ No newline at end of file diff --git a/src/Cafe/OS/libs/h264_avc/parser/H264Parser.cpp b/src/Cafe/OS/libs/h264_avc/parser/H264Parser.cpp index d77e551f..36f70f81 100644 --- a/src/Cafe/OS/libs/h264_avc/parser/H264Parser.cpp +++ b/src/Cafe/OS/libs/h264_avc/parser/H264Parser.cpp @@ -319,6 +319,17 @@ bool parseNAL_pic_parameter_set_rbsp(h264ParserState_t* h264ParserState, h264Par return true; } +bool h264Parser_ParseSPS(uint8* data, uint32 length, h264State_seq_parameter_set_t& sps) +{ + h264ParserState_t parserState; + RBSPInputBitstream nalStream(data, length); + bool r = parseNAL_seq_parameter_set_rbsp(&parserState, nullptr, nalStream); + if(!r || !parserState.hasSPS) + return false; + sps = parserState.sps; + return true; +} + void parseNAL_ref_pic_list_modification(const h264State_seq_parameter_set_t& sps, const h264State_pic_parameter_set_t& pps, RBSPInputBitstream& nalStream, nal_slice_header_t* sliceHeader) { if (!sliceHeader->slice_type.isSliceTypeI() && !sliceHeader->slice_type.isSliceTypeSI()) @@ -688,9 +699,8 @@ void _calculateFrameOrder(h264ParserState_t* h264ParserState, const h264State_se else if (sps.pic_order_cnt_type == 2) { // display order matches decode order - uint32 prevFrameNum = h264ParserState->picture_order.prevFrameNum; - ; + uint32 FrameNumOffset; if (sliceHeader->IdrPicFlag) { @@ -706,9 +716,6 @@ void _calculateFrameOrder(h264ParserState_t* h264ParserState, const h264State_se FrameNumOffset = prevFrameNumOffset + sps.getMaxFrameNum(); else FrameNumOffset = prevFrameNumOffset; - - - } uint32 tempPicOrderCnt; diff --git a/src/Cafe/OS/libs/h264_avc/parser/H264Parser.h b/src/Cafe/OS/libs/h264_avc/parser/H264Parser.h index ee32ca8b..6f2b3cf6 100644 --- a/src/Cafe/OS/libs/h264_avc/parser/H264Parser.h +++ b/src/Cafe/OS/libs/h264_avc/parser/H264Parser.h @@ -513,6 +513,8 @@ typedef struct void h264Parse(h264ParserState_t* h264ParserState, h264ParserOutput_t* output, uint8* data, uint32 length, bool parseSlices = true); sint32 h264GetUnitLength(h264ParserState_t* h264ParserState, uint8* data, uint32 length); +bool h264Parser_ParseSPS(uint8* data, uint32 length, h264State_seq_parameter_set_t& sps); + void h264Parser_getScalingMatrix4x4(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix4x4); void h264Parser_getScalingMatrix8x8(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix8x8); diff --git a/src/Cafe/OS/libs/nsyshid/Skylander.cpp b/src/Cafe/OS/libs/nsyshid/Skylander.cpp index a9888787..1b4515ef 100644 --- a/src/Cafe/OS/libs/nsyshid/Skylander.cpp +++ b/src/Cafe/OS/libs/nsyshid/Skylander.cpp @@ -978,7 +978,7 @@ namespace nsyshid { for (const auto& it : GetListSkylanders()) { - if(it.first.first == skyId && it.first.second == skyVar) + if (it.first.first == skyId && it.first.second == skyVar) { return it.second; } diff --git a/src/Cafe/OS/libs/nsyshid/Skylander.h b/src/Cafe/OS/libs/nsyshid/Skylander.h index 95eaff0c..986ef185 100644 --- a/src/Cafe/OS/libs/nsyshid/Skylander.h +++ b/src/Cafe/OS/libs/nsyshid/Skylander.h @@ -50,7 +50,7 @@ namespace nsyshid std::unique_ptr skyFile; uint8 status = 0; std::queue queuedStatus; - std::array data{}; + std::array data{}; uint32 lastId = 0; void Save(); diff --git a/src/Cemu/napi/napi_helper.cpp b/src/Cemu/napi/napi_helper.cpp index 164de7e5..182c5371 100644 --- a/src/Cemu/napi/napi_helper.cpp +++ b/src/Cemu/napi/napi_helper.cpp @@ -107,6 +107,7 @@ CurlRequestHelper::CurlRequestHelper() curl_easy_setopt(m_curl, CURLOPT_FOLLOWLOCATION, 1); curl_easy_setopt(m_curl, CURLOPT_MAXREDIRS, 2); + curl_easy_setopt(m_curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); if(GetConfig().proxy_server.GetValue() != "") { @@ -263,6 +264,7 @@ CurlSOAPHelper::CurlSOAPHelper(NetworkService service) m_curl = curl_easy_init(); curl_easy_setopt(m_curl, CURLOPT_WRITEFUNCTION, __curlWriteCallback); curl_easy_setopt(m_curl, CURLOPT_WRITEDATA, this); + curl_easy_setopt(m_curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); // SSL if (!IsNetworkServiceSSLDisabled(service)) diff --git a/src/Common/ExceptionHandler/ExceptionHandler.cpp b/src/Common/ExceptionHandler/ExceptionHandler.cpp index b6755fd8..7530a2eb 100644 --- a/src/Common/ExceptionHandler/ExceptionHandler.cpp +++ b/src/Common/ExceptionHandler/ExceptionHandler.cpp @@ -6,8 +6,6 @@ #include "Cafe/HW/Espresso/Debugger/GDBStub.h" #include "ExceptionHandler.h" -void DebugLogStackTrace(OSThread_t* thread, MPTR sp); - bool crashLogCreated = false; bool CrashLog_Create() @@ -97,7 +95,7 @@ void ExceptionHandler_LogGeneralInfo() MPTR currentStackVAddr = hCPU->gpr[1]; CrashLog_WriteLine(""); CrashLog_WriteHeader("PPC stack trace"); - DebugLogStackTrace(currentThread, currentStackVAddr); + DebugLogStackTrace(currentThread, currentStackVAddr, true); // stack dump CrashLog_WriteLine(""); diff --git a/src/Common/precompiled.h b/src/Common/precompiled.h index 790a001a..61707519 100644 --- a/src/Common/precompiled.h +++ b/src/Common/precompiled.h @@ -552,6 +552,9 @@ inline uint32 GetTitleIdLow(uint64 titleId) #include "Cafe/HW/Espresso/PPCState.h" #include "Cafe/HW/Espresso/PPCCallback.h" +// PPC stack trace printer +void DebugLogStackTrace(struct OSThread_t* thread, MPTR sp, bool printSymbols = false); + // generic formatter for enums (to underlying) template requires std::is_enum_v diff --git a/src/config/CemuConfig.h b/src/config/CemuConfig.h index 0dc23ce1..5f761e01 100644 --- a/src/config/CemuConfig.h +++ b/src/config/CemuConfig.h @@ -520,7 +520,7 @@ struct CemuConfig struct { ConfigValue emulate_skylander_portal{false}; - ConfigValue emulate_infinity_base{true}; + ConfigValue emulate_infinity_base{false}; }emulated_usb_devices{}; private: diff --git a/src/gui/EmulatedUSBDevices/EmulatedUSBDeviceFrame.cpp b/src/gui/EmulatedUSBDevices/EmulatedUSBDeviceFrame.cpp index f4784f35..3a0f534a 100644 --- a/src/gui/EmulatedUSBDevices/EmulatedUSBDeviceFrame.cpp +++ b/src/gui/EmulatedUSBDevices/EmulatedUSBDeviceFrame.cpp @@ -398,7 +398,7 @@ CreateInfinityFigureDialog::CreateInfinityFigureDialog(wxWindow* parent, uint8 s { wxMessageDialog idError(this, "Error Converting Figure Number!", "Number Entered is Invalid"); idError.ShowModal(); - this->EndModal(0);; + this->EndModal(0); } uint32 figNum = longFigNum & 0xFFFFFFFF; auto figure = nsyshid::g_infinitybase.FindFigure(figNum); @@ -408,7 +408,7 @@ CreateInfinityFigureDialog::CreateInfinityFigureDialog(wxWindow* parent, uint8 s "BIN files (*.bin)|*.bin", wxFD_SAVE | wxFD_OVERWRITE_PROMPT); if (saveFileDialog.ShowModal() == wxID_CANCEL) - this->EndModal(0);; + this->EndModal(0); m_filePath = saveFileDialog.GetPath(); diff --git a/src/gui/debugger/SymbolCtrl.cpp b/src/gui/debugger/SymbolCtrl.cpp index cb1f3b1a..aa862987 100644 --- a/src/gui/debugger/SymbolCtrl.cpp +++ b/src/gui/debugger/SymbolCtrl.cpp @@ -46,25 +46,25 @@ SymbolListCtrl::SymbolListCtrl(wxWindow* parent, const wxWindowID& id, const wxP void SymbolListCtrl::OnGameLoaded() { m_data.clear(); - long itemId = 0; const auto symbol_map = rplSymbolStorage_lockSymbolMap(); for (auto const& [address, symbol_info] : symbol_map) { if (symbol_info == nullptr || symbol_info->symbolName == nullptr) continue; + wxString libNameWX = wxString::FromAscii((const char*)symbol_info->libName); + wxString symbolNameWX = wxString::FromAscii((const char*)symbol_info->symbolName); + wxString searchNameWX = libNameWX + symbolNameWX; + searchNameWX.MakeLower(); + auto new_entry = m_data.try_emplace( symbol_info->address, - (char*)(symbol_info->symbolName), - (char*)(symbol_info->libName), - "", + symbolNameWX, + libNameWX, + searchNameWX, false ); - new_entry.first->second.searchName += new_entry.first->second.name; - new_entry.first->second.searchName += new_entry.first->second.libName; - new_entry.first->second.searchName.MakeLower(); - if (m_list_filter.IsEmpty()) new_entry.first->second.visible = true; else if (new_entry.first->second.searchName.Contains(m_list_filter)) diff --git a/src/gui/windows/PPCThreadsViewer/DebugPPCThreadsWindow.cpp b/src/gui/windows/PPCThreadsViewer/DebugPPCThreadsWindow.cpp index dfbaf76e..f4e5b7af 100644 --- a/src/gui/windows/PPCThreadsViewer/DebugPPCThreadsWindow.cpp +++ b/src/gui/windows/PPCThreadsViewer/DebugPPCThreadsWindow.cpp @@ -277,12 +277,10 @@ void DebugPPCThreadsWindow::RefreshThreadList() m_thread_list->SetScrollPos(0, scrollPos, true); } -void DebugLogStackTrace(OSThread_t* thread, MPTR sp); - void DebugPPCThreadsWindow::DumpStackTrace(OSThread_t* thread) { cemuLog_log(LogType::Force, "Dumping stack trace for thread {0:08x} LR: {1:08x}", memory_getVirtualOffsetFromPointer(thread), _swapEndianU32(thread->context.lr)); - DebugLogStackTrace(thread, _swapEndianU32(thread->context.gpr[1])); + DebugLogStackTrace(thread, _swapEndianU32(thread->context.gpr[1]), true); } void DebugPPCThreadsWindow::PresentProfileResults(OSThread_t* thread, const std::unordered_map& samples) diff --git a/src/resource/embedded/fontawesome.S b/src/resource/embedded/fontawesome.S index 29b4f93a..db23e7ae 100644 --- a/src/resource/embedded/fontawesome.S +++ b/src/resource/embedded/fontawesome.S @@ -1,4 +1,4 @@ -.rodata +.section .rodata,"",%progbits .global g_fontawesome_data, g_fontawesome_size g_fontawesome_data: @@ -6,3 +6,4 @@ g_fontawesome_data: g_fontawesome_size: .int g_fontawesome_size - g_fontawesome_data +.section .note.GNU-stack,"",%progbits \ No newline at end of file