From 4519a59d746dc0eb2a427c0f379c6c4c600d89dd Mon Sep 17 00:00:00 2001 From: emiyl Date: Tue, 4 Oct 2022 14:24:14 +0100 Subject: [PATCH] [ih264] per-function target attribute on clang and GCC (#328) --- dependencies/ih264d/CMakeLists.txt | 4 --- .../ih264_chroma_intra_pred_filters_ssse3.c | 11 ++++++++ .../common/x86/ih264_deblk_chroma_ssse3.c | 18 ++++++++++++ .../common/x86/ih264_deblk_luma_ssse3.c | 12 ++++++++ .../x86/ih264_ihadamard_scaling_sse42.c | 8 ++++++ .../x86/ih264_ihadamard_scaling_ssse3.c | 7 +++++ .../x86/ih264_inter_pred_filters_ssse3.c | 16 +++++++++++ .../x86/ih264_iquant_itrans_recon_dc_ssse3.c | 9 ++++++ .../x86/ih264_iquant_itrans_recon_sse42.c | 8 ++++++ .../x86/ih264_iquant_itrans_recon_ssse3.c | 8 ++++++ .../x86/ih264_luma_intra_pred_filters_ssse3.c | 28 +++++++++++++++++++ .../ih264d/common/x86/ih264_mem_fns_ssse3.c | 8 ++++++ .../ih264d/common/x86/ih264_padding_ssse3.c | 10 +++++++ .../common/x86/ih264_resi_trans_quant_sse42.c | 10 +++++++ .../common/x86/ih264_weighted_pred_sse42.c | 12 ++++++++ .../x86/ih264d_function_selector_sse42.c | 7 +++++ .../x86/ih264d_function_selector_ssse3.c | 7 +++++ 17 files changed, 179 insertions(+), 4 deletions(-) diff --git a/dependencies/ih264d/CMakeLists.txt b/dependencies/ih264d/CMakeLists.txt index 4f538f69..295b028a 100644 --- a/dependencies/ih264d/CMakeLists.txt +++ b/dependencies/ih264d/CMakeLists.txt @@ -6,10 +6,6 @@ set(LIBAVCDEC_X86_INCLUDES "common/x86" "decoder/x86") include_directories("common/" "decoder/" ${LIBAVCDEC_X86_INCLUDES}) -if((CMAKE_C_COMPILER_ID MATCHES "GNU") OR (CMAKE_C_COMPILER_ID MATCHES "Clang")) - add_compile_options(-mssse3 -mavx2) -endif() - add_library (ih264d "common/ih264_buf_mgr.c" "common/ih264_buf_mgr.h" diff --git a/dependencies/ih264d/common/x86/ih264_chroma_intra_pred_filters_ssse3.c b/dependencies/ih264d/common/x86/ih264_chroma_intra_pred_filters_ssse3.c index d43ce207..502420b1 100644 --- a/dependencies/ih264d/common/x86/ih264_chroma_intra_pred_filters_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_chroma_intra_pred_filters_ssse3.c @@ -56,6 +56,11 @@ #include "ih264_platform_macros.h" #include "ih264_intra_pred_filters.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif /*****************************************************************************/ /* Chroma Intra prediction 8x8 filters */ @@ -93,6 +98,8 @@ * ****************************************************************************** */ + +ATTRIBUTE_SSSE3 void ih264_intra_pred_chroma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -169,6 +176,8 @@ void ih264_intra_pred_chroma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, * ******************************************************************************* */ + +ATTRIBUTE_SSSE3 void ih264_intra_pred_chroma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -237,6 +246,8 @@ void ih264_intra_pred_chroma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, * ****************************************************************************** */ + +ATTRIBUTE_SSSE3 void ih264_intra_pred_chroma_8x8_mode_plane_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, diff --git a/dependencies/ih264d/common/x86/ih264_deblk_chroma_ssse3.c b/dependencies/ih264d/common/x86/ih264_deblk_chroma_ssse3.c index a36447a2..d73d9d35 100644 --- a/dependencies/ih264d/common/x86/ih264_deblk_chroma_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_deblk_chroma_ssse3.c @@ -53,6 +53,12 @@ #include "ih264_deblk_edge_filters.h" #include "ih264_macros.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /*****************************************************************************/ /* Function Definitions */ /*****************************************************************************/ @@ -91,6 +97,8 @@ /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_vert_bs4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, @@ -274,6 +282,8 @@ void ih264_deblk_chroma_vert_bs4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_horz_bs4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, @@ -424,6 +434,8 @@ void ih264_deblk_chroma_horz_bs4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_vert_bslt4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, @@ -645,6 +657,8 @@ void ih264_deblk_chroma_vert_bslt4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_horz_bslt4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, @@ -829,6 +843,8 @@ void ih264_deblk_chroma_horz_bslt4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, @@ -963,6 +979,8 @@ void ih264_deblk_chroma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_vert_bslt4_mbaff_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, diff --git a/dependencies/ih264d/common/x86/ih264_deblk_luma_ssse3.c b/dependencies/ih264d/common/x86/ih264_deblk_luma_ssse3.c index e29bebbe..c135f6b6 100644 --- a/dependencies/ih264d/common/x86/ih264_deblk_luma_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_deblk_luma_ssse3.c @@ -53,6 +53,12 @@ #include "ih264_deblk_edge_filters.h" #include "ih264_macros.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /*****************************************************************************/ /* Function Definitions */ /*****************************************************************************/ @@ -87,6 +93,7 @@ /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_vert_bs4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, @@ -508,6 +515,7 @@ void ih264_deblk_luma_vert_bs4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_horz_bs4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, @@ -847,6 +855,7 @@ void ih264_deblk_luma_horz_bs4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_vert_bslt4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, @@ -1142,6 +1151,7 @@ void ih264_deblk_luma_vert_bslt4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_horz_bslt4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, @@ -1439,6 +1449,7 @@ void ih264_deblk_luma_horz_bslt4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, @@ -1758,6 +1769,7 @@ void ih264_deblk_luma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_vert_bslt4_mbaff_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, diff --git a/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_sse42.c b/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_sse42.c index 3c4bb1c6..bf8e88ed 100644 --- a/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_sse42.c +++ b/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_sse42.c @@ -52,6 +52,12 @@ #include #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSE42 __attribute__((target("sse4.2"))) +#else +#define ATTRIBUTE_SSE42 +#endif + /* ******************************************************************************** * @@ -87,6 +93,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264_ihadamard_scaling_4x4_sse42(WORD16* pi2_src, WORD16* pi2_out, const UWORD16 *pu2_iscal_mat, @@ -202,6 +209,7 @@ void ih264_ihadamard_scaling_4x4_sse42(WORD16* pi2_src, _mm_storeu_si128((__m128i *) (&pi2_out[8]), src_r2_r3); } +ATTRIBUTE_SSE42 void ih264_ihadamard_scaling_2x2_uv_sse42(WORD16* pi2_src, WORD16* pi2_out, const UWORD16 *pu2_iscal_mat, diff --git a/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_ssse3.c b/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_ssse3.c index b4d483f1..4dc6d827 100644 --- a/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_ssse3.c @@ -50,6 +50,12 @@ #include "ih264_trans_quant_itrans_iquant.h" #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /* ******************************************************************************** * @@ -85,6 +91,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_ihadamard_scaling_4x4_ssse3(WORD16* pi2_src, WORD16* pi2_out, const UWORD16 *pu2_iscal_mat, diff --git a/dependencies/ih264d/common/x86/ih264_inter_pred_filters_ssse3.c b/dependencies/ih264d/common/x86/ih264_inter_pred_filters_ssse3.c index 480a8c7c..7927eeb6 100644 --- a/dependencies/ih264d/common/x86/ih264_inter_pred_filters_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_inter_pred_filters_ssse3.c @@ -54,6 +54,12 @@ #include "ih264_platform_macros.h" #include "ih264_inter_pred_filters.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /*****************************************************************************/ /* Constant Data variables */ /*****************************************************************************/ @@ -87,6 +93,7 @@ /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_copy_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -213,6 +220,7 @@ void ih264_inter_pred_luma_copy_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -478,6 +486,7 @@ void ih264_inter_pred_luma_horz_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_vert_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -764,6 +773,7 @@ void ih264_inter_pred_luma_vert_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1488,6 +1498,7 @@ void ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_qpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1782,6 +1793,7 @@ void ih264_inter_pred_luma_horz_qpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -2107,6 +2119,7 @@ void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -2675,6 +2688,7 @@ void ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -3285,6 +3299,7 @@ void ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -3991,6 +4006,7 @@ void ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_chroma_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, diff --git a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c index bcfe503f..10dd2647 100644 --- a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c @@ -50,6 +50,12 @@ #include "ih264_trans_quant_itrans_iquant.h" #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /* ******************************************************************************** * @@ -98,6 +104,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_iquant_itrans_recon_4x4_dc_ssse3(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, @@ -224,6 +231,7 @@ void ih264_iquant_itrans_recon_4x4_dc_ssse3(WORD16 *pi2_src, ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_iquant_itrans_recon_8x8_dc_ssse3 (WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, @@ -385,6 +393,7 @@ void ih264_iquant_itrans_recon_8x8_dc_ssse3 (WORD16 *pi2_src, * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, diff --git a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_sse42.c b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_sse42.c index a7b9e824..e97ca4d0 100644 --- a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_sse42.c +++ b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_sse42.c @@ -50,6 +50,12 @@ #include "ih264_trans_quant_itrans_iquant.h" #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSE42 __attribute__((target("sse4.2"))) +#else +#define ATTRIBUTE_SSE42 +#endif + /* ******************************************************************************** * @@ -97,6 +103,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264_iquant_itrans_recon_4x4_sse42(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, @@ -348,6 +355,7 @@ void ih264_iquant_itrans_recon_4x4_sse42(WORD16 *pi2_src, * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264_iquant_itrans_recon_chroma_4x4_sse42(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, diff --git a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_ssse3.c b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_ssse3.c index 506be495..96773253 100644 --- a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_ssse3.c @@ -50,6 +50,12 @@ #include "ih264_trans_quant_itrans_iquant.h" #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /* ******************************************************************************** * @@ -97,6 +103,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_iquant_itrans_recon_4x4_ssse3(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, @@ -366,6 +373,7 @@ void ih264_iquant_itrans_recon_4x4_ssse3(WORD16 *pi2_src, ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_iquant_itrans_recon_8x8_ssse3(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, diff --git a/dependencies/ih264d/common/x86/ih264_luma_intra_pred_filters_ssse3.c b/dependencies/ih264d/common/x86/ih264_luma_intra_pred_filters_ssse3.c index a1721d52..417e986b 100644 --- a/dependencies/ih264d/common/x86/ih264_luma_intra_pred_filters_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_luma_intra_pred_filters_ssse3.c @@ -75,6 +75,12 @@ #include "ih264_platform_macros.h" #include "ih264_intra_pred_filters.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /******************* LUMA INTRAPREDICTION *******************/ @@ -114,6 +120,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_vert_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -173,6 +180,7 @@ void ih264_intra_pred_luma_4x4_mode_vert_ssse3(UWORD8 *pu1_src, * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_horz_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -238,6 +246,7 @@ void ih264_intra_pred_luma_4x4_mode_horz_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_dc_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -316,6 +325,7 @@ void ih264_intra_pred_luma_4x4_mode_dc_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -400,6 +410,7 @@ void ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -487,6 +498,7 @@ void ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_vert_r_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -579,6 +591,7 @@ void ih264_intra_pred_luma_4x4_mode_vert_r_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_horz_d_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -675,6 +688,7 @@ void ih264_intra_pred_luma_4x4_mode_horz_d_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_vert_l_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -764,6 +778,7 @@ void ih264_intra_pred_luma_4x4_mode_vert_l_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_horz_u_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -864,6 +879,7 @@ void ih264_intra_pred_luma_4x4_mode_horz_u_ssse3(UWORD8 *pu1_src, * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -921,6 +937,7 @@ void ih264_intra_pred_luma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -985,6 +1002,7 @@ void ih264_intra_pred_luma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_dc_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1078,6 +1096,7 @@ void ih264_intra_pred_luma_8x8_mode_dc_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1176,6 +1195,7 @@ void ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1278,6 +1298,7 @@ void ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_vert_r_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1398,6 +1419,7 @@ void ih264_intra_pred_luma_8x8_mode_vert_r_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_horz_d_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1502,6 +1524,7 @@ void ih264_intra_pred_luma_8x8_mode_horz_d_ssse3(UWORD8 *pu1_src, * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_vert_l_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1598,6 +1621,7 @@ void ih264_intra_pred_luma_8x8_mode_vert_l_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_horz_u_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1699,6 +1723,7 @@ void ih264_intra_pred_luma_8x8_mode_horz_u_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_16x16_mode_vert_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1778,6 +1803,7 @@ void ih264_intra_pred_luma_16x16_mode_vert_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_16x16_mode_horz_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1875,6 +1901,7 @@ void ih264_intra_pred_luma_16x16_mode_horz_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_16x16_mode_dc_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1998,6 +2025,7 @@ void ih264_intra_pred_luma_16x16_mode_dc_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_16x16_mode_plane_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, diff --git a/dependencies/ih264d/common/x86/ih264_mem_fns_ssse3.c b/dependencies/ih264d/common/x86/ih264_mem_fns_ssse3.c index 8ca1f3e5..be3d622c 100644 --- a/dependencies/ih264d/common/x86/ih264_mem_fns_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_mem_fns_ssse3.c @@ -50,6 +50,12 @@ #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /** ******************************************************************************* * @@ -78,6 +84,7 @@ +ATTRIBUTE_SSSE3 void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes) { int col; @@ -117,6 +124,7 @@ void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_byte */ +ATTRIBUTE_SSSE3 void ih264_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes) { int col; diff --git a/dependencies/ih264d/common/x86/ih264_padding_ssse3.c b/dependencies/ih264d/common/x86/ih264_padding_ssse3.c index 43ded8e7..d2aa368a 100644 --- a/dependencies/ih264d/common/x86/ih264_padding_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_padding_ssse3.c @@ -49,6 +49,12 @@ #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /** ******************************************************************************* @@ -89,6 +95,7 @@ ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, @@ -156,6 +163,7 @@ void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src, ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, @@ -222,6 +230,7 @@ void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src, ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, @@ -289,6 +298,7 @@ void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src, ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_pad_right_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, diff --git a/dependencies/ih264d/common/x86/ih264_resi_trans_quant_sse42.c b/dependencies/ih264d/common/x86/ih264_resi_trans_quant_sse42.c index f4f5cbfa..fa3442f0 100644 --- a/dependencies/ih264d/common/x86/ih264_resi_trans_quant_sse42.c +++ b/dependencies/ih264d/common/x86/ih264_resi_trans_quant_sse42.c @@ -51,6 +51,12 @@ #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" #include + +#ifdef __GNUC__ +#define ATTRIBUTE_SSE42 __attribute__((target("sse4.2"))) +#else +#define ATTRIBUTE_SSE42 +#endif /** ******************************************************************************* * @@ -103,6 +109,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264_resi_trans_quant_4x4_sse42(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, @@ -376,6 +383,7 @@ void ih264_resi_trans_quant_4x4_sse42(UWORD8 *pu1_src, UWORD8 *pu1_pred, * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264_resi_trans_quant_chroma_4x4_sse42(UWORD8 *pu1_src,UWORD8 *pu1_pred,WORD16 *pi2_out, WORD32 src_strd,WORD32 pred_strd, const UWORD16 *pu2_scale_matrix, @@ -663,6 +671,7 @@ void ih264_resi_trans_quant_chroma_4x4_sse42(UWORD8 *pu1_src,UWORD8 *pu1_pred,WO * */ +ATTRIBUTE_SSE42 void ih264_hadamard_quant_4x4_sse42(WORD16 *pi2_src, WORD16 *pi2_dst, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, @@ -892,6 +901,7 @@ void ih264_hadamard_quant_4x4_sse42(WORD16 *pi2_src, WORD16 *pi2_dst, * */ +ATTRIBUTE_SSE42 void ih264_hadamard_quant_2x2_uv_sse42(WORD16 *pi2_src, WORD16 *pi2_dst, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, diff --git a/dependencies/ih264d/common/x86/ih264_weighted_pred_sse42.c b/dependencies/ih264d/common/x86/ih264_weighted_pred_sse42.c index 48f1f542..8e10db28 100644 --- a/dependencies/ih264d/common/x86/ih264_weighted_pred_sse42.c +++ b/dependencies/ih264d/common/x86/ih264_weighted_pred_sse42.c @@ -50,6 +50,12 @@ #include "ih264_platform_macros.h" #include "ih264_weighted_pred.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSE42 __attribute__((target("sse4.2"))) +#else +#define ATTRIBUTE_SSE42 +#endif + /*****************************************************************************/ /* Function definitions . */ /*****************************************************************************/ @@ -82,6 +88,7 @@ /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_default_weighted_pred_luma_sse42(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, @@ -245,6 +252,7 @@ void ih264_default_weighted_pred_luma_sse42(UWORD8 *pu1_src1, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_default_weighted_pred_chroma_sse42(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, @@ -375,6 +383,7 @@ void ih264_default_weighted_pred_chroma_sse42(UWORD8 *pu1_src1, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_weighted_pred_luma_sse42(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -605,6 +614,7 @@ void ih264_weighted_pred_luma_sse42(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_weighted_pred_chroma_sse42(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -814,6 +824,7 @@ void ih264_weighted_pred_chroma_sse42(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_weighted_bi_pred_luma_sse42(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, @@ -1101,6 +1112,7 @@ void ih264_weighted_bi_pred_luma_sse42(UWORD8 *pu1_src1, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_weighted_bi_pred_chroma_sse42(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, diff --git a/dependencies/ih264d/decoder/x86/ih264d_function_selector_sse42.c b/dependencies/ih264d/decoder/x86/ih264d_function_selector_sse42.c index 0c493d22..c7636f38 100644 --- a/dependencies/ih264d/decoder/x86/ih264d_function_selector_sse42.c +++ b/dependencies/ih264d/decoder/x86/ih264d_function_selector_sse42.c @@ -60,6 +60,12 @@ #include "ih264d_structs.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSE42 __attribute__((target("sse4.2"))) +#else +#define ATTRIBUTE_SSE42 +#endif + /** ******************************************************************************* @@ -79,6 +85,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264d_init_function_ptr_sse42(dec_struct_t *ps_codec) { ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma_sse42; diff --git a/dependencies/ih264d/decoder/x86/ih264d_function_selector_ssse3.c b/dependencies/ih264d/decoder/x86/ih264d_function_selector_ssse3.c index 17862139..cd8043c6 100644 --- a/dependencies/ih264d/decoder/x86/ih264d_function_selector_ssse3.c +++ b/dependencies/ih264d/decoder/x86/ih264d_function_selector_ssse3.c @@ -60,6 +60,12 @@ #include "ih264d_structs.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /** ******************************************************************************* @@ -79,6 +85,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264d_init_function_ptr_ssse3(dec_struct_t *ps_codec) {