Per-function target attribute on clang and GCC. (#152)

This commit is contained in:
Tom Lally 2022-09-02 18:10:41 +01:00 committed by GitHub
parent 68fa5b32a1
commit f5972dfbb0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 29 deletions

View File

@ -1,9 +1,5 @@
project(CemuCafe)
if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
add_compile_options(-mssse3 -mavx2)
endif()
file(GLOB_RECURSE CPP_FILES *.cpp)
file(GLOB_RECURSE H_FILES *.h)

View File

@ -3,10 +3,18 @@
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#if BOOST_OS_LINUX
#if __GNUC__
#include <immintrin.h>
#endif
#ifdef __GNUC__
#define ATTRIBUTE_AVX2 __attribute__((target("avx2")))
#define ATTRIBUTE_SSE41 __attribute__((target("sse4.1")))
#else
#define ATTRIBUTE_AVX2
#define ATTRIBUTE_SSE41
#endif
struct
{
const void* lastPtr;
@ -284,10 +292,7 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun
indexMax = std::max(count, 1u) - 1;
}
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif
ATTRIBUTE_AVX2
void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
// using AVX + AVX2 we can process 16 indices at a time
@ -352,14 +357,7 @@ void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDat
indexMin = std::min(indexMin, _minIndex);
}
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute pop
#endif
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif
ATTRIBUTE_SSE41
void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
// SSSE3 & SSE4.1 optimized decoding
@ -423,14 +421,7 @@ void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDa
indexMin = std::min(indexMin, _minIndex);
}
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute pop
#endif
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif
ATTRIBUTE_AVX2
void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
// using AVX + AVX2 we can process 8 indices at a time
@ -497,10 +488,6 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat
indexMin = std::min(indexMin, _minIndex);
}
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute pop
#endif
template<typename T>
void _LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, uint32 count, uint32 primitiveRestartIndex, uint32& indexMin, uint32& indexMax)
{