re3-wiiu/rwsdk/include/d3d8/rtintel.h
2019-05-18 12:39:39 +02:00

1207 lines
49 KiB
C

/**
* Intel specific support toolkit
*/
/**********************************************************************
*
* File : rtintel.h
*
* Abstract : Intel specific support/emulation
*
**********************************************************************
*
* This file is a product of Criterion Software Ltd.
*
* This file is provided as is with no warranties of any kind and is
* provided without any obligation on Criterion Software Ltd. or
* Canon Inc. to assist in its use or modification.
*
* Criterion Software Ltd. will not, under any
* circumstances, be liable for any lost revenue or other damages arising
* from the use of this file.
*
* Copyright (c) 1998 Criterion Software Ltd.
* All Rights Reserved.
*
* RenderWare is a trademark of Canon Inc.
*
************************************************************************/
#ifndef RTINTEL_H
#define RTINTEL_H
/**
* \defgroup rtintel RtIntel
* \ingroup rttool
*
* Intel CPU Toolkit for RenderWare.
*/
/****************************************************************************
Include files
*/
/*
* Pick up
* typedef struct _rwResEntryTag RwResEntry;
* from baresour.h (internal) / rwcore.h (external)
*/
/****************************************************************************
Global Types
*/
#include <stddef.h> /* size_t, used by RWUNALIGNED16BYTE */
#include "rtintel.rpe" /* automatically generated header file */
/*
 * RwFixed64: a value held as two halves, a signed most significant word
 * (msb) and an unsigned least significant word (lsb).
 * RW_FIXED_64 is defined afterwards so the type is only declared once,
 * even if another header carries the same guarded definition.
 */
#if (!defined(RW_FIXED_64))
typedef struct RwFixed64 RwFixed64;
struct RwFixed64
{
RwInt32 msb; /* upper half, signed */
RwUInt32 lsb; /* lower half, unsigned */
};
#define RW_FIXED_64
#endif /* (!defined(RW_FIXED_64)) */
/*
 * doubleFromRwFixed64(x)
 * Converts a value with .msb and .lsb members (e.g. an RwFixed64) to a
 * double: msb is weighted by 2^32 (4294967296) and lsb is added unscaled.
 * The argument is evaluated twice, so it must be free of side effects.
 */
#define doubleFromRwFixed64(x) \
( ((double)((x).lsb)) \
+ (4294967296.0 * ((double)((x).msb))) )
/*
 * RtIntelTimeFunction: callback type that receives an opaque data pointer
 * and returns an RwBool. RtIntelTime() takes a function of this type.
 * RPINTELTIMEFUNCTION guards against a second declaration of the typedef.
 */
#if (!defined(RPINTELTIMEFUNCTION))
typedef RwBool(*RtIntelTimeFunction) (void *data);
#define RPINTELTIMEFUNCTION
#endif /* (!defined(RPINTELTIMEFUNCTION)) */
/*
 * RtIntelOverload: a table of four function pointers (one matrix multiply,
 * two vector multiplies and one transform function).
 * _rtIntelOverloadGetHandle() returns a pointer to a table of this type.
 */
typedef struct RtIntelOverload RtIntelOverload;
/* RwTransformFunction: takes a resource entry pointer, returns nothing. */
typedef void (*RwTransformFunction) (RwResEntry * repEntry);
struct RtIntelOverload
{
rwMatrixMultFn MatrixMultiplyFunction; /* matrix multiply entry */
rwVectorMultFn VectorMultPointFunction; /* vector multiply, point variant */
rwVectorMultFn VectorMultVectorFunction; /* vector multiply, vector variant */
RwTransformFunction TransformFunction; /* transform over an RwResEntry */
};
/*
 * String fragments used by the build-report pragmas below.
 * Exactly one of the pair is " __ICL" and the other is empty, depending on
 * whether the __ICL macro is defined.
 */
#if (!defined(__ICL))
#define UNDEFINED__ICL " __ICL"
#define DEFINED__ICL ""
#else /* (!defined(__ICL)) */
#define UNDEFINED__ICL ""
#define DEFINED__ICL " __ICL"
#endif /* (!defined(__ICL)) */
/*
 * Build reporting for Microsoft-compatible compilers (_MSC_VER defined).
 * A "user" comment pragma records the file, line and whether __ICL was
 * defined. When neither __ICL nor XBOX_DRVMODEL_H is defined, two
 * compile-time messages and one further comment pragma state that the
 * MMX/SSE intrinsics fall back to software emulation.
 */
#if (defined(_MSC_VER))
# pragma comment ( user, "comment:" __FILE__ "(" RW_STRINGIFY_EXPANDED(__LINE__) ") : " "DEFINED " DEFINED__ICL " ; UNDEFINED " UNDEFINED__ICL )
/* # pragma message (__FILE__ "(" RW_STRINGIFY_EXPANDED(__LINE__) ") : " "DEFINED " DEFINED__ICL " ; UNDEFINED " UNDEFINED__ICL ) */
/* # pragma comment ( user, "comment:" __FILE__ "(" RW_STRINGIFY_EXPANDED(__LINE__) ") : " "DEFINED " DEFINED__ICL " ; UNDEFINED " UNDEFINED__ICL ) */
#if (! (defined(__ICL) || defined(XBOX_DRVMODEL_H)) )
#pragma message (__DATE__ " " __TIME__ " " __FILE__ "(" RW_STRINGIFY_EXPANDED(__LINE__) ") : No MMX intrinsics - defaulting to software emulation")
#pragma message (__DATE__ " " __TIME__ " " __FILE__ "(" RW_STRINGIFY_EXPANDED(__LINE__) ") : No SSE intrinsics - defaulting to software emulation")
#pragma comment ( user, "comment:" __FILE__ "(" RW_STRINGIFY_EXPANDED(__LINE__) ") : No MMX intrinsics - defaulting to software emulation")
#endif /* (! (defined(__ICL) || defined(XBOX_DRVMODEL_H)) ) */
#endif /* (defined(_MSC_VER)) */
/*
 * MMX
 *
 * With the Intel compiler (__ICL) the real mmintrin.h is included; Rt_m64
 * is then typedef'd from __m64 further down, in the SSE section.
 * Otherwise Rt_m64 is declared here as a plain 64-bit integer type:
 * __int64 under _MSC_VER, RwInt64 for any other compiler.
 * The __M64 macro guards against declaring the typedef twice.
 */
#if (defined(__ICL))
#if (!defined(MMINTRIN_H))
#include "mmintrin.h"
#define MMINTRIN_H
#endif /* (!defined(MMINTRIN_H)) */
#else /* (defined(__ICL)) */
#if (defined(_MSC_VER))
#if (!defined(__M64))
typedef __int64 Rt_m64;
#define __M64
#endif /* (!defined(__M64)) */
#else /* (defined(_MSC_VER)) -- e.g. __GNUC__ */
#if (!defined(__M64))
typedef RwInt64 Rt_m64;
#define __M64
#endif /* (!defined(__M64)) */
#endif /* (defined(_MSC_VER)) */
#endif /* (defined(__ICL)) */
/*
* SSE
*/
/*
* From
* ccomp.pdf
* 12 Intel C/C++ Compiler User's Guide
* for Win32 Systems With Katmai New Instruction Support
* --------------------------------------------------------
* Functionality Intrinsics Usage
* You need only define one preprocessor symbol and include the header file
* xmmintrin.h in your application to use the following functionality
* intrinsics:
* #define _MM_FUNCTIONALITY
* #include "xmmintrin.h"
* To encourage the compiler to inline the functionality intrinsic functions for
* better performance, consider using the -Qip and -Qipo compiler switches.
*/
#if (defined(__ICL))
/*
 * Intel compiler branch: the Rt_ vector types are aliases of the real
 * intrinsic types from xmmintrin.h / emmintrin.h.
 * The option switches below are all commented out, so this header itself
 * defines none of them.
 */
/* #define _MM2_FUNCTIONALITY */
/* #define _MM_FUNCTIONALITY */
/* #define _MM_NO_ABORT */
/* #define _MM_NO_ACCURACY */
/* #define _MM_NO_ALIGN_CHECK */
/* #define _MM_NO_INLINE */
/*
 * Undefine "or", since this is valid assembler; e.g. in
 * SDK10/include/xmm_func.h
 * _asm {
 * push eax
 * fld f
 * fstcw saved_cw
 * mov eax, saved_cw
 * or eax, 3072
 * mov new_cw, eax
 * fldcw new_cw
 * fistp ret
 * fldcw saved_cw
 * pop eax
 * }
 */
/* NOTE(review): no "#undef or" actually follows the comment above. */
#if (!defined(XMMINTRIN_H))
#include "xmmintrin.h"
#define XMMINTRIN_H
#endif /* (!defined(XMMINTRIN_H)) */
typedef __m64 Rt_m64;
typedef __m128 Rt_m128;
/*
 * For __ICL >= 450 emmintrin.h supplies __m128d and __m128i; for older
 * versions both Rt_m128d and Rt_m128i are aliased to __m128 instead.
 */
#if (450 <= __ICL)
#if (!defined(EMMINTRIN_H))
#include "emmintrin.h"
#define EMMINTRIN_H
/*
 * NOTE(review): these two typedefs sit inside the EMMINTRIN_H guard, so
 * they are skipped if EMMINTRIN_H is already defined on entry.
 */
typedef __m128d Rt_m128d;
typedef __m128i Rt_m128i;
#endif /* (!defined(EMMINTRIN_H)) */
#else /* (450 <= __ICL) */
typedef __m128 Rt_m128d;
typedef __m128 Rt_m128i;
#endif /* (450 <= __ICL) */
/*
 * Report SSE options as compiler messages and object file comments
 *
 * Two string literals are built up, one option at a time:
 *   DEFINED__MM_NO_INLINE   - names of the _MM* options that are defined
 *   UNDEFINED__MM_NO_INLINE - names of the _MM* options that are not
 * Each stage appends its own option name to exactly one of the two chains
 * and passes the other chain through unchanged.
 *
 * The pieces are joined by ordinary adjacent string-literal concatenation.
 * The "##" operator must not be used here: pasting a macro name onto a
 * string literal does not form a valid preprocessing token, and it would
 * also stop the left-hand macro from being expanded.
 */
#ifdef _MM2_FUNCTIONALITY
#define DEFINED__MM2_FUNCTIONALITY " _MM2_FUNCTIONALITY"
#define UNDEFINED__MM2_FUNCTIONALITY ""
#else /* _MM2_FUNCTIONALITY */
#define DEFINED__MM2_FUNCTIONALITY ""
#define UNDEFINED__MM2_FUNCTIONALITY " _MM2_FUNCTIONALITY"
#endif /* _MM2_FUNCTIONALITY */
#ifdef _MM_FUNCTIONALITY
#define DEFINED__MM_FUNCTIONALITY DEFINED__MM2_FUNCTIONALITY " _MM_FUNCTIONALITY"
#define UNDEFINED__MM_FUNCTIONALITY UNDEFINED__MM2_FUNCTIONALITY
#else /* _MM_FUNCTIONALITY */
#define DEFINED__MM_FUNCTIONALITY DEFINED__MM2_FUNCTIONALITY
#define UNDEFINED__MM_FUNCTIONALITY UNDEFINED__MM2_FUNCTIONALITY " _MM_FUNCTIONALITY"
#endif /* _MM_FUNCTIONALITY */
#ifdef _MM_NO_ABORT
#define DEFINED__MM_NO_ABORT DEFINED__MM_FUNCTIONALITY " _MM_NO_ABORT"
#define UNDEFINED__MM_NO_ABORT UNDEFINED__MM_FUNCTIONALITY
#else /* _MM_NO_ABORT */
#define DEFINED__MM_NO_ABORT DEFINED__MM_FUNCTIONALITY
#define UNDEFINED__MM_NO_ABORT UNDEFINED__MM_FUNCTIONALITY " _MM_NO_ABORT"
#endif /* _MM_NO_ABORT */
#ifdef _MM_NO_ACCURACY
#define DEFINED__MM_NO_ACCURACY DEFINED__MM_NO_ABORT " _MM_NO_ACCURACY"
#define UNDEFINED__MM_NO_ACCURACY UNDEFINED__MM_NO_ABORT
#else /* _MM_NO_ACCURACY */
#define DEFINED__MM_NO_ACCURACY DEFINED__MM_NO_ABORT
#define UNDEFINED__MM_NO_ACCURACY UNDEFINED__MM_NO_ABORT " _MM_NO_ACCURACY"
#endif /* _MM_NO_ACCURACY */
#ifdef _MM_NO_ALIGN_CHECK
#define DEFINED__MM_NO_ALIGN_CHECK DEFINED__MM_NO_ACCURACY " _MM_NO_ALIGN_CHECK"
#define UNDEFINED__MM_NO_ALIGN_CHECK UNDEFINED__MM_NO_ACCURACY
#else /* _MM_NO_ALIGN_CHECK */
#define DEFINED__MM_NO_ALIGN_CHECK DEFINED__MM_NO_ACCURACY
#define UNDEFINED__MM_NO_ALIGN_CHECK UNDEFINED__MM_NO_ACCURACY " _MM_NO_ALIGN_CHECK"
#endif /* _MM_NO_ALIGN_CHECK */
#ifdef _MM_NO_INLINE
#define DEFINED__MM_NO_INLINE DEFINED__MM_NO_ALIGN_CHECK " _MM_NO_INLINE"
#define UNDEFINED__MM_NO_INLINE UNDEFINED__MM_NO_ALIGN_CHECK
#else /* _MM_NO_INLINE */
#define DEFINED__MM_NO_INLINE DEFINED__MM_NO_ALIGN_CHECK
#define UNDEFINED__MM_NO_INLINE UNDEFINED__MM_NO_ALIGN_CHECK " _MM_NO_INLINE"
#endif /* _MM_NO_INLINE */
/*
 * Still inside the __ICL branch: emit the build stamp and the two option
 * lists, once as object-file "user" comments and once as compiler messages.
 */
#pragma comment ( user, "comment:" __DATE__" " __TIME__ " - " __FILE__ ":" RW_STRINGIFY_EXPANDED(__LINE__) )
#pragma comment ( user, "comment:" "DEFINED :" DEFINED__MM_NO_INLINE )
#pragma comment ( user, "comment:" "UNDEFINED:" UNDEFINED__MM_NO_INLINE )
#pragma message (__DATE__" " __TIME__ " - " __FILE__ ":" RW_STRINGIFY_EXPANDED(__LINE__) )
#pragma message ("DEFINED :" DEFINED__MM_NO_INLINE )
#pragma message ("UNDEFINED:" UNDEFINED__MM_NO_INLINE )
#else /* (defined(__ICL)) */
/*
 * No Intel compiler: supply the prefetch hint constants and stand-in
 * 128-bit types ourselves.
 */
#define _MM_HINT_T0 1
#define _MM_HINT_T1 2
#define _MM_HINT_T2 3
#define _MM_HINT_NTA 0
#if (defined(__R5900__))
/* __R5900__ targets use the RwInt128 type directly. */
typedef RwInt128 Rt_m128;
#else /* (defined(__R5900__)) */
/* Elsewhere Rt_m128 is a pair of Rt_m64 halves; _PAIR__M64 guards it. */
#if (!defined(_PAIR__M64))
struct Rt_m128
{
Rt_m64 lo;
Rt_m64 hi;
};
#define _PAIR__M64
typedef struct Rt_m128 Rt_m128;
#endif /* (!defined(_PAIR__M64)) */
#endif /* (defined(__R5900__)) */
/* The double and integer vector types share the same representation. */
typedef Rt_m128 Rt_m128d;
typedef Rt_m128 Rt_m128i;
#endif /* (defined(__ICL)) */
/*
 * RtIntelV4d: one RwReal (w) followed by an RwV3d. Note that w is the
 * first member, ahead of the 3D vector.
 */
typedef struct RtIntelV4d RtIntelV4d;
struct RtIntelV4d
{
RwReal w;
RwV3d v3d;
};
/*
 * RpSSEOverlayM128: union that overlays an Rt_m128 with float, integer
 * (signed and unsigned, at several widths), Rt_m64, RtIntelV4d and
 * RwSplitBits views of the same storage.
 * RpWNIOverlayM128 is an alias for the same union.
 */
typedef union _RpSSEOverlayM128 RpSSEOverlayM128;
union _RpSSEOverlayM128
{
float _f[4]; /* four floats */
RwInt32 _d[4]; /* four signed RwInt32 */
RwUInt32 ud[4]; /* four unsigned RwUInt32 */
RwInt16 _w[8]; /* eight signed RwInt16 */
RwUInt16 uw[8]; /* eight unsigned RwUInt16 */
RwInt8 _b[16]; /* sixteen signed RwInt8 */
RwUInt8 ub[16]; /* sixteen unsigned RwUInt8 */
Rt_m64 m64[2]; /* two Rt_m64 values */
Rt_m128 m128; /* the vector value itself */
RtIntelV4d v4d; /* w followed by an RwV3d */
RwSplitBits bits[4]; /* four RwSplitBits views */
};
typedef RpSSEOverlayM128 RpWNIOverlayM128;
/*
 * RpWNIOverlayM128d: same overlay as RpSSEOverlayM128, but built around an
 * Rt_m128d and with an additional two-double view (df).
 */
typedef union _RpWNIOverlayM128d RpWNIOverlayM128d;
union _RpWNIOverlayM128d
{
double df[2]; /* two doubles */
float _f[4]; /* four floats */
RwInt32 _d[4]; /* four signed RwInt32 */
RwUInt32 ud[4]; /* four unsigned RwUInt32 */
RwInt16 _w[8]; /* eight signed RwInt16 */
RwUInt16 uw[8]; /* eight unsigned RwUInt16 */
RwInt8 _b[16]; /* sixteen signed RwInt8 */
RwUInt8 ub[16]; /* sixteen unsigned RwUInt8 */
Rt_m64 m64[2]; /* two Rt_m64 values */
Rt_m128d m128d; /* the vector value itself */
RtIntelV4d v4d; /* w followed by an RwV3d */
RwSplitBits bits[4]; /* four RwSplitBits views */
};
/*
 * RpWNIOverlayM128i: same overlay as RpWNIOverlayM128d, but the vector
 * member is an Rt_m128i.
 */
typedef union _RpWNIOverlayM128i RpWNIOverlayM128i;
union _RpWNIOverlayM128i
{
double df[2]; /* two doubles */
float _f[4]; /* four floats */
RwInt32 _d[4]; /* four signed RwInt32 */
RwUInt32 ud[4]; /* four unsigned RwUInt32 */
RwInt16 _w[8]; /* eight signed RwInt16 */
RwUInt16 uw[8]; /* eight unsigned RwUInt16 */
RwInt8 _b[16]; /* sixteen signed RwInt8 */
RwUInt8 ub[16]; /* sixteen unsigned RwUInt8 */
Rt_m64 m64[2]; /* two Rt_m64 values */
Rt_m128i m128i; /* the vector value itself */
RtIntelV4d v4d; /* w followed by an RwV3d */
RwSplitBits bits[4]; /* four RwSplitBits views */
};
/*
 * RWUNALIGNED16BYTE(ptr)
 * Yields the low four bits of the address held in ptr: zero when ptr is
 * 16-byte aligned, otherwise its byte offset (1..15) past the previous
 * 16-byte boundary. The result has type unsigned long.
 *
 * The address is converted through size_t (from <stddef.h>) rather than a
 * 32-bit integer, so the pointer-to-integer cast does not narrow on
 * targets whose pointers are wider than 32 bits. Only the masked nibble is
 * then narrowed to unsigned long.
 */
#define RWUNALIGNED16BYTE(ptr) \
(0x0000000FUL & (unsigned long)((size_t)(ptr) & (size_t)0x0F))
/*--- Plugin API Functions ---*/
/* All prototypes below have C linkage when included from C++. */
#ifdef __cplusplus
extern "C"
{
#endif /* __cplusplus */
/*
 * MMX
 *
 * Rt_m_* functions operate on Rt_m64 values. Each is named after an Intel
 * _m_* intrinsic; later in this header the _m_* names are #defined to
 * these functions when emulation is in use.
 */
/* General support intrinsics: state reset, int conversion, pack, unpack */
extern void Rt_m_empty(void);
extern Rt_m64 Rt_m_from_int(int i);
extern int Rt_m_to_int(Rt_m64 m);
extern Rt_m64 Rt_m_packsswb(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_packssdw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_packuswb(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_punpckhbw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_punpckhwd(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_punpckhdq(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_punpcklbw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_punpcklwd(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_punpckldq(Rt_m64 m1, Rt_m64 m2);
/*
 * Packed arithmetic intrinsics
 * Add, subtract and multiply families; every function takes two Rt_m64
 * operands and returns an Rt_m64.
 */
extern Rt_m64 Rt_m_paddb(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_paddw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_paddd(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_paddsb(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_paddsw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_paddusb(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_paddusw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_psubb(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_psubw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_psubd(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_psubsb(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_psubsw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_psubusb(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_psubusw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_pmaddwd(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_pmulhw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_pmullw(Rt_m64 m1, Rt_m64 m2);
/*
 * Shift intrinsics
 * Each shift comes in two forms: one taking the count as an Rt_m64 and an
 * "i"-suffixed one taking the count as an int.
 */
extern Rt_m64 Rt_m_psllw(Rt_m64 m, Rt_m64 count);
extern Rt_m64 Rt_m_psllwi(Rt_m64 m, int count);
extern Rt_m64 Rt_m_pslld(Rt_m64 m, Rt_m64 count);
extern Rt_m64 Rt_m_pslldi(Rt_m64 m, int count);
extern Rt_m64 Rt_m_psllq(Rt_m64 m, Rt_m64 count);
extern Rt_m64 Rt_m_psllqi(Rt_m64 m, int count);
extern Rt_m64 Rt_m_psraw(Rt_m64 m, Rt_m64 count);
extern Rt_m64 Rt_m_psrawi(Rt_m64 m, int count);
extern Rt_m64 Rt_m_psrad(Rt_m64 m, Rt_m64 count);
extern Rt_m64 Rt_m_psradi(Rt_m64 m, int count);
extern Rt_m64 Rt_m_psrlw(Rt_m64 m, Rt_m64 count);
extern Rt_m64 Rt_m_psrlwi(Rt_m64 m, int count);
extern Rt_m64 Rt_m_psrld(Rt_m64 m, Rt_m64 count);
extern Rt_m64 Rt_m_psrldi(Rt_m64 m, int count);
extern Rt_m64 Rt_m_psrlq(Rt_m64 m, Rt_m64 count);
extern Rt_m64 Rt_m_psrlqi(Rt_m64 m, int count);
/* Logical intrinsics: two Rt_m64 operands in, one Rt_m64 out */
extern Rt_m64 Rt_m_pand(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_pandn(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_por(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_pxor(Rt_m64 m1, Rt_m64 m2);
/* Comparison intrinsics: the result is returned as an Rt_m64 */
extern Rt_m64 Rt_m_pcmpeqb(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_pcmpeqw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_pcmpeqd(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_pcmpgtb(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_pcmpgtw(Rt_m64 m1, Rt_m64 m2);
extern Rt_m64 Rt_m_pcmpgtd(Rt_m64 m1, Rt_m64 m2);
/*
 * SSE
 *
 * Rt_mm_* functions operate on Rt_m128 values. The trailing comment on
 * each prototype names the instruction it corresponds to.
 */
/*
 * Arithmetic Operations
 * Functions come in _ss / _ps pairs, matching the SS / PS instruction
 * named in the trailing comment.
 */
extern Rt_m128 Rt_mm_add_ss(Rt_m128 a, Rt_m128 b) /* ADDSS */ ;
extern Rt_m128 Rt_mm_add_ps(Rt_m128 a, Rt_m128 b) /* ADDPS */ ;
extern Rt_m128 Rt_mm_sub_ss(Rt_m128 a, Rt_m128 b) /* SUBSS */ ;
extern Rt_m128 Rt_mm_sub_ps(Rt_m128 a, Rt_m128 b) /* SUBPS */ ;
extern Rt_m128 Rt_mm_mul_ss(Rt_m128 a, Rt_m128 b) /* MULSS */ ;
extern Rt_m128 Rt_mm_mul_ps(Rt_m128 a, Rt_m128 b) /* MULPS */ ;
extern Rt_m128 Rt_mm_div_ss(Rt_m128 a, Rt_m128 b) /* DIVSS */ ;
extern Rt_m128 Rt_mm_div_ps(Rt_m128 a, Rt_m128 b) /* DIVPS */ ;
extern Rt_m128 Rt_mm_sqrt_ss(Rt_m128 a) /* SQRTSS */ ;
extern Rt_m128 Rt_mm_sqrt_ps(Rt_m128 a) /* SQRTPS */ ;
extern Rt_m128 Rt_mm_rcp_ss(Rt_m128 a) /* RCPSS */ ;
extern Rt_m128 Rt_mm_rcp_ps(Rt_m128 a) /* RCPPS */ ;
extern Rt_m128 Rt_mm_rsqrt_ss(Rt_m128 a) /* RSQRTSS */ ;
extern Rt_m128 Rt_mm_rsqrt_ps(Rt_m128 a) /* RSQRTPS */ ;
extern Rt_m128 Rt_mm_min_ss(Rt_m128 a, Rt_m128 b) /* MINSS */ ;
extern Rt_m128 Rt_mm_min_ps(Rt_m128 a, Rt_m128 b) /* MINPS */ ;
extern Rt_m128 Rt_mm_max_ss(Rt_m128 a, Rt_m128 b) /* MAXSS */ ;
extern Rt_m128 Rt_mm_max_ps(Rt_m128 a, Rt_m128 b) /* MAXPS */ ;
/*
 * Logical Operations
 * AND, AND-NOT, OR and XOR over two Rt_m128 operands.
 */
extern Rt_m128 Rt_mm_and_ps(Rt_m128 a, Rt_m128 b) /* ANDPS */ ;
extern Rt_m128 Rt_mm_andnot_ps(Rt_m128 a,
Rt_m128 b) /* ANDNPS */ ;
extern Rt_m128 Rt_mm_or_ps(Rt_m128 a, Rt_m128 b) /* ORPS */ ;
extern Rt_m128 Rt_mm_xor_ps(Rt_m128 a, Rt_m128 b) /* XORPS */ ;
/*
 * Comparisons
 * The Rt_mm_cmp* functions return their result as an Rt_m128; the
 * Rt_mm_comi* and Rt_mm_ucomi* functions return an int.
 * A trailing "r" after the instruction name (e.g. "CMPLTSS r") presumably
 * marks that the instruction is used with its operands reversed -- TODO
 * confirm against the implementation.
 */
extern Rt_m128 Rt_mm_cmpeq_ss(Rt_m128 a,
Rt_m128 b) /* CMPEQSS */ ;
extern Rt_m128 Rt_mm_cmpeq_ps(Rt_m128 a,
Rt_m128 b) /* CMPEQPS */ ;
extern Rt_m128 Rt_mm_cmplt_ss(Rt_m128 a,
Rt_m128 b) /* CMPLTSS */ ;
extern Rt_m128 Rt_mm_cmplt_ps(Rt_m128 a,
Rt_m128 b) /* CMPLTPS */ ;
extern Rt_m128 Rt_mm_cmple_ss(Rt_m128 a,
Rt_m128 b) /* CMPLESS */ ;
extern Rt_m128 Rt_mm_cmple_ps(Rt_m128 a,
Rt_m128 b) /* CMPLEPS */ ;
extern Rt_m128 Rt_mm_cmpgt_ss(Rt_m128 a, Rt_m128 b) /* CMPLTSS r */
;
extern Rt_m128 Rt_mm_cmpgt_ps(Rt_m128 a, Rt_m128 b) /* CMPLTPS r */
;
extern Rt_m128 Rt_mm_cmpge_ss(Rt_m128 a, Rt_m128 b) /* CMPLESS r */
;
extern Rt_m128 Rt_mm_cmpge_ps(Rt_m128 a, Rt_m128 b) /* CMPLEPS r */
;
extern Rt_m128 Rt_mm_cmpneq_ss(Rt_m128 a,
Rt_m128 b) /* CMPNEQSS */ ;
extern Rt_m128 Rt_mm_cmpneq_ps(Rt_m128 a,
Rt_m128 b) /* CMPNEQPS */ ;
extern Rt_m128 Rt_mm_cmpnlt_ss(Rt_m128 a,
Rt_m128 b) /* CMPNLTSS */ ;
extern Rt_m128 Rt_mm_cmpnlt_ps(Rt_m128 a,
Rt_m128 b) /* CMPNLTPS */ ;
extern Rt_m128 Rt_mm_cmpnle_ss(Rt_m128 a,
Rt_m128 b) /* CMPNLESS */ ;
extern Rt_m128 Rt_mm_cmpnle_ps(Rt_m128 a,
Rt_m128 b) /* CMPNLEPS */ ;
extern Rt_m128 Rt_mm_cmpngt_ss(Rt_m128 a, Rt_m128 b)
/* CMPNLTSS r */ ;
extern Rt_m128 Rt_mm_cmpngt_ps(Rt_m128 a, Rt_m128 b)
/* CMPNLTPS r */ ;
extern Rt_m128 Rt_mm_cmpnge_ss(Rt_m128 a, Rt_m128 b)
/* CMPNLESS r */ ;
extern Rt_m128 Rt_mm_cmpnge_ps(Rt_m128 a, Rt_m128 b)
/* CMPNLEPS r */ ;
extern Rt_m128 Rt_mm_cmpord_ss(Rt_m128 a,
Rt_m128 b) /* CMPORDSS */ ;
extern Rt_m128 Rt_mm_cmpord_ps(Rt_m128 a,
Rt_m128 b) /* CMPORDPS */ ;
extern Rt_m128 Rt_mm_cmpunord_ss(Rt_m128 a,
Rt_m128 b) /* CMPUNORDSS */ ;
extern Rt_m128 Rt_mm_cmpunord_ps(Rt_m128 a,
Rt_m128 b) /* CMPUNORDPS */ ;
extern int Rt_mm_comieq_ss(Rt_m128 a,
Rt_m128 b) /* COMISS */ ;
extern int Rt_mm_comilt_ss(Rt_m128 a,
Rt_m128 b) /* COMISS */ ;
extern int Rt_mm_comile_ss(Rt_m128 a,
Rt_m128 b) /* COMISS */ ;
extern int Rt_mm_comigt_ss(Rt_m128 a,
Rt_m128 b) /* COMISS */ ;
extern int Rt_mm_comige_ss(Rt_m128 a,
Rt_m128 b) /* COMISS */ ;
extern int Rt_mm_comineq_ss(Rt_m128 a,
Rt_m128 b) /* COMISS */ ;
extern int Rt_mm_ucomieq_ss(Rt_m128 a,
Rt_m128 b) /* UCOMISS */ ;
extern int Rt_mm_ucomilt_ss(Rt_m128 a,
Rt_m128 b) /* UCOMISS */ ;
extern int Rt_mm_ucomile_ss(Rt_m128 a,
Rt_m128 b) /* UCOMISS */ ;
extern int Rt_mm_ucomigt_ss(Rt_m128 a,
Rt_m128 b) /* UCOMISS */ ;
extern int Rt_mm_ucomige_ss(Rt_m128 a,
Rt_m128 b) /* UCOMISS */ ;
extern int Rt_mm_ucomineq_ss(Rt_m128 a,
Rt_m128 b) /* UCOMISS */ ;
/*
 * Conversion Operations
 * Conversions between Rt_m128 and int / Rt_m64. Both cvt and cvtt forms
 * are provided, matching the CVT* / CVTT* instructions named in the
 * trailing comments.
 */
extern int Rt_mm_cvt_ss2si(Rt_m128 a) /* CVTSS2SI */ ;
extern Rt_m64 Rt_mm_cvt_ps2pi(Rt_m128 a) /* CVTPS2PI */ ;
extern int Rt_mm_cvtt_ss2si(Rt_m128 a) /* CVTTSS2SI */ ;
extern Rt_m64 Rt_mm_cvtt_ps2pi(Rt_m128 a) /* CVTTPS2PI */ ;
extern Rt_m128 Rt_mm_cvt_si2ss(Rt_m128 a,
int b) /* CVTSI2SS */ ;
extern Rt_m128 Rt_mm_cvt_pi2ps(Rt_m128 a,
Rt_m64 b) /* CVTPI2PS */ ;
/*
 * Miscellaneous
 * Shuffle/unpack, half-register loads and stores through Rt_m64 pointers,
 * half moves, mask extraction and control/status register access.
 */
extern Rt_m128 Rt_mm_shuffle_ps(Rt_m128 a, Rt_m128 b,
int i) /* SHUFPS */ ;
extern Rt_m128 Rt_mm_unpackhi_ps(Rt_m128 a,
Rt_m128 b) /* UNPCKHPS */ ;
extern Rt_m128 Rt_mm_unpacklo_ps(Rt_m128 a,
Rt_m128 b) /* UNPCKLPS */ ;
extern Rt_m128 Rt_mm_loadh_pi(Rt_m128 a, Rt_m64 * p) /* MOVHPS reg, mem */
;
extern void Rt_mm_storeh_pi(Rt_m64 * p, Rt_m128 a) /* MOVHPS mem, reg */
;
extern Rt_m128 Rt_mm_movehl_ps(Rt_m128 a,
Rt_m128 b) /* MOVHLPS */ ;
extern Rt_m128 Rt_mm_movelh_ps(Rt_m128 a,
Rt_m128 b) /* MOVLHPS */ ;
extern Rt_m128 Rt_mm_loadl_pi(Rt_m128 a, Rt_m64 * p)
/* MOVLPS reg, mem */ ;
extern void Rt_mm_storel_pi(Rt_m64 * p, Rt_m128 a) /* MOVLPS mem, reg */
;
extern int Rt_mm_movemask_ps(Rt_m128 a) /* MOVMSKPS */ ;
extern unsigned int Rt_mm_getcsr(void) /* STMXCSR */ ;
extern void Rt_mm_setcsr(unsigned int i) /* LDMXCSR */ ;
/*
 * Load Operations
 * Build an Rt_m128 from floats read through p. The trailing comments
 * distinguish MOVAPS from MOVUPS forms; alignment requirements are not
 * stated in this header.
 */
extern Rt_m128 Rt_mm_load_ss(float *p) /* MOVSS */ ;
extern Rt_m128 Rt_mm_load_ps1(float *p) /* MOVSS + shuffling */
;
extern Rt_m128 Rt_mm_load_ps(float *p) /* MOVAPS */ ;
extern Rt_m128 Rt_mm_loadu_ps(float *p) /* MOVUPS */ ;
extern Rt_m128 Rt_mm_loadr_ps(float *p)
/* MOVAPS + shuffling */ ;
/*
 * Set Operations
 * Build an Rt_m128 from float arguments; marked "(composite)", i.e. not a
 * single instruction.
 */
extern Rt_m128 Rt_mm_set_ss(float w) /* (composite) */ ;
extern Rt_m128 Rt_mm_set_ps1(float w) /* (composite) */ ;
extern Rt_m128 Rt_mm_set_ps(float z, float y, float x,
float w) /* (composite) */ ;
extern Rt_m128 Rt_mm_setr_ps(float z, float y, float x,
float w) /* (composite) */ ;
extern Rt_m128 Rt_mm_setzero_ps(void) /* (composite) */ ;
/*
 * Store Operations
 * Write an Rt_m128 out through a float pointer. Rt_mm_move_ss is also
 * listed here although it returns an Rt_m128 rather than storing.
 */
extern void Rt_mm_store_ss(float *p,
Rt_m128 a) /* MOVSS */ ;
extern void Rt_mm_store_ps1(float *p, Rt_m128 a)
/* MOVSS + shuffling */ ;
extern void Rt_mm_store_ps(float *p,
Rt_m128 a) /* MOVAPS */ ;
extern void Rt_mm_storeu_ps(float *p,
Rt_m128 a) /* MOVUPS */ ;
extern void Rt_mm_storer_ps(float *p, Rt_m128 a)
/* MOVAPS + shuffling */ ;
extern Rt_m128 Rt_mm_move_ss(Rt_m128 a,
Rt_m128 b) /* MOVSS */ ;
/*
 * Integer Intrinsics
 * SSE-era additions that operate on Rt_m64 values (hence the Rt_m_ prefix
 * rather than Rt_mm_).
 */
extern int Rt_m_pextrw(Rt_m64 a, int n) /* PEXTRW */ ;
extern Rt_m64 Rt_m_pinsrw(Rt_m64 a, int d,
int n) /* PINSRW */ ;
extern Rt_m64 Rt_m_pmaxsw(Rt_m64 a, Rt_m64 b) /* PMAXSW */ ;
extern Rt_m64 Rt_m_pmaxub(Rt_m64 a, Rt_m64 b) /* PMAXUB */ ;
extern Rt_m64 Rt_m_pminsw(Rt_m64 a, Rt_m64 b) /* PMINSW */ ;
extern Rt_m64 Rt_m_pminub(Rt_m64 a, Rt_m64 b) /* PMINUB */ ;
extern int Rt_m_pmovmskb(Rt_m64 a) /* PMOVMSKB */ ;
extern Rt_m64 Rt_m_pmulhuw(Rt_m64 a, Rt_m64 b) /* PMULHUW */ ;
extern Rt_m64 Rt_m_pshufw(Rt_m64 a, int n) /* PSHUFW */ ;
extern void Rt_m_lwmaskmovq(Rt_m64 d, Rt_m64 n,
char *p) /* MASKMOVQ */ ;
/*
 * Cacheability Support
 * Prefetch, MOVNT* stores and store fence. The int argument of
 * Rt_mm_prefetch is presumably one of the _MM_HINT_* constants -- confirm.
 */
extern void Rt_mm_prefetch(char *p, int i) /* PREFETCH */ ;
extern void Rt_mm_stream_pi(Rt_m64 * p,
Rt_m64 a) /* MOVNTQ */ ;
extern void Rt_mm_stream_ps(float *p,
Rt_m128 a) /* MOVNTPS */ ;
extern void Rt_mm_sfence(void) /* SFENCE */ ;
/*
 * WNI
 *
 * Functions over Rt_m128d (double) and Rt_m128i (integer) vectors.
 * NOTE(review): "WNI" presumably stands for Willamette New Instructions,
 * i.e. SSE2 -- confirm.
 */
/*
 * Arithmetic Operations
 * _sd / _pd pairs over Rt_m128d, followed by the four logical operations.
 * Note that Rt_mm_sqrt_sd takes two operands while Rt_mm_sqrt_pd takes one.
 */
extern Rt_m128d Rt_mm_add_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_add_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_div_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_div_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_max_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_max_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_min_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_min_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_mul_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_mul_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_sqrt_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_sqrt_pd(Rt_m128d a);
extern Rt_m128d Rt_mm_sub_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_sub_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_andnot_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_and_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_or_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_xor_pd(Rt_m128d a, Rt_m128d b);
/*
 * Comparisons
 * Rt_mm_cmp*_pd and Rt_mm_cmp*_sd return their result as an Rt_m128d;
 * the Rt_mm_comi*_sd and Rt_mm_ucomi*_sd functions return an int.
 */
extern Rt_m128d Rt_mm_cmpeq_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmplt_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmple_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpgt_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpge_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpord_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpunord_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpneq_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpnlt_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpnle_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpngt_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpnge_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpeq_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmplt_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmple_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpgt_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpge_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpord_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpunord_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpneq_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpnlt_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpnle_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpngt_sd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_cmpnge_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_comieq_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_comilt_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_comile_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_comigt_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_comige_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_comineq_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_ucomieq_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_ucomilt_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_ucomile_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_ucomigt_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_ucomige_sd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_ucomineq_sd(Rt_m128d a, Rt_m128d b);
/*
 * Conversion Operations
 * Conversions among Rt_m128, Rt_m128d, Rt_m128i, Rt_m64 and int. The
 * "cvtt" variants are listed alongside their "cvt" counterparts.
 */
extern Rt_m128 Rt_mm_cvtpd_ps(Rt_m128d a);
extern Rt_m128d Rt_mm_cvtps_pd(Rt_m128 a);
extern Rt_m128d Rt_mm_cvtepi32_pd(Rt_m128i a);
extern Rt_m128i Rt_mm_cvtpd_epi32(Rt_m128d a);
extern int Rt_mm_cvtsd_si32(Rt_m128d a);
extern Rt_m128 Rt_mm_cvtsd_ss(Rt_m128 a, Rt_m128d b);
extern Rt_m128d Rt_mm_cvtsi32_sd(Rt_m128d a, int b);
extern Rt_m128d Rt_mm_cvtss_sd(Rt_m128d a, Rt_m128 b);
extern Rt_m128i Rt_mm_cvttpd_epi32(Rt_m128d a);
extern int Rt_mm_cvttsd_si32(Rt_m128d a);
extern Rt_m128 Rt_mm_cvtepi32_ps(Rt_m128i a);
extern Rt_m128i Rt_mm_cvtps_epi32(Rt_m128 a);
extern Rt_m128i Rt_mm_cvttps_epi32(Rt_m128 a);
extern Rt_m64 Rt_mm_cvtpd_pi32(Rt_m128d a);
extern Rt_m64 Rt_mm_cvttpd_pi32(Rt_m128d a);
extern Rt_m128d Rt_mm_cvtpi32_pd(Rt_m64 a);
/*
 * Miscellaneous Operations
 * Unpack/shuffle/mask, then the load, set, move and store families for
 * Rt_m128d. Loads read through const double pointers; stores write through
 * double pointers.
 */
extern Rt_m128d Rt_mm_unpackhi_pd(Rt_m128d a, Rt_m128d b);
extern Rt_m128d Rt_mm_unpacklo_pd(Rt_m128d a, Rt_m128d b);
extern int Rt_mm_movemask_pd(Rt_m128d a);
extern Rt_m128d Rt_mm_shuffle_pd(Rt_m128d a, Rt_m128d b, int i);
extern Rt_m128d Rt_mm_load_pd(const double *p);
extern Rt_m128d Rt_mm_load1_pd(const double *p);
extern Rt_m128d Rt_mm_loadr_pd(const double *p);
extern Rt_m128d Rt_mm_loadu_pd(const double *p);
extern Rt_m128d Rt_mm_load_sd(const double *p);
extern Rt_m128d Rt_mm_loadh_pd(Rt_m128d a, const double *p);
extern Rt_m128d Rt_mm_loadl_pd(Rt_m128d a, const double *p);
extern Rt_m128d Rt_mm_set_sd(double w);
extern Rt_m128d Rt_mm_set1_pd(double w);
extern Rt_m128d Rt_mm_set_pd(double w, double x);
extern Rt_m128d Rt_mm_setr_pd(double w, double x);
extern Rt_m128d Rt_mm_setzero_pd(void);
extern Rt_m128d Rt_mm_move_sd(Rt_m128d a, Rt_m128d b);
extern void Rt_mm_stream_pd(double *p, Rt_m128d a);
extern void Rt_mm_store_sd(double *p, Rt_m128d a);
extern void Rt_mm_store1_pd(double *p, Rt_m128d a);
extern void Rt_mm_store_pd(double *p, Rt_m128d a);
extern void Rt_mm_storeu_pd(double *p, Rt_m128d a);
extern void Rt_mm_storer_pd(double *p, Rt_m128d a);
extern void Rt_mm_storeh_pd(double *p, Rt_m128d a);
extern void Rt_mm_storel_pd(double *p, Rt_m128d a);
/*
 * Integer operations over Rt_m128i: add/subtract (including the adds/subs
 * families), average, multiply, min/max, logical, shifts and comparisons,
 * ending with int <-> Rt_m128i conversion.
 * The three functions with an si64/su32 suffix (add_si64, mul_su32,
 * sub_si64) take and return Rt_m64 instead.
 * Shifts come in an "i" form taking an int count and a form taking the
 * count as an Rt_m128i.
 */
extern Rt_m128i Rt_mm_add_epi8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_add_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_add_epi32(Rt_m128i a, Rt_m128i b);
extern Rt_m64 Rt_mm_add_si64(Rt_m64 a, Rt_m64 b);
extern Rt_m128i Rt_mm_add_epi64(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_adds_epi8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_adds_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_adds_epu8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_adds_epu16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_avg_epu8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_avg_epu16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_madd_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_max_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_max_epu8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_min_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_min_epu8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_mulhi_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_mulhi_epu16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_mullo_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m64 Rt_mm_mul_su32(Rt_m64 a, Rt_m64 b);
extern Rt_m128i Rt_mm_mul_epu32(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_sad_epu8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_sub_epi8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_sub_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_sub_epi32(Rt_m128i a, Rt_m128i b);
extern Rt_m64 Rt_mm_sub_si64(Rt_m64 a, Rt_m64 b);
extern Rt_m128i Rt_mm_sub_epi64(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_subs_epi8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_subs_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_subs_epu8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_subs_epu16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_and_si128(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_andnot_si128(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_or_si128(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_xor_si128(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_slli_si128(Rt_m128i a, int imm);
extern Rt_m128i Rt_mm_slli_epi16(Rt_m128i a, int count);
extern Rt_m128i Rt_mm_sll_epi16(Rt_m128i a, Rt_m128i count);
extern Rt_m128i Rt_mm_slli_epi32(Rt_m128i a, int count);
extern Rt_m128i Rt_mm_sll_epi32(Rt_m128i a, Rt_m128i count);
extern Rt_m128i Rt_mm_slli_epi64(Rt_m128i a, int count);
extern Rt_m128i Rt_mm_sll_epi64(Rt_m128i a, Rt_m128i count);
extern Rt_m128i Rt_mm_srai_epi16(Rt_m128i a, int count);
extern Rt_m128i Rt_mm_sra_epi16(Rt_m128i a, Rt_m128i count);
extern Rt_m128i Rt_mm_srai_epi32(Rt_m128i a, int count);
extern Rt_m128i Rt_mm_sra_epi32(Rt_m128i a, Rt_m128i count);
extern Rt_m128i Rt_mm_srli_si128(Rt_m128i a, int imm);
extern Rt_m128i Rt_mm_srli_epi16(Rt_m128i a, int count);
extern Rt_m128i Rt_mm_srl_epi16(Rt_m128i a, Rt_m128i count);
extern Rt_m128i Rt_mm_srli_epi32(Rt_m128i a, int count);
extern Rt_m128i Rt_mm_srl_epi32(Rt_m128i a, Rt_m128i count);
extern Rt_m128i Rt_mm_srli_epi64(Rt_m128i a, int count);
extern Rt_m128i Rt_mm_srl_epi64(Rt_m128i a, Rt_m128i count);
extern Rt_m128i Rt_mm_cmpeq_epi8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_cmpeq_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_cmpeq_epi32(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_cmpgt_epi8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_cmpgt_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_cmpgt_epi32(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_cmplt_epi8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_cmplt_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_cmplt_epi32(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_cvtsi32_si128(int a);
extern int Rt_mm_cvtsi128_si32(Rt_m128i a);
/*
 * Miscellaneous Operations
 * Rt_m64 <-> Rt_m128i moves, pack, extract/insert, mask, shuffle and
 * unpack, followed by the load and set families for Rt_m128i.
 * The set_* functions list their arguments from the highest index down to
 * 0, while the setr_* functions list them from 0 upwards.
 */
extern Rt_m64 Rt_mm_movepi64_pi64(Rt_m128i a);
extern Rt_m128i Rt_mm_movpi64_epi64(Rt_m64 a);
extern Rt_m128i Rt_mm_move_epi64(Rt_m128i a);
extern Rt_m128i Rt_mm_packs_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_packs_epi32(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_packus_epi16(Rt_m128i a, Rt_m128i b);
extern int Rt_mm_extract_epi16(Rt_m128i a, int imm);
extern Rt_m128i Rt_mm_insert_epi16(Rt_m128i a, int b, int imm);
extern int Rt_mm_movemask_epi8(Rt_m128i a);
extern Rt_m128i Rt_mm_shuffle_epi32(Rt_m128i a, int imm);
extern Rt_m128i Rt_mm_shufflehi_epi16(Rt_m128i a, int imm);
extern Rt_m128i Rt_mm_shufflelo_epi16(Rt_m128i a, int imm);
extern Rt_m128i Rt_mm_unpackhi_epi8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_unpackhi_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_unpackhi_epi32(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_unpackhi_epi64(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_unpacklo_epi8(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_unpacklo_epi16(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_unpacklo_epi32(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_unpacklo_epi64(Rt_m128i a, Rt_m128i b);
extern Rt_m128i Rt_mm_loadl_epi64(Rt_m128i const *p);
extern Rt_m128i Rt_mm_load_si128(const Rt_m128i * p);
extern Rt_m128i Rt_mm_loadu_si128(const Rt_m128i * p);
extern Rt_m128i Rt_mm_set_epi64(Rt_m64 q1, Rt_m64 q0);
extern Rt_m128i Rt_mm_set_epi32(int i3, int i2, int i1, int i0);
extern Rt_m128i
Rt_mm_set_epi16(short w7, short w6,
short w5, short w4, short w3, short w2,
short w1, short w0);
extern Rt_m128i Rt_mm_set_epi8(char b15, char b14,
char b13, char b12,
char b11, char b10,
char b9, char b8,
char b7, char b6,
char b5, char b4,
char b3, char b2,
char b1, char b0);
extern Rt_m128i Rt_mm_set1_epi64(Rt_m64 q);
extern Rt_m128i Rt_mm_set1_epi32(int i);
extern Rt_m128i Rt_mm_set1_epi16(short w);
extern Rt_m128i Rt_mm_set1_epi8(char b);
extern Rt_m128i Rt_mm_setr_epi64(Rt_m64 q0, Rt_m64 q1);
extern Rt_m128i Rt_mm_setr_epi32(int i0, int i1, int i2,
int i3);
extern Rt_m128i Rt_mm_setr_epi16(short w0, short w1,
short w2, short w3,
short w4, short w5,
short w6, short w7);
extern Rt_m128i Rt_mm_setr_epi8(char b0, char b1,
char b2, char b3,
char b4, char b5,
char b6, char b7,
char b8, char b9,
char b10, char b11,
char b12, char b13,
char b14, char b15);
extern Rt_m128i Rt_mm_setzero_si128(void);
/*
 * Store Operations
 * Stores of Rt_m128i (and one int) through pointers, followed by the
 * cache-line flush and the load/memory fences.
 */
extern void Rt_mm_store_si128(Rt_m128i * p, Rt_m128i a);
extern void Rt_mm_storeu_si128(Rt_m128i * p, Rt_m128i a);
extern void Rt_mm_maskmoveu_si128(Rt_m128i s, Rt_m128i n,
char *p);
extern void Rt_mm_storel_epi64(Rt_m128i * p, Rt_m128i a);
extern void Rt_mm_stream_si128(Rt_m128i * p, Rt_m128i a);
extern void Rt_mm_stream_si32(int *p, int a);
extern void Rt_mm_clflush(void const *p);
extern void Rt_mm_lfence(void);
extern void Rt_mm_mfence(void);
/*
 * API
 *
 * Toolkit entry points: CPU identification and feature queries
 * (RtIntelHave*), timing helpers, plugin attachment and access to the
 * overload table and the SSE transform node definition.
 * Only the signatures are visible here; exact return-value semantics are
 * defined by the implementation -- confirm there before relying on them.
 */
extern RwUInt32 RtIntelRDTSC(void);
extern RwUInt32 RtIntelToggleEFLAGS(int mask);
/* pb, pc and pd are untyped pointers -- presumably outputs; confirm. */
extern RwUInt32 RtIntelCPUID(RwUInt32 level,
void *pb, void *pc, void *pd);
extern RwUInt32 RtIntelHaveCPUID(void);
extern RwUInt32 RtIntelHaveRDTSC(void);
extern RwUInt32 RtIntelHaveMMX(void);
extern RwUInt32 RtIntelHaveSSE(void);
extern RwUInt32 RtIntelHaveWNI(void);
extern RwUInt32 RtIntelCpuType(void);
/* Start/Stop share the RtIntelTimeFunction signature (void * -> RwBool). */
extern RwBool RtIntelStartTiming(void * data);
extern RwBool RtIntelStopTiming(void *data);
/* Takes a result slot, a callback and the data pointer for the callback. */
extern RwBool RtIntelTime(RwFixed64 * result,
RtIntelTimeFunction func,
void *data);
extern RwBool RtIntelPluginAttach(void);
extern RtIntelOverload *_rtIntelOverloadGetHandle(void);
extern RxNodeDefinition *RxNodeDefinitionGetSSETransformCSL(void);
#ifdef __cplusplus
}
#endif /* __cplusplus */
/*
 * LEGACY-SUPPORT
 *
 * Rp-prefixed spellings of the entry points, each forwarding its
 * arguments unchanged to the Rt (or Rx) name.  They are kept for callers
 * that still use the old names -- e.g. rwsdk/driver/d3d/baintd3d.c,
 * which is locked at time of writing.
 */
#define RpIntelRDTSC()                        RtIntelRDTSC()
#define RpIntelToggleEFLAGS(mask)             RtIntelToggleEFLAGS(mask)
#define RpIntelCPUID(level, pb, pc, pd)       RtIntelCPUID(level, pb, pc, pd)
#define RpIntelHaveCPUID()                    RtIntelHaveCPUID()
#define RpIntelHaveRDTSC()                    RtIntelHaveRDTSC()
#define RpIntelHaveMMX()                      RtIntelHaveMMX()
#define RpIntelHaveSSE()                      RtIntelHaveSSE()
#define RpIntelHaveWNI()                      RtIntelHaveWNI()
#define RpIntelCpuType()                      RtIntelCpuType()
#define RpIntelStartTiming(data)              RtIntelStartTiming(data)
#define RpIntelStopTiming(data)               RtIntelStopTiming(data)
#define RpIntelTime(result, func, data)       RtIntelTime(result, func, data)
#define RpIntelPluginAttach()                 RtIntelPluginAttach()
#define RpNodeDefinitionGetSSETransformCSL()  RxNodeDefinitionGetSSETransformCSL()
/* Rp- and Rw-prefixed type names, both aliases of RtIntelOverload. */
typedef RtIntelOverload RpIntelOverload;
typedef RtIntelOverload RwIntelOverload;
/*
 * Rw- and Rp-prefixed spellings that forward to the Rt entry points.
 *
 * RpIntelHaveSSE() is deliberately not defined here: it is already
 * defined, with the same expansion, alongside the other Rp legacy
 * macros earlier in this header, so a second copy was redundant.
 */
#define _rwIntelOverloadGetHandle()     _rtIntelOverloadGetHandle()
#define _rpIntelOverloadGetHandle()     _rtIntelOverloadGetHandle()
#define RwIntelHaveSSE()                RtIntelHaveSSE()
#if (defined(RWEMULATEINTELSIMD) || !defined(__ICL))
/*
 * MMX
 *
 * Each Intel-style spelling below forwards, arguments unchanged, to the
 * routine of the same name carrying an Rt prefix.
 */
/* General support intrinsics */
#define _m_empty()                Rt_m_empty()
#define _m_from_int(src)          Rt_m_from_int(src)
#define _m_to_int(src)            Rt_m_to_int(src)
#define _m_packsswb(lhs, rhs)     Rt_m_packsswb(lhs, rhs)
#define _m_packssdw(lhs, rhs)     Rt_m_packssdw(lhs, rhs)
#define _m_packuswb(lhs, rhs)     Rt_m_packuswb(lhs, rhs)
#define _m_punpckhbw(lhs, rhs)    Rt_m_punpckhbw(lhs, rhs)
#define _m_punpckhwd(lhs, rhs)    Rt_m_punpckhwd(lhs, rhs)
#define _m_punpckhdq(lhs, rhs)    Rt_m_punpckhdq(lhs, rhs)
#define _m_punpcklbw(lhs, rhs)    Rt_m_punpcklbw(lhs, rhs)
#define _m_punpcklwd(lhs, rhs)    Rt_m_punpcklwd(lhs, rhs)
#define _m_punpckldq(lhs, rhs)    Rt_m_punpckldq(lhs, rhs)
/* Packed arithmetic intrinsics -- forwarded to the Rt_ routine of the same name */
#define _m_paddb(lhs, rhs)      Rt_m_paddb(lhs, rhs)
#define _m_paddw(lhs, rhs)      Rt_m_paddw(lhs, rhs)
#define _m_paddd(lhs, rhs)      Rt_m_paddd(lhs, rhs)
#define _m_paddsb(lhs, rhs)     Rt_m_paddsb(lhs, rhs)
#define _m_paddsw(lhs, rhs)     Rt_m_paddsw(lhs, rhs)
#define _m_paddusb(lhs, rhs)    Rt_m_paddusb(lhs, rhs)
#define _m_paddusw(lhs, rhs)    Rt_m_paddusw(lhs, rhs)
#define _m_psubb(lhs, rhs)      Rt_m_psubb(lhs, rhs)
#define _m_psubw(lhs, rhs)      Rt_m_psubw(lhs, rhs)
#define _m_psubd(lhs, rhs)      Rt_m_psubd(lhs, rhs)
#define _m_psubsb(lhs, rhs)     Rt_m_psubsb(lhs, rhs)
#define _m_psubsw(lhs, rhs)     Rt_m_psubsw(lhs, rhs)
#define _m_psubusb(lhs, rhs)    Rt_m_psubusb(lhs, rhs)
#define _m_psubusw(lhs, rhs)    Rt_m_psubusw(lhs, rhs)
#define _m_pmaddwd(lhs, rhs)    Rt_m_pmaddwd(lhs, rhs)
#define _m_pmulhw(lhs, rhs)     Rt_m_pmulhw(lhs, rhs)
#define _m_pmullw(lhs, rhs)     Rt_m_pmullw(lhs, rhs)
/* Shift intrinsics -- forwarded to the Rt_ routine of the same name */
#define _m_psllw(src, cnt)      Rt_m_psllw(src, cnt)
#define _m_psllwi(src, cnt)     Rt_m_psllwi(src, cnt)
#define _m_pslld(src, cnt)      Rt_m_pslld(src, cnt)
#define _m_pslldi(src, cnt)     Rt_m_pslldi(src, cnt)
#define _m_psllq(src, cnt)      Rt_m_psllq(src, cnt)
#define _m_psllqi(src, cnt)     Rt_m_psllqi(src, cnt)
#define _m_psraw(src, cnt)      Rt_m_psraw(src, cnt)
#define _m_psrawi(src, cnt)     Rt_m_psrawi(src, cnt)
#define _m_psrad(src, cnt)      Rt_m_psrad(src, cnt)
#define _m_psradi(src, cnt)     Rt_m_psradi(src, cnt)
#define _m_psrlw(src, cnt)      Rt_m_psrlw(src, cnt)
#define _m_psrlwi(src, cnt)     Rt_m_psrlwi(src, cnt)
#define _m_psrld(src, cnt)      Rt_m_psrld(src, cnt)
#define _m_psrldi(src, cnt)     Rt_m_psrldi(src, cnt)
#define _m_psrlq(src, cnt)      Rt_m_psrlq(src, cnt)
#define _m_psrlqi(src, cnt)     Rt_m_psrlqi(src, cnt)
/* Logical intrinsics -- forwarded to the Rt_ routine of the same name */
#define _m_pand(lhs, rhs)       Rt_m_pand(lhs, rhs)
#define _m_pandn(lhs, rhs)      Rt_m_pandn(lhs, rhs)
#define _m_por(lhs, rhs)        Rt_m_por(lhs, rhs)
#define _m_pxor(lhs, rhs)       Rt_m_pxor(lhs, rhs)
/* Comparison intrinsics -- forwarded likewise */
#define _m_pcmpeqb(lhs, rhs)    Rt_m_pcmpeqb(lhs, rhs)
#define _m_pcmpeqw(lhs, rhs)    Rt_m_pcmpeqw(lhs, rhs)
#define _m_pcmpeqd(lhs, rhs)    Rt_m_pcmpeqd(lhs, rhs)
#define _m_pcmpgtb(lhs, rhs)    Rt_m_pcmpgtb(lhs, rhs)
#define _m_pcmpgtw(lhs, rhs)    Rt_m_pcmpgtw(lhs, rhs)
#define _m_pcmpgtd(lhs, rhs)    Rt_m_pcmpgtd(lhs, rhs)
/*
 * SSE
 *
 * As with the MMX names, every _mm_* spelling forwards its arguments
 * unchanged to the Rt_mm_* routine of the same name.
 */
/*
 * Arithmetic Operations
 */
#define _mm_add_ss(lhs, rhs)      Rt_mm_add_ss(lhs, rhs)
#define _mm_add_ps(lhs, rhs)      Rt_mm_add_ps(lhs, rhs)
#define _mm_sub_ss(lhs, rhs)      Rt_mm_sub_ss(lhs, rhs)
#define _mm_sub_ps(lhs, rhs)      Rt_mm_sub_ps(lhs, rhs)
#define _mm_mul_ss(lhs, rhs)      Rt_mm_mul_ss(lhs, rhs)
#define _mm_mul_ps(lhs, rhs)      Rt_mm_mul_ps(lhs, rhs)
#define _mm_div_ss(lhs, rhs)      Rt_mm_div_ss(lhs, rhs)
#define _mm_div_ps(lhs, rhs)      Rt_mm_div_ps(lhs, rhs)
#define _mm_sqrt_ss(src)          Rt_mm_sqrt_ss(src)
#define _mm_sqrt_ps(src)          Rt_mm_sqrt_ps(src)
#define _mm_rcp_ss(src)           Rt_mm_rcp_ss(src)
#define _mm_rcp_ps(src)           Rt_mm_rcp_ps(src)
#define _mm_rsqrt_ss(src)         Rt_mm_rsqrt_ss(src)
#define _mm_rsqrt_ps(src)         Rt_mm_rsqrt_ps(src)
#define _mm_min_ss(lhs, rhs)      Rt_mm_min_ss(lhs, rhs)
#define _mm_min_ps(lhs, rhs)      Rt_mm_min_ps(lhs, rhs)
#define _mm_max_ss(lhs, rhs)      Rt_mm_max_ss(lhs, rhs)
#define _mm_max_ps(lhs, rhs)      Rt_mm_max_ps(lhs, rhs)
/*
 * Logical Operations
 */
#define _mm_and_ps(lhs, rhs)      Rt_mm_and_ps(lhs, rhs)
#define _mm_andnot_ps(lhs, rhs)   Rt_mm_andnot_ps(lhs, rhs)
#define _mm_or_ps(lhs, rhs)       Rt_mm_or_ps(lhs, rhs)
#define _mm_xor_ps(lhs, rhs)      Rt_mm_xor_ps(lhs, rhs)
/*
 * Comparisons
 *
 * cmp*, comi* and ucomi* spellings, each forwarded to the Rt_mm_*
 * routine of the same name.
 */
#define _mm_cmpeq_ss(lhs, rhs)      Rt_mm_cmpeq_ss(lhs, rhs)
#define _mm_cmpeq_ps(lhs, rhs)      Rt_mm_cmpeq_ps(lhs, rhs)
#define _mm_cmplt_ss(lhs, rhs)      Rt_mm_cmplt_ss(lhs, rhs)
#define _mm_cmplt_ps(lhs, rhs)      Rt_mm_cmplt_ps(lhs, rhs)
#define _mm_cmple_ss(lhs, rhs)      Rt_mm_cmple_ss(lhs, rhs)
#define _mm_cmple_ps(lhs, rhs)      Rt_mm_cmple_ps(lhs, rhs)
#define _mm_cmpgt_ss(lhs, rhs)      Rt_mm_cmpgt_ss(lhs, rhs)
#define _mm_cmpgt_ps(lhs, rhs)      Rt_mm_cmpgt_ps(lhs, rhs)
#define _mm_cmpge_ss(lhs, rhs)      Rt_mm_cmpge_ss(lhs, rhs)
#define _mm_cmpge_ps(lhs, rhs)      Rt_mm_cmpge_ps(lhs, rhs)
#define _mm_cmpneq_ss(lhs, rhs)     Rt_mm_cmpneq_ss(lhs, rhs)
#define _mm_cmpneq_ps(lhs, rhs)     Rt_mm_cmpneq_ps(lhs, rhs)
#define _mm_cmpnlt_ss(lhs, rhs)     Rt_mm_cmpnlt_ss(lhs, rhs)
#define _mm_cmpnlt_ps(lhs, rhs)     Rt_mm_cmpnlt_ps(lhs, rhs)
#define _mm_cmpnle_ss(lhs, rhs)     Rt_mm_cmpnle_ss(lhs, rhs)
#define _mm_cmpnle_ps(lhs, rhs)     Rt_mm_cmpnle_ps(lhs, rhs)
#define _mm_cmpngt_ss(lhs, rhs)     Rt_mm_cmpngt_ss(lhs, rhs)
#define _mm_cmpngt_ps(lhs, rhs)     Rt_mm_cmpngt_ps(lhs, rhs)
#define _mm_cmpnge_ss(lhs, rhs)     Rt_mm_cmpnge_ss(lhs, rhs)
#define _mm_cmpnge_ps(lhs, rhs)     Rt_mm_cmpnge_ps(lhs, rhs)
#define _mm_cmpord_ss(lhs, rhs)     Rt_mm_cmpord_ss(lhs, rhs)
#define _mm_cmpord_ps(lhs, rhs)     Rt_mm_cmpord_ps(lhs, rhs)
#define _mm_cmpunord_ss(lhs, rhs)   Rt_mm_cmpunord_ss(lhs, rhs)
#define _mm_cmpunord_ps(lhs, rhs)   Rt_mm_cmpunord_ps(lhs, rhs)

#define _mm_comieq_ss(lhs, rhs)     Rt_mm_comieq_ss(lhs, rhs)
#define _mm_comilt_ss(lhs, rhs)     Rt_mm_comilt_ss(lhs, rhs)
#define _mm_comile_ss(lhs, rhs)     Rt_mm_comile_ss(lhs, rhs)
#define _mm_comigt_ss(lhs, rhs)     Rt_mm_comigt_ss(lhs, rhs)
#define _mm_comige_ss(lhs, rhs)     Rt_mm_comige_ss(lhs, rhs)
#define _mm_comineq_ss(lhs, rhs)    Rt_mm_comineq_ss(lhs, rhs)

#define _mm_ucomieq_ss(lhs, rhs)    Rt_mm_ucomieq_ss(lhs, rhs)
#define _mm_ucomilt_ss(lhs, rhs)    Rt_mm_ucomilt_ss(lhs, rhs)
#define _mm_ucomile_ss(lhs, rhs)    Rt_mm_ucomile_ss(lhs, rhs)
#define _mm_ucomigt_ss(lhs, rhs)    Rt_mm_ucomigt_ss(lhs, rhs)
#define _mm_ucomige_ss(lhs, rhs)    Rt_mm_ucomige_ss(lhs, rhs)
#define _mm_ucomineq_ss(lhs, rhs)   Rt_mm_ucomineq_ss(lhs, rhs)
/*
 * Conversion Operations
 */
#define _mm_cvt_ss2si(src)            Rt_mm_cvt_ss2si(src)
#define _mm_cvt_ps2pi(src)            Rt_mm_cvt_ps2pi(src)
#define _mm_cvtt_ss2si(src)           Rt_mm_cvtt_ss2si(src)
#define _mm_cvtt_ps2pi(src)           Rt_mm_cvtt_ps2pi(src)
#define _mm_cvt_si2ss(lhs, rhs)       Rt_mm_cvt_si2ss(lhs, rhs)
#define _mm_cvt_pi2ps(lhs, rhs)       Rt_mm_cvt_pi2ps(lhs, rhs)
/*
 * Miscellaneous
 */
#define _mm_shuffle_ps(lhs, rhs, i)   Rt_mm_shuffle_ps(lhs, rhs, i)
#define _mm_unpackhi_ps(lhs, rhs)     Rt_mm_unpackhi_ps(lhs, rhs)
#define _mm_unpacklo_ps(lhs, rhs)     Rt_mm_unpacklo_ps(lhs, rhs)
#define _mm_loadh_pi(src, ptr)        Rt_mm_loadh_pi(src, ptr)
#define _mm_storeh_pi(ptr, src)       Rt_mm_storeh_pi(ptr, src)
#define _mm_movehl_ps(lhs, rhs)       Rt_mm_movehl_ps(lhs, rhs)
#define _mm_movelh_ps(lhs, rhs)       Rt_mm_movelh_ps(lhs, rhs)
#define _mm_loadl_pi(src, ptr)        Rt_mm_loadl_pi(src, ptr)
#define _mm_storel_pi(ptr, src)       Rt_mm_storel_pi(ptr, src)
#define _mm_movemask_ps(src)          Rt_mm_movemask_ps(src)
#define _mm_getcsr()                  Rt_mm_getcsr()
#define _mm_setcsr(i)                 Rt_mm_setcsr(i)
/*
 * Load Operations
 */
#define _mm_load_ss(ptr)            Rt_mm_load_ss(ptr)
#define _mm_load_ps1(ptr)           Rt_mm_load_ps1(ptr)
#define _mm_load_ps(ptr)            Rt_mm_load_ps(ptr)
#define _mm_loadu_ps(ptr)           Rt_mm_loadu_ps(ptr)
#define _mm_loadr_ps(ptr)           Rt_mm_loadr_ps(ptr)
/*
 * Set Operations
 */
#define _mm_set_ss(w)               Rt_mm_set_ss(w)
#define _mm_set_ps1(w)              Rt_mm_set_ps1(w)
#define _mm_set_ps(z, y, x, w)      Rt_mm_set_ps(z, y, x, w)
#define _mm_setr_ps(z, y, x, w)     Rt_mm_setr_ps(z, y, x, w)
#define _mm_setzero_ps()            Rt_mm_setzero_ps()
/*
 * Store Operations
 */
#define _mm_store_ss(ptr, src)      Rt_mm_store_ss(ptr, src)
#define _mm_store_ps1(ptr, src)     Rt_mm_store_ps1(ptr, src)
#define _mm_store_ps(ptr, src)      Rt_mm_store_ps(ptr, src)
#define _mm_storeu_ps(ptr, src)     Rt_mm_storeu_ps(ptr, src)
#define _mm_storer_ps(ptr, src)     Rt_mm_storer_ps(ptr, src)
#define _mm_move_ss(lhs, rhs)       Rt_mm_move_ss(lhs, rhs)
/*
 * Integer Intrinsics
 */
#define _m_pextrw(src, n)           Rt_m_pextrw(src, n)
#define _m_pinsrw(src, d, n)        Rt_m_pinsrw(src, d, n)
#define _m_pmaxsw(lhs, rhs)         Rt_m_pmaxsw(lhs, rhs)
#define _m_pmaxub(lhs, rhs)         Rt_m_pmaxub(lhs, rhs)
#define _m_pminsw(lhs, rhs)         Rt_m_pminsw(lhs, rhs)
#define _m_pminub(lhs, rhs)         Rt_m_pminub(lhs, rhs)
#define _m_pmovmskb(src)            Rt_m_pmovmskb(src)
#define _m_pmulhuw(lhs, rhs)        Rt_m_pmulhuw(lhs, rhs)
#define _m_pshufw(src, n)           Rt_m_pshufw(src, n)
#define _m_lwmaskmovq(d, n, ptr)    Rt_m_lwmaskmovq(d, n, ptr)
/*
 * Cacheability Support
 */
#define _mm_prefetch(ptr, i)        Rt_mm_prefetch(ptr, i)
#define _mm_stream_pi(ptr, src)     Rt_mm_stream_pi(ptr, src)
#define _mm_stream_ps(ptr, src)     Rt_mm_stream_ps(ptr, src)
#define _mm_sfence()                Rt_mm_sfence()
#endif /* (defined(RWEMULATEINTELSIMD) || !defined(__ICL)) */
#endif /* RTINTEL_H */