Mirror of https://github.com/dolphin-emu/dolphin.git, synced 2025-02-22 03:57:17 +01:00
Turn the X86 emitter into a class, so the code pointer is no longer a global, yay! Created XCodeBlock that derives from XEmitter, and the Jit now derives from XCodeBlock so it can call all ADD SUB JNZ etc without having to prefix them with "emit.". I think someone's gonna like this.

There's some cleanup still to be done, but hey, it works. There shouldn't be a noticeable speed difference. I hope GCC doesn't have a problem with the "member function pointers" I used.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1594 8ced0084-cf51-0410-be5f-012b33b47a6e
parent b5dcdcf779
commit 104acd5bc1
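As a rough illustration of the pattern this commit describes, here is a minimal hypothetical sketch (not code from this commit: the class name, the 4 KB allocation size, and the generated return-a-constant function are invented for the example). A generator derives from Gen::XCodeBlock, so it owns its own executable region and calls the emitter members directly, with no "emit." prefix and no global code pointer:

#include "x64Emitter.h"

using namespace Gen;

// Hypothetical example of the new usage pattern.
class ConstFuncGenerator : public XCodeBlock
{
public:
	typedef int (*ConstFn)();

	// Emits a tiny function that just returns 'value'.
	ConstFn Generate(int value)
	{
		AllocCodeSpace(4096);           // executable region comes from XCodeBlock
		const u8 *start = GetCodePtr();
		ABI_EmitPrologue(0);            // does nothing on x86, sets up the frame on x64
		MOV(32, R(EAX), Imm32(value));  // integer return value goes in EAX
		ABI_EmitEpilogue(0);            // emits the return
		return (ConstFn)start;
	}
};

This mirrors how the Jit and the new ThunkManager (both shown in the diff below) are expected to use the emitter after this change.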
@@ -25,7 +25,7 @@ using namespace Gen;
 // ====================================

 // Sets up a __cdecl function.
-void ABI_EmitPrologue(int maxCallParams)
+void XEmitter::ABI_EmitPrologue(int maxCallParams)
 {
 #ifdef _M_IX86
 // Don't really need to do anything
@@ -40,7 +40,8 @@ void ABI_EmitPrologue(int maxCallParams)
 #error Arch not supported
 #endif
 }
-void ABI_EmitEpilogue(int maxCallParams)
+
+void XEmitter::ABI_EmitEpilogue(int maxCallParams)
 {
 #ifdef _M_IX86
 RET();
@@ -60,14 +61,14 @@ void ABI_EmitEpilogue(int maxCallParams)
 // Shared code between Win32 and Unix32
 // ====================================

-void ABI_CallFunctionC(void *func, u32 param1) {
+void XEmitter::ABI_CallFunctionC(void *func, u32 param1) {
 ABI_AlignStack(1 * 4);
 PUSH(32, Imm32(param1));
 CALL(func);
 ABI_RestoreStack(1 * 4);
 }

-void ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
+void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
 ABI_AlignStack(2 * 4);
 PUSH(32, Imm32(param2));
 PUSH(32, Imm32(param1));
@@ -76,14 +77,14 @@ void ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
 }

 // Pass a register as a paremeter.
-void ABI_CallFunctionR(void *func, X64Reg reg1) {
+void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) {
 ABI_AlignStack(1 * 4);
 PUSH(32, R(reg1));
 CALL(func);
 ABI_RestoreStack(1 * 4);
 }

-void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
+void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
 {
 ABI_AlignStack(2 * 4);
 PUSH(32, R(reg2));
@@ -92,7 +93,7 @@ void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
 ABI_RestoreStack(2 * 4);
 }

-void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
+void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
 {
 ABI_AlignStack(2 * 4);
 PUSH(32, arg1);
@@ -101,7 +102,7 @@ void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
 ABI_RestoreStack(2 * 4);
 }

-void ABI_PushAllCalleeSavedRegsAndAdjustStack() {
+void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
 // Note: 4 * 4 = 16 bytes, so alignment is preserved.
 PUSH(EBP);
 PUSH(EBX);
@@ -109,14 +110,14 @@ void ABI_PushAllCalleeSavedRegsAndAdjustStack() {
 PUSH(EDI);
 }

-void ABI_PopAllCalleeSavedRegsAndAdjustStack() {
+void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
 POP(EDI);
 POP(ESI);
 POP(EBX);
 POP(EBP);
 }

-unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize) {
+unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) {
 frameSize += 4; // reserve space for return address
 unsigned int alignedSize =
 #ifdef __GNUC__
@@ -128,7 +129,7 @@ unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize) {
 }


-void ABI_AlignStack(unsigned int frameSize) {
+void XEmitter::ABI_AlignStack(unsigned int frameSize) {
 // Mac OS X requires the stack to be 16-byte aligned before every call.
 // Linux requires the stack to be 16-byte aligned before calls that put SSE
 // vectors on the stack, but since we do not keep track of which calls do that,
@@ -145,7 +146,7 @@ void ABI_AlignStack(unsigned int frameSize) {
 #endif
 }

-void ABI_RestoreStack(unsigned int frameSize) {
+void XEmitter::ABI_RestoreStack(unsigned int frameSize) {
 unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize);
 alignedSize -= 4; // return address is POPped at end of call
 if (alignedSize != 0) {
@@ -155,26 +156,26 @@ void ABI_RestoreStack(unsigned int frameSize) {

 #else

-void ABI_CallFunctionC(void *func, u32 param1) {
+void XEmitter::ABI_CallFunctionC(void *func, u32 param1) {
 MOV(32, R(ABI_PARAM1), Imm32(param1));
 CALL(func);
 }

-void ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
+void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) {
 MOV(32, R(ABI_PARAM1), Imm32(param1));
 MOV(32, R(ABI_PARAM2), Imm32(param2));
 CALL(func);
 }

 // Pass a register as a paremeter.
-void ABI_CallFunctionR(void *func, X64Reg reg1) {
+void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) {
 if (reg1 != ABI_PARAM1)
 MOV(32, R(ABI_PARAM1), R(reg1));
 CALL(func);
 }

 // Pass a register as a paremeter.
-void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) {
+void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) {
 if (reg1 != ABI_PARAM1)
 MOV(32, R(ABI_PARAM1), R(reg1));
 if (reg2 != ABI_PARAM2)
@@ -182,7 +183,7 @@ void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) {
 CALL(func);
 }

-void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
+void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
 {
 if (!arg1.IsSimpleReg(ABI_PARAM1))
 MOV(32, R(ABI_PARAM1), arg1);
@@ -190,21 +191,21 @@ void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
 CALL(func);
 }

-unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize) {
+unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) {
 return frameSize;
 }

-void ABI_AlignStack(unsigned int /*frameSize*/) {
+void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) {
 }

-void ABI_RestoreStack(unsigned int /*frameSize*/) {
+void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) {
 }

 #ifdef _WIN32

 // Win64 Specific Code
 // ====================================
-void ABI_PushAllCalleeSavedRegsAndAdjustStack() {
+void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
 //we only want to do this once
 PUSH(RBX);
 PUSH(RSI);
@@ -218,7 +219,7 @@ void ABI_PushAllCalleeSavedRegsAndAdjustStack() {
 SUB(64, R(RSP), Imm8(0x28));
 }

-void ABI_PopAllCalleeSavedRegsAndAdjustStack() {
+void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
 ADD(64, R(RSP), Imm8(0x28));
 POP(R15);
 POP(R14);
@@ -232,7 +233,7 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack() {

 // Win64 Specific Code
 // ====================================
-void ABI_PushAllCallerSavedRegsAndAdjustStack() {
+void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
 PUSH(RCX);
 PUSH(RDX);
 PUSH(RSI);
@@ -245,7 +246,7 @@ void ABI_PushAllCallerSavedRegsAndAdjustStack() {
 SUB(64, R(RSP), Imm8(0x28));
 }

-void ABI_PopAllCallerSavedRegsAndAdjustStack() {
+void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
 ADD(64, R(RSP), Imm8(0x28));
 POP(R11);
 POP(R10);
@@ -260,7 +261,7 @@ void ABI_PopAllCallerSavedRegsAndAdjustStack() {
 #else
 // Unix64 Specific Code
 // ====================================
-void ABI_PushAllCalleeSavedRegsAndAdjustStack() {
+void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
 PUSH(RBX);
 PUSH(RBP);
 PUSH(R12);
@@ -270,7 +271,7 @@ void ABI_PushAllCalleeSavedRegsAndAdjustStack() {
 PUSH(R15); //just to align stack. duped push/pop doesn't hurt.
 }

-void ABI_PopAllCalleeSavedRegsAndAdjustStack() {
+void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
 POP(R15);
 POP(R15);
 POP(R14);
@@ -280,7 +281,7 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack() {
 POP(RBX);
 }

-void ABI_PushAllCallerSavedRegsAndAdjustStack() {
+void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
 PUSH(RCX);
 PUSH(RDX);
 PUSH(RSI);
@@ -292,7 +293,7 @@ void ABI_PushAllCallerSavedRegsAndAdjustStack() {
 PUSH(R11);
 }

-void ABI_PopAllCallerSavedRegsAndAdjustStack() {
+void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
 POP(R11);
 POP(R11);
 POP(R10);
@@ -18,8 +18,6 @@
 #ifndef _JIT_ABI_H
 #define _JIT_ABI_H

-#include "x64Emitter.h"
-
 // x86/x64 ABI:s, and helpers to help follow them when JIT-ing code.
 // All convensions return values in EAX (+ possibly EDX).

@@ -81,42 +79,5 @@

 #endif

-// Utility functions
-// These only support u32 parameters, but that's enough for a lot of uses.
-// These will destroy the 1 or 2 first "parameter regs".
-void ABI_CallFunctionC(void *func, u32 param1);
-void ABI_CallFunctionCC(void *func, u32 param1, u32 param2);
-void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2);
-
-// Pass a register as a paremeter.
-void ABI_CallFunctionR(void *func, Gen::X64Reg reg1);
-void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2);
-
-// A function that doesn't have any control over what it will do to regs,
-// such as the dispatcher, should be surrounded by these.
-void ABI_PushAllCalleeSavedRegsAndAdjustStack();
-void ABI_PopAllCalleeSavedRegsAndAdjustStack();
-
-// A function that doesn't know anything about it's surroundings, should
-// be surrounded by these to establish a safe environment, where it can roam free.
-// An example is a backpatch injected function.
-void ABI_PushAllCallerSavedRegsAndAdjustStack();
-void ABI_PopAllCallerSavedRegsAndAdjustStack();
-
-unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize);
-void ABI_AlignStack(unsigned int frameSize);
-void ABI_RestoreStack(unsigned int frameSize);
-
-// Sets up a __cdecl function.
-// Only x64 really needs the parameter.
-void ABI_EmitPrologue(int maxCallParams);
-void ABI_EmitEpilogue(int maxCallParams);
-
-#ifdef _M_IX86
-inline int ABI_GetNumXMMRegs() { return 8; }
-#else
-inline int ABI_GetNumXMMRegs() { return 16; }
-#endif
-
 #endif // _JIT_ABI_H

@@ -38,7 +38,7 @@
 // This is purposedely not a full wrapper for virtualalloc/mmap, but it
 // provides exactly the primitive operations that Dolphin needs.

-void* AllocateExecutableMemory(int size, bool low)
+void* AllocateExecutableMemory(size_t size, bool low)
 {
 #ifdef _WIN32
 void* ptr = VirtualAlloc(0, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
@@ -71,7 +71,7 @@ void* AllocateExecutableMemory(int size, bool low)
 }


-void* AllocateMemoryPages(int size)
+void* AllocateMemoryPages(size_t size)
 {
 #ifdef _WIN32
 void* ptr = VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE);
@@ -99,7 +99,7 @@ void* AllocateMemoryPages(int size)
 }


-void FreeMemoryPages(void* ptr, int size)
+void FreeMemoryPages(void* ptr, size_t size)
 {
 #ifdef _WIN32
 if (ptr)
@@ -113,7 +113,7 @@ void FreeMemoryPages(void* ptr, int size)
 }


-void WriteProtectMemory(void* ptr, int size, bool allowExecute)
+void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
 {
 #ifdef _WIN32
 VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, 0);
@@ -123,7 +123,7 @@ void WriteProtectMemory(void* ptr, int size, bool allowExecute)
 }


-void UnWriteProtectMemory(void* ptr, int size, bool allowExecute)
+void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute)
 {
 #ifdef _WIN32
 VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READONLY, 0);
@@ -18,14 +18,14 @@
 #ifndef _MEMORYUTIL_H
 #define _MEMORYUTIL_H

-void* AllocateExecutableMemory(int size, bool low = true);
-void* AllocateMemoryPages(int size);
-void FreeMemoryPages(void* ptr, int size);
-void WriteProtectMemory(void* ptr, int size, bool executable = false);
-void UnWriteProtectMemory(void* ptr, int size, bool allowExecute);
+void* AllocateExecutableMemory(size_t size, bool low = true);
+void* AllocateMemoryPages(size_t size);
+void FreeMemoryPages(void* ptr, size_t size);
+void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
+void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute);


-inline int GetPageSize() {return(4096);}
+inline int GetPageSize() {return 4096;}


 #endif
@@ -18,33 +18,29 @@
 #include <map>

 #include "Common.h"
-#include "Thunk.h"
 #include "x64Emitter.h"
 #include "MemoryUtil.h"
 #include "ABI.h"
+#include "Thunk.h"

-using namespace Gen;
+ThunkManager thunks;

 #define THUNK_ARENA_SIZE 1024*1024*1

-namespace {
-
-static std::map<void *, const u8 *> thunks;
-
-u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]);
-u8 GC_ALIGNED32(saved_gpr_state[16 * 8]);
-
-static u8 *thunk_memory;
-static u8 *thunk_code;
-static const u8 *save_regs;
-static const u8 *load_regs;
-static u16 saved_mxcsr;
-
-}
-
-void Thunk_Init()
-{
-thunk_memory = (u8 *)AllocateExecutableMemory(THUNK_ARENA_SIZE);
-thunk_code = thunk_memory;
-
-GenContext ctx(&thunk_code);
+namespace
+{
+static u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]);
+static u8 GC_ALIGNED32(saved_gpr_state[16 * 8]);
+static u16 saved_mxcsr;
+
+}  // namespace
+
+using namespace Gen;
+
+void ThunkManager::Init()
+{
+AllocCodeSpace(THUNK_ARENA_SIZE);
 save_regs = GetCodePtr();
 for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
 MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i));
@@ -89,31 +85,27 @@ void Thunk_Init()
 RET();
 }

-void Thunk_Reset()
+void ThunkManager::Reset()
 {
 thunks.clear();
-thunk_code = thunk_memory;
+ResetCodePtr();
 }

-void Thunk_Shutdown()
+void ThunkManager::Shutdown()
 {
-Thunk_Reset();
-FreeMemoryPages(thunk_memory, THUNK_ARENA_SIZE);
-thunk_memory = 0;
-thunk_code = 0;
+Reset();
+FreeCodeSpace();
 }

-void *ProtectFunction(void *function, int num_params)
+void *ThunkManager::ProtectFunction(void *function, int num_params)
 {
 std::map<void *, const u8 *>::iterator iter;
 iter = thunks.find(function);
 if (iter != thunks.end())
 return (void *)iter->second;
-if (!thunk_memory)
+if (!region)
 PanicAlert("Trying to protect functions before the emu is started. Bad bad bad.");

-GenContext gen(&thunk_code);
 const u8 *call_point = GetCodePtr();
 // Make sure to align stack.
 #ifdef _M_X64
@@ -18,6 +18,11 @@
 #ifndef _THUNK_H
 #define _THUNK_H

+#include <map>
+
+#include "Common.h"
+#include "x64Emitter.h"
+
 // This simple class creates a wrapper around a C/C++ function that saves all fp state
 // before entering it, and restores it upon exit. This is required to be able to selectively
 // call functions from generated code, without inflicting the performance hit and increase
@@ -30,10 +35,21 @@
 // NOT THREAD SAFE. This may only be used from the CPU thread.
 // Any other thread using this stuff will be FATAL.

-void Thunk_Init();
-void Thunk_Reset();
-void Thunk_Shutdown();
-
-void *ProtectFunction(void *function, int num_params);
+class ThunkManager : public Gen::XCodeBlock
+{
+std::map<void *, const u8 *> thunks;
+
+const u8 *save_regs;
+const u8 *load_regs;
+
+public:
+void Init();
+void Reset();
+void Shutdown();
+
+void *ProtectFunction(void *function, int num_params);
+};
+
+extern ThunkManager thunks;

 #endif
File diff suppressed because it is too large
@ -21,11 +21,13 @@
|
|||||||
#define _DOLPHIN_INTEL_CODEGEN
|
#define _DOLPHIN_INTEL_CODEGEN
|
||||||
|
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
|
#include "MemoryUtil.h"
|
||||||
|
|
||||||
namespace Gen
|
namespace Gen
|
||||||
{
|
{
|
||||||
enum X64Reg
|
|
||||||
{
|
enum X64Reg
|
||||||
|
{
|
||||||
EAX = 0, EBX = 3, ECX = 1, EDX = 2,
|
EAX = 0, EBX = 3, ECX = 1, EDX = 2,
|
||||||
ESI = 6, EDI = 7, EBP = 5, ESP = 4,
|
ESI = 6, EDI = 7, EBP = 5, ESP = 4,
|
||||||
|
|
||||||
@ -44,10 +46,10 @@ namespace Gen
|
|||||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
|
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
|
||||||
|
|
||||||
INVALID_REG = 0xFFFFFFFF
|
INVALID_REG = 0xFFFFFFFF
|
||||||
};
|
};
|
||||||
|
|
||||||
enum CCFlags
|
enum CCFlags
|
||||||
{
|
{
|
||||||
CC_O = 0,
|
CC_O = 0,
|
||||||
CC_NO = 1,
|
CC_NO = 1,
|
||||||
CC_B = 2, CC_C = 2, CC_NAE = 2,
|
CC_B = 2, CC_C = 2, CC_NAE = 2,
|
||||||
@ -64,16 +66,16 @@ namespace Gen
|
|||||||
CC_NL = 0xD, CC_GE = 0xD,
|
CC_NL = 0xD, CC_GE = 0xD,
|
||||||
CC_LE = 0xE, CC_NG = 0xE,
|
CC_LE = 0xE, CC_NG = 0xE,
|
||||||
CC_NLE = 0xF, CC_G = 0xF
|
CC_NLE = 0xF, CC_G = 0xF
|
||||||
};
|
};
|
||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
NUMGPRs = 16,
|
NUMGPRs = 16,
|
||||||
NUMXMMs = 16,
|
NUMXMMs = 16,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
SCALE_NONE = 0,
|
SCALE_NONE = 0,
|
||||||
SCALE_1 = 1,
|
SCALE_1 = 1,
|
||||||
SCALE_2 = 2,
|
SCALE_2 = 2,
|
||||||
@ -85,37 +87,9 @@ namespace Gen
|
|||||||
SCALE_IMM16 = 0xF1,
|
SCALE_IMM16 = 0xF1,
|
||||||
SCALE_IMM32 = 0xF2,
|
SCALE_IMM32 = 0xF2,
|
||||||
SCALE_IMM64 = 0xF3,
|
SCALE_IMM64 = 0xF3,
|
||||||
};
|
};
|
||||||
|
|
||||||
void SetCodePtr(u8 *ptr);
|
enum NormalOp {
|
||||||
void ReserveCodeSpace(int bytes);
|
|
||||||
const u8 *AlignCode4();
|
|
||||||
const u8 *AlignCode16();
|
|
||||||
const u8 *AlignCodePage();
|
|
||||||
const u8 *GetCodePtr();
|
|
||||||
u8 *GetWritableCodePtr();
|
|
||||||
|
|
||||||
|
|
||||||
// Safe way to temporarily redirect the code generator.
|
|
||||||
class GenContext
|
|
||||||
{
|
|
||||||
u8 **code_ptr_ptr;
|
|
||||||
u8 *saved_ptr;
|
|
||||||
public:
|
|
||||||
GenContext(u8 **code_ptr_ptr_)
|
|
||||||
{
|
|
||||||
saved_ptr = GetWritableCodePtr();
|
|
||||||
code_ptr_ptr = code_ptr_ptr_;
|
|
||||||
SetCodePtr(*code_ptr_ptr);
|
|
||||||
}
|
|
||||||
~GenContext()
|
|
||||||
{
|
|
||||||
*code_ptr_ptr = GetWritableCodePtr();
|
|
||||||
SetCodePtr(saved_ptr);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
enum NormalOp {
|
|
||||||
nrmADD,
|
nrmADD,
|
||||||
nrmADC,
|
nrmADC,
|
||||||
nrmSUB,
|
nrmSUB,
|
||||||
@ -127,15 +101,14 @@ namespace Gen
|
|||||||
nrmTEST,
|
nrmTEST,
|
||||||
nrmCMP,
|
nrmCMP,
|
||||||
nrmXCHG,
|
nrmXCHG,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Make the generation routine examine which direction to go
|
class XEmitter;
|
||||||
// probably has to be a static
|
|
||||||
|
|
||||||
// RIP addressing does not benefit from micro op fusion on Core arch
|
// RIP addressing does not benefit from micro op fusion on Core arch
|
||||||
struct OpArg
|
struct OpArg
|
||||||
{
|
{
|
||||||
OpArg() {} //dummy op arg, used for storage
|
OpArg() {} // dummy op arg, used for storage
|
||||||
OpArg(u64 _offset, int _scale, X64Reg rmReg = RAX, X64Reg scaledReg = RAX)
|
OpArg(u64 _offset, int _scale, X64Reg rmReg = RAX, X64Reg scaledReg = RAX)
|
||||||
{
|
{
|
||||||
operandReg = 0;
|
operandReg = 0;
|
||||||
@ -145,14 +118,14 @@ namespace Gen
|
|||||||
//if scale == 0 never mind offseting
|
//if scale == 0 never mind offseting
|
||||||
offset = _offset;
|
offset = _offset;
|
||||||
}
|
}
|
||||||
void WriteRex(bool op64, int customOp = -1) const;
|
void WriteRex(XEmitter *emit, bool op64, int customOp = -1) const;
|
||||||
void WriteRest(int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF) const;
|
void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF) const;
|
||||||
void WriteSingleByteOp(u8 op, X64Reg operandReg, int bits);
|
void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
|
||||||
//This one is public - must be written to
|
// This one is public - must be written to
|
||||||
u64 offset; //use RIP-relative as much as possible - avoid 64-bit immediates at all costs
|
u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available.
|
||||||
u8 operandReg;
|
u8 operandReg;
|
||||||
|
|
||||||
void WriteNormalOp(bool toRM, NormalOp op, const OpArg &operand, int bits) const;
|
void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const;
|
||||||
bool IsImm() const {return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64;}
|
bool IsImm() const {return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64;}
|
||||||
bool IsSimpleReg() const {return scale == SCALE_NONE;}
|
bool IsSimpleReg() const {return scale == SCALE_NONE;}
|
||||||
bool IsSimpleReg(X64Reg reg) const {
|
bool IsSimpleReg(X64Reg reg) const {
|
||||||
@ -160,6 +133,7 @@ namespace Gen
|
|||||||
return false;
|
return false;
|
||||||
return GetSimpleReg() == reg;
|
return GetSimpleReg() == reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CanDoOpWith(const OpArg &other) const
|
bool CanDoOpWith(const OpArg &other) const
|
||||||
{
|
{
|
||||||
if (IsSimpleReg()) return true;
|
if (IsSimpleReg()) return true;
|
||||||
@ -178,6 +152,7 @@ namespace Gen
|
|||||||
default: return -1;
|
default: return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
X64Reg GetSimpleReg() const
|
X64Reg GetSimpleReg() const
|
||||||
{
|
{
|
||||||
if (scale == SCALE_NONE)
|
if (scale == SCALE_NONE)
|
||||||
@ -185,160 +160,39 @@ namespace Gen
|
|||||||
else
|
else
|
||||||
return INVALID_REG;
|
return INVALID_REG;
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
u8 scale;
|
u8 scale;
|
||||||
u8 offsetOrBaseReg;
|
u8 offsetOrBaseReg;
|
||||||
u8 indexReg;
|
u8 indexReg;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline OpArg M(void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
|
inline OpArg M(void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
|
||||||
inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);}
|
inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);}
|
||||||
inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}
|
inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}
|
||||||
inline OpArg MDisp(X64Reg value, int offset) {
|
inline OpArg MDisp(X64Reg value, int offset) {
|
||||||
return OpArg((u32)offset, SCALE_ATREG, value); }
|
return OpArg((u32)offset, SCALE_ATREG, value); }
|
||||||
inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset)
|
inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset)
|
||||||
{
|
{
|
||||||
return OpArg(offset, scale, base, scaled);
|
return OpArg(offset, scale, base, scaled);
|
||||||
}
|
}
|
||||||
inline OpArg Imm8 (u8 imm) {return OpArg(imm, SCALE_IMM8);}
|
inline OpArg Imm8 (u8 imm) {return OpArg(imm, SCALE_IMM8);}
|
||||||
inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used
|
inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used
|
||||||
inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
|
inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
|
||||||
inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);}
|
inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);}
|
||||||
#ifdef _M_X64
|
#ifdef _M_X64
|
||||||
inline OpArg ImmPtr(void* imm) {return Imm64((u64)imm);}
|
inline OpArg ImmPtr(void* imm) {return Imm64((u64)imm);}
|
||||||
#else
|
#else
|
||||||
inline OpArg ImmPtr(void* imm) {return Imm32((u32)imm);}
|
inline OpArg ImmPtr(void* imm) {return Imm32((u32)imm);}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void INT3();
|
struct FixupBranch
|
||||||
void NOP(int count = 1); //nop padding - TODO: fast nop slides, for amd and intel (check their manuals)
|
{
|
||||||
void PAUSE();
|
|
||||||
void RET();
|
|
||||||
void STC();
|
|
||||||
void CLC();
|
|
||||||
void CMC();
|
|
||||||
void PUSH(X64Reg reg);
|
|
||||||
void POP(X64Reg reg);
|
|
||||||
void PUSH(int bits, const OpArg ®);
|
|
||||||
void POP(int bits, const OpArg ®);
|
|
||||||
void PUSHF();
|
|
||||||
void POPF();
|
|
||||||
|
|
||||||
typedef const u8* JumpTarget;
|
|
||||||
|
|
||||||
struct FixupBranch
|
|
||||||
{
|
|
||||||
u8 *ptr;
|
u8 *ptr;
|
||||||
int type; //0 = 8bit 1 = 32bit
|
int type; //0 = 8bit 1 = 32bit
|
||||||
};
|
};
|
||||||
|
|
||||||
FixupBranch J(bool force5bytes = false);
|
enum SSECompare
|
||||||
|
{
|
||||||
void JMP(const u8 * addr, bool force5Bytes = false);
|
|
||||||
void JMP(OpArg arg);
|
|
||||||
void JMPptr(const OpArg &arg);
|
|
||||||
void JMPself(); //infinite loop!
|
|
||||||
|
|
||||||
void CALL(void *fnptr);
|
|
||||||
void CALLptr(OpArg arg);
|
|
||||||
|
|
||||||
FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
|
|
||||||
void J_CC(CCFlags conditionCode, JumpTarget target);
|
|
||||||
void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false);
|
|
||||||
|
|
||||||
void SetJumpTarget(const FixupBranch &branch);
|
|
||||||
|
|
||||||
//WARNING - INC and DEC slow on Intel Core, but not on AMD, since it creates
|
|
||||||
//false flags dependencies because they only update a subset of the flags
|
|
||||||
|
|
||||||
// ector - I hereby BAN inc and dec due to their horribleness :P
|
|
||||||
// void INC(int bits, OpArg arg);
|
|
||||||
// void DEC(int bits, OpArg arg);
|
|
||||||
|
|
||||||
void SETcc(CCFlags flag, OpArg dest);
|
|
||||||
// Note: CMOV brings small if any benefit on current cpus, unfortunately.
|
|
||||||
void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag);
|
|
||||||
|
|
||||||
void LFENCE();
|
|
||||||
void MFENCE();
|
|
||||||
void SFENCE();
|
|
||||||
|
|
||||||
void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit
|
|
||||||
void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit
|
|
||||||
|
|
||||||
//These two can not be executed on early Intel 64-bit CPU:s, only on AMD!
|
|
||||||
|
|
||||||
void LAHF(); // 3 cycle vector path
|
|
||||||
void SAHF(); // direct path fast
|
|
||||||
|
|
||||||
//Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU
|
|
||||||
//LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr.,
|
|
||||||
|
|
||||||
//Actually REP MOVSD could be useful :P
|
|
||||||
|
|
||||||
void MOVNTI(int bits, OpArg dest, X64Reg src);
|
|
||||||
|
|
||||||
void MUL(int bits, OpArg src); //UNSIGNED
|
|
||||||
void DIV(int bits, OpArg src);
|
|
||||||
void IMUL(int bits, OpArg src); //SIGNED
|
|
||||||
void IDIV(int bits, OpArg src);
|
|
||||||
void IMUL(int bits, X64Reg regOp, OpArg src);
|
|
||||||
void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm);
|
|
||||||
|
|
||||||
|
|
||||||
void NEG(int bits, OpArg src);
|
|
||||||
void NOT(int bits, OpArg src);
|
|
||||||
|
|
||||||
void ROL(int bits, OpArg dest, OpArg shift);
|
|
||||||
void ROR(int bits, OpArg dest, OpArg shift);
|
|
||||||
void RCL(int bits, OpArg dest, OpArg shift);
|
|
||||||
void RCR(int bits, OpArg dest, OpArg shift);
|
|
||||||
void SHL(int bits, OpArg dest, OpArg shift);
|
|
||||||
void SHR(int bits, OpArg dest, OpArg shift);
|
|
||||||
void SAR(int bits, OpArg dest, OpArg shift);
|
|
||||||
|
|
||||||
|
|
||||||
void CWD(int bits = 16);
|
|
||||||
inline void CDQ() {CWD(32);}
|
|
||||||
inline void CQO() {CWD(64);}
|
|
||||||
void CBW(int bits = 8);
|
|
||||||
inline void CWDE() {CBW(16);}
|
|
||||||
inline void CDQE() {CBW(32);}
|
|
||||||
|
|
||||||
void LEA(int bits, X64Reg dest, OpArg src);
|
|
||||||
|
|
||||||
|
|
||||||
enum PrefetchLevel
|
|
||||||
{
|
|
||||||
PF_NTA, //Non-temporal (data used once and only once)
|
|
||||||
PF_T0, //All cache levels
|
|
||||||
PF_T1, //Levels 2+ (aliased to T0 on AMD)
|
|
||||||
PF_T2, //Levels 3+ (aliased to T0 on AMD)
|
|
||||||
};
|
|
||||||
void PREFETCH(PrefetchLevel level, OpArg arg);
|
|
||||||
|
|
||||||
|
|
||||||
void ADD (int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
void ADC (int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
void SUB (int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
void SBB (int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
void AND (int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
void OR (int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
void XOR (int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
void MOV (int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
void TEST(int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
void CMP (int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
|
|
||||||
// XCHG is SLOW and should be avoided.
|
|
||||||
//void XCHG(int bits, const OpArg &a1, const OpArg &a2);
|
|
||||||
|
|
||||||
void XCHG_AHAL();
|
|
||||||
void BSWAP(int bits, X64Reg reg);
|
|
||||||
void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary
|
|
||||||
void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src);
|
|
||||||
|
|
||||||
enum SSECompare
|
|
||||||
{
|
|
||||||
EQ = 0,
|
EQ = 0,
|
||||||
LT,
|
LT,
|
||||||
LE,
|
LE,
|
||||||
@ -347,27 +201,198 @@ namespace Gen
|
|||||||
NLT,
|
NLT,
|
||||||
NLE,
|
NLE,
|
||||||
ORD,
|
ORD,
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef const u8* JumpTarget;
|
||||||
|
|
||||||
|
class XEmitter
|
||||||
|
{
|
||||||
|
friend struct OpArg; // for Write8 etc
|
||||||
|
private:
|
||||||
|
u8 *code;
|
||||||
|
|
||||||
|
void Rex(int w, int r, int x, int b);
|
||||||
|
void WriteSimple1Byte(int bits, u8 byte, X64Reg reg);
|
||||||
|
void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
|
||||||
|
void WriteMulDivType(int bits, OpArg src, int ext);
|
||||||
|
void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2);
|
||||||
|
void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
|
||||||
|
void WriteMXCSR(OpArg arg, int ext);
|
||||||
|
void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
|
||||||
|
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
inline void Write8(u8 value) {*code++ = value;}
|
||||||
|
inline void Write16(u16 value) {*(u16*)code = (value); code += 2;}
|
||||||
|
inline void Write32(u32 value) {*(u32*)code = (value); code += 4;}
|
||||||
|
inline void Write64(u64 value) {*(u64*)code = (value); code += 8;}
|
||||||
|
|
||||||
|
public:
|
||||||
|
XEmitter() { code = NULL; }
|
||||||
|
XEmitter(u8 *code_ptr) { code = code_ptr; }
|
||||||
|
|
||||||
|
void WriteModRM(int mod, int rm, int reg);
|
||||||
|
void WriteSIB(int scale, int index, int base);
|
||||||
|
|
||||||
|
void SetCodePtr(u8 *ptr);
|
||||||
|
void ReserveCodeSpace(int bytes);
|
||||||
|
const u8 *AlignCode4();
|
||||||
|
const u8 *AlignCode16();
|
||||||
|
const u8 *AlignCodePage();
|
||||||
|
const u8 *GetCodePtr() const;
|
||||||
|
u8 *GetWritableCodePtr();
|
||||||
|
|
||||||
|
// Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU
|
||||||
|
// INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr.,
|
||||||
|
// INC and DEC are slow on Intel Core, but not on AMD. They create a
|
||||||
|
// false flag dependency because they only update a subset of the flags.
|
||||||
|
// XCHG is SLOW and should be avoided.
|
||||||
|
|
||||||
|
// Debug breakpoint
|
||||||
|
void INT3();
|
||||||
|
|
||||||
|
// Do nothing
|
||||||
|
void NOP(int count = 1); //nop padding - TODO: fast nop slides, for amd and intel (check their manuals)
|
||||||
|
|
||||||
|
// Save energy in wait-loops on P4 only. Probably not too useful.
|
||||||
|
void PAUSE();
|
||||||
|
|
||||||
|
// Flag control
|
||||||
|
void STC();
|
||||||
|
void CLC();
|
||||||
|
void CMC();
|
||||||
|
|
||||||
|
// These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and AMD!
|
||||||
|
void LAHF(); // 3 cycle vector path
|
||||||
|
void SAHF(); // direct path fast
|
||||||
|
|
||||||
|
|
||||||
|
// Stack control
|
||||||
|
void PUSH(X64Reg reg);
|
||||||
|
void POP(X64Reg reg);
|
||||||
|
void PUSH(int bits, const OpArg ®);
|
||||||
|
void POP(int bits, const OpArg ®);
|
||||||
|
void PUSHF();
|
||||||
|
void POPF();
|
||||||
|
|
||||||
|
// Flow control
|
||||||
|
void RET();
|
||||||
|
void RET_FAST();
|
||||||
|
void UD2();
|
||||||
|
FixupBranch J(bool force5bytes = false);
|
||||||
|
|
||||||
|
void JMP(const u8 * addr, bool force5Bytes = false);
|
||||||
|
void JMP(OpArg arg);
|
||||||
|
void JMPptr(const OpArg &arg);
|
||||||
|
void JMPself(); //infinite loop!
|
||||||
|
|
||||||
|
void CALL(const void *fnptr);
|
||||||
|
void CALLptr(OpArg arg);
|
||||||
|
|
||||||
|
FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
|
||||||
|
void J_CC(CCFlags conditionCode, JumpTarget target);
|
||||||
|
void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false);
|
||||||
|
|
||||||
|
void SetJumpTarget(const FixupBranch &branch);
|
||||||
|
|
||||||
|
void SETcc(CCFlags flag, OpArg dest);
|
||||||
|
// Note: CMOV brings small if any benefit on current cpus.
|
||||||
|
void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag);
|
||||||
|
|
||||||
|
// Fences
|
||||||
|
void LFENCE();
|
||||||
|
void MFENCE();
|
||||||
|
void SFENCE();
|
||||||
|
|
||||||
|
// Bit scan
|
||||||
|
void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit
|
||||||
|
void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit
|
||||||
|
|
||||||
|
// Cache control
|
||||||
|
enum PrefetchLevel
|
||||||
|
{
|
||||||
|
PF_NTA, //Non-temporal (data used once and only once)
|
||||||
|
PF_T0, //All cache levels
|
||||||
|
PF_T1, //Levels 2+ (aliased to T0 on AMD)
|
||||||
|
PF_T2, //Levels 3+ (aliased to T0 on AMD)
|
||||||
};
|
};
|
||||||
|
void PREFETCH(PrefetchLevel level, OpArg arg);
|
||||||
|
void MOVNTI(int bits, OpArg dest, X64Reg src);
|
||||||
|
void MOVNTDQ(OpArg arg, X64Reg regOp);
|
||||||
|
void MOVNTPS(OpArg arg, X64Reg regOp);
|
||||||
|
void MOVNTPD(OpArg arg, X64Reg regOp);
|
||||||
|
|
||||||
|
// Multiplication / division
|
||||||
|
void MUL(int bits, OpArg src); //UNSIGNED
|
||||||
|
void IMUL(int bits, OpArg src); //SIGNED
|
||||||
|
void IMUL(int bits, X64Reg regOp, OpArg src);
|
||||||
|
void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm);
|
||||||
|
void DIV(int bits, OpArg src);
|
||||||
|
void IDIV(int bits, OpArg src);
|
||||||
|
|
||||||
|
// Shift
|
||||||
|
void ROL(int bits, OpArg dest, OpArg shift);
|
||||||
|
void ROR(int bits, OpArg dest, OpArg shift);
|
||||||
|
void RCL(int bits, OpArg dest, OpArg shift);
|
||||||
|
void RCR(int bits, OpArg dest, OpArg shift);
|
||||||
|
void SHL(int bits, OpArg dest, OpArg shift);
|
||||||
|
void SHR(int bits, OpArg dest, OpArg shift);
|
||||||
|
void SAR(int bits, OpArg dest, OpArg shift);
|
||||||
|
|
||||||
|
// Extend EAX into EDX in various ways
|
||||||
|
void CWD(int bits = 16);
|
||||||
|
inline void CDQ() {CWD(32);}
|
||||||
|
inline void CQO() {CWD(64);}
|
||||||
|
void CBW(int bits = 8);
|
||||||
|
inline void CWDE() {CBW(16);}
|
||||||
|
inline void CDQE() {CBW(32);}
|
||||||
|
|
||||||
|
// Load effective address
|
||||||
|
void LEA(int bits, X64Reg dest, OpArg src);
|
||||||
|
|
||||||
|
// Integer arithmetic
|
||||||
|
void NEG (int bits, OpArg src);
|
||||||
|
void ADD (int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
void ADC (int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
void SUB (int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
void SBB (int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
void AND (int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
void CMP (int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
|
||||||
|
// Bit operations
|
||||||
|
void NOT (int bits, OpArg src);
|
||||||
|
void OR (int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
void XOR (int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
void MOV (int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
void TEST(int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
|
||||||
|
// Are these useful at all? Consider removing.
|
||||||
|
void XCHG(int bits, const OpArg &a1, const OpArg &a2);
|
||||||
|
void XCHG_AHAL();
|
||||||
|
|
||||||
|
// Byte swapping (32 and 64-bit only).
|
||||||
|
void BSWAP(int bits, X64Reg reg);
|
||||||
|
|
||||||
|
// Sign/zero extension
|
||||||
|
void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary
|
||||||
|
void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src);
|
||||||
|
|
||||||
// WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
|
// WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
|
||||||
void STMXCSR(OpArg memloc);
|
void STMXCSR(OpArg memloc);
|
||||||
void LDMXCSR(OpArg memloc);
|
void LDMXCSR(OpArg memloc);
|
||||||
|
|
||||||
// Regular SSE/SSE2 instructions
|
// Prefixes
|
||||||
|
void LOCK();
|
||||||
|
void REP();
|
||||||
|
void REPNE();
|
||||||
|
|
||||||
|
void FWAIT();
|
||||||
|
|
||||||
|
// SSE/SSE2: Floating point arithmetic
|
||||||
void ADDSS(X64Reg regOp, OpArg arg);
|
void ADDSS(X64Reg regOp, OpArg arg);
|
||||||
void ADDSD(X64Reg regOp, OpArg arg);
|
void ADDSD(X64Reg regOp, OpArg arg);
|
||||||
void SUBSS(X64Reg regOp, OpArg arg);
|
void SUBSS(X64Reg regOp, OpArg arg);
|
||||||
void SUBSD(X64Reg regOp, OpArg arg);
|
void SUBSD(X64Reg regOp, OpArg arg);
|
||||||
void CMPSS(X64Reg regOp, OpArg arg, u8 compare);
|
|
||||||
void CMPSD(X64Reg regOp, OpArg arg, u8 compare);
|
|
||||||
void ANDSS(X64Reg regOp, OpArg arg);
|
|
||||||
void ANDSD(X64Reg regOp, OpArg arg);
|
|
||||||
void ANDNSS(X64Reg regOp, OpArg arg);
|
|
||||||
void ANDNSD(X64Reg regOp, OpArg arg);
|
|
||||||
void ORSS(X64Reg regOp, OpArg arg);
|
|
||||||
void ORSD(X64Reg regOp, OpArg arg);
|
|
||||||
void XORSS(X64Reg regOp, OpArg arg);
|
|
||||||
void XORSD(X64Reg regOp, OpArg arg);
|
|
||||||
void MULSS(X64Reg regOp, OpArg arg);
|
void MULSS(X64Reg regOp, OpArg arg);
|
||||||
void MULSD(X64Reg regOp, OpArg arg);
|
void MULSD(X64Reg regOp, OpArg arg);
|
||||||
void DIVSS(X64Reg regOp, OpArg arg);
|
void DIVSS(X64Reg regOp, OpArg arg);
|
||||||
@ -381,23 +406,25 @@ namespace Gen
|
|||||||
void RSQRTSS(X64Reg regOp, OpArg arg);
|
void RSQRTSS(X64Reg regOp, OpArg arg);
|
||||||
void RSQRTSD(X64Reg regOp, OpArg arg);
|
void RSQRTSD(X64Reg regOp, OpArg arg);
|
||||||
|
|
||||||
void COMISS(X64Reg regOp, OpArg arg);
|
// SSE/SSE2: Floating point bitwise (yes)
|
||||||
void COMISD(X64Reg regOp, OpArg arg);
|
void CMPSS(X64Reg regOp, OpArg arg, u8 compare);
|
||||||
|
void CMPSD(X64Reg regOp, OpArg arg, u8 compare);
|
||||||
|
void ANDSS(X64Reg regOp, OpArg arg);
|
||||||
|
void ANDSD(X64Reg regOp, OpArg arg);
|
||||||
|
void ANDNSS(X64Reg regOp, OpArg arg);
|
||||||
|
void ANDNSD(X64Reg regOp, OpArg arg);
|
||||||
|
void ORSS(X64Reg regOp, OpArg arg);
|
||||||
|
void ORSD(X64Reg regOp, OpArg arg);
|
||||||
|
void XORSS(X64Reg regOp, OpArg arg);
|
||||||
|
void XORSD(X64Reg regOp, OpArg arg);
|
||||||
|
|
||||||
|
// SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
|
||||||
void ADDPS(X64Reg regOp, OpArg arg);
|
void ADDPS(X64Reg regOp, OpArg arg);
|
||||||
void ADDPD(X64Reg regOp, OpArg arg);
|
void ADDPD(X64Reg regOp, OpArg arg);
|
||||||
void SUBPS(X64Reg regOp, OpArg arg);
|
void SUBPS(X64Reg regOp, OpArg arg);
|
||||||
void SUBPD(X64Reg regOp, OpArg arg);
|
void SUBPD(X64Reg regOp, OpArg arg);
|
||||||
void CMPPS(X64Reg regOp, OpArg arg, u8 compare);
|
void CMPPS(X64Reg regOp, OpArg arg, u8 compare);
|
||||||
void CMPPD(X64Reg regOp, OpArg arg, u8 compare);
|
void CMPPD(X64Reg regOp, OpArg arg, u8 compare);
|
||||||
void ANDPS(X64Reg regOp, OpArg arg);
|
|
||||||
void ANDPD(X64Reg regOp, OpArg arg);
|
|
||||||
void ANDNPS(X64Reg regOp, OpArg arg);
|
|
||||||
void ANDNPD(X64Reg regOp, OpArg arg);
|
|
||||||
void ORPS(X64Reg regOp, OpArg arg);
|
|
||||||
void ORPD(X64Reg regOp, OpArg arg);
|
|
||||||
void XORPS(X64Reg regOp, OpArg arg);
|
|
||||||
void XORPD(X64Reg regOp, OpArg arg);
|
|
||||||
void MULPS(X64Reg regOp, OpArg arg);
|
void MULPS(X64Reg regOp, OpArg arg);
|
||||||
void MULPD(X64Reg regOp, OpArg arg);
|
void MULPD(X64Reg regOp, OpArg arg);
|
||||||
void DIVPS(X64Reg regOp, OpArg arg);
|
void DIVPS(X64Reg regOp, OpArg arg);
|
||||||
@ -410,16 +437,34 @@ namespace Gen
|
|||||||
void SQRTPD(X64Reg regOp, OpArg arg);
|
void SQRTPD(X64Reg regOp, OpArg arg);
|
||||||
void RSQRTPS(X64Reg regOp, OpArg arg);
|
void RSQRTPS(X64Reg regOp, OpArg arg);
|
||||||
void RSQRTPD(X64Reg regOp, OpArg arg);
|
void RSQRTPD(X64Reg regOp, OpArg arg);
|
||||||
|
|
||||||
|
// SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double)
|
||||||
|
void ANDPS(X64Reg regOp, OpArg arg);
|
||||||
|
void ANDPD(X64Reg regOp, OpArg arg);
|
||||||
|
void ANDNPS(X64Reg regOp, OpArg arg);
|
||||||
|
void ANDNPD(X64Reg regOp, OpArg arg);
|
||||||
|
void ORPS(X64Reg regOp, OpArg arg);
|
||||||
|
void ORPD(X64Reg regOp, OpArg arg);
|
||||||
|
void XORPS(X64Reg regOp, OpArg arg);
|
||||||
|
void XORPD(X64Reg regOp, OpArg arg);
|
||||||
|
|
||||||
|
// SSE/SSE2: Shuffle components. These are tricky - see Intel documentation.
|
||||||
void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle);
|
void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle);
|
||||||
void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle);
|
void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle);
|
||||||
|
|
||||||
|
// SSE/SSE2: Useful alternative to shuffle in some cases.
|
||||||
void MOVDDUP(X64Reg regOp, OpArg arg);
|
void MOVDDUP(X64Reg regOp, OpArg arg);
|
||||||
|
|
||||||
|
void UNPCKLPD(X64Reg dest, OpArg src);
|
||||||
|
void UNPCKHPD(X64Reg dest, OpArg src);
|
||||||
|
|
||||||
|
// SSE/SSE2: Compares.
|
||||||
void COMISS(X64Reg regOp, OpArg arg);
|
void COMISS(X64Reg regOp, OpArg arg);
|
||||||
void COMISD(X64Reg regOp, OpArg arg);
|
void COMISD(X64Reg regOp, OpArg arg);
|
||||||
void UCOMISS(X64Reg regOp, OpArg arg);
|
void UCOMISS(X64Reg regOp, OpArg arg);
|
||||||
void UCOMISD(X64Reg regOp, OpArg arg);
|
void UCOMISD(X64Reg regOp, OpArg arg);
|
||||||
|
|
||||||
|
// SSE/SSE2: Moves. Use the right data type for your data, in most cases.
|
||||||
void MOVAPS(X64Reg regOp, OpArg arg);
|
void MOVAPS(X64Reg regOp, OpArg arg);
|
||||||
void MOVAPD(X64Reg regOp, OpArg arg);
|
void MOVAPD(X64Reg regOp, OpArg arg);
|
||||||
void MOVAPS(OpArg arg, X64Reg regOp);
|
void MOVAPS(OpArg arg, X64Reg regOp);
|
||||||
@ -435,20 +480,20 @@ namespace Gen
|
|||||||
void MOVSS(OpArg arg, X64Reg regOp);
|
void MOVSS(OpArg arg, X64Reg regOp);
|
||||||
void MOVSD(OpArg arg, X64Reg regOp);
|
void MOVSD(OpArg arg, X64Reg regOp);
|
||||||
|
|
||||||
void MOVMSKPS(X64Reg dest, OpArg arg);
|
|
||||||
void MOVMSKPD(X64Reg dest, OpArg arg);
|
|
||||||
|
|
||||||
void MOVD_xmm(X64Reg dest, const OpArg &arg);
|
void MOVD_xmm(X64Reg dest, const OpArg &arg);
|
||||||
void MOVQ_xmm(X64Reg dest, OpArg arg);
|
void MOVQ_xmm(X64Reg dest, OpArg arg);
|
||||||
void MOVD_xmm(const OpArg &arg, X64Reg src);
|
void MOVD_xmm(const OpArg &arg, X64Reg src);
|
||||||
void MOVQ_xmm(OpArg arg, X64Reg src);
|
void MOVQ_xmm(OpArg arg, X64Reg src);
|
||||||
|
|
||||||
|
// SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
|
||||||
|
void MOVMSKPS(X64Reg dest, OpArg arg);
|
||||||
|
void MOVMSKPD(X64Reg dest, OpArg arg);
|
||||||
|
|
||||||
|
// SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one.
|
||||||
void MASKMOVDQU(X64Reg dest, X64Reg src);
|
void MASKMOVDQU(X64Reg dest, X64Reg src);
|
||||||
void LDDQU(X64Reg dest, OpArg src);
|
void LDDQU(X64Reg dest, OpArg src);
|
||||||
|
|
||||||
void UNPCKLPD(X64Reg dest, OpArg src);
|
// SSE/SSE2: Data type conversions.
|
||||||
void UNPCKHPD(X64Reg dest, OpArg src);
|
|
||||||
|
|
||||||
void CVTPS2PD(X64Reg dest, OpArg src);
|
void CVTPS2PD(X64Reg dest, OpArg src);
|
||||||
void CVTPD2PS(X64Reg dest, OpArg src);
|
void CVTPD2PS(X64Reg dest, OpArg src);
|
||||||
void CVTSS2SD(X64Reg dest, OpArg src);
|
void CVTSS2SD(X64Reg dest, OpArg src);
|
||||||
@ -458,7 +503,7 @@ namespace Gen
|
|||||||
void CVTPD2DQ(X64Reg regOp, OpArg arg);
|
void CVTPD2DQ(X64Reg regOp, OpArg arg);
|
||||||
void CVTDQ2PS(X64Reg regOp, const OpArg &arg);
|
void CVTDQ2PS(X64Reg regOp, const OpArg &arg);
|
||||||
|
|
||||||
//Integer SSE instructions
|
// SSE2: Packed integer instructions
|
||||||
void PACKSSDW(X64Reg dest, OpArg arg);
|
void PACKSSDW(X64Reg dest, OpArg arg);
|
||||||
void PACKSSWB(X64Reg dest, OpArg arg);
|
void PACKSSWB(X64Reg dest, OpArg arg);
|
||||||
//void PACKUSDW(X64Reg dest, OpArg arg);
|
//void PACKUSDW(X64Reg dest, OpArg arg);
|
||||||
@ -528,42 +573,138 @@ namespace Gen
|
|||||||
|
|
||||||
void RTDSC();
|
void RTDSC();
|
||||||
|
|
||||||
void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2);
|
// Utility functions
|
||||||
void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3);
|
// These only support u32 parameters, but that's enough for a lot of uses.
|
||||||
void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4);
|
// These will destroy the 1 or 2 first "parameter regs".
|
||||||
void CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5);
|
void ABI_CallFunctionC(void *func, u32 param1);
|
||||||
|
void ABI_CallFunctionCC(void *func, u32 param1, u32 param2);
|
||||||
|
void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2);
|
||||||
|
|
||||||
|
// Pass a register as a paremeter.
|
||||||
|
void ABI_CallFunctionR(void *func, Gen::X64Reg reg1);
|
||||||
|
void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2);
|
||||||
|
|
||||||
|
// A function that doesn't have any control over what it will do to regs,
|
||||||
|
// such as the dispatcher, should be surrounded by these.
|
||||||
|
void ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
||||||
|
void ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||||
|
|
||||||
|
// A function that doesn't know anything about it's surroundings, should
|
||||||
|
// be surrounded by these to establish a safe environment, where it can roam free.
|
||||||
|
// An example is a backpatch injected function.
|
||||||
|
void ABI_PushAllCallerSavedRegsAndAdjustStack();
|
||||||
|
void ABI_PopAllCallerSavedRegsAndAdjustStack();
|
||||||
|
|
||||||
|
unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize);
|
||||||
|
void ABI_AlignStack(unsigned int frameSize);
|
||||||
|
void ABI_RestoreStack(unsigned int frameSize);
|
||||||
|
|
||||||
|
// Sets up a __cdecl function.
|
||||||
|
// Only x64 really needs the parameter.
|
||||||
|
void ABI_EmitPrologue(int maxCallParams);
|
||||||
|
void ABI_EmitEpilogue(int maxCallParams);
|
||||||
|
|
||||||
|
#ifdef _M_IX86
|
||||||
|
inline int ABI_GetNumXMMRegs() { return 8; }
|
||||||
|
#else
|
||||||
|
inline int ABI_GetNumXMMRegs() { return 16; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Strange call wrappers.
|
||||||
|
void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2);
|
||||||
|
void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3);
|
||||||
|
void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4);
|
||||||
|
void CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5);
|
||||||
|
|
||||||
#if defined(_M_IX86) || !defined(_WIN32)

#define CallCdeclFunction3_I(a,b,c,d) CallCdeclFunction3((void *)(a), (b), (c), (d))
#define CallCdeclFunction4_I(a,b,c,d,e) CallCdeclFunction4((void *)(a), (b), (c), (d), (e))
#define CallCdeclFunction5_I(a,b,c,d,e,f) CallCdeclFunction5((void *)(a), (b), (c), (d), (e), (f))
#define CallCdeclFunction6_I(a,b,c,d,e,f,g) CallCdeclFunction6((void *)(a), (b), (c), (d), (e), (f), (g))

#define DECLARE_IMPORT(x)

#else

// Comments from VertexLoader.cpp about these horrors:

// This is a horrible hack that is necessary in 64-bit mode because Opengl32.dll is based way, way above the 32-bit
// address space that is within reach of a CALL, and just doing &fn gives us these high uncallable addresses. So we
// want to grab the function pointers from the import table instead.

void ___CallCdeclImport3(void* impptr, u32 arg0, u32 arg1, u32 arg2);
void ___CallCdeclImport4(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3);
void ___CallCdeclImport5(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4);
void ___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5);

#define CallCdeclFunction3_I(a,b,c,d) ___CallCdeclImport3(&__imp_##a,b,c,d)
#define CallCdeclFunction4_I(a,b,c,d,e) ___CallCdeclImport4(&__imp_##a,b,c,d,e)
#define CallCdeclFunction5_I(a,b,c,d,e,f) ___CallCdeclImport5(&__imp_##a,b,c,d,e,f)
#define CallCdeclFunction6_I(a,b,c,d,e,f,g) ___CallCdeclImport6(&__imp_##a,b,c,d,e,f,g)

#define DECLARE_IMPORT(x) extern "C" void *__imp_##x

#endif

}; // class XEmitter

}
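The ABI helpers above are now ordinary XEmitter members, so JIT code calls them the same way it calls MOV or CALL. A minimal sketch of that usage, assuming a hypothetical generator class and host function (HostCallEmitter and NotifyHost are made-up names, not part of this commit):

// Illustrative only - emit an ABI-correct call to a host function from generated code.
void NotifyHost(u32 value) {}               // hypothetical host-side function

class HostCallEmitter : public Gen::XEmitter
{
public:
	void EmitNotify(u32 value)
	{
		// Aligns the stack, passes the constant per the target ABI, then emits the CALL -
		// the same pattern Jit64 uses for PowerPC::OnIdle later in this commit.
		ABI_CallFunctionC((void *)&NotifyHost, value);
	}
};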
// Everything that needs to generate X86 code should inherit from this.
// You get memory management for free, plus, you can use all the MOV etc functions without
// having to prefix them with gen-> or something similar.
class XCodeBlock : public XEmitter
{
protected:
	u8 *region;
	size_t region_size;

public:
	XCodeBlock() : region(NULL), region_size(0) {}
	virtual ~XCodeBlock() { if (region) FreeCodeSpace(); }

	// Call this before you generate any code.
	void AllocCodeSpace(int size)
	{
		region_size = size;
		region = (u8*)AllocateExecutableMemory(region_size);
		SetCodePtr(region);
	}

	// Always clear code space with breakpoints, so that if someone accidentally executes
	// uninitialized, it just breaks into the debugger.
	void ClearCodeSpace()
	{
		// x86/64: 0xCC = breakpoint
		memset(region, 0xCC, region_size);
		ResetCodePtr();
	}

	// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
	void FreeCodeSpace()
	{
		FreeMemoryPages(region, region_size);
		region = NULL;
		region_size = 0;
	}

	// Cannot currently be undone. Will write protect the entire code region.
	// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
	void WriteProtect()
	{
		WriteProtectMemory(region, region_size, true);
	}

	void ResetCodePtr()
	{
		SetCodePtr(region);
	}

	size_t GetSpaceLeft() const
	{
		return region_size - (GetCodePtr() - region);
	}
};

} // namespace

#endif
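A short usage sketch of the class above (not part of the commit; MyCodeGen, the 4096-byte size and the generated routine are all made up): derive from XCodeBlock, allocate a region, emit through the inherited XEmitter calls, then write-protect the finished code.

// Illustrative only - the intended XCodeBlock lifecycle.
using namespace Gen;

class MyCodeGen : public XCodeBlock
{
public:
	const u8 *GenerateReturn42()
	{
		const u8 *entry = GetCodePtr();      // remember where this routine starts
		MOV(32, R(EAX), Imm32(42));          // inherited emitter calls, no "emit." prefix
		RET();
		return entry;
	}
};

// Typical lifecycle, mirroring Jit64::Init()/Shutdown() and AsmRoutineManager::Init() below:
//   MyCodeGen gen;
//   gen.AllocCodeSpace(4096);              // reserve executable memory, set the code pointer
//   const u8 *fn = gen.GenerateReturn42();
//   gen.WriteProtect();                    // fixed code: lock the region once generated
//   // ((int (*)())fn)() now returns 42
//   gen.FreeCodeSpace();                   // call explicitly on shutdown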
@ -46,7 +46,7 @@ namespace HW
|
|||||||
{
|
{
|
||||||
CoreTiming::Init();
|
CoreTiming::Init();
|
||||||
|
|
||||||
Thunk_Init(); // not really hw, but this way we know it's inited early :P
|
thunks.Init(); // not really hw, but this way we know it's inited early :P
|
||||||
State_Init();
|
State_Init();
|
||||||
|
|
||||||
// Init the whole Hardware
|
// Init the whole Hardware
|
||||||
@ -88,7 +88,7 @@ namespace HW
|
|||||||
}
|
}
|
||||||
|
|
||||||
State_Shutdown();
|
State_Shutdown();
|
||||||
Thunk_Shutdown();
|
thunks.Shutdown();
|
||||||
CoreTiming::Shutdown();
|
CoreTiming::Shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -104,7 +104,7 @@ LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs)
|
|||||||
|
|
||||||
//We could emulate the memory accesses here, but then they would still be around to take up
|
//We could emulate the memory accesses here, but then they would still be around to take up
|
||||||
//execution resources. Instead, we backpatch into a generic memory call and retry.
|
//execution resources. Instead, we backpatch into a generic memory call and retry.
|
||||||
u8 *new_rip = jit.BackPatch(codePtr, accessType, emAddress, ctx);
|
const u8 *new_rip = jit.BackPatch(codePtr, accessType, emAddress, ctx);
|
||||||
|
|
||||||
// Rip/Eip needs to be updated.
|
// Rip/Eip needs to be updated.
|
||||||
if (new_rip)
|
if (new_rip)
|
||||||
|
@ -164,6 +164,8 @@ ps_adds1
|
|||||||
Jit64 jit;
|
Jit64 jit;
|
||||||
PPCAnalyst::CodeBuffer code_buffer(32000);
|
PPCAnalyst::CodeBuffer code_buffer(32000);
|
||||||
|
|
||||||
|
int CODE_SIZE = 1024*1024*16;
|
||||||
|
|
||||||
namespace CPUCompare
|
namespace CPUCompare
|
||||||
{
|
{
|
||||||
extern u32 m_BlockStart;
|
extern u32 m_BlockStart;
|
||||||
@ -171,6 +173,11 @@ namespace CPUCompare
|
|||||||
|
|
||||||
void Jit64::Init()
|
void Jit64::Init()
|
||||||
{
|
{
|
||||||
|
if (Core::g_CoreStartupParameter.bJITUnlimitedCache)
|
||||||
|
{
|
||||||
|
CODE_SIZE = 1024*1024*8*8;
|
||||||
|
}
|
||||||
|
|
||||||
jo.optimizeStack = true;
|
jo.optimizeStack = true;
|
||||||
jo.enableBlocklink = true; // Speed boost, but not 100% safe
|
jo.enableBlocklink = true; // Speed boost, but not 100% safe
|
||||||
#ifdef _M_X64
|
#ifdef _M_X64
|
||||||
@ -182,6 +189,23 @@ namespace CPUCompare
|
|||||||
jo.fpAccurateFlags = true;
|
jo.fpAccurateFlags = true;
|
||||||
jo.optimizeGatherPipe = true;
|
jo.optimizeGatherPipe = true;
|
||||||
jo.fastInterrupts = false;
|
jo.fastInterrupts = false;
|
||||||
|
|
||||||
|
gpr.SetEmitter(this);
|
||||||
|
fpr.SetEmitter(this);
|
||||||
|
|
||||||
|
trampolines.Init();
|
||||||
|
AllocCodeSpace(CODE_SIZE);
|
||||||
|
InitCache();
|
||||||
|
asm_routines.Init();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::Shutdown()
|
||||||
|
{
|
||||||
|
FreeCodeSpace();
|
||||||
|
ShutdownCache();
|
||||||
|
|
||||||
|
trampolines.Shutdown();
|
||||||
|
asm_routines.Shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::WriteCallInterpreter(UGeckoInstruction _inst)
|
void Jit64::WriteCallInterpreter(UGeckoInstruction _inst)
|
||||||
@ -271,7 +295,7 @@ namespace CPUCompare
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOV(32, M(&PC), Imm32(destination));
|
MOV(32, M(&PC), Imm32(destination));
|
||||||
JMP(Asm::dispatcher, true);
|
JMP(asm_routines.dispatcher, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -280,7 +304,7 @@ namespace CPUCompare
|
|||||||
MOV(32, M(&PC), R(EAX));
|
MOV(32, M(&PC), R(EAX));
|
||||||
Cleanup();
|
Cleanup();
|
||||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||||
JMP(Asm::dispatcher, true);
|
JMP(asm_routines.dispatcher, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::WriteRfiExitDestInEAX()
|
void Jit64::WriteRfiExitDestInEAX()
|
||||||
@ -288,7 +312,7 @@ namespace CPUCompare
|
|||||||
MOV(32, M(&PC), R(EAX));
|
MOV(32, M(&PC), R(EAX));
|
||||||
Cleanup();
|
Cleanup();
|
||||||
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
|
||||||
JMP(Asm::testExceptions, true);
|
JMP(asm_routines.testExceptions, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::WriteExceptionExit(u32 exception)
|
void Jit64::WriteExceptionExit(u32 exception)
|
||||||
@ -296,7 +320,7 @@ namespace CPUCompare
|
|||||||
Cleanup();
|
Cleanup();
|
||||||
OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(exception));
|
OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(exception));
|
||||||
MOV(32, M(&PC), Imm32(js.compilerPC + 4));
|
MOV(32, M(&PC), Imm32(js.compilerPC + 4));
|
||||||
JMP(Asm::testExceptions, true);
|
JMP(asm_routines.testExceptions, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
const u8* Jit64::DoJit(u32 emaddress, JitBlock &b)
|
const u8* Jit64::DoJit(u32 emaddress, JitBlock &b)
|
||||||
@ -326,11 +350,13 @@ namespace CPUCompare
|
|||||||
// Downcount flag check. The last block decremented downcounter, and the flag should still be available.
|
// Downcount flag check. The last block decremented downcounter, and the flag should still be available.
|
||||||
FixupBranch skip = J_CC(CC_NBE);
|
FixupBranch skip = J_CC(CC_NBE);
|
||||||
MOV(32, M(&PC), Imm32(js.blockStart));
|
MOV(32, M(&PC), Imm32(js.blockStart));
|
||||||
JMP(Asm::doTiming, true); // downcount hit zero - go doTiming.
|
JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming.
|
||||||
SetJumpTarget(skip);
|
SetJumpTarget(skip);
|
||||||
|
|
||||||
const u8 *normalEntry = GetCodePtr();
|
const u8 *normalEntry = GetCodePtr();
|
||||||
if (ImHereDebug) CALL((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful
|
|
||||||
|
if (ImHereDebug)
|
||||||
|
CALL((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful
|
||||||
|
|
||||||
if (js.fpa.any)
|
if (js.fpa.any)
|
||||||
{
|
{
|
||||||
@ -338,7 +364,7 @@ namespace CPUCompare
|
|||||||
TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit
|
TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit
|
||||||
FixupBranch b1 = J_CC(CC_NZ);
|
FixupBranch b1 = J_CC(CC_NZ);
|
||||||
MOV(32, M(&PC), Imm32(js.blockStart));
|
MOV(32, M(&PC), Imm32(js.blockStart));
|
||||||
JMP(Asm::fpException, true);
|
JMP(asm_routines.fpException, true);
|
||||||
SetJumpTarget(b1);
|
SetJumpTarget(b1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -348,7 +374,7 @@ namespace CPUCompare
|
|||||||
TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(0xFFFFFFFF));
|
TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(0xFFFFFFFF));
|
||||||
FixupBranch b1 = J_CC(CC_Z);
|
FixupBranch b1 = J_CC(CC_Z);
|
||||||
MOV(32, M(&PC), Imm32(js.blockStart));
|
MOV(32, M(&PC), Imm32(js.blockStart));
|
||||||
JMP(Asm::testExceptions, true);
|
JMP(asm_routines.testExceptions, true);
|
||||||
SetJumpTarget(b1);
|
SetJumpTarget(b1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -404,7 +430,7 @@ namespace CPUCompare
|
|||||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
|
||||||
{
|
{
|
||||||
js.fifoBytesThisBlock -= 32;
|
js.fifoBytesThisBlock -= 32;
|
||||||
CALL(ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0));
|
CALL(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
PPCTables::CompileInstruction(ops[i].inst);
|
PPCTables::CompileInstruction(ops[i].inst);
|
||||||
|
@ -24,7 +24,9 @@
|
|||||||
|
|
||||||
#include "../PPCAnalyst.h"
|
#include "../PPCAnalyst.h"
|
||||||
#include "JitCache.h"
|
#include "JitCache.h"
|
||||||
|
#include "JitRegCache.h"
|
||||||
#include "x64Emitter.h"
|
#include "x64Emitter.h"
|
||||||
|
#include "x64Analyzer.h"
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
||||||
@ -47,8 +49,24 @@ struct CONTEXT
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
class Jit64
|
|
||||||
|
class TrampolineCache : public Gen::XCodeBlock
{
public:
	void Init();
	void Shutdown();

	const u8 *GetReadTrampoline(const InstructionInfo &info);
	const u8 *GetWriteTrampoline(const InstructionInfo &info);
};
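A sketch of how this cache is meant to be consumed by the backpatcher further down (RedirectLoad and its parameters are illustrative; the GetReadTrampoline/CALL/NOP sequence mirrors Jit64::BackPatch below):

// Illustrative only - fetch a read trampoline and redirect the faulting MOV to it.
void RedirectLoad(TrampolineCache &cache, u8 *faultingInstruction,
                  const InstructionInfo &info, int originalSize)
{
	const u8 *trampoline = cache.GetReadTrampoline(info);
	Gen::XEmitter emitter(faultingInstruction);   // local code pointer, no global state touched
	emitter.CALL((void *)trampoline);             // CALL rel32 is 5 bytes
	emitter.NOP(originalSize - 5);                // pad out the rest of the original instruction
}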
|
||||||
|
|
||||||
|
|
||||||
|
class Jit64 : public Gen::XCodeBlock
|
||||||
|
{
|
||||||
|
TrampolineCache trampolines;
|
||||||
|
GPRRegCache gpr;
|
||||||
|
FPURegCache fpr;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef void (*CompiledCode)();
|
typedef void (*CompiledCode)();
|
||||||
|
|
||||||
@ -157,7 +175,7 @@ public:
|
|||||||
bool RangeIntersect(int s1, int e1, int s2, int e2) const;
|
bool RangeIntersect(int s1, int e1, int s2, int e2) const;
|
||||||
bool IsInJitCode(const u8 *codePtr);
|
bool IsInJitCode(const u8 *codePtr);
|
||||||
|
|
||||||
u8 *BackPatch(u8 *codePtr, int accessType, u32 emAddress, CONTEXT *ctx);
|
const u8 *BackPatch(u8 *codePtr, int accessType, u32 emAddress, CONTEXT *ctx);
|
||||||
|
|
||||||
#define JIT_OPCODE 0
|
#define JIT_OPCODE 0
|
||||||
|
|
||||||
@ -165,6 +183,7 @@ public:
|
|||||||
const u8* DoJit(u32 emaddress, JitBlock &b);
|
const u8* DoJit(u32 emaddress, JitBlock &b);
|
||||||
|
|
||||||
void Init();
|
void Init();
|
||||||
|
void Shutdown();
|
||||||
|
|
||||||
// Utilities for use by opcodes
|
// Utilities for use by opcodes
|
||||||
|
|
||||||
@ -188,10 +207,10 @@ public:
|
|||||||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||||
void JitClearCA();
|
void JitClearCA();
|
||||||
void JitSetCA();
|
void JitSetCA();
|
||||||
void tri_op(int d, int a, int b, bool reversible, void (*op)(Gen::X64Reg, Gen::OpArg));
|
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
|
||||||
typedef u32 (*Operation)(u32 a, u32 b);
|
typedef u32 (*Operation)(u32 a, u32 b);
|
||||||
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void(*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
||||||
void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (*op)(Gen::X64Reg, Gen::OpArg));
|
void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
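Because ADD, OR, ADDSD and friends are now XEmitter members instead of free functions, helpers like the ones above receive pointers-to-member and invoke them through the JIT instance. A minimal sketch of the pattern (EmitThroughPointer is a made-up name):

// Illustrative only - declaring, passing and calling an emitter op as a pointer-to-member.
using namespace Gen;

typedef void (XEmitter::*IntOp)(int, const OpArg &, const OpArg &);

void EmitThroughPointer(XEmitter *emit, IntOp op)
{
	// Equivalent to the old global ADD(32, ...) / OR(32, ...) calls.
	(emit->*op)(32, R(EAX), Imm32(1));
}

// Inside a Jit64 member, "this" is the emitter:
//   EmitThroughPointer(this, &XEmitter::ADD);
//   EmitThroughPointer(this, &XEmitter::OR);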
|
||||||
|
|
||||||
|
|
||||||
// OPCODES
|
// OPCODES
|
||||||
|
@ -31,27 +31,12 @@
|
|||||||
#include "../../HW/CPUCompare.h"
|
#include "../../HW/CPUCompare.h"
|
||||||
#include "../../HW/GPFifo.h"
|
#include "../../HW/GPFifo.h"
|
||||||
#include "../../Core.h"
|
#include "../../Core.h"
|
||||||
|
#include "JitAsm.h"
|
||||||
|
|
||||||
using namespace Gen;
|
using namespace Gen;
|
||||||
int blocksExecuted;
|
int blocksExecuted;
|
||||||
|
|
||||||
namespace Asm
|
static int temp32;
|
||||||
{
|
|
||||||
const u8 *enterCode;
|
|
||||||
const u8 *testExceptions;
|
|
||||||
const u8 *fpException;
|
|
||||||
const u8 *doTiming;
|
|
||||||
const u8 *dispatcher;
|
|
||||||
const u8 *dispatcherNoCheck;
|
|
||||||
const u8 *dispatcherPcInEAX;
|
|
||||||
const u8 *computeRc;
|
|
||||||
const u8 *computeRcFp;
|
|
||||||
|
|
||||||
const u8 *fifoDirectWrite8;
|
|
||||||
const u8 *fifoDirectWrite16;
|
|
||||||
const u8 *fifoDirectWrite32;
|
|
||||||
const u8 *fifoDirectWriteFloat;
|
|
||||||
const u8 *fifoDirectWriteXmm64;
|
|
||||||
|
|
||||||
bool compareEnabled = false;
|
bool compareEnabled = false;
|
||||||
|
|
||||||
@ -72,16 +57,15 @@ static bool enableStatistics = false;
|
|||||||
//RBX - Base pointer of memory
|
//RBX - Base pointer of memory
|
||||||
//R15 - Pointer to array of block pointers
|
//R15 - Pointer to array of block pointers
|
||||||
|
|
||||||
|
AsmRoutineManager asm_routines;
|
||||||
|
|
||||||
// PLAN: no more block numbers - crazy opcodes just contain offset within
|
// PLAN: no more block numbers - crazy opcodes just contain offset within
|
||||||
// dynarec buffer
|
// dynarec buffer
|
||||||
// At this offset - 4, there is an int specifying the block number.
|
// At this offset - 4, there is an int specifying the block number.
|
||||||
|
|
||||||
|
|
||||||
void GenerateCommon();
|
|
||||||
|
|
||||||
#ifdef _M_IX86
|
#ifdef _M_IX86
|
||||||
void Generate()
|
void AsmRoutineManager::Generate()
|
||||||
{
|
{
|
||||||
enterCode = AlignCode16();
|
enterCode = AlignCode16();
|
||||||
PUSH(EBP);
|
PUSH(EBP);
|
||||||
@ -129,7 +113,6 @@ void Generate()
|
|||||||
ADD(32, M(&PowerPC::ppcState.DebugCount), Imm8(1));
|
ADD(32, M(&PowerPC::ppcState.DebugCount), Imm8(1));
|
||||||
}
|
}
|
||||||
//grab from list and jump to it
|
//grab from list and jump to it
|
||||||
//INT3();
|
|
||||||
MOV(32, R(EDX), ImmPtr(jit.GetCodePointers()));
|
MOV(32, R(EDX), ImmPtr(jit.GetCodePointers()));
|
||||||
JMPptr(MComplex(EDX, EAX, 4, 0));
|
JMPptr(MComplex(EDX, EAX, 4, 0));
|
||||||
SetJumpTarget(notfound);
|
SetJumpTarget(notfound);
|
||||||
@ -180,12 +163,14 @@ void Generate()
|
|||||||
|
|
||||||
#elif defined(_M_X64)
|
#elif defined(_M_X64)
|
||||||
|
|
||||||
void Generate()
|
void AsmRoutineManager::Generate()
|
||||||
{
|
{
|
||||||
enterCode = AlignCode16();
|
enterCode = AlignCode16();
|
||||||
|
|
||||||
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
||||||
|
|
||||||
|
if (!jit.GetCodePointers() || !Memory::base)
|
||||||
|
PanicAlert("Memory::base and jit.GetCodePointers() must return valid values");
|
||||||
MOV(64, R(RBX), Imm64((u64)Memory::base));
|
MOV(64, R(RBX), Imm64((u64)Memory::base));
|
||||||
MOV(64, R(R15), Imm64((u64)jit.GetCodePointers())); //It's below 2GB so 32 bits are good enough
|
MOV(64, R(R15), Imm64((u64)jit.GetCodePointers())); //It's below 2GB so 32 bits are good enough
|
||||||
const u8 *outerLoop = GetCodePtr();
|
const u8 *outerLoop = GetCodePtr();
|
||||||
@ -264,7 +249,7 @@ void Generate()
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GenFifoWrite(int size)
|
void AsmRoutineManager::GenFifoWrite(int size)
|
||||||
{
|
{
|
||||||
// Assume value in ABI_PARAM1
|
// Assume value in ABI_PARAM1
|
||||||
PUSH(ESI);
|
PUSH(ESI);
|
||||||
@ -287,8 +272,7 @@ void GenFifoWrite(int size)
|
|||||||
RET();
|
RET();
|
||||||
}
|
}
|
||||||
|
|
||||||
static int temp32;
|
void AsmRoutineManager::GenFifoFloatWrite()
|
||||||
void GenFifoFloatWrite()
|
|
||||||
{
|
{
|
||||||
// Assume value in XMM0
|
// Assume value in XMM0
|
||||||
PUSH(ESI);
|
PUSH(ESI);
|
||||||
@ -306,7 +290,7 @@ void GenFifoFloatWrite()
|
|||||||
RET();
|
RET();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GenFifoXmm64Write()
|
void AsmRoutineManager::GenFifoXmm64Write()
|
||||||
{
|
{
|
||||||
// Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
|
// Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
|
||||||
PUSH(ESI);
|
PUSH(ESI);
|
||||||
@ -319,7 +303,7 @@ void GenFifoXmm64Write()
|
|||||||
RET();
|
RET();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GenerateCommon()
|
void AsmRoutineManager::GenerateCommon()
|
||||||
{
|
{
|
||||||
// USES_CR
|
// USES_CR
|
||||||
computeRc = AlignCode16();
|
computeRc = AlignCode16();
|
||||||
@ -364,5 +348,3 @@ void GenerateCommon()
|
|||||||
SetJumpTarget(skip_fast_write);
|
SetJumpTarget(skip_fast_write);
|
||||||
CALL((void *)&Memory::Write_U8);*/
|
CALL((void *)&Memory::Write_U8);*/
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Asm
|
|
||||||
|
@ -14,33 +14,71 @@
|
|||||||
|
|
||||||
// Official SVN repository and contact information can be found at
|
// Official SVN repository and contact information can be found at
|
||||||
// http://code.google.com/p/dolphin-emu/
|
// http://code.google.com/p/dolphin-emu/
|
||||||
|
|
||||||
#ifndef _JITASM_H
|
#ifndef _JITASM_H
|
||||||
#define _JITASM_H
|
#define _JITASM_H
|
||||||
|
|
||||||
namespace Asm
|
#include "x64Emitter.h"
|
||||||
|
|
||||||
|
// In Dolphin, we don't use inline assembly. Instead, we generate all machine-near
|
||||||
|
// code at runtime. In the case of fixed code like this, after writing it, we write
|
||||||
|
// protect the memory, essentially making it work just like precompiled code.
|
||||||
|
|
||||||
|
// There are some advantages to this approach:
|
||||||
|
// 1) No need to setup an external assembler in the build.
|
||||||
|
// 2) Cross platform, as long as it's x86/x64.
|
||||||
|
// 3) Can optimize code at runtime for the specific CPU model.
|
||||||
|
// There aren't really any disadvantages other than having to maintain a x86 emitter,
|
||||||
|
// which we have to do anyway :)
|
||||||
|
//
|
||||||
|
// To add a new asm routine, just add another const here, and add the code to Generate.
|
||||||
|
// Also, possibly increase the size of the code buffer.
|
||||||
|
|
||||||
|
class AsmRoutineManager : public Gen::XCodeBlock
|
||||||
{
|
{
|
||||||
extern const u8 *enterCode;
|
private:
|
||||||
|
|
||||||
extern const u8 *dispatcher;
|
|
||||||
extern const u8 *dispatcherNoCheck;
|
|
||||||
extern const u8 *dispatcherPcInEAX;
|
|
||||||
|
|
||||||
extern const u8 *fpException;
|
|
||||||
extern const u8 *computeRc;
|
|
||||||
extern const u8 *computeRcFp;
|
|
||||||
extern const u8 *testExceptions;
|
|
||||||
extern const u8 *dispatchPcInEAX;
|
|
||||||
extern const u8 *doTiming;
|
|
||||||
|
|
||||||
extern const u8 *fifoDirectWrite8;
|
|
||||||
extern const u8 *fifoDirectWrite16;
|
|
||||||
extern const u8 *fifoDirectWrite32;
|
|
||||||
extern const u8 *fifoDirectWriteFloat;
|
|
||||||
extern const u8 *fifoDirectWriteXmm64;
|
|
||||||
|
|
||||||
extern bool compareEnabled;
|
|
||||||
void Generate();
|
void Generate();
|
||||||
}
|
void GenerateCommon();
|
||||||
|
void GenFifoWrite(int size);
|
||||||
|
void GenFifoFloatWrite();
|
||||||
|
void GenFifoXmm64Write();
|
||||||
|
|
||||||
|
public:
|
||||||
|
void Init() {
|
||||||
|
AllocCodeSpace(8192);
|
||||||
|
Generate();
|
||||||
|
WriteProtect();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Shutdown() {
|
||||||
|
FreeCodeSpace();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Public generated functions. Just CALL(M((void*)func)) them.
|
||||||
|
|
||||||
|
const u8 *enterCode;
|
||||||
|
|
||||||
|
const u8 *dispatcher;
|
||||||
|
const u8 *dispatcherNoCheck;
|
||||||
|
const u8 *dispatcherPcInEAX;
|
||||||
|
|
||||||
|
const u8 *fpException;
|
||||||
|
const u8 *computeRc;
|
||||||
|
const u8 *computeRcFp;
|
||||||
|
const u8 *testExceptions;
|
||||||
|
const u8 *dispatchPcInEAX;
|
||||||
|
const u8 *doTiming;
|
||||||
|
|
||||||
|
const u8 *fifoDirectWrite8;
|
||||||
|
const u8 *fifoDirectWrite16;
|
||||||
|
const u8 *fifoDirectWrite32;
|
||||||
|
const u8 *fifoDirectWriteFloat;
|
||||||
|
const u8 *fifoDirectWriteXmm64;
|
||||||
|
|
||||||
|
bool compareEnabled;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern AsmRoutineManager asm_routines;
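A short sketch of how code generated elsewhere reaches these routines, assuming the usual Jit includes (EmitBlockExit is illustrative; the MOV/JMP pair mirrors Jit64::WriteExit earlier in this commit):

// Illustrative only - generated blocks never return, they jump back into the dispatcher.
void EmitBlockExit(Gen::XEmitter *emit, u32 destination)
{
	emit->MOV(32, Gen::M(&PC), Gen::Imm32(destination));
	emit->JMP(asm_routines.dispatcher, true);
}

// asm_routines.Init() (AllocCodeSpace + Generate + WriteProtect) must have run first,
// otherwise dispatcher is still an uninitialized pointer.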
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -51,17 +51,105 @@ void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void TrampolineCache::Init()
|
||||||
|
{
|
||||||
|
AllocCodeSpace(1024 * 1024);
|
||||||
|
}
|
||||||
|
|
||||||
|
void TrampolineCache::Shutdown()
{
	FreeCodeSpace();
}
||||||
|
|
||||||
|
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
|
||||||
|
const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
|
||||||
|
{
|
||||||
|
if (GetSpaceLeft() < 1024)
|
||||||
|
PanicAlert("Trampoline cache full");
|
||||||
|
|
||||||
|
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||||
|
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||||
|
const u8 *trampoline = GetCodePtr();
|
||||||
|
#ifdef _M_X64
|
||||||
|
// It's a read. Easy.
|
||||||
|
ABI_PushAllCallerSavedRegsAndAdjustStack();
|
||||||
|
if (addrReg != ABI_PARAM1)
|
||||||
|
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
|
||||||
|
if (info.displacement) {
|
||||||
|
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
|
||||||
|
}
|
||||||
|
switch (info.operandSize) {
|
||||||
|
case 4:
|
||||||
|
CALL(thunks.ProtectFunction((void *)&Memory::Read_U32, 1));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ABI_PopAllCallerSavedRegsAndAdjustStack();
|
||||||
|
MOV(32, R(dataReg), R(EAX));
|
||||||
|
RET();
|
||||||
|
#endif
|
||||||
|
return trampoline;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
|
||||||
|
const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
|
||||||
|
{
|
||||||
|
if (GetSpaceLeft() < 1024)
|
||||||
|
PanicAlert("Trampoline cache full");
|
||||||
|
|
||||||
|
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||||
|
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||||
|
if (dataReg != EAX)
|
||||||
|
PanicAlert("Backpatch write - not through EAX");
|
||||||
|
|
||||||
|
const u8 *trampoline = GetCodePtr();
|
||||||
|
|
||||||
|
#ifdef _M_X64
|
||||||
|
|
||||||
|
// It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
|
||||||
|
// hardware access - we can take shortcuts.
|
||||||
|
//if (emAddress == 0xCC008000)
|
||||||
|
// PanicAlert("caught a fifo write");
|
||||||
|
CMP(32, R(addrReg), Imm32(0xCC008000));
|
||||||
|
FixupBranch skip_fast = J_CC(CC_NE, false);
|
||||||
|
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||||
|
CALL((void*)asm_routines.fifoDirectWrite32);
|
||||||
|
RET();
|
||||||
|
SetJumpTarget(skip_fast);
|
||||||
|
ABI_PushAllCallerSavedRegsAndAdjustStack();
|
||||||
|
if (addrReg != ABI_PARAM1) {
|
||||||
|
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||||
|
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
|
||||||
|
} else {
|
||||||
|
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
|
||||||
|
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||||
|
}
|
||||||
|
if (info.displacement) {
|
||||||
|
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
|
||||||
|
}
|
||||||
|
switch (info.operandSize) {
|
||||||
|
case 4:
|
||||||
|
CALL(thunks.ProtectFunction((void *)&Memory::Write_U32, 2));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ABI_PopAllCallerSavedRegsAndAdjustStack();
|
||||||
|
RET();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return trampoline;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// This generates some fairly heavy trampolines, but:
|
// This generates some fairly heavy trampolines, but:
|
||||||
// 1) It's really necessary. We don't know anything about the context.
|
// 1) It's really necessary. We don't know anything about the context.
|
||||||
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
|
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
|
||||||
// that many of them in a typical program/game.
|
// that many of them in a typical program/game.
|
||||||
u8 *Jit64::BackPatch(u8 *codePtr, int accessType, u32 emAddress, CONTEXT *ctx)
|
const u8 *Jit64::BackPatch(u8 *codePtr, int accessType, u32 emAddress, CONTEXT *ctx)
|
||||||
{
|
{
|
||||||
#ifdef _M_X64
|
#ifdef _M_X64
|
||||||
if (!IsInJitCode(codePtr))
|
if (!IsInJitCode(codePtr))
|
||||||
return 0; // this will become a regular crash real soon after this
|
return 0; // this will become a regular crash real soon after this
|
||||||
|
|
||||||
u8 *oldCodePtr = GetWritableCodePtr();
|
|
||||||
InstructionInfo info;
|
InstructionInfo info;
|
||||||
if (!DisassembleMov(codePtr, info, accessType)) {
|
if (!DisassembleMov(codePtr, info, accessType)) {
|
||||||
BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
|
BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
|
||||||
@ -81,108 +169,42 @@ u8 *Jit64::BackPatch(u8 *codePtr, int accessType, u32 emAddress, CONTEXT *ctx)
|
|||||||
BackPatchError(StringFromFormat("BackPatch - no support for operand size %i", info.operandSize), codePtr, emAddress);
|
BackPatchError(StringFromFormat("BackPatch - no support for operand size %i", info.operandSize), codePtr, emAddress);
|
||||||
}
|
}
|
||||||
|
|
||||||
X64Reg addrReg = (X64Reg)info.scaledReg;
|
|
||||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
|
||||||
if (info.otherReg != RBX)
|
if (info.otherReg != RBX)
|
||||||
PanicAlert("BackPatch : Base reg not RBX."
|
PanicAlert("BackPatch : Base reg not RBX."
|
||||||
"\n\nAttempted to access %08x.", emAddress);
|
"\n\nAttempted to access %08x.", emAddress);
|
||||||
//if (accessType == OP_ACCESS_WRITE)
|
|
||||||
// PanicAlert("BackPatch : Currently only supporting reads."
|
|
||||||
// "\n\nAttempted to write to %08x.", emAddress);
|
|
||||||
|
|
||||||
// OK, let's write a trampoline, and a jump to it.
|
if (accessType == OP_ACCESS_WRITE)
|
||||||
// Later, let's share trampolines.
|
PanicAlert("BackPatch : Currently only supporting reads."
|
||||||
|
"\n\nAttempted to write to %08x.", emAddress);
|
||||||
|
|
||||||
// In the first iteration, we assume that all accesses are 32-bit. We also only deal with reads.
|
// In the first iteration, we assume that all accesses are 32-bit. We also only deal with reads.
|
||||||
// Next step - support writes, special case FIFO writes. Also, support 32-bit mode.
|
|
||||||
u8 *trampoline = trampolineCodePtr;
|
|
||||||
SetCodePtr(trampolineCodePtr);
|
|
||||||
|
|
||||||
if (accessType == 0)
|
if (accessType == 0)
|
||||||
{
|
{
|
||||||
// It's a read. Easy.
|
XEmitter emitter(codePtr);
|
||||||
ABI_PushAllCallerSavedRegsAndAdjustStack();
|
|
||||||
if (addrReg != ABI_PARAM1)
|
|
||||||
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
|
|
||||||
if (info.displacement) {
|
|
||||||
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
|
|
||||||
}
|
|
||||||
switch (info.operandSize) {
|
|
||||||
case 4:
|
|
||||||
CALL(ProtectFunction((void *)&Memory::Read_U32, 1));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
BackPatchError(StringFromFormat("We don't handle the size %i yet in backpatch", info.operandSize), codePtr, emAddress);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
ABI_PopAllCallerSavedRegsAndAdjustStack();
|
|
||||||
MOV(32, R(dataReg), R(EAX));
|
|
||||||
RET();
|
|
||||||
trampolineCodePtr = GetWritableCodePtr();
|
|
||||||
|
|
||||||
SetCodePtr(codePtr);
|
|
||||||
int bswapNopCount;
|
int bswapNopCount;
|
||||||
// Check the following BSWAP for REX byte
|
// Check the following BSWAP for REX byte
|
||||||
if ((GetCodePtr()[info.instructionSize] & 0xF0) == 0x40)
|
if ((codePtr[info.instructionSize] & 0xF0) == 0x40)
|
||||||
bswapNopCount = 3;
|
bswapNopCount = 3;
|
||||||
else
|
else
|
||||||
bswapNopCount = 2;
|
bswapNopCount = 2;
|
||||||
CALL(trampoline);
|
const u8 *trampoline = trampolines.GetReadTrampoline(info);
|
||||||
NOP((int)info.instructionSize + bswapNopCount - 5);
|
emitter.CALL((void *)trampoline);
|
||||||
SetCodePtr(oldCodePtr);
|
emitter.NOP((int)info.instructionSize + bswapNopCount - 5);
|
||||||
|
|
||||||
return codePtr;
|
return codePtr;
|
||||||
}
|
}
|
||||||
else if (accessType == 1)
|
else if (accessType == 1)
|
||||||
{
|
{
|
||||||
// It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
|
// TODO: special case FIFO writes. Also, support 32-bit mode.
|
||||||
// hardware access - we can take shortcuts.
|
// Also, debug this so that it actually works correctly :P
|
||||||
//if (emAddress == 0xCC008000)
|
XEmitter emitter(codePtr - 2);
|
||||||
// PanicAlert("caught a fifo write");
|
|
||||||
if (dataReg != EAX)
|
|
||||||
PanicAlert("Backpatch write - not through EAX");
|
|
||||||
CMP(32, R(addrReg), Imm32(0xCC008000));
|
|
||||||
FixupBranch skip_fast = J_CC(CC_NE, false);
|
|
||||||
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
|
||||||
CALL((void*)Asm::fifoDirectWrite32);
|
|
||||||
RET();
|
|
||||||
SetJumpTarget(skip_fast);
|
|
||||||
ABI_PushAllCallerSavedRegsAndAdjustStack();
|
|
||||||
if (addrReg != ABI_PARAM1) {
|
|
||||||
//INT3();
|
|
||||||
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
|
||||||
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
|
|
||||||
} else {
|
|
||||||
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
|
|
||||||
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
|
||||||
}
|
|
||||||
if (info.displacement) {
|
|
||||||
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
|
|
||||||
}
|
|
||||||
switch (info.operandSize) {
|
|
||||||
case 4:
|
|
||||||
CALL(ProtectFunction((void *)&Memory::Write_U32, 2));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
BackPatchError(StringFromFormat("We don't handle the size %i yet in backpatch", info.operandSize), codePtr, emAddress);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
ABI_PopAllCallerSavedRegsAndAdjustStack();
|
|
||||||
RET();
|
|
||||||
|
|
||||||
trampolineCodePtr = GetWritableCodePtr();
|
|
||||||
|
|
||||||
// We know it's EAX so the BSWAP before will be two byte. Overwrite it.
|
// We know it's EAX so the BSWAP before will be two byte. Overwrite it.
|
||||||
SetCodePtr(codePtr - 2);
|
const u8 *trampoline = trampolines.GetWriteTrampoline(info);
|
||||||
CALL(trampoline);
|
emitter.CALL((void *)trampoline);
|
||||||
NOP((int)info.instructionSize - 3);
|
emitter.NOP((int)info.instructionSize - 3);
|
||||||
if (info.instructionSize < 3)
|
if (info.instructionSize < 3)
|
||||||
PanicAlert("instruction too small");
|
PanicAlert("instruction too small");
|
||||||
SetCodePtr(oldCodePtr);
|
|
||||||
|
|
||||||
// We entered here with a BSWAP-ed EAX. We'll have to swap it back.
|
// We entered here with a BSWAP-ed EAX. We'll have to swap it back.
|
||||||
ctx->Rax = Common::swap32(ctx->Rax);
|
ctx->Rax = Common::swap32(ctx->Rax);
|
||||||
|
|
||||||
return codePtr - 2;
|
return codePtr - 2;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -56,19 +56,15 @@ using namespace Gen;
|
|||||||
op_agent_t agent;
|
op_agent_t agent;
|
||||||
#endif
|
#endif
|
||||||
static u8 *codeCache;
|
static u8 *codeCache;
|
||||||
static u8 *genFunctions;
|
|
||||||
static u8 *trampolineCache;
|
static u8 *trampolineCache;
|
||||||
u8 *trampolineCodePtr;
|
u8 *trampolineCodePtr;
|
||||||
#define INVALID_EXIT 0xFFFFFFFF
|
#define INVALID_EXIT 0xFFFFFFFF
|
||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
//CODE_SIZE = 1024*1024*8,
|
|
||||||
GEN_SIZE = 4096,
|
|
||||||
TRAMPOLINE_SIZE = 1024*1024,
|
TRAMPOLINE_SIZE = 1024*1024,
|
||||||
//MAX_NUM_BLOCKS = 65536,
|
|
||||||
};
|
};
|
||||||
int CODE_SIZE = 1024*1024*16;
|
|
||||||
int MAX_NUM_BLOCKS = 65536*2;
|
int MAX_NUM_BLOCKS = 65536*2;
|
||||||
|
|
||||||
static u8 **blockCodePointers;
|
static u8 **blockCodePointers;
|
||||||
@ -89,36 +85,22 @@ using namespace Gen;
|
|||||||
|
|
||||||
void Jit64::InitCache()
|
void Jit64::InitCache()
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITUnlimitedCache)
|
if (Core::g_CoreStartupParameter.bJITUnlimitedCache)
|
||||||
{
|
{
|
||||||
CODE_SIZE = 1024*1024*8*8;
|
|
||||||
MAX_NUM_BLOCKS = 65536*8;
|
MAX_NUM_BLOCKS = 65536*8;
|
||||||
}
|
}
|
||||||
|
|
||||||
codeCache = (u8*)AllocateExecutableMemory(CODE_SIZE);
|
|
||||||
genFunctions = (u8*)AllocateExecutableMemory(GEN_SIZE);
|
|
||||||
trampolineCache = (u8*)AllocateExecutableMemory(TRAMPOLINE_SIZE);
|
|
||||||
trampolineCodePtr = trampolineCache;
|
|
||||||
|
|
||||||
#ifdef OPROFILE_REPORT
|
#ifdef OPROFILE_REPORT
|
||||||
agent = op_open_agent();
|
agent = op_open_agent();
|
||||||
#endif
|
#endif
|
||||||
blocks = new JitBlock[MAX_NUM_BLOCKS];
|
blocks = new JitBlock[MAX_NUM_BLOCKS];
|
||||||
blockCodePointers = new u8*[MAX_NUM_BLOCKS];
|
blockCodePointers = new u8*[MAX_NUM_BLOCKS];
|
||||||
|
|
||||||
ClearCache();
|
ClearCache();
|
||||||
SetCodePtr(genFunctions);
|
|
||||||
Asm::Generate();
|
|
||||||
// Protect the generated functions
|
|
||||||
WriteProtectMemory(genFunctions, GEN_SIZE, true);
|
|
||||||
SetCodePtr(codeCache);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::ShutdownCache()
|
void Jit64::ShutdownCache()
|
||||||
{
|
{
|
||||||
UnWriteProtectMemory(genFunctions, GEN_SIZE, true);
|
|
||||||
FreeMemoryPages(codeCache, CODE_SIZE);
|
|
||||||
FreeMemoryPages(genFunctions, GEN_SIZE);
|
|
||||||
FreeMemoryPages(trampolineCache, TRAMPOLINE_SIZE);
|
|
||||||
delete [] blocks;
|
delete [] blocks;
|
||||||
delete [] blockCodePointers;
|
delete [] blockCodePointers;
|
||||||
blocks = 0;
|
blocks = 0;
|
||||||
@ -135,21 +117,23 @@ using namespace Gen;
|
|||||||
{
|
{
|
||||||
Core::DisplayMessage("Cleared code cache.", 3000);
|
Core::DisplayMessage("Cleared code cache.", 3000);
|
||||||
// Is destroying the blocks really necessary?
|
// Is destroying the blocks really necessary?
|
||||||
for (int i = 0; i < numBlocks; i++) {
|
for (int i = 0; i < numBlocks; i++)
|
||||||
|
{
|
||||||
DestroyBlock(i, false);
|
DestroyBlock(i, false);
|
||||||
}
|
}
|
||||||
links_to.clear();
|
links_to.clear();
|
||||||
trampolineCodePtr = trampolineCache;
|
|
||||||
numBlocks = 0;
|
numBlocks = 0;
|
||||||
memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS);
|
memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS);
|
||||||
memset(codeCache, 0xCC, CODE_SIZE);
|
|
||||||
SetCodePtr(codeCache);
|
trampolines.ClearCodeSpace();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::DestroyBlocksWithFlag(BlockFlag death_flag)
|
void Jit64::DestroyBlocksWithFlag(BlockFlag death_flag)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < numBlocks; i++) {
|
for (int i = 0; i < numBlocks; i++)
|
||||||
if (blocks[i].flags & death_flag) {
|
{
|
||||||
|
if (blocks[i].flags & death_flag)
|
||||||
|
{
|
||||||
DestroyBlock(i, false);
|
DestroyBlock(i, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -190,10 +174,10 @@ using namespace Gen;
|
|||||||
|
|
||||||
const u8 *Jit64::Jit(u32 emAddress)
|
const u8 *Jit64::Jit(u32 emAddress)
|
||||||
{
|
{
|
||||||
if (GetCodePtr() >= codeCache + CODE_SIZE - 0x10000 || numBlocks >= MAX_NUM_BLOCKS - 1)
|
if (GetSpaceLeft() < 0x10000 || numBlocks >= MAX_NUM_BLOCKS - 1)
|
||||||
{
|
{
|
||||||
LOG(DYNA_REC, "JIT cache full - clearing.")
|
LOG(DYNA_REC, "JIT cache full - clearing.")
|
||||||
if(Core::g_CoreStartupParameter.bJITUnlimitedCache)
|
if (Core::g_CoreStartupParameter.bJITUnlimitedCache)
|
||||||
{
|
{
|
||||||
PanicAlert("What? JIT cache still full - clearing.");
|
PanicAlert("What? JIT cache still full - clearing.");
|
||||||
}
|
}
|
||||||
@ -221,10 +205,8 @@ using namespace Gen;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 *oldCodePtr = GetWritableCodePtr();
|
|
||||||
LinkBlock(numBlocks);
|
LinkBlock(numBlocks);
|
||||||
LinkBlockExits(numBlocks);
|
LinkBlockExits(numBlocks);
|
||||||
SetCodePtr(oldCodePtr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef OPROFILE_REPORT
|
#ifdef OPROFILE_REPORT
|
||||||
@ -257,7 +239,7 @@ using namespace Gen;
|
|||||||
|
|
||||||
void Jit64::EnterFastRun()
|
void Jit64::EnterFastRun()
|
||||||
{
|
{
|
||||||
CompiledCode pExecAddr = (CompiledCode)Asm::enterCode;
|
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
|
||||||
pExecAddr();
|
pExecAddr();
|
||||||
//Will return when PowerPC::state changes
|
//Will return when PowerPC::state changes
|
||||||
}
|
}
|
||||||
@ -336,8 +318,8 @@ using namespace Gen;
|
|||||||
int destinationBlock = GetBlockNumberFromAddress(b.exitAddress[e]);
|
int destinationBlock = GetBlockNumberFromAddress(b.exitAddress[e]);
|
||||||
if (destinationBlock != -1)
|
if (destinationBlock != -1)
|
||||||
{
|
{
|
||||||
SetCodePtr(b.exitPtrs[e]);
|
XEmitter emit(b.exitPtrs[e]);
|
||||||
JMP(blocks[destinationBlock].checkedEntry, true);
|
emit.JMP(blocks[destinationBlock].checkedEntry, true);
|
||||||
b.linkStatus[e] = true;
|
b.linkStatus[e] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -345,6 +327,7 @@ using namespace Gen;
|
|||||||
}
|
}
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
void Jit64::LinkBlock(int i)
|
void Jit64::LinkBlock(int i)
|
||||||
{
|
{
|
||||||
LinkBlockExits(i);
|
LinkBlockExits(i);
|
||||||
@ -386,15 +369,15 @@ using namespace Gen;
|
|||||||
// Not entirely ideal, but .. pretty good.
|
// Not entirely ideal, but .. pretty good.
|
||||||
|
|
||||||
// TODO - make sure that the below stuff really is safe.
|
// TODO - make sure that the below stuff really is safe.
|
||||||
u8 *prev_code = GetWritableCodePtr();
|
|
||||||
// Spurious entrances from previously linked blocks can only come through checkedEntry
|
// Spurious entrances from previously linked blocks can only come through checkedEntry
|
||||||
SetCodePtr((u8*)b.checkedEntry);
|
XEmitter emit((u8*)b.checkedEntry);
|
||||||
MOV(32, M(&PC), Imm32(b.originalAddress));
|
emit.MOV(32, M(&PC), Imm32(b.originalAddress));
|
||||||
JMP(Asm::dispatcher, true);
|
emit.JMP(asm_routines.dispatcher, true);
|
||||||
SetCodePtr(blockCodePointers[blocknum]);
|
|
||||||
MOV(32, M(&PC), Imm32(b.originalAddress));
|
emit.SetCodePtr(blockCodePointers[blocknum]);
|
||||||
JMP(Asm::dispatcher, true);
|
emit.MOV(32, M(&PC), Imm32(b.originalAddress));
|
||||||
SetCodePtr(prev_code); // reset code pointer
|
emit.JMP(asm_routines.dispatcher, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -19,6 +19,6 @@
|
|||||||
|
|
||||||
#include "../Gekko.h"
|
#include "../Gekko.h"
|
||||||
|
|
||||||
// Will soon introduced the JitBlockCache class here.
|
// Will soon introduce the JitBlockCache class here.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -34,13 +34,12 @@ namespace JitCore
|
|||||||
void Init()
|
void Init()
|
||||||
{
|
{
|
||||||
jit.Init();
|
jit.Init();
|
||||||
jit.InitCache();
|
asm_routines.compareEnabled = ::Core::g_CoreStartupParameter.bRunCompareClient;
|
||||||
Asm::compareEnabled = ::Core::g_CoreStartupParameter.bRunCompareClient;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Shutdown()
|
void Shutdown()
|
||||||
{
|
{
|
||||||
jit.ShutdownCache();
|
jit.Shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SingleStep()
|
void SingleStep()
|
||||||
|
@ -27,8 +27,6 @@ using namespace Gen;
|
|||||||
using namespace PowerPC;
|
using namespace PowerPC;
|
||||||
|
|
||||||
|
|
||||||
GPRRegCache gpr;
|
|
||||||
FPURegCache fpr;
|
|
||||||
|
|
||||||
void RegCache::Start(PPCAnalyst::BlockRegStats &stats)
|
void RegCache::Start(PPCAnalyst::BlockRegStats &stats)
|
||||||
{
|
{
|
||||||
@ -267,7 +265,7 @@ using namespace PowerPC;
|
|||||||
xregs[xr].dirty = makeDirty || regs[i].location.IsImm();
|
xregs[xr].dirty = makeDirty || regs[i].location.IsImm();
|
||||||
OpArg newloc = ::Gen::R(xr);
|
OpArg newloc = ::Gen::R(xr);
|
||||||
if (doLoad || regs[i].location.IsImm())
|
if (doLoad || regs[i].location.IsImm())
|
||||||
MOV(32, newloc, regs[i].location);
|
emit->MOV(32, newloc, regs[i].location);
|
||||||
for (int j = 0; j < 32; j++)
|
for (int j = 0; j < 32; j++)
|
||||||
{
|
{
|
||||||
if (i != j && regs[j].location.IsSimpleReg() && regs[j].location.GetSimpleReg() == xr)
|
if (i != j && regs[j].location.IsSimpleReg() && regs[j].location.GetSimpleReg() == xr)
|
||||||
@ -309,7 +307,7 @@ using namespace PowerPC;
|
|||||||
}
|
}
|
||||||
OpArg newLoc = GetDefaultLocation(i);
|
OpArg newLoc = GetDefaultLocation(i);
|
||||||
// if (doStore) //<-- Breaks JIT compilation
|
// if (doStore) //<-- Breaks JIT compilation
|
||||||
MOV(32, newLoc, regs[i].location);
|
emit->MOV(32, newLoc, regs[i].location);
|
||||||
regs[i].location = newLoc;
|
regs[i].location = newLoc;
|
||||||
regs[i].away = false;
|
regs[i].away = false;
|
||||||
}
|
}
|
||||||
@ -327,11 +325,13 @@ using namespace PowerPC;
|
|||||||
xregs[xr].free = false;
|
xregs[xr].free = false;
|
||||||
xregs[xr].dirty = makeDirty;
|
xregs[xr].dirty = makeDirty;
|
||||||
OpArg newloc = ::Gen::R(xr);
|
OpArg newloc = ::Gen::R(xr);
|
||||||
if (doLoad) {
|
if (doLoad)
|
||||||
if (!regs[i].location.IsImm() && (regs[i].location.offset & 0xF)) {
|
{
|
||||||
|
if (!regs[i].location.IsImm() && (regs[i].location.offset & 0xF))
|
||||||
|
{
|
||||||
PanicAlert("WARNING - misaligned fp register location %i", i);
|
PanicAlert("WARNING - misaligned fp register location %i", i);
|
||||||
}
|
}
|
||||||
MOVAPD(xr, regs[i].location);
|
emit->MOVAPD(xr, regs[i].location);
|
||||||
}
|
}
|
||||||
regs[i].location = newloc;
|
regs[i].location = newloc;
|
||||||
regs[i].away = true;
|
regs[i].away = true;
|
||||||
@ -352,7 +352,7 @@ using namespace PowerPC;
|
|||||||
xregs[xr].dirty = false;
|
xregs[xr].dirty = false;
|
||||||
xregs[xr].ppcReg = -1;
|
xregs[xr].ppcReg = -1;
|
||||||
OpArg newLoc = GetDefaultLocation(i);
|
OpArg newLoc = GetDefaultLocation(i);
|
||||||
MOVAPD(newLoc, xr);
|
emit->MOVAPD(newLoc, xr);
|
||||||
regs[i].location = newLoc;
|
regs[i].location = newLoc;
|
||||||
regs[i].away = false;
|
regs[i].away = false;
|
||||||
}
|
}
|
||||||
|
@ -73,9 +73,14 @@
|
|||||||
void DiscardRegContentsIfCached(int preg);
|
void DiscardRegContentsIfCached(int preg);
|
||||||
virtual const int *GetAllocationOrder(int &count) = 0;
|
virtual const int *GetAllocationOrder(int &count) = 0;
|
||||||
|
|
||||||
|
XEmitter *emit;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
virtual ~RegCache() {}
|
virtual ~RegCache() {}
|
||||||
virtual void Start(PPCAnalyst::BlockRegStats &stats) = 0;
|
virtual void Start(PPCAnalyst::BlockRegStats &stats) = 0;
|
||||||
|
|
||||||
|
void SetEmitter(XEmitter *emitter) {emit = emitter;}
|
||||||
|
|
||||||
void FlushR(X64Reg reg);
|
void FlushR(X64Reg reg);
|
||||||
void FlushR(X64Reg reg, X64Reg reg2) {FlushR(reg); FlushR(reg2);}
|
void FlushR(X64Reg reg, X64Reg reg2) {FlushR(reg); FlushR(reg2);}
|
||||||
void FlushLockX(X64Reg reg) {
|
void FlushLockX(X64Reg reg) {
|
||||||
@ -142,8 +147,5 @@
|
|||||||
OpArg GetDefaultLocation(int reg) const;
|
OpArg GetDefaultLocation(int reg) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern GPRRegCache gpr;
|
|
||||||
extern FPURegCache fpr;
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -33,39 +33,39 @@
|
|||||||
const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
||||||
const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
|
const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
|
||||||
|
|
||||||
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (*op)(Gen::X64Reg, Gen::OpArg))
|
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
|
||||||
{
|
{
|
||||||
fpr.Lock(d, a, b);
|
fpr.Lock(d, a, b);
|
||||||
if (d == a)
|
if (d == a)
|
||||||
{
|
{
|
||||||
fpr.LoadToX64(d, true);
|
fpr.LoadToX64(d, true);
|
||||||
op(fpr.RX(d), fpr.R(b));
|
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||||
}
|
}
|
||||||
else if (d == b && reversible)
|
else if (d == b && reversible)
|
||||||
{
|
{
|
||||||
fpr.LoadToX64(d, true);
|
fpr.LoadToX64(d, true);
|
||||||
op(fpr.RX(d), fpr.R(a));
|
(this->*op)(fpr.RX(d), fpr.R(a));
|
||||||
}
|
}
|
||||||
else if (a != d && b != d)
|
else if (a != d && b != d)
|
||||||
{
|
{
|
||||||
// Sources different from d, can use rather quick solution
|
// Sources different from d, can use rather quick solution
|
||||||
fpr.LoadToX64(d, !dupe);
|
fpr.LoadToX64(d, !dupe);
|
||||||
MOVSD(fpr.RX(d), fpr.R(a));
|
MOVSD(fpr.RX(d), fpr.R(a));
|
||||||
op(fpr.RX(d), fpr.R(b));
|
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||||
}
|
}
|
||||||
else if (b != d)
|
else if (b != d)
|
||||||
{
|
{
|
||||||
fpr.LoadToX64(d, !dupe);
|
fpr.LoadToX64(d, !dupe);
|
||||||
MOVSD(XMM0, fpr.R(b));
|
MOVSD(XMM0, fpr.R(b));
|
||||||
MOVSD(fpr.RX(d), fpr.R(a));
|
MOVSD(fpr.RX(d), fpr.R(a));
|
||||||
op(fpr.RX(d), Gen::R(XMM0));
|
(this->*op)(fpr.RX(d), Gen::R(XMM0));
|
||||||
}
|
}
|
||||||
else // Other combo, must use two temps :(
|
else // Other combo, must use two temps :(
|
||||||
{
|
{
|
||||||
MOVSD(XMM0, fpr.R(a));
|
MOVSD(XMM0, fpr.R(a));
|
||||||
MOVSD(XMM1, fpr.R(b));
|
MOVSD(XMM1, fpr.R(b));
|
||||||
fpr.LoadToX64(d, !dupe);
|
fpr.LoadToX64(d, !dupe);
|
||||||
op(XMM0, Gen::R(XMM1));
|
(this->*op)(XMM0, Gen::R(XMM1));
|
||||||
MOVSD(fpr.RX(d), Gen::R(XMM0));
|
MOVSD(fpr.RX(d), Gen::R(XMM0));
|
||||||
}
|
}
|
||||||
if (dupe) {
|
if (dupe) {
|
||||||
@ -86,16 +86,16 @@
|
|||||||
bool dupe = inst.OPCD == 59;
|
bool dupe = inst.OPCD == 59;
|
||||||
switch (inst.SUBOP5)
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &DIVSD); break; //div
|
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div
|
||||||
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &SUBSD); break; //sub
|
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub
|
||||||
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &ADDSD); break; //add
|
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add
|
||||||
case 23: //sel
|
case 23: //sel
|
||||||
Default(inst);
|
Default(inst);
|
||||||
break;
|
break;
|
||||||
case 24: //res
|
case 24: //res
|
||||||
Default(inst);
|
Default(inst);
|
||||||
break;
|
break;
|
||||||
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &MULSD); break; //mul
|
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul
|
||||||
default:
|
default:
|
||||||
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
|
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
|
||||||
}
|
}
|
||||||
|
@ -42,7 +42,7 @@
|
|||||||
u32 And(u32 a, u32 b) {return a & b;}
|
u32 And(u32 a, u32 b) {return a & b;}
|
||||||
u32 Xor(u32 a, u32 b) {return a ^ b;}
|
u32 Xor(u32 a, u32 b) {return a ^ b;}
|
||||||
|
|
||||||
void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void(*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
|
void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
|
||||||
{
|
{
|
||||||
gpr.Lock(d, a);
|
gpr.Lock(d, a);
|
||||||
if (a || binary || carry) // yeh nasty special case addic
|
if (a || binary || carry) // yeh nasty special case addic
|
||||||
@ -57,7 +57,7 @@
|
|||||||
{
|
{
|
||||||
if (gpr.R(d).IsImm())
|
if (gpr.R(d).IsImm())
|
||||||
gpr.LoadToX64(d, false);
|
gpr.LoadToX64(d, false);
|
||||||
op(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
|
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
|
||||||
if (carry)
|
if (carry)
|
||||||
GenerateCarry(EAX);
|
GenerateCarry(EAX);
|
||||||
}
|
}
|
||||||
@ -66,7 +66,7 @@
|
|||||||
{
|
{
|
||||||
gpr.LoadToX64(d, false);
|
gpr.LoadToX64(d, false);
|
||||||
MOV(32, gpr.R(d), gpr.R(a));
|
MOV(32, gpr.R(d), gpr.R(a));
|
||||||
op(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
|
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
|
||||||
if (carry)
|
if (carry)
|
||||||
GenerateCarry(EAX);
|
GenerateCarry(EAX);
|
||||||
}
|
}
|
||||||
@ -84,7 +84,7 @@
|
|||||||
{
|
{
|
||||||
// Todo - special case immediates.
|
// Todo - special case immediates.
|
||||||
MOV(32, R(EAX), gpr.R(d));
|
MOV(32, R(EAX), gpr.R(d));
|
||||||
CALL((u8*)Asm::computeRc);
|
CALL((u8*)asm_routines.computeRc);
|
||||||
}
|
}
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
}
|
}
|
||||||
@ -109,22 +109,22 @@
|
|||||||
MOV(32, gpr.R(d), gpr.R(a));
|
MOV(32, gpr.R(d), gpr.R(a));
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
} else {
|
} else {
|
||||||
regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, ADD); //addi
|
regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD); //addi
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 15: regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, ADD); break; //addis
|
case 15: regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, &XEmitter::ADD); break; //addis
|
||||||
case 24:
|
case 24:
|
||||||
if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc) //check for nop
|
if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc) //check for nop
|
||||||
{NOP(); return;} //make the nop visible in the generated code. not much use but interesting if we see one.
|
{NOP(); return;} //make the nop visible in the generated code. not much use but interesting if we see one.
|
||||||
regimmop(a, s, true, inst.UIMM, Or, OR);
|
regimmop(a, s, true, inst.UIMM, Or, &XEmitter::OR);
|
||||||
break; //ori
|
break; //ori
|
||||||
case 25: regimmop(a, s, true, inst.UIMM << 16, Or, OR, false); break;//oris
|
case 25: regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false); break;//oris
|
||||||
case 28: regimmop(a, s, true, inst.UIMM, And, AND, true); break;
|
case 28: regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true); break;
|
||||||
case 29: regimmop(a, s, true, inst.UIMM << 16, And, AND, true); break;
|
case 29: regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true); break;
|
||||||
case 26: regimmop(a, s, true, inst.UIMM, Xor, XOR, false); break; //xori
|
case 26: regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false); break; //xori
|
||||||
case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, XOR, false); break; //xoris
|
case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false); break; //xoris
|
||||||
case 12: //regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, ADD, false, true); //addic
|
case 12: //regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, XEmitter::ADD, false, true); //addic
|
||||||
case 13: //regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, ADD, true, true); //addic_rc
|
case 13: //regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, XEmitter::ADD, true, true); //addic_rc
|
||||||
default:
|
default:
|
||||||
Default(inst);
|
Default(inst);
|
||||||
break;
|
break;
|
||||||
@ -295,7 +295,7 @@
|
|||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
{
|
{
|
||||||
MOV(32, R(EAX), gpr.R(a));
|
MOV(32, R(EAX), gpr.R(a));
|
||||||
CALL((u8*)Asm::computeRc);
|
CALL((u8*)asm_routines.computeRc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -328,7 +328,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -353,7 +353,7 @@
 
 if (inst.Rc) {
 // result is already in eax
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -374,7 +374,7 @@
 MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends
 if (inst.Rc) {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -394,7 +394,7 @@
 MOVSX(32, 16, gpr.RX(a), gpr.R(s));
 if (inst.Rc) {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -474,7 +474,7 @@
 if (inst.OE) PanicAlert("OE: subfx");
 if (inst.Rc) {
 // result is already in eax
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -514,7 +514,7 @@
 gpr.UnlockAll();
 if (inst.Rc) {
 MOV(32, R(EAX), gpr.R(d));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -544,7 +544,7 @@
 MOV(32, R(EAX), R(EDX));
 MOV(32, gpr.R(d), R(EDX));
 // result is already in eax
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 } else {
 MOV(32, gpr.R(d), R(EDX));
 }
@@ -570,7 +570,7 @@
 gpr.UnlockAll();
 gpr.UnlockAllX();
 if (inst.Rc) {
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -606,7 +606,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(d));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 gpr.UnlockAll();
 }
@@ -618,7 +618,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(d));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 gpr.UnlockAll();
 }
@@ -630,7 +630,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(d));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 gpr.UnlockAll();
 }
@@ -666,7 +666,7 @@
 gpr.UnlockAllX();
 if (inst.Rc)
 {
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -730,7 +730,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -767,7 +767,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -799,7 +799,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -821,7 +821,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -851,7 +851,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -881,7 +881,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -929,7 +929,7 @@
 
 if (inst.Rc) {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -975,7 +975,7 @@
 
 if (inst.Rc) {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 }
 }
 
@@ -1006,7 +1006,7 @@
 if (inst.Rc)
 {
 MOV(32, R(EAX), gpr.R(a));
-CALL((u8*)Asm::computeRc);
+CALL((u8*)asm_routines.computeRc);
 // TODO: Check PPC manual too
 }
 }
@@ -144,7 +144,7 @@
 fpr.Flush(FLUSH_ALL);
 ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
 MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC + 12));
-JMP(Asm::testExceptions, true);
+JMP(asm_routines.testExceptions, true);
 js.compilerPC += 8;
 return;
 }
@@ -287,14 +287,13 @@
 gpr.SetImmediate32(a, addr);
 gpr.FlushLockX(ABI_PARAM1);
 MOV(32, R(ABI_PARAM1), gpr.R(s));
-// INT3();
 switch (accessSize)
 {
 // No need to protect these, they don't touch any state
 // question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
-case 8: CALL((void *)Asm::fifoDirectWrite8); break;
-case 16: CALL((void *)Asm::fifoDirectWrite16); break;
-case 32: CALL((void *)Asm::fifoDirectWrite32); break;
+case 8: CALL((void *)asm_routines.fifoDirectWrite8); break;
+case 16: CALL((void *)asm_routines.fifoDirectWrite16); break;
+case 32: CALL((void *)asm_routines.fifoDirectWrite32); break;
 }
 js.fifoBytesThisBlock += accessSize >> 3;
 gpr.UnlockAllX();
@@ -377,9 +376,9 @@
 SetJumpTarget(unsafe_addr);
 switch (accessSize)
 {
-case 32: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); break;
-case 16: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U16, 2), ABI_PARAM1, ABI_PARAM2); break;
-case 8: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U8, 2), ABI_PARAM1, ABI_PARAM2); break;
+case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); break;
+case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ABI_PARAM1, ABI_PARAM2); break;
+case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ABI_PARAM1, ABI_PARAM2); break;
 }
 SetJumpTarget(skip_call);
 gpr.UnlockAll();
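The ProtectFunction calls in this hunk and the later load/store hunks now go through a thunks instance instead of a free function. ProtectFunction hands back a small generated stub that preserves the volatile registers around the real C function; the sketch below only models the per-instance caching side of that idea and uses a placeholder instead of emitting a real stub.

#include <map>

// Illustrative only: the real thunk manager emits a stub that saves the
// caller-saved registers, calls `function`, restores them, and returns the
// stub's address. Here the "stub" is just the original pointer.
class ThunkManagerSketch {
public:
    void *ProtectFunction(void *function, int num_params) {
        (void)num_params;  // the real stub generator needs this for argument handling
        std::map<void *, void *>::iterator it = thunks_.find(function);
        if (it != thunks_.end())
            return it->second;   // one stub per protected function, reused
        void *stub = function;   // placeholder for the emitted register-saving stub
        thunks_[function] = stub;
        return stub;
    }
private:
    std::map<void *, void *> thunks_;
};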
@@ -402,7 +401,6 @@
 //return _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_16) : _inst.SIMM_16;
 gpr.FlushLockX(ECX, EDX);
 gpr.FlushLockX(ESI);
-//INT3();
 MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
 if (inst.RA)
 ADD(32, R(EAX), gpr.R(inst.RA));
@@ -242,7 +242,7 @@ void Jit64::stfs(UGeckoInstruction inst)
 {
 // Float directly to write gather pipe! Fun!
 CVTSD2SS(XMM0, fpr.R(s));
-CALL((void*)Asm::fifoDirectWriteFloat);
+CALL((void*)asm_routines.fifoDirectWriteFloat);
 // TODO
 js.fifoBytesThisBlock += 4;
 return;
@@ -161,7 +161,7 @@ void Jit64::psq_st(UGeckoInstruction inst)
 #endif
 FixupBranch skip_call = J();
 SetJumpTarget(argh);
-ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
+ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
 SetJumpTarget(skip_call);
 gpr.UnlockAll();
 gpr.UnlockAllX();
@@ -184,7 +184,7 @@ void Jit64::psq_st(UGeckoInstruction inst)
 // Writing to FIFO. Let's do fast method.
 CVTPD2PS(XMM0, fpr.R(s));
 PSHUFB(XMM0, M((void*)&pbswapShuffle2x4));
-CALL((void*)Asm::fifoDirectWriteXmm64);
+CALL((void*)asm_routines.fifoDirectWriteXmm64);
 js.fifoBytesThisBlock += 8;
 return;
 }
@@ -211,7 +211,7 @@ void Jit64::psq_st(UGeckoInstruction inst)
 MOV(64, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
 FixupBranch arg2 = J();
 SetJumpTarget(argh);
-CALL(ProtectFunction((void *)&WriteDual32, 0));
+CALL(thunks.ProtectFunction((void *)&WriteDual32, 0));
 #else
 FixupBranch argh = J_CC(CC_NZ);
 MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
@@ -224,10 +224,10 @@ void Jit64::psq_st(UGeckoInstruction inst)
 FixupBranch arg2 = J();
 SetJumpTarget(argh);
 MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
-ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
+ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
 MOV(32, R(ABI_PARAM1), M(((char*)&temp64)));
 ADD(32, R(ABI_PARAM2), Imm32(4));
-ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
+ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
 #endif
 SetJumpTarget(arg2);
 gpr.UnlockAll();
@@ -424,7 +424,6 @@ void Jit64::psq_l(UGeckoInstruction inst)
 #endif
 BSWAP(32, EAX);
 MOV(32, M(&temp64), R(EAX));
-//INT3();
 fpr.LoadToX64(inst.RS, false, true);
 X64Reg r = fpr.R(inst.RS).GetSimpleReg();
 MOVD_xmm(XMM0, M(&temp64));
@@ -163,40 +163,40 @@
 */
 
 //There's still a little bit more optimization that can be squeezed out of this
-void Jit64::tri_op(int d, int a, int b, bool reversible, void (*op)(X64Reg, OpArg))
+void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg))
 {
 fpr.Lock(d, a, b);
 
 if (d == a)
 {
 fpr.LoadToX64(d, true);
-op(fpr.RX(d), fpr.R(b));
+(this->*op)(fpr.RX(d), fpr.R(b));
 }
 else if (d == b && reversible)
 {
 fpr.LoadToX64(d, true);
-op(fpr.RX(d), fpr.R(a));
+(this->*op)(fpr.RX(d), fpr.R(a));
 }
 else if (a != d && b != d)
 {
 //sources different from d, can use rather quick solution
 fpr.LoadToX64(d, false);
 MOVAPD(fpr.RX(d), fpr.R(a));
-op(fpr.RX(d), fpr.R(b));
+(this->*op)(fpr.RX(d), fpr.R(b));
 }
 else if (b != d)
 {
 fpr.LoadToX64(d, false);
 MOVAPD(XMM0, fpr.R(b));
 MOVAPD(fpr.RX(d), fpr.R(a));
-op(fpr.RX(d), Gen::R(XMM0));
+(this->*op)(fpr.RX(d), Gen::R(XMM0));
 }
 else //Other combo, must use two temps :(
 {
 MOVAPD(XMM0, fpr.R(a));
 MOVAPD(XMM1, fpr.R(b));
 fpr.LoadToX64(d, false);
-op(XMM0, Gen::R(XMM1));
+(this->*op)(XMM0, Gen::R(XMM1));
 MOVAPD(fpr.RX(d), Gen::R(XMM0));
 }
 ForceSinglePrecisionP(fpr.RX(d));
@@ -213,16 +213,16 @@
 }
 switch (inst.SUBOP5)
 {
-case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &DIVPD); break; //div
-case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &SUBPD); break; //sub
-case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &ADDPD); break; //add
+case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
+case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
+case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
 case 23://sel
 Default(inst);
 break;
 case 24://res
 Default(inst);
 break;
-case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &MULPD); break; //mul
+case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
 default:
 _assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
 }
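With ADDPD, SUBPD and friends now member functions of XEmitter, tri_op can no longer take a plain function pointer; it takes a pointer-to-member and invokes it through this->*, as the two hunks above show. Below is a self-contained sketch of just that mechanism; the types and opcode bodies are stand-ins, not the real emitter.

#include <cstdio>

// Minimal stand-ins so the pointer-to-member syntax can be shown in isolation;
// the real code passes &XEmitter::ADDPD etc. into Jit64::tri_op.
enum X64Reg { XMM0, XMM1 };
struct OpArg { X64Reg reg; };

struct EmitterSketch {
    void ADDPD(X64Reg dst, OpArg src) { std::printf("addpd xmm%d, xmm%d\n", dst, src.reg); }
    void MULPD(X64Reg dst, OpArg src) { std::printf("mulpd xmm%d, xmm%d\n", dst, src.reg); }
};

struct JitSketch : EmitterSketch {
    // A free-function pointer `void (*op)(X64Reg, OpArg)` no longer matches once
    // the opcode emitters are members, so the parameter becomes a pointer-to-member.
    void tri_op(X64Reg d, OpArg b, void (EmitterSketch::*op)(X64Reg, OpArg)) {
        (this->*op)(d, b);   // parentheses required: this->*op alone does not bind the call
    }
};

int main() {
    JitSketch jit;
    jit.tri_op(XMM0, OpArg{XMM1}, &EmitterSketch::ADDPD);  // prints "addpd xmm0, xmm1"
    jit.tri_op(XMM0, OpArg{XMM1}, &EmitterSketch::MULPD);
}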
@@ -76,9 +76,9 @@ void Jit64::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signEx
 FixupBranch argh = J_CC(CC_Z);
 switch (accessSize)
 {
-case 32: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
-case 16: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U16, 1), reg); break;
-case 8: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U8, 1), reg); break;
+case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
+case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16, 1), reg); break;
+case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8, 1), reg); break;
 }
 if (signExtend && accessSize < 32) {
 // Need to sign extend values coming from the Read_U* functions.
@@ -114,7 +114,7 @@ void Jit64::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize,
 UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
 FixupBranch skip_call = J();
 SetJumpTarget(unsafe_addr);
-ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
+ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
 SetJumpTarget(skip_call);
 }
 
@@ -463,7 +463,7 @@ void BPWritten(int addr, int changes, int newval)
 {
 // the number of lines copied is determined by the y scale * source efb height
 float yScale = bpmem.dispcopyyscale / 256.0f;
-float xfbLines = bpmem.copyTexSrcWH.y + 1.0 * yScale;
+float xfbLines = bpmem.copyTexSrcWH.y + 1.0f * yScale;
 XFB_Write(Memory_GetPtr(bpmem.copyTexDest<<5), multirc, (bpmem.copyMipMapStrideChannels << 4), (int)xfbLines);
 }
 else
@@ -82,68 +82,68 @@ void NativeVertexFormat::Initialize(const PortableVertexDeclaration &_vtx_decl)
 }
 
 #ifdef USE_JIT
+Gen::XEmitter emit(m_compiledCode);
 // Alright, we have our vertex declaration. Compile some crazy code to set it quickly using GL.
-u8 *old_code_ptr = GetWritableCodePtr();
-SetCodePtr(m_compiledCode);
-ABI_EmitPrologue(6);
+emit.ABI_EmitPrologue(6);
 
-CallCdeclFunction4_I(glVertexPointer, 3, GL_FLOAT, _vtx_decl.stride, 0);
+emit.CallCdeclFunction4_I(glVertexPointer, 3, GL_FLOAT, _vtx_decl.stride, 0);
 
 if (_vtx_decl.num_normals >= 1) {
-CallCdeclFunction3_I(glNormalPointer, VarToGL(_vtx_decl.normal_gl_type), _vtx_decl.stride, _vtx_decl.normal_offset[0]);
+emit.CallCdeclFunction3_I(glNormalPointer, VarToGL(_vtx_decl.normal_gl_type), _vtx_decl.stride, _vtx_decl.normal_offset[0]);
 if (_vtx_decl.num_normals == 3) {
-CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_NORM1_ATTRIB, _vtx_decl.normal_gl_size, VarToGL(_vtx_decl.normal_gl_type), GL_TRUE, _vtx_decl.stride, _vtx_decl.normal_offset[1]);
-CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_NORM2_ATTRIB, _vtx_decl.normal_gl_size, VarToGL(_vtx_decl.normal_gl_type), GL_TRUE, _vtx_decl.stride, _vtx_decl.normal_offset[2]);
+emit.CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_NORM1_ATTRIB, _vtx_decl.normal_gl_size, VarToGL(_vtx_decl.normal_gl_type), GL_TRUE, _vtx_decl.stride, _vtx_decl.normal_offset[1]);
+emit.CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_NORM2_ATTRIB, _vtx_decl.normal_gl_size, VarToGL(_vtx_decl.normal_gl_type), GL_TRUE, _vtx_decl.stride, _vtx_decl.normal_offset[2]);
 }
 }
 
 for (int i = 0; i < 2; i++) {
 if (_vtx_decl.color_offset[i] != -1) {
 if (i == 0)
-CallCdeclFunction4_I(glColorPointer, 4, GL_UNSIGNED_BYTE, _vtx_decl.stride, _vtx_decl.color_offset[i]);
+emit.CallCdeclFunction4_I(glColorPointer, 4, GL_UNSIGNED_BYTE, _vtx_decl.stride, _vtx_decl.color_offset[i]);
 else
-CallCdeclFunction4((void *)glSecondaryColorPointer, 4, GL_UNSIGNED_BYTE, _vtx_decl.stride, _vtx_decl.color_offset[i]);
+emit.CallCdeclFunction4((void *)glSecondaryColorPointer, 4, GL_UNSIGNED_BYTE, _vtx_decl.stride, _vtx_decl.color_offset[i]);
 }
 }
 
-for (int i = 0; i < 8; i++) {
-if (_vtx_decl.texcoord_offset[i] != -1) {
+for (int i = 0; i < 8; i++)
+{
+if (_vtx_decl.texcoord_offset[i] != -1)
+{
 int id = GL_TEXTURE0 + i;
 #ifdef _M_X64
 #ifdef _MSC_VER
-MOV(32, R(RCX), Imm32(id));
+emit.MOV(32, R(RCX), Imm32(id));
 #else
-MOV(32, R(RDI), Imm32(id));
+emit.MOV(32, R(RDI), Imm32(id));
 #endif
 #else
-ABI_AlignStack(1 * 4);
-PUSH(32, Imm32(id));
+emit.ABI_AlignStack(1 * 4);
+emit.PUSH(32, Imm32(id));
 #endif
-CALL((void *)glClientActiveTexture);
+emit.CALL((void *)glClientActiveTexture);
 #ifndef _M_X64
 #ifdef _WIN32
 // don't inc stack on windows, stdcall
 #else
-ABI_RestoreStack(1 * 4);
+emit.ABI_RestoreStack(1 * 4);
 #endif
 #endif
-CallCdeclFunction4_I(
+emit.CallCdeclFunction4_I(
 glTexCoordPointer, _vtx_decl.texcoord_size[i], VarToGL(_vtx_decl.texcoord_gl_type[i]),
 _vtx_decl.stride, _vtx_decl.texcoord_offset[i]);
 }
 }
 
 if (_vtx_decl.posmtx_offset != -1) {
-CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_POSMTX_ATTRIB, 4, GL_UNSIGNED_BYTE, GL_FALSE, _vtx_decl.stride, _vtx_decl.posmtx_offset);
+emit.CallCdeclFunction6((void *)glVertexAttribPointer, SHADER_POSMTX_ATTRIB, 4, GL_UNSIGNED_BYTE, GL_FALSE, _vtx_decl.stride, _vtx_decl.posmtx_offset);
 }
 
-ABI_EmitEpilogue(6);
-if (Gen::GetCodePtr() - (u8*)m_compiledCode > COMPILED_CODE_SIZE)
+emit.ABI_EmitEpilogue(6);
+if (emit.GetCodePtr() - (u8*)m_compiledCode > COMPILED_CODE_SIZE)
 {
 Crash();
 }
 
-SetCodePtr(old_code_ptr);
 #endif
 this->vtx_decl = _vtx_decl;
 }
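The Initialize hunk above is the other half of the change: instead of repointing the global code pointer with GetWritableCodePtr/SetCodePtr, a local Gen::XEmitter is constructed on the target buffer and every emit call goes through it, so nothing has to be saved and restored afterwards. A rough sketch of that idea follows, with a toy byte-writer standing in for XEmitter.

#include <cstdint>

typedef uint8_t u8;

// Sketch only: a real XEmitter writes x86 opcodes, here we just append raw
// bytes so the "write cursor lives in the object" idea is visible.
class LocalEmitterSketch {
public:
    explicit LocalEmitterSketch(u8 *start) : code_(start) {}  // was: global SetCodePtr(start)
    void Write8(u8 value) { *code_++ = value; }
    const u8 *GetCodePtr() const { return code_; }            // how far we have emitted
private:
    u8 *code_;  // per-instance cursor; no global state to restore when done
};

int main() {
    u8 buffer[16];
    LocalEmitterSketch emit(buffer);
    emit.Write8(0xC3);  // e.g. a lone RET
    return static_cast<int>(emit.GetCodePtr() - buffer);  // 1 byte emitted
}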
@@ -44,7 +44,7 @@
 
 #define USE_JIT
 
-#define COMPILED_CODE_SIZE 4096*4
+#define COMPILED_CODE_SIZE 4096
 
 NativeVertexFormat *g_nativeVertexFmt;
 
@@ -116,6 +116,7 @@ void LOADERDECL TexMtx_Write_Short3()
 
 VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
 {
+m_compiledCode = NULL;
 m_numLoadedVertices = 0;
 m_VertexSize = 0;
 m_numPipelineStages = 0;
@@ -126,16 +127,14 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
 m_VtxDesc = vtx_desc;
 SetVAT(vtx_attr.g0.Hex, vtx_attr.g1.Hex, vtx_attr.g2.Hex);
 
-m_compiledCode = (u8 *)AllocateExecutableMemory(COMPILED_CODE_SIZE, false);
-if (m_compiledCode) {
-memset(m_compiledCode, 0, COMPILED_CODE_SIZE);
-}
+AllocCodeSpace(COMPILED_CODE_SIZE);
 CompileVertexTranslator();
+WriteProtect();
 }
 
 VertexLoader::~VertexLoader()
 {
-FreeMemoryPages(m_compiledCode, COMPILED_CODE_SIZE);
+FreeCodeSpace();
 delete m_NativeFmt;
 }
 
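VertexLoader now manages its JIT buffer through the Gen::XCodeBlock base: AllocCodeSpace in the constructor, WriteProtect once compilation is done, FreeCodeSpace in the destructor, rather than calling AllocateExecutableMemory/FreeMemoryPages itself. The sketch below mirrors only that lifecycle; plain heap memory stands in for the executable pages the real class allocates, and the class names are illustrative.

#include <cstddef>
#include <cstdint>

typedef uint8_t u8;

// Lifecycle sketch only: the real Gen::XCodeBlock allocates executable pages in
// AllocCodeSpace(), lets the derived class emit into them, and WriteProtect()
// marks them read + execute afterwards.
class CodeBlockSketch {
public:
    void AllocCodeSpace(size_t size) { region_ = new u8[size]; size_ = size; cursor_ = region_; }
    void WriteProtect() { /* real code: flip the pages to read-only + executable */ }
    void FreeCodeSpace() { delete[] region_; region_ = nullptr; cursor_ = nullptr; size_ = 0; }
protected:
    u8 *cursor_ = nullptr;   // a derived emitter would append generated code here
private:
    u8 *region_ = nullptr;
    size_t size_ = 0;
};

// Mirrors the constructor/destructor ordering in the hunk above.
class VertexLoaderSketch : public CodeBlockSketch {
public:
    VertexLoaderSketch()  { AllocCodeSpace(4096); /* CompileVertexTranslator(); */ WriteProtect(); }
    ~VertexLoaderSketch() { FreeCodeSpace(); }
};

int main() { VertexLoaderSketch loader; (void)loader; }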
@@ -143,13 +142,14 @@ void VertexLoader::CompileVertexTranslator()
 {
 m_VertexSize = 0;
 const TVtxAttr &vtx_attr = m_VtxAttr;
-//const TVtxDesc &vtx_desc = m_VtxDesc;
 
 #ifdef USE_JIT
-u8 *old_code_ptr = GetWritableCodePtr();
-SetCodePtr(m_compiledCode);
+if (m_compiledCode)
+PanicAlert("trying to recompile a vtx translator");
 
+m_compiledCode = GetCodePtr();
 ABI_EmitPrologue(4);
-// MOV(32, R(EBX), M(&loop_counter));
 // Start loop here
 const u8 *loop_start = GetCodePtr();
 
@@ -477,7 +477,6 @@ void VertexLoader::CompileVertexTranslator()
 //SUB(32, R(EBX), Imm8(1));
 J_CC(CC_NZ, loop_start, true);
 ABI_EmitEpilogue(4);
-SetCodePtr(old_code_ptr);
 #endif
 m_NativeFmt->Initialize(vtx_decl);
 }
@@ -22,9 +22,10 @@
 
 #include "CPMemory.h"
 #include "DataReader.h"
 
 #include "NativeVertexFormat.h"
 
+#include "x64Emitter.h"
 
 class VertexLoaderUID
 {
 u32 vid[5];
@@ -52,7 +53,7 @@ public:
 }
 };
 
-class VertexLoader
+class VertexLoader : public Gen::XCodeBlock
 {
 public:
 VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
@@ -86,7 +87,7 @@ private:
 TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower.
 int m_numPipelineStages;
 
-u8 *m_compiledCode;
+const u8 *m_compiledCode;
 
 int m_numLoadedVertices;
 