mirror of
https://github.com/dborth/vbagx.git
synced 2025-01-14 19:49:06 +01:00
582 lines
16 KiB
C++
582 lines
16 KiB
C++
// VisualBoyAdvance - Nintendo Gameboy/GameboyAdvance (TM) emulator.
|
|
// Copyright (C) 1999-2003 Forgotten
|
|
// Copyright (C) 2004 Forgotten and the VBA development team
|
|
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation; either version 2, or(at your option)
|
|
// any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software Foundation,
|
|
// Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
#include "System.h"
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#ifdef MMX
|
|
extern "C" bool cpu_mmx;
|
|
#endif
|
|
|
|
/*
|
|
* Thanks to Kawaks' Mr. K for the code
|
|
|
|
Incorporated into vba by Anthony Di Franco
|
|
*/
|
|
|
|
static u8 *frm1 = NULL;
|
|
static u8 *frm2 = NULL;
|
|
static u8 *frm3 = NULL;
|
|
|
|
extern int RGB_LOW_BITS_MASK;
|
|
extern u32 qRGB_COLOR_MASK[2];
|
|
|
|
static void Init()
|
|
{
|
|
frm1 = (u8 *)calloc(322*242,4);
|
|
// 1 frame ago
|
|
frm2 = (u8 *)calloc(322*242,4);
|
|
// 2 frames ago
|
|
frm3 = (u8 *)calloc(322*242,4);
|
|
// 3 frames ago
|
|
}
|
|
|
|
void InterframeCleanup()
|
|
{
|
|
if(frm1)
|
|
free(frm1);
|
|
if(frm2)
|
|
free(frm2);
|
|
if(frm3)
|
|
free(frm3);
|
|
frm1 = frm2 = frm3 = NULL;
|
|
}
|
|
|
|
#ifdef MMX
|
|
static void SmartIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
|
|
{
|
|
u16 *src0 = (u16 *)srcPtr;
|
|
u16 *src1 = (u16 *)frm1;
|
|
u16 *src2 = (u16 *)frm2;
|
|
u16 *src3 = (u16 *)frm3;
|
|
|
|
int count = width >> 2;
|
|
|
|
for(int i = 0; i < height; i++) {
|
|
#ifdef __GNUC__
|
|
asm volatile (
|
|
"push %4\n"
|
|
"movq 0(%5), %%mm7\n" // colorMask
|
|
"0:\n"
|
|
"movq 0(%0), %%mm0\n" // src0
|
|
"movq 0(%1), %%mm1\n" // src1
|
|
"movq 0(%2), %%mm2\n" // src2
|
|
"movq 0(%3), %%mm3\n" // src3
|
|
"movq %%mm0, 0(%3)\n" // src3 = src0
|
|
"movq %%mm0, %%mm4\n"
|
|
"movq %%mm1, %%mm5\n"
|
|
"pcmpeqw %%mm2, %%mm5\n" // src1 == src2 (A)
|
|
"pcmpeqw %%mm3, %%mm4\n" // src3 == src0 (B)
|
|
"por %%mm5, %%mm4\n" // A | B
|
|
"movq %%mm2, %%mm5\n"
|
|
"pcmpeqw %%mm0, %%mm5\n" // src0 == src2 (C)
|
|
"pcmpeqw %%mm1, %%mm3\n" // src1 == src3 (D)
|
|
"por %%mm3, %%mm5\n" // C|D
|
|
"pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
|
|
"movq %%mm0, %%mm2\n"
|
|
"pand %%mm7, %%mm2\n" // color & colorMask
|
|
"pand %%mm7, %%mm1\n" // src1 & colorMask
|
|
"psrlw $1, %%mm2\n" // (color & colorMask) >> 1 (E)
|
|
"psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
|
|
"paddw %%mm2, %%mm1\n" // E+F
|
|
"pand %%mm4, %%mm1\n" // (E+F) & res
|
|
"pandn %%mm0, %%mm4\n" // color& !res
|
|
|
|
"por %%mm1, %%mm4\n"
|
|
"movq %%mm4, 0(%0)\n" // src0 = res
|
|
|
|
"addl $8, %0\n"
|
|
"addl $8, %1\n"
|
|
"addl $8, %2\n"
|
|
"addl $8, %3\n"
|
|
|
|
"decl %4\n"
|
|
"jnz 0b\n"
|
|
"pop %4\n"
|
|
"emms\n"
|
|
: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
|
|
: "r" (count), "r" (qRGB_COLOR_MASK)
|
|
);
|
|
#else
|
|
__asm {
|
|
movq mm7, qword ptr [qRGB_COLOR_MASK];
|
|
mov eax, src0;
|
|
mov ebx, src1;
|
|
mov ecx, src2;
|
|
mov edx, src3;
|
|
mov edi, count;
|
|
label0:
|
|
movq mm0, qword ptr [eax]; // src0
|
|
movq mm1, qword ptr [ebx]; // src1
|
|
movq mm2, qword ptr [ecx]; // src2
|
|
movq mm3, qword ptr [edx]; // src3
|
|
movq qword ptr [edx], mm0; // src3 = src0
|
|
movq mm4, mm0;
|
|
movq mm5, mm1;
|
|
pcmpeqw mm5, mm2; // src1 == src2 (A)
|
|
pcmpeqw mm4, mm3; // src3 == src0 (B)
|
|
por mm4, mm5; // A | B
|
|
movq mm5, mm2;
|
|
pcmpeqw mm5, mm0; // src0 == src2 (C)
|
|
pcmpeqw mm3, mm1; // src1 == src3 (D)
|
|
por mm5, mm3; // C|D
|
|
pandn mm4, mm5; // (!(A|B))&(C|D)
|
|
movq mm2, mm0;
|
|
pand mm2, mm7; // color & colorMask
|
|
pand mm1, mm7; // src1 & colorMask
|
|
psrlw mm2, 1; // (color & colorMask) >> 1 (E)
|
|
psrlw mm1, 1; // (src & colorMask) >> 1 (F)
|
|
paddw mm1, mm2; // E+F
|
|
pand mm1, mm4; // (E+F) & res
|
|
pandn mm4, mm0; // color & !res
|
|
|
|
por mm4, mm1;
|
|
movq qword ptr [eax], mm4; // src0 = res
|
|
|
|
add eax, 8;
|
|
add ebx, 8;
|
|
add ecx, 8;
|
|
add edx, 8;
|
|
|
|
dec edi;
|
|
jnz label0;
|
|
mov src0, eax;
|
|
mov src1, ebx;
|
|
mov src2, ecx;
|
|
mov src3, edx;
|
|
emms;
|
|
}
|
|
#endif
|
|
src0+=2;
|
|
src1+=2;
|
|
src2+=2;
|
|
src3+=2;
|
|
}
|
|
|
|
/* Swap buffers around */
|
|
u8 *temp = frm1;
|
|
frm1 = frm3;
|
|
frm3 = frm2;
|
|
frm2 = temp;
|
|
}
|
|
#endif
|
|
|
|
void SmartIB(u8 *srcPtr, u32 srcPitch, int width, int height)
|
|
{
|
|
if(frm1 == NULL) {
|
|
Init();
|
|
}
|
|
#ifdef MMX
|
|
if(cpu_mmx) {
|
|
SmartIB_MMX(srcPtr, srcPitch, width, height);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
u16 colorMask = ~RGB_LOW_BITS_MASK;
|
|
|
|
u16 *src0 = (u16 *)srcPtr;
|
|
u16 *src1 = (u16 *)frm1;
|
|
u16 *src2 = (u16 *)frm2;
|
|
u16 *src3 = (u16 *)frm3;
|
|
|
|
int sPitch = srcPitch >> 1;
|
|
|
|
int pos = 0;
|
|
for (int j = 0; j < height; j++)
|
|
for (int i = 0; i < sPitch; i++) {
|
|
u16 color = src0[pos];
|
|
src0[pos] =
|
|
(src1[pos] != src2[pos]) &&
|
|
(src3[pos] != color) &&
|
|
((color == src2[pos]) || (src1[pos] == src3[pos]))
|
|
? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
|
|
color;
|
|
src3[pos] = color; /* oldest buffer now holds newest frame */
|
|
pos++;
|
|
}
|
|
|
|
/* Swap buffers around */
|
|
u8 *temp = frm1;
|
|
frm1 = frm3;
|
|
frm3 = frm2;
|
|
frm2 = temp;
|
|
}
|
|
|
|
#ifdef MMX
|
|
static void SmartIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
|
|
{
|
|
u32 *src0 = (u32 *)srcPtr;
|
|
u32 *src1 = (u32 *)frm1;
|
|
u32 *src2 = (u32 *)frm2;
|
|
u32 *src3 = (u32 *)frm3;
|
|
|
|
int count = width >> 1;
|
|
|
|
for(int i = 0; i < height; i++) {
|
|
#ifdef __GNUC__
|
|
asm volatile (
|
|
"push %4\n"
|
|
"movq 0(%5), %%mm7\n" // colorMask
|
|
"0:\n"
|
|
"movq 0(%0), %%mm0\n" // src0
|
|
"movq 0(%1), %%mm1\n" // src1
|
|
"movq 0(%2), %%mm2\n" // src2
|
|
"movq 0(%3), %%mm3\n" // src3
|
|
"movq %%mm0, 0(%3)\n" // src3 = src0
|
|
"movq %%mm0, %%mm4\n"
|
|
"movq %%mm1, %%mm5\n"
|
|
"pcmpeqd %%mm2, %%mm5\n" // src1 == src2 (A)
|
|
"pcmpeqd %%mm3, %%mm4\n" // src3 == src0 (B)
|
|
"por %%mm5, %%mm4\n" // A | B
|
|
"movq %%mm2, %%mm5\n"
|
|
"pcmpeqd %%mm0, %%mm5\n" // src0 == src2 (C)
|
|
"pcmpeqd %%mm1, %%mm3\n" // src1 == src3 (D)
|
|
"por %%mm3, %%mm5\n" // C|D
|
|
"pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
|
|
"movq %%mm0, %%mm2\n"
|
|
"pand %%mm7, %%mm2\n" // color & colorMask
|
|
"pand %%mm7, %%mm1\n" // src1 & colorMask
|
|
"psrld $1, %%mm2\n" // (color & colorMask) >> 1 (E)
|
|
"psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
|
|
"paddd %%mm2, %%mm1\n" // E+F
|
|
"pand %%mm4, %%mm1\n" // (E+F) & res
|
|
"pandn %%mm0, %%mm4\n" // color& !res
|
|
|
|
"por %%mm1, %%mm4\n"
|
|
"movq %%mm4, 0(%0)\n" // src0 = res
|
|
|
|
"addl $8, %0\n"
|
|
"addl $8, %1\n"
|
|
"addl $8, %2\n"
|
|
"addl $8, %3\n"
|
|
|
|
"decl %4\n"
|
|
"jnz 0b\n"
|
|
"pop %4\n"
|
|
"emms\n"
|
|
: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3)
|
|
: "r" (count), "r" (qRGB_COLOR_MASK)
|
|
);
|
|
#else
|
|
__asm {
|
|
movq mm7, qword ptr [qRGB_COLOR_MASK];
|
|
mov eax, src0;
|
|
mov ebx, src1;
|
|
mov ecx, src2;
|
|
mov edx, src3;
|
|
mov edi, count;
|
|
label0:
|
|
movq mm0, qword ptr [eax]; // src0
|
|
movq mm1, qword ptr [ebx]; // src1
|
|
movq mm2, qword ptr [ecx]; // src2
|
|
movq mm3, qword ptr [edx]; // src3
|
|
movq qword ptr [edx], mm0; // src3 = src0
|
|
movq mm4, mm0;
|
|
movq mm5, mm1;
|
|
pcmpeqd mm5, mm2; // src1 == src2 (A)
|
|
pcmpeqd mm4, mm3; // src3 == src0 (B)
|
|
por mm4, mm5; // A | B
|
|
movq mm5, mm2;
|
|
pcmpeqd mm5, mm0; // src0 == src2 (C)
|
|
pcmpeqd mm3, mm1; // src1 == src3 (D)
|
|
por mm5, mm3; // C|D
|
|
pandn mm4, mm5; // (!(A|B))&(C|D)
|
|
movq mm2, mm0;
|
|
pand mm2, mm7; // color & colorMask
|
|
pand mm1, mm7; // src1 & colorMask
|
|
psrld mm2, 1; // (color & colorMask) >> 1 (E)
|
|
psrld mm1, 1; // (src & colorMask) >> 1 (F)
|
|
paddd mm1, mm2; // E+F
|
|
pand mm1, mm4; // (E+F) & res
|
|
pandn mm4, mm0; // color & !res
|
|
|
|
por mm4, mm1;
|
|
movq qword ptr [eax], mm4; // src0 = res
|
|
|
|
add eax, 8;
|
|
add ebx, 8;
|
|
add ecx, 8;
|
|
add edx, 8;
|
|
|
|
dec edi;
|
|
jnz label0;
|
|
mov src0, eax;
|
|
mov src1, ebx;
|
|
mov src2, ecx;
|
|
mov src3, edx;
|
|
emms;
|
|
}
|
|
#endif
|
|
|
|
src0++;
|
|
src1++;
|
|
src2++;
|
|
src3++;
|
|
}
|
|
/* Swap buffers around */
|
|
u8 *temp = frm1;
|
|
frm1 = frm3;
|
|
frm3 = frm2;
|
|
frm2 = temp;
|
|
}
|
|
#endif
|
|
|
|
void SmartIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
|
|
{
|
|
if(frm1 == NULL) {
|
|
Init();
|
|
}
|
|
#ifdef MMX
|
|
if(cpu_mmx) {
|
|
SmartIB32_MMX(srcPtr, srcPitch, width, height);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
u32 *src0 = (u32 *)srcPtr;
|
|
u32 *src1 = (u32 *)frm1;
|
|
u32 *src2 = (u32 *)frm2;
|
|
u32 *src3 = (u32 *)frm3;
|
|
|
|
u32 colorMask = 0xfefefe;
|
|
|
|
int sPitch = srcPitch >> 2;
|
|
int pos = 0;
|
|
|
|
for (int j = 0; j < height; j++)
|
|
for (int i = 0; i < sPitch; i++) {
|
|
u32 color = src0[pos];
|
|
src0[pos] =
|
|
(src1[pos] != src2[pos]) &&
|
|
(src3[pos] != color) &&
|
|
((color == src2[pos]) || (src1[pos] == src3[pos]))
|
|
? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
|
|
color;
|
|
src3[pos] = color; /* oldest buffer now holds newest frame */
|
|
pos++;
|
|
}
|
|
|
|
/* Swap buffers around */
|
|
u8 *temp = frm1;
|
|
frm1 = frm3;
|
|
frm3 = frm2;
|
|
frm2 = temp;
|
|
}
|
|
|
|
#ifdef MMX
|
|
static void MotionBlurIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
|
|
{
|
|
u16 *src0 = (u16 *)srcPtr;
|
|
u16 *src1 = (u16 *)frm1;
|
|
|
|
int count = width >> 2;
|
|
|
|
for(int i = 0; i < height; i++) {
|
|
#ifdef __GNUC__
|
|
asm volatile (
|
|
"push %2\n"
|
|
"movq 0(%3), %%mm7\n" // colorMask
|
|
"0:\n"
|
|
"movq 0(%0), %%mm0\n" // src0
|
|
"movq 0(%1), %%mm1\n" // src1
|
|
"movq %%mm0, 0(%1)\n" // src1 = src0
|
|
"pand %%mm7, %%mm0\n" // color & colorMask
|
|
"pand %%mm7, %%mm1\n" // src1 & colorMask
|
|
"psrlw $1, %%mm0\n" // (color & colorMask) >> 1 (E)
|
|
"psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
|
|
"paddw %%mm1, %%mm0\n" // E+F
|
|
|
|
"movq %%mm0, 0(%0)\n" // src0 = res
|
|
|
|
"addl $8, %0\n"
|
|
"addl $8, %1\n"
|
|
|
|
"decl %2\n"
|
|
"jnz 0b\n"
|
|
"pop %2\n"
|
|
"emms\n"
|
|
: "+r" (src0), "+r" (src1)
|
|
: "r" (count), "r" (qRGB_COLOR_MASK)
|
|
);
|
|
#else
|
|
__asm {
|
|
movq mm7, qword ptr [qRGB_COLOR_MASK];
|
|
mov eax, src0;
|
|
mov ebx, src1;
|
|
mov edi, count;
|
|
label0:
|
|
movq mm0, qword ptr [eax]; // src0
|
|
movq mm1, qword ptr [ebx]; // src1
|
|
movq qword ptr [ebx], mm0; // src1 = src0
|
|
pand mm0, mm7; // color & colorMask
|
|
pand mm1, mm7; // src1 & colorMask
|
|
psrlw mm0, 1; // (color & colorMask) >> 1 (E)
|
|
psrlw mm1, 1; // (src & colorMask) >> 1 (F)
|
|
paddw mm0, mm1; // E+F
|
|
|
|
movq qword ptr [eax], mm0; // src0 = res
|
|
|
|
add eax, 8;
|
|
add ebx, 8;
|
|
|
|
dec edi;
|
|
jnz label0;
|
|
mov src0, eax;
|
|
mov src1, ebx;
|
|
emms;
|
|
}
|
|
#endif
|
|
src0+=2;
|
|
src1+=2;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
void MotionBlurIB(u8 *srcPtr, u32 srcPitch, int width, int height)
|
|
{
|
|
if(frm1 == NULL) {
|
|
Init();
|
|
}
|
|
|
|
#ifdef MMX
|
|
if(cpu_mmx) {
|
|
MotionBlurIB_MMX(srcPtr, srcPitch, width, height);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
u16 colorMask = ~RGB_LOW_BITS_MASK;
|
|
|
|
u16 *src0 = (u16 *)srcPtr;
|
|
u16 *src1 = (u16 *)frm1;
|
|
|
|
int sPitch = srcPitch >> 1;
|
|
|
|
int pos = 0;
|
|
for (int j = 0; j < height; j++)
|
|
for (int i = 0; i < sPitch; i++) {
|
|
u16 color = src0[pos];
|
|
src0[pos] =
|
|
(((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
|
|
src1[pos] = color;
|
|
pos++;
|
|
}
|
|
}
|
|
|
|
#ifdef MMX
|
|
static void MotionBlurIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
|
|
{
|
|
u32 *src0 = (u32 *)srcPtr;
|
|
u32 *src1 = (u32 *)frm1;
|
|
|
|
int count = width >> 1;
|
|
|
|
for(int i = 0; i < height; i++) {
|
|
#ifdef __GNUC__
|
|
asm volatile (
|
|
"push %2\n"
|
|
"movq 0(%3), %%mm7\n" // colorMask
|
|
"0:\n"
|
|
"movq 0(%0), %%mm0\n" // src0
|
|
"movq 0(%1), %%mm1\n" // src1
|
|
"movq %%mm0, 0(%1)\n" // src1 = src0
|
|
"pand %%mm7, %%mm0\n" // color & colorMask
|
|
"pand %%mm7, %%mm1\n" // src1 & colorMask
|
|
"psrld $1, %%mm0\n" // (color & colorMask) >> 1 (E)
|
|
"psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
|
|
"paddd %%mm1, %%mm0\n" // E+F
|
|
|
|
"movq %%mm0, 0(%0)\n" // src0 = res
|
|
|
|
"addl $8, %0\n"
|
|
"addl $8, %1\n"
|
|
|
|
"decl %2\n"
|
|
"jnz 0b\n"
|
|
"pop %2\n"
|
|
"emms\n"
|
|
: "+r" (src0), "+r" (src1)
|
|
: "r" (count), "r" (qRGB_COLOR_MASK)
|
|
);
|
|
#else
|
|
__asm {
|
|
movq mm7, qword ptr [qRGB_COLOR_MASK];
|
|
mov eax, src0;
|
|
mov ebx, src1;
|
|
mov edi, count;
|
|
label0:
|
|
movq mm0, qword ptr [eax]; // src0
|
|
movq mm1, qword ptr [ebx]; // src1
|
|
movq qword ptr [ebx], mm0; // src1 = src0
|
|
pand mm0, mm7; // color & colorMask
|
|
pand mm1, mm7; // src1 & colorMask
|
|
psrld mm0, 1; // (color & colorMask) >> 1 (E)
|
|
psrld mm1, 1; // (src & colorMask) >> 1 (F)
|
|
paddd mm0, mm1; // E+F
|
|
|
|
movq qword ptr [eax], mm0; // src0 = res
|
|
|
|
add eax, 8;
|
|
add ebx, 8;
|
|
|
|
dec edi;
|
|
jnz label0;
|
|
mov src0, eax;
|
|
mov src1, ebx;
|
|
emms;
|
|
}
|
|
#endif
|
|
src0++;
|
|
src1++;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
void MotionBlurIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
|
|
{
|
|
if(frm1 == NULL) {
|
|
Init();
|
|
}
|
|
|
|
#ifdef MMX
|
|
if(cpu_mmx) {
|
|
MotionBlurIB32_MMX(srcPtr, srcPitch, width, height);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
u32 *src0 = (u32 *)srcPtr;
|
|
u32 *src1 = (u32 *)frm1;
|
|
|
|
u32 colorMask = 0xfefefe;
|
|
|
|
int sPitch = srcPitch >> 2;
|
|
int pos = 0;
|
|
|
|
for (int j = 0; j < height; j++)
|
|
for (int i = 0; i < sPitch; i++) {
|
|
u32 color = src0[pos];
|
|
src0[pos] = (((color & colorMask) >> 1) +
|
|
((src1[pos] & colorMask) >> 1));
|
|
src1[pos] = color;
|
|
pos++;
|
|
}
|
|
}
|