mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-02-10 06:29:00 +01:00
Fix XFB conversion (silly mistake). Disable texture conversion for now since it fails. If the work group size can't be read, fall back to 64, which is what my 8600GTS uses. Tested XFB in the Melee intro: got around 10 FPS faster than with the CPU-side conversion.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4371 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
dde693afb8
commit
059970e971
@ -134,6 +134,7 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei
|
||||
default:
|
||||
return PC_TEX_FMT_NONE;
|
||||
}*/
|
||||
return PC_TEX_FMT_NONE;
|
||||
switch(texformat)
|
||||
{
|
||||
case GX_TF_I8:
|
||||
@ -143,13 +144,13 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei
|
||||
printf("width %d, height %d\n", width, height);
|
||||
// Create the input and output arrays in device memory for our calculation
|
||||
//
|
||||
cl_mem _dst = clCreateBuffer(OpenCL::g_context, CL_MEM_WRITE_ONLY, TexDecoder_GetTextureSizeInBytes(width, height, texformat), NULL, NULL);
|
||||
cl_mem _dst = clCreateBuffer(OpenCL::g_context, CL_MEM_WRITE_ONLY, sizeof(unsigned char) * width * height, NULL, NULL);
|
||||
if (!dst)
|
||||
{
|
||||
printf("Error: Failed to allocate device memory!\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem _src = clCreateBuffer(OpenCL::g_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, TexDecoder_GetTextureSizeInBytes(width, height, texformat), (void*)src, NULL);
|
||||
cl_mem _src = clCreateBuffer(OpenCL::g_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(unsigned char) * width * height, (void*)src, NULL);
|
||||
if (!src)
|
||||
{
|
||||
printf("Error: Failed to allocate device memory!\n");
|
||||
@ -174,8 +175,9 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Failed to retrieve kernel work group info! %d\n", err);
|
||||
exit(1);
|
||||
local = 64;
|
||||
}
|
||||
|
||||
// Execute the kernel over the entire range of our 1d input data set
|
||||
// using the maximum number of work group items for this device
|
||||
//
|
||||
@ -193,7 +195,7 @@ PC_TexFormat TexDecoder_Decode_OpenCL(u8 *dst, const u8 *src, int width, int hei
|
||||
|
||||
// Read back the results from the device to verify the output
|
||||
//
|
||||
err = clEnqueueReadBuffer( OpenCL::g_cmdq, _dst, CL_TRUE, 0, TexDecoder_GetTextureSizeInBytes(width, height, texformat), dst, 0, NULL, NULL );
|
||||
err = clEnqueueReadBuffer( OpenCL::g_cmdq, _dst, CL_TRUE, 0, sizeof(unsigned char) * width * height, dst, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Failed to read output array! %d\n", err);
|
||||
|
@ -87,15 +87,14 @@ const char *__ConvertToXFB = "__kernel void ConvertToXFB(__global unsigned int
|
||||
{ \n \
|
||||
const unsigned char *src = _pEFB;\n \
|
||||
int id = get_global_id(0);\n \
|
||||
src += id * 8; \n \
|
||||
int srcOffset = id * 8; \n \
|
||||
\n \
|
||||
int y1 = (((16843 * src[0]) + (33030 * src[1]) + (6423 * src[2])) >> 16) + 16; \n \
|
||||
int u1 = ((-(9699 * src[0]) - (19071 * src[1]) + (28770 * src[2])) >> 16) + 128;\n \
|
||||
src += 4;\n \
|
||||
int y1 = (((16843 * src[srcOffset]) + (33030 * src[srcOffset + 1]) + (6423 * src[srcOffset + 2])) >> 16) + 16; \n \
|
||||
int u1 = ((-(9699 * src[srcOffset]) - (19071 * src[srcOffset + 1]) + (28770 * src[srcOffset + 2])) >> 16) + 128;\n \
|
||||
srcOffset += 4;\n \
|
||||
\n \
|
||||
int y2 = (((16843 * src[0]) + (33030 * src[1]) + (6423 * src[2])) >> 16) + 16;\n \
|
||||
int v2 = (((28770 * src[0]) - (24117 * src[1]) - (4653 * src[2])) >> 16) + 128;\n \
|
||||
src += 4;\n \
|
||||
int y2 = (((16843 * src[srcOffset]) + (33030 * src[srcOffset + 1]) + (6423 * src[srcOffset + 2])) >> 16) + 16;\n \
|
||||
int v2 = (((28770 * src[srcOffset]) - (24117 * src[srcOffset + 1]) - (4653 * src[srcOffset + 2])) >> 16) + 128;\n \
|
||||
\n \
|
||||
dst[id] = (v2 << 24) | (y2 << 16) | (u1 << 8) | (y1); \n \
|
||||
} \n ";
|
||||
@ -178,8 +177,9 @@ void ConvertFromXFB(u32 *dst, const u8* _pXFB, int width, int height)
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Failed to retrieve kernel work group info! %d\n", err);
|
||||
exit(1);
|
||||
local = 32;
|
||||
}
|
||||
|
||||
// Execute the kernel over the entire range of our 1d input data set
|
||||
// using the maximum number of work group items for this device
|
||||
//
|
||||
@ -283,8 +283,9 @@ void ConvertToXFB(u32 *dst, const u8* _pEFB, int width, int height)
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Failed to retrieve kernel work group info! %d\n", err);
|
||||
exit(1);
|
||||
local = 64;
|
||||
}
|
||||
|
||||
// Execute the kernel over the entire range of our 1d input data set
|
||||
// using the maximum number of work group items for this device
|
||||
//
|
||||
|
Loading…
x
Reference in New Issue
Block a user