WiiFlow_Lite/source/gui/texture.cpp
fix94.1 e8c66b7278 -added jpg file support for game covers and themes, if no .png
is found, it will just replace the filename extension with .jpg
and try it again, the jpg file width and height need to be
divisable with 4, otherwise wiiflow will just ignore the cover
2012-07-22 22:10:17 +00:00

622 lines
18 KiB
C++

#include <string.h>
#include <stdio.h>
#include <ogcsys.h>
#include "texture.hpp"
#include "pngu.h"
#include "gcvid.h"
using namespace std;
static u32 upperPower(u32 width)
{
u32 i = 8;
u32 maxWidth = 1090;
while (i < width && i < maxWidth)
i <<= 1;
return i;
}
u32 fixGX_GetTexBufferSize(u16 wd, u16 ht, u32 fmt, u8 mipmap, u8 maxlod)
{
if (mipmap) return GX_GetTexBufferSize(wd, ht, fmt, mipmap, maxlod + 1);
return GX_GetTexBufferSize(wd, ht, fmt, mipmap, maxlod);
}
static inline u32 coordsRGBA8(u32 x, u32 y, u32 w)
{
return ((((y >> 2) * (w >> 2) + (x >> 2)) << 5) + ((y & 3) << 2) + (x & 3)) << 1;
}
static inline u32 coordsRGB565(u32 x, u32 y, u32 w)
{
return (((y >> 2) * (w >> 2) + (x >> 2)) << 4) + ((y & 3) << 2) + (x & 3);
}
static inline void _convertToFlippedRGBA(u8 *dst, const u8 *src, u32 width, u32 height)
{
for(u32 block = 0; block < height; block += 4)
{
for(u32 i = 0; i < width; i += 4)
{
for(u32 c = 0; c < 4; ++c)
{
for(u32 rgb = 0; rgb < 4; ++rgb)
{
u32 y = height - 1 - (c + block);
u32 x = rgb + i;
u32 dst_offset = (x + y * width) * 4;
u32 src_offset = (x + ((block + c) * width)) * 3;
/* RGB */
memcpy(&dst[dst_offset], &src[src_offset], 3);
/* Alpha */
dst[dst_offset + 3] = 0xFF;
}
}
}
}
}
static inline void _convertToRGBA8(u8 *dst, const u8 *src, u32 width, u32 height)
{
for (u32 y = 0; y < height; ++y)
for (u32 x = 0; x < width; ++x)
{
u32 i = (x + y * width) * 4;
dst[coordsRGBA8(x, y, width) + 1] = src[i];
dst[coordsRGBA8(x, y, width) + 32] = src[i + 1];
dst[coordsRGBA8(x, y, width) + 33] = src[i + 2];
dst[coordsRGBA8(x, y, width)] = src[i + 3];
}
}
static inline void _convertToRGB565(u8 *dst, const u8 *src, u32 width, u32 height)
{
u16 *dst16 = (u16 *)dst;
for (u32 y = 0; y < height; ++y)
for (u32 x = 0; x < width; ++x)
{
u32 i = (x + y * width) * 4;
dst16[coordsRGB565(x, y, width)] = ((src[i] & 0xF8) << 8) | ((src[i + 1] & 0xFC) << 3) | (src[i + 2] >> 3);
}
}
static inline u16 rgb8ToRGB565(u8 *color)
{
return ((color[0] >> 3) << 11) | ((color[1] >> 2) << 5) | (color[2] >> 3);
}
static int colorDistance(const u8 *c0, const u8 *c1)
{
return (c1[0] - c0[0]) * (c1[0] - c0[0]) + (c1[1] - c0[1]) * (c1[1] - c0[1]) + (c1[2] - c0[2]) * (c1[2] - c0[2]);
}
static void getBaseColors(u8 *color0, u8 *color1, const u8 *srcBlock)
{
int maxDistance = -1;
for (int i = 0; i < 15; ++i)
for (int j = i + 1; j < 16; ++j)
{
int distance = colorDistance(srcBlock + i * 4, srcBlock + j * 4);
if (distance > maxDistance)
{
maxDistance = distance;
*(u32 *)color0 = ((u32 *)srcBlock)[i];
*(u32 *)color1 = ((u32 *)srcBlock)[j];
}
}
if (rgb8ToRGB565(color0) < rgb8ToRGB565(color1))
{
u32 tmp;
tmp = *(u32 *)color0;
*(u32 *)color0 = *(u32 *)color1;
*(u32 *)color1 = tmp;
}
}
static u32 colorIndices(const u8 *color0, const u8 *color1, const u8 *srcBlock)
{
u16 colors[4][4];
u32 res = 0;
// Make the 4 colors available in the block
colors[0][0] = (color0[0] & 0xF8) | (color0[0] >> 5);
colors[0][1] = (color0[1] & 0xFC) | (color0[1] >> 6);
colors[0][2] = (color0[2] & 0xF8) | (color0[2] >> 5);
colors[1][0] = (color1[0] & 0xF8) | (color1[0] >> 5);
colors[1][1] = (color1[1] & 0xFC) | (color1[1] >> 6);
colors[1][2] = (color1[2] & 0xF8) | (color1[2] >> 5);
colors[2][0] = (2 * colors[0][0] + 1 * colors[1][0]) / 3;
colors[2][1] = (2 * colors[0][1] + 1 * colors[1][1]) / 3;
colors[2][2] = (2 * colors[0][2] + 1 * colors[1][2]) / 3;
colors[3][0] = (1 * colors[0][0] + 2 * colors[1][0]) / 3;
colors[3][1] = (1 * colors[0][1] + 2 * colors[1][1]) / 3;
colors[3][2] = (1 * colors[0][2] + 2 * colors[1][2]) / 3;
for (int i = 15; i >= 0; --i)
{
int c0 = srcBlock[i * 4 + 0];
int c1 = srcBlock[i * 4 + 1];
int c2 = srcBlock[i * 4 + 2];
int d0 = abs(colors[0][0] - c0) + abs(colors[0][1] - c1) + abs(colors[0][2] - c2);
int d1 = abs(colors[1][0] - c0) + abs(colors[1][1] - c1) + abs(colors[1][2] - c2);
int d2 = abs(colors[2][0] - c0) + abs(colors[2][1] - c1) + abs(colors[2][2] - c2);
int d3 = abs(colors[3][0] - c0) + abs(colors[3][1] - c1) + abs(colors[3][2] - c2);
int b0 = d0 > d3;
int b1 = d1 > d2;
int b2 = d0 > d2;
int b3 = d1 > d3;
int b4 = d2 > d3;
int x0 = b1 & b2;
int x1 = b0 & b3;
int x2 = b0 & b4;
res |= (x2 | ((x0 | x1) << 1)) << ((15 - i) << 1);
}
return res;
}
static inline void _convertToCMPR(u8 *dst, const u8 *src, u32 width, u32 height)
{
u8 srcBlock[16 * 4];
u8 color0[4];
u8 color1[4];
for (u32 jj = 0; jj < height; jj += 8)
for (u32 ii = 0; ii < width; ii += 8)
for (u32 k = 0; k < 4; ++k)
{
int i = ii + ((k & 1) << 2);
int j = jj + ((k >> 1) << 2);
memcpy(srcBlock, src + (j * width + i) * 4, 16);
memcpy(srcBlock + 4 * 4, src + ((j + 1) * width + i) * 4, 16);
memcpy(srcBlock + 8 * 4, src + ((j + 2) * width + i) * 4, 16);
memcpy(srcBlock + 12 * 4, src + ((j + 3) * width + i) * 4, 16);
getBaseColors(color0, color1, srcBlock);
*(u16 *)dst = rgb8ToRGB565(color0);
dst += 2;
*(u16 *)dst = rgb8ToRGB565(color1);
dst += 2;
*(u32 *)dst = colorIndices(color0, color1, srcBlock);
dst += 4;
}
}
STexture::TexErr STexture::fromImageFile(const char *filename, u8 f, Alloc alloc, u32 minMipSize, u32 maxMipSize)
{
FILE *file = fopen(filename, "rb");
if(file == NULL)
{
strncpy((char*)filename+(strlen(filename)-3), "jp", 2);
file = fopen(filename, "rb");
}
if(file == NULL)
return TE_ERROR;
fseek(file, 0, SEEK_END);
u32 fileSize = ftell(file);
fseek(file, 0, SEEK_SET);
u8 *Image = NULL;
if(fileSize)
{
Image = (u8*)MEM2_alloc(fileSize);
if(Image != NULL)
fread(Image, 1, fileSize, file);
}
fclose(file);
TexErr result = TE_NOMEM;
if(Image != NULL)
{
if(strstr(filename, ".png") != NULL)
result = fromPNG(Image, f, alloc, minMipSize, maxMipSize);
else
result = fromJPG(Image, fileSize, f, alloc, minMipSize, maxMipSize);
MEM2_free(Image);
}
return result;
}
STexture::TexErr STexture::fromRAW(const u8 *buffer, u32 w, u32 h, u8 f, Alloc alloc, u32 minMipSize, u32 maxMipSize)
{
// Convert our raw stuff to a usable format
SmartBuf rawData = smartMem2Alloc(w * h * 4);
if(rawData.get() == NULL)
return TE_NOMEM;
_convertToFlippedRGBA(rawData.get(), buffer, w, h);
//Let the real work begin
SmartBuf tmpData;
u8 maxLODTmp = 0;
u8 minLODTmp = 0;
u32 baseWidth;
u32 baseHeight;
width = w;
height = h;
switch(f)
{
case GX_TF_RGBA8:
case GX_TF_RGB565:
case GX_TF_CMPR:
break;
default:
f = GX_TF_RGBA8;
}
format = f;
if (minMipSize > 0 || maxMipSize > 0)
_calcMipMaps(maxLODTmp, minLODTmp, baseWidth, baseHeight, w, h, minMipSize, maxMipSize);
if (maxLODTmp > 0)
{
DCFlushRange(rawData.get(), width * height * 4);
rawData = _genMipMaps(rawData.get(), width, height, maxLODTmp, baseWidth, baseHeight);
if(!rawData)
return TE_NOMEM;
u32 newWidth = baseWidth;
u32 newHeight = baseHeight;
for(int i = 0; i < minLODTmp; ++i)
{
newWidth >>= 1;
newHeight >>= 1;
}
switch(alloc)
{
case ALLOC_MEM2:
tmpData = smartMem2Alloc(fixGX_GetTexBufferSize(newWidth, newHeight, f, GX_TRUE, maxLODTmp - minLODTmp));
break;
case ALLOC_MALLOC:
tmpData = smartMemAlign32(fixGX_GetTexBufferSize(newWidth, newHeight, f, GX_TRUE, maxLODTmp - minLODTmp));
break;
}
u32 nWidth = newWidth;
u32 nHeight = newHeight;
u8 *pSrc = rawData.get();
if (minLODTmp > 0)
pSrc += fixGX_GetTexBufferSize(baseWidth, baseHeight, GX_TF_RGBA8, minLODTmp > 1 ? GX_TRUE : GX_FALSE, minLODTmp - 1);
u8 *pDst = tmpData.get();
for (u8 i = minLODTmp; i <= maxLODTmp; ++i)
{
switch(f)
{
case GX_TF_RGBA8:
_convertToRGBA8(pDst, pSrc, nWidth, nHeight);
break;
case GX_TF_RGB565:
_convertToRGB565(pDst, pSrc, nWidth, nHeight);
break;
case GX_TF_CMPR:
_convertToCMPR(pDst, pSrc, nWidth, nHeight);
break;
}
pSrc += nWidth * nHeight * 4;
pDst += GX_GetTexBufferSize(nWidth, nHeight, f, GX_FALSE, 0);
nWidth >>= 1;
nHeight >>= 1;
}
maxLOD = maxLODTmp - minLODTmp;
width = newWidth;
height = newHeight;
data = tmpData;
DCFlushRange(data.get(), fixGX_GetTexBufferSize(width, height, format, maxLOD > 0 ? GX_TRUE : GX_FALSE, maxLOD));
}
else
{
switch(alloc)
{
case ALLOC_MEM2:
tmpData = smartMem2Alloc(GX_GetTexBufferSize(w, h, format, GX_FALSE, 0));
break;
case ALLOC_MALLOC:
tmpData = smartMemAlign32(GX_GetTexBufferSize(w, h, format, GX_FALSE, 0));
break;
}
if(tmpData.get() == NULL)
{
rawData.release();
return TE_NOMEM;
}
switch(f)
{
case GX_TF_RGBA8:
_convertToRGBA8(tmpData.get(), rawData.get(), width, height);
break;
case GX_TF_RGB565:
_convertToRGB565(tmpData.get(), rawData.get(), width, height);
break;
case GX_TF_CMPR:
_convertToCMPR(tmpData.get(), rawData.get(), width, height);
break;
}
data = tmpData;
DCFlushRange(data.get(), GX_GetTexBufferSize(width, height, format, GX_FALSE, 0));
}
rawData.release();
return TE_OK;
}
STexture::TexErr STexture::fromJPG(const u8 *buffer, const u32 buffer_size, u8 f, Alloc alloc, u32 minMipSize, u32 maxMipSize)
{
VideoFrame VideoF;
decodeRealJpeg(buffer, buffer_size, VideoF);
if(!VideoF.getData() || (VideoF.getWidth() % 4) != 0 || (VideoF.getHeight() % 4) != 0)
return TE_ERROR;
return fromRAW(VideoF.getData(), VideoF.getWidth(), VideoF.getHeight(), f, alloc, minMipSize, maxMipSize);
}
STexture::TexErr STexture::fromPNG(const u8 *buffer, u8 f, Alloc alloc, u32 minMipSize, u32 maxMipSize)
{
PNGUPROP imgProp;
SmartBuf tmpData;
u8 maxLODTmp = 0;
u8 minLODTmp = 0;
u32 baseWidth;
u32 baseHeight;
IMGCTX ctx = PNGU_SelectImageFromBuffer(buffer);
if (ctx == 0) return STexture::TE_ERROR;
if (PNGU_GetImageProperties(ctx, &imgProp) != PNGU_OK)
{
PNGU_ReleaseImageContext(ctx);
return STexture::TE_ERROR;
}
if (imgProp.imgWidth > 1090 || imgProp.imgHeight > 1090)
{
PNGU_ReleaseImageContext(ctx);
return STexture::TE_ERROR;
}
switch (f)
{
case GX_TF_RGBA8:
case GX_TF_RGB565:
case GX_TF_CMPR:
break;
default:
f = (imgProp.imgColorType == PNGU_COLOR_TYPE_GRAY_ALPHA || imgProp.imgColorType == PNGU_COLOR_TYPE_RGB_ALPHA) ? GX_TF_RGBA8 : GX_TF_RGB565;
}
u32 pngWidth = imgProp.imgWidth & (f == GX_TF_CMPR ? ~7u : ~3u);
u32 pngHeight = imgProp.imgHeight & (f == GX_TF_CMPR ? ~7u : ~3u);
if (minMipSize > 0 || maxMipSize > 0)
STexture::_calcMipMaps(maxLODTmp, minLODTmp, baseWidth, baseHeight, imgProp.imgWidth, imgProp.imgHeight, minMipSize, maxMipSize);
if (maxLODTmp > 0)
{
u32 newWidth = baseWidth;
u32 newHeight = baseHeight;
for (int i = 0; i < minLODTmp; ++i)
{
newWidth >>= 1;
newHeight >>= 1;
}
SmartBuf tmpData2 = smartMem2Alloc(imgProp.imgWidth * imgProp.imgHeight * 4);
switch (alloc)
{
case ALLOC_MEM2:
tmpData = smartMem2Alloc(fixGX_GetTexBufferSize(newWidth, newHeight, f, GX_TRUE, maxLODTmp - minLODTmp));
break;
case ALLOC_MALLOC:
tmpData = smartMemAlign32(fixGX_GetTexBufferSize(newWidth, newHeight, f, GX_TRUE, maxLODTmp - minLODTmp));
break;
}
if (!tmpData || !tmpData2)
{
tmpData.release();
tmpData2.release();
PNGU_ReleaseImageContext(ctx);
return STexture::TE_NOMEM;
}
PNGU_DecodeToRGBA8(ctx, imgProp.imgWidth, imgProp.imgHeight, tmpData2.get(), 0, 0xFF);
PNGU_ReleaseImageContext(ctx);
DCFlushRange(tmpData2.get(), imgProp.imgWidth * imgProp.imgHeight * 4);
tmpData2 = STexture::_genMipMaps(tmpData2.get(), imgProp.imgWidth, imgProp.imgHeight, maxLODTmp, baseWidth, baseHeight);
if (!tmpData2) return STexture::TE_NOMEM;
u32 nWidth = newWidth;
u32 nHeight = newHeight;
u8 *pSrc = tmpData2.get();
if (minLODTmp > 0)
pSrc += fixGX_GetTexBufferSize(baseWidth, baseHeight, GX_TF_RGBA8, minLODTmp > 1 ? GX_TRUE : GX_FALSE, minLODTmp - 1);
u8 *pDst = tmpData.get();
for (u8 i = minLODTmp; i <= maxLODTmp; ++i)
{
switch (f)
{
case GX_TF_RGBA8:
_convertToRGBA8(pDst, pSrc, nWidth, nHeight);
break;
case GX_TF_RGB565:
_convertToRGB565(pDst, pSrc, nWidth, nHeight);
break;
case GX_TF_CMPR:
_convertToCMPR(pDst, pSrc, nWidth, nHeight);
break;
}
pSrc += nWidth * nHeight * 4;
pDst += GX_GetTexBufferSize(nWidth, nHeight, f, GX_FALSE, 0);
nWidth >>= 1;
nHeight >>= 1;
}
maxLOD = maxLODTmp - minLODTmp;
data = tmpData;
format = f;
width = newWidth;
height = newHeight;
DCFlushRange(data.get(), fixGX_GetTexBufferSize(width, height, format, maxLOD > 0 ? GX_TRUE : GX_FALSE, maxLOD));
}
else
{
switch (alloc)
{
case ALLOC_MEM2:
tmpData = smartMem2Alloc(GX_GetTexBufferSize(pngWidth, pngHeight, f, GX_FALSE, maxLOD));
break;
case ALLOC_MALLOC:
tmpData = smartMemAlign32(GX_GetTexBufferSize(pngWidth, pngHeight, f, GX_FALSE, maxLOD));
break;
}
if (!tmpData)
{
PNGU_ReleaseImageContext(ctx);
return STexture::TE_NOMEM;
}
format = f;
width = pngWidth;
height = pngHeight;
maxLOD = 0;
data = tmpData;
switch (f)
{
case GX_TF_RGBA8:
PNGU_DecodeTo4x4RGBA8(ctx, imgProp.imgWidth, imgProp.imgHeight, data.get(), 0xFF);
break;
case GX_TF_RGB565:
PNGU_DecodeTo4x4RGB565(ctx, imgProp.imgWidth, imgProp.imgHeight, data.get());
break;
case GX_TF_CMPR:
PNGU_DecodeToCMPR(ctx, imgProp.imgWidth, imgProp.imgHeight, data.get());
break;
}
PNGU_ReleaseImageContext(ctx);
DCFlushRange(data.get(), GX_GetTexBufferSize(width, height, format, GX_FALSE, 0));
}
return STexture::TE_OK;
}
void STexture::_resize(u8 *dst, u32 dstWidth, u32 dstHeight, const u8 *src, u32 srcWidth, u32 srcHeight)
{
float wc = (float)srcWidth / (float)dstWidth;
float hc = (float)srcHeight / (float)dstHeight;
float ax1;
float ay1;
for (u32 y = 0; y < dstHeight; ++y)
{
for (u32 x = 0; x < dstWidth; ++x)
{
float xf = ((float)x + 0.5f) * wc - 0.5f;
float yf = ((float)y + 0.5f) * hc - 0.5f;
u32 x0 = (int)xf;
u32 y0 = (int)yf;
if (x0 >= srcWidth - 1)
{
x0 = srcWidth - 2;
ax1 = 1.f;
}
else
ax1 = xf - (float)x0;
float ax0 = 1.f - ax1;
if (y0 >= srcHeight - 1)
{
y0 = srcHeight - 2;
ay1 = 1.f;
}
else
ay1 = yf - (float)y0;
float ay0 = 1.f - ay1;
u8 *pdst = dst + (x + y * dstWidth) * 4;
const u8 *psrc0 = src + (x0 + y0 * srcWidth) * 4;
const u8 *psrc1 = psrc0 + srcWidth * 4;
for (int c = 0; c < 3; ++c)
pdst[c] = (u8)((((float)psrc0[c] * ax0) + ((float)psrc0[4 + c] * ax1)) * ay0 + (((float)psrc1[c] * ax0) + ((float)psrc1[4 + c] * ax1)) * ay1 + 0.5f);
pdst[3] = 0xFF; // Alpha not handled, it would require using it in the weights for color channels, easy but slower and useless so far.
}
}
}
// For powers of two
void STexture::_resizeD2x2(u8 *dst, const u8 *src, u32 srcWidth, u32 srcHeight)
{
#if 0
u32 *dst32 = (u32 *)dst;
const u32 *src32 = (const u32 *)src;
u32 i = 0, i0 = 0, i1 = 1, i2 = srcWidth, i3 = srcWidth + 1, dstWidth = srcWidth >> 1, dstHeight = srcHeight >> 1;
for (u32 y = 0; y < dstHeight; ++y)
{
for (u32 x = 0; x < dstWidth; ++x)
{
dst32[i] = (((src32[i0] & 0xFCFCFCFC) >> 2)
+ ((src32[i1] & 0xFCFCFCFC) >> 2)
+ ((src32[i2] & 0xFCFCFCFC) >> 2)
+ ((src32[i3] & 0xFCFCFCFC) >> 2)) | 0x000000FF;
++i;
i0 += 2;
i1 += 2;
i2 += 2;
i3 += 2;
}
i0 += srcWidth;
i1 += srcWidth;
i2 += srcWidth;
i3 += srcWidth;
}
#else
u32 i = 0, i0 = 0, i1 = 4, i2 = srcWidth * 4, i3 = (srcWidth + 1) * 4, dstWidth = srcWidth >> 1, dstHeight = srcHeight >> 1, w4 = srcWidth * 4;
for (u32 y = 0; y < dstHeight; ++y)
{
for (u32 x = 0; x < dstWidth; ++x)
{
dst[i] = ((u32)src[i0] + src[i1] + src[i2] + src[i3]) >> 2;
dst[i + 1] = ((u32)src[i0 + 1] + src[i1 + 1] + src[i2 + 1] + src[i3 + 1]) >> 2;
dst[i + 2] = ((u32)src[i0 + 2] + src[i1 + 2] + src[i2 + 2] + src[i3 + 2]) >> 2;
dst[i + 3] = ((u32)src[i0 + 3] + src[i1 + 3] + src[i2 + 3] + src[i3 + 3]) >> 2;
i += 4;
i0 += 8;
i1 += 8;
i2 += 8;
i3 += 8;
}
i0 += w4;
i1 += w4;
i2 += w4;
i3 += w4;
}
#endif
}
void STexture::_calcMipMaps(u8 &maxLOD, u8 &minLOD, u32 &lod0Width, u32 &lod0Height, u32 width, u32 height, u32 minSize, u32 maxSize)
{
if (minSize < 8) minSize = 8;
lod0Width = upperPower(width);
lod0Height = upperPower(height);
if (width - (lod0Width >> 1) < lod0Width >> 3 && minSize <= lod0Width >> 1)
lod0Width >>= 1;
if (height - (lod0Height >> 1) < lod0Height >> 3 && minSize <= lod0Height >> 1)
lod0Height >>= 1;
maxLOD = 0;
for (u32 i = min(lod0Width, lod0Height); i > minSize; i >>= 1)
++maxLOD;
minLOD = 0;
if (maxSize > 8)
for (u32 i = max(lod0Width, lod0Height); i > maxSize; i >>= 1)
++minLOD;
if (minLOD > maxLOD)
maxLOD = minLOD;
}
SmartBuf STexture::_genMipMaps(const u8 *src, u32 width, u32 height, u8 maxLOD, u32 lod0Width, u32 lod0Height)
{
u32 bufSize = fixGX_GetTexBufferSize(lod0Width, lod0Height, GX_TF_RGBA8, GX_TRUE, maxLOD);
SmartBuf dst = smartMem2Alloc(bufSize);
if (!dst) return dst;
STexture::_resize(dst.get(), lod0Width, lod0Height, src, width, height);
DCFlushRange(dst.get(), lod0Width * lod0Height * 4);
u32 nWidth = lod0Width;
u32 nHeight = lod0Height;
u8 *pDst = dst.get();
for (u8 i = 0; i < maxLOD; ++i)
{
u8 *pSrc = pDst;
pDst += nWidth * nHeight * 4;
STexture::_resizeD2x2(pDst, pSrc, nWidth, nHeight);
DCFlushRange(pDst, nWidth * nWidth);
nWidth >>= 1;
nHeight >>= 1;
}
return dst;
}