From 77b967f4d5f6917f9583cef0b9c08928e03237ab Mon Sep 17 00:00:00 2001 From: Romain TISSERAND Date: Thu, 29 Oct 2020 21:07:28 +0100 Subject: [PATCH] Fix crash loading CHD on some ARM boards (minimal changes) --- core/cd_hw/libchdr/src/chd.c | 65 ++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/core/cd_hw/libchdr/src/chd.c b/core/cd_hw/libchdr/src/chd.c index cba334f..4e1ad93 100644 --- a/core/cd_hw/libchdr/src/chd.c +++ b/core/cd_hw/libchdr/src/chd.c @@ -201,6 +201,7 @@ typedef struct _zlib_allocator zlib_allocator; struct _zlib_allocator { UINT32 * allocptr[MAX_ZLIB_ALLOCS]; + UINT32 * allocptr2[MAX_ZLIB_ALLOCS]; }; typedef struct _zlib_codec_data zlib_codec_data; @@ -220,10 +221,11 @@ struct _lzma_allocator void (*Free)(void *p, void *address); /* address can be 0 */ void (*FreeSz)(void *p, void *address, size_t size); /* address can be 0 */ uint32_t* allocptr[MAX_LZMA_ALLOCS]; + uint32_t* allocptr2[MAX_LZMA_ALLOCS]; }; typedef struct _lzma_codec_data lzma_codec_data; -struct _lzma_codec_data +struct _lzma_codec_data { CLzmaDec decoder; lzma_allocator allocator; @@ -375,6 +377,7 @@ void lzma_allocator_init(void* p) /* reset pointer list */ memset(codec->allocptr, 0, sizeof(codec->allocptr)); + memset(codec->allocptr2, 0, sizeof(codec->allocptr2)); codec->Alloc = lzma_fast_alloc; codec->Free = lzma_fast_free; } @@ -403,11 +406,16 @@ void lzma_allocator_free(void* p ) *------------------------------------------------- */ +/* Huge alignment values for possible SIMD optimization by compiler (NEON, SSE, AVX) */ +#define LZMA_MIN_ALIGNMENT_BITS 512 +#define LZMA_MIN_ALIGNMENT_BYTES (LZMA_MIN_ALIGNMENT_BITS / 8) + void *lzma_fast_alloc(void *p, size_t size) { int scan; - uint32_t *addr; + uint32_t *addr = NULL; lzma_allocator *codec = (lzma_allocator *)(p); + uintptr_t vaddr = 0; /* compute the size, rounding to the nearest 1k */ size = (size + 0x3ff) & ~0x3ff; @@ -420,28 +428,37 @@ void *lzma_fast_alloc(void *p, size_t size) { /* set the low bit of the size so we don't match next time */ *ptr |= 1; - return ptr + 1; + + /* return aligned address of the block */ + return codec->allocptr2[scan]; } } /* alloc a new one and put it into the list */ - addr = (uint32_t *)malloc(sizeof(uint8_t) * (size + sizeof(uint32_t))); + addr = (uint32_t *)malloc(size + sizeof(uint32_t) + LZMA_MIN_ALIGNMENT_BYTES); if (addr==NULL) return NULL; - for (scan = 0; scan < MAX_LZMA_ALLOCS; scan++) + for (int scan = 0; scan < MAX_LZMA_ALLOCS; scan++) { if (codec->allocptr[scan] == NULL) { + /* store block address */ codec->allocptr[scan] = addr; + + /* compute aligned address, store it */ + vaddr = (uintptr_t)addr; + vaddr = (vaddr + sizeof(uint32_t) + (LZMA_MIN_ALIGNMENT_BYTES-1)) & (~(LZMA_MIN_ALIGNMENT_BYTES-1)); + codec->allocptr2[scan] = (uint32_t*)vaddr; break; } } /* set the low bit of the size so we don't match next time */ *addr = size | 1; - return addr + 1; -} + /* return aligned address */ + return (void*)vaddr; +} /*------------------------------------------------- * lzma_fast_free - fast free for lzma, which @@ -452,21 +469,22 @@ void *lzma_fast_alloc(void *p, size_t size) void lzma_fast_free(void *p, void *address) { int scan; - uint32_t *ptr; - lzma_allocator *codec; + uint32_t *ptr = NULL; + lzma_allocator *codec = NULL; + if (address == NULL) return; codec = (lzma_allocator *)(p); /* find the hunk */ - ptr = (uint32_t *)(address) - 1; + ptr = (uint32_t *)address; for (scan = 0; scan < MAX_LZMA_ALLOCS; scan++) { - if (ptr == codec->allocptr[scan]) + if (ptr == codec->allocptr2[scan]) { /* clear the low bit of the size to allow matches */ - *ptr &= ~1; + *codec->allocptr[scan] &= ~1; return; } } @@ -2458,9 +2476,14 @@ static chd_error zlib_codec_decompress(void *codec, const uint8_t *src, uint32_t allocates and frees memory frequently -------------------------------------------------*/ +/* Huge alignment values for possible SIMD optimization by compiler (NEON, SSE, AVX) */ +#define ZLIB_MIN_ALIGNMENT_BITS 512 +#define ZLIB_MIN_ALIGNMENT_BYTES (ZLIB_MIN_ALIGNMENT_BITS / 8) + static voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size) { zlib_allocator *alloc = (zlib_allocator *)opaque; + uintptr_t paddr = 0; UINT32 *ptr; int i; @@ -2475,12 +2498,14 @@ static voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size) { /* set the low bit of the size so we don't match next time */ *ptr |= 1; - return ptr + 1; + + /* return aligned block address */ + return (voidpf)(alloc->allocptr2[i]); } } /* alloc a new one */ - ptr = (UINT32 *)malloc(size + sizeof(UINT32)); + ptr = (UINT32 *)malloc(size + sizeof(UINT32) + ZLIB_MIN_ALIGNMENT_BYTES); if (!ptr) return NULL; @@ -2489,12 +2514,16 @@ static voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size) if (!alloc->allocptr[i]) { alloc->allocptr[i] = ptr; + paddr = (((uintptr_t)ptr) + sizeof(UINT32) + (ZLIB_MIN_ALIGNMENT_BYTES-1)) & (~(ZLIB_MIN_ALIGNMENT_BYTES-1)); + alloc->allocptr2[i] = (uint32_t*)paddr; break; } /* set the low bit of the size so we don't match next time */ *ptr = size | 1; - return ptr + 1; + + /* return aligned block address */ + return (voidpf)paddr; } @@ -2506,15 +2535,15 @@ static voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size) static void zlib_fast_free(voidpf opaque, voidpf address) { zlib_allocator *alloc = (zlib_allocator *)opaque; - UINT32 *ptr = (UINT32 *)address - 1; + UINT32 *ptr = (UINT32 *)address; int i; /* find the hunk */ for (i = 0; i < MAX_ZLIB_ALLOCS; i++) - if (ptr == alloc->allocptr[i]) + if (ptr == alloc->allocptr2[i]) { /* clear the low bit of the size to allow matches */ - *ptr &= ~1; + *(alloc->allocptr[i]) &= ~1; return; } }