From 43fd560e55569b17edc15f7d8e6b6d5134875e7a Mon Sep 17 00:00:00 2001 From: ekeeke31 Date: Wed, 28 Jan 2009 16:43:15 +0000 Subject: [PATCH] optimized some float/int conversions, added -fomit-frame-pointer flag --- Makefile.gc | 2 +- Makefile.wii | 2 +- source/ngc/config.h | 4 +- source/sound/fm.c | 12 +++++- source/sound/sn76489.c | 2 +- source/sound/sound.c | 91 +++++++++++++++--------------------------- source/sound/sound.h | 1 + source/system.c | 52 ++++++++++++++---------- source/system.h | 4 +- source/vdp.c | 48 ++++++++++------------ 10 files changed, 101 insertions(+), 117 deletions(-) diff --git a/Makefile.gc b/Makefile.gc index 1d3ea75..b4b6c50 100644 --- a/Makefile.gc +++ b/Makefile.gc @@ -26,7 +26,7 @@ INCLUDES := source source/m68k source/z80 source/sound source/sound/SRC source/n # options for code generation #--------------------------------------------------------------------------------- -CFLAGS = -O3 -Wall $(MACHDEP) $(INCLUDE) -DWORDS_BIGENDIAN -DNGC="1" -DHW_DOL +CFLAGS = -O3 -fomit-frame-pointer -Wall $(MACHDEP) $(INCLUDE) -DWORDS_BIGENDIAN -DNGC="1" -DHW_DOL CXXFLAGS = $(CFLAGS) LDFLAGS = $(MACHDEP) -Wl,-Map,$(notdir $@).map diff --git a/Makefile.wii b/Makefile.wii index 01a9390..04a991a 100644 --- a/Makefile.wii +++ b/Makefile.wii @@ -26,7 +26,7 @@ INCLUDES := source source/m68k source/z80 source/sound source/sound/SRC source/n # options for code generation #--------------------------------------------------------------------------------- -CFLAGS = -O3 -mrvl -Wall $(MACHDEP) -Wno-strict-aliasing $(INCLUDE) -DWORDS_BIGENDIAN -DNGC="1" -DHW_RVL +CFLAGS = -O3 -fomit-frame-pointer -mrvl -Wall $(MACHDEP) -Wno-strict-aliasing $(INCLUDE) -DWORDS_BIGENDIAN -DNGC="1" -DHW_RVL CXXFLAGS = $(CFLAGS) LDFLAGS = $(MACHDEP) -Wl,-Map,$(notdir $@).map diff --git a/source/ngc/config.h b/source/ngc/config.h index 94a5111..56c7af7 100644 --- a/source/ngc/config.h +++ b/source/ngc/config.h @@ -31,8 +31,8 @@ typedef struct { char version[15]; - double psg_preamp; - double fm_preamp; + int32 psg_preamp; + int32 fm_preamp; uint8 boost; uint8 filter; uint8 hq_fm; diff --git a/source/sound/fm.c b/source/sound/fm.c index eb2c2d8..ea45dbf 100644 --- a/source/sound/fm.c +++ b/source/sound/fm.c @@ -1804,8 +1804,16 @@ void YM2612UpdateOne(int **buffer, int length) Limit(rt,MAXOUT,MINOUT); /* buffering */ - bufL[i] = lt; - bufR[i] = rt; + if (src_buffer) + { + src_buffer[i*2] = (float) lt / (8.0 * 0x10000000); + src_buffer[i*2 + 1] = (float) rt / (8.0 * 0x10000000); + } + else + { + bufL[i] = lt; + bufR[i] = rt; + } /* timer A control */ INTERNAL_TIMER_A(); diff --git a/source/sound/sn76489.c b/source/sound/sn76489.c index 2b6f981..a89c0be 100644 --- a/source/sound/sn76489.c +++ b/source/sound/sn76489.c @@ -48,7 +48,7 @@ static SN76489_Context SN76489[MAX_SN76489]; void SN76489_Init(int which, int PSGClockValue, int SamplingRate) { SN76489_Context *p = &SN76489[which]; - p->dClock=(float)PSGClockValue/16/SamplingRate; + p->dClock=(float)(PSGClockValue)/16.0/(float)SamplingRate; SN76489_Config(which, MUTE_ALLON, VOL_FULL, FB_SEGAVDP, SRW_SEGAVDP, 1); SN76489_Reset(which); } diff --git a/source/sound/sound.c b/source/sound/sound.c index b30ee36..763135e 100644 --- a/source/sound/sound.c +++ b/source/sound/sound.c @@ -22,7 +22,6 @@ ****************************************************************************************/ #include "shared.h" -#include "samplerate.h" #define CLOCK_NTSC 53693175 #define CLOCK_PAL 53203424 @@ -34,13 +33,10 @@ void (*_YM2612_Update)(int **buf, int length); int (*_YM2612_Reset)(void); /* cycle-accurate samples */ -static double m68cycles_per_sample[2]; -static double z80cycles_per_sample[2]; +static int m68cycles_per_sample[2]; -/* libsamplerate buffers (max. is 488 cycles per line x 313 lines / 144) */ -static SRC_DATA src_data; -static float src_in[1061*2]; -static int src_buffer[2][1061]; +/* pointer to current SRC buffer */ +float *src_buffer; /* YM2612 register arrays */ int fm_reg[2][0x100]; @@ -48,14 +44,14 @@ int fm_reg[2][0x100]; /* return the number of samples that should have been rendered so far */ static inline uint32 fm_sample_cnt(uint8 is_z80) { - if (is_z80) return (uint32) ((double)(count_z80 + current_z80 - z80_ICount) / z80cycles_per_sample[0]); - else return (uint32) ((double) count_m68k / m68cycles_per_sample[0]); + if (is_z80) return ((count_z80 + current_z80 - z80_ICount) * 15) / (7 * m68cycles_per_sample[0]); + else return count_m68k / m68cycles_per_sample[0]; } static inline uint32 psg_sample_cnt(uint8 is_z80) { - if (is_z80) return (uint32) ((double)(count_z80 + current_z80 - z80_ICount) / z80cycles_per_sample[1]); - else return (uint32) ((double) count_m68k / m68cycles_per_sample[1]); + if (is_z80) return ((count_z80 + current_z80 - z80_ICount) * 15) / (7 * m68cycles_per_sample[1]); + else return count_m68k / m68cycles_per_sample[1]; } /* update FM samples */ @@ -64,16 +60,12 @@ static inline void fm_update() if(snd.fm.curStage - snd.fm.lastStage > 0) { int *tempBuffer[2]; - - if (config.hq_fm && !config.fm_core) + tempBuffer[0] = snd.fm.buffer[0] + snd.fm.lastStage; + tempBuffer[1] = snd.fm.buffer[1] + snd.fm.lastStage; + + if (src_buffer) { - tempBuffer[0] = src_buffer[0] + snd.fm.lastStage; - tempBuffer[1] = src_buffer[1] + snd.fm.lastStage; - } - else - { - tempBuffer[0] = snd.fm.buffer[0] + snd.fm.lastStage; - tempBuffer[1] = snd.fm.buffer[1] + snd.fm.lastStage; + src_buffer = src_data.data_in + (snd.fm.lastStage * 2); } _YM2612_Update(tempBuffer, snd.fm.curStage - snd.fm.lastStage); @@ -97,28 +89,20 @@ void sound_init(int rate) double vclk = (vdp_pal ? (double)CLOCK_PAL : (double)CLOCK_NTSC) / 7.0; /* 68000 and YM2612 clock */ double zclk = (vdp_pal ? (double)CLOCK_PAL : (double)CLOCK_NTSC) / 15.0; /* Z80 and SN76489 clock */ - /* cycle-accurate FM samples */ - if (config.hq_fm && !config.fm_core) - { - m68cycles_per_sample[0] = 144.0; - z80cycles_per_sample[0] = (144.0 * 7.0) / 15.0; + /* cycle-accurate samples */ + m68cycles_per_sample[0] = (m68cycles_per_line * lines_per_frame * vdp_rate) / rate; + m68cycles_per_sample[1] = (m68cycles_per_line * lines_per_frame * vdp_rate) / rate; - /* initialize samplerate converter data */ - src_data.data_in = src_in; - src_data.data_out = snd.fm.src_out; - src_data.input_frames = (int)(((double)m68cycles_per_line * (double)lines_per_frame / 144.0) + 0.5); - src_data.output_frames = rate / vdp_rate; - src_data.src_ratio = (double)src_data.output_frames / (double)src_data.input_frames; - } - else + /* YM2612 is emulated at the original frequency */ + src_buffer = 0; + if (src_data.data_in) { - m68cycles_per_sample[0] = ((double)m68cycles_per_line * (double)lines_per_frame) / (double) (rate / vdp_rate); - z80cycles_per_sample[0] = ((double)z80cycles_per_line * (double)lines_per_frame) / (double) (rate / vdp_rate); - } + /* YM2612 original frequency is VCLK/144 */ + m68cycles_per_sample[0] = 144; - /* cycle-accurate PSG samples */ - m68cycles_per_sample[1] = ((double)m68cycles_per_line * (double)lines_per_frame) / (double) (rate / vdp_rate); - z80cycles_per_sample[1] = ((double)z80cycles_per_line * (double)lines_per_frame) / (double) (rate / vdp_rate); + /* Initialize SRC buffer */ + src_buffer = src_data.data_in; + } /* initialize sound chips */ SN76489_Init(0, (int)zclk, rate); @@ -145,7 +129,7 @@ void sound_init(int rate) void sound_update(void) { /* finalize sound buffers */ - snd.fm.curStage = (config.hq_fm && !config.fm_core) ? src_data.input_frames : snd.buffer_size; + snd.fm.curStage = (src_data.data_in) ? src_data.input_frames : snd.buffer_size; snd.psg.curStage = snd.buffer_size; /* update last samples (if needed) */ @@ -153,45 +137,34 @@ void sound_update(void) psg_update(); /* Resampling */ - if (config.hq_fm && !config.fm_core) + if (src_data.data_in) { - double scaled_value ; - int len = src_data.input_frames; - - /* this is basically libsamplerate "src_int_to_float_array" function, adapted to interlace samples */ - while (len) - { - len -- ; - src_in[len*2] = (float) (src_buffer[0] [len] / (8.0 * 0x10000000)); - src_in[len*2 + 1] = (float) (src_buffer[1] [len] / (8.0 * 0x10000000)); - } - /* samplerate conversion */ src_simple (&src_data, (config.hq_fm&1) ? SRC_LINEAR : SRC_SINC_FASTEST, 2); /* this is basically libsamplerate "src_float_to_int_array" function, adapted to interlace samples */ - len = snd.buffer_size; + double scaled_value; + int len = snd.buffer_size; while (len) { len -- ; - - scaled_value = snd.fm.src_out[len*2] * (8.0 * 0x10000000); + scaled_value = src_data.data_out[len*2] * (8.0 * 0x10000000); if (scaled_value >= (1.0 * 0x7FFFFFFF)) snd.fm.buffer[0][len] = 0x7fffffff; else if (scaled_value <= (-8.0 * 0x10000000)) snd.fm.buffer[0][len] = -1 - 0x7fffffff; else - snd.fm.buffer[0][len] = (long)scaled_value; + snd.fm.buffer[0][len] = lrint(scaled_value); - scaled_value = snd.fm.src_out[len*2+1] * (8.0 * 0x10000000); + scaled_value = src_data.data_out[len*2+1] * (8.0 * 0x10000000); if (scaled_value >= (1.0 * 0x7FFFFFFF)) snd.fm.buffer[1][len] = 0x7fffffff; else if (scaled_value <= (-8.0 * 0x10000000)) snd.fm.buffer[1][len] = -1 - 0x7fffffff; else - snd.fm.buffer[1][len] = (long)scaled_value; - } + snd.fm.buffer[1][len] = lrint(scaled_value); } + } /* reset samples count */ snd.fm.curStage = 0; diff --git a/source/sound/sound.h b/source/sound/sound.h index 818d79d..3bbf71a 100644 --- a/source/sound/sound.h +++ b/source/sound/sound.h @@ -28,6 +28,7 @@ extern int fm_reg[2][0x100]; extern double fm_timera_tab[0x400]; extern double fm_timerb_tab[0x100]; +extern float *src_buffer; /* Function prototypes */ extern void sound_init(int rate); diff --git a/source/system.c b/source/system.c index e3693c2..b5cca5c 100644 --- a/source/system.c +++ b/source/system.c @@ -35,6 +35,7 @@ uint32 count_z80; uint32 line_z80; int32 current_z80; uint8 system_hw; +SRC_DATA src_data; static inline void audio_update (void); @@ -65,9 +66,13 @@ void system_reset (void) SN76489_Reset(0); /* Sound Buffers */ - memset (snd.psg.buffer, 0, SND_SIZE); - memset (snd.fm.buffer[0], 0, SND_SIZE*2); - memset (snd.fm.buffer[1], 0, SND_SIZE*2); + if (snd.psg.buffer) memset(snd.psg.buffer, 0, SND_SIZE); + if (snd.fm.buffer[0]) memset(snd.fm.buffer[0], 0, SND_SIZE*2); + if (snd.fm.buffer[1]) memset(snd.fm.buffer[1], 0, SND_SIZE*2); + + /* SRC */ + if (src_data.data_in) memset(src_data.data_in, 0, src_data.input_frames * 2 * sizeof(float)); + if (src_data.data_out) memset(src_data.data_out,0, src_data.output_frames * 2 * sizeof(float)); } /**************************************************************** @@ -275,28 +280,30 @@ int audio_init (int rate) snd.buffer[0] = (int16 *) malloc(SND_SIZE); snd.buffer[1] = (int16 *) malloc(SND_SIZE); if (!snd.buffer[0] || !snd.buffer[1]) return (-1); - memset (snd.buffer[0], 0, SND_SIZE); - memset (snd.buffer[1], 0, SND_SIZE); #endif /* YM2612 stream buffers */ snd.fm.buffer[0] = (int *)malloc (SND_SIZE*2); snd.fm.buffer[1] = (int *)malloc (SND_SIZE*2); if (!snd.fm.buffer[0] || !snd.fm.buffer[1]) return (-1); - memset (snd.fm.buffer[0], 0, SND_SIZE*2); - memset (snd.fm.buffer[1], 0, SND_SIZE*2); - /* SRC buffers */ + /* YM2612 resampling */ + src_data.data_in = NULL; + src_data.data_out = NULL; if (config.hq_fm && !config.fm_core) { - snd.fm.src_out = (float *) malloc(snd.buffer_size*2*sizeof(float)); - if (!snd.fm.src_out) return (-1); + /* initialize SRC */ + src_data.input_frames = (int)(((double)m68cycles_per_line * (double)lines_per_frame / 144.0) + 0.5); + src_data.output_frames = snd.buffer_size; + src_data.data_in = (float *)malloc(src_data.input_frames * 2 * sizeof(float)); + src_data.data_out = (float *)malloc(src_data.output_frames * 2 * sizeof(float)); + src_data.src_ratio = (double)src_data.output_frames / (double)src_data.input_frames; + if (!src_data.data_in || !src_data.data_out) return (-1); } /* SN76489 stream buffers */ snd.psg.buffer = (int16 *)malloc (SND_SIZE); if (!snd.psg.buffer) return (-1); - memset (snd.psg.buffer, 0, SND_SIZE); /* Set audio enable flag */ snd.enabled = 1; @@ -310,12 +317,13 @@ int audio_init (int rate) void audio_shutdown(void) { /* free sound buffers */ - if (snd.buffer[0]) free(snd.buffer[0]); - if (snd.buffer[1]) free(snd.buffer[1]); - if (snd.fm.buffer[0]) free(snd.fm.buffer[0]); - if (snd.fm.buffer[1]) free(snd.fm.buffer[1]); - if (snd.fm.src_out) free(snd.fm.src_out); - if (snd.psg.buffer) free(snd.psg.buffer); + if (snd.buffer[0]) free(snd.buffer[0]); + if (snd.buffer[1]) free(snd.buffer[1]); + if (snd.fm.buffer[0]) free(snd.fm.buffer[0]); + if (snd.fm.buffer[1]) free(snd.fm.buffer[1]); + if (snd.psg.buffer) free(snd.psg.buffer); + if (src_data.data_in) free(src_data.data_in); + if (src_data.data_out) free(src_data.data_out); } static int ll, rr; @@ -324,8 +332,8 @@ static inline void audio_update (void) { int i; int l, r; - double psg_preamp = config.psg_preamp; - double fm_preamp = config.fm_preamp; + int psg_preamp = config.psg_preamp; + int fm_preamp = config.fm_preamp; int boost = config.boost; int filter = config.filter; @@ -339,9 +347,9 @@ static inline void audio_update (void) /* mix samples */ for (i = 0; i < snd.buffer_size; i ++) { - l = r = (int) ((double)snd.psg.buffer[i] * psg_preamp); - l += (int) ((double)snd.fm.buffer[0][i] * fm_preamp); - r += (int) ((double)snd.fm.buffer[1][i] * fm_preamp); + l = r = (snd.psg.buffer[i] * psg_preamp) / 100; + l += ((snd.fm.buffer[0][i] * fm_preamp) / 100); + r += ((snd.fm.buffer[1][i] * fm_preamp) / 100); snd.fm.buffer[0][i] = 0; snd.fm.buffer[1][i] = 0; snd.psg.buffer[i] = 0; diff --git a/source/system.h b/source/system.h index 69ec40f..9c39356 100644 --- a/source/system.h +++ b/source/system.h @@ -24,6 +24,8 @@ #ifndef _SYSTEM_H_ #define _SYSTEM_H_ +#include "samplerate.h" + #define SYSTEM_GENESIS 0 #define SYSTEM_MEGADRIVE 1 #define SYSTEM_PICO 2 @@ -66,7 +68,6 @@ typedef struct int curStage; int lastStage; int *buffer[2]; - float *src_out; /* SRC conversion buffer */ } fm; struct { @@ -86,6 +87,7 @@ extern uint32 count_z80; extern uint32 line_z80; extern int32 current_z80; extern uint8 system_hw; +extern SRC_DATA src_data; /* Function prototypes */ extern void system_init (void); diff --git a/source/vdp.c b/source/vdp.c index 083d934..9810bf9 100644 --- a/source/vdp.c +++ b/source/vdp.c @@ -91,9 +91,6 @@ static uint16 sat_base_mask; /* Base bits of SAT */ static uint16 sat_addr_mask; /* Index bits of SAT */ static uint32 dma_endCycles; /* 68k cycles to DMA end */ static uint8 dma_type; /* Type of DMA */ -static double vdp_timings[4][4]; /* DMA timings */ - -static inline void vdp_reg_w(unsigned int r, unsigned int d); /* DMA Timings @@ -122,31 +119,22 @@ static inline void vdp_reg_w(unsigned int r, unsigned int d); CRAM or VSRAM for a 68K > VDP transfer, in which case it is in words. */ -static const uint8 dma_rates[16] = { - 8, 9, 83 , 102, /* 68K to VRAM */ - 16, 18, 167, 205, /* 68K to CRAM or VSRAM */ - 15, 17, 166, 204, /* DMA fill */ - 8, 9, 83 , 102, /* DMA Copy */ +static const uint32 dma_rates[16] = { + 8, 83, 9, 102, /* 68K to VRAM (1 word = 2 bytes) */ + 16, 167, 18, 205, /* 68K to CRAM or VSRAM */ + 15, 166, 17, 204, /* DMA fill */ + 8, 83, 9, 102, /* DMA Copy */ }; /* Function prototypes */ static inline void data_write(unsigned int data); +static inline void vdp_reg_w(unsigned int r, unsigned int d); /*--------------------------------------------------------------------------*/ /* Init, reset, shutdown functions */ /*--------------------------------------------------------------------------*/ void vdp_init(void) { - /* reinitialize DMA timings table */ - int i; - for (i=0; i<4; i++) - { - vdp_timings[0][i] = ((double)m68cycles_per_line) / ((double) dma_rates[i]); - vdp_timings[1][i] = ((double)m68cycles_per_line) / ((double) dma_rates[i + 4]); - vdp_timings[2][i] = ((double)m68cycles_per_line) / ((double) dma_rates[i + 8]); - vdp_timings[3][i] = ((double)m68cycles_per_line) / ((double) dma_rates[i + 12]); - } - /* PAL/NTSC timings */ vdp_rate = vdp_pal ? 50 : 60; lines_per_frame = vdp_pal ? 313 : 262; @@ -280,24 +268,27 @@ void vdp_restore(uint8 *vdp_regs) /* Update DMA timings (this is call on start of DMA and then at the start of each scanline) */ void dma_update() { - int32 left_cycles; - uint32 dma_cycles, dma_bytes; - uint8 index = 0; + int dma_cycles = 0; - /* get the appropriate tranfer rate (bytes/line) for this DMA operation */ - if ((status&8) || !(reg[1] & 0x40)) index = 2; /* VBLANK or Display OFF */ - index += (reg[12] & 1); /* 32 or 40 Horizontal Cells */ + /* DMA timings table index */ + int index = (4 * dma_type) + ((reg[12] & 1)*2); + if ((status&8) || !(reg[1] & 0x40)) index++; - /* calculate transfer quantity for the remaining 68k cycles */ - left_cycles = line_m68k + m68cycles_per_line - count_m68k; + /* DMA transfer rate */ + int rate = dma_rates[index]; + + /* 68k cycles left */ + int left_cycles = (line_m68k + m68cycles_per_line) - count_m68k; if (left_cycles < 0) left_cycles = 0; - dma_bytes = (uint32)(((double)left_cycles / vdp_timings[dma_type][index]) + 0.5); + + /* DMA bytes left */ + int dma_bytes = (left_cycles * rate) / m68cycles_per_line; /* determinate DMA length in CPU cycles */ if (dma_length < dma_bytes) { /* DMA will be finished during this line */ - dma_cycles = (uint32)(((double)dma_length * vdp_timings[dma_type][index]) + 0.5); + dma_cycles = (dma_length * m68cycles_per_line) / rate; dma_length = 0; } else @@ -307,6 +298,7 @@ void dma_update() dma_length -= dma_bytes; } + /* update 68k cycles counter */ if (dma_type < 2) { /* 68K COPY to V-RAM */