From 41285e1131ba676e687b921912639478db83aec4 Mon Sep 17 00:00:00 2001 From: EkeEke Date: Sun, 18 Dec 2016 23:09:16 +0100 Subject: [PATCH] [Core/Sound] optimized Blip Buffer implementation for stereo streams (thanks to David Knight for original idea) --- Makefile.gc | 2 +- Makefile.wii | 2 +- core/cd_hw/cdd.c | 76 ++++---- core/cd_hw/pcm.c | 51 +++--- core/cd_hw/pcm.h | 2 +- core/sound/blip_buf.c | 413 +++++++++++++++++++++++++++++++++++------- core/sound/blip_buf.h | 17 +- core/sound/sn76489.c | 46 ++--- core/sound/sound.c | 55 +++--- core/sound/sound.h | 4 +- core/system.c | 72 +++----- core/system.h | 2 +- 12 files changed, 480 insertions(+), 262 deletions(-) diff --git a/Makefile.gc b/Makefile.gc index d593d8f..f86e9dd 100644 --- a/Makefile.gc +++ b/Makefile.gc @@ -27,7 +27,7 @@ INCLUDES := core core/m68k core/z80 core/sound core/tremor core/ntsc core/input_ # options for code generation #--------------------------------------------------------------------------------- -CFLAGS = -O3 -fomit-frame-pointer -Wall -Wno-strict-aliasing $(MACHDEP) $(INCLUDE) -DUSE_LIBTREMOR -DDISABLE_MANY_OGG_OPEN_FILES -DUSE_16BPP_RENDERING -DALT_RENDERER +CFLAGS = -O3 -fomit-frame-pointer -Wall -Wno-strict-aliasing $(MACHDEP) $(INCLUDE) -DUSE_LIBTREMOR -DDISABLE_MANY_OGG_OPEN_FILES -DUSE_16BPP_RENDERING -DALT_RENDERER -DBLIP_INVERT CXXFLAGS = $(CFLAGS) LDFLAGS = $(MACHDEP) -Wl,-Map,$(notdir $@).map diff --git a/Makefile.wii b/Makefile.wii index aff1cd7..1febf4c 100644 --- a/Makefile.wii +++ b/Makefile.wii @@ -27,7 +27,7 @@ INCLUDES := core core/m68k core/z80 core/sound core/tremor core/ntsc core/input_ # options for code generation #--------------------------------------------------------------------------------- -CFLAGS = -O3 -fomit-frame-pointer -Wall -Wno-strict-aliasing $(MACHDEP) $(INCLUDE) -DUSE_LIBTREMOR -DUSE_16BPP_RENDERING -DALT_RENDERER -DHW_RVL +CFLAGS = -O3 -fomit-frame-pointer -Wall -Wno-strict-aliasing $(MACHDEP) $(INCLUDE) -DUSE_LIBTREMOR -DUSE_16BPP_RENDERING 
-DALT_RENDERER -DBLIP_INVERT -DHW_RVL CXXFLAGS = $(CFLAGS) LDFLAGS = $(MACHDEP) -Wl,-Map,$(notdir $@).map,-wrap,wiiuse_set_ir,-wrap,wiiuse_handshake,-wrap,classic_ctrl_handshake,-wrap,classic_ctrl_event diff --git a/core/cd_hw/cdd.c b/core/cd_hw/cdd.c index 673e725..d20aa87 100644 --- a/core/cd_hw/cdd.c +++ b/core/cd_hw/cdd.c @@ -179,8 +179,7 @@ void cdd_init(int samplerate) { /* CD-DA is running by default at 44100 Hz */ /* Audio stream is resampled to desired rate using Blip Buffer */ - blip_set_rates(snd.blips[2][0], 44100, samplerate); - blip_set_rates(snd.blips[2][1], 44100, samplerate); + blip_set_rates(snd.blips[2], 44100, samplerate); } void cdd_reset(void) @@ -1059,16 +1058,16 @@ void cdd_read_data(uint8 *dst) void cdd_read_audio(unsigned int samples) { /* previous audio outputs */ - int16 l = cdd.audio[0]; - int16 r = cdd.audio[1]; + int prev_l = cdd.audio[0]; + int prev_r = cdd.audio[1]; /* get number of internal clocks (samples) needed */ - samples = blip_clocks_needed(snd.blips[2][0], samples); + samples = blip_clocks_needed(snd.blips[2], samples); /* audio track playing ? */ if (!scd.regs[0x36>>1].byte.h && cdd.toc.tracks[cdd.index].fd) { - int i, mul, delta; + int i, mul, l, r; /* current CD-DA fader volume */ int curVol = cdd.volume; @@ -1106,17 +1105,13 @@ void cdd_read_audio(unsigned int samples) /* (MIN) 0,1,2,3,4,8,12,16,20...,1020,1024 (MAX) */ mul = (curVol & 0x7fc) ? 
(curVol & 0x7fc) : (curVol & 0x03); - /* left channel */ - delta = ((ptr[0] * mul) / 1024) - l; - ptr++; - l += delta; - blip_add_delta_fast(snd.blips[2][0], i, delta); - - /* right channel */ - delta = ((ptr[0] * mul) / 1024) - r; - ptr++; - r += delta; - blip_add_delta_fast(snd.blips[2][1], i, delta); + /* left & right channels */ + l = ((ptr[0] * mul) / 1024); + r = ((ptr[1] * mul) / 1024); + blip_add_delta_fast(snd.blips[2], i, l-prev_l, r-prev_r); + prev_l = l; + prev_r = r; + ptr+=2; /* update CD-DA fader volume (one step/sample) */ if (curVol < endVol) @@ -1153,27 +1148,19 @@ void cdd_read_audio(unsigned int samples) /* (MIN) 0,1,2,3,4,8,12,16,20...,1020,1024 (MAX) */ mul = (curVol & 0x7fc) ? (curVol & 0x7fc) : (curVol & 0x03); - /* left channel */ + /* left & right channels */ #ifdef LSB_FIRST - delta = ((ptr[0] * mul) / 1024) - l; - ptr++; + l = ((ptr[0] * mul) / 1024); + r = ((ptr[1] * mul) / 1024); + ptr+=2; #else - delta = (((int16)((ptr[0] + ptr[1]*256)) * mul) / 1024) - l; - ptr += 2; + l = (((int16)((ptr[0] + ptr[1]*256)) * mul) / 1024); + r = (((int16)((ptr[2] + ptr[3]*256)) * mul) / 1024); + ptr+=4; #endif - l += delta; - blip_add_delta_fast(snd.blips[2][0], i, delta); - - /* right channel */ -#ifdef LSB_FIRST - delta = ((ptr[0] * mul) / 1024) - r; - ptr++; -#else - delta = (((int16)((ptr[0] + ptr[1]*256)) * mul) / 1024) - r; - ptr += 2; -#endif - r += delta; - blip_add_delta_fast(snd.blips[2][1], i, delta); + blip_add_delta_fast(snd.blips[2], i, l-prev_l, r-prev_r); + prev_l = l; + prev_r = r; /* update CD-DA fader volume (one step/sample) */ if (curVol < endVol) @@ -1198,23 +1185,24 @@ void cdd_read_audio(unsigned int samples) cdd.volume = curVol; /* save last audio output for next frame */ - cdd.audio[0] = l; - cdd.audio[1] = r; + cdd.audio[0] = prev_l; + cdd.audio[1] = prev_r; } else { /* no audio output */ - if (l) blip_add_delta_fast(snd.blips[2][0], 0, -l); - if (r) blip_add_delta_fast(snd.blips[2][1], 0, -r); + if (prev_l | prev_r) + { + 
blip_add_delta_fast(snd.blips[2], 0, -prev_l, -prev_r); - /* save audio output for next frame */ - cdd.audio[0] = 0; - cdd.audio[1] = 0; + /* save audio output for next frame */ + cdd.audio[0] = 0; + cdd.audio[1] = 0; + } } /* end of Blip Buffer timeframe */ - blip_end_frame(snd.blips[2][0], samples); - blip_end_frame(snd.blips[2][1], samples); + blip_end_frame(snd.blips[2], samples); } static void cdd_read_subcode(void) diff --git a/core/cd_hw/pcm.c b/core/cd_hw/pcm.c index 09f99d3..e068d70 100644 --- a/core/cd_hw/pcm.c +++ b/core/cd_hw/pcm.c @@ -2,7 +2,7 @@ * Genesis Plus * PCM sound chip (315-5476A) (RF5C164 compatible) * - * Copyright (C) 2012-2014 Eke-Eke (Genesis Plus GX) + * Copyright (C) 2012-2016 Eke-Eke (Genesis Plus GX) * * Redistribution and use of this code or any derivative works are permitted * provided that the following conditions are met: @@ -45,8 +45,7 @@ void pcm_init(double clock, int samplerate) { /* PCM chip is running at original rate and is synchronized with SUB-CPU */ /* Chip output is resampled to desired rate using Blip Buffer. 
*/ - blip_set_rates(snd.blips[1][0], clock / PCM_SCYCLES_RATIO, samplerate); - blip_set_rates(snd.blips[1][1], clock / PCM_SCYCLES_RATIO, samplerate); + blip_set_rates(snd.blips[1], clock / PCM_SCYCLES_RATIO, samplerate); } void pcm_reset(void) @@ -71,8 +70,7 @@ void pcm_reset(void) pcm.cycles = 0; /* clear blip buffers */ - blip_clear(snd.blips[1][0]); - blip_clear(snd.blips[1][1]); + blip_clear(snd.blips[1]); } int pcm_context_save(uint8 *state) @@ -117,6 +115,11 @@ void pcm_run(unsigned int length) #ifdef LOG_PCM error("[%d][%d]run %d PCM samples (from %d)\n", v_counter, s68k.cycles, length, pcm.cycles); #endif + + /* previous audio outputs */ + int prev_l = pcm.out[0]; + int prev_r = pcm.out[1]; + /* check if PCM chip is running */ if (pcm.enabled) { @@ -180,41 +183,29 @@ void pcm_run(unsigned int length) if (r < -32768) r = -32768; else if (r > 32767) r = 32767; - /* check if PCM left output changed */ - if (pcm.out[0] != l) - { - blip_add_delta_fast(snd.blips[1][0], i, l-pcm.out[0]); - pcm.out[0] = l; - } - - /* check if PCM right output changed */ - if (pcm.out[1] != r) - { - blip_add_delta_fast(snd.blips[1][1], i, r-pcm.out[1]); - pcm.out[1] = r; - } + /* update Blip Buffer */ + blip_add_delta_fast(snd.blips[1], i, l-prev_l, r-prev_r); + prev_l = l; + prev_r = r; } + + /* save last audio outputs */ + pcm.out[0] = prev_l; + pcm.out[1] = prev_r; } else { - /* check if PCM left output changed */ - if (pcm.out[0]) + /* check if PCM output was not muted */ + if (prev_l | prev_r) { - blip_add_delta_fast(snd.blips[1][0], 0, -pcm.out[0]); + blip_add_delta_fast(snd.blips[1], 0, -prev_l, -prev_r); pcm.out[0] = 0; - } - - /* check if PCM right output changed */ - if (pcm.out[1]) - { - blip_add_delta_fast(snd.blips[1][1], 0, -pcm.out[1]); pcm.out[1] = 0; } } /* end of blip buffer frame */ - blip_end_frame(snd.blips[1][0], length); - blip_end_frame(snd.blips[1][1], length); + blip_end_frame(snd.blips[1], length); /* update PCM master clock counter */ pcm.cycles += 
length * PCM_SCYCLES_RATIO; @@ -223,7 +214,7 @@ void pcm_run(unsigned int length) void pcm_update(unsigned int samples) { /* get number of internal clocks (samples) needed */ - unsigned int clocks = blip_clocks_needed(snd.blips[1][0], samples); + unsigned int clocks = blip_clocks_needed(snd.blips[1], samples); /* run PCM chip */ if (clocks > 0) diff --git a/core/cd_hw/pcm.h b/core/cd_hw/pcm.h index ee1a5c5..eef847d 100644 --- a/core/cd_hw/pcm.h +++ b/core/cd_hw/pcm.h @@ -2,7 +2,7 @@ * Genesis Plus * PCM sound chip (315-5476A) (RF5C164 compatible) * - * Copyright (C) 2012-2014 Eke-Eke (Genesis Plus GX) + * Copyright (C) 2012-2016 Eke-Eke (Genesis Plus GX) * * Redistribution and use of this code or any derivative works are permitted * provided that the following conditions are met: diff --git a/core/sound/blip_buf.c b/core/sound/blip_buf.c index 2f48536..ae0db45 100644 --- a/core/sound/blip_buf.c +++ b/core/sound/blip_buf.c @@ -1,10 +1,11 @@ -/* blip_buf $vers. http://www.slack.net/~ant/ */ +/* blip_buf $vers. 
http://www.slack.net/~ant/ */ -/* Modified for Genesis Plus GX by EkeEke (01/09/12) */ -/* - disabled assertions checks (define #BLIP_ASSERT to re-enable) */ -/* - fixed multiple time-frames support & removed m->avail */ -/* - modified blip_read_samples to always output to stereo streams */ -/* - added blip_mix_samples function (see blip_buf.h) */ +/* Modified for Genesis Plus GX by EkeEke */ +/* - disabled assertions checks (define #BLIP_ASSERT to re-enable) */ +/* - fixed multiple time-frames support & removed m->avail */ +/* - added blip_mix_samples function (see blip_buf.h) */ +/* - added stereo buffer support (define #BLIP_MONO to disable) */ +/* - added inverted stereo output (define #BLIP_INVERT to enable)*/ #include "blip_buf.h" @@ -61,24 +62,32 @@ enum { phase_count = 1 << phase_bits }; enum { delta_bits = 15 }; enum { delta_unit = 1 << delta_bits }; enum { frac_bits = time_bits - pre_shift }; +enum { phase_shift = frac_bits - phase_bits }; /* We could eliminate avail and encode whole samples in offset, but that would limit the total buffered samples to blip_max_frame. That could only be increased by decreasing time_bits, which would reduce resample ratio accuracy. 
*/ +typedef int buf_t; + struct blip_t { fixed_t factor; fixed_t offset; int size; +#ifdef BLIP_MONO int integrator; +#else + int integrator[2]; + buf_t* buffer[2]; +#endif }; -typedef int buf_t; - +#ifdef BLIP_MONO /* probably not totally portable */ -#define SAMPLES( buf ) ((buf_t*) ((buf) + 1)) +#define SAMPLES( blip ) ((buf_t*) ((blip) + 1)) +#endif /* Arithmetic (sign-preserving) right shift */ #define ARITH_SHIFT( n, shift ) \ @@ -124,9 +133,23 @@ blip_t* blip_new( int size ) assert( size >= 0 ); #endif +#ifdef BLIP_MONO m = (blip_t*) malloc( sizeof *m + (size + buf_extra) * sizeof (buf_t) ); +#else + m = (blip_t*) malloc( sizeof *m ); +#endif + if ( m ) { +#ifndef BLIP_MONO + m->buffer[0] = (buf_t*) malloc( (size + buf_extra) * sizeof (buf_t)); + m->buffer[1] = (buf_t*) malloc( (size + buf_extra) * sizeof (buf_t)); + if ((m->buffer[0] == NULL) || (m->buffer[1] == NULL)) + { + blip_delete(m); + return 0; + } +#endif m->factor = time_unit / blip_max_ratio; m->size = size; blip_clear( m ); @@ -141,7 +164,13 @@ void blip_delete( blip_t* m ) { if ( m != NULL ) { - /* Clear fields in case user tries to use after freeing */ +#ifndef BLIP_MONO + if (m->buffer[0] != NULL) + free(m->buffer[0]); + if (m->buffer[1] != NULL) + free(m->buffer[1]); +#endif + /* Clear fields in case user tries to use after freeing */ memset( m, 0, sizeof *m ); free( m ); } @@ -173,16 +202,23 @@ void blip_clear( blip_t* m ) Since we don't know rounding direction, factor/2 accommodates either, with the slight loss of showing an error in half the time. Since for a 64-bit factor this is years, the halving isn't a problem. 
*/ - - m->offset = m->factor / 2; + + m->offset = m->factor / 2; +#ifdef BLIP_MONO m->integrator = 0; memset( SAMPLES( m ), 0, (m->size + buf_extra) * sizeof (buf_t) ); +#else + m->integrator[0] = 0; + m->integrator[1] = 0; + memset( m->buffer[0], 0, (m->size + buf_extra) * sizeof (buf_t) ); + memset( m->buffer[1], 0, (m->size + buf_extra) * sizeof (buf_t) ); +#endif } int blip_clocks_needed( const blip_t* m, int samples ) { fixed_t needed; - + #ifdef BLIP_ASSERT /* Fails if buffer can't hold that many more samples */ assert( (samples >= 0) && (((m->offset >> time_bits) + samples) <= m->size) ); @@ -191,14 +227,14 @@ int blip_clocks_needed( const blip_t* m, int samples ) needed = (fixed_t) samples * time_unit; if ( needed < m->offset ) return 0; - + return (needed - m->offset + m->factor - 1) / m->factor; } void blip_end_frame( blip_t* m, unsigned t ) { m->offset += t * m->factor; - + #ifdef BLIP_ASSERT /* Fails if buffer size was exceeded */ assert( (m->offset >> time_bits) <= m->size ); @@ -212,91 +248,167 @@ int blip_samples_avail( const blip_t* m ) static void remove_samples( blip_t* m, int count ) { +#ifdef BLIP_MONO buf_t* buf = SAMPLES( m ); - int remain = (m->offset >> time_bits) + buf_extra - count; +#else + buf_t* buf = m->buffer[0]; +#endif + int remain = (m->offset >> time_bits) + buf_extra - count; m->offset -= count * time_unit; - - memmove( &buf [0], &buf [count], remain * sizeof buf [0] ); - memset( &buf [remain], 0, count * sizeof buf [0] ); + + memmove( &buf [0], &buf [count], remain * sizeof (buf_t) ); + memset( &buf [remain], 0, count * sizeof (buf_t) ); +#ifndef BLIP_MONO + buf = m->buffer[1]; + memmove( &buf [0], &buf [count], remain * sizeof (buf_t) ); + memset( &buf [remain], 0, count * sizeof (buf_t) ); +#endif } int blip_read_samples( blip_t* m, short out [], int count) { #ifdef BLIP_ASSERT assert( count >= 0 ); - + if ( count > (m->offset >> time_bits) ) count = m->offset >> time_bits; - + if ( count ) #endif { - buf_t const* in = 
SAMPLES( m ); - buf_t const* end = in + count; +#ifdef BLIP_MONO + buf_t const* in = SAMPLES( m ); int sum = m->integrator; +#else + buf_t const* in = m->buffer[0]; + buf_t const* in2 = m->buffer[1]; + int sum = m->integrator[0]; + int sum2 = m->integrator[1]; +#endif + buf_t const* end = in + count; do { /* Eliminate fraction */ int s = ARITH_SHIFT( sum, delta_bits ); - + sum += *in++; - + CLAMP( s ); - - *out = s; - out += 2; - + + *out++ = s; + /* High-pass filter */ sum -= s << (delta_bits - bass_shift); + +#ifndef BLIP_MONO + /* Eliminate fraction */ + s = ARITH_SHIFT( sum2, delta_bits ); + + sum2 += *in2++; + + CLAMP( s ); + + *out++ = s; + + /* High-pass filter */ + sum2 -= s << (delta_bits - bass_shift); +#endif } while ( in != end ); + +#ifdef BLIP_MONO m->integrator = sum; - +#else + m->integrator[0] = sum; + m->integrator[1] = sum2; +#endif remove_samples( m, count ); } - + return count; } -int blip_mix_samples( blip_t* m, short out [], int count) +int blip_mix_samples( blip_t* m1, blip_t* m2, blip_t* m3, short out [], int count) { #ifdef BLIP_ASSERT - assert( count >= 0 ); - - if ( count > (m->offset >> time_bits) ) - count = m->offset >> time_bits; - - if ( count ) + assert( count >= 0 ); + + if ( count > (m1->offset >> time_bits) ) + count = m1->offset >> time_bits; + if ( count > (m2->offset >> time_bits) ) + count = m2->offset >> time_bits; + if ( count > (m3->offset >> time_bits) ) + count = m3->offset >> time_bits; + + if ( count ) #endif { - buf_t const* in = SAMPLES( m ); - buf_t const* end = in + count; - int sum = m->integrator; - do - { - /* Eliminate fraction */ - int s = ARITH_SHIFT( sum, delta_bits ); - - sum += *in++; - - /* High-pass filter */ - sum -= s << (delta_bits - bass_shift); + buf_t const* end; + buf_t const* in[3]; +#ifdef BLIP_MONO + int sum = m1->integrator; + in[0] = SAMPLES( m1 ); + in[1] = SAMPLES( m2 ); + in[2] = SAMPLES( m3 ); +#else + int sum = m1->integrator[0]; + int sum2 = m1->integrator[1]; + buf_t const* in2[3]; + 
in[0] = m1->buffer[0]; + in[1] = m2->buffer[0]; + in[2] = m3->buffer[0]; + in2[0] = m1->buffer[1]; + in2[1] = m2->buffer[1]; + in2[2] = m3->buffer[1]; +#endif - /* Add current buffer value */ - s += *out; - - CLAMP( s ); - - *out = s; - out += 2; - } - while ( in != end ); - m->integrator = sum; - - remove_samples( m, count ); - } - - return count; + end = in[0] + count; + do + { + /* Eliminate fraction */ + int s = ARITH_SHIFT( sum, delta_bits ); + + sum += *in[0]++; + sum += *in[1]++; + sum += *in[2]++; + + CLAMP( s ); + + *out++ = s; + + /* High-pass filter */ + sum -= s << (delta_bits - bass_shift); + +#ifndef BLIP_MONO + /* Eliminate fraction */ + s = ARITH_SHIFT( sum2, delta_bits ); + + sum2 += *in2[0]++; + sum2 += *in2[1]++; + sum2 += *in2[2]++; + + CLAMP( s ); + + *out++ = s; + + /* High-pass filter */ + sum2 -= s << (delta_bits - bass_shift); +#endif + } + while ( in[0] != end ); + +#ifdef BLIP_MONO + m1->integrator = sum; +#else + m1->integrator[0] = sum; + m1->integrator[1] = sum2; +#endif + remove_samples( m1, count ); + remove_samples( m2, count ); + remove_samples( m3, count ); + } + + return count; } /* Things that didn't help performance on x86: @@ -348,12 +460,180 @@ possibly-wider fixed_t. On 32-bit platforms, this is likely more efficient. And by having pre_shift 32, a 32-bit platform can easily do the shift by simply ignoring the low half. 
*/ +#ifndef BLIP_MONO + +void blip_add_delta( blip_t* m, unsigned time, int delta_l, int delta_r ) +{ + if (delta_l | delta_r) + { + unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift); + int phase = fixed >> phase_shift & (phase_count - 1); + short const* in = bl_step [phase]; + short const* rev = bl_step [phase_count - phase]; + int interp = fixed >> (phase_shift - delta_bits) & (delta_unit - 1); + int pos = fixed >> frac_bits; + +#ifdef BLIP_INVERT + buf_t* out_l = m->buffer[1] + pos; + buf_t* out_r = m->buffer[0] + pos; +#else + buf_t* out_l = m->buffer[0] + pos; + buf_t* out_r = m->buffer[1] + pos; +#endif + + int delta; + +#ifdef BLIP_ASSERT + /* Fails if buffer size was exceeded */ + assert( pos <= m->size + end_frame_extra ); +#endif + + if (delta_l == delta_r) + { + buf_t out; + delta = (delta_l * interp) >> delta_bits; + delta_l -= delta; + out = in[0]*delta_l + in[half_width+0]*delta; + out_l[0] += out; + out_r[0] += out; + out = in[1]*delta_l + in[half_width+1]*delta; + out_l[1] += out; + out_r[1] += out; + out = in[2]*delta_l + in[half_width+2]*delta; + out_l[2] += out; + out_r[2] += out; + out = in[3]*delta_l + in[half_width+3]*delta; + out_l[3] += out; + out_r[3] += out; + out = in[4]*delta_l + in[half_width+4]*delta; + out_l[4] += out; + out_r[4] += out; + out = in[5]*delta_l + in[half_width+5]*delta; + out_l[5] += out; + out_r[5] += out; + out = in[6]*delta_l + in[half_width+6]*delta; + out_l[6] += out; + out_r[6] += out; + out = in[7]*delta_l + in[half_width+7]*delta; + out_l[7] += out; + out_r[7] += out; + out = rev[7]*delta_l + rev[7-half_width]*delta; + out_l[8] += out; + out_r[8] += out; + out = rev[6]*delta_l + rev[6-half_width]*delta; + out_l[9] += out; + out_r[9] += out; + out = rev[5]*delta_l + rev[5-half_width]*delta; + out_l[10] += out; + out_r[10] += out; + out = rev[4]*delta_l + rev[4-half_width]*delta; + out_l[11] += out; + out_r[11] += out; + out = rev[3]*delta_l + rev[3-half_width]*delta; + out_l[12] += out; + 
out_r[12] += out; + out = rev[2]*delta_l + rev[2-half_width]*delta; + out_l[13] += out; + out_r[13] += out; + out = rev[1]*delta_l + rev[1-half_width]*delta; + out_l[14] += out; + out_r[14] += out; + out = rev[0]*delta_l + rev[0-half_width]*delta; + out_l[15] += out; + out_r[15] += out; + } + else + { + delta = (delta_l * interp) >> delta_bits; + delta_l -= delta; + out_l [0] += in[0]*delta_l + in[half_width+0]*delta; + out_l [1] += in[1]*delta_l + in[half_width+1]*delta; + out_l [2] += in[2]*delta_l + in[half_width+2]*delta; + out_l [3] += in[3]*delta_l + in[half_width+3]*delta; + out_l [4] += in[4]*delta_l + in[half_width+4]*delta; + out_l [5] += in[5]*delta_l + in[half_width+5]*delta; + out_l [6] += in[6]*delta_l + in[half_width+6]*delta; + out_l [7] += in[7]*delta_l + in[half_width+7]*delta; + out_l [8] += rev[7]*delta_l + rev[7-half_width]*delta; + out_l [9] += rev[6]*delta_l + rev[6-half_width]*delta; + out_l [10] += rev[5]*delta_l + rev[5-half_width]*delta; + out_l [11] += rev[4]*delta_l + rev[4-half_width]*delta; + out_l [12] += rev[3]*delta_l + rev[3-half_width]*delta; + out_l [13] += rev[2]*delta_l + rev[2-half_width]*delta; + out_l [14] += rev[1]*delta_l + rev[1-half_width]*delta; + out_l [15] += rev[0]*delta_l + rev[0-half_width]*delta; + + delta = (delta_r * interp) >> delta_bits; + delta_r -= delta; + out_r [0] += in[0]*delta_r + in[half_width+0]*delta; + out_r [1] += in[1]*delta_r + in[half_width+1]*delta; + out_r [2] += in[2]*delta_r + in[half_width+2]*delta; + out_r [3] += in[3]*delta_r + in[half_width+3]*delta; + out_r [4] += in[4]*delta_r + in[half_width+4]*delta; + out_r [5] += in[5]*delta_r + in[half_width+5]*delta; + out_r [6] += in[6]*delta_r + in[half_width+6]*delta; + out_r [7] += in[7]*delta_r + in[half_width+7]*delta; + out_r [8] += rev[7]*delta_r + rev[7-half_width]*delta; + out_r [9] += rev[6]*delta_r + rev[6-half_width]*delta; + out_r [10] += rev[5]*delta_r + rev[5-half_width]*delta; + out_r [11] += rev[4]*delta_r + 
rev[4-half_width]*delta; + out_r [12] += rev[3-half_width]*delta; + out_r [13] += rev[2-half_width]*delta; + out_r [14] += rev[1-half_width]*delta; + out_r [15] += rev[0-half_width]*delta; + } + } +} + +void blip_add_delta_fast( blip_t* m, unsigned time, int delta_l, int delta_r ) +{ + if (delta_l | delta_r) + { + unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift); + int interp = fixed >> (frac_bits - delta_bits) & (delta_unit - 1); + int pos = fixed >> frac_bits; + +#ifdef BLIP_INVERT /* same switch as blip_add_delta(): swap left/right target buffers */ + buf_t* out_l = m->buffer[1] + pos; + buf_t* out_r = m->buffer[0] + pos; +#else + buf_t* out_l = m->buffer[0] + pos; + buf_t* out_r = m->buffer[1] + pos; +#endif + + int delta = delta_l * interp; + +#ifdef BLIP_ASSERT + /* Fails if buffer size was exceeded */ + assert( pos <= m->size + end_frame_extra ); +#endif + + if (delta_l == delta_r) + { + delta_l = delta_l * delta_unit - delta; + out_l[7] += delta_l; + out_l[8] += delta; + out_r[7] += delta_l; + out_r[8] += delta; + } + else + { + out_l[7] += delta_l * delta_unit - delta; + out_l[8] += delta; + delta = delta_r * interp; + out_r[7] += delta_r * delta_unit - delta; + out_r[8] += delta; + } + } +} + +#else + void blip_add_delta( blip_t* m, unsigned time, int delta ) { unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift); buf_t* out = SAMPLES( m ) + (fixed >> frac_bits); - int const phase_shift = frac_bits - phase_bits; int phase = fixed >> phase_shift & (phase_count - 1); short const* in = bl_step [phase]; short const* rev = bl_step [phase_count - phase]; @@ -403,3 +683,4 @@ void blip_add_delta_fast( blip_t* m, unsigned time, int delta ) out [7] += delta * delta_unit - delta2; out [8] += delta2; } +#endif diff --git a/core/sound/blip_buf.h b/core/sound/blip_buf.h index 21c45d0..81b986b 100644 --- a/core/sound/blip_buf.h +++ b/core/sound/blip_buf.h @@ -28,12 +28,24 @@ blip_max_ratio = 1 << 20 }; /** Clears entire 
buffer. Afterwards, blip_samples_avail() == 0. */ void blip_clear( blip_t* ); +#ifndef BLIP_MONO + +/** Adds positive/negative deltas into stereo buffers at specified clock time. */ +void blip_add_delta( blip_t*, unsigned time, int delta_l, int delta_r ); + +/** Same as blip_add_delta(), but uses faster, lower-quality synthesis. */ +void blip_add_delta_fast( blip_t*, unsigned int clock_time, int delta_l, int delta_r ); + +#else + /** Adds positive/negative delta into buffer at specified clock time. */ void blip_add_delta( blip_t*, unsigned int clock_time, int delta ); /** Same as blip_add_delta(), but uses faster, lower-quality synthesis. */ void blip_add_delta_fast( blip_t*, unsigned int clock_time, int delta ); +#endif + /** Length of time frame, in clocks, needed to make sample_count additional samples available. */ int blip_clocks_needed( const blip_t*, int sample_count ); @@ -56,9 +68,8 @@ element of 'out', allowing easy interleaving of two buffers into a stereo sample stream. Outputs 16-bit signed samples. Returns number of samples actually read. */ int blip_read_samples( blip_t*, short out [], int count); -/* Same as above function except sample is added to output buffer previous value */ -/* This allows easy mixing of different blip buffers into a single output stream */ -int blip_mix_samples( blip_t* m, short out [], int count); +/* Same as above function except sample is mixed from three blip buffers source */ +int blip_mix_samples( blip_t* m1, blip_t* m2, blip_t* m3, short out [], int count); /** Frees buffer. No effect if NULL is passed. */ void blip_delete( blip_t* ); diff --git a/core/sound/sn76489.c b/core/sound/sn76489.c index 27ea516..da0a1b6 100644 --- a/core/sound/sn76489.c +++ b/core/sound/sn76489.c @@ -167,45 +167,23 @@ int SN76489_GetContextSize(void) /* Updates tone amplitude in delta buffer. Call whenever amplitude might have changed. 
*/ INLINE void UpdateToneAmplitude(int i, int time) { - int delta; - - /* left output */ - delta = (SN76489.Channel[i][0] * SN76489.ToneFreqPos[i]) - SN76489.ChanOut[i][0]; - if (delta != 0) - { - SN76489.ChanOut[i][0] += delta; - blip_add_delta(snd.blips[0][0], time, delta); - } - - /* right output */ - delta = (SN76489.Channel[i][1] * SN76489.ToneFreqPos[i]) - SN76489.ChanOut[i][1]; - if (delta != 0) - { - SN76489.ChanOut[i][1] += delta; - blip_add_delta(snd.blips[0][1], time, delta); - } + /* left & right output */ + int delta_l = (SN76489.Channel[i][0] * SN76489.ToneFreqPos[i]) - SN76489.ChanOut[i][0]; + int delta_r = (SN76489.Channel[i][1] * SN76489.ToneFreqPos[i]) - SN76489.ChanOut[i][1]; + blip_add_delta(snd.blips[0], time, delta_l, delta_r); + SN76489.ChanOut[i][0] += delta_l; + SN76489.ChanOut[i][1] += delta_r; } /* Updates noise amplitude in delta buffer. Call whenever amplitude might have changed. */ INLINE void UpdateNoiseAmplitude(int time) { - int delta; - - /* left output */ - delta = (SN76489.Channel[3][0] * ( SN76489.NoiseShiftRegister & 0x1 )) - SN76489.ChanOut[3][0]; - if (delta != 0) - { - SN76489.ChanOut[3][0] += delta; - blip_add_delta(snd.blips[0][0], time, delta); - } - - /* right output */ - delta = (SN76489.Channel[3][1] * ( SN76489.NoiseShiftRegister & 0x1 )) - SN76489.ChanOut[3][1]; - if (delta != 0) - { - SN76489.ChanOut[3][1] += delta; - blip_add_delta(snd.blips[0][1], time, delta); - } + /* left & right output */ + int delta_l = (SN76489.Channel[3][0] * ( SN76489.NoiseShiftRegister & 0x1 )) - SN76489.ChanOut[3][0]; + int delta_r = (SN76489.Channel[3][1] * ( SN76489.NoiseShiftRegister & 0x1 )) - SN76489.ChanOut[3][1]; + blip_add_delta(snd.blips[0], time, delta_l, delta_r); + SN76489.ChanOut[3][0] += delta_l; + SN76489.ChanOut[3][1] += delta_r; } /* Runs tone channel for clock_length clocks */ diff --git a/core/sound/sound.c b/core/sound/sound.c index 06bea1a..f1862ff 100644 --- a/core/sound/sound.c +++ b/core/sound/sound.c @@ -2,8 +2,8 
@@ * Genesis Plus * Sound Hardware * - * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Charles Mac Donald (original code) - * Copyright (C) 2007-2013 Eke-Eke (Genesis Plus GX) + * Copyright (C) 1998-2003 Charles Mac Donald (original code) + * Copyright (C) 2007-2016 Eke-Eke (Genesis Plus GX) * * Redistribution and use of this code or any derivative works are permitted * provided that the following conditions are met: @@ -125,21 +125,21 @@ void sound_reset(void) int sound_update(unsigned int cycles) { - int delta, preamp, time, l, r, *ptr; + int prev_l, prev_r, preamp, time, l, r, *ptr; /* Run PSG & FM chips until end of frame */ SN76489_Update(cycles); fm_update(cycles); - /* FM output pre-amplification */ + /* FM output pre-amplification */ preamp = config.fm_preamp; /* FM frame initial timestamp */ time = fm_cycles_start; /* Restore last FM outputs from previous frame */ - l = fm_last[0]; - r = fm_last[1]; + prev_l = fm_last[0]; + prev_r = fm_last[1]; /* FM buffer start pointer */ ptr = fm_buffer; @@ -150,15 +150,12 @@ int sound_update(unsigned int cycles) /* high-quality Band-Limited synthesis */ do { - /* left channel */ - delta = ((*ptr++ * preamp) / 100) - l; - l += delta; - blip_add_delta(snd.blips[0][0], time, delta); - - /* right channel */ - delta = ((*ptr++ * preamp) / 100) - r; - r += delta; - blip_add_delta(snd.blips[0][1], time, delta); + /* left & right channels */ + l = ((*ptr++ * preamp) / 100); + r = ((*ptr++ * preamp) / 100); + blip_add_delta(snd.blips[0], time, l-prev_l, r-prev_r); + prev_l = l; + prev_r = r; /* increment time counter */ time += fm_cycles_ratio; @@ -170,15 +167,12 @@ int sound_update(unsigned int cycles) /* faster Linear Interpolation */ do { - /* left channel */ - delta = ((*ptr++ * preamp) / 100) - l; - l += delta; - blip_add_delta_fast(snd.blips[0][0], time, delta); - - /* right channel */ - delta = ((*ptr++ * preamp) / 100) - r; - r += delta; - blip_add_delta_fast(snd.blips[0][1], time, delta); + /* left & right channels 
*/ + l = ((*ptr++ * preamp) / 100); + r = ((*ptr++ * preamp) / 100); + blip_add_delta_fast(snd.blips[0], time, l-prev_l, r-prev_r); + prev_l = l; + prev_r = r; /* increment time counter */ time += fm_cycles_ratio; @@ -190,18 +184,17 @@ int sound_update(unsigned int cycles) fm_ptr = fm_buffer; /* save last FM output for next frame */ - fm_last[0] = l; - fm_last[1] = r; + fm_last[0] = prev_l; + fm_last[1] = prev_r; /* adjust FM cycle counters for next frame */ fm_cycles_count = fm_cycles_start = time - cycles; - - /* end of blip buffers time frame */ - blip_end_frame(snd.blips[0][0], cycles); - blip_end_frame(snd.blips[0][1], cycles); + + /* end of blip buffer time frame */ + blip_end_frame(snd.blips[0], cycles); /* return number of available samples */ - return blip_samples_avail(snd.blips[0][0]); + return blip_samples_avail(snd.blips[0]); } int sound_context_save(uint8 *state) diff --git a/core/sound/sound.h b/core/sound/sound.h index a4284a7..ae7cc56 100644 --- a/core/sound/sound.h +++ b/core/sound/sound.h @@ -2,8 +2,8 @@ * Genesis Plus * Sound Hardware * - * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Charles Mac Donald (original code) - * Copyright (C) 2007-2013 Eke-Eke (Genesis Plus GX) + * Copyright (C) 1998-2003 Charles Mac Donald (original code) + * Copyright (C) 2007-2016 Eke-Eke (Genesis Plus GX) * * Redistribution and use of this code or any derivative works are permitted * provided that the following conditions are met: diff --git a/core/system.c b/core/system.c index e6134b2..07d86ed 100644 --- a/core/system.c +++ b/core/system.c @@ -68,11 +68,9 @@ int audio_init(int samplerate, double framerate) memset(&snd, 0, sizeof (snd)); /* Initialize Blip Buffers */ - snd.blips[0][0] = blip_new(samplerate / 10); - snd.blips[0][1] = blip_new(samplerate / 10); - if (!snd.blips[0][0] || !snd.blips[0][1]) + snd.blips[0] = blip_new(samplerate / 10); + if (!snd.blips[0]) { - audio_shutdown(); return -1; } @@ -80,11 +78,9 @@ int audio_init(int samplerate, double 
framerate) if (system_hw == SYSTEM_MCD) { /* allocate blip buffers */ - snd.blips[1][0] = blip_new(samplerate / 10); - snd.blips[1][1] = blip_new(samplerate / 10); - snd.blips[2][0] = blip_new(samplerate / 10); - snd.blips[2][1] = blip_new(samplerate / 10); - if (!snd.blips[1][0] || !snd.blips[1][1] || !snd.blips[2][0] || !snd.blips[2][1]) + snd.blips[1] = blip_new(samplerate / 10); + snd.blips[2] = blip_new(samplerate / 10); + if (!snd.blips[1] || !snd.blips[2]) { audio_shutdown(); return -1; @@ -132,8 +128,7 @@ void audio_set_rate(int samplerate, double framerate) /* master clock timebase so they remain perfectly synchronized together, while still */ /* being synchronized with 68K and Z80 CPUs as well. Mixed sound chip output is then */ /* resampled to desired rate at the end of each frame, using Blip Buffer. */ - blip_set_rates(snd.blips[0][0], mclk, samplerate); - blip_set_rates(snd.blips[0][1], mclk, samplerate); + blip_set_rates(snd.blips[0], mclk, samplerate); /* Mega CD sound hardware */ if (system_hw == SYSTEM_MCD) @@ -155,17 +150,14 @@ void audio_set_rate(int samplerate, double framerate) void audio_reset(void) { - int i,j; + int i; /* Clear blip buffers */ for (i=0; i<3; i++) { - for (j=0; j<2; j++) + if (snd.blips[i]) { - if (snd.blips[i][j]) - { - blip_clear(snd.blips[i][j]); - } + blip_clear(snd.blips[i]); } } @@ -187,16 +179,13 @@ void audio_set_equalizer(void) void audio_shutdown(void) { - int i,j; + int i; /* Delete blip buffers */ for (i=0; i<3; i++) { - for (j=0; j<2; j++) - { - blip_delete(snd.blips[i][j]); - snd.blips[i][j] = 0; - } + blip_delete(snd.blips[i]); + snd.blips[i] = 0; } } @@ -213,37 +202,24 @@ int audio_update(int16 *buffer) /* read CDDA samples */ cdd_read_audio(size); - } #ifdef ALIGN_SND - /* return an aligned number of samples if required */ - size &= ALIGN_SND; + /* return an aligned number of samples if required */ + size &= ALIGN_SND; #endif - /* resample FM & PSG mixed stream to output buffer */ -#ifdef LSB_FIRST - 
blip_read_samples(snd.blips[0][0], buffer, size); - blip_read_samples(snd.blips[0][1], buffer + 1, size); -#else - blip_read_samples(snd.blips[0][0], buffer + 1, size); - blip_read_samples(snd.blips[0][1], buffer, size); -#endif - - /* Mega CD specific */ - if (system_hw == SYSTEM_MCD) + /* resample & mix FM/PSG, PCM & CD-DA streams to output buffer */ + blip_mix_samples(snd.blips[0], snd.blips[1], snd.blips[2], buffer, size); + } + else { - /* resample PCM & CD-DA streams to output buffer */ -#ifdef LSB_FIRST - blip_mix_samples(snd.blips[1][0], buffer, size); - blip_mix_samples(snd.blips[1][1], buffer + 1, size); - blip_mix_samples(snd.blips[2][0], buffer, size); - blip_mix_samples(snd.blips[2][1], buffer + 1, size); -#else - blip_mix_samples(snd.blips[1][0], buffer + 1, size); - blip_mix_samples(snd.blips[1][1], buffer, size); - blip_mix_samples(snd.blips[2][0], buffer + 1, size); - blip_mix_samples(snd.blips[2][1], buffer, size); +#ifdef ALIGN_SND + /* return an aligned number of samples if required */ + size &= ALIGN_SND; #endif + + /* resample FM/PSG mixed stream to output buffer */ + blip_read_samples(snd.blips[0], buffer, size); } /* Audio filtering */ diff --git a/core/system.h b/core/system.h index 2a5a6d3..e6fe50d 100644 --- a/core/system.h +++ b/core/system.h @@ -91,7 +91,7 @@ typedef struct int sample_rate; /* Output Sample rate (8000-48000) */ double frame_rate; /* Output Frame rate (usually 50 or 60 frames per second) */ int enabled; /* 1= sound emulation is enabled */ - blip_t* blips[3][2]; /* Blip Buffer resampling */ + blip_t* blips[3]; /* Blip Buffer resampling (stereo) */ } t_snd;