optimized some float/int conversions, added -fomit-frame-pointer flag

This commit is contained in:
ekeeke31 2009-01-28 16:43:15 +00:00
parent b9cab7c7c4
commit 43fd560e55
10 changed files with 101 additions and 117 deletions

View File

@ -26,7 +26,7 @@ INCLUDES := source source/m68k source/z80 source/sound source/sound/SRC source/n
# options for code generation
#---------------------------------------------------------------------------------
CFLAGS = -O3 -Wall $(MACHDEP) $(INCLUDE) -DWORDS_BIGENDIAN -DNGC="1" -DHW_DOL
CFLAGS = -O3 -fomit-frame-pointer -Wall $(MACHDEP) $(INCLUDE) -DWORDS_BIGENDIAN -DNGC="1" -DHW_DOL
CXXFLAGS = $(CFLAGS)
LDFLAGS = $(MACHDEP) -Wl,-Map,$(notdir $@).map

View File

@ -26,7 +26,7 @@ INCLUDES := source source/m68k source/z80 source/sound source/sound/SRC source/n
# options for code generation
#---------------------------------------------------------------------------------
CFLAGS = -O3 -mrvl -Wall $(MACHDEP) -Wno-strict-aliasing $(INCLUDE) -DWORDS_BIGENDIAN -DNGC="1" -DHW_RVL
CFLAGS = -O3 -fomit-frame-pointer -mrvl -Wall $(MACHDEP) -Wno-strict-aliasing $(INCLUDE) -DWORDS_BIGENDIAN -DNGC="1" -DHW_RVL
CXXFLAGS = $(CFLAGS)
LDFLAGS = $(MACHDEP) -Wl,-Map,$(notdir $@).map

View File

@ -31,8 +31,8 @@
typedef struct
{
char version[15];
double psg_preamp;
double fm_preamp;
int32 psg_preamp;
int32 fm_preamp;
uint8 boost;
uint8 filter;
uint8 hq_fm;

View File

@ -1804,8 +1804,16 @@ void YM2612UpdateOne(int **buffer, int length)
Limit(rt,MAXOUT,MINOUT);
/* buffering */
bufL[i] = lt;
bufR[i] = rt;
if (src_buffer)
{
src_buffer[i*2] = (float) lt / (8.0 * 0x10000000);
src_buffer[i*2 + 1] = (float) rt / (8.0 * 0x10000000);
}
else
{
bufL[i] = lt;
bufR[i] = rt;
}
/* timer A control */
INTERNAL_TIMER_A();

View File

@ -48,7 +48,7 @@ static SN76489_Context SN76489[MAX_SN76489];
void SN76489_Init(int which, int PSGClockValue, int SamplingRate)
{
SN76489_Context *p = &SN76489[which];
p->dClock=(float)PSGClockValue/16/SamplingRate;
p->dClock=(float)(PSGClockValue)/16.0/(float)SamplingRate;
SN76489_Config(which, MUTE_ALLON, VOL_FULL, FB_SEGAVDP, SRW_SEGAVDP, 1);
SN76489_Reset(which);
}

View File

@ -22,7 +22,6 @@
****************************************************************************************/
#include "shared.h"
#include "samplerate.h"
#define CLOCK_NTSC 53693175
#define CLOCK_PAL 53203424
@ -34,13 +33,10 @@ void (*_YM2612_Update)(int **buf, int length);
int (*_YM2612_Reset)(void);
/* cycle-accurate samples */
static double m68cycles_per_sample[2];
static double z80cycles_per_sample[2];
static int m68cycles_per_sample[2];
/* libsamplerate buffers (max. is 488 cycles per line x 313 lines / 144) */
static SRC_DATA src_data;
static float src_in[1061*2];
static int src_buffer[2][1061];
/* pointer to current SRC buffer */
float *src_buffer;
/* YM2612 register arrays */
int fm_reg[2][0x100];
@ -48,14 +44,14 @@ int fm_reg[2][0x100];
/* return the number of samples that should have been rendered so far */
static inline uint32 fm_sample_cnt(uint8 is_z80)
{
if (is_z80) return (uint32) ((double)(count_z80 + current_z80 - z80_ICount) / z80cycles_per_sample[0]);
else return (uint32) ((double) count_m68k / m68cycles_per_sample[0]);
if (is_z80) return ((count_z80 + current_z80 - z80_ICount) * 15) / (7 * m68cycles_per_sample[0]);
else return count_m68k / m68cycles_per_sample[0];
}
static inline uint32 psg_sample_cnt(uint8 is_z80)
{
if (is_z80) return (uint32) ((double)(count_z80 + current_z80 - z80_ICount) / z80cycles_per_sample[1]);
else return (uint32) ((double) count_m68k / m68cycles_per_sample[1]);
if (is_z80) return ((count_z80 + current_z80 - z80_ICount) * 15) / (7 * m68cycles_per_sample[1]);
else return count_m68k / m68cycles_per_sample[1];
}
/* update FM samples */
@ -64,16 +60,12 @@ static inline void fm_update()
if(snd.fm.curStage - snd.fm.lastStage > 0)
{
int *tempBuffer[2];
if (config.hq_fm && !config.fm_core)
tempBuffer[0] = snd.fm.buffer[0] + snd.fm.lastStage;
tempBuffer[1] = snd.fm.buffer[1] + snd.fm.lastStage;
if (src_buffer)
{
tempBuffer[0] = src_buffer[0] + snd.fm.lastStage;
tempBuffer[1] = src_buffer[1] + snd.fm.lastStage;
}
else
{
tempBuffer[0] = snd.fm.buffer[0] + snd.fm.lastStage;
tempBuffer[1] = snd.fm.buffer[1] + snd.fm.lastStage;
src_buffer = src_data.data_in + (snd.fm.lastStage * 2);
}
_YM2612_Update(tempBuffer, snd.fm.curStage - snd.fm.lastStage);
@ -97,28 +89,20 @@ void sound_init(int rate)
double vclk = (vdp_pal ? (double)CLOCK_PAL : (double)CLOCK_NTSC) / 7.0; /* 68000 and YM2612 clock */
double zclk = (vdp_pal ? (double)CLOCK_PAL : (double)CLOCK_NTSC) / 15.0; /* Z80 and SN76489 clock */
/* cycle-accurate FM samples */
if (config.hq_fm && !config.fm_core)
{
m68cycles_per_sample[0] = 144.0;
z80cycles_per_sample[0] = (144.0 * 7.0) / 15.0;
/* cycle-accurate samples */
m68cycles_per_sample[0] = (m68cycles_per_line * lines_per_frame * vdp_rate) / rate;
m68cycles_per_sample[1] = (m68cycles_per_line * lines_per_frame * vdp_rate) / rate;
/* initialize samplerate converter data */
src_data.data_in = src_in;
src_data.data_out = snd.fm.src_out;
src_data.input_frames = (int)(((double)m68cycles_per_line * (double)lines_per_frame / 144.0) + 0.5);
src_data.output_frames = rate / vdp_rate;
src_data.src_ratio = (double)src_data.output_frames / (double)src_data.input_frames;
}
else
/* YM2612 is emulated at the original frequency */
src_buffer = 0;
if (src_data.data_in)
{
m68cycles_per_sample[0] = ((double)m68cycles_per_line * (double)lines_per_frame) / (double) (rate / vdp_rate);
z80cycles_per_sample[0] = ((double)z80cycles_per_line * (double)lines_per_frame) / (double) (rate / vdp_rate);
}
/* YM2612 original frequency is VCLK/144 */
m68cycles_per_sample[0] = 144;
/* cycle-accurate PSG samples */
m68cycles_per_sample[1] = ((double)m68cycles_per_line * (double)lines_per_frame) / (double) (rate / vdp_rate);
z80cycles_per_sample[1] = ((double)z80cycles_per_line * (double)lines_per_frame) / (double) (rate / vdp_rate);
/* Initialize SRC buffer */
src_buffer = src_data.data_in;
}
/* initialize sound chips */
SN76489_Init(0, (int)zclk, rate);
@ -145,7 +129,7 @@ void sound_init(int rate)
void sound_update(void)
{
/* finalize sound buffers */
snd.fm.curStage = (config.hq_fm && !config.fm_core) ? src_data.input_frames : snd.buffer_size;
snd.fm.curStage = (src_data.data_in) ? src_data.input_frames : snd.buffer_size;
snd.psg.curStage = snd.buffer_size;
/* update last samples (if needed) */
@ -153,45 +137,34 @@ void sound_update(void)
psg_update();
/* Resampling */
if (config.hq_fm && !config.fm_core)
if (src_data.data_in)
{
double scaled_value ;
int len = src_data.input_frames;
/* this is basically libsamplerate's "src_int_to_float_array" function, adapted to interleave samples */
while (len)
{
len -- ;
src_in[len*2] = (float) (src_buffer[0] [len] / (8.0 * 0x10000000));
src_in[len*2 + 1] = (float) (src_buffer[1] [len] / (8.0 * 0x10000000));
}
/* samplerate conversion */
src_simple (&src_data, (config.hq_fm&1) ? SRC_LINEAR : SRC_SINC_FASTEST, 2);
/* this is basically libsamplerate's "src_float_to_int_array" function, adapted to interleave samples */
len = snd.buffer_size;
double scaled_value;
int len = snd.buffer_size;
while (len)
{
len -- ;
scaled_value = snd.fm.src_out[len*2] * (8.0 * 0x10000000);
scaled_value = src_data.data_out[len*2] * (8.0 * 0x10000000);
if (scaled_value >= (1.0 * 0x7FFFFFFF))
snd.fm.buffer[0][len] = 0x7fffffff;
else if (scaled_value <= (-8.0 * 0x10000000))
snd.fm.buffer[0][len] = -1 - 0x7fffffff;
else
snd.fm.buffer[0][len] = (long)scaled_value;
snd.fm.buffer[0][len] = lrint(scaled_value);
scaled_value = snd.fm.src_out[len*2+1] * (8.0 * 0x10000000);
scaled_value = src_data.data_out[len*2+1] * (8.0 * 0x10000000);
if (scaled_value >= (1.0 * 0x7FFFFFFF))
snd.fm.buffer[1][len] = 0x7fffffff;
else if (scaled_value <= (-8.0 * 0x10000000))
snd.fm.buffer[1][len] = -1 - 0x7fffffff;
else
snd.fm.buffer[1][len] = (long)scaled_value;
}
snd.fm.buffer[1][len] = lrint(scaled_value);
}
}
/* reset samples count */
snd.fm.curStage = 0;

View File

@ -28,6 +28,7 @@
extern int fm_reg[2][0x100];
extern double fm_timera_tab[0x400];
extern double fm_timerb_tab[0x100];
extern float *src_buffer;
/* Function prototypes */
extern void sound_init(int rate);

View File

@ -35,6 +35,7 @@ uint32 count_z80;
uint32 line_z80;
int32 current_z80;
uint8 system_hw;
SRC_DATA src_data;
static inline void audio_update (void);
@ -65,9 +66,13 @@ void system_reset (void)
SN76489_Reset(0);
/* Sound Buffers */
memset (snd.psg.buffer, 0, SND_SIZE);
memset (snd.fm.buffer[0], 0, SND_SIZE*2);
memset (snd.fm.buffer[1], 0, SND_SIZE*2);
if (snd.psg.buffer) memset(snd.psg.buffer, 0, SND_SIZE);
if (snd.fm.buffer[0]) memset(snd.fm.buffer[0], 0, SND_SIZE*2);
if (snd.fm.buffer[1]) memset(snd.fm.buffer[1], 0, SND_SIZE*2);
/* SRC */
if (src_data.data_in) memset(src_data.data_in, 0, src_data.input_frames * 2 * sizeof(float));
if (src_data.data_out) memset(src_data.data_out,0, src_data.output_frames * 2 * sizeof(float));
}
/****************************************************************
@ -275,28 +280,30 @@ int audio_init (int rate)
snd.buffer[0] = (int16 *) malloc(SND_SIZE);
snd.buffer[1] = (int16 *) malloc(SND_SIZE);
if (!snd.buffer[0] || !snd.buffer[1]) return (-1);
memset (snd.buffer[0], 0, SND_SIZE);
memset (snd.buffer[1], 0, SND_SIZE);
#endif
/* YM2612 stream buffers */
snd.fm.buffer[0] = (int *)malloc (SND_SIZE*2);
snd.fm.buffer[1] = (int *)malloc (SND_SIZE*2);
if (!snd.fm.buffer[0] || !snd.fm.buffer[1]) return (-1);
memset (snd.fm.buffer[0], 0, SND_SIZE*2);
memset (snd.fm.buffer[1], 0, SND_SIZE*2);
/* SRC buffers */
/* YM2612 resampling */
src_data.data_in = NULL;
src_data.data_out = NULL;
if (config.hq_fm && !config.fm_core)
{
snd.fm.src_out = (float *) malloc(snd.buffer_size*2*sizeof(float));
if (!snd.fm.src_out) return (-1);
/* initialize SRC */
src_data.input_frames = (int)(((double)m68cycles_per_line * (double)lines_per_frame / 144.0) + 0.5);
src_data.output_frames = snd.buffer_size;
src_data.data_in = (float *)malloc(src_data.input_frames * 2 * sizeof(float));
src_data.data_out = (float *)malloc(src_data.output_frames * 2 * sizeof(float));
src_data.src_ratio = (double)src_data.output_frames / (double)src_data.input_frames;
if (!src_data.data_in || !src_data.data_out) return (-1);
}
/* SN76489 stream buffers */
snd.psg.buffer = (int16 *)malloc (SND_SIZE);
if (!snd.psg.buffer) return (-1);
memset (snd.psg.buffer, 0, SND_SIZE);
/* Set audio enable flag */
snd.enabled = 1;
@ -310,12 +317,13 @@ int audio_init (int rate)
void audio_shutdown(void)
{
/* free sound buffers */
if (snd.buffer[0]) free(snd.buffer[0]);
if (snd.buffer[1]) free(snd.buffer[1]);
if (snd.fm.buffer[0]) free(snd.fm.buffer[0]);
if (snd.fm.buffer[1]) free(snd.fm.buffer[1]);
if (snd.fm.src_out) free(snd.fm.src_out);
if (snd.psg.buffer) free(snd.psg.buffer);
if (snd.buffer[0]) free(snd.buffer[0]);
if (snd.buffer[1]) free(snd.buffer[1]);
if (snd.fm.buffer[0]) free(snd.fm.buffer[0]);
if (snd.fm.buffer[1]) free(snd.fm.buffer[1]);
if (snd.psg.buffer) free(snd.psg.buffer);
if (src_data.data_in) free(src_data.data_in);
if (src_data.data_out) free(src_data.data_out);
}
static int ll, rr;
@ -324,8 +332,8 @@ static inline void audio_update (void)
{
int i;
int l, r;
double psg_preamp = config.psg_preamp;
double fm_preamp = config.fm_preamp;
int psg_preamp = config.psg_preamp;
int fm_preamp = config.fm_preamp;
int boost = config.boost;
int filter = config.filter;
@ -339,9 +347,9 @@ static inline void audio_update (void)
/* mix samples */
for (i = 0; i < snd.buffer_size; i ++)
{
l = r = (int) ((double)snd.psg.buffer[i] * psg_preamp);
l += (int) ((double)snd.fm.buffer[0][i] * fm_preamp);
r += (int) ((double)snd.fm.buffer[1][i] * fm_preamp);
l = r = (snd.psg.buffer[i] * psg_preamp) / 100;
l += ((snd.fm.buffer[0][i] * fm_preamp) / 100);
r += ((snd.fm.buffer[1][i] * fm_preamp) / 100);
snd.fm.buffer[0][i] = 0;
snd.fm.buffer[1][i] = 0;
snd.psg.buffer[i] = 0;

View File

@ -24,6 +24,8 @@
#ifndef _SYSTEM_H_
#define _SYSTEM_H_
#include "samplerate.h"
#define SYSTEM_GENESIS 0
#define SYSTEM_MEGADRIVE 1
#define SYSTEM_PICO 2
@ -66,7 +68,6 @@ typedef struct
int curStage;
int lastStage;
int *buffer[2];
float *src_out; /* SRC conversion buffer */
} fm;
struct
{
@ -86,6 +87,7 @@ extern uint32 count_z80;
extern uint32 line_z80;
extern int32 current_z80;
extern uint8 system_hw;
extern SRC_DATA src_data;
/* Function prototypes */
extern void system_init (void);

View File

@ -91,9 +91,6 @@ static uint16 sat_base_mask; /* Base bits of SAT */
static uint16 sat_addr_mask; /* Index bits of SAT */
static uint32 dma_endCycles; /* 68k cycles to DMA end */
static uint8 dma_type; /* Type of DMA */
static double vdp_timings[4][4]; /* DMA timings */
static inline void vdp_reg_w(unsigned int r, unsigned int d);
/* DMA Timings
@ -122,31 +119,22 @@ static inline void vdp_reg_w(unsigned int r, unsigned int d);
CRAM or VSRAM for a 68K > VDP transfer, in which case it is in words.
*/
static const uint8 dma_rates[16] = {
8, 9, 83 , 102, /* 68K to VRAM */
16, 18, 167, 205, /* 68K to CRAM or VSRAM */
15, 17, 166, 204, /* DMA fill */
8, 9, 83 , 102, /* DMA Copy */
static const uint32 dma_rates[16] = {
8, 83, 9, 102, /* 68K to VRAM (1 word = 2 bytes) */
16, 167, 18, 205, /* 68K to CRAM or VSRAM */
15, 166, 17, 204, /* DMA fill */
8, 83, 9, 102, /* DMA Copy */
};
/* Function prototypes */
static inline void data_write(unsigned int data);
static inline void vdp_reg_w(unsigned int r, unsigned int d);
/*--------------------------------------------------------------------------*/
/* Init, reset, shutdown functions */
/*--------------------------------------------------------------------------*/
void vdp_init(void)
{
/* reinitialize DMA timings table */
int i;
for (i=0; i<4; i++)
{
vdp_timings[0][i] = ((double)m68cycles_per_line) / ((double) dma_rates[i]);
vdp_timings[1][i] = ((double)m68cycles_per_line) / ((double) dma_rates[i + 4]);
vdp_timings[2][i] = ((double)m68cycles_per_line) / ((double) dma_rates[i + 8]);
vdp_timings[3][i] = ((double)m68cycles_per_line) / ((double) dma_rates[i + 12]);
}
/* PAL/NTSC timings */
vdp_rate = vdp_pal ? 50 : 60;
lines_per_frame = vdp_pal ? 313 : 262;
@ -280,24 +268,27 @@ void vdp_restore(uint8 *vdp_regs)
/* Update DMA timings (this is called at the start of DMA and then at the start of each scanline) */
void dma_update()
{
int32 left_cycles;
uint32 dma_cycles, dma_bytes;
uint8 index = 0;
int dma_cycles = 0;
/* get the appropriate transfer rate (bytes/line) for this DMA operation */
if ((status&8) || !(reg[1] & 0x40)) index = 2; /* VBLANK or Display OFF */
index += (reg[12] & 1); /* 32 or 40 Horizontal Cells */
/* DMA timings table index */
int index = (4 * dma_type) + ((reg[12] & 1)*2);
if ((status&8) || !(reg[1] & 0x40)) index++;
/* calculate transfer quantity for the remaining 68k cycles */
left_cycles = line_m68k + m68cycles_per_line - count_m68k;
/* DMA transfer rate */
int rate = dma_rates[index];
/* 68k cycles left */
int left_cycles = (line_m68k + m68cycles_per_line) - count_m68k;
if (left_cycles < 0) left_cycles = 0;
dma_bytes = (uint32)(((double)left_cycles / vdp_timings[dma_type][index]) + 0.5);
/* DMA bytes left */
int dma_bytes = (left_cycles * rate) / m68cycles_per_line;
/* determine DMA length in CPU cycles */
if (dma_length < dma_bytes)
{
/* DMA will be finished during this line */
dma_cycles = (uint32)(((double)dma_length * vdp_timings[dma_type][index]) + 0.5);
dma_cycles = (dma_length * m68cycles_per_line) / rate;
dma_length = 0;
}
else
@ -307,6 +298,7 @@ void dma_update()
dma_length -= dma_bytes;
}
/* update 68k cycles counter */
if (dma_type < 2)
{
/* 68K COPY to V-RAM */