mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-11 08:39:13 +01:00
1bcad428ea
This makes the OS X build more robust and should help pave the way for the integration of the video plugins as well as LTO. There are now no more global class level namespace conflicts left, as evidenced by the fact that Dolphin can be linked with -all_load, not that you would want to. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6958 8ced0084-cf51-0410-be5f-012b33b47a6e
474 lines
13 KiB
C++
474 lines
13 KiB
C++
// Copyright (C) 2003-2009 Dolphin Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official SVN repository and contact information can be found at
|
|
// http://code.google.com/p/dolphin-emu/
|
|
|
|
#include "Common.h"
|
|
|
|
#include "Rasterizer.h"
|
|
#include "HwRasterizer.h"
|
|
#include "EfbInterface.h"
|
|
#include "BPMemLoader.h"
|
|
#include "XFMemLoader.h"
|
|
#include "Tev.h"
|
|
#include "Statistics.h"
|
|
#include "VideoConfig.h"
|
|
|
|
|
|
#define BLOCK_SIZE 2
|
|
|
|
#define CLAMP(x, a, b) (x>b)?b:(x<a)?a:x
|
|
|
|
// returns approximation of log2(f) in s28.4
|
|
// results are close enough to use for LOD
|
|
static inline s32 FixedLog2(float f)
|
|
{
|
|
u32 *x = (u32*)&f;
|
|
s32 logInt = ((*x & 0x7F800000) >> 19) - 2032; // integer part
|
|
s32 logFract = (*x & 0x007fffff) >> 19; // approximate fractional part
|
|
|
|
return logInt + logFract;
|
|
}
|
|
|
|
namespace Rasterizer
|
|
{
|
|
Slope ZSlope;
|
|
Slope WSlope;
|
|
Slope ColorSlopes[2][4];
|
|
Slope TexSlopes[8][3];
|
|
|
|
s32 vertex0X;
|
|
s32 vertex0Y;
|
|
float vertexOffsetX;
|
|
float vertexOffsetY;
|
|
|
|
s32 scissorLeft = 0;
|
|
s32 scissorTop = 0;
|
|
s32 scissorRight = 0;
|
|
s32 scissorBottom = 0;
|
|
|
|
Tev tev;
|
|
RasterBlock rasterBlock;
|
|
|
|
void Init()
|
|
{
|
|
tev.Init();
|
|
}
|
|
|
|
inline int iround(float x)
|
|
{
|
|
int t;
|
|
|
|
#if defined(_WIN32) && !defined(_M_X64)
|
|
__asm
|
|
{
|
|
fld x
|
|
fistp t
|
|
}
|
|
#else
|
|
t = (int)x;
|
|
if((x - t) >= 0.5)
|
|
return t + 1;
|
|
#endif
|
|
|
|
return t;
|
|
}
|
|
|
|
void SetScissor()
|
|
{
|
|
int xoff = bpmem.scissorOffset.x * 2 - 342;
|
|
int yoff = bpmem.scissorOffset.y * 2 - 342;
|
|
|
|
scissorLeft = bpmem.scissorTL.x - xoff - 342;
|
|
if (scissorLeft < 0) scissorLeft = 0;
|
|
|
|
scissorTop = bpmem.scissorTL.y - yoff - 342;
|
|
if (scissorTop < 0) scissorTop = 0;
|
|
|
|
scissorRight = bpmem.scissorBR.x - xoff - 341;
|
|
if (scissorRight > EFB_WIDTH) scissorRight = EFB_WIDTH;
|
|
|
|
scissorBottom = bpmem.scissorBR.y - yoff - 341;
|
|
if (scissorBottom > EFB_HEIGHT) scissorBottom = EFB_HEIGHT;
|
|
}
|
|
|
|
void SetTevReg(int reg, int comp, bool konst, s16 color)
|
|
{
|
|
tev.SetRegColor(reg, comp, konst, color);
|
|
}
|
|
|
|
inline void Draw(s32 x, s32 y, s32 xi, s32 yi)
|
|
{
|
|
INCSTAT(stats.thisFrame.rasterizedPixels);
|
|
|
|
float dx = vertexOffsetX + (float)(x - vertex0X);
|
|
float dy = vertexOffsetY + (float)(y - vertex0Y);
|
|
|
|
float zFloat = 1.0f + ZSlope.GetValue(dx, dy);
|
|
if (zFloat < 0.0f || zFloat > 1.0f)
|
|
return;
|
|
|
|
s32 z = (s32)(zFloat * 0x00ffffff);
|
|
|
|
if (bpmem.zcontrol.zcomploc && bpmem.zmode.testenable)
|
|
{
|
|
// early z
|
|
if (!EfbInterface::ZCompare(x, y, z))
|
|
return;
|
|
}
|
|
|
|
RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];
|
|
|
|
tev.Position[0] = x;
|
|
tev.Position[1] = y;
|
|
tev.Position[2] = z;
|
|
|
|
// colors
|
|
for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
|
|
{
|
|
for(int comp = 0; comp < 4; comp++)
|
|
tev.Color[i][comp] = (u8)ColorSlopes[i][comp].GetValue(dx, dy);
|
|
}
|
|
|
|
// tex coords
|
|
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
|
|
{
|
|
// multiply by 128 because TEV stores stores UVs as s17.7
|
|
tev.Uv[i].s = (s32)(pixel.Uv[i][0] * 128);
|
|
tev.Uv[i].t = (s32)(pixel.Uv[i][1] * 128);
|
|
}
|
|
|
|
for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++)
|
|
{
|
|
tev.IndirectLod[i] = rasterBlock.IndirectLod[i];
|
|
tev.IndirectLinear[i] = rasterBlock.IndirectLinear[i];
|
|
}
|
|
|
|
for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++)
|
|
{
|
|
tev.TextureLod[i] = rasterBlock.TextureLod[i];
|
|
tev.TextureLinear[i] = rasterBlock.TextureLinear[i];
|
|
}
|
|
|
|
tev.Draw();
|
|
}
|
|
|
|
void InitTriangle(float X1, float Y1, s32 xi, s32 yi)
|
|
{
|
|
vertex0X = xi;
|
|
vertex0Y = yi;
|
|
|
|
// adjust a little less than 0.5
|
|
const float adjust = 0.495f;
|
|
|
|
vertexOffsetX = ((float)xi - X1) + adjust;
|
|
vertexOffsetY = ((float)yi - Y1) + adjust;
|
|
}
|
|
|
|
void InitSlope(Slope *slope, float f1, float f2, float f3, float DX31, float DX12, float DY12, float DY31)
|
|
{
|
|
float DF31 = f3 - f1;
|
|
float DF21 = f2 - f1;
|
|
float a = DF31 * -DY12 - DF21 * DY31;
|
|
float b = DX31 * DF21 + DX12 * DF31;
|
|
float c = -DX12 * DY31 - DX31 * -DY12;
|
|
slope->dfdx = -a / c;
|
|
slope->dfdy = -b / c;
|
|
slope->f0 = f1;
|
|
}
|
|
|
|
inline void CalculateLOD(s32 &lod, bool &linear, u32 texmap, u32 texcoord)
|
|
{
|
|
FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
|
|
u8 subTexmap = texmap & 3;
|
|
|
|
// LOD calculation requires data from the texture mode for bias, etc.
|
|
// it does not seem to use the actual texture size
|
|
TexMode0& tm0 = texUnit.texMode0[subTexmap];
|
|
TexMode1& tm1 = texUnit.texMode1[subTexmap];
|
|
|
|
float sDelta, tDelta;
|
|
if (tm0.diag_lod)
|
|
{
|
|
float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
|
|
float *uv1 = rasterBlock.Pixel[1][1].Uv[texcoord];
|
|
|
|
sDelta = fabsf(uv0[0] - uv1[0]);
|
|
tDelta = fabsf(uv0[1] - uv1[1]);
|
|
}
|
|
else
|
|
{
|
|
float *uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
|
|
float *uv1 = rasterBlock.Pixel[1][0].Uv[texcoord];
|
|
float *uv2 = rasterBlock.Pixel[0][1].Uv[texcoord];
|
|
|
|
sDelta = max(fabsf(uv0[0] - uv1[0]), fabsf(uv0[0] - uv2[0]));
|
|
tDelta = max(fabsf(uv0[1] - uv1[1]), fabsf(uv0[1] - uv2[1]));
|
|
}
|
|
|
|
// get LOD in s28.4
|
|
lod = FixedLog2(max(sDelta, tDelta));
|
|
|
|
// bias is s2.5
|
|
int bias = tm0.lod_bias;
|
|
bias >>= 1;
|
|
lod += bias;
|
|
|
|
linear = ((lod > 0 && (tm0.min_filter & 4)) || (lod <= 0 && tm0.mag_filter));
|
|
|
|
// order of checks matters
|
|
// should be:
|
|
// if lod > max then max
|
|
// else if lod < min then min
|
|
lod = CLAMP(lod, (s32)tm1.min_lod, (s32)tm1.max_lod);
|
|
}
|
|
|
|
void BuildBlock(s32 blockX, s32 blockY)
|
|
{
|
|
for (s32 yi = 0; yi < BLOCK_SIZE; yi++)
|
|
{
|
|
for (s32 xi = 0; xi < BLOCK_SIZE; xi++)
|
|
{
|
|
RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi];
|
|
|
|
float dx = vertexOffsetX + (float)(xi + blockX - vertex0X);
|
|
float dy = vertexOffsetY + (float)(yi + blockY - vertex0Y);
|
|
|
|
float invW = 1.0f / WSlope.GetValue(dx, dy);
|
|
pixel.InvW = invW;
|
|
|
|
// tex coords
|
|
for (unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
|
|
{
|
|
float projection;
|
|
if (xfregs.texMtxInfo[i].projection)
|
|
{
|
|
float q = TexSlopes[i][2].GetValue(dx, dy) * invW;
|
|
projection = invW / q;
|
|
}
|
|
else
|
|
projection = invW;
|
|
|
|
pixel.Uv[i][0] = TexSlopes[i][0].GetValue(dx, dy) * projection;
|
|
pixel.Uv[i][1] = TexSlopes[i][1].GetValue(dx, dy) * projection;
|
|
}
|
|
}
|
|
}
|
|
|
|
u32 indref = bpmem.tevindref.hex;
|
|
for (unsigned int i = 0; i < bpmem.genMode.numindstages; i++)
|
|
{
|
|
u32 texmap = indref & 3;
|
|
indref >>= 3;
|
|
u32 texcoord = indref & 3;
|
|
indref >>= 3;
|
|
|
|
CalculateLOD(rasterBlock.IndirectLod[i], rasterBlock.IndirectLinear[i], texmap, texcoord);
|
|
}
|
|
|
|
for (unsigned int i = 0; i <= bpmem.genMode.numtevstages; i++)
|
|
{
|
|
int stageOdd = i&1;
|
|
TwoTevStageOrders &order = bpmem.tevorders[i >> 1];
|
|
if(order.getEnable(stageOdd))
|
|
{
|
|
u32 texmap = order.getTexMap(stageOdd);
|
|
u32 texcoord = order.getTexCoord(stageOdd);
|
|
|
|
CalculateLOD(rasterBlock.TextureLod[i], rasterBlock.TextureLinear[i], texmap, texcoord);
|
|
}
|
|
}
|
|
}
|
|
|
|
void DrawTriangleFrontFace(OutputVertexData *v0, OutputVertexData *v1, OutputVertexData *v2)
|
|
{
|
|
INCSTAT(stats.thisFrame.numTrianglesDrawn);
|
|
|
|
if (g_SWVideoConfig.bHwRasterizer)
|
|
{
|
|
HwRasterizer::DrawTriangleFrontFace(v0, v1, v2);
|
|
return;
|
|
}
|
|
|
|
// adapted from http://www.devmaster.net/forums/showthread.php?t=1884
|
|
|
|
// 28.4 fixed-pou32 coordinates. rounded to nearest and adjusted to match hardware output
|
|
// could also take floor and adjust -8
|
|
const s32 Y1 = iround(16.0f * v0->screenPosition[1]) - 9;
|
|
const s32 Y2 = iround(16.0f * v1->screenPosition[1]) - 9;
|
|
const s32 Y3 = iround(16.0f * v2->screenPosition[1]) - 9;
|
|
|
|
const s32 X1 = iround(16.0f * v0->screenPosition[0]) - 9;
|
|
const s32 X2 = iround(16.0f * v1->screenPosition[0]) - 9;
|
|
const s32 X3 = iround(16.0f * v2->screenPosition[0]) - 9;
|
|
|
|
// Deltas
|
|
const s32 DX12 = X1 - X2;
|
|
const s32 DX23 = X2 - X3;
|
|
const s32 DX31 = X3 - X1;
|
|
|
|
const s32 DY12 = Y1 - Y2;
|
|
const s32 DY23 = Y2 - Y3;
|
|
const s32 DY31 = Y3 - Y1;
|
|
|
|
// Fixed-pos32 deltas
|
|
const s32 FDX12 = DX12 << 4;
|
|
const s32 FDX23 = DX23 << 4;
|
|
const s32 FDX31 = DX31 << 4;
|
|
|
|
const s32 FDY12 = DY12 << 4;
|
|
const s32 FDY23 = DY23 << 4;
|
|
const s32 FDY31 = DY31 << 4;
|
|
|
|
// Bounding rectangle
|
|
s32 minx = (min(min(X1, X2), X3) + 0xF) >> 4;
|
|
s32 maxx = (max(max(X1, X2), X3) + 0xF) >> 4;
|
|
s32 miny = (min(min(Y1, Y2), Y3) + 0xF) >> 4;
|
|
s32 maxy = (max(max(Y1, Y2), Y3) + 0xF) >> 4;
|
|
|
|
// scissor
|
|
minx = max(minx, scissorLeft);
|
|
maxx = min(maxx, scissorRight);
|
|
miny = max(miny, scissorTop);
|
|
maxy = min(maxy, scissorBottom);
|
|
|
|
if (minx >= maxx || miny >= maxy)
|
|
return;
|
|
|
|
// Setup slopes
|
|
float fltx1 = v0->screenPosition.x;
|
|
float flty1 = v0->screenPosition.y;
|
|
float fltdx31 = v2->screenPosition.x - fltx1;
|
|
float fltdx12 = fltx1 - v1->screenPosition.x;
|
|
float fltdy12 = flty1 - v1->screenPosition.y;
|
|
float fltdy31 = v2->screenPosition.y - flty1;
|
|
|
|
InitTriangle(fltx1, flty1, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4);
|
|
|
|
float w[3] = { 1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w };
|
|
InitSlope(&WSlope, w[0], w[1], w[2], fltdx31, fltdx12, fltdy12, fltdy31);
|
|
|
|
InitSlope(&ZSlope, v0->screenPosition[2], v1->screenPosition[2], v2->screenPosition[2], fltdx31, fltdx12, fltdy12, fltdy31);
|
|
|
|
for(unsigned int i = 0; i < bpmem.genMode.numcolchans; i++)
|
|
{
|
|
for(int comp = 0; comp < 4; comp++)
|
|
InitSlope(&ColorSlopes[i][comp], v0->color[i][comp], v1->color[i][comp], v2->color[i][comp], fltdx31, fltdx12, fltdy12, fltdy31);
|
|
}
|
|
|
|
for(unsigned int i = 0; i < bpmem.genMode.numtexgens; i++)
|
|
{
|
|
for(int comp = 0; comp < 3; comp++)
|
|
InitSlope(&TexSlopes[i][comp], v0->texCoords[i][comp] * w[0], v1->texCoords[i][comp] * w[1], v2->texCoords[i][comp] * w[2], fltdx31, fltdx12, fltdy12, fltdy31);
|
|
}
|
|
|
|
// Start in corner of 8x8 block
|
|
minx &= ~(BLOCK_SIZE - 1);
|
|
miny &= ~(BLOCK_SIZE - 1);
|
|
|
|
// Half-edge constants
|
|
s32 C1 = DY12 * X1 - DX12 * Y1;
|
|
s32 C2 = DY23 * X2 - DX23 * Y2;
|
|
s32 C3 = DY31 * X3 - DX31 * Y3;
|
|
|
|
// Correct for fill convention
|
|
if(DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++;
|
|
if(DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++;
|
|
if(DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
|
|
|
|
// Loop through blocks
|
|
for(s32 y = miny; y < maxy; y += BLOCK_SIZE)
|
|
{
|
|
for(s32 x = minx; x < maxx; x += BLOCK_SIZE)
|
|
{
|
|
// Corners of block
|
|
s32 x0 = x << 4;
|
|
s32 x1 = (x + BLOCK_SIZE - 1) << 4;
|
|
s32 y0 = y << 4;
|
|
s32 y1 = (y + BLOCK_SIZE - 1) << 4;
|
|
|
|
// Evaluate half-space functions
|
|
bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0;
|
|
bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0;
|
|
bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0;
|
|
bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0;
|
|
int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3);
|
|
|
|
bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0;
|
|
bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0;
|
|
bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0;
|
|
bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0;
|
|
int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3);
|
|
|
|
bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0;
|
|
bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0;
|
|
bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0;
|
|
bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0;
|
|
int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3);
|
|
|
|
// Skip block when outside an edge
|
|
if(a == 0x0 || b == 0x0 || c == 0x0) continue;
|
|
|
|
BuildBlock(x, y);
|
|
|
|
// Accept whole block when totally covered
|
|
if(a == 0xF && b == 0xF && c == 0xF)
|
|
{
|
|
for(s32 iy = 0; iy < BLOCK_SIZE; iy++)
|
|
{
|
|
for(s32 ix = 0; ix < BLOCK_SIZE; ix++)
|
|
{
|
|
Draw(x + ix, y + iy, ix, iy);
|
|
}
|
|
}
|
|
}
|
|
else // Partially covered block
|
|
{
|
|
s32 CY1 = C1 + DX12 * y0 - DY12 * x0;
|
|
s32 CY2 = C2 + DX23 * y0 - DY23 * x0;
|
|
s32 CY3 = C3 + DX31 * y0 - DY31 * x0;
|
|
|
|
for(s32 iy = 0; iy < BLOCK_SIZE; iy++)
|
|
{
|
|
s32 CX1 = CY1;
|
|
s32 CX2 = CY2;
|
|
s32 CX3 = CY3;
|
|
|
|
for(s32 ix = 0; ix < BLOCK_SIZE; ix++)
|
|
{
|
|
if(CX1 > 0 && CX2 > 0 && CX3 > 0)
|
|
{
|
|
Draw(x + ix, y + iy, ix, iy);
|
|
}
|
|
|
|
CX1 -= FDY12;
|
|
CX2 -= FDY23;
|
|
CX3 -= FDY31;
|
|
}
|
|
|
|
CY1 += FDX12;
|
|
CY2 += FDX23;
|
|
CY3 += FDX31;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|