/*
 *  desktop -- The 3dfx Desktop Demo 
 *  COPYRIGHT 3DFX INTERACTIVE, INC. 1999
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "basics.h"
/*#include "tlib.h"*/
#include "clip.h"
/*#include "cpudetect.h"*/
#include "xforms.h"


//#define USE_ASM

// local variables
// 1.0f constant; the inline-assembly transform loads it via [ONE] as the
// numerator of the 1/W divide.
const float ONE = 1.0f;
// Matrix applied by the transform routines in this file; set by SetXformsMatrix().
static Matrix gMat;
// OR of the outcodes gathered since BeginXforms(); returned by EndXforms().
static long gAllOutcodes;
// Not referenced by the code visible in this file.
// NOTE(review): presumably the saved and replacement FPU control words -- confirm.
static unsigned short g_fpu_cw_old, g_fpu_cw_new;
// Most recently transformed vertex. Its outcode is computed lazily, either by
// the next transform call or by EndXforms(); NULL when nothing is pending.
static VertexData *gPrevVert;

// function prototypes
// Both implementations share one signature so that either can be bound to
// the XformVertex pointer below.
void XformVertex_C(VertexData *vert, float *v);
void XformVertex_ASM(VertexData *vert, float *v);

// function pointers
// XformVertex starts out bound to the implementation selected at compile
// time by USE_ASM; InitializeXforms() assigns it again at run time.
void (*XformVertex)(VertexData *vert, float *v) =
#ifdef USE_ASM
	XformVertex_ASM;
#else
	XformVertex_C;
#endif

// external variables
// Clip-volume bounds, defined elsewhere in the project. The inline-assembly
// transform reads gMinClip{X,Y,Z} / gMaxClip{X,Y,Z} directly while it builds
// the outcode of the previous vertex.
extern float gMinClipX, gMinClipY, gMinClipZ, gMaxClipX, gMaxClipY, gMaxClipZ;
// Not referenced by the code visible in this file.
// NOTE(review): presumably reciprocals of the Z clip bounds -- confirm.
extern float gMinClipZInv, gMaxClipZInv;
// Lookup table indexed by the 3-bit sign value (one bit per axis) that the
// inline-assembly outcode computation accumulates in ecx.
extern int gSignMaskTable[8];


// Binds the XformVertex dispatch pointer to the implementation this build
// should use.
//
// cpu_caps: detected CPU capabilities. It is accepted for interface
//           compatibility but not consulted: this file contains a single
//           assembly variant (x87), so the capability bits cannot change
//           which routine is selected.
void InitializeXforms(const CPUcaps *cpu_caps)
{
	// The original code tested cpu_caps->b_SIMD_support and then picked
	// XformVertex_ASM in both arms; skip the pointless dereference until a
	// SIMD-specific routine actually exists.
	(void)cpu_caps;

#ifdef USE_ASM
	XformVertex = XformVertex_ASM;
#else
	XformVertex = XformVertex_C;
#endif // USE_ASM
}

// Selects the matrix used by the transform routines in this file by copying
// m into the file-local gMat.
// NOTE(review): assumes MatrixCopy takes (destination, source) -- confirm
// against its declaration.
void SetXformsMatrix(Matrix m)
{
	MatrixCopy(gMat, m);
}

// Opens a batch of vertex transforms. Must be paired with EndXforms(), which
// flushes the last pending vertex and restores the FPU precision.
void BeginXforms()
{
	// forget everything accumulated by an earlier batch
	gAllOutcodes = 0;
	gPrevVert = NULL;

	// the transform code runs with the FPU set to 24-bit precision
	SetFPUprecision(PRECISION_24_BIT);
}

// Closes a batch opened with BeginXforms().
//
// The transform routines defer each vertex's outcode computation to the
// following call, so the last vertex of the batch is still pending here and
// is classified now. The FPU precision changed by BeginXforms() is restored.
//
// Returns the OR of the outcodes of every vertex transformed in the batch.
unsigned long EndXforms()
{
	if (gPrevVert)
	{
		gPrevVert->flags &= ~VERTEX_OUTCODE_MASK;
		gPrevVert->flags |= ComputeOutcode(&gPrevVert->vertex);
		// Accumulate only the outcode bits, matching what XformVertex_ASM
		// does; ORing in the whole flags word would leak unrelated vertex
		// flags into the returned value.
		gAllOutcodes |= gPrevVert->flags & VERTEX_OUTCODE_MASK;
	}

	RestoreFPUprecision();

	return gAllOutcodes;
}

#ifdef USE_ASM
// Transforms the vector v by the matrix gMat (x87 FPU, MSVC inline assembly),
// then computes 1/w and homogenizes (i.e. computes x/w and y/w).
// While waiting for the divide it also calculates, in integer registers, the
// clipping outcodes of the PREVIOUSLY transformed vertex (gPrevVert), if any,
// and ORs them into that vertex's flags and into gAllOutcodes.
// vert's x, y, z and oow fields are written (z receives w, which is used when
// clipping); vert's own flags are not touched by this call. On exit gPrevVert
// is set to vert.
// It assumes that v[3] is 1.0f pre-transform: the fourth matrix column is
// added without being multiplied.
//
// The function is naked, so arguments are read straight off the stack:
// on entry [esp + 4] = vert and [esp + 8] = v.
//
// The comment lines following each FPU instruction list the x87 register
// stack from st(0) downward.
// NOTE(review): the number in parentheses looks like the remaining latency
// (in clocks) of that value -- confirm the author's convention.
__declspec(naked) void XformVertex_ASM(VertexData *vert, float *v)
{
	__asm
	{
		mov			edx, [esp + 8] // edx = v
		lea			ecx, [gMat]

		push		edi
		push		esi

		fld			dword ptr [ecx + 48 + 0]
		// m30
		fmul		dword ptr [edx + 0]
		// m30*v0 (2)
		fld			dword ptr [ecx + 48 + 4]
		// m31
		// m30*v0 (1)
		fmul		dword ptr [edx + 4]
		// m31*v1 (2)
		// m30*v0 (0)
		fxch		st(1)
		// m30*v0 (0)
		// m31*v1 (2)
		fadd		dword ptr [ecx + 48 + 12]
		// m30*v0 + m33 (2)
		// m31*v1 (1)
		fld			dword ptr [ecx + 48 + 8]
		// m32
		// m30*v0 + m33 (1)
		// m31*v1 (0)
		fmul		dword ptr [edx + 8]
		// m32*v2 (2)
		// m30*v0 + m33 (0)
		// m31*v1 (0)
		fxch		st(1)
		// m30*v0 + m33 (0)
		// m32*v2 (2)
		// m31*v1 (0)
		faddp		st(2), st
		// m32*v2 (1)
		// m30*v0 + m31*v1 + m33 (2)
		fld			dword ptr [ecx + 0 + 0]
		// m00
		// m32*v2 (0)
		// m30*v0 + m31*v1 + m33 (1)
		fmul		dword ptr [edx + 0]
		// m00*v0 (2)
		// m32*v2 (0)
		// m30*v0 + m31*v1 + m33 (0)
		fxch		st(1)
		// m32*v2 (0)
		// m00*v0 (2)
		// m30*v0 + m31*v1 + m33 (0)
		faddp		st(2), st
		// m00*v0 (1)
		// W = m30*v0 + m31*v1 + m32*v2 + m33 (2)
		fld			dword ptr [ecx + 0 + 4]
		// m01
		// m00*v0 (0)
		// W (1)
		fmul		dword ptr [edx + 4]
		// m01*v1 (2)
		// m00*v0 (0)
		// W (0)
		fxch		st(1)
		// m00*v0 (0)
		// m01*v1 (2)
		// W (0)
		fadd		dword ptr [ecx + 0 + 12]
		// m00*v0 + m03 (2)
		// m01*v1 (1)
		// W (0)
		fld			dword ptr [ecx + 0 + 8]
		// m02
		// m00*v0 + m03 (1)
		// m01*v1 (0)
		// W (0)
		fmul		dword ptr [edx + 8]
		// m02*v2 (2)
		// m00*v0 + m03 (0)
		// m01*v1 (0)
		// W (0)
		fxch		st(1)
		// m00*v0 + m03 (0)
		// m02*v2 (2)
		// m01*v1 (0)
		// W (0)
		faddp		st(2), st
		// m02*v2 (1)
		// m00*v0 + m01*v1 + m03 (2)
		// W (0)
		fld			dword ptr [ecx + 16 + 0]
		// m10
		// m02*v2 (0)
		// m00*v0 + m01*v1 + m03 (1)
		// W (0)
		fmul		dword ptr [edx + 0]
		// m10*v0 (2)
		// m02*v2 (0)
		// m00*v0 + m01*v1 + m03 (0)
		// W (0)
		fxch		st(1)
		// m02*v2 (0)
		// m10*v0 (2)
		// m00*v0 + m01*v1 + m03 (0)
		// W (0)
		faddp		st(2), st
		// m10*v0 (1)
		// X = m00*v0 + m01*v1 + m02*v2 + m03 (2)
		// W (0)
		fld			dword ptr [ecx + 16 + 4]
		// m11
		// m10*v0 (0)
		// X (1)
		// W (0)
		fmul		dword ptr [edx + 4]
		// m11*v1 (2)
		// m10*v0 (0)
		// X (0)
		// W (0)
		fxch		st(1)
		// m10*v0 (0)
		// m11*v1 (2)
		// X (0)
		// W (0)
		fadd		dword ptr [ecx + 16 + 12]
		// m10*v0 + m13 (2)
		// m11*v1 (1)
		// X (0)
		// W (0)
		fld			dword ptr [ecx + 16 + 8]
		// m12
		// m10*v0 + m13 (1)
		// m11*v1 (0)
		// X (0)
		// W (0)
		fmul		dword ptr [edx + 8]
		// m12*v2 (2)
		// m10*v0 + m13 (0)
		// m11*v1 (0)
		// X (0)
		// W (0)
		fxch		st(1)
		// m10*v0 + m13 (0)
		// m12*v2 (2)
		// m11*v1 (0)
		// X (0)
		// W (0)
		faddp		st(2), st
		// m12*v2 (1)
		// m10*v0 + m11*v1 + m13 (2)
		// X (0)
		// W (0)

		// v is no longer needed: edx is reused for the previous vertex
		push		ebx
		mov			edx, [gPrevVert]

		fld			dword ptr [ONE]
		// 1.0f
		// m12*v2 (0)
		// m10*v0 + m11*v1 + m13 (0)
		// X (0)
		// W (0)
		fxch		st(1)
		// m12*v2 (0)
		// 1.0f
		// m10*v0 + m11*v1 + m13 (0)
		// X (0)
		// W (0)
		faddp		st(2), st
		// 1.0f
		// Y = m10*v0 + m11*v1 + m12*v2 + m13 (2)
		// X (0)
		// W (0)
		fdiv		st, st(3)
		// 1/W (18)
		// Y (1)
		// X (0)
		// W (0)

		// ******** do some integer math while the float divide is computing ********
		// the divide should only take 18 clocks since we should be in 24-bit fpu mode
		test		edx, edx
		jz			SETUP_VERTEX
		// (no previous vertex means there is nothing to classify)

		xor			eax, eax // eax = outcodes
		xor			ecx, ecx // ecx = sign mask

		// The previous vertex's float fields are loaded and compared as raw
		// 32-bit integers; each cmp/adc pair shifts one comparison result
		// (the carry flag) into eax, each add/adc pair shifts one sign bit
		// into ecx.
		// NOTE(review): the gSignMaskTable / z-sign fix-ups further down
		// presumably correct these integer comparisons for negative floats --
		// confirm against ComputeOutcode.
		mov			ebx, (GrVertex)[edx].z // ebx = z
		mov			esi, [gMaxClipZ]

		mov			edi, [gMinClipZ]
		cmp			esi, ebx // maxz - z

		adc			eax, eax
		cmp			ebx, edi // z - minz

		adc			eax, eax
		add			ebx, ebx // carry if ebx = z < 0

		adc			ecx, ecx
		mov			esi, [gMaxClipY]

		mov			ebx, (GrVertex)[edx].y // ebx = y
		mov			edi, [gMinClipY]

		cmp			esi, ebx // maxy - y
		mov			esi, [gMaxClipX]

		adc			eax, eax
		cmp			ebx, edi // y - miny

		adc			eax, eax
		add			ebx, ebx // carry if ebx = y < 0

		adc			ecx, ecx
		mov			ebx, (GrVertex)[edx].x // ebx = x

		mov			edi, [gMinClipX]
		cmp			esi, ebx // maxx - x

		adc			eax, eax
		cmp			ebx, edi // x - minx

		adc			eax, eax
		add			ebx, ebx // carry if ebx = x < 0

		adc			ecx, ecx
		mov			ebx, (GrVertex)[edx].z // ebx = z

		sar			ebx, 31 // sign mask of z
		mov			ecx, [gSignMaskTable + 4*ecx] // sign mask of zzyyxx

		xor			eax, ecx // outcodes if z > 0
		and			ebx, 0xf

		xor			eax, ebx
		mov			ecx, [gAllOutcodes]

		// replace the outcode bits in the previous vertex's flags and fold
		// the new outcode (only) into gAllOutcodes
		and			dword ptr (VertexData)[edx].flags, ~VERTEX_OUTCODE_MASK
		or			ecx, eax

		or			dword ptr (VertexData)[edx].flags, eax
		mov			[gAllOutcodes], ecx

SETUP_VERTEX:
		// edi, esi and ebx are still pushed (12 bytes), so vert is at [esp + 16]
		mov			eax, [esp + 16] // eax = vert
		pop			ebx

		// 1/W (0)
		// Y (0)
		// X (0)
		// W (0)
		fmul		st(2), st
		// 1/W (0)
		// Y (0)
		// X/W (2)
		// W (0)

		pop			esi
		mov			[gPrevVert], eax

		fmul		st(1), st
		// 1/W (0)
		// Y/W (2)
		// X/W (0)
		// W (0)

		pop			edi

		// pop the results in stack order: 1/W, Y/W, X/W, W
		fstp		dword ptr (GrVertex)[eax].oow
		fstp		dword ptr (GrVertex)[eax].y
		fstp		dword ptr (GrVertex)[eax].x
		fstp		dword ptr (GrVertex)[eax].z

		ret
	}
}
#endif

// Portable C counterpart of XformVertex_ASM.
//
// Transforms v by gMat, treating v as (v[0], v[1], v[2], 1): the fourth
// matrix column is added without being multiplied. Only the W, X and Y rows
// are evaluated. The result is written to vert->vertex as
//   oow = 1/W, x = X/W, y = Y/W, z = W.
//
// As a side effect the vertex handed to the PREVIOUS call (gPrevVert), if
// any, gets its outcode computed and stored in its flags, and that outcode is
// accumulated into gAllOutcodes. vert's own outcode is left for the next
// call or for EndXforms(). On return gPrevVert points at vert.
//
// vert: destination vertex; must not be NULL.
// v:    source coordinates; at least three floats are read.
void XformVertex_C(VertexData *vert, float *v)
{
	Vector pt;

	pt[W] = gMat[3][0]*v[0] + gMat[3][1]*v[1] + gMat[3][2]*v[2] + gMat[3][3];
	pt[X] = gMat[0][0]*v[0] + gMat[0][1]*v[1] + gMat[0][2]*v[2] + gMat[0][3];
	pt[Y] = gMat[1][0]*v[0] + gMat[1][1]*v[1] + gMat[1][2]*v[2] + gMat[1][3];

	vert->vertex.oow = 1.0f/pt[W];

	// Same position as in the assembly version, which classifies the
	// previous vertex while the divide is in flight.
	if (gPrevVert)
	{
		gPrevVert->flags &= ~VERTEX_OUTCODE_MASK;
		gPrevVert->flags |= ComputeOutcode(&gPrevVert->vertex);
		// Accumulate only the outcode bits, as XformVertex_ASM does; ORing
		// in the whole flags word would mix unrelated vertex flags into
		// the value EndXforms() returns.
		gAllOutcodes |= gPrevVert->flags & VERTEX_OUTCODE_MASK;
	}

	pt[X] *= vert->vertex.oow;
	pt[Y] *= vert->vertex.oow;

	vert->vertex.x = pt[X];
	vert->vertex.y = pt[Y];
	vert->vertex.z = pt[W];	// z carries W, not a transformed Z

	gPrevVert = vert;
}

#include "lighting.h"
// Transforms and lights an array of XVertex records.
//
// For each of the num_verts entries, point is transformed by gMat exactly as
// XformVertex_C does (oow = 1/W, x = X/W, y = Y/W, z = W) into xformed_vert,
// and DirLightDiffuseAndSpecular is called with the entry's normal and
// pointers to its diffuse and specular fields.
//
// Outcodes follow the same deferred scheme as the single-vertex routines:
// each iteration classifies the previously transformed vertex (gPrevVert)
// and accumulates its outcode into gAllOutcodes; the last vertex is left
// pending for the next transform call or EndXforms().
//
// verts:     array of at least num_verts entries.
// num_verts: number of entries to process; nothing happens when <= 0.
void XformXVertex(XVertex *verts, int num_verts)
{
	Vector pt;
	int i;
	float *v;
	VertexData *vert;

	for (i=0; i<num_verts; i++)
	{
		v = &verts[i].point[X];
		vert = &verts[i].xformed_vert;

		// The Z row of the matrix is not evaluated: its result was never
		// read, because vertex.z is filled with W below.
		pt[W] = gMat[3][0]*v[X] + gMat[3][1]*v[Y] + gMat[3][2]*v[Z] + gMat[3][3];
		pt[X] = gMat[0][0]*v[X] + gMat[0][1]*v[Y] + gMat[0][2]*v[Z] + gMat[0][3];
		pt[Y] = gMat[1][0]*v[X] + gMat[1][1]*v[Y] + gMat[1][2]*v[Z] + gMat[1][3];

		vert->vertex.oow = 1.f / pt[W];

		if (gPrevVert)
		{
			gPrevVert->flags &= ~VERTEX_OUTCODE_MASK;
			gPrevVert->flags |= ComputeOutcode(&gPrevVert->vertex);
			// Accumulate only the outcode bits, as XformVertex_ASM does;
			// ORing in the whole flags word would mix unrelated vertex
			// flags into the value EndXforms() returns.
			gAllOutcodes |= gPrevVert->flags & VERTEX_OUTCODE_MASK;
		}

		pt[X] *= vert->vertex.oow;
		pt[Y] *= vert->vertex.oow;

		vert->vertex.x = pt[X];
		vert->vertex.y = pt[Y];
		vert->vertex.z = pt[W];

		DirLightDiffuseAndSpecular(verts[i].norm, &verts[i].diffuse, &verts[i].specular);

		gPrevVert = vert;
	}
}
