;-----------------------------------------------------------------------------
; Torque Game Engine
; Copyright (C) GarageGames.com, Inc.
;-----------------------------------------------------------------------------
;
; Syntax: NASM, 32-bit x86.
;
; Tri-fan vertex packers.  Each routine walks an index list, fetches the
; referenced 16-byte source point, and writes one packed vertex per index to
; the destination buffer.  Texture coordinates are generated as
;       tc = plane.x*x + plane.y*y + plane.z*z + plane.w
; from the externally defined texGen0 / texGen1 tables.
;
; Calling convention: all arguments are read from the stack (see the in_*
; defines) and every routine returns with a plain `ret`, so the caller removes
; the arguments.  eax, ebx, ecx, esi, edi and ebp are saved and restored.
; edx and the flags are clobbered.  The x87 register stack is used (up to four
; slots) and is left balanced.
;
; NOTE: loop state is kept in the file-level variables below, so these
; routines are not reentrant.
;
; A point count <= 0 writes nothing.
;-----------------------------------------------------------------------------

segment .data

storeebp        dd 0                    ; frame pointer, parked while ebp is the loop counter
srcPoints       dd 0                    ; copy of the in_src_points argument
srcColors       dd 0                    ; copy of the in_srcColors argument
srcIndices      dd 0                    ; copy of the in_src_indices argument
numPoints       dd 0                    ; copy of the in_numpoints argument
two55           dd 0x437F0000           ; 255.0f
alpha           dd 0                    ; integer scratch for the fog alpha conversion

%ifdef LINUX
; No underscore needed for ELF object files
%define _texGen0 texGen0
%define _texGen1 texGen1
%define _fogCoordinatePointer fogCoordinatePointer
%endif

extern _texGen0
extern _texGen1
extern _fogCoordinatePointer

segment .text

;
; Stack arguments; these defines are shared by every function in this file.
; in_srcColors is valid only for the routines that take a colour array
; (SP, VC_TF, SP_FC, SP_FC_VB).
;
%define in_dst          [ebp+8]
%define in_src_points   [ebp+12]
%define in_src_indices  [ebp+16]
%define in_numpoints    [ebp+20]
%define in_srcColors    [ebp+24]

; Byte offsets of the two planes inside a texGen table.
%define TEXGEN_S        0
%define TEXGEN_T        16

;-----------------------------------------------------------------------------
; TRIFAN_ENTER / TRIFAN_LEAVE
; Shared prologue and epilogue.  Keeping them paired in one place guarantees
; the pushes and pops stay balanced in every routine.
;-----------------------------------------------------------------------------
%macro TRIFAN_ENTER 0
        push    ebp
        mov     ebp, esp
        push    eax
        push    ebx
        push    ecx
        push    edi
        push    esi
%endmacro

%macro TRIFAN_LEAVE 0
        pop     esi
        pop     edi
        pop     ecx
        pop     ebx
        pop     eax
        pop     ebp
        ret
%endmacro

;-----------------------------------------------------------------------------
; FETCH_POINT
; In:    ebp = current loop index
; Out:   eax = srcIndices[ebp] * 16
;        esi = srcPoints + eax          (address of the 16-byte source point)
;-----------------------------------------------------------------------------
%macro FETCH_POINT 0
        mov     esi, [srcIndices]
        lea     esi, [esi + ebp*4]
        mov     eax, dword [esi]
        shl     eax, 4                  ; idx *= 16
        mov     esi, [srcPoints]
        lea     esi, [esi + eax]
%endmacro

;-----------------------------------------------------------------------------
; TEXGEN_COORD table, planeOffset, dstOffset
; In:    esi = source point (x, y, z as floats at +0, +4, +8)
;        edi = destination vertex
; Does:  [edi + dstOffset] = p.x*x + p.y*y + p.z*z + p.w
;        where p is the four-float plane at [table + planeOffset].
; Clobb: x87 stack (four slots used, left balanced)
;-----------------------------------------------------------------------------
%macro TEXGEN_COORD 3
        fld     dword [%1 + %2 + 0]     ; plane.x
        fmul    dword [esi + 0]
        fld     dword [%1 + %2 + 4]     ; plane.y
        fmul    dword [esi + 4]
        fld     dword [%1 + %2 + 8]     ; plane.z
        fmul    dword [esi + 8]
        fld     dword [%1 + %2 + 12]    ; plane.w
        faddp   st3, st0
        faddp   st1, st0
        faddp   st1, st0
        fstp    dword [edi + %3]
%endmacro

;-----------------------------------------------------------------------------
; FOG_ALPHA dstOffset
; In:    esi = source point (float at +12), edi = destination vertex
; Does:  a = 255 - (int)(point[+12] * 255.0), negative results become 0;
;        [edi + dstOffset] = a << 24
; Clobb: eax, flags, [alpha]
;-----------------------------------------------------------------------------
%macro FOG_ALPHA 1
        fld     dword [esi + 12]
        fld     dword [two55]
        fmulp   st1, st0
        fistp   dword [alpha]
        mov     eax, 255
        sub     eax, [alpha]
        test    eax, eax
        jge     near %%nonneg
        xor     eax, eax
%%nonneg:
        shl     eax, 24
        mov     [edi + %1], eax
%endmacro

;-----------------------------------------------------------------------------
; processTriFan(dst, srcPoints, srcIndices, numPoints)
;
; Output vertex (32 bytes):
;   +0 x   +4 y   +8 z   +12 f (fourth dword of the source point)
;   +16 tc0.s   +20 tc0.t   +24 tc1.s   +28 tc1.t
;-----------------------------------------------------------------------------
; CodeWarrior sucks :P
%ifdef LINUX
global processTriFan
processTriFan:
%else
global _processTriFan
_processTriFan:
%endif
        TRIFAN_ENTER

        ; Store the destination and source pointers
        mov     eax, in_src_points
        mov     [srcPoints], eax
        mov     eax, in_src_indices
        mov     [srcIndices], eax
        mov     eax, in_numpoints
        mov     [numPoints], eax
        mov     edi, in_dst

        cmp     dword [numPoints], 0    ; nothing to do for an empty fan
        jle     near procPointDone1

        mov     [storeebp], ebp
        xor     ebp, ebp                ; ebp = loop index from here on

procPointLp1:
        ; This could be faster
        FETCH_POINT

        mov     eax, [esi + 0]          ; x
        mov     ebx, [esi + 4]          ; y
        mov     ecx, [esi + 8]          ; z
        mov     edx, [esi + 12]         ; f
        mov     [edi + 0], eax          ; <- x
        mov     [edi + 4], ebx          ; <- y
        mov     [edi + 8], ecx          ; <- z
        mov     [edi + 12], edx         ; <- f

        TEXGEN_COORD _texGen0, TEXGEN_S, 16     ; tc0.s
        TEXGEN_COORD _texGen0, TEXGEN_T, 20     ; tc0.t
        TEXGEN_COORD _texGen1, TEXGEN_S, 24     ; tc1.s
        TEXGEN_COORD _texGen1, TEXGEN_T, 28     ; tc1.t

        add     edi, 32
        inc     ebp
        cmp     ebp, [numPoints]
        jl      near procPointLp1

        mov     ebp, [storeebp]
procPointDone1:
        TRIFAN_LEAVE

;-----------------------------------------------------------------------------
; processTriFanSP(dst, srcPoints, srcIndices, numPoints, srcColors)
;
; Colours are read per loop position (srcColors[i]), not per point index.
;
; Output vertex (32-byte stride):
;   +0 x   +4 y   +8 z   +12 color   +16 tc0.s   +20 tc0.t
;   (+24 and +28 are not written)
;-----------------------------------------------------------------------------
; More suckage
%ifdef LINUX
global processTriFanSP
processTriFanSP:
%else
global _processTriFanSP
_processTriFanSP:
%endif
        TRIFAN_ENTER

        ; Store the destination and source pointers
        mov     eax, in_src_points
        mov     [srcPoints], eax
        mov     eax, in_src_indices
        mov     [srcIndices], eax
        mov     eax, in_numpoints
        mov     [numPoints], eax
        mov     eax, in_srcColors
        mov     [srcColors], eax
        mov     edi, in_dst

        cmp     dword [numPoints], 0    ; nothing to do for an empty fan
        jle     near procPointDone2

        mov     [storeebp], ebp
        xor     ebp, ebp                ; ebp = loop index from here on

procPointLp2:
        ; This could be faster
        FETCH_POINT

        mov     eax, [esi + 0]          ; x
        mov     ebx, [esi + 4]          ; y
        mov     ecx, [esi + 8]          ; z
        mov     edx, [srcColors]        ; color
        mov     [edi + 0], eax          ; <- x
        lea     edx, [edx + ebp*4]      ; color
        mov     [edi + 4], ebx          ; <- y
        mov     edx, [edx]              ; color
        mov     [edi + 8], ecx          ; <- z
        mov     [edi + 12], edx         ; <- color

        TEXGEN_COORD _texGen0, TEXGEN_S, 16     ; tc0.s
        TEXGEN_COORD _texGen0, TEXGEN_T, 20     ; tc0.t

        add     edi, 32
        inc     ebp
        cmp     ebp, [numPoints]
        jl      near procPointLp2

        mov     ebp, [storeebp]
procPointDone2:
        TRIFAN_LEAVE

;-----------------------------------------------------------------------------
; processTriFanVC_TF(dst, srcPoints, srcIndices, numPoints, srcColors)
;
; Also copies an 8-byte fog texture coordinate, indexed by point index, from
; the array that fogCoordinatePointer points to.
;
; Output vertex (32 bytes):
;   +0 x   +4 y   +8 z   +12 color
;   +16 fog coord (2 dwords)   +24 tc0.s   +28 tc0.t
;-----------------------------------------------------------------------------
; More suckage
%ifdef LINUX
global processTriFanVC_TF
processTriFanVC_TF:
%else
global _processTriFanVC_TF
_processTriFanVC_TF:
%endif
        TRIFAN_ENTER

        ; Store the destination and source pointers
        mov     eax, in_src_points
        mov     [srcPoints], eax
        mov     eax, in_src_indices
        mov     [srcIndices], eax
        mov     eax, in_numpoints
        mov     [numPoints], eax
        mov     eax, in_srcColors
        mov     [srcColors], eax
        mov     edi, in_dst

        cmp     dword [numPoints], 0    ; nothing to do for an empty fan
        jle     near procPointDone4

        mov     [storeebp], ebp
        xor     ebp, ebp                ; ebp = loop index from here on

procPointLp4:
        ; This could be faster
        FETCH_POINT                     ; leaves eax = idx * 16

        ; Fog tex coord
        mov     ebx, [_fogCoordinatePointer]
        shr     eax, 1                  ; idx*16 -> idx*8 (8 bytes per fog coord)
        lea     ebx, [ebx + eax]
        mov     ecx, [ebx + 0]
        mov     edx, [ebx + 4]
        mov     [edi + 16], ecx
        mov     [edi + 20], edx

        mov     eax, [esi + 0]          ; x
        mov     ebx, [esi + 4]          ; y
        mov     ecx, [esi + 8]          ; z
        mov     edx, [srcColors]        ; color
        mov     [edi + 0], eax          ; <- x
        lea     edx, [edx + ebp*4]      ; color
        mov     [edi + 4], ebx          ; <- y
        mov     edx, [edx]              ; color
        mov     [edi + 8], ecx          ; <- z
        mov     [edi + 12], edx         ; <- color

        TEXGEN_COORD _texGen0, TEXGEN_S, 24     ; tc0.s
        TEXGEN_COORD _texGen0, TEXGEN_T, 28     ; tc0.t

        add     edi, 32
        inc     ebp
        cmp     ebp, [numPoints]
        jl      near procPointLp4

        mov     ebp, [storeebp]
procPointDone4:
        ; The shared epilogue pops exactly what TRIFAN_ENTER pushed.  An extra
        ; `pop ebp` here would consume the return address.
        TRIFAN_LEAVE

;-----------------------------------------------------------------------------
; processTriFanSP_FC(dst, srcPoints, srcIndices, numPoints, srcColors)
;
; Output vertex (32-byte stride):
;   +0 x   +4 y   +8 z   +12 color   +16 tc0.s   +20 tc0.t
;   +24 fc (fourth dword of the source point; "lmcoord.x" in the original
;           comment)
;   (+28 is not written)
;-----------------------------------------------------------------------------
; More suckage
%ifdef LINUX
global processTriFanSP_FC
processTriFanSP_FC:
%else
global _processTriFanSP_FC
_processTriFanSP_FC:
%endif
        TRIFAN_ENTER

        ; Store the destination and source pointers
        mov     eax, in_src_points
        mov     [srcPoints], eax
        mov     eax, in_src_indices
        mov     [srcIndices], eax
        mov     eax, in_numpoints
        mov     [numPoints], eax
        mov     eax, in_srcColors
        mov     [srcColors], eax
        mov     edi, in_dst

        cmp     dword [numPoints], 0    ; nothing to do for an empty fan
        jle     near procPointDone2_fc

        mov     [storeebp], ebp
        xor     ebp, ebp                ; ebp = loop index from here on

procPointLp2_fc:
        ; This could be faster
        FETCH_POINT

        mov     eax, [esi + 0]          ; x
        mov     ebx, [esi + 4]          ; y
        mov     ecx, [esi + 8]          ; z
        mov     edx, [esi + 12]         ; fc
        mov     [edi + 0], eax          ; <- x
        mov     [edi + 4], ebx          ; <- y
        mov     [edi + 8], ecx          ; <- z
        mov     [edi + 24], edx         ; <- fc (lmcoord.x)

        mov     edx, [srcColors]        ; color
        lea     edx, [edx + ebp*4]      ; color
        mov     edx, [edx]              ; color
        mov     [edi + 12], edx         ; <- color

        TEXGEN_COORD _texGen0, TEXGEN_S, 16     ; tc0.s
        TEXGEN_COORD _texGen0, TEXGEN_T, 20     ; tc0.t

        add     edi, 32
        inc     ebp
        cmp     ebp, [numPoints]
        jl      near procPointLp2_fc

        mov     ebp, [storeebp]
procPointDone2_fc:
        TRIFAN_LEAVE

;-----------------------------------------------------------------------------
; processTriFanFC_VB(dst, srcPoints, srcIndices, numPoints)
;
; Output vertex (36 bytes):
;   +0 x   +4 y   +8 z   +12 0xFFFFFFFF (constant colour)
;   +16 fog alpha in the top byte (see FOG_ALPHA)
;   +20 tc1.s   +24 tc1.t   +28 tc0.s   +32 tc0.t
;-----------------------------------------------------------------------------
; CodeWarrior still sucks :P
%ifdef LINUX
global processTriFanFC_VB
processTriFanFC_VB:
%else
global _processTriFanFC_VB
_processTriFanFC_VB:
%endif
        TRIFAN_ENTER

        ; Store the destination and source pointers
        mov     eax, in_src_points
        mov     [srcPoints], eax
        mov     eax, in_src_indices
        mov     [srcIndices], eax
        mov     eax, in_numpoints
        mov     [numPoints], eax
        mov     edi, in_dst

        cmp     dword [numPoints], 0    ; nothing to do for an empty fan
        jle     near procPointDone1_fc_vb

        mov     [storeebp], ebp
        xor     ebp, ebp                ; ebp = loop index from here on

procPointLp1_fc_vb:
        ; This could be faster
        FETCH_POINT

        mov     eax, [esi + 0]          ; x
        mov     ebx, [esi + 4]          ; y
        mov     ecx, [esi + 8]          ; z
        mov     edx, 0xFFFFFFFF         ; c
        mov     [edi + 0], eax          ; <- x
        mov     [edi + 4], ebx          ; <- y
        mov     [edi + 8], ecx          ; <- z
        mov     [edi + 12], edx         ; <- c

        FOG_ALPHA 16                    ; <- f

        TEXGEN_COORD _texGen0, TEXGEN_S, 28     ; tc0.s
        TEXGEN_COORD _texGen0, TEXGEN_T, 32     ; tc0.t
        TEXGEN_COORD _texGen1, TEXGEN_S, 20     ; tc1.s
        TEXGEN_COORD _texGen1, TEXGEN_T, 24     ; tc1.t

        add     edi, 36
        inc     ebp
        cmp     ebp, [numPoints]
        jl      near procPointLp1_fc_vb

        mov     ebp, [storeebp]
procPointDone1_fc_vb:
        TRIFAN_LEAVE

;-----------------------------------------------------------------------------
; processTriFanSP_FC_VB(dst, srcPoints, srcIndices, numPoints, srcColors)
;
; Output vertex (28 bytes):
;   +0 x   +4 y   +8 z
;   +12 color with byte 0 and byte 2 exchanged
;       (presumably an RGBA <-> BGRA conversion - confirm against the caller)
;   +16 fog alpha in the top byte (see FOG_ALPHA)
;   +20 tc0.s   +24 tc0.t
;-----------------------------------------------------------------------------
; More suckage
%ifdef LINUX
global processTriFanSP_FC_VB
processTriFanSP_FC_VB:
%else
global _processTriFanSP_FC_VB
_processTriFanSP_FC_VB:
%endif
        TRIFAN_ENTER

        ; Store the destination and source pointers
        mov     eax, in_src_points
        mov     [srcPoints], eax
        mov     eax, in_src_indices
        mov     [srcIndices], eax
        mov     eax, in_numpoints
        mov     [numPoints], eax
        mov     eax, in_srcColors
        mov     [srcColors], eax
        mov     edi, in_dst

        cmp     dword [numPoints], 0    ; nothing to do for an empty fan
        jle     near procPointDone2_fc_vb

        mov     [storeebp], ebp
        xor     ebp, ebp                ; ebp = loop index from here on

procPointLp2_fc_vb:
        ; This could be faster
        FETCH_POINT

        mov     eax, [esi + 0]          ; x
        mov     ebx, [esi + 4]          ; y
        mov     ecx, [esi + 8]          ; z
        mov     [edi + 0], eax          ; <- x
        mov     [edi + 4], ebx          ; <- y
        mov     [edi + 8], ecx          ; <- z

        FOG_ALPHA 16                    ; <- fc

        mov     edx, [srcColors]        ; color
        lea     edx, [edx + ebp*4]      ; color
        mov     edx, [edx]              ; color

        ; Exchange byte 0 and byte 2, keep bytes 1 and 3 in place.
        mov     eax, edx
        mov     ebx, 0x00FF00FF
        and     edx, ebx                ; edx = bytes 0 and 2
        not     ebx                     ; ebx = 0xFF00FF00
        rol     edx, 16                 ; swap them
        and     eax, ebx                ; eax = bytes 1 and 3
        or      edx, eax
        mov     [edi + 12], edx         ; <- color

        TEXGEN_COORD _texGen0, TEXGEN_S, 20     ; tc0.s
        TEXGEN_COORD _texGen0, TEXGEN_T, 24     ; tc0.t

        add     edi, 28
        inc     ebp
        cmp     ebp, [numPoints]
        jl      near procPointLp2_fc_vb

        mov     ebp, [storeebp]
procPointDone2_fc_vb:
        TRIFAN_LEAVE