1134 lines
32 KiB
C++
1134 lines
32 KiB
C++
// NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
|
|
// Copyright (C) 2010 Winch Gate Property Limited
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as
|
|
// published by the Free Software Foundation, either version 3 of the
|
|
// License, or (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#include "std3d.h"
|
|
|
|
#include "nel/misc/common.h"
|
|
#include "nel/3d/lod_character_manager.h"
|
|
#include "nel/3d/lod_character_shape.h"
|
|
#include "nel/3d/lod_character_shape_bank.h"
|
|
#include "nel/3d/lod_character_instance.h"
|
|
#include "nel/misc/hierarchical_timer.h"
|
|
#include "nel/misc/fast_floor.h"
|
|
#include "nel/3d/lod_character_texture.h"
|
|
#include "nel/3d/ray_mesh.h"
|
|
#include "nel/misc/file.h"
|
|
#include "nel/misc/algo.h"
|
|
#include "nel/misc/fast_mem.h"
|
|
#include "nel/misc/system_info.h"
|
|
|
|
|
|
using namespace std;
|
|
using namespace NLMISC;
|
|
|
|
namespace NL3D
|
|
{
|
|
|
|
|
|
// ***************************************************************************
|
|
// Dest is without Normal because precomputed
|
|
#define NL3D_CLOD_VERTEX_FORMAT (CVertexBuffer::PositionFlag | CVertexBuffer::TexCoord0Flag | CVertexBuffer::PrimaryColorFlag)
|
|
#define NL3D_CLOD_VERTEX_SIZE 24
|
|
#define NL3D_CLOD_UV_OFF 12
|
|
#define NL3D_CLOD_COLOR_OFF 20
|
|
|
|
// size (in block) of the big texture.
|
|
#define NL3D_CLOD_TEXT_NLOD_WIDTH 16
|
|
#define NL3D_CLOD_TEXT_NLOD_HEIGHT 16
|
|
#define NL3D_CLOD_TEXT_NUM_IDS NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_NLOD_HEIGHT
|
|
#define NL3D_CLOD_BIGTEXT_WIDTH NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_WIDTH
|
|
#define NL3D_CLOD_BIGTEXT_HEIGHT NL3D_CLOD_TEXT_NLOD_HEIGHT*NL3D_CLOD_TEXT_HEIGHT
|
|
|
|
// Default texture color. Alpha must be 255
|
|
#define NL3D_CLOD_DEFAULT_TEXCOLOR CRGBA(255,255,255,255)
|
|
|
|
|
|
// ***************************************************************************
|
|
CLodCharacterManager::CLodCharacterManager()
|
|
{
|
|
_MaxNumVertices= 3000;
|
|
_NumVBHard= 8;
|
|
_Rendering= false;
|
|
_LockDone= false;
|
|
|
|
// setup the texture.
|
|
_BigTexture= new CTextureBlank;
|
|
// The texture always reside in memory... This take 1Mo of RAM. (16*32*16*32 * 4)
|
|
// NB: this is simplier like that, and this is not a problem, since only 1 or 2 Mo are allocated :o)
|
|
_BigTexture->setReleasable(false);
|
|
// create the bitmap.
|
|
_BigTexture->resize(NL3D_CLOD_BIGTEXT_WIDTH, NL3D_CLOD_BIGTEXT_HEIGHT, CBitmap::RGBA);
|
|
// Format of texture, 16 bits and no mipmaps.
|
|
_BigTexture->setUploadFormat(ITexture::RGB565);
|
|
_BigTexture->setFilterMode(ITexture::Linear, ITexture::LinearMipMapOff);
|
|
_BigTexture->setWrapS(ITexture::Clamp);
|
|
_BigTexture->setWrapT(ITexture::Clamp);
|
|
|
|
// Alloc free Ids
|
|
_FreeIds.resize(NL3D_CLOD_TEXT_NUM_IDS);
|
|
for(uint i=0;i<_FreeIds.size();i++)
|
|
{
|
|
_FreeIds[i]= i;
|
|
}
|
|
|
|
// setup the material
|
|
_Material.initUnlit();
|
|
_Material.setAlphaTest(true);
|
|
_Material.setDoubleSided(true);
|
|
_Material.setTexture(0, _BigTexture);
|
|
|
|
// setup for lighting, Default for Ryzom setup
|
|
_LightCorrectionMatrix.rotateZ((float)Pi/2);
|
|
_LightCorrectionMatrix.invert();
|
|
NL_SET_IB_NAME(_Triangles, "CLodCharacterManager::_Triangles");
|
|
}
|
|
|
|
|
|
// ***************************************************************************
|
|
CLodCharacterManager::~CLodCharacterManager()
|
|
{
|
|
reset();
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::reset()
|
|
{
|
|
nlassert(!isRendering());
|
|
|
|
// delete shapeBanks.
|
|
for(uint i=0;i<_ShapeBankArray.size();i++)
|
|
{
|
|
if(_ShapeBankArray[i])
|
|
delete _ShapeBankArray[i];
|
|
}
|
|
|
|
// clears containers
|
|
contReset(_ShapeBankArray);
|
|
contReset(_ShapeMap);
|
|
|
|
// reset render part.
|
|
_VertexStream.release();
|
|
}
|
|
|
|
// ***************************************************************************
|
|
uint32 CLodCharacterManager::createShapeBank()
|
|
{
|
|
// search a free entry
|
|
for(uint i=0;i<_ShapeBankArray.size();i++)
|
|
{
|
|
// if ree, use it.
|
|
if(_ShapeBankArray[i]==NULL)
|
|
{
|
|
_ShapeBankArray[i]= new CLodCharacterShapeBank;
|
|
return i;
|
|
}
|
|
}
|
|
|
|
// no free entrey, resize array.
|
|
_ShapeBankArray.push_back(new CLodCharacterShapeBank);
|
|
return (uint32)_ShapeBankArray.size()-1;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
const CLodCharacterShapeBank *CLodCharacterManager::getShapeBank(uint32 bankId) const
|
|
{
|
|
if(bankId>=_ShapeBankArray.size())
|
|
return NULL;
|
|
else
|
|
return _ShapeBankArray[bankId];
|
|
}
|
|
|
|
// ***************************************************************************
|
|
CLodCharacterShapeBank *CLodCharacterManager::getShapeBank(uint32 bankId)
|
|
{
|
|
if(bankId>=_ShapeBankArray.size())
|
|
return NULL;
|
|
else
|
|
return _ShapeBankArray[bankId];
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::deleteShapeBank(uint32 bankId)
|
|
{
|
|
if(bankId>=_ShapeBankArray.size())
|
|
{
|
|
if(_ShapeBankArray[bankId])
|
|
{
|
|
delete _ShapeBankArray[bankId];
|
|
_ShapeBankArray[bankId]= NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
// ***************************************************************************
|
|
sint32 CLodCharacterManager::getShapeIdByName(const std::string &name) const
|
|
{
|
|
CstItStrIdMap it= _ShapeMap.find(name);
|
|
if(it==_ShapeMap.end())
|
|
return -1;
|
|
else
|
|
return it->second;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
const CLodCharacterShape *CLodCharacterManager::getShape(uint32 shapeId) const
|
|
{
|
|
// split the id
|
|
uint bankId= shapeId >> 16;
|
|
uint shapeInBankId= shapeId &0xFFFF;
|
|
|
|
// if valid bankId
|
|
const CLodCharacterShapeBank *shapeBank= getShapeBank(bankId);
|
|
if(shapeBank)
|
|
{
|
|
// return the shape from the bank
|
|
return shapeBank->getShape(shapeInBankId);
|
|
}
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
bool CLodCharacterManager::compile()
|
|
{
|
|
bool error= false;
|
|
|
|
// clear the map
|
|
contReset(_ShapeMap);
|
|
|
|
// build the map
|
|
for(uint i=0; i<_ShapeBankArray.size(); i++)
|
|
{
|
|
if(_ShapeBankArray[i])
|
|
{
|
|
// Parse all Shapes
|
|
for(uint j=0; j<_ShapeBankArray[i]->getNumShapes(); j++)
|
|
{
|
|
// build the shape Id
|
|
uint shapeId= (i<<16) + j;
|
|
|
|
// get the shape
|
|
const CLodCharacterShape *shape= _ShapeBankArray[i]->getShape(j);
|
|
if(shape)
|
|
{
|
|
const string &name= shape->getName();
|
|
ItStrIdMap it= _ShapeMap.find(name);
|
|
if(it == _ShapeMap.end())
|
|
// insert the id in the map
|
|
_ShapeMap.insert(make_pair(name, shapeId));
|
|
else
|
|
{
|
|
error= true;
|
|
nlwarning("Found a Character Lod with same name in the manager: %s", name.c_str());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
// ***************************************************************************
|
|
// Render
|
|
// ***************************************************************************
|
|
// ***************************************************************************
|
|
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::setMaxVertex(uint32 maxVertex)
|
|
{
|
|
// we must not be between beginRender() and endRender()
|
|
nlassert(!isRendering());
|
|
_MaxNumVertices= maxVertex;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::setVertexStreamNumVBHard(uint32 numVBHard)
|
|
{
|
|
// we must not be between beginRender() and endRender()
|
|
nlassert(!isRendering());
|
|
_NumVBHard= numVBHard;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::beginRender(IDriver *driver, const CVector &managerPos)
|
|
{
|
|
H_AUTO( NL3D_CharacterLod_beginRender );
|
|
|
|
// we must not be between beginRender() and endRender()
|
|
nlassert(!isRendering());
|
|
|
|
// Reset render
|
|
//=================
|
|
_CurrentVertexId=0;
|
|
_CurrentTriId= 0;
|
|
|
|
// update Driver.
|
|
//=================
|
|
nlassert(driver);
|
|
|
|
// test change of vertexStream setup
|
|
bool mustChangeVertexStream= _VertexStream.getDriver() != driver;
|
|
if(!mustChangeVertexStream)
|
|
{
|
|
mustChangeVertexStream= _MaxNumVertices != _VertexStream.getMaxVertices();
|
|
mustChangeVertexStream= mustChangeVertexStream || _NumVBHard != _VertexStream.getNumVB();
|
|
}
|
|
// re-init?
|
|
if( mustChangeVertexStream )
|
|
{
|
|
// chech offset
|
|
CVertexBuffer vb;
|
|
vb.setVertexFormat(NL3D_CLOD_VERTEX_FORMAT);
|
|
// NB: addRenderCharacterKey() loop hardCoded for Vertex+UV+Normal+Color only.
|
|
nlassert( NL3D_CLOD_UV_OFF == vb.getTexCoordOff());
|
|
nlassert( NL3D_CLOD_COLOR_OFF == vb.getColorOff());
|
|
|
|
// Setup the vertex stream
|
|
_VertexStream.release();
|
|
_VertexStream.init(driver, NL3D_CLOD_VERTEX_FORMAT, _MaxNumVertices, _NumVBHard, "CLodManagerVB", false); // nb : don't use volatile lock as we keep the buffer locked
|
|
}
|
|
|
|
// prepare for render.
|
|
//=================
|
|
|
|
// Do not Lock Buffer now (will be done at the first instance added)
|
|
nlassert(!_LockDone);
|
|
_VertexSize= _VertexStream.getVertexSize();
|
|
// NB: addRenderCharacterKey() loop hardCoded for Vertex+UV+Normal+Color only.
|
|
nlassert( _VertexSize == NL3D_CLOD_VERTEX_SIZE ); // Vector + Normal + UV + RGBA
|
|
|
|
|
|
// Alloc a minimum of primitives (2*vertices), to avoid as possible reallocation in addRenderCharacterKey
|
|
if(_Triangles.getNumIndexes()<_MaxNumVertices * 2)
|
|
{
|
|
_Triangles.setFormat(NL_LOD_CHARACTER_INDEX_FORMAT);
|
|
_Triangles.setNumIndexes(_MaxNumVertices * 2);
|
|
}
|
|
|
|
// Local manager matrix
|
|
_ManagerMatrixPos= managerPos;
|
|
|
|
// Ok, start rendering
|
|
_Rendering= true;
|
|
}
|
|
|
|
|
|
// ***************************************************************************
|
|
static inline void computeLodLighting(CRGBA &lightRes, const CVector &lightObjectSpace, const CVector &normalPtr, CRGBA ambient, CRGBA diffuse)
|
|
{
|
|
float f= lightObjectSpace * normalPtr;
|
|
sint f8= NLMISC::OptFastFloor(f);
|
|
fastClamp8(f8);
|
|
sint r,g,b;
|
|
r= (diffuse.R * f8)>>8;
|
|
g= (diffuse.G * f8)>>8;
|
|
b= (diffuse.B * f8)>>8;
|
|
r+= ambient.R;
|
|
g+= ambient.G;
|
|
b+= ambient.B;
|
|
fastClamp8(r);
|
|
fastClamp8(g);
|
|
fastClamp8(b);
|
|
lightRes.R= r;
|
|
lightRes.G= g;
|
|
lightRes.B= b;
|
|
}
|
|
|
|
|
|
// ***************************************************************************
|
|
bool CLodCharacterManager::addRenderCharacterKey(CLodCharacterInstance &instance, const CMatrix &worldMatrix,
|
|
CRGBA paramAmbient, CRGBA paramDiffuse, const CVector &lightDir)
|
|
{
|
|
H_AUTO ( NL3D_CharacterLod_AddRenderKey )
|
|
|
|
nlassert(_VertexStream.getDriver());
|
|
// we must be between beginRender() and endRender()
|
|
nlassert(isRendering());
|
|
|
|
|
|
// regroup all variables that will be accessed in the ASM loop (minimize cache problems)
|
|
uint numVertices;
|
|
const CLodCharacterShape::CVector3s *vertPtr;
|
|
const CVector *normalPtr;
|
|
const CUV *uvPtr;
|
|
const uint8 *alphaPtr;
|
|
CVector lightObjectSpace;
|
|
CVector matPos;
|
|
float a00, a01, a02;
|
|
float a10, a11, a12;
|
|
float a20, a21, a22;
|
|
uint64 blank= 0;
|
|
CRGBA ambient= paramAmbient;
|
|
CRGBA diffuse= paramDiffuse;
|
|
// For ASM / MMX, must set 0 to alpha part, because replaced by *alphaPtr (with add)
|
|
ambient.A= 0;
|
|
diffuse.A= 0;
|
|
|
|
|
|
// Get the Shape and current key.
|
|
//=============
|
|
|
|
// get the shape
|
|
const CLodCharacterShape *clod= getShape(instance.ShapeId);
|
|
// if not found quit, return true
|
|
if(!clod)
|
|
return true;
|
|
|
|
// get UV/Normal array. NULL => error
|
|
normalPtr= clod->getNormals();
|
|
// get UV of the instance
|
|
uvPtr= instance.getUVs();
|
|
// uvPtr is NULL means that initInstance() has not been called!!
|
|
nlassert(normalPtr && uvPtr);
|
|
|
|
// get the anim key
|
|
CVector unPackScaleFactor;
|
|
vertPtr= clod->getAnimKey(instance.AnimId, instance.AnimTime, instance.WrapMode, unPackScaleFactor);
|
|
// if not found quit, return true
|
|
if(!vertPtr)
|
|
return true;
|
|
// get num verts
|
|
numVertices= clod->getNumVertices();
|
|
|
|
// empty shape??
|
|
if(numVertices==0)
|
|
return true;
|
|
|
|
// If too many vertices, quit, returning false.
|
|
if(_CurrentVertexId+numVertices > _MaxNumVertices)
|
|
return false;
|
|
|
|
// get alpha array
|
|
static vector<uint8> defaultAlphaArray;
|
|
// get the instance alpha if correctly setuped
|
|
if(instance.VertexAlphas.size() == numVertices)
|
|
{
|
|
alphaPtr= &instance.VertexAlphas[0];
|
|
}
|
|
// if error, take 255 as alpha.
|
|
else
|
|
{
|
|
// NB: still use an array. This case should never arise, but support it not at full optim.
|
|
if(defaultAlphaArray.size()<numVertices)
|
|
defaultAlphaArray.resize(numVertices, 255);
|
|
alphaPtr= &defaultAlphaArray[0];
|
|
}
|
|
|
|
// Lock Buffer if not done
|
|
//=============
|
|
|
|
// Do this after code above because we are sure that we will fill something (numVertices>0)
|
|
if(!_LockDone)
|
|
{
|
|
_VertexData= _VertexStream.lock();
|
|
_LockDone= true;
|
|
}
|
|
|
|
// After lock, For D3D, the VertexColor may be in BGRA format
|
|
if(_VertexStream.isBRGA())
|
|
{
|
|
// then swap only the B and R (no cpu cycle added per vertex)
|
|
ambient.swapBR();
|
|
diffuse.swapBR();
|
|
}
|
|
|
|
|
|
// Prepare Transform
|
|
//=============
|
|
|
|
// HTimerInfo: all this block takes 0.1%
|
|
|
|
// Get matrix pos.
|
|
matPos= worldMatrix.getPos();
|
|
// compute in manager space.
|
|
matPos -= _ManagerMatrixPos;
|
|
// Get rotation line vectors
|
|
const float *worldM= worldMatrix.get();
|
|
a00= worldM[0]; a01= worldM[4]; a02= worldM[8];
|
|
a10= worldM[1]; a11= worldM[5]; a12= worldM[9];
|
|
a20= worldM[2]; a21= worldM[6]; a22= worldM[10];
|
|
|
|
// get the light in object space.
|
|
// Multiply light dir with transpose of worldMatrix. This may be not exact (not uniform scale) but sufficient.
|
|
lightObjectSpace.x= a00 * lightDir.x + a10 * lightDir.y + a20 * lightDir.z;
|
|
lightObjectSpace.y= a01 * lightDir.x + a11 * lightDir.y + a21 * lightDir.z;
|
|
lightObjectSpace.z= a02 * lightDir.x + a12 * lightDir.y + a22 * lightDir.z;
|
|
// animation User correction
|
|
lightObjectSpace= _LightCorrectionMatrix.mulVector(lightObjectSpace);
|
|
// normalize, and neg for Dot Product.
|
|
lightObjectSpace.normalize();
|
|
lightObjectSpace= -lightObjectSpace;
|
|
// preMul by 255 for RGBA uint8
|
|
lightObjectSpace*= 255;
|
|
|
|
// multiply matrix with scale factor for Pos.
|
|
a00*= unPackScaleFactor.x; a01*= unPackScaleFactor.y; a02*= unPackScaleFactor.z;
|
|
a10*= unPackScaleFactor.x; a11*= unPackScaleFactor.y; a12*= unPackScaleFactor.z;
|
|
a20*= unPackScaleFactor.x; a21*= unPackScaleFactor.y; a22*= unPackScaleFactor.z;
|
|
|
|
// get dst Array.
|
|
uint8 *dstPtr;
|
|
dstPtr= _VertexData + _CurrentVertexId * _VertexSize;
|
|
|
|
|
|
/* PreCaching Note: CFastMem::precache() has been tested (done on the 4 arrays) but not very interesting,
|
|
maybe because the cache miss improve //ism a bit below.
|
|
*/
|
|
|
|
// Fill the VB
|
|
//=============
|
|
#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
|
|
// optimized version
|
|
if(CSystemInfo::hasMMX())
|
|
{
|
|
H_AUTO( NL3D_CharacterLod_vertexFill );
|
|
|
|
if(numVertices)
|
|
{
|
|
sint f8;
|
|
|
|
/* NB: order is important for AGP filling optimisation in dstPtr
|
|
|
|
Pentium2+ optimisation notes:
|
|
|
|
- "uop" comment formating:
|
|
A/B means "A micro-ops in port 0, and B micro-ops in port 2". (port 1 is very rare for FPU)
|
|
A/B/C/D means "A micro-ops in port 0, B in port 2, C in port 3 and D in port 4".
|
|
The number in () is the delay (if any).
|
|
- the "compute lighting part" must done first, because of the "fistp f8" mem writes that must
|
|
be place far away from the "mov eax, f8" read in clamp lighting part
|
|
(else seems that it crashes all the //ism)
|
|
- No need to Interleave on Pentium2+. But prevents "write/read stall" by putting the write
|
|
far away from the next read. Else stall of 3 cycles + BIG BREAK OF //ism (I think).
|
|
This had save me 120 cycles / 240 !!!
|
|
|
|
BenchResults:
|
|
- The "transform vertex part" and "all next part" cost 42 cycles, but is somewhat optimal:
|
|
63 uop (=> min 21 cycles), but 36 uop in the P0 port (=> this is the bottleneck)
|
|
- The lighting part adds 1 cycle only ????? (44 cycles) But still relevant and optimal:
|
|
43 uop in port P0!!!!
|
|
- The UV part adds 4 cycles (47) (should not since 0 in Port P0), still acceptable.
|
|
- The clamp part adds 3 cycles (50), and add 11 cycles in "P0 or P1" (but heavy dependency)
|
|
If we assume all goes into P1, it should takes 0... still acceptable (optimal==43?)
|
|
- The alpha part adds 2 cycles (52, optimal=45). OK.
|
|
- The modulate part adds 15 cycles. OK
|
|
|
|
TOTAL: 67 cycles in theory (write in RAM, no cache miss problem)
|
|
BENCH: ASM version: 91 cycles (Write in AGP, some cache miss problems, still good against 67)
|
|
C version: 316 cycles.
|
|
*/
|
|
__asm
|
|
{
|
|
mov edi, dstPtr
|
|
theLoop:
|
|
// **** compute lighting
|
|
mov esi,normalPtr // uop: 0/1
|
|
// dot3
|
|
fld dword ptr [esi] // uop: 0/1
|
|
fmul lightObjectSpace.x // uop: 1/1 (5)
|
|
fld dword ptr [esi+4] // uop: 0/1
|
|
fmul lightObjectSpace.y // uop: 1/1 (5)
|
|
faddp st(1),st // uop: 1/0 (3)
|
|
fld dword ptr [esi+8] // uop: 0/1
|
|
fmul lightObjectSpace.z // uop: 1/1 (5)
|
|
faddp st(1),st // uop: 1/0 (3)
|
|
fistp f8 // uop: 2/0/1/1 (5)
|
|
// next
|
|
add esi, 12 // uop: 1/0
|
|
mov normalPtr, esi // uop: 0/0/1/1
|
|
|
|
|
|
// **** transform vertex, and store
|
|
mov esi, vertPtr // uop: 0/1
|
|
fild word ptr[esi] // uop: 3/1 (5)
|
|
fild word ptr[esi+2] // uop: 3/1 (5)
|
|
fild word ptr[esi+4] // uop: 3/1 (5)
|
|
// x
|
|
fld a00 // uop: 0/1
|
|
fmul st, st(3) // uop: 1/0 (5)
|
|
fld a01 // uop: 0/1
|
|
fmul st, st(3) // uop: 1/0 (5)
|
|
faddp st(1), st // uop: 1/0 (3)
|
|
fld a02 // uop: 0/1
|
|
fmul st, st(2) // uop: 1/0 (5)
|
|
faddp st(1), st // uop: 1/0 (3)
|
|
fld matPos.x // uop: 0/1
|
|
faddp st(1), st // uop: 1/0 (3)
|
|
fstp dword ptr[edi] // uop: 0/0/1/1
|
|
// y
|
|
fld a10
|
|
fmul st, st(3)
|
|
fld a11
|
|
fmul st, st(3)
|
|
faddp st(1), st
|
|
fld a12
|
|
fmul st, st(2)
|
|
faddp st(1), st
|
|
fld matPos.y
|
|
faddp st(1), st
|
|
fstp dword ptr[edi+4]
|
|
// z
|
|
fld a20
|
|
fmul st, st(3)
|
|
fld a21
|
|
fmul st, st(3)
|
|
faddp st(1), st
|
|
fld a22
|
|
fmul st, st(2)
|
|
faddp st(1), st
|
|
fld matPos.z
|
|
faddp st(1), st
|
|
fstp dword ptr[edi+8]
|
|
// flush stack
|
|
fstp st // uop: 1/0
|
|
fstp st // uop: 1/0
|
|
fstp st // uop: 1/0
|
|
// next
|
|
add esi, 6 // uop: 1/0
|
|
mov vertPtr, esi // uop: 0/0/1/1
|
|
|
|
|
|
// **** copy uv
|
|
mov esi, uvPtr // uop: 0/1
|
|
mov eax, [esi] // uop: 0/1
|
|
mov [edi+NL3D_CLOD_UV_OFF], eax // uop: 0/0/1/1
|
|
mov ebx, [esi+4] // uop: 0/1
|
|
mov [edi+NL3D_CLOD_UV_OFF+4], ebx // uop: 0/0/1/1
|
|
// next
|
|
add esi, 8 // uop: 1/0
|
|
mov uvPtr, esi // uop: 0/0/1/1
|
|
|
|
|
|
// **** Clamp lighting
|
|
// clamp to 0 only. will be clamped to 255 by MMX
|
|
mov eax, f8 // uop: 0/1
|
|
cmp eax, 0x80000000 // if>=0 => CF=1
|
|
sbb ebx, ebx // if>=0 => CF==1 => ebx=0xFFFFFFFF
|
|
and eax, ebx // if>=0 => eax unchanged, else eax=0 (clamped)
|
|
|
|
|
|
// **** Modulate lighting modulate with diffuse color, add ambient term, using MMX
|
|
movd mm0, eax // 0000000L uop: 1/0
|
|
packuswb mm0, mm0 // 000L000L uop: 1/0 (p1)
|
|
packuswb mm0, mm0 // 0L0L0L0L uop: 1/0 (p1)
|
|
movd mm1, diffuse // uop: 0/1
|
|
punpcklbw mm1, blank // uop: 1/1 (p1)
|
|
pmullw mm0, mm1 // diffuse*L uop: 1/0 (3)
|
|
psrlw mm0, 8 // 0A0B0G0R uop: 1/0 (p1)
|
|
packuswb mm0, blank // 0000ABGR uop: 1/1 (p1)
|
|
movd mm2, ambient // uop: 0/1
|
|
paddusb mm0, mm2 // uop: 1/0
|
|
movd ebx, mm0 // ebx= AABBGGRR uop: 1/0
|
|
// NB: emms is not so bad on P2+: delay of 6, +11 (NB: far better than no MMX instructions)
|
|
emms // uop: 11/0 (6). (?????)
|
|
|
|
|
|
// **** append alpha, and store
|
|
mov esi, alphaPtr // uop: 0/1
|
|
movzx eax, byte ptr[esi] // uop: 0/1
|
|
shl eax, 24 // uop: 1/0
|
|
add ebx, eax // uop: 1/0
|
|
// now, ebx= AABBGGRR
|
|
mov [edi+NL3D_CLOD_COLOR_OFF], ebx // uop: 0/0/1/1
|
|
// next
|
|
add esi, 1 // uop: 1/0
|
|
mov alphaPtr, esi // uop: 0/0/1/1
|
|
|
|
|
|
// **** next
|
|
add edi, NL3D_CLOD_VERTEX_SIZE // uop: 1/0
|
|
|
|
mov eax, numVertices // uop: 0/1
|
|
dec eax // uop: 1/0
|
|
mov numVertices, eax // uop: 0/0/1/1
|
|
|
|
jnz theLoop // uop: 1/1 (p1)
|
|
|
|
// To have same behavior than c code
|
|
mov dstPtr, edi
|
|
}
|
|
}
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
H_AUTO( NL3D_CharacterLod_vertexFill );
|
|
|
|
CVector fVect;
|
|
|
|
for(;numVertices>0;)
|
|
{
|
|
// NB: order is important for AGP filling optimisation
|
|
// transform vertex, and store.
|
|
CVector *dstVector= (CVector*)dstPtr;
|
|
fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z;
|
|
++vertPtr;
|
|
dstVector->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;
|
|
dstVector->y= a10 * fVect.x + a11 * fVect.y + a12 * fVect.z + matPos.y;
|
|
dstVector->z= a20 * fVect.x + a21 * fVect.y + a22 * fVect.z + matPos.z;
|
|
// Copy UV
|
|
*(CUV*)(dstPtr + NL3D_CLOD_UV_OFF)= *uvPtr;
|
|
++uvPtr;
|
|
|
|
// Compute Lighting.
|
|
CRGBA lightRes;
|
|
computeLodLighting(lightRes, lightObjectSpace, *normalPtr, ambient, diffuse);
|
|
++normalPtr;
|
|
lightRes.A= *alphaPtr;
|
|
++alphaPtr;
|
|
// store.
|
|
*((CRGBA*)(dstPtr + NL3D_CLOD_COLOR_OFF))= lightRes;
|
|
|
|
// next
|
|
dstPtr+= NL3D_CLOD_VERTEX_SIZE;
|
|
numVertices--;
|
|
}
|
|
}
|
|
|
|
// Add Primitives.
|
|
//=============
|
|
|
|
{
|
|
H_AUTO( NL3D_CharacterLod_primitiveFill )
|
|
|
|
// get number of tri indexes
|
|
uint numTriIdxs= clod->getNumTriangles() * 3;
|
|
|
|
// Yoyo: there is an assert with getPtr(). Not sure, but maybe arise if numTriIdxs==0
|
|
if(numTriIdxs)
|
|
{
|
|
// realloc tris if needed.
|
|
if(_CurrentTriId+numTriIdxs > _Triangles.getNumIndexes())
|
|
{
|
|
_Triangles.setFormat(NL_LOD_CHARACTER_INDEX_FORMAT);
|
|
_Triangles.setNumIndexes(_CurrentTriId+numTriIdxs);
|
|
}
|
|
|
|
// reindex and copy tris
|
|
CIndexBufferReadWrite iba;
|
|
_Triangles.lock(iba);
|
|
const TLodCharacterIndexType *srcIdx= clod->getTriangleArray();
|
|
nlassert(sizeof(TLodCharacterIndexType) == _Triangles.getIndexNumBytes());
|
|
TLodCharacterIndexType *dstIdx= (TLodCharacterIndexType *) iba.getPtr()+_CurrentTriId;
|
|
for(;numTriIdxs>0;numTriIdxs--, srcIdx++, dstIdx++)
|
|
{
|
|
*dstIdx= *srcIdx + _CurrentVertexId;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Next
|
|
//=============
|
|
|
|
// Inc Vertex count.
|
|
_CurrentVertexId+= clod->getNumVertices();
|
|
// Inc Prim count.
|
|
_CurrentTriId+= clod->getNumTriangles() * 3;
|
|
|
|
|
|
// key added
|
|
return true;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::endRender()
|
|
{
|
|
H_AUTO ( NL3D_CharacterLod_endRender );
|
|
|
|
IDriver *driver= _VertexStream.getDriver();
|
|
nlassert(driver);
|
|
// we must be between beginRender() and endRender()
|
|
nlassert(isRendering());
|
|
|
|
// if something rendered
|
|
if(_LockDone)
|
|
{
|
|
// UnLock Buffer.
|
|
_VertexStream.unlock(_CurrentVertexId);
|
|
_LockDone= false;
|
|
|
|
// Render the VBuffer and the primitives.
|
|
if(_CurrentTriId>0)
|
|
{
|
|
// setup matrix.
|
|
CMatrix managerMatrix;
|
|
managerMatrix.setPos(_ManagerMatrixPos);
|
|
driver->setupModelMatrix(managerMatrix);
|
|
|
|
// active VB
|
|
_VertexStream.activate();
|
|
|
|
// render triangles
|
|
driver->activeIndexBuffer(_Triangles);
|
|
driver->renderTriangles(_Material, 0, _CurrentTriId/3);
|
|
}
|
|
|
|
// swap Stream VBHard
|
|
_VertexStream.swapVBHard();
|
|
}
|
|
|
|
// Ok, end rendering
|
|
_Rendering= false;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::setupNormalCorrectionMatrix(const CMatrix &normalMatrix)
|
|
{
|
|
_LightCorrectionMatrix= normalMatrix;
|
|
_LightCorrectionMatrix.setPos(CVector::Null);
|
|
_LightCorrectionMatrix.invert();
|
|
}
|
|
|
|
|
|
// ***************************************************************************
|
|
// ***************************************************************************
|
|
// Texturing.
|
|
// ***************************************************************************
|
|
// ***************************************************************************
|
|
|
|
|
|
// ***************************************************************************
|
|
CLodCharacterTmpBitmap::CLodCharacterTmpBitmap()
|
|
{
|
|
reset();
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterTmpBitmap::reset()
|
|
{
|
|
// setup a 1*1 bitmap
|
|
_Bitmap.resize(1);
|
|
_Bitmap[0]= CRGBA::Black;
|
|
_WidthPower=0;
|
|
_UShift= 8;
|
|
_VShift= 8;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterTmpBitmap::build(const NLMISC::CBitmap &bmpIn)
|
|
{
|
|
uint width= bmpIn.getWidth();
|
|
uint height= bmpIn.getHeight();
|
|
nlassert(width>0 && width<=256);
|
|
nlassert(height>0 && height<=256);
|
|
|
|
// resize bitmap.
|
|
_Bitmap.resize(width*height);
|
|
_WidthPower= getPowerOf2(width);
|
|
// compute shift
|
|
_UShift= 8-getPowerOf2(width);
|
|
_VShift= 8-getPowerOf2(height);
|
|
|
|
// convert the bitmap.
|
|
CBitmap bmp= bmpIn;
|
|
bmp.convertToType(CBitmap::RGBA);
|
|
CRGBA *src= (CRGBA*)&bmp.getPixels()[0];
|
|
CRGBA *dst= _Bitmap.getPtr();
|
|
for(sint nPix= width*height;nPix>0;nPix--, src++, dst++)
|
|
{
|
|
*dst= *src;
|
|
}
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterTmpBitmap::build(CRGBA col)
|
|
{
|
|
// setup a 1*1 bitmap and set it with col
|
|
reset();
|
|
_Bitmap[0]= col;
|
|
}
|
|
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::initInstance(CLodCharacterInstance &instance)
|
|
{
|
|
// first release in (maybe) other manager.
|
|
if(instance._Owner)
|
|
instance._Owner->releaseInstance(instance);
|
|
|
|
// get the shape
|
|
const CLodCharacterShape *clod= getShape(instance.ShapeId);
|
|
// if not found quit
|
|
if(!clod)
|
|
return;
|
|
// get Uvs.
|
|
const CUV *uvSrc= clod->getUVs();
|
|
nlassert(uvSrc);
|
|
|
|
|
|
// Ok, init header
|
|
instance._Owner= this;
|
|
instance._UVs.resize(clod->getNumVertices());
|
|
|
|
// allocate an id. If cannot, then fill Uvs with 0 => filled with Black. (see endTextureCompute() why).
|
|
if(_FreeIds.empty())
|
|
{
|
|
// set a "Not enough memory" id
|
|
instance._TextureId= NL3D_CLOD_TEXT_NUM_IDS;
|
|
CUV uv(0,0);
|
|
fill(instance._UVs.begin(), instance._UVs.end(), uv);
|
|
}
|
|
// else OK, can instanciate the Uvs.
|
|
else
|
|
{
|
|
// get the id.
|
|
instance._TextureId= _FreeIds.back();
|
|
_FreeIds.pop_back();
|
|
// get the x/y.
|
|
uint xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
|
|
uint yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;
|
|
// compute the scale/bias to apply to Uvs.
|
|
float scaleU= 1.0f / NL3D_CLOD_TEXT_NLOD_WIDTH;
|
|
float scaleV= 1.0f / NL3D_CLOD_TEXT_NLOD_HEIGHT;
|
|
float biasU= (float)xId / NL3D_CLOD_TEXT_NLOD_WIDTH;
|
|
float biasV= (float)yId / NL3D_CLOD_TEXT_NLOD_HEIGHT;
|
|
// apply it to each UVs.
|
|
CUV *uvDst= &instance._UVs[0];
|
|
for(uint i=0; i<instance._UVs.size();i++)
|
|
{
|
|
uvDst[i].U= biasU + uvSrc[i].U*scaleU;
|
|
uvDst[i].V= biasV + uvSrc[i].V*scaleV;
|
|
}
|
|
}
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::releaseInstance(CLodCharacterInstance &instance)
|
|
{
|
|
if(instance._Owner==NULL)
|
|
return;
|
|
nlassert(this==instance._Owner);
|
|
|
|
// if the id is not a "Not enough memory" id, release it.
|
|
if(instance._TextureId>=0 && instance._TextureId<NL3D_CLOD_TEXT_NUM_IDS)
|
|
_FreeIds.push_back(instance._TextureId);
|
|
|
|
// reset the instance
|
|
instance._Owner= NULL;
|
|
instance._TextureId= -1;
|
|
contReset(instance._UVs);
|
|
}
|
|
|
|
|
|
// ***************************************************************************
|
|
CRGBA *CLodCharacterManager::getTextureInstance(CLodCharacterInstance &instance)
|
|
{
|
|
nlassert(instance._Owner==this);
|
|
nlassert(instance._TextureId!=-1);
|
|
// if the texture id is a "not enough memory", quit.
|
|
if(instance._TextureId==NL3D_CLOD_TEXT_NUM_IDS)
|
|
return NULL;
|
|
|
|
// get the x/y.
|
|
uint xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
|
|
uint yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;
|
|
|
|
// get the ptr on the correct pixel.
|
|
CRGBA *pix= (CRGBA*)&_BigTexture->getPixels(0)[0];
|
|
return pix + yId*NL3D_CLOD_TEXT_HEIGHT*NL3D_CLOD_BIGTEXT_WIDTH + xId*NL3D_CLOD_TEXT_WIDTH;
|
|
}
|
|
|
|
|
|
// ***************************************************************************
|
|
bool CLodCharacterManager::startTextureCompute(CLodCharacterInstance &instance)
|
|
{
|
|
CRGBA *dst= getTextureInstance(instance);
|
|
if(!dst)
|
|
return false;
|
|
|
|
// erase the texture with 0,0,0,255. Alpha is actually the min "Quality" part of the CTUVQ.
|
|
CRGBA col= NL3D_CLOD_DEFAULT_TEXCOLOR;
|
|
for(uint y=0;y<NL3D_CLOD_TEXT_HEIGHT;y++)
|
|
{
|
|
// erase the line
|
|
for(uint x=0;x<NL3D_CLOD_TEXT_WIDTH;x++)
|
|
dst[x]= col;
|
|
// Next line
|
|
dst+= NL3D_CLOD_BIGTEXT_WIDTH;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::addTextureCompute(CLodCharacterInstance &instance, const CLodCharacterTexture &lodTexture)
|
|
{
|
|
CRGBA *dst= getTextureInstance(instance);
|
|
if(!dst)
|
|
return;
|
|
|
|
// get lookup ptr.
|
|
nlassert(lodTexture.Texture.size()==NL3D_CLOD_TEXT_SIZE);
|
|
const CLodCharacterTexture::CTUVQ *lookUpPtr= &lodTexture.Texture[0];
|
|
|
|
// apply the lodTexture, taking only better quality (ie nearer 0)
|
|
for(uint y=0;y<NL3D_CLOD_TEXT_HEIGHT;y++)
|
|
{
|
|
// erase the line
|
|
for(uint x=0;x<NL3D_CLOD_TEXT_WIDTH;x++)
|
|
{
|
|
CLodCharacterTexture::CTUVQ lut= *lookUpPtr;
|
|
// if this quality is better than the one stored
|
|
if(lut.Q<dst[x].A)
|
|
{
|
|
// get what texture to read, and read the pixel.
|
|
CRGBA col= _TmpBitmaps[lut.T].getPixel(lut.U, lut.V);
|
|
// set quality.
|
|
col.A= lut.Q;
|
|
// set in dest
|
|
dst[x]= col;
|
|
}
|
|
|
|
// next lookup
|
|
lookUpPtr++;
|
|
}
|
|
// Next line
|
|
dst+= NL3D_CLOD_BIGTEXT_WIDTH;
|
|
}
|
|
}
|
|
|
|
// ***************************************************************************
|
|
void CLodCharacterManager::endTextureCompute(CLodCharacterInstance &instance, uint numBmpToReset)
|
|
{
|
|
CRGBA *dst= getTextureInstance(instance);
|
|
if(!dst)
|
|
return;
|
|
|
|
// reset All Alpha values to 255 => no AlphaTest problems
|
|
for(uint y=0;y<NL3D_CLOD_TEXT_HEIGHT;y++)
|
|
{
|
|
// erase the line
|
|
for(uint x=0;x<NL3D_CLOD_TEXT_WIDTH;x++)
|
|
{
|
|
dst[x].A= 255;
|
|
}
|
|
// Next line
|
|
dst+= NL3D_CLOD_BIGTEXT_WIDTH;
|
|
}
|
|
|
|
// If the id == 0 then must reset the 0,0 Pixel to black. for the "Not Enough memory" case in initInstance().
|
|
if(instance._TextureId==0)
|
|
*(CRGBA*)&_BigTexture->getPixels(0)[0]= NL3D_CLOD_DEFAULT_TEXCOLOR;
|
|
|
|
// get the x/y.
|
|
uint xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
|
|
uint yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;
|
|
// touch the texture for Driver update.
|
|
_BigTexture->touchRect(
|
|
CRect(xId*NL3D_CLOD_TEXT_WIDTH, yId*NL3D_CLOD_TEXT_HEIGHT, NL3D_CLOD_TEXT_WIDTH, NL3D_CLOD_TEXT_HEIGHT) );
|
|
|
|
// reset tmpBitmaps / free memory.
|
|
for(uint i=0; i<numBmpToReset; i++)
|
|
{
|
|
_TmpBitmaps[i].reset();
|
|
}
|
|
|
|
// TestYoyo
|
|
/*NLMISC::COFile f("tam.tga");
|
|
_BigTexture->writeTGA(f,32);*/
|
|
}
|
|
|
|
|
|
// ***************************************************************************
|
|
bool CLodCharacterManager::fastIntersect(const CLodCharacterInstance &instance, const NLMISC::CMatrix &toRaySpace, float &dist2D, float &distZ, bool computeDist2D)
|
|
{
|
|
H_AUTO ( NL3D_CharacterLod_fastIntersect )
|
|
|
|
uint numVertices;
|
|
const CLodCharacterShape::CVector3s *vertPtr;
|
|
CVector matPos;
|
|
float a00, a01, a02;
|
|
float a10, a11, a12;
|
|
float a20, a21, a22;
|
|
|
|
|
|
// Get the Shape and current key.
|
|
//=============
|
|
|
|
// get the shape
|
|
const CLodCharacterShape *clod= getShape(instance.ShapeId);
|
|
// if not found quit
|
|
if(!clod)
|
|
return false;
|
|
|
|
// get the anim key
|
|
CVector unPackScaleFactor;
|
|
vertPtr= clod->getAnimKey(instance.AnimId, instance.AnimTime, instance.WrapMode, unPackScaleFactor);
|
|
// if not found quit
|
|
if(!vertPtr)
|
|
return false;
|
|
// get num verts
|
|
numVertices= clod->getNumVertices();
|
|
|
|
// empty shape??
|
|
if(numVertices==0)
|
|
return false;
|
|
|
|
// Prepare Transform
|
|
//=============
|
|
|
|
// Get matrix pos.
|
|
matPos= toRaySpace.getPos();
|
|
// Get rotation line vectors
|
|
const float *rayM= toRaySpace.get();
|
|
a00= rayM[0]; a01= rayM[4]; a02= rayM[8];
|
|
a10= rayM[1]; a11= rayM[5]; a12= rayM[9];
|
|
a20= rayM[2]; a21= rayM[6]; a22= rayM[10];
|
|
|
|
// multiply matrix with scale factor for Pos.
|
|
a00*= unPackScaleFactor.x; a01*= unPackScaleFactor.y; a02*= unPackScaleFactor.z;
|
|
a10*= unPackScaleFactor.x; a11*= unPackScaleFactor.y; a12*= unPackScaleFactor.z;
|
|
a20*= unPackScaleFactor.x; a21*= unPackScaleFactor.y; a22*= unPackScaleFactor.z;
|
|
|
|
// get dst Array.
|
|
// enlarge temp buffer
|
|
static std::vector<CVector> lodInRaySpace;
|
|
if(numVertices>lodInRaySpace.size())
|
|
lodInRaySpace.resize(numVertices);
|
|
CVector *dstPtr= &lodInRaySpace[0];
|
|
|
|
|
|
// Fill the temp skin
|
|
//=============
|
|
{
|
|
CVector fVect;
|
|
|
|
for(;numVertices>0;)
|
|
{
|
|
// transform vertex, and store.
|
|
fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z;
|
|
++vertPtr;
|
|
dstPtr->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;
|
|
dstPtr->y= a10 * fVect.x + a11 * fVect.y + a12 * fVect.z + matPos.y;
|
|
dstPtr->z= a20 * fVect.x + a21 * fVect.y + a22 * fVect.z + matPos.z;
|
|
|
|
// next
|
|
dstPtr++;
|
|
numVertices--;
|
|
}
|
|
}
|
|
|
|
// Test intersection
|
|
//=============
|
|
|
|
return CRayMesh::getRayIntersection(lodInRaySpace, clod->getTriangleIndices(), dist2D, distZ, computeDist2D);
|
|
}
|
|
|
|
|
|
} // NL3D
|