2012-05-29 13:31:11 +00:00
// NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
// Copyright (C) 2010 Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
# include "std3d.h"
# include "nel/misc/common.h"
# include "nel/3d/lod_character_manager.h"
# include "nel/3d/lod_character_shape.h"
# include "nel/3d/lod_character_shape_bank.h"
# include "nel/3d/lod_character_instance.h"
# include "nel/misc/hierarchical_timer.h"
# include "nel/misc/fast_floor.h"
# include "nel/3d/lod_character_texture.h"
# include "nel/3d/ray_mesh.h"
# include "nel/misc/file.h"
# include "nel/misc/algo.h"
# include "nel/misc/fast_mem.h"
# include "nel/misc/system_info.h"
using namespace std ;
using namespace NLMISC ;
namespace NL3D
{
// ***************************************************************************
// Destination vertex layout used by the LOD vertex stream.
// Dest is without Normal because lighting is precomputed into the vertex color.
// Layout: position at byte 0, UV at byte NL3D_CLOD_UV_OFF, color at byte NL3D_CLOD_COLOR_OFF,
// NL3D_CLOD_VERTEX_SIZE bytes per vertex (checked by asserts in beginRender()).
#define NL3D_CLOD_VERTEX_FORMAT (CVertexBuffer::PositionFlag | CVertexBuffer::TexCoord0Flag | CVertexBuffer::PrimaryColorFlag)
#define NL3D_CLOD_VERTEX_SIZE 24
#define NL3D_CLOD_UV_OFF 12
#define NL3D_CLOD_COLOR_OFF 20
// size (in block) of the big texture.
#define NL3D_CLOD_TEXT_NLOD_WIDTH 16
#define NL3D_CLOD_TEXT_NLOD_HEIGHT 16
// NB: compound values are parenthesized so that they expand safely inside any expression
// (eg: "x % NL3D_CLOD_TEXT_NUM_IDS" or "x / NL3D_CLOD_BIGTEXT_WIDTH").
#define NL3D_CLOD_TEXT_NUM_IDS (NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_NLOD_HEIGHT)
#define NL3D_CLOD_BIGTEXT_WIDTH (NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_WIDTH)
#define NL3D_CLOD_BIGTEXT_HEIGHT (NL3D_CLOD_TEXT_NLOD_HEIGHT*NL3D_CLOD_TEXT_HEIGHT)
// Default texture color. Alpha must be 255
#define NL3D_CLOD_DEFAULT_TEXCOLOR CRGBA(255,255,255,255)
// ***************************************************************************
/// Build a manager with default render limits, an always-resident texture atlas,
/// a full pool of free texture ids and an unlit, alpha-tested, double-sided material.
CLodCharacterManager::CLodCharacterManager()
{
	// Render state: nothing in progress, default vertex stream sizing.
	_Rendering = false;
	_LockDone = false;
	_MaxNumVertices = 3000;
	_NumVBHard = 8;

	// Texture atlas shared by all instances. It is never released from memory,
	// which keeps the code simple for a small, fixed memory cost.
	_BigTexture = new CTextureBlank;
	_BigTexture->setReleasable(false);
	_BigTexture->resize(NL3D_CLOD_BIGTEXT_WIDTH, NL3D_CLOD_BIGTEXT_HEIGHT, CBitmap::RGBA);
	// Uploaded as 16 bits, bilinear, no mipmaps, clamped on both axes.
	_BigTexture->setUploadFormat(ITexture::RGB565);
	_BigTexture->setFilterMode(ITexture::Linear, ITexture::LinearMipMapOff);
	_BigTexture->setWrapS(ITexture::Clamp);
	_BigTexture->setWrapT(ITexture::Clamp);

	// Every texture id starts free: _FreeIds[k] == k.
	const uint numIds = NL3D_CLOD_TEXT_NUM_IDS;
	_FreeIds.resize(numIds);
	for (uint id = 0; id < numIds; ++id)
		_FreeIds[id] = id;

	// Material used to draw all the lods in a single batch.
	_Material.initUnlit();
	_Material.setAlphaTest(true);
	_Material.setDoubleSided(true);
	_Material.setTexture(0, _BigTexture);

	// Default light correction (Ryzom setup): inverse of a quarter turn around Z.
	_LightCorrectionMatrix.rotateZ(static_cast<float>(Pi) / 2);
	_LightCorrectionMatrix.invert();

	NL_SET_IB_NAME(_Triangles, " CLodCharacterManager::_Triangles ");
}
// ***************************************************************************
/// Destructor: frees the shape banks and the render resources through reset().
CLodCharacterManager::~CLodCharacterManager()
{
	this->reset();
}
// ***************************************************************************
void CLodCharacterManager : : reset ( )
{
nlassert ( ! isRendering ( ) ) ;
// delete shapeBanks.
for ( uint i = 0 ; i < _ShapeBankArray . size ( ) ; i + + )
{
if ( _ShapeBankArray [ i ] )
delete _ShapeBankArray [ i ] ;
}
// clears containers
contReset ( _ShapeBankArray ) ;
contReset ( _ShapeMap ) ;
// reset render part.
_VertexStream . release ( ) ;
}
// ***************************************************************************
/// Allocate a new, empty shape bank and return its id.
/// The first free (NULL) slot of the bank array is reused; otherwise the array grows by one.
uint32 CLodCharacterManager::createShapeBank()
{
	// Look for the first unused slot.
	const uint numSlots = (uint)_ShapeBankArray.size();
	uint slot = 0;
	while (slot < numSlots && _ShapeBankArray[slot] != NULL)
		++slot;

	if (slot < numSlots)
	{
		// Reuse the free entry.
		_ShapeBankArray[slot] = new CLodCharacterShapeBank;
	}
	else
	{
		// No free entry: append. The new bank index is the former array size, ie 'slot'.
		_ShapeBankArray.push_back(new CLodCharacterShapeBank);
	}
	return (uint32)slot;
}
// ***************************************************************************
/// Read-only access to a bank. Returns NULL when bankId is out of range
/// (the stored pointer itself may also be NULL for a deleted bank).
const CLodCharacterShapeBank *CLodCharacterManager::getShapeBank(uint32 bankId) const
{
	if (bankId < _ShapeBankArray.size())
		return _ShapeBankArray[bankId];
	return NULL;
}
// ***************************************************************************
/// Mutable access to a bank. Returns NULL when bankId is out of range
/// (the stored pointer itself may also be NULL for a deleted bank).
CLodCharacterShapeBank *CLodCharacterManager::getShapeBank(uint32 bankId)
{
	if (bankId < _ShapeBankArray.size())
		return _ShapeBankArray[bankId];
	return NULL;
}
// ***************************************************************************
void CLodCharacterManager : : deleteShapeBank ( uint32 bankId )
{
if ( bankId > = _ShapeBankArray . size ( ) )
{
if ( _ShapeBankArray [ bankId ] )
{
delete _ShapeBankArray [ bankId ] ;
_ShapeBankArray [ bankId ] = NULL ;
}
}
}
// ***************************************************************************
/// Look up a shape id by name in the map built by compile().
/// \return the shape id, or -1 when the name is unknown.
sint32 CLodCharacterManager::getShapeIdByName(const std::string &name) const
{
	const CstItStrIdMap found = _ShapeMap.find(name);
	if (found != _ShapeMap.end())
		return found->second;
	return -1;
}
// ***************************************************************************
/// Resolve a shape id: the high 16 bits select the bank, the low 16 bits the shape in that bank.
/// \return NULL when the bank does not exist, else whatever the bank returns for the shape index.
const CLodCharacterShape *CLodCharacterManager::getShape(uint32 shapeId) const
{
	const uint bankPart = shapeId >> 16;
	const uint shapePart = shapeId & 0xFFFF;

	const CLodCharacterShapeBank *bank = getShapeBank(bankPart);
	if (!bank)
		return NULL;

	return bank->getShape(shapePart);
}
// ***************************************************************************
bool CLodCharacterManager : : compile ( )
{
bool error = false ;
// clear the map
contReset ( _ShapeMap ) ;
// build the map
for ( uint i = 0 ; i < _ShapeBankArray . size ( ) ; i + + )
{
if ( _ShapeBankArray [ i ] )
{
// Parse all Shapes
for ( uint j = 0 ; j < _ShapeBankArray [ i ] - > getNumShapes ( ) ; j + + )
{
// build the shape Id
uint shapeId = ( i < < 16 ) + j ;
// get the shape
const CLodCharacterShape * shape = _ShapeBankArray [ i ] - > getShape ( j ) ;
if ( shape )
{
const string & name = shape - > getName ( ) ;
ItStrIdMap it = _ShapeMap . find ( name ) ;
if ( it = = _ShapeMap . end ( ) )
// insert the id in the map
_ShapeMap . insert ( make_pair ( name , shapeId ) ) ;
else
{
error = true ;
nlwarning ( " Found a Character Lod with same name in the manager: %s " , name . c_str ( ) ) ;
}
}
}
}
}
return error ;
}
// ***************************************************************************
// ***************************************************************************
// Render
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
void CLodCharacterManager : : setMaxVertex ( uint32 maxVertex )
{
// we must not be between beginRender() and endRender()
nlassert ( ! isRendering ( ) ) ;
_MaxNumVertices = maxVertex ;
}
// ***************************************************************************
void CLodCharacterManager : : setVertexStreamNumVBHard ( uint32 numVBHard )
{
// we must not be between beginRender() and endRender()
nlassert ( ! isRendering ( ) ) ;
_NumVBHard = numVBHard ;
}
// ***************************************************************************
void CLodCharacterManager : : beginRender ( IDriver * driver , const CVector & managerPos )
{
H_AUTO ( NL3D_CharacterLod_beginRender ) ;
// we must not be between beginRender() and endRender()
nlassert ( ! isRendering ( ) ) ;
// Reset render
//=================
_CurrentVertexId = 0 ;
_CurrentTriId = 0 ;
// update Driver.
//=================
nlassert ( driver ) ;
// test change of vertexStream setup
bool mustChangeVertexStream = _VertexStream . getDriver ( ) ! = driver ;
if ( ! mustChangeVertexStream )
{
mustChangeVertexStream = _MaxNumVertices ! = _VertexStream . getMaxVertices ( ) ;
mustChangeVertexStream = mustChangeVertexStream | | _NumVBHard ! = _VertexStream . getNumVB ( ) ;
}
// re-init?
if ( mustChangeVertexStream )
{
// chech offset
CVertexBuffer vb ;
vb . setVertexFormat ( NL3D_CLOD_VERTEX_FORMAT ) ;
// NB: addRenderCharacterKey() loop hardCoded for Vertex+UV+Normal+Color only.
nlassert ( NL3D_CLOD_UV_OFF = = vb . getTexCoordOff ( ) ) ;
nlassert ( NL3D_CLOD_COLOR_OFF = = vb . getColorOff ( ) ) ;
// Setup the vertex stream
_VertexStream . release ( ) ;
_VertexStream . init ( driver , NL3D_CLOD_VERTEX_FORMAT , _MaxNumVertices , _NumVBHard , " CLodManagerVB " , false ) ; // nb : don't use volatile lock as we keep the buffer locked
}
// prepare for render.
//=================
// Do not Lock Buffer now (will be done at the first instance added)
nlassert ( ! _LockDone ) ;
_VertexSize = _VertexStream . getVertexSize ( ) ;
// NB: addRenderCharacterKey() loop hardCoded for Vertex+UV+Normal+Color only.
nlassert ( _VertexSize = = NL3D_CLOD_VERTEX_SIZE ) ; // Vector + Normal + UV + RGBA
// Alloc a minimum of primitives (2*vertices), to avoid as possible reallocation in addRenderCharacterKey
if ( _Triangles . getNumIndexes ( ) < _MaxNumVertices * 2 )
{
_Triangles . setFormat ( NL_LOD_CHARACTER_INDEX_FORMAT ) ;
_Triangles . setNumIndexes ( _MaxNumVertices * 2 ) ;
}
// Local manager matrix
_ManagerMatrixPos = managerPos ;
// Ok, start rendering
_Rendering = true ;
}
// ***************************************************************************
/// Per-vertex lighting (C++ path): lightRes.RGB = clamp(ambient + diffuse * clamp(light.normal) / 256).
/// lightObjectSpace is expected pre-multiplied by 255 by the caller. lightRes.A is left untouched.
static inline void computeLodLighting(CRGBA &lightRes, const CVector &lightObjectSpace, const CVector &normalPtr, CRGBA ambient, CRGBA diffuse)
{
	// Dot product, converted to an integer intensity clamped to a byte.
	const float dot = lightObjectSpace * normalPtr;
	sint intensity = NLMISC::OptFastFloor(dot);
	fastClamp8(intensity);

	// Modulate the diffuse by the intensity (8.8 fixed point), then add the ambient term.
	sint red = ambient.R + ((diffuse.R * intensity) >> 8);
	sint green = ambient.G + ((diffuse.G * intensity) >> 8);
	sint blue = ambient.B + ((diffuse.B * intensity) >> 8);

	// Saturate each channel.
	fastClamp8(red);
	fastClamp8(green);
	fastClamp8(blue);

	lightRes.R = red;
	lightRes.G = green;
	lightRes.B = blue;
}
// ***************************************************************************
/** Append one character lod (current animation key of instance's shape) to the render batch.
 *	Each vertex of the key is transformed by worldMatrix (translation made relative to the
 *	manager position given to beginRender()), lit with lightDir/paramAmbient/paramDiffuse using
 *	the shape normals, and written with the instance UVs and per-vertex alpha into the vertex
 *	stream. The shape triangles are then copied into the index buffer, re-indexed by the
 *	current vertex offset.
 *	Must be called between beginRender() and endRender(), and after initInstance() (asserted).
 *	\return false only when the shape vertices do not fit in the remaining vertex budget
 *	(nothing is added in this case). Returns true when the key has been added, and also when
 *	there is nothing to add (shape not found, anim key not found, or empty shape).
 */
bool CLodCharacterManager : : addRenderCharacterKey ( CLodCharacterInstance & instance , const CMatrix & worldMatrix ,
CRGBA paramAmbient , CRGBA paramDiffuse , const CVector & lightDir )
{
H_AUTO ( NL3D_CharacterLod_AddRenderKey )
nlassert ( _VertexStream . getDriver ( ) ) ;
// we must be between beginRender() and endRender()
nlassert ( isRendering ( ) ) ;
// regroup all variables that will be accessed in the ASM loop (minimize cache problems)
uint numVertices ;
const CLodCharacterShape : : CVector3s * vertPtr ;
const CVector * normalPtr ;
const CUV * uvPtr ;
const uint8 * alphaPtr ;
CVector lightObjectSpace ;
CVector matPos ;
float a00 , a01 , a02 ;
float a10 , a11 , a12 ;
float a20 , a21 , a22 ;
// 64 bits of zero, used as memory operand by the MMX unpack/pack instructions of the ASM path
uint64 blank = 0 ;
CRGBA ambient = paramAmbient ;
CRGBA diffuse = paramDiffuse ;
// For ASM / MMX, must set 0 to alpha part, because replaced by *alphaPtr (with add)
ambient . A = 0 ;
diffuse . A = 0 ;
// Get the Shape and current key.
//=============
// get the shape
const CLodCharacterShape * clod = getShape ( instance . ShapeId ) ;
// if not found quit, return true
if ( ! clod )
return true ;
// get UV/Normal array. NULL => error
normalPtr = clod - > getNormals ( ) ;
// get UV of the instance
uvPtr = instance . getUVs ( ) ;
// uvPtr is NULL means that initInstance() has not been called!!
nlassert ( normalPtr & & uvPtr ) ;
// get the anim key
CVector unPackScaleFactor ;
vertPtr = clod - > getAnimKey ( instance . AnimId , instance . AnimTime , instance . WrapMode , unPackScaleFactor ) ;
// if not found quit, return true
if ( ! vertPtr )
return true ;
// get num verts
numVertices = clod - > getNumVertices ( ) ;
// empty shape??
if ( numVertices = = 0 )
return true ;
// If too many vertices, quit, returning false.
if ( _CurrentVertexId + numVertices > _MaxNumVertices )
return false ;
// get alpha array
// NB: function-local static, shared by all calls; only ever grows, and is filled with 255.
static vector < uint8 > defaultAlphaArray ;
// get the instance alpha if correctly setuped
if ( instance . VertexAlphas . size ( ) = = numVertices )
{
alphaPtr = & instance . VertexAlphas [ 0 ] ;
}
// if error, take 255 as alpha.
else
{
// NB: still use an array. This case should never arise, but support it not at full optim.
if ( defaultAlphaArray . size ( ) < numVertices )
defaultAlphaArray . resize ( numVertices , 255 ) ;
alphaPtr = & defaultAlphaArray [ 0 ] ;
}
// Lock Buffer if not done
//=============
// Do this after code above because we are sure that we will fill something (numVertices>0)
if ( ! _LockDone )
{
_VertexData = _VertexStream . lock ( ) ;
_LockDone = true ;
}
// After lock, For D3D, the VertexColor may be in BGRA format
if ( _VertexStream . isBRGA ( ) )
{
// then swap only the B and R (no cpu cycle added per vertex)
ambient . swapBR ( ) ;
diffuse . swapBR ( ) ;
}
// Prepare Transform
//=============
// HTimerInfo: all this block takes 0.1%
// Get matrix pos.
matPos = worldMatrix . getPos ( ) ;
// compute in manager space.
matPos - = _ManagerMatrixPos ;
// Get rotation line vectors
// (aRC is read from worldM[C*4 + R], ie row R / column C of the 3x3 part)
const float * worldM = worldMatrix . get ( ) ;
a00 = worldM [ 0 ] ; a01 = worldM [ 4 ] ; a02 = worldM [ 8 ] ;
a10 = worldM [ 1 ] ; a11 = worldM [ 5 ] ; a12 = worldM [ 9 ] ;
a20 = worldM [ 2 ] ; a21 = worldM [ 6 ] ; a22 = worldM [ 10 ] ;
// get the light in object space.
// Multiply light dir with transpose of worldMatrix. This may be not exact (not uniform scale) but sufficient.
lightObjectSpace . x = a00 * lightDir . x + a10 * lightDir . y + a20 * lightDir . z ;
lightObjectSpace . y = a01 * lightDir . x + a11 * lightDir . y + a21 * lightDir . z ;
lightObjectSpace . z = a02 * lightDir . x + a12 * lightDir . y + a22 * lightDir . z ;
// animation User correction
lightObjectSpace = _LightCorrectionMatrix . mulVector ( lightObjectSpace ) ;
// normalize, and neg for Dot Product.
lightObjectSpace . normalize ( ) ;
lightObjectSpace = - lightObjectSpace ;
// preMul by 255 for RGBA uint8
lightObjectSpace * = 255 ;
// multiply matrix with scale factor for Pos.
// (done after the light computation above, so the unpack scale only affects positions)
a00 * = unPackScaleFactor . x ; a01 * = unPackScaleFactor . y ; a02 * = unPackScaleFactor . z ;
a10 * = unPackScaleFactor . x ; a11 * = unPackScaleFactor . y ; a12 * = unPackScaleFactor . z ;
a20 * = unPackScaleFactor . x ; a21 * = unPackScaleFactor . y ; a22 * = unPackScaleFactor . z ;
// get dst Array.
uint8 * dstPtr ;
dstPtr = _VertexData + _CurrentVertexId * _VertexSize ;
/* PreCaching Note: CFastMem::precache() has been tested (done on the 4 arrays) but not very interesting,
maybe because the cache miss improve //ism a bit below.
*/
// Fill the VB
//=============
# if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
// optimized version
if ( CSystemInfo : : hasMMX ( ) )
{
H_AUTO ( NL3D_CharacterLod_vertexFill ) ;
if ( numVertices )
{
sint f8 ;
/* NB: order is important for AGP filling optimisation in dstPtr
Pentium2 + optimisation notes :
- " uop " comment formating :
A / B means " A micro-ops in port 0, and B micro-ops in port 2 " . ( port 1 is very rare for FPU )
A / B / C / D means " A micro-ops in port 0, B in port 2, C in port 3 and D in port 4 " .
The number in ( ) is the delay ( if any ) .
- the " compute lighting part " must done first , because of the " fistp f8 " mem writes that must
be place far away from the " mov eax, f8 " read in clamp lighting part
( else seems that it crashes all the //ism)
- No need to Interleave on Pentium2 + . But prevents " write/read stall " by putting the write
far away from the next read . Else stall of 3 cycles + BIG BREAK OF //ism (I think).
This had save me 120 cycles / 240 ! ! !
BenchResults :
- The " transform vertex part " and " all next part " cost 42 cycles , but is somewhat optimal :
63 uop ( = > min 21 cycles ) , but 36 uop in the P0 port ( = > this is the bottleneck )
- The lighting part adds 1 cycle only ? ? ? ? ? ( 44 cycles ) But still relevant and optimal :
43 uop in port P0 ! ! ! !
- The UV part adds 4 cycles ( 47 ) ( should not since 0 in Port P0 ) , still acceptable .
- The clamp part adds 3 cycles ( 50 ) , and add 11 cycles in " P0 or P1 " ( but heavy dependency )
If we assume all goes into P1 , it should takes 0. . . still acceptable ( optimal = = 43 ? )
- The alpha part adds 2 cycles ( 52 , optimal = 45 ) . OK .
- The modulate part adds 15 cycles . OK
TOTAL : 67 cycles in theory ( write in RAM , no cache miss problem )
BENCH : ASM version : 91 cycles ( Write in AGP , some cache miss problems , still good against 67 )
C version : 316 cycles .
*/
__asm
{
mov edi , dstPtr
theLoop :
// **** compute lighting
mov esi , normalPtr // uop: 0/1
// dot3
fld dword ptr [ esi ] // uop: 0/1
fmul lightObjectSpace . x // uop: 1/1 (5)
fld dword ptr [ esi + 4 ] // uop: 0/1
fmul lightObjectSpace . y // uop: 1/1 (5)
faddp st ( 1 ) , st // uop: 1/0 (3)
fld dword ptr [ esi + 8 ] // uop: 0/1
fmul lightObjectSpace . z // uop: 1/1 (5)
faddp st ( 1 ) , st // uop: 1/0 (3)
fistp f8 // uop: 2/0/1/1 (5)
// next
add esi , 12 // uop: 1/0
mov normalPtr , esi // uop: 0/0/1/1
// **** transform vertex, and store
mov esi , vertPtr // uop: 0/1
fild word ptr [ esi ] // uop: 3/1 (5)
fild word ptr [ esi + 2 ] // uop: 3/1 (5)
fild word ptr [ esi + 4 ] // uop: 3/1 (5)
// x
fld a00 // uop: 0/1
fmul st , st ( 3 ) // uop: 1/0 (5)
fld a01 // uop: 0/1
fmul st , st ( 3 ) // uop: 1/0 (5)
faddp st ( 1 ) , st // uop: 1/0 (3)
fld a02 // uop: 0/1
fmul st , st ( 2 ) // uop: 1/0 (5)
faddp st ( 1 ) , st // uop: 1/0 (3)
fld matPos . x // uop: 0/1
faddp st ( 1 ) , st // uop: 1/0 (3)
fstp dword ptr [ edi ] // uop: 0/0/1/1
// y
fld a10
fmul st , st ( 3 )
fld a11
fmul st , st ( 3 )
faddp st ( 1 ) , st
fld a12
fmul st , st ( 2 )
faddp st ( 1 ) , st
fld matPos . y
faddp st ( 1 ) , st
fstp dword ptr [ edi + 4 ]
// z
fld a20
fmul st , st ( 3 )
fld a21
fmul st , st ( 3 )
faddp st ( 1 ) , st
fld a22
fmul st , st ( 2 )
faddp st ( 1 ) , st
fld matPos . z
faddp st ( 1 ) , st
fstp dword ptr [ edi + 8 ]
// flush stack
fstp st // uop: 1/0
fstp st // uop: 1/0
fstp st // uop: 1/0
// next
add esi , 6 // uop: 1/0
mov vertPtr , esi // uop: 0/0/1/1
// **** copy uv
mov esi , uvPtr // uop: 0/1
mov eax , [ esi ] // uop: 0/1
mov [ edi + NL3D_CLOD_UV_OFF ] , eax // uop: 0/0/1/1
mov ebx , [ esi + 4 ] // uop: 0/1
mov [ edi + NL3D_CLOD_UV_OFF + 4 ] , ebx // uop: 0/0/1/1
// next
add esi , 8 // uop: 1/0
mov uvPtr , esi // uop: 0/0/1/1
// **** Clamp lighting
// clamp to 0 only. will be clamped to 255 by MMX
mov eax , f8 // uop: 0/1
cmp eax , 0x80000000 // if>=0 => CF=1
sbb ebx , ebx // if>=0 => CF==1 => ebx=0xFFFFFFFF
and eax , ebx // if>=0 => eax unchanged, else eax=0 (clamped)
// **** Modulate lighting modulate with diffuse color, add ambient term, using MMX
movd mm0 , eax // 0000000L uop: 1/0
packuswb mm0 , mm0 // 000L000L uop: 1/0 (p1)
packuswb mm0 , mm0 // 0L0L0L0L uop: 1/0 (p1)
movd mm1 , diffuse // uop: 0/1
punpcklbw mm1 , blank // uop: 1/1 (p1)
pmullw mm0 , mm1 // diffuse*L uop: 1/0 (3)
psrlw mm0 , 8 // 0A0B0G0R uop: 1/0 (p1)
packuswb mm0 , blank // 0000ABGR uop: 1/1 (p1)
movd mm2 , ambient // uop: 0/1
paddusb mm0 , mm2 // uop: 1/0
movd ebx , mm0 // ebx= AABBGGRR uop: 1/0
// NB: emms is not so bad on P2+: delay of 6, +11 (NB: far better than no MMX instructions)
emms // uop: 11/0 (6). (?????)
// **** append alpha, and store
mov esi , alphaPtr // uop: 0/1
movzx eax , byte ptr [ esi ] // uop: 0/1
shl eax , 24 // uop: 1/0
add ebx , eax // uop: 1/0
// now, ebx= AABBGGRR
mov [ edi + NL3D_CLOD_COLOR_OFF ] , ebx // uop: 0/0/1/1
// next
add esi , 1 // uop: 1/0
mov alphaPtr , esi // uop: 0/0/1/1
// **** next
add edi , NL3D_CLOD_VERTEX_SIZE // uop: 1/0
mov eax , numVertices // uop: 0/1
dec eax // uop: 1/0
mov numVertices , eax // uop: 0/0/1/1
jnz theLoop // uop: 1/1 (p1)
// To have same behavior than c code
mov dstPtr , edi
}
}
}
else
# endif
{
// Portable C++ version of the fill loop: same per-vertex work as the ASM path above.
H_AUTO ( NL3D_CharacterLod_vertexFill ) ;
CVector fVect ;
for ( ; numVertices > 0 ; )
{
// NB: order is important for AGP filling optimisation
// transform vertex, and store.
CVector * dstVector = ( CVector * ) dstPtr ;
fVect . x = vertPtr - > x ; fVect . y = vertPtr - > y ; fVect . z = vertPtr - > z ;
+ + vertPtr ;
dstVector - > x = a00 * fVect . x + a01 * fVect . y + a02 * fVect . z + matPos . x ;
dstVector - > y = a10 * fVect . x + a11 * fVect . y + a12 * fVect . z + matPos . y ;
dstVector - > z = a20 * fVect . x + a21 * fVect . y + a22 * fVect . z + matPos . z ;
// Copy UV
* ( CUV * ) ( dstPtr + NL3D_CLOD_UV_OFF ) = * uvPtr ;
+ + uvPtr ;
// Compute Lighting.
CRGBA lightRes ;
computeLodLighting ( lightRes , lightObjectSpace , * normalPtr , ambient , diffuse ) ;
+ + normalPtr ;
lightRes . A = * alphaPtr ;
+ + alphaPtr ;
// store.
* ( ( CRGBA * ) ( dstPtr + NL3D_CLOD_COLOR_OFF ) ) = lightRes ;
// next
dstPtr + = NL3D_CLOD_VERTEX_SIZE ;
numVertices - - ;
}
}
// NB: numVertices is 0 after either fill loop; the code below re-reads the counts from clod.
// Add Primitives.
//=============
{
H_AUTO ( NL3D_CharacterLod_primitiveFill )
// get number of tri indexes
uint numTriIdxs = clod - > getNumTriangles ( ) * 3 ;
// Yoyo: there is an assert with getPtr(). Not sure, but maybe arise if numTriIdxs==0
if ( numTriIdxs )
{
// realloc tris if needed.
if ( _CurrentTriId + numTriIdxs > _Triangles . getNumIndexes ( ) )
{
_Triangles . setFormat ( NL_LOD_CHARACTER_INDEX_FORMAT ) ;
_Triangles . setNumIndexes ( _CurrentTriId + numTriIdxs ) ;
}
// reindex and copy tris
CIndexBufferReadWrite iba ;
_Triangles . lock ( iba ) ;
const TLodCharacterIndexType * srcIdx = clod - > getTriangleArray ( ) ;
nlassert ( sizeof ( TLodCharacterIndexType ) = = _Triangles . getIndexNumBytes ( ) ) ;
TLodCharacterIndexType * dstIdx = ( TLodCharacterIndexType * ) iba . getPtr ( ) + _CurrentTriId ;
// each source index is local to the shape: offset it by the first vertex written for this key
for ( ; numTriIdxs > 0 ; numTriIdxs - - , srcIdx + + , dstIdx + + )
{
* dstIdx = * srcIdx + _CurrentVertexId ;
}
}
}
// Next
//=============
// Inc Vertex count.
_CurrentVertexId + = clod - > getNumVertices ( ) ;
// Inc Prim count.
_CurrentTriId + = clod - > getNumTriangles ( ) * 3 ;
// key added
return true ;
}
// ***************************************************************************
void CLodCharacterManager : : endRender ( )
{
H_AUTO ( NL3D_CharacterLod_endRender ) ;
IDriver * driver = _VertexStream . getDriver ( ) ;
nlassert ( driver ) ;
// we must be between beginRender() and endRender()
nlassert ( isRendering ( ) ) ;
// if something rendered
if ( _LockDone )
{
// UnLock Buffer.
_VertexStream . unlock ( _CurrentVertexId ) ;
_LockDone = false ;
// Render the VBuffer and the primitives.
if ( _CurrentTriId > 0 )
{
// setup matrix.
CMatrix managerMatrix ;
managerMatrix . setPos ( _ManagerMatrixPos ) ;
driver - > setupModelMatrix ( managerMatrix ) ;
// active VB
_VertexStream . activate ( ) ;
// render triangles
driver - > activeIndexBuffer ( _Triangles ) ;
driver - > renderTriangles ( _Material , 0 , _CurrentTriId / 3 ) ;
}
// swap Stream VBHard
_VertexStream . swapVBHard ( ) ;
}
// Ok, end rendering
_Rendering = false ;
}
// ***************************************************************************
void CLodCharacterManager : : setupNormalCorrectionMatrix ( const CMatrix & normalMatrix )
{
_LightCorrectionMatrix = normalMatrix ;
_LightCorrectionMatrix . setPos ( CVector : : Null ) ;
_LightCorrectionMatrix . invert ( ) ;
}
// ***************************************************************************
// ***************************************************************************
// Texturing.
// ***************************************************************************
// ***************************************************************************
// ***************************************************************************
/// A new temporary bitmap starts as the 1x1 black bitmap set up by reset().
CLodCharacterTmpBitmap::CLodCharacterTmpBitmap()
{
	this->reset();
}
// ***************************************************************************
void CLodCharacterTmpBitmap : : reset ( )
{
// setup a 1*1 bitmap
_Bitmap . resize ( 1 ) ;
_Bitmap [ 0 ] = CRGBA : : Black ;
_WidthPower = 0 ;
_UShift = 8 ;
_VShift = 8 ;
}
// ***************************************************************************
void CLodCharacterTmpBitmap : : build ( const NLMISC : : CBitmap & bmpIn )
{
uint width = bmpIn . getWidth ( ) ;
uint height = bmpIn . getHeight ( ) ;
nlassert ( width > 0 & & width < = 256 ) ;
nlassert ( height > 0 & & height < = 256 ) ;
// resize bitmap.
_Bitmap . resize ( width * height ) ;
_WidthPower = getPowerOf2 ( width ) ;
// compute shift
_UShift = 8 - getPowerOf2 ( width ) ;
_VShift = 8 - getPowerOf2 ( height ) ;
// convert the bitmap.
CBitmap bmp = bmpIn ;
bmp . convertToType ( CBitmap : : RGBA ) ;
CRGBA * src = ( CRGBA * ) & bmp . getPixels ( ) [ 0 ] ;
CRGBA * dst = _Bitmap . getPtr ( ) ;
for ( sint nPix = width * height ; nPix > 0 ; nPix - - , src + + , dst + + )
{
* dst = * src ;
}
}
// ***************************************************************************
void CLodCharacterTmpBitmap : : build ( CRGBA col )
{
// setup a 1*1 bitmap and set it with col
reset ( ) ;
_Bitmap [ 0 ] = col ;
}
// ***************************************************************************
void CLodCharacterManager : : initInstance ( CLodCharacterInstance & instance )
{
// first release in (maybe) other manager.
if ( instance . _Owner )
instance . _Owner - > releaseInstance ( instance ) ;
// get the shape
const CLodCharacterShape * clod = getShape ( instance . ShapeId ) ;
// if not found quit
if ( ! clod )
return ;
// get Uvs.
const CUV * uvSrc = clod - > getUVs ( ) ;
nlassert ( uvSrc ) ;
// Ok, init header
instance . _Owner = this ;
instance . _UVs . resize ( clod - > getNumVertices ( ) ) ;
// allocate an id. If cannot, then fill Uvs with 0 => filled with Black. (see endTextureCompute() why).
if ( _FreeIds . empty ( ) )
{
// set a "Not enough memory" id
instance . _TextureId = NL3D_CLOD_TEXT_NUM_IDS ;
CUV uv ( 0 , 0 ) ;
fill ( instance . _UVs . begin ( ) , instance . _UVs . end ( ) , uv ) ;
}
// else OK, can instanciate the Uvs.
else
{
// get the id.
instance . _TextureId = _FreeIds . back ( ) ;
_FreeIds . pop_back ( ) ;
// get the x/y.
uint xId = instance . _TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH ;
uint yId = instance . _TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH ;
// compute the scale/bias to apply to Uvs.
float scaleU = 1.0f / NL3D_CLOD_TEXT_NLOD_WIDTH ;
float scaleV = 1.0f / NL3D_CLOD_TEXT_NLOD_HEIGHT ;
float biasU = ( float ) xId / NL3D_CLOD_TEXT_NLOD_WIDTH ;
float biasV = ( float ) yId / NL3D_CLOD_TEXT_NLOD_HEIGHT ;
// apply it to each UVs.
CUV * uvDst = & instance . _UVs [ 0 ] ;
for ( uint i = 0 ; i < instance . _UVs . size ( ) ; i + + )
{
uvDst [ i ] . U = biasU + uvSrc [ i ] . U * scaleU ;
uvDst [ i ] . V = biasV + uvSrc [ i ] . V * scaleV ;
}
}
}
// ***************************************************************************
void CLodCharacterManager : : releaseInstance ( CLodCharacterInstance & instance )
{
if ( instance . _Owner = = NULL )
return ;
nlassert ( this = = instance . _Owner ) ;
// if the id is not a "Not enough memory" id, release it.
if ( instance . _TextureId > = 0 & & instance . _TextureId < NL3D_CLOD_TEXT_NUM_IDS )
_FreeIds . push_back ( instance . _TextureId ) ;
// reset the instance
instance . _Owner = NULL ;
instance . _TextureId = - 1 ;
contReset ( instance . _UVs ) ;
}
// ***************************************************************************
/// Get the address, in the big texture, of the first pixel of the cell owned by the instance.
/// Consecutive lines of the cell are NL3D_CLOD_BIGTEXT_WIDTH pixels apart.
/// \return NULL if the instance has the "not enough memory" id.
CRGBA *CLodCharacterManager::getTextureInstance(CLodCharacterInstance &instance)
{
	nlassert(instance._Owner == this);
	nlassert(instance._TextureId != -1);

	// No cell allocated for this instance.
	if (instance._TextureId == NL3D_CLOD_TEXT_NUM_IDS)
		return NULL;

	// Cell coordinates in the grid.
	const uint cellX = instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
	const uint cellY = instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;

	// Pixel offset of the cell origin from the start of the big texture.
	const uint pixelOffset = cellY * NL3D_CLOD_TEXT_HEIGHT * NL3D_CLOD_BIGTEXT_WIDTH + cellX * NL3D_CLOD_TEXT_WIDTH;

	CRGBA *firstPixel = (CRGBA *)&_BigTexture->getPixels(0)[0];
	return firstPixel + pixelOffset;
}
// ***************************************************************************
bool CLodCharacterManager : : startTextureCompute ( CLodCharacterInstance & instance )
{
CRGBA * dst = getTextureInstance ( instance ) ;
if ( ! dst )
return false ;
// erase the texture with 0,0,0,255. Alpha is actually the min "Quality" part of the CTUVQ.
CRGBA col = NL3D_CLOD_DEFAULT_TEXCOLOR ;
for ( uint y = 0 ; y < NL3D_CLOD_TEXT_HEIGHT ; y + + )
{
// erase the line
for ( uint x = 0 ; x < NL3D_CLOD_TEXT_WIDTH ; x + + )
dst [ x ] = col ;
// Next line
dst + = NL3D_CLOD_BIGTEXT_WIDTH ;
}
return true ;
}
// ***************************************************************************
void CLodCharacterManager : : addTextureCompute ( CLodCharacterInstance & instance , const CLodCharacterTexture & lodTexture )
{
CRGBA * dst = getTextureInstance ( instance ) ;
if ( ! dst )
return ;
// get lookup ptr.
nlassert ( lodTexture . Texture . size ( ) = = NL3D_CLOD_TEXT_SIZE ) ;
const CLodCharacterTexture : : CTUVQ * lookUpPtr = & lodTexture . Texture [ 0 ] ;
// apply the lodTexture, taking only better quality (ie nearer 0)
for ( uint y = 0 ; y < NL3D_CLOD_TEXT_HEIGHT ; y + + )
{
// erase the line
for ( uint x = 0 ; x < NL3D_CLOD_TEXT_WIDTH ; x + + )
{
CLodCharacterTexture : : CTUVQ lut = * lookUpPtr ;
// if this quality is better than the one stored
if ( lut . Q < dst [ x ] . A )
{
// get what texture to read, and read the pixel.
CRGBA col = _TmpBitmaps [ lut . T ] . getPixel ( lut . U , lut . V ) ;
// set quality.
col . A = lut . Q ;
// set in dest
dst [ x ] = col ;
}
// next lookup
lookUpPtr + + ;
}
// Next line
dst + = NL3D_CLOD_BIGTEXT_WIDTH ;
}
}
// ***************************************************************************
void CLodCharacterManager : : endTextureCompute ( CLodCharacterInstance & instance , uint numBmpToReset )
{
CRGBA * dst = getTextureInstance ( instance ) ;
if ( ! dst )
return ;
// reset All Alpha values to 255 => no AlphaTest problems
for ( uint y = 0 ; y < NL3D_CLOD_TEXT_HEIGHT ; y + + )
{
// erase the line
for ( uint x = 0 ; x < NL3D_CLOD_TEXT_WIDTH ; x + + )
{
dst [ x ] . A = 255 ;
}
// Next line
dst + = NL3D_CLOD_BIGTEXT_WIDTH ;
}
// If the id == 0 then must reset the 0,0 Pixel to black. for the "Not Enough memory" case in initInstance().
if ( instance . _TextureId = = 0 )
* ( CRGBA * ) & _BigTexture - > getPixels ( 0 ) [ 0 ] = NL3D_CLOD_DEFAULT_TEXCOLOR ;
// get the x/y.
uint xId = instance . _TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH ;
uint yId = instance . _TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH ;
// touch the texture for Driver update.
_BigTexture - > touchRect (
CRect ( xId * NL3D_CLOD_TEXT_WIDTH , yId * NL3D_CLOD_TEXT_HEIGHT , NL3D_CLOD_TEXT_WIDTH , NL3D_CLOD_TEXT_HEIGHT ) ) ;
// reset tmpBitmaps / free memory.
for ( uint i = 0 ; i < numBmpToReset ; i + + )
{
_TmpBitmaps [ i ] . reset ( ) ;
}
// TestYoyo
/*NLMISC::COFile f("tam.tga");
_BigTexture - > writeTGA ( f , 32 ) ; */
}
// ***************************************************************************
bool CLodCharacterManager : : fastIntersect ( const CLodCharacterInstance & instance , const NLMISC : : CMatrix & toRaySpace , float & dist2D , float & distZ , bool computeDist2D )
{
H_AUTO ( NL3D_CharacterLod_fastIntersect )
uint numVertices ;
const CLodCharacterShape : : CVector3s * vertPtr ;
CVector matPos ;
float a00 , a01 , a02 ;
float a10 , a11 , a12 ;
float a20 , a21 , a22 ;
// Get the Shape and current key.
//=============
// get the shape
const CLodCharacterShape * clod = getShape ( instance . ShapeId ) ;
// if not found quit
if ( ! clod )
return false ;
// get the anim key
CVector unPackScaleFactor ;
vertPtr = clod - > getAnimKey ( instance . AnimId , instance . AnimTime , instance . WrapMode , unPackScaleFactor ) ;
// if not found quit
if ( ! vertPtr )
return false ;
// get num verts
numVertices = clod - > getNumVertices ( ) ;
// empty shape??
if ( numVertices = = 0 )
return false ;
// Prepare Transform
//=============
// Get matrix pos.
matPos = toRaySpace . getPos ( ) ;
// Get rotation line vectors
const float * rayM = toRaySpace . get ( ) ;
a00 = rayM [ 0 ] ; a01 = rayM [ 4 ] ; a02 = rayM [ 8 ] ;
a10 = rayM [ 1 ] ; a11 = rayM [ 5 ] ; a12 = rayM [ 9 ] ;
a20 = rayM [ 2 ] ; a21 = rayM [ 6 ] ; a22 = rayM [ 10 ] ;
// multiply matrix with scale factor for Pos.
a00 * = unPackScaleFactor . x ; a01 * = unPackScaleFactor . y ; a02 * = unPackScaleFactor . z ;
a10 * = unPackScaleFactor . x ; a11 * = unPackScaleFactor . y ; a12 * = unPackScaleFactor . z ;
a20 * = unPackScaleFactor . x ; a21 * = unPackScaleFactor . y ; a22 * = unPackScaleFactor . z ;
// get dst Array.
// enlarge temp buffer
static std : : vector < CVector > lodInRaySpace ;
if ( numVertices > lodInRaySpace . size ( ) )
lodInRaySpace . resize ( numVertices ) ;
CVector * dstPtr = & lodInRaySpace [ 0 ] ;
// Fill the temp skin
//=============
{
CVector fVect ;
for ( ; numVertices > 0 ; )
{
// transform vertex, and store.
fVect . x = vertPtr - > x ; fVect . y = vertPtr - > y ; fVect . z = vertPtr - > z ;
+ + vertPtr ;
dstPtr - > x = a00 * fVect . x + a01 * fVect . y + a02 * fVect . z + matPos . x ;
dstPtr - > y = a10 * fVect . x + a11 * fVect . y + a12 * fVect . z + matPos . y ;
dstPtr - > z = a20 * fVect . x + a21 * fVect . y + a22 * fVect . z + matPos . z ;
// next
dstPtr + + ;
numVertices - - ;
}
}
// Test intersection
//=============
return CRayMesh : : getRayIntersection ( lodInRaySpace , clod - > getTriangleIndices ( ) , dist2D , distZ , computeDist2D ) ;
}
} // NL3D