khanat-opennel-code/code/nel/include/nel/misc/fast_mem.h

84 lines
2.7 KiB
C++

// NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
// Copyright (C) 2010 Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef NL_FAST_MEM_H
#define NL_FAST_MEM_H
#include "types_nl.h"
namespace NLMISC
{
//typedef void *memcpyptr(void *dts, const void *src, size_t nbytes);
/**
* Functions for Fast Memory manipulation with Pentium-class processors.
* From http://www.sgi.com/developers/technology/irix/resources/asc_cpu.html
* \author Lionel Berenguier
* \author Nevrax France
* \date 2002
*/
class CFastMem
{
public:
/**
* This is a function pointer that points on the best memcpy function available depending of the OS and proc.
* In the best case, it will use memcpySSE(), and in worst case, it'll use the libc memcpy()
* Simply use it this way: CFastMem::memcpy(dst, src, size);
*/
static void *(*memcpy)(void *dts, const void *src, size_t nbytes);
/**
* Fast precaching of memory in L1 cache using SSE or MMX where available
* (NB: others methods don't do the test)
* nbytes should not override 4K
*/
static void precache(const void *src, uint nbytes);
/////////////////////////////////////////////
/**
* Fast memcpy using SSE instructions: prefetchnta and movntq. Can be called only if SSE and MMX is supported
* NB: Copy per block of 4K through L1 cache
* Result is typically 420 Mo/s instead of 150 Mo/s.
*/
static void *memcpySSE(void *dst, const void *src, size_t nbytes);
/**
* Fast precaching of memory in L1 cache using MMX/SSE instructions: movq and prefetchnta
* Result is typically 880 Mo/s (surely slower because of overhead).
* nbytes should not override 4K
*/
static void precacheSSE(const void *src, uint nbytes);
/**
* Fast precaching of memory in L1 cache using MMX instructions only: movq
* Result is typically 720 Mo/s (surely slower because of overhead).
* Hence prefer precacheSSE() when available.
* nbytes should not override 4K
*/
static void precacheMMX(const void *src, uint nbytes);
};
} // NLMISC
#endif // NL_FAST_MEM_H
/* End of fast_mem.h */