00022 #ifndef FXMATHS_H
00023 #define FXMATHS_H
00024
00025 #include "FXProcess.h"
00026 #include "QThread.h"
00027 #include "FXStream.h"
00028 #include "qmemarray.h"
00029 #include <math.h>
00030 #include "FXVec2f.h"
00031 #include "FXVec2d.h"
00032 #include "FXVec3f.h"
00033 #include "FXVec3d.h"
00034 #include "FXMat3f.h"
00035 #include "FXMat3d.h"
00036 #include "FXVec4f.h"
00037 #include "FXVec4d.h"
00038 #include "FXMat4f.h"
00039 #include "FXMat4d.h"
00040 #if _M_IX86_FP>=3 || defined(__SSE3__)
00041 #include <pmmintrin.h>
00042 #endif
00043 #if _M_IX86_FP>=4 || defined(__SSE4__)
00044 #include <smmintrin.h>
00045
00046 #endif
00047
00048 namespace FX {
00049
00054 namespace Maths {
00079
00080 template<typename type> inline const type &min(const type &a, const type &b) { return (a<b) ? a : b; }
00082 template<typename type> inline const type &max(const type &a, const type &b) { return (a>b) ? a : b; }
00084 template<typename type> inline type sqrt(const type &v) { return ::sqrt(v); }
00085 template<> inline float sqrt<float>(const float &v) { return ::sqrtf(v); }
00087 template<typename type> inline type rcp(const type &v) { return 1/v; }
00089 template<typename type> inline type rsqrt(const type &v) { return 1/sqrt(v); }
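/* Illustrative usage sketch (added; not from the original documentation). The scalar helpers
   above are what the vector classes below apply element-wise when no SIMD form exists:

       using namespace FX::Maths;
       float  a=9.0f;
       float  r =sqrt(a);          // uses the ::sqrtf() specialisation, yields 3.0f
       float  ir=rsqrt(a);         // 1/sqrt(a)
       double lo=min(2.0, 3.0);    // 2.0
       double hi=max(2.0, 3.0);    // 3.0
*/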
00090
00091 namespace Impl
00092 {
00093 template<typename type, bool minus> struct CalcInfinity;
00094 template<bool minus> struct CalcInfinity<float, minus>
00095 {
00096 union
00097 {
00098 FXuint integer;
00099 float floatingpoint;
00100 };
00101 CalcInfinity()
00102 {
00103 integer=minus ? 0xff800000 : 0x7f800000;
00104 }
00105 operator const float &() const
00106 {
00107 return floatingpoint;
00108 }
00109 };
00110 template<bool minus> struct CalcInfinity<double, minus>
00111 {
00112 union
00113 {
00114 FXulong integer;
00115 double floatingpoint;
00116 };
00117 CalcInfinity()
00118 {
00119 integer=minus ? 0xfff0000000000000ULL : 0x7ff0000000000000ULL;
00120 }
00121 operator const double &() const
00122 {
00123 return floatingpoint;
00124 }
00125 };
00126 template<typename type, bool minus> struct InfinityValue
00127 {
00128 static const CalcInfinity<type, minus> &value()
00129 {
00130 static const CalcInfinity<type, minus> foo;
00131 return foo;
00132 }
00133 };
00134
00135 template<typename type> struct CalcNaN;
00136 template<> struct CalcNaN<float>
00137 {
00138 union
00139 {
00140 FXuint integer;
00141 float floatingpoint;
00142 };
00143 CalcNaN()
00144 {
00145 integer=0x7fffffff;
00146 }
00147 operator const float &() const
00148 {
00149 return floatingpoint;
00150 }
00151 };
00152 template<> struct CalcNaN<double>
00153 {
00154 union
00155 {
00156 FXulong integer;
00157 double floatingpoint;
00158 };
00159 CalcNaN()
00160 {
00161 integer=0x7fffffffffffffffULL;
00162 }
00163 operator const double &() const
00164 {
00165 return floatingpoint;
00166 }
00167 };
00168 template<typename type> struct NaNValue
00169 {
00170 static const CalcNaN<type> &value()
00171 {
00172 static const CalcNaN<type> foo;
00173 return foo;
00174 }
00175 };
00176 template<unsigned int size> struct TwoPowerMemAligner { };
00177 #if defined(_MSC_VER)
00178
00179 template<> struct FXMEMALIGNED(4) TwoPowerMemAligner<4> { TwoPowerMemAligner() { assert(!((FXuval)this & 3)); } };
00180 template<> struct FXMEMALIGNED(8) TwoPowerMemAligner<8> { TwoPowerMemAligner() { assert(!((FXuval)this & 7)); } };
00181 template<> struct FXMEMALIGNED(16) TwoPowerMemAligner<16> { TwoPowerMemAligner() { assert(!((FXuval)this & 15)); } };
00182 template<> struct FXMEMALIGNED(32) TwoPowerMemAligner<32> { TwoPowerMemAligner() { assert(!((FXuval)this & 31)); } };
00183 template<> struct FXMEMALIGNED(64) TwoPowerMemAligner<64> { TwoPowerMemAligner() { assert(!((FXuval)this & 63)); } };
00184 template<> struct FXMEMALIGNED(128) TwoPowerMemAligner<128> { TwoPowerMemAligner() { assert(!((FXuval)this & 127)); } };
00185 template<> struct FXMEMALIGNED(256) TwoPowerMemAligner<256> { TwoPowerMemAligner() { assert(!((FXuval)this & 255)); } };
00186 template<> struct FXMEMALIGNED(512) TwoPowerMemAligner<512> { TwoPowerMemAligner() { assert(!((FXuval)this & 511)); } };
00187 template<> struct FXMEMALIGNED(1024) TwoPowerMemAligner<1024> { TwoPowerMemAligner() { assert(!((FXuval)this & 1023)); } };
00188 #elif defined(__GNUC__)
00189 #if __GNUC__<4 || (__GNUC__==4 && __GNUC_MINOR__<3)
00190 #warning Before v4.3 GCC will not allow selective structure alignment - FX::Maths::Vector will not be aligned!
00191 #define FXVECTOR_BUGGYGCCALIGNMENTHACK FXMEMALIGNED(16)
00192 #else
00193
00194
00195 template<> struct FXMEMALIGNED(4) TwoPowerMemAligner<4> { TwoPowerMemAligner() { assert(!((FXuval)this & 3)); } };
00196 template<> struct FXMEMALIGNED(8) TwoPowerMemAligner<8> { TwoPowerMemAligner() { assert(!((FXuval)this & 7)); } };
00197 template<> struct FXMEMALIGNED(16) TwoPowerMemAligner<16> { TwoPowerMemAligner() { assert(!((FXuval)this & 15)); } };
00198 template<> struct FXMEMALIGNED(32) TwoPowerMemAligner<32> { TwoPowerMemAligner() { assert(!((FXuval)this & 31)); } };
00199 template<> struct FXMEMALIGNED(64) TwoPowerMemAligner<64> { TwoPowerMemAligner() { assert(!((FXuval)this & 63)); } };
00200 template<> struct FXMEMALIGNED(128) TwoPowerMemAligner<128> { TwoPowerMemAligner() { assert(!((FXuval)this & 127)); } };
00201 template<> struct FXMEMALIGNED(256) TwoPowerMemAligner<256> { TwoPowerMemAligner() { assert(!((FXuval)this & 255)); } };
00202 template<> struct FXMEMALIGNED(512) TwoPowerMemAligner<512> { TwoPowerMemAligner() { assert(!((FXuval)this & 511)); } };
00203 template<> struct FXMEMALIGNED(1024) TwoPowerMemAligner<1024> { TwoPowerMemAligner() { assert(!((FXuval)this & 1023)); } };
00204 #endif
00205 #endif
00206 #ifndef FXVECTOR_BUGGYGCCALIGNMENTHACK
00207 #define FXVECTOR_BUGGYGCCALIGNMENTHACK
00208 #endif
00209 template<typename type, unsigned int A, class supertype, bool _isArithmetic, bool _isInteger, typename SIMDType=char> class VectorBase
00210 {
00211 protected:
00212 union
00213 {
00214 type data[A];
00215 SIMDType v;
00216 };
00217 public:
00219 typedef type TYPE;
00221 static const unsigned int DIMENSION=A;
00223 static const bool isArithmetic=_isArithmetic;
00225 static const bool isInteger=_isInteger;
00226 VectorBase() { }
00227 operator const supertype &() const { return static_cast<const supertype &>(*this); }
00228 VectorBase(const VectorBase &o) { for(FXuint n=0; n<A; n++) data[n]=o.data[n]; }
00229 VectorBase &operator=(const VectorBase &o) { for(FXuint n=0; n<A; n++) data[n]=o.data[n]; return *this; }
00230 explicit VectorBase(const type *d) { for(FXuint n=0; n<A; n++) data[n]=d ? d[n] : 0; }
00231 explicit VectorBase(const type &d) { for(FXuint n=0; n<A; n++) data[n]=d; }
00233 type operator[](unsigned int i) const { assert(i<A); return data[i]; }
00235 VectorBase &set(unsigned int i, const type &d) { assert(i<A); data[i]=d; return *this; }
00236 };
00237 #define VECTOR1OP(op) VectorBase operator op () const { VectorBase ret; for(FXuint n=0; n<A; n++) ret.data[n]=op Base::data[n]; return ret; }
00238 #define VECTOR2OP(op) VectorBase operator op (const VectorBase &o) const { VectorBase ret; for(FXuint n=0; n<A; n++) ret.data[n]=Base::data[n] op o.data[n]; return ret; }
00239 #define VECTORP2OP(op) VectorBase &operator op (const VectorBase &o) { for(FXuint n=0; n<A; n++) Base::data[n] op o.data[n]; return *this; }
00240 #define VECTORFUNC(op) friend VectorBase op(const VectorBase &o) { VectorBase ret; for(FXuint n=0; n<A; n++) ret.data[n]= Maths::op (o.data[n]); return ret; }
00241 #define VECTOR2FUNC(op) friend VectorBase op(const VectorBase &a, const VectorBase &b) { VectorBase ret; for(FXuint n=0; n<A; n++) ret.data[n]= op (a.data[n], b.data[n]); return ret; }
00242 template<typename type, unsigned int A, class supertype, bool isInteger, typename SIMDType> class VectorBase<type, A, supertype, true, isInteger, SIMDType> : private TwoPowerMemAligner<sizeof(type)*A>, public VectorBase<type, A, supertype, false, false, SIMDType>
00243 {
00244 typedef VectorBase<type, A, supertype, false, false, SIMDType> Base;
00245 public:
00246 static const bool isArithmetic=true;
00247 VectorBase() { }
00248 explicit VectorBase(const type *d) : Base(d) { }
00249 explicit VectorBase(const type &d) : Base(d) { }
00250
00251 VECTOR1OP(+) VECTOR1OP(-) VECTOR1OP(!)
00252 VECTOR2OP(+) VECTOR2OP(-) VECTOR2OP(*) VECTOR2OP(/)
00253 VECTORP2OP(+=) VECTORP2OP(-=) VECTORP2OP(*=) VECTORP2OP(/=)
00254
00255 VECTOR2OP( == ) VECTOR2OP( != ) VECTOR2OP( < ) VECTOR2OP( <= ) VECTOR2OP( > ) VECTOR2OP( >= )
00256 VECTOR2OP( && ) VECTOR2OP( || )
00257
00258 VECTORFUNC(sqrt) VECTORFUNC(rcp) VECTORFUNC(rsqrt)
00259 VECTOR2FUNC(min) VECTOR2FUNC(max)
00261 friend bool isZero(const VectorBase &a)
00262 {
00263 bool iszero=true;
00264 for(FXuint n=0; n<A && iszero; n++)
00265 iszero=iszero && !a.data[n];
00266 return iszero;
00267 }
00269 friend type sum(const VectorBase &a)
00270 {
00271 type ret=0;
00272 for(FXuint n=0; n<A; n++)
00273 ret+=a.data[n];
00274 return ret;
00275 }
00277 friend type dot(const VectorBase &a, const VectorBase &b)
00278 {
00279 type ret=0;
00280 for(FXuint n=0; n<A; n++)
00281 ret+=a.data[n]*b.data[n];
00282 return ret;
00283 }
00284 };
00285 template<typename type, unsigned int A, class supertype, typename SIMDType> class VectorBase<type, A, supertype, true, true, SIMDType> : public VectorBase<type, A, supertype, true, false, SIMDType>
00286 {
00287 typedef VectorBase<type, A, supertype, true, false, SIMDType> Base;
00288 public:
00289 static const bool isArithmetic=true;
00290 static const bool isInteger=true;
00291 VectorBase() { }
00292 explicit VectorBase(const type *d) : Base(d) { }
00293 explicit VectorBase(const type &d) : Base(d) { }
00294
00295
00296 VECTOR2OP(%) VECTOR2OP(&) VECTOR2OP(|) VECTOR2OP(^) VECTOR2OP(<<) VECTOR2OP(>>) VECTOR1OP(~)
00297 VECTORP2OP(%=) VECTORP2OP(&=) VECTORP2OP(|=) VECTORP2OP(^=) VECTORP2OP(<<=) VECTORP2OP(>>=)
00298
00300 friend VectorBase lshiftvec(const VectorBase &a, int shift)
00301 {
00302 VectorBase ret;
00303 FXuint offset=(shift/8)/sizeof(type);
00304 shift-=(offset*sizeof(type))*8;
00305 for(FXint n=A-1; n>=0; n--)
00306 ret.data[n]=((FXuint) n<offset) ? 0 : ((a.data[n-offset]<<shift)|((0==shift || (FXuint) n<=offset) ? 0 : (a.data[n-offset-1]>>(8*sizeof(type)-shift))));
00307 return ret;
00308 }
00310 friend VectorBase rshiftvec(const VectorBase &a, int shift)
00311 {
00312 VectorBase ret;
00313 FXuint offset=(shift/8)/sizeof(type);
00314 shift-=(offset*sizeof(type))*8;
00315 for(FXuint n=0; n<A; n++)
00316 ret.data[n]=(n+offset>=A) ? 0 : ((a.data[n+offset]>>shift)|((0==shift || A<=n+offset+1) ? 0 : (a.data[n+offset+1]<<(8*sizeof(type)-shift))));
00317 return ret;
00318 }
00319 };
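/* Illustrative sketch (added): lshiftvec()/rshiftvec() treat the vector as one contiguous bit
   string with element 0 least significant, so bits carry across element boundaries:

       using namespace FX::Maths;
       Vector<FXuint, 4> v((FXuint) 1);            // {1,1,1,1}
       Vector<FXuint, 4> l=lshiftvec(v, 8);        // {0x100,0x100,0x100,0x100}
       Vector<FXuint, 4> r=rshiftvec(l, 8);        // back to {1,1,1,1}
*/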
00320 #undef VECTOR1OP
00321 #undef VECTOR2OP
00322 #undef VECTORP2OP
00323 #undef VECTORFUNC
00324 #undef VECTOR2FUNC
00325
00326
00327 template<class base, typename type, class equivtype> class EquivType : public base
00328 {
00329 public:
00330 EquivType() { }
00331 template<typename F> explicit EquivType(const F &d) : base(d) { }
00332 EquivType &operator=(const equivtype &o) { return *this=*((const EquivType *)&o); }
00333 operator equivtype &() { return *((equivtype *)this); }
00334 operator const equivtype &() const { return *((const equivtype *)this); }
00335 };
00336 template<class base, typename type> class EquivType<base, type, void> : public base
00337 {
00338 public:
00339 EquivType() { }
00340 template<typename F> explicit EquivType(const F &d) : base(d) { }
00341 };
00342
00343
00344 template<class vectortype, unsigned int N, class supertype, bool _isArithmetic=vectortype::isArithmetic, bool _isInteger=vectortype::isInteger> class VectorOfVectors
00345 {
00346 protected:
00347 vectortype vectors[N];
00348 public:
00350 typedef typename vectortype::TYPE TYPE;
00352 static const unsigned int DIMENSION=vectortype::DIMENSION*N;
00354 static const bool isArithmetic=_isArithmetic;
00356 static const bool isInteger=_isInteger;
00357 VectorOfVectors() { }
00358 operator const supertype &() const { return static_cast<const supertype &>(*this); }
00359 VectorOfVectors(const VectorOfVectors &o)
00360 {
00361 for(FXuint n=0; n<N; n++)
00362 vectors[n]=o.vectors[n];
00363 }
00364 VectorOfVectors &operator=(const VectorOfVectors &o)
00365 {
00366 for(FXuint n=0; n<N; n++)
00367 vectors[n]=o.vectors[n];
00368 return *this;
00369 }
00370 explicit VectorOfVectors(const TYPE *d)
00371 {
00372 for(FXuint n=0; n<N; n++)
00373 vectors[n]=vectortype(FXOFFSETPTR(d, n*sizeof(vectortype)));
00374 }
00375 explicit VectorOfVectors(const TYPE &d)
00376 {
00377 for(FXuint n=0; n<N; n++)
00378 vectors[n]=vectortype(d);
00379 }
00381 TYPE operator[](unsigned int i) const { assert(i<DIMENSION); return vectors[i/vectortype::DIMENSION][i%vectortype::DIMENSION]; }
00383 VectorOfVectors &set(unsigned int i, const TYPE &d) { assert(i<DIMENSION); vectors[i/vectortype::DIMENSION].set(i%vectortype::DIMENSION, d); return *this; }
00384 };
00385 #define VECTOR1OP(op) VectorOfVectors operator op () const { VectorOfVectors ret; for(FXuint n=0; n<N; n++) ret.vectors[n]=op Base::vectors[n]; return ret; }
00386 #define VECTOR2OP(op) VectorOfVectors operator op (const VectorOfVectors &o) const { VectorOfVectors ret; for(FXuint n=0; n<N; n++) ret.vectors[n]=Base::vectors[n] op o.vectors[n]; return ret; }
00387 #define VECTORP2OP(op) VectorOfVectors &operator op (const VectorOfVectors &o) { for(FXuint n=0; n<N; n++) Base::vectors[n] op o.vectors[n]; return *this; }
00388 #define VECTORFUNC(op) friend VectorOfVectors op(const VectorOfVectors &o) { VectorOfVectors ret; for(FXuint n=0; n<N; n++) ret.vectors[n]= op (o.vectors[n]); return ret; }
00389 #define VECTOR2FUNC(op) friend VectorOfVectors op(const VectorOfVectors &a, const VectorOfVectors &b) { VectorOfVectors ret; for(FXuint n=0; n<N; n++) ret.vectors[n]= op (a.vectors[n], b.vectors[n]); return ret; }
00390 template<class vectortype, unsigned int N, class supertype, bool isInteger> class VectorOfVectors<vectortype, N, supertype, true, isInteger> : public VectorOfVectors<vectortype, N, supertype, false, false>
00391 {
00392 protected:
00393 typedef VectorOfVectors<vectortype, N, supertype, false, false> Base;
00394 public:
00395 typedef typename vectortype::TYPE TYPE;
00396 VectorOfVectors() { }
00397 explicit VectorOfVectors(const TYPE *d) : Base(d) { }
00398 explicit VectorOfVectors(const TYPE &d) : Base(d) { }
00399
00400 VECTOR1OP(+) VECTOR1OP(-) VECTOR1OP(!)
00401 VECTOR2OP(+) VECTOR2OP(-) VECTOR2OP(*) VECTOR2OP(/)
00402 VECTORP2OP(+=) VECTORP2OP(-=) VECTORP2OP(*=) VECTORP2OP(/=)
00403
00404 VECTOR2OP( == ) VECTOR2OP( != ) VECTOR2OP( < ) VECTOR2OP( <= ) VECTOR2OP( > ) VECTOR2OP( >= )
00405 VECTOR2OP( && ) VECTOR2OP( || )
00406
00407 VECTORFUNC(sqrt) VECTORFUNC(rcp) VECTORFUNC(rsqrt)
00408 VECTOR2FUNC(min) VECTOR2FUNC(max)
00410 friend bool isZero(const VectorOfVectors &a)
00411 {
00412 bool iszero=true;
00413 for(FXuint n=0; n<N && iszero; n++)
00414 iszero&=isZero(a.vectors[n]);
00415 return iszero;
00416 }
00418 friend TYPE sum(const VectorOfVectors &a)
00419 {
00420 TYPE ret=0;
00421 for(FXuint n=0; n<N; n++)
00422 ret+=sum(a.vectors[n]);
00423 return ret;
00424 }
00426 friend TYPE dot(const VectorOfVectors &a, const VectorOfVectors &b)
00427 {
00428 TYPE ret=0;
00429 for(FXuint n=0; n<N; n++)
00430 ret+=dot(a.vectors[n], b.vectors[n]);
00431 return ret;
00432 }
00433 };
00434 template<class vectortype, unsigned int N, class supertype> class VectorOfVectors<vectortype, N, supertype, true, true> : public VectorOfVectors<vectortype, N, supertype, true, false>
00435 {
00436 protected:
00437 typedef VectorOfVectors<vectortype, N, supertype, true, false> Base;
00438 public:
00439 typedef typename vectortype::TYPE TYPE;
00440 VectorOfVectors() { }
00441 explicit VectorOfVectors(const TYPE *d) : Base(d) { }
00442 explicit VectorOfVectors(const TYPE &d) : Base(d) { }
00443
00444
00445 VECTOR2OP(%) VECTOR2OP(&) VECTOR2OP(|) VECTOR2OP(^) VECTOR2OP(<<) VECTOR2OP(>>) VECTOR1OP(~)
00446 VECTORP2OP(%=) VECTORP2OP(&=) VECTORP2OP(|=) VECTORP2OP(^=) VECTORP2OP(<<=) VECTORP2OP(>>=)
00447
00448 VECTOR2FUNC(lshiftvec) VECTOR2FUNC(rshiftvec)
00449 };
00450 #undef VECTOR1OP
00451 #undef VECTOR2OP
00452 #undef VECTORP2OP
00453 #undef VECTORFUNC
00454 #undef VECTOR2FUNC
00455 }
00459 template<typename type, bool minus=false> struct InfinityValue : public Impl::InfinityValue<type, minus> { };
00463 template<typename type> struct NaNValue : public Impl::NaNValue<type> { };
00465 template<typename type> inline bool isNaN(type val) throw();
00466 template<> inline bool isNaN<float>(float val) throw() { union { float f; FXuint i; } v; v.f=val; return (v.i&0x7fffffff)>0x7f800000; }
00467 template<> inline bool isNaN<double>(double val) throw() { union { double f; FXulong i; } v; v.f=val; return (v.i&0x7fffffffffffffffULL)>0x7ff0000000000000ULL; }
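/* Illustrative sketch (added): obtaining and testing the special floating-point values:

       using namespace FX::Maths;
       double posinf=InfinityValue<double>::value();          // +infinity
       double neginf=InfinityValue<double, true>::value();    // -infinity
       double qnan  =NaNValue<double>::value();               // quiet NaN
       bool   b1    =isNaN(qnan);                             // true
       bool   b2    =isNaN(posinf);                           // false
*/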
00468
00549 template<typename type, unsigned int A> class Vector : public Impl::EquivType<Impl::VectorBase<type, A, Vector<type, A>, Generic::Traits<type>::isArithmetical, Generic::Traits<type>::isInt>, type, void>
00550 {
00551 typedef Impl::EquivType<Impl::VectorBase<type, A, Vector<type, A>, Generic::Traits<type>::isArithmetical, Generic::Traits<type>::isInt>, type, void> Base;
00552 public:
00553 Vector() { }
00555 explicit Vector(const type *d) : Base(d) { }
00557 explicit Vector(const type &d) : Base(d) { }
00558 };
00559
00560 #define DEFINEVECTOREQUIV(type, A, equivtype) \
00561 template<> class Vector<type, A> : public Impl::EquivType<Impl::VectorBase<type, A, Vector<type, A>, Generic::Traits<type>::isArithmetical, Generic::Traits<type>::isInt>, type, equivtype> \
00562 { \
00563 typedef Impl::EquivType<Impl::VectorBase<type, A, Vector<type, A>, Generic::Traits<type>::isArithmetical, Generic::Traits<type>::isInt>, type, equivtype> Base; \
00564 public: \
00565 Vector() { } \
00566 explicit Vector(const type *d) : Base(d) { } \
00567 explicit Vector(const type &d) : Base(d) { } \
00568 };
00569 DEFINEVECTOREQUIV(FXuchar, 8, FXulong)
00570 DEFINEVECTOREQUIV(FXushort, 4, FXulong)
00571 DEFINEVECTOREQUIV(FXuint, 2, FXulong)
00572 DEFINEVECTOREQUIV(FXuchar, 4, FXuint)
00573 DEFINEVECTOREQUIV(FXushort, 2, FXuint)
00574 DEFINEVECTOREQUIV(FXuchar, 2, FXushort)
00575 DEFINEVECTOREQUIV(float, 2, FXVec2f)
00576 DEFINEVECTOREQUIV(float, 3, FXVec3f)
00577 typedef Vector<float, 2> Vector2f;
00578 typedef Vector<float, 3> Vector3f;
00579 typedef Vector<float, 4> Vector4f;
00580 DEFINEVECTOREQUIV(double, 3, FXVec3d)
00581 typedef Vector<double, 2> Vector2d;
00582 typedef Vector<double, 3> Vector3d;
00583 typedef Vector<double, 4> Vector4d;
00584
00585 template<typename type, unsigned int A> FXStream &operator<<(FXStream &s, const Vector<type, A> &v)
00586 {
00587 for(FXuint n=0; n<A; n++) s << v[n];
00588 return s;
00589 }
00590 template<typename type, unsigned int A> FXStream &operator>>(FXStream &s, Vector<type, A> &v)
00591 {
00592 type t;
00593 for(FXuint n=0; n<A; n++) { s >> t; v.set(n, t); }
00594 return s;
00595 }
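/* Illustrative sketch (added): basic FX::Maths::Vector usage. The same code compiles whether
   the portable implementation or one of the SIMD specialisations below is selected:

       using namespace FX::Maths;
       static const float data[4]={ 1, 2, 3, 4 };
       Vector4f a(data), b(2.0f);      // b=={2,2,2,2}
       Vector4f c=a*b+b;               // element-wise, =={4,6,8,10}
       float    s=sum(c);              // 28
       float    d=dot(a, b);           // 20
       bool     z=isZero(c-c);         // true
       c.set(0, 42.0f);                // c[0] is now 42
*/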
00597 #define FXVECTOROFVECTORS(VECTORTYPE, ELEMENTS, equivtype) template<> class Vector<VECTORTYPE::TYPE, ELEMENTS> : public Impl::EquivType<Impl::VectorOfVectors<VECTORTYPE, ELEMENTS/VECTORTYPE::DIMENSION, Vector<VECTORTYPE::TYPE, ELEMENTS> >, VECTORTYPE::TYPE, equivtype> \
00598 { \
00599 typedef Impl::EquivType<Impl::VectorOfVectors<VECTORTYPE, ELEMENTS/VECTORTYPE::DIMENSION, Vector<VECTORTYPE::TYPE, ELEMENTS> >, VECTORTYPE::TYPE, equivtype> Base; \
00600 public: \
00601 Vector() { } \
00602 explicit Vector(const VECTORTYPE::TYPE *d) : Base(d) { } \
00603 explicit Vector(const VECTORTYPE::TYPE &d) : Base(d) { } \
00604 };
00605
00606
00607 #if 1 // Use to disable SIMD optimised versions
00608 #if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
00609
00610 #if defined(_M_X64) || defined(__x86_64__) || (defined(_M_IX86) && _M_IX86_FP>=1) || (defined(__i386__) && defined(__SSE__))
00611 #define FXVECTOR_SPECIALISEDFLOAT4
00612 template<> class FXVECTOR_BUGGYGCCALIGNMENTHACK Vector<float, 4> : private Impl::TwoPowerMemAligner<16>
00613 {
00614 public:
00615 typedef float TYPE;
00616 static const unsigned int DIMENSION=4;
00617 static const bool isArithmetic=true;
00618 static const bool isInteger=false;
00619 private:
00620 typedef __m128 SSETYPE;
00621 SSETYPE v;
00622 static TYPE int_extract(SSETYPE v, unsigned int i)
00623 {
00624 SSETYPE t;
00625 switch(i)
00626 {
00627 case 0:
00628 t=v; break;
00629 case 1:
00630 t=_mm_shuffle_ps(v, v, 1); break;
00631 case 2:
00632 t=_mm_shuffle_ps(v, v, 2); break;
00633 case 3:
00634 t=_mm_shuffle_ps(v, v, 3); break;
00635 }
00636 TYPE ret;
00637 _mm_store_ss(&ret, t);
00638 return ret;
00639 }
00640 static SSETYPE int_ones()
00641 {
00642 static SSETYPE v=_mm_set1_ps(1);
00643 return v;
00644 }
00645 static SSETYPE int_not()
00646 {
00647 static SSETYPE v=_mm_cmpeq_ps(int_ones(), int_ones());
00648 return v;
00649 }
00650 static SSETYPE int_notones()
00651 {
00652 static SSETYPE v=_mm_xor_ps(int_ones(), int_not());
00653 return v;
00654 }
00655 public:
00656 Vector() { }
00657 Vector(const SSETYPE &_v) { v=_v; }
00658 Vector(const Vector &o) { v=o.v; }
00659 Vector &operator=(const Vector &o) { v=o.v; return *this; }
00660
00661 Vector(const FXVec4f &o) { v=_mm_loadu_ps((float*)&o); }
00662 Vector &operator=(const FXVec4f &o) { v=_mm_loadu_ps((float*)&o); return *this; }
00663 operator FXVec4f &() { return *((FXVec4f *)this); }
00664 operator const FXVec4f &() const { return *((const FXVec4f *)this); }
00665
00666 explicit Vector(const TYPE *d)
00667 {
00668 if(!d) v=_mm_setzero_ps();
00669 else v=_mm_loadu_ps(d);
00670 }
00671 explicit Vector(const TYPE &d)
00672 {
00673 v=_mm_set1_ps(d);
00674 }
00675 TYPE operator[](unsigned int i) const
00676 {
00677 return int_extract(v, i);
00678 }
00679 Vector &set(unsigned int i, const TYPE &d)
00680 {
00681 union { TYPE f[DIMENSION]; SSETYPE v; } t;
00682 assert(i<DIMENSION);
00683 t.v=v;
00684 t.f[i]=d;
00685 v=t.v;
00686 return *this;
00687 }
00688 Vector operator +() const { return *this; }
00689 Vector operator -() const { Vector ret((TYPE *)0); ret-=*this; return ret; }
00690 Vector operator !() const { return _mm_and_ps(int_ones(), _mm_cmpeq_ps(_mm_setzero_ps(), v)); }
00691 #define VECTOR2OP(op, sseop) Vector operator op (const Vector &o) const { return _mm_ ##sseop## _ps(v, o.v); }
00692 #define VECTORP2OP(op, sseop) Vector &operator op (const Vector &o) { v=_mm_ ##sseop## _ps(v, o.v); return *this; }
00693 #define VECTORFUNC(op) friend Vector op(const Vector &o) { return _mm_ ##op## _ps(o.v); }
00694 #define VECTOR2FUNC(op) friend Vector op(const Vector &a, const Vector &b) { return _mm_ ##op## _ps(a.v, b.v); }
00695 VECTOR2OP(+, add) VECTOR2OP(-, sub) VECTOR2OP(*, mul) VECTOR2OP(/, div)
00696 VECTORP2OP(+=, add) VECTORP2OP(-=, sub) VECTORP2OP(*=, mul) VECTORP2OP(/=, div)
00697
00698 #undef VECTOR2OP
00699 #define VECTOR2OP(op, sseop) Vector operator op (const Vector &o) const { return _mm_and_ps(int_ones(), _mm_cmp ##sseop## _ps(v, o.v)); }
00700 VECTOR2OP( == , eq) VECTOR2OP( != , neq) VECTOR2OP( < , lt) VECTOR2OP( <= , le) VECTOR2OP( > , gt) VECTOR2OP( >= , ge)
00701 #undef VECTOR2OP
00702 #define VECTOR2OP(op, sseop) Vector operator op (const Vector &o) const { return _mm_and_ps(int_ones(), _mm_ ##sseop## _ps(_mm_cmpneq_ps(_mm_setzero_ps(), v), _mm_cmpneq_ps(_mm_setzero_ps(), o.v))); }
00703 VECTOR2OP( && , and) VECTOR2OP( || , or)
00704
00705 VECTORFUNC(sqrt) VECTORFUNC(rcp) VECTORFUNC(rsqrt)
00706 VECTOR2FUNC(min) VECTOR2FUNC(max)
00707 #undef VECTOR1OP
00708 #undef VECTOR2OP
00709 #undef VECTORP2OP
00710 #undef VECTORFUNC
00711 #undef VECTOR2FUNC
00712 friend bool isZero(const Vector &a)
00713 {
00714 return !_mm_movemask_ps(a.v)&&!_mm_movemask_ps(_mm_sub_ps(_mm_setzero_ps(), a.v));
00715 }
00716 friend TYPE sum(const Vector &a)
00717 {
00718 #if _M_IX86_FP>=3 || defined(__SSE3__)
00719 SSETYPE v=_mm_hadd_ps(a.v, a.v);
00720 return int_extract(_mm_hadd_ps(v, v), 0);
00721 #elif 1
00722
00723
00724 SSETYPE tempA = _mm_shuffle_ps(a.v,a.v, _MM_SHUFFLE(2,0,2,0));
00725 SSETYPE tempB = _mm_shuffle_ps(a.v,a.v, _MM_SHUFFLE(3,1,3,1));
00726 SSETYPE tempC = _mm_add_ps(tempB, tempA);
00727 tempA = _mm_shuffle_ps(tempC,tempC, _MM_SHUFFLE(2,0,2,0));
00728 tempB = _mm_shuffle_ps(tempC,tempC, _MM_SHUFFLE(3,1,3,1));
00729 tempC = _mm_add_ss(tempB, tempA);
00730 return int_extract(tempC, 0);
00731 #else
00732 FXMEMALIGNED(16) TYPE f[DIMENSION];
00733 _mm_store_ps(f, a.v);
00734 return f[0]+f[1]+f[2]+f[3];
00735 #endif
00736 }
00737 friend TYPE dot(const Vector &a, const Vector &b)
00738 {
00739 #if _M_IX86_FP>=4 || defined(__SSE4__)
00740
00741 return int_extract(_mm_dp_ps(a.v, b.v, 0xf1), 0);
00742 #else
00743
00744 return sum(a*b);
00745 #endif
00746 }
00747 friend inline SSETYPE &GetSSEVal(Vector &a) { return a.v; }
00748 friend inline const SSETYPE &GetSSEVal(const Vector &a) { return a.v; }
00749 };
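/* Illustrative sketch (added): when this SSE specialisation is active, GetSSEVal() exposes the
   underlying __m128 so user code can mix in raw intrinsics, and the FXVec4f conversions give a
   cheap bridge to the FOX vector type:

       using namespace FX::Maths;
       Vector<float, 4> a(1.0f), b(0.5f);
       __m128 &ra=GetSSEVal(a);
       ra=_mm_add_ps(ra, GetSSEVal(b));    // a is now {1.5,1.5,1.5,1.5}
       FXVec4f v4=a;                       // convert to the equivalent FOX type
*/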
00750 typedef Vector<float, 4> int_SSEOptimised_float4;
00751 FXVECTOROFVECTORS(int_SSEOptimised_float4, 8, void);
00752 FXVECTOROFVECTORS(int_SSEOptimised_float4, 16, void);
00753 FXVECTOROFVECTORS(int_SSEOptimised_float4, 32, void);
00754 FXVECTOROFVECTORS(int_SSEOptimised_float4, 64, void);
00755 FXVECTOROFVECTORS(int_SSEOptimised_float4, 128, void);
00756 FXVECTOROFVECTORS(int_SSEOptimised_float4, 256, void);
00757 #endif
00758 #if defined(_M_X64) || defined(__x86_64__) || (defined(_M_IX86) && _M_IX86_FP>=2) || (defined(__i386__) && defined(__SSE2__))
00759 #define FXVECTOR_SPECIALISEDDOUBLE2
00760 template<> class FXVECTOR_BUGGYGCCALIGNMENTHACK Vector<double, 2> : private Impl::TwoPowerMemAligner<16>
00761 {
00762 public:
00763 typedef double TYPE;
00764 static const unsigned int DIMENSION=2;
00765 static const bool isArithmetic=true;
00766 static const bool isInteger=false;
00767 private:
00768 typedef __m128d SSETYPE;
00769 SSETYPE v;
00770 static TYPE int_extract(SSETYPE v, unsigned int i)
00771 {
00772 SSETYPE t;
00773 switch(i)
00774 {
00775 case 0:
00776 t=v; break;
00777 case 1:
00778 t=_mm_shuffle_pd(v, v, 1); break;
00779 }
00780 TYPE ret;
00781 _mm_store_sd(&ret, t);
00782 return ret;
00783 }
00784 static SSETYPE int_ones()
00785 {
00786 static SSETYPE v=_mm_set1_pd(1);
00787 return v;
00788 }
00789 static SSETYPE int_not()
00790 {
00791 static SSETYPE v=_mm_cmpeq_pd(int_ones(), int_ones());
00792 return v;
00793 }
00794 static SSETYPE int_notones()
00795 {
00796 static SSETYPE v=_mm_xor_pd(int_ones(), int_not());
00797 return v;
00798 }
00799 public:
00800 Vector() { }
00801 Vector(const SSETYPE &_v) { v=_v; }
00802 Vector(const Vector &o) { v=o.v; }
00803 Vector &operator=(const Vector &o) { v=o.v; return *this; }
00804
00805 Vector(const FXVec2d &o) { v=_mm_loadu_pd((double*)&o); }
00806 Vector &operator=(const FXVec2d &o) { v=_mm_loadu_pd((double*)&o); return *this; }
00807 operator FXVec2d &() { return *((FXVec2d *)this); }
00808 operator const FXVec2d &() const { return *((const FXVec2d *)this); }
00809
00810 explicit Vector(const TYPE *d)
00811 {
00812 if(!d) v=_mm_setzero_pd();
00813 else v=_mm_loadu_pd(d);
00814 }
00815 explicit Vector(const TYPE &d)
00816 {
00817 v=_mm_set1_pd(d);
00818 }
00819 TYPE operator[](unsigned int i) const
00820 {
00821 return int_extract(v, i);
00822 }
00823 Vector &set(unsigned int i, const TYPE &d)
00824 {
00825 union { TYPE f[DIMENSION]; SSETYPE v; } t;
00826 assert(i<DIMENSION);
00827 t.v=v;
00828 t.f[i]=d;
00829 v=t.v;
00830 return *this;
00831 }
00832 Vector operator +() const { return *this; }
00833 Vector operator -() const { Vector ret((TYPE *)0); ret-=*this; return ret; }
00834 Vector operator !() const { return _mm_and_pd(int_ones(), _mm_cmpeq_pd(_mm_setzero_pd(), v)); }
00835 #define VECTOR2OP(op, sseop) Vector operator op (const Vector &o) const { return _mm_ ##sseop## _pd(v, o.v); }
00836 #define VECTORP2OP(op, sseop) Vector &operator op (const Vector &o) { v=_mm_ ##sseop## _pd(v, o.v); return *this; }
00837 #define VECTORFUNC(op) friend Vector op(const Vector &o) { return _mm_ ##op## _pd(o.v); }
00838 #define VECTOR2FUNC(op) friend Vector op(const Vector &a, const Vector &b) { return _mm_ ##op## _pd(a.v, b.v); }
00839 VECTOR2OP(+, add) VECTOR2OP(-, sub) VECTOR2OP(*, mul) VECTOR2OP(/, div)
00840 VECTORP2OP(+=, add) VECTORP2OP(-=, sub) VECTORP2OP(*=, mul) VECTORP2OP(/=, div)
00841
00842 #undef VECTOR2OP
00843 #define VECTOR2OP(op, sseop) Vector operator op (const Vector &o) const { return _mm_and_pd(int_ones(), _mm_cmp ##sseop## _pd(v, o.v)); }
00844 VECTOR2OP( == , eq) VECTOR2OP( != , neq) VECTOR2OP( < , lt) VECTOR2OP( <= , le) VECTOR2OP( > , gt) VECTOR2OP( >= , ge)
00845 #undef VECTOR2OP
00846 #define VECTOR2OP(op, sseop) Vector operator op (const Vector &o) const { return _mm_and_pd(int_ones(), _mm_ ##sseop## _pd(_mm_cmpneq_pd(_mm_setzero_pd(), v), _mm_cmpneq_pd(_mm_setzero_pd(), o.v))); }
00847 VECTOR2OP( && , and) VECTOR2OP( || , or)
00848
00849 VECTORFUNC(sqrt)
00850 VECTOR2FUNC(min) VECTOR2FUNC(max)
00851 #undef VECTOR1OP
00852 #undef VECTOR2OP
00853 #undef VECTORP2OP
00854 #undef VECTORFUNC
00855 #undef VECTOR2FUNC
00856 friend Vector rcp(const Vector &o) { return _mm_div_pd(_mm_set1_pd(1), o.v); }
00857 friend Vector rsqrt(const Vector &o) { return _mm_div_pd(_mm_set1_pd(1), _mm_sqrt_pd(o.v)); }
00858 friend bool isZero(const Vector &a)
00859 {
00860 return !_mm_movemask_pd(a.v)&&!_mm_movemask_pd(_mm_sub_pd(_mm_setzero_pd(), a.v));
00861 }
00862 friend TYPE sum(const Vector &a)
00863 {
00864 #if _M_IX86_FP>=3 || defined(__SSE3__)
00865 return int_extract(_mm_hadd_pd(a.v, a.v), 0);
00866 #elif 1
00867 SSETYPE tempA = _mm_shuffle_pd(a.v,a.v, _MM_SHUFFLE2(0,0));
00868 SSETYPE tempB = _mm_shuffle_pd(a.v,a.v, _MM_SHUFFLE2(1,1));
00869 return int_extract(_mm_add_sd(tempB, tempA), 0);
00870 #else
00871 FXMEMALIGNED(16) TYPE f[DIMENSION];
00872 _mm_store_pd(f, a.v);
00873 return f[0]+f[1];
00874 #endif
00875 }
00876 friend TYPE dot(const Vector &a, const Vector &b)
00877 {
00878 #if _M_IX86_FP>=4 || defined(__SSE4__)
00879
00880 return int_extract(_mm_dp_pd(a.v, b.v, 0xf1), 0);
00881 #else
00882
00883 return sum(a*b);
00884 #endif
00885 }
00886 friend inline SSETYPE &GetSSEVal(Vector &a) { return a.v; }
00887 friend inline const SSETYPE &GetSSEVal(const Vector &a) { return a.v; }
00888 };
00889 typedef Vector<double, 2> int_SSEOptimised_double2;
00890 FXVECTOROFVECTORS(int_SSEOptimised_double2, 4, FXVec4d);
00891 FXVECTOROFVECTORS(int_SSEOptimised_double2, 8, void);
00892 FXVECTOROFVECTORS(int_SSEOptimised_double2, 16, void);
00893 FXVECTOROFVECTORS(int_SSEOptimised_double2, 32, void);
00894 FXVECTOROFVECTORS(int_SSEOptimised_double2, 64, void);
00895 FXVECTOROFVECTORS(int_SSEOptimised_double2, 128, void);
00896
00897
00898
00899 #define VECTOR2OP_A(op, sseop, sseending) Vector operator op (const Vector &o) const { return _mm_ ##sseop## _ ##sseending (v, o.v); }
00900 #define VECTORP2OP_A(op, sseop, sseending) Vector &operator op (const Vector &o) { v=_mm_ ##sseop## _ ##sseending (v, o.v); return *this; }
00901 #define VECTOR2OP_A2(op, sseop, sseending) Vector operator op (const Vector &o) const { return _mm_ ##sseop## _ ##sseending (v, _mm_cvtsi32_si128(_mm_cvtsi128_si32(o.v))); }
00902 #define VECTORP2OP_A2(op, sseop, sseending) Vector &operator op (const Vector &o) { v=_mm_ ##sseop## _ ##sseending (v, _mm_cvtsi32_si128(_mm_cvtsi128_si32(o.v))); return *this; }
00903 #define VECTOR2OP_B(op, sseop, sseending) Vector operator op (const Vector &o) const { return _mm_ ##sseop## _si128(v, o.v); }
00904 #define VECTORP2OP_B(op, sseop, sseending) Vector &operator op (const Vector &o) { v=_mm_ ##sseop## _si128(v, o.v); return *this; }
00905 #define VECTOR2OP_C(op, sseop, sseending) Vector operator op (const Vector &o) const { return _mm_and_si128(int_ones(), _mm_cmp ##sseop## _ ##sseending (v, o.v)); }
00906 #define VECTOR2OP_D(op, sseop, sseending) Vector operator op (const Vector &o) const { return _mm_andnot_si128(_mm_cmp ##sseop## _ ##sseending (v, o.v), int_ones()); }
00907 #define VECTOR2OP_E(op, sseop, sseending) Vector operator op (const Vector &o) const { return _mm_and_si128(int_ones(), _mm_ ##sseop## _si128(_mm_andnot_si128(_mm_cmpeq_ ##sseending (_mm_setzero_si128(), v), int_not()), _mm_andnot_si128(_mm_cmpeq_ ##sseending (_mm_setzero_si128(), o.v), int_not()))); }
00908 #define VECTOR2FUNC(op, sseop, sseending) friend Vector op(const Vector &a, const Vector &b) { return _mm_ ##sseop## _ ##sseending (a.v, b.v); }
00909 #if _M_IX86_FP>=4 || defined(__SSE4__)
00910 #define VECTOR2OP_A_SSE4ONLY(op, sseop, sseending) VECTOR2OP_A(op, sseop, sseending)
00911 #define VECTORP2OP_A_SSE4ONLY(op, sseop, sseending) VECTORP2OP_A(op, sseop, sseending)
00912 #define VECTOR2FUNC_SSE4ONLY(op, sseop, sseending) VECTOR2FUNC(op, sseop, sseending)
00913 #define VECTORISZERO 1==_mm_testz_si128(a.v, a.v)
00914 #else
00915 #define VECTOR2OP_A_SSE4ONLY(op, sseop, sseending)
00916 #define VECTORP2OP_A_SSE4ONLY(op, sseop, sseending)
00917 #define VECTOR2FUNC_SSE4ONLY(op, sseop, sseending)
00918 #define VECTORISZERO 65535==_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_setzero_si128(), a.v))
00919 #endif
00920 #define VECTORINTEGER(vint, vsize, sseending, signage1, signage2) \
00921 template<> class FXVECTOR_BUGGYGCCALIGNMENTHACK Vector<vint, vsize> : public Impl::VectorBase<vint, vsize, Vector<vint, vsize>, true, true, __m128i> \
00922 { \
00923 private: \
00924 typedef Impl::VectorBase<vint, vsize, Vector<vint, vsize>, true, true, __m128i> Base; \
00925 typedef __m128i SSETYPE; \
00926 static SSETYPE int_ones() \
00927 { \
00928 static SSETYPE v=_mm_set1_ ##sseending (1); \
00929 return v; \
00930 } \
00931 static SSETYPE int_not() \
00932 { \
00933 static SSETYPE v=_mm_cmpeq_ ##sseending (int_ones(), int_ones()); \
00934 return v; \
00935 } \
00936 static SSETYPE int_notones() \
00937 { \
00938 static SSETYPE v=_mm_xor_si128(int_ones(), int_not()); \
00939 return v; \
00940 } \
00941 public: \
00942 Vector() { } \
00943 Vector(const SSETYPE &_v) { v=_v; } \
00944 Vector(const Vector &o) { v=o.v; } \
00945 Vector &operator=(const Vector &o) { v=o.v; return *this; } \
00946 explicit Vector(const TYPE *d) \
00947 { \
00948 if(!d) v=_mm_setzero_si128(); \
00949 else v=_mm_loadu_si128((SSETYPE *) d); \
00950 } \
00951 explicit Vector(const TYPE &d) \
00952 { \
00953 v=_mm_set1_ ##sseending (d); \
00954 } \
00955 Vector operator +() const { return *this; } \
00956 Vector operator -() const { Vector ret((TYPE *)0); ret-=*this; return ret; } \
00957 Vector operator !() const { return _mm_and_si128(_mm_cmpeq_ ##sseending (_mm_setzero_si128(), v), int_ones()); } \
00958 Vector operator ~() const { return _mm_xor_si128(int_not(), v); } \
00959 VECTOR2OP_A(+, add, sseending) VECTOR2OP_A(-, sub, sseending) VECTOR2OP_A2(<<, sll, sseending) VECTOR2OP_A2(>>, sr ## signage1, sseending) \
00960 VECTORP2OP_A(+=, add, sseending) VECTORP2OP_A(-=, sub, sseending) VECTORP2OP_A2(<<=, sll, sseending) VECTORP2OP_A2(>>=, sr ## signage1, sseending) \
00961 VECTOR2OP_A_SSE4ONLY(*, mullo, sseending) \
00962 VECTORP2OP_A_SSE4ONLY(*=, mullo, sseending) \
00963 \
00964 VECTOR2OP_B( & , and, sseending) VECTOR2OP_B( | , or, sseending) VECTOR2OP_B( ^ , xor, sseending) \
00965 VECTORP2OP_B( &= , and, sseending) VECTORP2OP_B( |= , or, sseending) VECTORP2OP_B( ^= , xor, sseending) \
00966 \
00967 VECTOR2OP_C( == , eq, sseending) VECTOR2OP_C( < , lt, sseending) VECTOR2OP_C( > , gt, sseending) \
00968 VECTOR2OP_D( != , eq, sseending) VECTOR2OP_D( <= , gt, sseending) VECTOR2OP_D( >= , lt, sseending) \
00969 VECTOR2OP_E( && , and, sseending) VECTOR2OP_E( || , or, sseending) \
00970 \
00971 VECTOR2FUNC_SSE4ONLY(min, min, signage2) VECTOR2FUNC_SSE4ONLY(max, max, signage2) \
00972 friend bool isZero(const Vector &a) \
00973 { \
00974 return VECTORISZERO; \
00975 } \
00976 friend Vector lshiftvec(const Vector &a, int shift) \
00977 { \
00978 if(shift&7) \
00979 return lshiftvec(static_cast<const Vector::Base &>(a), shift); \
00980 else switch(shift/8) \
00981 { \
00982 case 0: return a; \
00983 case 1: return _mm_slli_si128(a.v, 1); \
00984 case 2: return _mm_slli_si128(a.v, 2); \
00985 case 3: return _mm_slli_si128(a.v, 3); \
00986 case 4: return _mm_slli_si128(a.v, 4); \
00987 case 5: return _mm_slli_si128(a.v, 5); \
00988 case 6: return _mm_slli_si128(a.v, 6); \
00989 case 7: return _mm_slli_si128(a.v, 7); \
00990 case 8: return _mm_slli_si128(a.v, 8); \
00991 case 9: return _mm_slli_si128(a.v, 9); \
00992 case 10: return _mm_slli_si128(a.v, 10); \
00993 case 11: return _mm_slli_si128(a.v, 11); \
00994 case 12: return _mm_slli_si128(a.v, 12); \
00995 case 13: return _mm_slli_si128(a.v, 13); \
00996 case 14: return _mm_slli_si128(a.v, 14); \
00997 case 15: return _mm_slli_si128(a.v, 15); \
00998 } \
00999 return _mm_setzero_si128(); \
01000 } \
01001 friend Vector rshiftvec(const Vector &a, int shift) \
01002 { \
01003 if(shift&7) \
01004 return rshiftvec(static_cast<const Vector::Base &>(a), shift); \
01005 else switch(shift/8) \
01006 { \
01007 case 0: return a; \
01008 case 1: return _mm_srli_si128(a.v, 1); \
01009 case 2: return _mm_srli_si128(a.v, 2); \
01010 case 3: return _mm_srli_si128(a.v, 3); \
01011 case 4: return _mm_srli_si128(a.v, 4); \
01012 case 5: return _mm_srli_si128(a.v, 5); \
01013 case 6: return _mm_srli_si128(a.v, 6); \
01014 case 7: return _mm_srli_si128(a.v, 7); \
01015 case 8: return _mm_srli_si128(a.v, 8); \
01016 case 9: return _mm_srli_si128(a.v, 9); \
01017 case 10: return _mm_srli_si128(a.v, 10); \
01018 case 11: return _mm_srli_si128(a.v, 11); \
01019 case 12: return _mm_srli_si128(a.v, 12); \
01020 case 13: return _mm_srli_si128(a.v, 13); \
01021 case 14: return _mm_srli_si128(a.v, 14); \
01022 case 15: return _mm_srli_si128(a.v, 15); \
01023 } \
01024 return _mm_setzero_si128(); \
01025 } \
01026 friend inline SSETYPE &GetSSEVal(Vector &a) { return a.v; } \
01027 friend inline const SSETYPE &GetSSEVal(const Vector &a) { return a.v; } \
01028 };
01029
01030
01031 VECTORINTEGER(FXushort, 8, epi16, l, epu16)
01032 VECTORINTEGER(FXshort, 8, epi16, a, epi16)
01033 VECTORINTEGER(FXuint, 4, epi32, l, epu32)
01034 VECTORINTEGER(FXint, 4, epi32, a, epi32)
01035
01036
01037 #undef VECTOR2OP_A
01038 #undef VECTORP2OP_A
01039 #undef VECTOR2OP_A2
01040 #undef VECTORP2OP_A2
01041 #undef VECTOR2OP_B
01042 #undef VECTORP2OP_B
01043 #undef VECTOR2OP_C
01044 #undef VECTOR2OP_D
01045 #undef VECTOR2OP_E
01046 #undef VECTOR2FUNC
01047 #undef VECTOR2OP_A_SSE4ONLY
01048 #undef VECTORP2OP_A_SSE4ONLY
01049 #undef VECTOR2FUNC_SSE4ONLY
01050 #undef VECTORISZERO
01051 #undef VECTORINTEGER
01052 #define VECTORINTEGER(vint, vsize) \
01053 typedef Vector<vint, vsize> int_SSEOptimised_##vint; \
01054 FXVECTOROFVECTORS(int_SSEOptimised_##vint, vsize*2, void); \
01055 FXVECTOROFVECTORS(int_SSEOptimised_##vint, vsize*4, void); \
01056 FXVECTOROFVECTORS(int_SSEOptimised_##vint, vsize*8, void); \
01057 FXVECTOROFVECTORS(int_SSEOptimised_##vint, vsize*16, void); \
01058 FXVECTOROFVECTORS(int_SSEOptimised_##vint, vsize*32, void); \
01059 FXVECTOROFVECTORS(int_SSEOptimised_##vint, vsize*64, void);
01060 VECTORINTEGER(FXushort, 8)
01061 VECTORINTEGER(FXshort, 8)
01062 VECTORINTEGER(FXuint, 4)
01063 VECTORINTEGER(FXint, 4)
01064 #undef VECTORINTEGER
01065 #endif
01066 #endif
01067 #endif
01068
01069 #ifndef FXVECTOR_SPECIALISEDFLOAT4
01070 DEFINEVECTOREQUIV(float, 4, FXVec4f)
01071 #endif
01072 #ifndef FXVECTOR_SPECIALISEDDOUBLE2
01073 DEFINEVECTOREQUIV(double, 2, FXVec2d)
01074 DEFINEVECTOREQUIV(double, 4, FXVec4d)
01075 #endif
01076 #undef DEFINEVECTOREQUIV
01077
01087 template<typename type, unsigned int A> class Array
01088 {
01089 type data[A];
01090 public:
01091 typedef type value_type;
01092 typedef value_type &reference;
01093 typedef const value_type &const_reference;
01094 typedef value_type *iterator;
01095 typedef const value_type *const_iterator;
01096 size_t max_size() const { return A; }
01097
01098 Array() { }
01099 explicit Array(const type *d) { for(FXuint b=0; b<A; b++) data[b]=d ? type(d[b]) : type(); }
01100 bool operator==(const Array &o) const { for(FXuint b=0; b<A; b++) if(data[b]!=o.data[b]) return false; return true; }
01101 bool operator!=(const Array &o) const { for(FXuint b=0; b<A; b++) if(data[b]!=o.data[b]) return true; return false; }
01102
01103 reference at(int i) { assert(i<A); return data[i]; }
01104 const_reference at(int i) const { assert(i<A); return data[i]; }
01105 reference operator[](int i) { return at(i); }
01106 const_reference operator[](int i) const { return at(i); }
01107
01108 reference front() { return data[0]; }
01109 const_reference front() const { return data[0]; }
01110 reference back() { return data[A-1]; }
01111 const_reference back() const { return data[A-1]; }
01112 iterator begin() { return &data[0]; }
01113 const_iterator begin() const { return &data[0]; }
01114 iterator end() { return &data[A]; }
01115 const_iterator end() const { return &data[A]; }
01116 };
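/* Illustrative sketch (added): Maths::Array is a fixed-length array with STL-style iterators:

       static const int vals[4]={ 1, 2, 3, 4 };
       FX::Maths::Array<int, 4> arr(vals);
       int total=0;
       for(FX::Maths::Array<int, 4>::const_iterator it=arr.begin(); it!=arr.end(); ++it)
           total+=*it;                 // total==10
       arr[0]=42;
*/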
01117
01128 namespace Impl {
01129 template<typename type, unsigned int A, unsigned int B, bool isFP> class MatrixIt;
01130
01131 template<typename type, unsigned int N, unsigned int A, unsigned int B> class MatrixIt<Vector<type, N>, A, B, false>;
01132
01133 template<typename type, unsigned int A, unsigned int B> class MatrixIt<type, A, B, true>
01134 {
01135 protected:
01136 Vector<type, A> data[B];
01137 public:
01138 typedef Vector<type, A> VECTORTYPE;
01139 static const unsigned int WIDTH=A, HEIGHT=B;
01140 MatrixIt() { }
01141 explicit MatrixIt(const type *d)
01142 {
01143 for(FXuint b=0; b<B; b++)
01144 data[b]=VECTORTYPE(d ? d+b*A : 0);
01145 }
01146 explicit MatrixIt(const type (*d)[A])
01147 {
01148 for(FXuint b=0; b<B; b++)
01149 data[b]=VECTORTYPE(d ? d[b] : 0);
01150 }
01151 bool operator==(const MatrixIt &o) const
01152 {
01153 for(FXuint b=0; b<B; b++)
01154 if(!isZero(data[b]!=o.data[b]))
01155 return false;
01156 return true;
01157 }
01158 bool operator!=(const MatrixIt &o) const
01159 {
01160 for(FXuint b=0; b<B; b++)
01161 if(!isZero(data[b]!=o.data[b]))
01162 return true;
01163 return false;
01164 }
01165 MatrixIt inline operator *(const MatrixIt &_x) const;
01166 template<typename type2, unsigned int A2, unsigned int B2> friend inline MatrixIt<type2, A2, B2, true> transpose(const MatrixIt<type2, A2, B2, true> &v);
01167 };
01168 template<typename type, unsigned int A, unsigned int B> MatrixIt<type, A, B, true> inline MatrixIt<type, A, B, true>::operator *(const MatrixIt<type, A, B, true> &_x) const
01169 {
01170 MatrixIt<type, A, B, true> ret, x(transpose(_x));
01171 for(FXuint b=0; b<B; b++)
01172 for(FXuint a=0; a<A; a++)
01173 ret.data[b].set(a, dot(data[b], x.data[a]));
01174 return ret;
01175 }
01176 template<typename type, unsigned int A, unsigned int B> inline MatrixIt<type, A, B, true> transpose(const MatrixIt<type, A, B, true> &v)
01177 {
01178 MatrixIt<type, A, B, true> ret;
01179 for(FXuint b=0; b<B; b++)
01180 for(FXuint a=0; a<A; a++)
01181 ret.data[b].set(a, v.data[a][b]);
01182 return ret;
01183 }
01184 #ifdef FXVECTOR_SPECIALISEDFLOAT4
01185 template<> inline MatrixIt<float, 4, 4, true> transpose(const MatrixIt<float, 4, 4, true> &v)
01186 {
01187 MatrixIt<float, 4, 4, true> ret(v);
01188 _MM_TRANSPOSE4_PS(GetSSEVal(ret.data[0]), GetSSEVal(ret.data[1]), GetSSEVal(ret.data[2]), GetSSEVal(ret.data[3]));
01189 return ret;
01190 }
01191 #if _M_IX86_FP>=4 || defined(__SSE4__)
01192 template<> MatrixIt<float, 4, 4, true> inline MatrixIt<float, 4, 4, true>::operator *(const MatrixIt<float, 4, 4, true> &_x) const
01193 {
01194 MatrixIt<float, 4, 4, true> ret;
01195 __m128 x[4];
01196 for(FXuint b=0; b<4; b++)
01197 x[b]=GetSSEVal(_x.data[b]);
01198 _MM_TRANSPOSE4_PS(x[0], x[1], x[2], x[3]);
01199 for(FXuint b=0; b<4; b++)
01200 for(FXuint a=0; a<4; a++)
01201 ret.data[b].set(a, dot(data[b], x[a]));
01202 return ret;
01203 }
01204 #else
01205 template<> MatrixIt<float, 4, 4, true> inline MatrixIt<float, 4, 4, true>::operator *(const MatrixIt<float, 4, 4, true> &_x) const
01206 {
01207 MatrixIt<float, 4, 4, true> ret;
01208 __m128 acc, temp, row, x[4];
01209 for(FXuint b=0; b<4; b++)
01210 x[b]=GetSSEVal(_x.data[b]);
01211
01212 for(FXuint b=0; b<4; b++)
01213 {
01214 row=GetSSEVal(data[b]);
01215 temp=_mm_shuffle_ps(row, row, _MM_SHUFFLE(0,0,0,0)); acc=_mm_mul_ps(temp, x[0]);
01216 temp=_mm_shuffle_ps(row, row, _MM_SHUFFLE(1,1,1,1)); acc=_mm_add_ps(acc, _mm_mul_ps(temp, x[1]));
01217 temp=_mm_shuffle_ps(row, row, _MM_SHUFFLE(2,2,2,2)); acc=_mm_add_ps(acc, _mm_mul_ps(temp, x[2]));
01218 temp=_mm_shuffle_ps(row, row, _MM_SHUFFLE(3,3,3,3)); acc=_mm_add_ps(acc, _mm_mul_ps(temp, x[3]));
01219 ret.data[b]=acc;
01220 }
01221 return ret;
01222 }
01223 #endif
01224 #endif
01225 template<typename type, unsigned int A, unsigned int B> class MatrixI : public MatrixIt<type, A, B, Generic::Traits<type>::isFloat>
01226 {
01227 protected:
01228 typedef MatrixIt<type, A, B, Generic::Traits<type>::isFloat> Base;
01229 public:
01230 typedef type value_type;
01231 typedef value_type &reference;
01232 typedef const value_type &const_reference;
01233 typedef value_type *iterator;
01234 typedef const value_type *const_iterator;
01235 size_t max_size() const { return A*B; }
01236
01237 MatrixI() { }
01238 MatrixI(const Base &o) : Base(o) { }
01239 explicit MatrixI(const type *d) : Base(d) { }
01240 explicit MatrixI(const type (*d)[A]) : Base(d) { }
01241
01242 value_type at(int a, int b) { assert(a<A && b<B); return Base::data[b][a]; }
01243 value_type at(int a, int b) const { assert(a<A && b<B); return Base::data[b][a]; }
01244 value_type operator[](int i) { return at(i%A, i/A); }
01245 value_type operator[](int i) const { return at(i%A, i/A); }
01246
01247 reference front() { return Base::data[0]; }
01248 const_reference front() const { return Base::data[0]; }
01249 reference back() { return Base::data[B-1][A-1]; }
01250 const_reference back() const { return Base::data[B-1][A-1]; }
01251 iterator begin() { return &Base::data[0]; }
01252 const_iterator begin() const { return &Base::data[0]; }
01253 iterator end() { return &Base::data[B][0]; }
01254 const_iterator end() const { return &Base::data[B][0]; }
01255 };
01256 }
01257 template<typename type, unsigned int A, unsigned int B> class Matrix : public Impl::EquivType<Impl::MatrixI<type, A, B>, type, void>
01258 {
01259 protected:
01260 typedef Impl::EquivType<Impl::MatrixI<type, A, B>, type, void> Base;
01261 public:
01262 Matrix() { }
01263 Matrix(const Base &o) : Base(o) { }
01264 Matrix(const typename Base::Base &o) : Base(o) { }
01265 explicit Matrix(const type *d) : Base(d) { }
01266 explicit Matrix(const type (*d)[A]) : Base(d) { }
01267 };
01268 #define DEFINEMATRIXEQUIV(type, no, equivtype) \
01269 template<> class Matrix<type, no, no> : public Impl::EquivType<Impl::MatrixI<type, no, no>, type, equivtype> \
01270 { \
01271 typedef Impl::EquivType<Impl::MatrixI<type, no, no>, type, equivtype> Base; \
01272 public: \
01273 Matrix() { } \
01274 Matrix(const Base &o) : Base(o) { } \
01275 Matrix(const Base::Base &o) : Base(o) { } \
01276 explicit Matrix(const type *d) : Base(d) { } \
01277 explicit Matrix(const type (*d)[no]) : Base(d) { } \
01278 Matrix(const equivtype &o) : Base((const type *)&o) { } \
01279 };
01280 DEFINEMATRIXEQUIV(float, 3, FXMat3f)
01281 DEFINEMATRIXEQUIV(float, 4, FXMat4f)
01282 typedef Matrix<float, 3, 3> Matrix3f;
01283 typedef Matrix<float, 4, 4> Matrix4f;
01284 DEFINEMATRIXEQUIV(double, 3, FXMat3d)
01285 DEFINEMATRIXEQUIV(double, 4, FXMat4d)
01286 typedef Matrix<double, 3, 3> Matrix3d;
01287 typedef Matrix<double, 4, 4> Matrix4d;
01288 #undef DEFINEMATRIXEQUIV
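/* Illustrative sketch (added): Matrix multiplies via per-row dot products (SSE-assisted for
   Matrix4f where available) and converts to and from the equivalent FOX matrix types:

       using namespace FX::Maths;
       static const float id[16]={ 1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1 };
       Matrix4f a(id), b(id);
       Matrix4f c=a*b;                 // still the identity
       float    x=c[5];                // row-major linear indexing, ==1
       FXMat4f  m=c;                   // convert to the FOX type
*/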
01289
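/* FRandomness (added note): the scalar branch below is the 64 bit Mersenne Twister
   (MT19937-64); the SIMD branch further down is the SIMD-oriented Fast Mersenne Twister
   (SFMT19937) operating on Vector<FXuint, 4>. Both expose the same interface. */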
01318 #if FOX_BIGENDIAN || (defined(_M_IX86) && _M_IX86_FP==0) || (defined(__i386__) && !defined(__SSE__))
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329
01330
01331
01332
01333
01334
01335
01336
01337
01338
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373 class FRandomness
01374 {
01375 static const FXint NN=312;
01376 static const FXint MM=156;
01377 static const FXulong MATRIX_A=0xB5026F5AA96619E9ULL;
01378 static const FXulong UM=0xFFFFFFFF80000000ULL;
01379 static const FXulong LM=0x7FFFFFFFULL;
01380
01381 FXulong mt[NN];
01382 int mti;
01383 public:
01385 static const bool usingSIMD=false;
01387 FRandomness(FXulong seed) throw() : mti(NN+1)
01388 {
01389 mt[0] = seed;
01390 for (mti=1; mti<NN; mti++)
01391 mt[mti] = (6364136223846793005ULL * (mt[mti-1] ^ (mt[mti-1] >> 62)) + mti);
01392 }
01393
01394 FRandomness(FXuchar *_seed, FXuval len) throw() : mti(NN+1)
01395 {
01396 FXulong *init_key=(FXulong *) _seed, key_length=len/sizeof(FXulong);
01397 unsigned long long i, j, k;
01398 mt[0] = 19650218ULL;
01399 for (mti=1; mti<NN; mti++)
01400 mt[mti] = (6364136223846793005ULL * (mt[mti-1] ^ (mt[mti-1] >> 62)) + mti);
01401 i=1; j=0;
01402 k = (NN>key_length ? NN : key_length);
01403 for (; k; k--) {
01404 mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 62)) * 3935559000370003845ULL))
01405 + init_key[j] + j;
01406 i++; j++;
01407 if (i>=NN) { mt[0] = mt[NN-1]; i=1; }
01408 if (j>=key_length) j=0;
01409 }
01410 for (k=NN-1; k; k--) {
01411 mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 62)) * 2862933555777941757ULL))
01412 - i;
01413 i++;
01414 if (i>=NN) { mt[0] = mt[NN-1]; i=1; }
01415 }
01416
01417 mt[0] = 1ULL << 63;
01418 }
01419
01421 FXulong int64() throw()
01422 {
01423 int i;
01424 FXulong x;
01425 static FXulong mag01[2]={0ULL, MATRIX_A};
01426
01427 if (mti >= NN) {
01428
01429 for (i=0;i<NN-MM;i++) {
01430 x = (mt[i]&UM)|(mt[i+1]&LM);
01431 mt[i] = mt[i+MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
01432 }
01433 for (;i<NN-1;i++) {
01434 x = (mt[i]&UM)|(mt[i+1]&LM);
01435 mt[i] = mt[i+(MM-NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
01436 }
01437 x = (mt[NN-1]&UM)|(mt[0]&LM);
01438 mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)];
01439
01440 mti = 0;
01441 }
01442
01443 x = mt[mti++];
01444
01445 x ^= (x >> 29) & 0x5555555555555555ULL;
01446 x ^= (x << 17) & 0x71D67FFFEDA60000ULL;
01447 x ^= (x << 37) & 0xFFF7EEE000000000ULL;
01448 x ^= (x >> 43);
01449
01450 return x;
01451 }
01452
01454 void fill(FXuchar *d, FXuval len) throw()
01455 {
01456 for(FXuint n=0; n<(FXuint)len/8; n++)
01457 ((FXulong *)d)[n]=int64();
01458 }
01459
01461 double real1() throw()
01462 {
01463 return (int64() >> 11) * (1.0/9007199254740991.0);
01464 }
01465
01467 double real2() throw()
01468 {
01469 return (int64() >> 11) * (1.0/9007199254740992.0);
01470 }
01471
01473 double real3() throw()
01474 {
01475 return ((int64() >> 12) + 0.5) * (1.0/4503599627370496.0);
01476 }
01477 };
01478 #else
01479 class FRandomness
01480 {
01481 static const int MEXP=19937;
01482 static const int N=(MEXP / 128 + 1);
01483 typedef Vector<FXuint, 4> w128_t;
01484
01485 static const int N32=(N * 4);
01486 static const int POS1=122, SL1=18, SR1=11, SL2=1, SR2=1;
01487 static const FXuint *MASK() throw() { static const FXMEMALIGNED(16) FXuint d[4]={0xdfffffefU, 0xddfecb7fU, 0xbffaffffU, 0xbffffff6U}; return d; }
01488 static const FXuint *PARITY() throw() { static const FXMEMALIGNED(16) FXuint d[4]={0x00000001U, 0x00000000U, 0x00000000U, 0x13c9e684U}; return d; }
01489
01490 w128_t sfmt[N];
01491 FXuval idx;
01492 FXuint *psfmt32;
01493 FXulong *psfmt64;
01494
01495 inline w128_t do_recursion(const w128_t &a, const w128_t &b, const w128_t &c, const w128_t &d) throw()
01496 {
01497 w128_t z(rshiftvec(c, SR2*8));
01498 z^=a;
01499 z^=d<<w128_t(SL1);
01500 z^=lshiftvec(a, SL2*8);
01501 w128_t y(b>>w128_t(SR1));
01502 y&=w128_t(MASK());
01503 z^=y;
01504 return z;
01505 }
01506 inline void gen_rand_all() throw()
01507 {
01508 int i;
01509 w128_t r, r1, r2;
01510
01511 r1 = sfmt[N - 2];
01512 r2 = sfmt[N - 1];
01513 for (i = 0; i < N - POS1; i++)
01514 {
01515 sfmt[i] = r = do_recursion(sfmt[i], sfmt[i + POS1], r1, r2);
01516 r1 = r2;
01517 r2 = r;
01518 }
01519 for (; i < N; i++)
01520 {
01521 sfmt[i] = r = do_recursion(sfmt[i], sfmt[i + POS1 - N], r1, r2);
01522 r1 = r2;
01523 r2 = r;
01524 }
01525 }
01526 inline void gen_rand_array(w128_t *array, FXuint size) throw()
01527 {
01528 FXuint i, j;
01529 w128_t r, r1, r2;
01530
01531 r1 = sfmt[N - 2];
01532 r2 = sfmt[N - 1];
01533 for (i = 0; i < N - POS1; i++)
01534 {
01535 array[i] = r = do_recursion(sfmt[i], sfmt[i + POS1], r1, r2);
01536 r1 = r2;
01537 r2 = r;
01538 }
01539 for (; i < (FXuint) N; i++)
01540 {
01541 array[i] = r = do_recursion(sfmt[i], array[i + POS1 - N], r1, r2);
01542 r1 = r2;
01543 r2 = r;
01544 }
01545
01546 for (; i < size - N; i++)
01547 {
01548 array[i] = r = do_recursion(array[i - N], array[i + POS1 - N], r1, r2);
01549 r1 = r2;
01550 r2 = r;
01551 }
01552 int limit=2*N-size;
01553 for(j = 0; (int) j < limit; j++)
01554 {
01555 sfmt[j]=array[j + size - N];
01556 }
01557 for (; i < size; i++, j++)
01558 {
01559 sfmt[j] = array[i] = r = do_recursion(array[i - N], array[i + POS1 - N], r1, r2);
01560 r1 = r2;
01561 r2 = r;
01562 }
01563 }
01564 void period_certification() throw()
01565 {
01566 int inner = 0;
01567 int i, j;
01568 FXuint work;
01569 static const w128_t parity(PARITY());
01570
01571 for (i = 0; i < 4; i++)
01572 inner ^= psfmt32[i] & parity[i];
01573 for (i = 16; i > 0; i >>= 1)
01574 inner ^= inner >> i;
01575 inner &= 1;
01576
01577 if (inner == 1) {
01578 return;
01579 }
01580
01581 for (i = 0; i < 4; i++) {
01582 work = 1;
01583 for (j = 0; j < 32; j++) {
01584 if ((work & parity[i]) != 0) {
01585 psfmt32[i] ^= work;
01586 return;
01587 }
01588 work = work << 1;
01589 }
01590 }
01591 }
01592 inline FXuint func1(FXuint x) throw() {
01593 return (x ^ (x >> 27)) * (FXuint)1664525UL;
01594 }
01595 inline FXuint func2(FXuint x) throw() {
01596 return (x ^ (x >> 27)) * (FXuint)1566083941UL;
01597 }
01598
01599 void init_by_array(FXuint *init_key, FXuint key_length) throw()
01600 {
01601 FXuint i, j, count;
01602 FXuint r;
01603 int lag;
01604 int mid;
01605 int size = N * 4;
01606
01607 if (size >= 623) {
01608 lag = 11;
01609 } else if (size >= 68) {
01610 lag = 7;
01611 } else if (size >= 39) {
01612 lag = 5;
01613 } else {
01614 lag = 3;
01615 }
01616 mid = (size - lag) / 2;
01617
01618 memset(sfmt, 0x8b, sizeof(sfmt));
01619 if (key_length + 1 > (FXuint) N32) {
01620 count = key_length + 1;
01621 } else {
01622 count = N32;
01623 }
01624 r = func1(psfmt32[0] ^ psfmt32[mid] ^ psfmt32[N32 - 1]);
01625 psfmt32[mid] += r;
01626 r += key_length;
01627 psfmt32[mid + lag] += r;
01628 psfmt32[0] = r;
01629
01630 count--;
01631 for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
01632 r = func1(psfmt32[i] ^ psfmt32[(i + mid) % N32] ^ psfmt32[(i + N32 - 1) % N32]);
01633 psfmt32[(i + mid) % N32] += r;
01634 r += init_key[j] + i;
01635 psfmt32[(i + mid + lag) % N32] += r;
01636 psfmt32[i] = r;
01637 i = (i + 1) % N32;
01638 }
01639 for (; j < count; j++) {
01640 r = func1(psfmt32[i] ^ psfmt32[(i + mid) % N32] ^ psfmt32[(i + N32 - 1) % N32]);
01641 psfmt32[(i + mid) % N32] += r;
01642 r += i;
01643 psfmt32[(i + mid + lag) % N32] += r;
01644 psfmt32[i] = r;
01645 i = (i + 1) % N32;
01646 }
01647 for (j = 0; j < (FXuint) N32; j++) {
01648 r = func2(psfmt32[i] + psfmt32[(i + mid) % N32] + psfmt32[(i + N32 - 1) % N32]);
01649 psfmt32[(i + mid) % N32] ^= r;
01650 r -= i;
01651 psfmt32[(i + mid + lag) % N32] ^= r;
01652 psfmt32[i] = r;
01653 i = (i + 1) % N32;
01654 }
01655
01656 idx = N32;
01657 period_certification();
01658 }
01659 public:
01661 static const bool usingSIMD=true;
01663 FRandomness(FXulong seed) throw() : idx(0), psfmt32((FXuint *)&sfmt), psfmt64((FXulong *)&sfmt)
01664 {
01665 init_by_array((FXuint*)&seed, 2);
01666
01667
01668
01669
01670
01671
01672
01673
01674 }
01676 FRandomness(FXuchar *seed, FXuval len) throw() : idx(0), psfmt32((FXuint *)&sfmt), psfmt64((FXulong *)&sfmt)
01677 {
01678 init_by_array((FXuint*)seed, (FXuint)(len/4));
01679 }
01680
01682 FXulong int64() throw()
01683 {
01684 FXulong r;
01685 assert(idx % 2 == 0);
01686
01687 if (idx >= (FXuint) N32)
01688 {
01689 gen_rand_all();
01690 idx = 0;
01691 }
01692 r = psfmt64[idx / 2];
01693 idx += 2;
01694 return r;
01695 }
01696
01698 void fill(FXuchar *d, FXuval len) throw()
01699 {
01700 gen_rand_array((w128_t *) d, (FXuint)(len/sizeof(w128_t)));
01701 }
01702
01704 double real1() throw()
01705 {
01706 return (int64() >> 11) * (1.0/9007199254740991.0);
01707 }
01708
01710 double real2() throw()
01711 {
01712 return (int64() >> 11) * (1.0/9007199254740992.0);
01713 }
01714
01716 double real3() throw()
01717 {
01718 return ((int64() >> 12) + 0.5) * (1.0/4503599627370496.0);
01719 }
01720 };
01721 #endif
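/* Illustrative sketch (added): seeding and drawing from FRandomness; either implementation
   above provides this interface:

       using namespace FX::Maths;
       FRandomness rnd((FXulong) 0x1234);
       FXulong i=rnd.int64();          // uniformly distributed 64 bit integer
       double  a=rnd.real1();          // uniform in [0,1]
       double  b=rnd.real2();          // uniform in [0,1)
       double  c=rnd.real3();          // uniform in (0,1)
*/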
01722
01729 class SysRandomness : protected QMutex, protected FRandomness
01730 {
01731 public:
01732 SysRandomness() : FRandomness(FXProcess::getNsCount()) { }
01733 FXulong int64()
01734 {
01735 QMtxHold h(this);
01736 return FRandomness::int64();
01737 }
01738 double real1()
01739 {
01740 QMtxHold h(this);
01741 return FRandomness::real1();
01742 }
01743 double real2()
01744 {
01745 QMtxHold h(this);
01746 return FRandomness::real2();
01747 }
01748 double real3()
01749 {
01750 QMtxHold h(this);
01751 return FRandomness::real3();
01752 }
01753 };
01754 extern FXAPI SysRandomness SysRandSrc;
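/* Illustrative sketch (added): SysRandSrc is the process-wide, mutex-protected generator for
   code which does not want to manage its own FRandomness instance:

       FXulong token=FX::Maths::SysRandSrc.int64();
       double  p    =FX::Maths::SysRandSrc.real2();
*/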
01755
01757 inline double normalrand(FRandomness &src, double stddevs) throw()
01758 {
01759 double x, y, r2;
01760 do
01761 {
01762 x=-1+2*src.real3();
01763 y=-1+2*src.real3();
01764 r2=x*x+y*y;
01765 }
01766 while(r2>1.0 || r2==0);
01767 return stddevs*y*::sqrt(-2.0*log(r2)/r2);
01768 }
01769 inline double normalrand(SysRandomness &src, double stddevs) throw()
01770 {
01771 double x, y, r2;
01772 do
01773 {
01774 x=-1+2*src.real3();
01775 y=-1+2*src.real3();
01776 r2=x*x+y*y;
01777 }
01778 while(r2>1.0 || r2==0);
01779 return stddevs*y*::sqrt(-2.0*log(r2)/r2);
01780 }
01781
01783 template<typename type> inline type normaldist(type x, type stddevs) throw()
01784 {
01785 type u=x/fabs(stddevs);
01786 return (1/(sqrt(2*(type) PI)*fabs(stddevs)))*exp(-u*u/2);
01787 }
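/* Illustrative sketch (added): drawing a normally distributed variate and evaluating the
   matching probability density:

       using namespace FX::Maths;
       FRandomness rnd((FXulong) 7);
       double sample =normalrand(rnd, 2.0);       // ~ N(0, stddev 2.0)
       double density=normaldist(sample, 2.0);    // pdf of N(0, 2.0) at that point
*/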
01788
01790 template<typename type> inline type mean(const type *FXRESTRICT array, FXuval len, FXuint stride=1, type *FXRESTRICT min=0, type *FXRESTRICT max=0, type *FXRESTRICT mode=0) throw()
01791 {
01792 type m=0;
01793 if(min) *min=Generic::BiggestValue<type>::value;
01794 if(max) *max=Generic::BiggestValue<type, true>::value;
01795 if(mode) *mode=0;
01796 for(FXuval n=0; n<len; n+=stride)
01797 {
01798 m+=array[n];
01799 if(min && array[n]<*min) *min=array[n];
01800 if(max && array[n]>*max) *max=array[n];
01801 if(mode && (len/2==n || (len+1)/2==n)) *mode=*mode ? (*mode+array[n])/2 : array[n];
01802 }
01803 return m/(type)(len/stride);
01804 }
01805 template<typename type, class allocator> inline type mean(const QMemArray<type, allocator> &array, FXuint stride=1, type *FXRESTRICT min=0, type *FXRESTRICT max=0, type *FXRESTRICT mode=0) throw()
01806 {
01807 return mean(array.data(), array.count(), stride, min, max, mode);
01808 }
01809
01811 template<typename type> inline type variance(const type *FXRESTRICT array, FXuval len, FXuint stride=1, const type *FXRESTRICT _mean=0) throw()
01812 {
01813 type v=0, m=_mean ? *_mean : mean(array, len, stride);
01814 for(FXuval n=0; n<len; n+=stride)
01815 {
01816 const type d=array[n]-m;
01817 v+=d*d;
01818 }
01819 return v/((len/stride)-1);
01820 }
01821 template<typename type, class allocator> inline type variance(const QMemArray<type, allocator> &array, FXuint stride=1, const type *FXRESTRICT _mean=0) throw()
01822 {
01823 return variance(array.data(), array.count(), stride, _mean);
01824 }
01825
01827 template<typename type> inline type stddev(const type *FXRESTRICT array, FXuval len, FXuint stride=1, const type *FXRESTRICT _mean=0) throw()
01828 {
01829 return sqrt(variance(array, len, stride, _mean));
01830 }
01831 template<typename type, class allocator> inline type stddev(const QMemArray<type, allocator> &array, FXuint stride=1, const type *FXRESTRICT _mean=0) throw()
01832 {
01833 return stddev(array.data(), array.count(), stride, _mean);
01834 }
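/* Illustrative sketch (added): descriptive statistics over a plain array:

       using namespace FX::Maths;
       static const double samples[5]={ 1.0, 2.0, 3.0, 4.0, 5.0 };
       double lo=0, hi=0;
       double m=mean(samples, 5, 1, &lo, &hi);    // m==3, lo==1, hi==5
       double v=variance(samples, 5);             // 2.5 (sample variance)
       double s=stddev(samples, 5);               // sqrt(2.5)
*/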
01835
01837 template<unsigned int buckets, typename type> inline Vector<type, buckets+3> distribution(const type *FXRESTRICT array, FXuval len, FXuint stride=1, const type *FXRESTRICT min=0, const type *FXRESTRICT max=0) throw()
01838 {
01839 Vector<type, buckets+3> ret((type *) 0);
01840 type _min, _max;
01841 if(min && max) { _min=*min; _max=*max; }
01842 else mean(array, len, stride, &_min, &_max);
01843 const type div=(_max-_min)/buckets;
01844 ret.set(0, _min); ret.set(1, _max); ret.set(2, div);
01845 for(FXuval n=0; n<len; n+=stride)
01846 {
01847 for(FXuval bucket=0; bucket<buckets; bucket++)
01848 {
01849 if(array[n]>=_min+div*bucket && array[n]<_min+div*(bucket+1))
01850 ret.set(3+bucket, ret[3+bucket]+1);
01851 }
01852 }
01853 return ret;
01854 }
01855 template<unsigned int buckets, typename type, class allocator> inline Vector<type, buckets+3> distribution(const QMemArray<type, allocator> &array, FXuint stride=1, const type *FXRESTRICT min=0, const type *FXRESTRICT max=0) throw()
01856 {
01857 return distribution<buckets, type>(array.data(), array.count(), stride, min, max);
01858 }
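/* Illustrative sketch (added): a four bucket histogram; slots 0-2 of the returned vector hold
   the minimum, maximum and bucket width used, the bucket counts follow from slot 3 onwards:

       using namespace FX::Maths;
       static const float samples[8]={ 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f };
       Vector<float, 7> hist=distribution<4>(samples, 8);
       float width=hist[2];            // (max-min)/4
       float first=hist[3];            // samples falling in [min, min+width), ==2 here
*/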
01859
01860 }
01861
01862 }
01863
01864 #endif