Show
Ignore:
Timestamp:
09/17/08 18:14:28 (6 years ago)
Author:
robert
Message:

From Mathias Froehlich, "This is a generic optimization that does not depend on any cpu or instruction
set.

The optimization is based on the observation that matrix-matrix multiplication
with a dense 4x4 matrix is 4^3 operations, whereas multiplication with a
transform or scale matrix is only 4^2 operations, which is a gain of a
*FACTOR 4* for these special cases.
The change implements these special cases, provides a unit test for these
implementations, and replaces uses of the more expensive dense matrix-matrix
routine with the specialized versions.

Depending on the transform nodes in the scenegraph, this change gives a
noticeable improvement.
For example, the osgforest code using the MatrixTransform is about 20% slower
than the same codepath using the PositionAttitudeTransform instead of the
MatrixTransform with this patch applied.

If I remember right, the SSE-type optimizations did *not* provide a factor 4
improvement. Also, these changes are totally independent of any CPU or
instruction set architecture. So I would prefer to have this current kind of
change instead of some hand-coded and CPU-dependent assembly stuff. If we
need that hand-tuned stuff, it can go on top of these changes, which must
then provide hand-optimized additional variants for the specialized versions
to give an even better result in the end.

Another change included here is a change to the rotation-matrix-from-quaternion
code. There is a sqrt call which could be optimized away, since we divide in
effect by sqrt(length)*sqrt(length), which is just length ...
"

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • OpenSceneGraph/trunk/include/osg/Matrixf

    r7890 r8868  
    350350        void postMult( const Matrixf& ); 
    351351 
     352        /** Optimized version of preMult(translate(v)); */ 
     353        inline void preMultTranslate( const Vec3d& v ); 
     354        inline void preMultTranslate( const Vec3f& v ); 
     355        /** Optimized version of postMult(translate(v)); */ 
     356        inline void postMultTranslate( const Vec3d& v ); 
     357        inline void postMultTranslate( const Vec3f& v ); 
     358 
     359        /** Optimized version of preMult(scale(v)); */ 
     360        inline void preMultScale( const Vec3d& v ); 
     361        inline void preMultScale( const Vec3f& v ); 
     362        /** Optimized version of postMult(scale(v)); */ 
     363        inline void postMultScale( const Vec3d& v ); 
     364        inline void postMultScale( const Vec3f& v ); 
     365 
     366        /** Optimized version of preMult(rotate(q)); */ 
     367        inline void preMultRotate( const Quat& q ); 
     368        /** Optimized version of postMult(rotate(q)); */ 
     369        inline void postMultRotate( const Quat& q ); 
     370 
    352371        inline void operator *= ( const Matrixf& other )  
    353372        {    if( this == &other ) { 
     
    642661} 
    643662 
     663inline void Matrixf::preMultTranslate( const Vec3d& v ) 
     664{ 
     665    for (unsigned i = 0; i < 3; ++i) 
     666    { 
     667        double tmp = v[i]; 
     668        if (tmp == 0) 
     669            continue; 
     670        _mat[3][0] += tmp*_mat[i][0]; 
     671        _mat[3][1] += tmp*_mat[i][1]; 
     672        _mat[3][2] += tmp*_mat[i][2]; 
     673        _mat[3][3] += tmp*_mat[i][3]; 
     674    } 
     675} 
     676 
     677inline void Matrixf::preMultTranslate( const Vec3f& v ) 
     678{ 
     679    for (unsigned i = 0; i < 3; ++i) 
     680    { 
     681        float tmp = v[i]; 
     682        if (tmp == 0) 
     683            continue; 
     684        _mat[3][0] += tmp*_mat[i][0]; 
     685        _mat[3][1] += tmp*_mat[i][1]; 
     686        _mat[3][2] += tmp*_mat[i][2]; 
     687        _mat[3][3] += tmp*_mat[i][3]; 
     688    } 
     689} 
     690 
     691inline void Matrixf::postMultTranslate( const Vec3d& v ) 
     692{ 
     693    for (unsigned i = 0; i < 3; ++i) 
     694    { 
     695        double tmp = v[i]; 
     696        if (tmp == 0) 
     697            continue; 
     698        _mat[0][i] += tmp*_mat[0][3]; 
     699        _mat[1][i] += tmp*_mat[1][3]; 
     700        _mat[2][i] += tmp*_mat[2][3]; 
     701        _mat[3][i] += tmp*_mat[3][3]; 
     702    } 
     703} 
     704 
     705inline void Matrixf::postMultTranslate( const Vec3f& v ) 
     706{ 
     707    for (unsigned i = 0; i < 3; ++i) 
     708    { 
     709        float tmp = v[i]; 
     710        if (tmp == 0) 
     711            continue; 
     712        _mat[0][i] += tmp*_mat[0][3]; 
     713        _mat[1][i] += tmp*_mat[1][3]; 
     714        _mat[2][i] += tmp*_mat[2][3]; 
     715        _mat[3][i] += tmp*_mat[3][3]; 
     716    } 
     717} 
     718 
     719inline void Matrixf::preMultScale( const Vec3d& v ) 
     720{ 
     721    _mat[0][0] *= v[0]; _mat[0][1] *= v[0]; _mat[0][2] *= v[0]; _mat[0][3] *= v[0]; 
     722    _mat[1][0] *= v[1]; _mat[1][1] *= v[1]; _mat[1][2] *= v[1]; _mat[1][3] *= v[1]; 
     723    _mat[2][0] *= v[2]; _mat[2][1] *= v[2]; _mat[2][2] *= v[2]; _mat[2][3] *= v[2]; 
     724} 
     725 
     726inline void Matrixf::preMultScale( const Vec3f& v ) 
     727{ 
     728    _mat[0][0] *= v[0]; _mat[0][1] *= v[0]; _mat[0][2] *= v[0]; _mat[0][3] *= v[0]; 
     729    _mat[1][0] *= v[1]; _mat[1][1] *= v[1]; _mat[1][2] *= v[1]; _mat[1][3] *= v[1]; 
     730    _mat[2][0] *= v[2]; _mat[2][1] *= v[2]; _mat[2][2] *= v[2]; _mat[2][3] *= v[2]; 
     731} 
     732 
     733inline void Matrixf::postMultScale( const Vec3d& v ) 
     734{ 
     735    _mat[0][0] *= v[0]; _mat[1][0] *= v[0]; _mat[2][0] *= v[0]; _mat[3][0] *= v[0]; 
     736    _mat[0][1] *= v[1]; _mat[1][1] *= v[1]; _mat[2][1] *= v[1]; _mat[3][1] *= v[1]; 
     737    _mat[0][2] *= v[2]; _mat[1][2] *= v[2]; _mat[2][2] *= v[2]; _mat[3][2] *= v[2]; 
     738} 
     739 
     740inline void Matrixf::postMultScale( const Vec3f& v ) 
     741{ 
     742    _mat[0][0] *= v[0]; _mat[1][0] *= v[0]; _mat[2][0] *= v[0]; _mat[3][0] *= v[0]; 
     743    _mat[0][1] *= v[1]; _mat[1][1] *= v[1]; _mat[2][1] *= v[1]; _mat[3][1] *= v[1]; 
     744    _mat[0][2] *= v[2]; _mat[1][2] *= v[2]; _mat[2][2] *= v[2]; _mat[3][2] *= v[2]; 
     745} 
     746 
     747 
     748inline void Matrixf::preMultRotate( const Quat& q ) 
     749{ 
     750    if (q.zeroRotation()) 
     751        return; 
     752    Matrixf r; 
     753    r.setRotate(q); 
     754    preMult(r); 
     755} 
     756 
     757inline void Matrixf::postMultRotate( const Quat& q ) 
     758{ 
     759    if (q.zeroRotation()) 
     760        return; 
     761    Matrixf r; 
     762    r.setRotate(q); 
     763    postMult(r); 
     764} 
    644765 
    645766inline Vec3f operator* (const Vec3f& v, const Matrixf& m )