Show
Ignore:
Timestamp:
09/17/08 18:14:28 (6 years ago)
Author:
robert
Message:

From Mathias Froehlich, "This is a generic optimization that does not depend on any cpu or instruction
set.

The optimization is based on the observation that matrix-matrix multiplication
with a dense 4x4 matrix is 4^3 operations, whereas multiplication with a
transform or scale matrix is only 4^2 operations, which is a gain of a
*FACTOR*4* for these special cases.
The change implements these special cases, provides a unit test for these
implementations, and converts uses of the more expensive dense matrix-matrix
routine to the specialized versions.

Depending on the transform nodes in the scenegraph this change gives a
noticeable improvement.
For example, the osgforest code using the MatrixTransform is about 20% slower
than the same codepath using the PositionAttitudeTransform instead of the
MatrixTransform with this patch applied.

If I remember right, the SSE-type optimizations did *not* provide a factor 4
improvement. Also, these changes are totally independent of any CPU or
instruction set architecture. So I would prefer to have this current kind of
change instead of some hand-coded and CPU-dependent assembly stuff. If we
need that hand-tuned stuff, it can go on top of these changes, which must
then provide hand-optimized additional variants for the specialized versions
to give an even better result in the end.

Another change included here is a change to the rotation-matrix-from-quaternion
code. There is a sqrt call which could be optimized away, since we divide in
effect by sqrt(length)*sqrt(length), which is just length ...
"

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • OpenSceneGraph/trunk/include/osg/Matrixd

    r7890 r8868  
    348348        void postMult( const Matrixd& ); 
    349349 
     350        /** Optimized version of preMult(translate(v)); */ 
     351        inline void preMultTranslate( const Vec3d& v ); 
     352        inline void preMultTranslate( const Vec3f& v ); 
     353        /** Optimized version of postMult(translate(v)); */ 
     354        inline void postMultTranslate( const Vec3d& v ); 
     355        inline void postMultTranslate( const Vec3f& v ); 
     356 
     357        /** Optimized version of preMult(scale(v)); */ 
     358        inline void preMultScale( const Vec3d& v ); 
     359        inline void preMultScale( const Vec3f& v ); 
     360        /** Optimized version of postMult(scale(v)); */ 
     361        inline void postMultScale( const Vec3d& v ); 
     362        inline void postMultScale( const Vec3f& v ); 
     363 
     364        /** Optimized version of preMult(rotate(q)); */ 
     365        inline void preMultRotate( const Quat& q ); 
     366        /** Optimized version of postMult(rotate(q)); */ 
     367        inline void postMultRotate( const Quat& q ); 
     368 
    350369        inline void operator *= ( const Matrixd& other )  
    351370        {    if( this == &other ) { 
     
    648667} 
    649668 
     /** In-place translation by v; documented at its declaration as the optimized
      *  form of preMult(translate(v)). Only row 3 of _mat is written: for each
      *  component i of v, v[i] times row i is added to row 3. */
     669inline void Matrixd::preMultTranslate( const Vec3d& v ) 
     670{ 
     671    for (unsigned i = 0; i < 3; ++i) 
     672    { 
     673        double tmp = v[i]; 
     674        if (tmp == 0) // a zero component would add nothing to row 3
     675            continue; 
     676        _mat[3][0] += tmp*_mat[i][0]; 
     677        _mat[3][1] += tmp*_mat[i][1]; 
     678        _mat[3][2] += tmp*_mat[i][2]; 
     679        _mat[3][3] += tmp*_mat[i][3]; 
     680    } 
     681} 
     682 
     /** Vec3f overload of preMultTranslate: same row-3 accumulation as the Vec3d
      *  version, with each component of v read into a float before use. */
     683inline void Matrixd::preMultTranslate( const Vec3f& v ) 
     684{ 
     685    for (unsigned i = 0; i < 3; ++i) 
     686    { 
     687        float tmp = v[i]; 
     688        if (tmp == 0) // a zero component would add nothing to row 3
     689            continue; 
     690        _mat[3][0] += tmp*_mat[i][0]; 
     691        _mat[3][1] += tmp*_mat[i][1]; 
     692        _mat[3][2] += tmp*_mat[i][2]; 
     693        _mat[3][3] += tmp*_mat[i][3]; 
     694    } 
     695} 
     696 
     /** In-place translation by v; documented at its declaration as the optimized
      *  form of postMult(translate(v)). For each component i of v, v[i] times
      *  column 3 is added to column i in all four rows; column 3 is only read. */
     697inline void Matrixd::postMultTranslate( const Vec3d& v ) 
     698{ 
     699    for (unsigned i = 0; i < 3; ++i) 
     700    { 
     701        double tmp = v[i]; 
     702        if (tmp == 0) // a zero component would leave column i unchanged
     703            continue; 
     704        _mat[0][i] += tmp*_mat[0][3]; 
     705        _mat[1][i] += tmp*_mat[1][3]; 
     706        _mat[2][i] += tmp*_mat[2][3]; 
     707        _mat[3][i] += tmp*_mat[3][3]; 
     708    } 
     709} 
     710 
     /** Vec3f overload of postMultTranslate: same column update as the Vec3d
      *  version, with each component of v read into a float before use. */
     711inline void Matrixd::postMultTranslate( const Vec3f& v ) 
     712{ 
     713    for (unsigned i = 0; i < 3; ++i) 
     714    { 
     715        float tmp = v[i]; 
     716        if (tmp == 0) // a zero component would leave column i unchanged
     717            continue; 
     718        _mat[0][i] += tmp*_mat[0][3]; 
     719        _mat[1][i] += tmp*_mat[1][3]; 
     720        _mat[2][i] += tmp*_mat[2][3]; 
     721        _mat[3][i] += tmp*_mat[3][3]; 
     722    } 
     723} 
     724 
     /** In-place scale by v; documented at its declaration as the optimized form
      *  of preMult(scale(v)). Rows 0, 1 and 2 of _mat are multiplied by v[0],
      *  v[1] and v[2] respectively; row 3 is not touched. */
     725inline void Matrixd::preMultScale( const Vec3d& v ) 
     726{ 
     727    _mat[0][0] *= v[0]; _mat[0][1] *= v[0]; _mat[0][2] *= v[0]; _mat[0][3] *= v[0]; 
     728    _mat[1][0] *= v[1]; _mat[1][1] *= v[1]; _mat[1][2] *= v[1]; _mat[1][3] *= v[1]; 
     729    _mat[2][0] *= v[2]; _mat[2][1] *= v[2]; _mat[2][2] *= v[2]; _mat[2][3] *= v[2]; 
     730} 
     731 
     /** Vec3f overload of preMultScale: multiplies rows 0, 1 and 2 of _mat by
      *  v[0], v[1] and v[2] respectively; row 3 is not touched. */
     732inline void Matrixd::preMultScale( const Vec3f& v ) 
     733{ 
     734    _mat[0][0] *= v[0]; _mat[0][1] *= v[0]; _mat[0][2] *= v[0]; _mat[0][3] *= v[0]; 
     735    _mat[1][0] *= v[1]; _mat[1][1] *= v[1]; _mat[1][2] *= v[1]; _mat[1][3] *= v[1]; 
     736    _mat[2][0] *= v[2]; _mat[2][1] *= v[2]; _mat[2][2] *= v[2]; _mat[2][3] *= v[2]; 
     737} 
     738 
     /** In-place scale by v; documented at its declaration as the optimized form
      *  of postMult(scale(v)). Columns 0, 1 and 2 of _mat are multiplied by v[0],
      *  v[1] and v[2] respectively in all four rows; column 3 is not touched. */
     739inline void Matrixd::postMultScale( const Vec3d& v ) 
     740{ 
     741    _mat[0][0] *= v[0]; _mat[1][0] *= v[0]; _mat[2][0] *= v[0]; _mat[3][0] *= v[0]; 
     742    _mat[0][1] *= v[1]; _mat[1][1] *= v[1]; _mat[2][1] *= v[1]; _mat[3][1] *= v[1]; 
     743    _mat[0][2] *= v[2]; _mat[1][2] *= v[2]; _mat[2][2] *= v[2]; _mat[3][2] *= v[2]; 
     744} 
     745 
     /** Vec3f overload of postMultScale: multiplies columns 0, 1 and 2 of _mat by
      *  v[0], v[1] and v[2] respectively in all four rows; column 3 is not touched. */
     746inline void Matrixd::postMultScale( const Vec3f& v ) 
     747{ 
     748    _mat[0][0] *= v[0]; _mat[1][0] *= v[0]; _mat[2][0] *= v[0]; _mat[3][0] *= v[0]; 
     749    _mat[0][1] *= v[1]; _mat[1][1] *= v[1]; _mat[2][1] *= v[1]; _mat[3][1] *= v[1]; 
     750    _mat[0][2] *= v[2]; _mat[1][2] *= v[2]; _mat[2][2] *= v[2]; _mat[3][2] *= v[2]; 
     751} 
     752 
     /** In-place rotation by q; documented at its declaration as the optimized
      *  form of preMult(rotate(q)). The only shortcut taken here is the early
      *  return when q.zeroRotation() is true; otherwise a temporary matrix is
      *  filled via setRotate(q) and handed to preMult(). */
     753inline void Matrixd::preMultRotate( const Quat& q ) 
     754{ 
     755    if (q.zeroRotation()) // nothing to apply, leave the matrix as it is
     756        return; 
     757    Matrixd r; 
     758    r.setRotate(q); 
     759    preMult(r); 
     760} 
     761 
     /** In-place rotation by q; documented at its declaration as the optimized
      *  form of postMult(rotate(q)). The only shortcut taken here is the early
      *  return when q.zeroRotation() is true; otherwise a temporary matrix is
      *  filled via setRotate(q) and handed to postMult(). */
     762inline void Matrixd::postMultRotate( const Quat& q ) 
     763{ 
     764    if (q.zeroRotation()) // nothing to apply, leave the matrix as it is
     765        return; 
     766    Matrixd r; 
     767    r.setRotate(q); 
     768    postMult(r); 
     769} 
     770 
    650771inline Vec3f operator* (const Vec3f& v, const Matrixd& m ) 
    651772{