Show
Ignore:
Timestamp:
09/17/08 18:14:28 (6 years ago)
Author:
robert
Message:

From Mathias Froehlich, "This is a generic optimization that does not depend on any cpu or instruction
set.

The optimization is based on the observation that matrix matrix multiplication
with a dense 4x4 matrix is 4^3 operations whereas multiplication with a
transform or scale matrix is only 4^2 operations, which is a gain of a
*FACTOR*4* for these special cases.
The change implements these special cases, provides a unit test for these
implementations, and replaces uses of the more expensive dense matrix matrix
routine with the specialized versions.

Depending on the transform nodes in the scenegraph this change gives a
noticeable improvement.
For example the osgforest code using the MatrixTransform is about 20% slower
than the same codepath using the PositionAttitudeTransform instead of the
MatrixTransform with this patch applied.

If I remember right, the SSE type optimizations did *not* provide a factor 4
improvement. Also these changes are totally independent of any cpu or
instruction set architecture. So I would prefer to have this current kind of
change instead of some hand coded and cpu dependent assembly stuff. If we
need that hand tuned stuff, it can go on top of these changes, and must
then provide hand optimized additional variants of the specialized versions
to give an even better result in the end.

Another change included here is a change to the rotation matrix from quaternion
code. There is a sqrt call which could be optimized away, since we divide in
effect by sqrt(length)*sqrt(length) which is just length ...
"

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • OpenSceneGraph/trunk/src/osg/Matrix_implementation.cpp

    r7301 r8868  
    1919#include <osg/GL> 
    2020 
     21#include <limits> 
    2122#include <stdlib.h> 
    2223 
     
    6364#define QW  q._v[3] 
    6465 
    65 void Matrix_implementation::setRotate(const Quat& q_in) 
    66 { 
    67     Quat q(q_in); 
     66void Matrix_implementation::setRotate(const Quat& q) 
     67{ 
    6868    double length2 = q.length2(); 
    69     if (length2!=1.0 && length2!=0) 
    70     { 
     69    if (fabs(length2) <= std::numeric_limits<double>::min()) 
     70    { 
     71        _mat[0][0] = 0.0; _mat[1][0] = 0.0; _mat[2][0] = 0.0; 
     72        _mat[0][1] = 0.0; _mat[1][1] = 0.0; _mat[2][1] = 0.0; 
     73        _mat[0][2] = 0.0; _mat[1][2] = 0.0; _mat[2][2] = 0.0; 
     74    } 
     75    else 
     76    { 
     77        double rlength2; 
    7178        // normalize quat if required. 
    72         q /= sqrt(length2); 
    73     } 
    74  
    75     // Source: Gamasutra, Rotating Objects Using Quaternions 
    76     // 
    77     //http://www.gamasutra.com/features/19980703/quaternions_01.htm 
    78  
    79     double wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; 
    80  
    81     // calculate coefficients 
    82     x2 = QX + QX; 
    83     y2 = QY + QY; 
    84     z2 = QZ + QZ; 
    85  
    86     xx = QX * x2; 
    87     xy = QX * y2; 
    88     xz = QX * z2; 
    89  
    90     yy = QY * y2; 
    91     yz = QY * z2; 
    92     zz = QZ * z2; 
    93  
    94     wx = QW * x2; 
    95     wy = QW * y2; 
    96     wz = QW * z2; 
    97  
    98     // Note.  Gamasutra gets the matrix assignments inverted, resulting 
    99     // in left-handed rotations, which is contrary to OpenGL and OSG's  
    100     // methodology.  The matrix assignment has been altered in the next 
    101     // few lines of code to do the right thing. 
    102     // Don Burns - Oct 13, 2001 
    103     _mat[0][0] = 1.0 - (yy + zz); 
    104     _mat[1][0] = xy - wz; 
    105     _mat[2][0] = xz + wy; 
    106  
    107  
    108     _mat[0][1] = xy + wz; 
    109     _mat[1][1] = 1.0 - (xx + zz); 
    110     _mat[2][1] = yz - wx; 
    111  
    112     _mat[0][2] = xz - wy; 
    113     _mat[1][2] = yz + wx; 
    114     _mat[2][2] = 1.0 - (xx + yy); 
     79        // We can avoid the expensive sqrt in this case since all 'coefficients' below are products of two q components. 
     80        // That is a square of a square root, so it is possible to avoid that 
     81        if (length2 != 1.0) 
     82        { 
     83            rlength2 = 2.0/length2; 
     84        } 
     85        else 
     86        { 
     87            rlength2 = 2.0; 
     88        } 
     89         
     90        // Source: Gamasutra, Rotating Objects Using Quaternions 
     91        // 
     92        //http://www.gamasutra.com/features/19980703/quaternions_01.htm 
     93         
     94        double wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2; 
     95         
     96        // calculate coefficients 
     97        x2 = rlength2*QX; 
     98        y2 = rlength2*QY; 
     99        z2 = rlength2*QZ; 
     100         
     101        xx = QX * x2; 
     102        xy = QX * y2; 
     103        xz = QX * z2; 
     104         
     105        yy = QY * y2; 
     106        yz = QY * z2; 
     107        zz = QZ * z2; 
     108         
     109        wx = QW * x2; 
     110        wy = QW * y2; 
     111        wz = QW * z2; 
     112         
     113        // Note.  Gamasutra gets the matrix assignments inverted, resulting 
     114        // in left-handed rotations, which is contrary to OpenGL and OSG's  
     115        // methodology.  The matrix assignment has been altered in the next 
     116        // few lines of code to do the right thing. 
     117        // Don Burns - Oct 13, 2001 
     118        _mat[0][0] = 1.0 - (yy + zz); 
     119        _mat[1][0] = xy - wz; 
     120        _mat[2][0] = xz + wy; 
     121         
     122         
     123        _mat[0][1] = xy + wz; 
     124        _mat[1][1] = 1.0 - (xx + zz); 
     125        _mat[2][1] = yz - wx; 
     126         
     127        _mat[0][2] = xz - wy; 
     128        _mat[1][2] = yz + wx; 
     129        _mat[2][2] = 1.0 - (xx + yy); 
     130    } 
    115131 
    116132#if 0 
     
    904920        0.0,     0.0,     0.0,      1.0); 
    905921 
    906     preMult(Matrix_implementation::translate(-eye)); 
     922    preMultTranslate(-eye); 
    907923} 
    908924