root/OpenSceneGraph/trunk/src/osgUtil/MeshOptimizers.cpp @ 11204

Revision 11204, 34.8 kB (checked in by robert, 4 years ago)

From Time Moore, "This submission implements 3 optimizations for meshes. INDEX_MESH turns DrawArrays? style geometry into DrawElements?, uniquifying the vertices in the process. This is useful for certain loaders, like ac3d, which just spit out DrawArrays?. VERTEX_POSTTRANSFORM and VERTEX_PRETRANSFORM optimize mesh triangle and vertex order for the caches on a modern GPU, using Tom Forsyth's algorithm. I describe this and the big difference it makes (38% improvement on a very large mesh) in my blog,
http://shiny-dynamics.blogspot.com/2010/03/vertex-cache-optimization-for-osg.html."

Line 
1/* -*-c++-*- OpenSceneGraph - Copyright (C) 1998-2006 Robert Osfield
2 * Copyright (C) 2010 Tim Moore
3 *
4 * This library is open source and may be redistributed and/or modified under 
5 * the terms of the OpenSceneGraph Public License (OSGPL) version 0.0 or
6 * (at your option) any later version.  The full license is in LICENSE file
7 * included with this distribution, and on the openscenegraph.org website.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * OpenSceneGraph Public License for more details.
13*/
14
15#include <cassert>
16#include <limits>
17
18#include <algorithm>
19#include <utility>
20#include <vector>
21
22#include <iostream>
23
24#include <osg/Geometry>
25#include <osg/Math>
26#include <osg/PrimitiveSet>
27#include <osg/TriangleIndexFunctor>
28
29#include <osgUtil/MeshOptimizers>
30
31using namespace std;
32using namespace osg;
33
34namespace osgUtil
35{
36
37void GeometryCollector::reset()
38{
39    _geometryList.clear();
40}
41
42void GeometryCollector::apply(Geode& geode)
43{
44    for(unsigned int i = 0; i < geode.getNumDrawables(); ++i )
45    {
46        osg::Geometry* geom = dynamic_cast<osg::Geometry*>(geode.getDrawable(i));
47        if (geom) _geometryList.insert(geom);
48    }
49}
50
51namespace
52{
53typedef std::vector<unsigned int> IndexList;
54
55// A helper class that gathers up all the attribute arrays of an
56// osg::Geometry object that are BIND_PER_VERTEX and runs an
57// ArrayVisitor on them.
58struct GeometryArrayGatherer
59{
60    typedef std::vector<osg::Array*> ArrayList;
61
62    GeometryArrayGatherer(osg::Geometry& geometry)
63        : _useDrawElements(true)
64    {
65        add(geometry.getVertexArray(),osg::Geometry::BIND_PER_VERTEX);
66        add(geometry.getNormalArray(),geometry.getNormalBinding());
67        add(geometry.getColorArray(),geometry.getColorBinding());
68        add(geometry.getSecondaryColorArray(),geometry.getSecondaryColorBinding());
69        add(geometry.getFogCoordArray(),geometry.getFogCoordBinding());
70        unsigned int i;
71        for(i=0;i<geometry.getNumTexCoordArrays();++i)
72        {
73            add(geometry.getTexCoordArray(i),osg::Geometry::BIND_PER_VERTEX);
74        }
75        for(i=0;i<geometry.getNumVertexAttribArrays();++i)
76        {
77            add(geometry.getVertexAttribArray(i),geometry.getVertexAttribBinding(i));
78        }
79    }
80
81    void add(osg::Array* array, osg::Geometry::AttributeBinding binding)
82    {
83        if (binding == osg::Geometry::BIND_PER_VERTEX)
84        {
85            if (array)
86                _arrayList.push_back(array);
87        }
88        else if (binding == osg::Geometry::BIND_PER_PRIMITIVE)
89            _useDrawElements = false;
90    }
91
92    void accept(osg::ArrayVisitor& av)
93    {
94        for(ArrayList::iterator itr=_arrayList.begin();
95            itr!=_arrayList.end();
96            ++itr)
97        {
98            (*itr)->accept(av);
99        }
100    }
101   
102    ArrayList _arrayList;
103    // True if geometry contains bindings that are compatible with
104    // DrawElements.
105    bool _useDrawElements;
106};
107
108// Compare vertices in a mesh using all their attributes. The vertices
109// are identified by their index. Extracted from TriStripVisitor.cpp
110struct VertexAttribComparitor : public GeometryArrayGatherer
111{
112    VertexAttribComparitor(osg::Geometry& geometry)
113        : GeometryArrayGatherer(geometry)
114    {
115    }
116   
117    bool operator() (unsigned int lhs, unsigned int rhs) const
118    {
119        for(ArrayList::const_iterator itr=_arrayList.begin();
120            itr!=_arrayList.end();
121            ++itr)
122        {
123            int compare = (*itr)->compare(lhs,rhs);
124            if (compare==-1) return true;
125            if (compare==1) return false;
126        }
127        return false;
128    }   
129
130    int compare(unsigned int lhs, unsigned int rhs)
131    {
132        for(ArrayList::iterator itr=_arrayList.begin();
133            itr!=_arrayList.end();
134            ++itr)
135        {
136            int compare = (*itr)->compare(lhs,rhs);
137            if (compare==-1) return -1;
138            if (compare==1) return 1;
139        }
140        return 0;
141    }
142
143protected:
144    VertexAttribComparitor& operator = (const VertexAttribComparitor&) { return *this; }   
145
146};
147
148// Compact the vertex attribute arrays. Also stolen from TriStripVisitor
149class RemapArray : public osg::ArrayVisitor
150{
151    public:
152        RemapArray(const IndexList& remapping):_remapping(remapping) {}
153       
154        const IndexList& _remapping;
155       
156        template<class T>
157        inline void remap(T& array)
158        {
159            for(unsigned int i=0;i<_remapping.size();++i)
160            {
161                if (i!=_remapping[i])
162                {
163                    array[i] = array[_remapping[i]];
164                }
165            }
166            array.erase(array.begin()+_remapping.size(),array.end());
167        }
168       
169        virtual void apply(osg::Array&) {}
170        virtual void apply(osg::ByteArray& array) { remap(array); }
171        virtual void apply(osg::ShortArray& array) { remap(array); }
172        virtual void apply(osg::IntArray& array) { remap(array); }
173        virtual void apply(osg::UByteArray& array) { remap(array); }
174        virtual void apply(osg::UShortArray& array) { remap(array); }
175        virtual void apply(osg::UIntArray& array) { remap(array); }
176        virtual void apply(osg::Vec4ubArray& array) { remap(array); }
177        virtual void apply(osg::FloatArray& array) { remap(array); }
178        virtual void apply(osg::Vec2Array& array) { remap(array); }
179        virtual void apply(osg::Vec3Array& array) { remap(array); }
180        virtual void apply(osg::Vec4Array& array) { remap(array); }
181
182protected:
183
184        RemapArray& operator = (const RemapArray&) { return *this; }
185};
186
187
188// Construct an index list of triangles for DrawElements for any input
189// primitives.
190struct MyTriangleOperator
191{
192
193    IndexList _remapIndices;
194    IndexList _in_indices;
195
196    inline void operator()(unsigned int p1, unsigned int p2, unsigned int p3)
197    {
198        if (_remapIndices.empty())
199        {
200            _in_indices.push_back(p1);
201            _in_indices.push_back(p2);
202            _in_indices.push_back(p3);
203        }
204        else
205        {
206            _in_indices.push_back(_remapIndices[p1]);
207            _in_indices.push_back(_remapIndices[p2]);
208            _in_indices.push_back(_remapIndices[p3]);
209        }
210    }
211
212};
213typedef osg::TriangleIndexFunctor<MyTriangleOperator> MyTriangleIndexFunctor;
214}
215
216void IndexMeshVisitor::makeMesh(Geometry& geom)
217{
218        if (geom.getNormalBinding()==osg::Geometry::BIND_PER_PRIMITIVE ||
219        geom.getNormalBinding()==osg::Geometry::BIND_PER_PRIMITIVE_SET) return;
220
221    if (geom.getColorBinding()==osg::Geometry::BIND_PER_PRIMITIVE ||
222        geom.getColorBinding()==osg::Geometry::BIND_PER_PRIMITIVE_SET) return;
223   
224    if (geom.getSecondaryColorBinding()==osg::Geometry::BIND_PER_PRIMITIVE ||
225        geom.getSecondaryColorBinding()==osg::Geometry::BIND_PER_PRIMITIVE_SET) return;
226
227    if (geom.getFogCoordBinding()==osg::Geometry::BIND_PER_PRIMITIVE ||
228        geom.getFogCoordBinding()==osg::Geometry::BIND_PER_PRIMITIVE_SET) return;
229
230    // no point optimizing if we don't have enough vertices.
231    if (!geom.getVertexArray() || geom.getVertexArray()->getNumElements()<3) return;
232
233    // check for the existence of surface primitives
234    unsigned int numSurfacePrimitives = 0;
235    unsigned int numNonIndexedPrimitives = 0;
236    Geometry::PrimitiveSetList& primitives = geom.getPrimitiveSetList();
237    Geometry::PrimitiveSetList::iterator itr;
238    for(itr=primitives.begin();
239        itr!=primitives.end();
240        ++itr)
241    {
242        switch((*itr)->getMode())
243        {
244            case(PrimitiveSet::TRIANGLES):
245            case(PrimitiveSet::TRIANGLE_STRIP):
246            case(PrimitiveSet::TRIANGLE_FAN):
247            case(PrimitiveSet::QUADS):
248            case(PrimitiveSet::QUAD_STRIP):
249            case(PrimitiveSet::POLYGON):
250                ++numSurfacePrimitives;
251                break;
252            default:
253                // For now, only deal with polygons
254                return;
255        }
256        PrimitiveSet::Type type = (*itr)->getType();
257        if (!(type == PrimitiveSet::DrawElementsUBytePrimitiveType
258              || type == PrimitiveSet::DrawElementsUShortPrimitiveType
259              || type == PrimitiveSet::DrawElementsUIntPrimitiveType))
260            numNonIndexedPrimitives++;
261    }
262   
263    // nothing to index
264    if (!numSurfacePrimitives || !numNonIndexedPrimitives) return;
265
266    // check to see if vertex attributes indices exists, if so expand them to remove them
267    if (geom.suitableForOptimization())
268    {
269        // removing coord indices
270        osg::notify(osg::INFO)<<"TriStripVisitor::stripify(Geometry&): Removing attribute indices"<<std::endl;
271        geom.copyToAndOptimize(geom);
272    }
273
274
275    // compute duplicate vertices
276   
277    typedef std::vector<unsigned int> IndexList;
278    unsigned int numVertices = geom.getVertexArray()->getNumElements();
279    IndexList indices(numVertices);
280    unsigned int i,j;
281    for(i=0;i<numVertices;++i)
282    {
283        indices[i] = i;
284    }
285   
286    VertexAttribComparitor arrayComparitor(geom);
287    std::sort(indices.begin(),indices.end(),arrayComparitor);
288
289    unsigned int lastUnique = 0;
290    unsigned int numUnique = 1;
291    for(i=1;i<numVertices;++i)
292    {
293        if (arrayComparitor.compare(indices[lastUnique],indices[i]) != 0)
294        {
295            lastUnique = i;
296            ++numUnique;
297        }
298       
299    }
300    IndexList remapDuplicatesToOrignals(numVertices);
301    lastUnique = 0;
302    for(i=1;i<numVertices;++i)
303    {
304        if (arrayComparitor.compare(indices[lastUnique],indices[i])!=0)
305        {
306            // found a new vertex entry, so previous run of duplicates needs
307            // to be put together.
308            unsigned int min_index = indices[lastUnique];
309            for(j=lastUnique+1;j<i;++j)
310            {
311                min_index = osg::minimum(min_index,indices[j]);
312            }
313            for(j=lastUnique;j<i;++j)
314            {
315                remapDuplicatesToOrignals[indices[j]]=min_index;
316            }
317            lastUnique = i;
318        }
319       
320    }
321    unsigned int min_index = indices[lastUnique];
322    for(j=lastUnique+1;j<i;++j)
323    {
324        min_index = osg::minimum(min_index,indices[j]);
325    }
326    for(j=lastUnique;j<i;++j)
327    {
328        remapDuplicatesToOrignals[indices[j]]=min_index;
329    }
330
331
332    // copy the arrays.   
333    IndexList finalMapping(numVertices);
334    IndexList copyMapping;
335    copyMapping.reserve(numUnique);
336    unsigned int currentIndex=0;
337    for(i=0;i<numVertices;++i)
338    {
339        if (remapDuplicatesToOrignals[i]==i)
340        {
341            finalMapping[i] = currentIndex;
342            copyMapping.push_back(i);
343            currentIndex++;
344        }
345    }
346   
347    for(i=0;i<numVertices;++i)
348    {
349        if (remapDuplicatesToOrignals[i]!=i)
350        {
351            finalMapping[i] = finalMapping[remapDuplicatesToOrignals[i]];
352        }
353    }
354   
355   
356    MyTriangleIndexFunctor taf;
357    taf._remapIndices.swap(finalMapping);
358
359    Geometry::PrimitiveSetList new_primitives;
360    new_primitives.reserve(primitives.size());
361
362    for(itr=primitives.begin();
363        itr!=primitives.end();
364        ++itr)
365    {
366        // For now we only have primitive sets that play nicely with
367        // the TriangleIndexFunctor.
368        (*itr)->accept(taf);
369    }
370
371    // remap any shared vertex attributes
372    RemapArray ra(copyMapping);
373    arrayComparitor.accept(ra);
374    if (taf._in_indices.size() < 65536)
375    {
376        osg::DrawElementsUShort* elements = new DrawElementsUShort(GL_TRIANGLES);
377        for (IndexList::iterator itr = taf._in_indices.begin(),
378                 end = taf._in_indices.end();
379             itr != end;
380            ++itr)
381        {
382            elements->push_back((GLushort)(*itr));
383        }
384        new_primitives.push_back(elements);
385    }
386    else
387    {
388        osg::DrawElementsUInt* elements
389            = new DrawElementsUInt(GL_TRIANGLES, taf._in_indices.begin(),
390                                   taf._in_indices.end());
391        new_primitives.push_back(elements);
392    }
393    geom.setPrimitiveSetList(new_primitives);
394}
395
396void IndexMeshVisitor::makeMesh()
397{
398    for(GeometryList::iterator itr=_geometryList.begin();
399        itr!=_geometryList.end();
400        ++itr)
401    {
402        makeMesh(*(*itr));
403    }
404}
405
406namespace
407{
408// A simulation of a Least Recently Used cache. Position in the cache
409// corresponds to when the vertex was used i.e., 0 is the most
410// recently used.
411struct LRUCache
412{
413    LRUCache(size_t maxSize_) : maxSize(maxSize_)
414    {
415        entries.reserve(maxSize_ + 3);
416    }
417    vector<unsigned> entries;
418    size_t maxSize;
419
420    // Add a list of values to the cache
421    void addEntries(unsigned* begin, unsigned* end)
422    {
423        // If any of the new vertices are already in the cache, remove
424        // them, leaving some room at the front of the cache.
425        vector<unsigned>::reverse_iterator validEnd = entries.rend();
426        for (unsigned* pent = begin; pent != end; ++pent)
427        {
428            validEnd = remove(entries.rbegin(), validEnd, *pent);
429        }
430        // Now make room still needed for new cache entries
431        size_t newEnts = end - begin;
432        ssize_t spaceNeeded = newEnts - (entries.rend() - validEnd);
433        if (spaceNeeded > 0)
434        {
435            if (maxSize - entries.size() >= static_cast<unsigned>(spaceNeeded))
436                entries.resize(entries.size() + spaceNeeded);
437            else if (entries.size() < maxSize)
438                entries.resize(maxSize);
439            copy_backward(entries.begin() + newEnts - spaceNeeded,
440                          entries.end() - spaceNeeded, entries.end());
441        }
442        copy(begin, end, entries.begin());
443    }
444
445    bool empty()
446    {
447        return entries.empty();
448    }
449};
450
451// A list of triangles that use a vertex is associated with the Vertex
452// structure in another array.
453struct Vertex
454{
455    int cachePosition;
456    float score;
457    int trisUsing;
458    int numActiveTris;          // triangles left to process
459    size_t triList;             // index to start of triangle storage
460    Vertex()
461        : cachePosition(-1), score(0.0f), trisUsing(0), numActiveTris(0), triList(0)
462    {
463    }
464};
465
466// Code from Tom Forsyth's article. The algorithm is described in
467// detail at
468// http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html.
469// In summary, vertices are assigned a score based on the number of
470// triangles that use it (valence) and the vertex's position in the
471// model of the vertex cache, if any. Triangles are also given a
472// score, which is the sum of the scores of its vertices. The triangle
473// with the best score is added to the draw list, its vertices are
474// added and / or moved in the cache, the scores of vertices in the
475// cache and ejected from the cache are updated. Then the scores of
476// triangles that use those vertices are updated.
477//
478// Unless the cache is empty, the "best" triangle is found by
479// searching triangles that use vertices in the cache, not the whole
480// mesh. This keeps the algorithm running in time proportional to the
481// size of the mesh.
482
483// The "magic" scoring functions are described in the paper.
484const float cacheDecayPower = 1.5f;
485const float lastTriScore = 0.75f;
486const float valenceBoostScale = 2.0f;
487const float valenceBoostPower = 0.5f;
488
489const int maxCacheSize = 32;
490
491float findVertexScore (Vertex& vert)
492{
493
494    if (vert.numActiveTris == 0)
495    {
496        // No tri needs this vertex!
497        return -1.0f;
498    }
499    float score = 0.0f;
500    int cachePosition = vert.cachePosition;
501
502    if (cachePosition < 0)
503    {
504        // Vertex is not in FIFO cache - no score.
505    }
506    else
507    {
508        if (cachePosition < 3)
509        {
510            // This vertex was used in the last triangle,
511            // so it has a fixed score, whichever of the three
512            // it's in. Otherwise, you can get very different
513            // answers depending on whether you add
514            // the triangle 1,2,3 or 3,1,2 - which is silly.
515            score = lastTriScore;
516        }
517        else
518        {
519            assert (cachePosition < maxCacheSize);
520            // Points for being high in the cache.
521            const float scaler = 1.0f / (maxCacheSize - 3);
522            score = 1.0f - (cachePosition - 3 ) * scaler;
523            score = powf(score, cacheDecayPower);
524        }
525    }
526    // Bonus points for having a low number of tris still to
527    // use the vert, so we get rid of lone verts quickly.
528    float valenceBoost = powf(vert.numActiveTris, -valenceBoostPower);
529    score += valenceBoostScale * valenceBoost;
530    return score;
531}
532
533
534typedef vector<Vertex> VertexList;
535
536struct Triangle
537{
538    float score;
539    unsigned verts[3];
540};
541
542typedef vector<Triangle> TriangleList;
543
544inline float findTriangleScore(Triangle& tri, const VertexList& vertices)
545{
546    float result = 0.0f;
547    for (int i = 0; i < 3; ++i)
548        result += vertices[tri.verts[i]].score;
549    return result;
550}
551
552typedef std::pair<unsigned, float> TriangleScore;
553
554TriangleScore computeTriScores(Vertex& vert, const VertexList& vertices,
555                               TriangleList& triangles, vector<unsigned>& triStore)
556{
557    float bestScore = 0.0;
558    unsigned bestTri;
559    for (size_t i = vert.triList;
560         i < vert.triList + vert.numActiveTris;
561         ++i)
562    {
563        unsigned tri = triStore[i];
564        float score = triangles[tri].score = findTriangleScore(triangles[tri],
565                                                               vertices);
566        if (score > bestScore)
567        {
568            bestScore = score;
569            bestTri = tri;
570        }
571    }
572    return make_pair(bestTri, bestScore);
573}
574
575typedef vector<Triangle> TriangleList;
576
577struct TriangleCounterOperator
578{
579    VertexList* vertices;
580    int triangleCount;
581    TriangleCounterOperator() : vertices(0), triangleCount(0) {}
582
583    void doVertex(unsigned p)
584    {
585        if (vertices->size() <= p)
586            vertices->resize(p + 1);
587        (*vertices)[p].trisUsing++;
588    }
589
590    void operator() (unsigned int p1, unsigned int p2, unsigned int p3)
591    {
592        if (p1 == p2 || p2 == p3 || p1 == p3)
593            return;
594        doVertex(p1);
595        doVertex(p2);
596        doVertex(p3);
597        triangleCount++;
598    }
599};
600
601struct TriangleCounter : public TriangleIndexFunctor<TriangleCounterOperator>
602{
603    TriangleCounter(vector<Vertex>* vertices_)
604    {
605        vertices = vertices_;
606    }
607};
608
609// Initialize the vertex triangle lists and the triangle data structures
610struct TriangleAddOperator
611{
612    VertexList* vertices;
613    vector<unsigned>* vertexTris;
614    TriangleList* triangles;
615    int triIdx;
616    TriangleAddOperator() : vertices(0), triIdx(0) {}
617
618    void doVertex(unsigned p)
619    {
620        (*vertexTris)[(*vertices)[p].triList + (*vertices)[p].numActiveTris++]
621            = triIdx;
622    }
623
624    void operator() (unsigned int p1, unsigned int p2, unsigned int p3)
625    {
626        if (p1 == p2 || p2 == p3 || p1 == p3)
627            return;
628        doVertex(p1);
629        doVertex(p2);
630        doVertex(p3);
631    (*triangles)[triIdx].verts[0] = p1;
632    (*triangles)[triIdx].verts[1] = p2;
633    (*triangles)[triIdx].verts[2] = p3;
634    triIdx++;
635    }
636};
637
638struct TriangleAdder : public TriangleIndexFunctor<TriangleAddOperator>
639{
640    TriangleAdder(VertexList* vertices_, vector<unsigned>* vertexTris_,
641                  TriangleList* triangles_)
642    {
643        vertices = vertices_;
644        vertexTris = vertexTris_;
645        triangles = triangles_;
646    }
647};
648
649struct CompareTriangle
650{
651    bool operator()(const Triangle& lhs, const Triangle& rhs)
652    {
653        return lhs.score < rhs.score;
654    }
655};
656}
657
658void VertexCacheVisitor::optimizeVertices(Geometry& geom)
659{
660    Array* vertArray = geom.getVertexArray();
661    if (!vertArray)
662        return;
663    unsigned vertArraySize = vertArray->getNumElements();
664    // If all the vertices fit in the cache, there's no point in
665    // doing this optimization.
666    if (vertArraySize <= 16)
667        return;
668    Geometry::PrimitiveSetList& primSets = geom.getPrimitiveSetList();
669    for (Geometry::PrimitiveSetList::iterator itr = primSets.begin(),
670             end = primSets.end();
671         itr != end;
672         ++itr)
673    {
674        // Can only deal with polygons.
675        switch ((*itr)->getMode())
676        {
677        case(PrimitiveSet::TRIANGLES):
678        case(PrimitiveSet::TRIANGLE_STRIP):
679        case(PrimitiveSet::TRIANGLE_FAN):
680        case(PrimitiveSet::QUADS):
681        case(PrimitiveSet::QUAD_STRIP):
682        case(PrimitiveSet::POLYGON):
683            break;
684        default:
685            return;
686        }
687        PrimitiveSet::Type type = (*itr)->getType();
688        if (type != PrimitiveSet::DrawElementsUBytePrimitiveType
689            && type != PrimitiveSet::DrawElementsUShortPrimitiveType
690            && type != PrimitiveSet::DrawElementsUIntPrimitiveType)
691            return;
692    }
693#if 0
694    VertexCacheMissVisitor missv;
695    missv.doGeometry(geom);
696    cout << "Before optimization: misses: " << missv.misses
697         << " triangles: " << missv.triangles << " acmr: ";
698    if (missv.triangles > 0.0)
699        cout << (double)missv.misses / (double)missv.triangles << "\n";
700    else
701        cout << "0.0\n";
702    missv.reset();
703#endif
704    vector<unsigned> newVertList;
705    doVertexOptimization(geom, newVertList);
706    Geometry::PrimitiveSetList newPrims;
707    if (vertArraySize < 65536)
708    {
709        osg::DrawElementsUShort* elements = new DrawElementsUShort(GL_TRIANGLES);
710        elements->reserve(newVertList.size());
711        for (vector<unsigned>::iterator itr = newVertList.begin(),
712                 end = newVertList.end();
713             itr != end;
714             ++itr)
715            elements->push_back((GLushort)*itr);
716        if (geom.getUseVertexBufferObjects())
717        {
718            elements->setElementBufferObject(new ElementBufferObject);
719        }
720        newPrims.push_back(elements);
721    }
722    else
723    {
724        osg::DrawElementsUInt* elements
725            = new DrawElementsUInt(GL_TRIANGLES, newVertList.begin(),
726                                   newVertList.end());
727        if (geom.getUseVertexBufferObjects())
728        {
729            elements->setElementBufferObject(new ElementBufferObject);
730        }
731        newPrims.push_back(elements);
732    }
733
734    geom.setPrimitiveSetList(newPrims);
735#if 0
736    missv.doGeometry(geom);
737    cout << "After optimization: misses: " << missv.misses
738         << " triangles: " << missv.triangles << " acmr: ";
739    if (missv.triangles > 0.0)
740        cout << (double)missv.misses / (double)missv.triangles << "\n";
741    else
742        cout << "0.0\n";
743#endif
744    geom.dirtyDisplayList();
745}
746
747// The main optimization loop
748void VertexCacheVisitor::doVertexOptimization(Geometry& geom,
749                                              vector<unsigned>& vertDrawList)
750{
751    Geometry::PrimitiveSetList& primSets = geom.getPrimitiveSetList();
752    // lists for all the vertices and triangles
753    VertexList vertices;
754    TriangleList triangles;
755    TriangleCounter triCounter(&vertices);
756    for (Geometry::PrimitiveSetList::iterator itr = primSets.begin(),
757             end = primSets.end();
758         itr != end;
759         ++itr)
760        (*itr)->accept(triCounter);
761    triangles.resize(triCounter.triangleCount);
762    // Get total of triangles used by all the vertices
763    size_t vertTrisSize = 0;
764    for (VertexList::iterator itr = vertices.begin(), end = vertices.end();
765         itr != end;
766         ++itr)
767    {
768        itr->triList = vertTrisSize;
769        vertTrisSize += itr->trisUsing;
770    }
771    // Store for lists of triangles (indices) used by the vertices
772    vector<unsigned> vertTriListStore(vertTrisSize);
773    TriangleAdder triAdder(&vertices, &vertTriListStore, &triangles);
774    for (Geometry::PrimitiveSetList::iterator itr = primSets.begin(),
775             end = primSets.end();
776         itr != end;
777         ++itr)
778        (*itr)->accept(triAdder);
779    // Set up initial scores for vertices and triangles
780    for (VertexList::iterator itr = vertices.begin(), end = vertices.end();
781         itr != end;
782         ++itr)
783        itr->score = findVertexScore(*itr);
784    for (TriangleList::iterator itr = triangles.begin(), end = triangles.end();
785         itr != end;
786         ++itr)
787    {
788        itr->score = 0.0f;
789        for (int i = 0; i < 3; ++i)
790            itr->score += vertices[itr->verts[i]].score;
791    }
792    // Add Triangles to the draw list until there are no more.
793    unsigned trisLeft = triangles.size();
794    vertDrawList.reserve(trisLeft * 3);
795    LRUCache cache(maxCacheSize);
796    while (trisLeft-- > 0)
797    {
798        Triangle* triToAdd = 0;
799        float bestScore = 0.0;
800        for (vector<unsigned>::const_iterator itr = cache.entries.begin(),
801                 end = cache.entries.end();
802             itr != end;
803             ++itr)
804        {
805            TriangleScore tscore =  computeTriScores(vertices[*itr], vertices,
806                                                     triangles, vertTriListStore);
807            if (tscore.second > bestScore)
808            {
809                bestScore = tscore.second;
810                triToAdd = &triangles[tscore.first];
811            }
812        }
813        if (!triToAdd)
814        {
815            // The cache was empty, or all the triangles that use
816            // vertices in the cache have already been added.
817            OSG_NOTIFY(osg::DEBUG_INFO) << "VertexCacheVisitor searching all triangles" << std::endl;
818            TriangleList::iterator maxItr
819                = max_element(triangles.begin(), triangles.end(),
820                              CompareTriangle());
821            triToAdd = &(*maxItr);
822        }
823        assert(triToAdd != 0 && triToAdd->score > 0.0);
824        // Add triangle vertices, and remove triangle from the
825        // vertices that use it.
826        triToAdd->score = -1.0f;
827        unsigned triToAddIdx = triToAdd - &triangles[0];
828        for (unsigned i = 0; i < 3; ++i)
829        {
830            unsigned vertIdx = triToAdd->verts[i];
831            Vertex* vert = &vertices[vertIdx];
832            vertDrawList.push_back(vertIdx);
833            remove(vertTriListStore.begin() + vert->triList,
834                   vertTriListStore.begin() + vert->triList
835                   + vert->numActiveTris,
836                   triToAddIdx);
837            vert->numActiveTris--;
838        }
839        // Assume that the three oldest cache entries will get kicked
840        // out, so reset their scores and those of their
841        // triangles. Otherwise we'll "lose" them and not be able to
842        // change their scores.
843        if (cache.maxSize - cache.entries.size() < 3)
844        {
845            for (vector<unsigned>::iterator itr = cache.entries.end() - 3,
846                     end = cache.entries.end();
847                 itr != end;
848                ++itr)
849            {
850                vertices[*itr].cachePosition = -1;
851                vertices[*itr].score = findVertexScore(vertices[*itr]);
852            }
853            for (vector<unsigned>::iterator itr = cache.entries.end() - 3,
854                     end = cache.entries.end();
855                 itr != end;
856                ++itr)
857                computeTriScores(vertices[*itr], vertices, triangles,
858                                 vertTriListStore);
859        }
860        cache.addEntries(&triToAdd->verts[0], &triToAdd->verts[3]);
861        for (vector<unsigned>::const_iterator itr = cache.entries.begin(),
862                 end = cache.entries.end();
863             itr != end;
864             ++itr)
865        {
866            unsigned vertIdx = *itr;
867            vertices[vertIdx].cachePosition = itr - cache.entries.begin();
868            vertices[vertIdx].score = findVertexScore(vertices[vertIdx]);
869        }
870     }
871}
872
873void VertexCacheVisitor::optimizeVertices()
874{
875    for(GeometryList::iterator itr=_geometryList.begin();
876        itr!=_geometryList.end();
877        ++itr)
878    {
879        optimizeVertices(*(*itr));
880    }
881}
882
883VertexCacheMissVisitor::VertexCacheMissVisitor(unsigned cacheSize)
884    : osg::NodeVisitor(NodeVisitor::TRAVERSE_ALL_CHILDREN), misses(0),
885      triangles(0), _cacheSize(cacheSize)
886{
887}
888
889void VertexCacheMissVisitor::reset()
890{
891    misses = 0;
892    triangles = 0;
893}
894
895void VertexCacheMissVisitor::apply(Geode& geode)
896{
897    for(unsigned int i = 0; i < geode.getNumDrawables(); ++i )
898    {
899        osg::Geometry* geom = dynamic_cast<osg::Geometry*>(geode.getDrawable(i));
900        if (geom)
901            doGeometry(*geom);
902    }
903}
904
905namespace
906{
907// The cache miss algorithm models an LRU cache because it results in an
908// order that is insensitive to the actual cache size of the GPU. Real
909// GPUs use a FIFO cache, so the statistics gatherer simulates that.
910struct FIFOCache
911{
912    FIFOCache(size_t maxSize_) : maxSize(maxSize_)
913    {
914        entries.reserve(maxSize_);
915    }
916    vector<unsigned> entries;
917    size_t maxSize;
918
919    // Add a list of values to the cache
920    void addEntries(unsigned* begin, unsigned* end)
921    {
922        // Make room for new cache entries
923        size_t newEnts = end - begin;
924        if (entries.size() < maxSize)
925            entries.resize(osg::minimum(entries.size() + newEnts, maxSize));
926        vector<unsigned>::iterator copyEnd = entries.end() - newEnts;
927        copy_backward(entries.begin(), copyEnd, entries.end());
928        copy(begin, end, entries.begin());
929    }
930
931    bool empty()
932    {
933        return entries.empty();
934    }
935};
936
937// Insert vertices in a cache and record cache misses
938struct CacheRecordOperator
939{
940    CacheRecordOperator() : cache(0), misses(0), triangles(0) {}
941    FIFOCache* cache;
942    unsigned misses;
943    unsigned triangles;
944    void operator()(unsigned p1, unsigned p2, unsigned p3)
945    {
946        unsigned verts[3];
947        verts[0] = p1;
948        verts[1] = p2;
949        verts[2] = p3;
950        triangles++;
951        for (int i = 0; i < 3; ++i)
952        {
953            if (find(cache->entries.begin(), cache->entries.end(), verts[i])
954                == cache->entries.end())
955                misses++;
956        }
957        cache->addEntries(&verts[0], &verts[3]);
958    }
959};
960
961struct CacheRecorder : public TriangleIndexFunctor<CacheRecordOperator>
962{
963    CacheRecorder(unsigned cacheSize)
964    {
965        cache = new FIFOCache(cacheSize);
966    }
967
968    ~CacheRecorder()
969    {
970        delete cache;
971    }
972};
973}
974
975void VertexCacheMissVisitor::doGeometry(Geometry& geom)
976{
977    Array* vertArray = geom.getVertexArray();
978    if (!vertArray)
979        return;
980    Geometry::PrimitiveSetList& primSets = geom.getPrimitiveSetList();
981    CacheRecorder recorder(_cacheSize);
982    for (Geometry::PrimitiveSetList::iterator itr = primSets.begin(),
983             end = primSets.end();
984         itr != end;
985         ++itr)
986    {
987        (*itr)->accept(recorder);
988    }
989    misses += recorder.misses;
990    triangles += recorder.triangles;
991}
992
993namespace
994{
995// Move the values in an array to new positions, based on the
996// remapping table. remapping[i] contains element i's new position, if
997// any.  Unlike RemapArray in TriStripVisitor, this code doesn't
998// assume that elements only move downward in the array.
999class Remapper : public osg::ArrayVisitor
1000{
1001public:
1002    static const unsigned invalidIndex;
1003    Remapper(const vector<unsigned>& remapping)
1004        : _remapping(remapping), _newsize(0)
1005    {
1006        for (vector<unsigned>::const_iterator itr = _remapping.begin(),
1007                 end = _remapping.end();
1008             itr != end;
1009             ++itr)
1010            if (*itr != invalidIndex)
1011                ++_newsize;
1012    }
1013
1014    const vector<unsigned>& _remapping;
1015    size_t _newsize;
1016
1017    template<class T>
1018    inline void remap(T& array)
1019    {
1020        ref_ptr<T> newarray = new T(_newsize);
1021        T* newptr = newarray.get();
1022        for (size_t i = 0; i < array.size(); ++i)
1023            if (_remapping[i] != invalidIndex)
1024                (*newptr)[_remapping[i]] = array[i];
1025        array.swap(*newptr);
1026
1027    }
1028
1029    virtual void apply(osg::Array&) {}
1030    virtual void apply(osg::ByteArray& array) { remap(array); }
1031    virtual void apply(osg::ShortArray& array) { remap(array); }
1032    virtual void apply(osg::IntArray& array) { remap(array); }
1033    virtual void apply(osg::UByteArray& array) { remap(array); }
1034    virtual void apply(osg::UShortArray& array) { remap(array); }
1035    virtual void apply(osg::UIntArray& array) { remap(array); }
1036    virtual void apply(osg::Vec4ubArray& array) { remap(array); }
1037    virtual void apply(osg::FloatArray& array) { remap(array); }
1038    virtual void apply(osg::Vec2Array& array) { remap(array); }
1039    virtual void apply(osg::Vec3Array& array) { remap(array); }
1040    virtual void apply(osg::Vec4Array& array) { remap(array); }
1041};
1042
1043const unsigned Remapper::invalidIndex = std::numeric_limits<unsigned>::max();
1044
1045// Record the order in which vertices in a Geometry are used.
1046struct VertexReorderOperator
1047{
1048    unsigned seq;
1049    vector<unsigned> remap;
1050
1051    VertexReorderOperator() : seq(0)
1052    {
1053    }
1054   
1055    void inline doVertex(unsigned v)
1056    {
1057        if (remap[v] == Remapper::invalidIndex)
1058            remap[v] = seq++;
1059    }
1060    void operator()(unsigned p1, unsigned p2, unsigned p3)
1061    {
1062        doVertex(p1);
1063        doVertex(p2);
1064        doVertex(p3);
1065    }
1066};
1067
1068struct VertexReorder : public TriangleIndexFunctor<VertexReorderOperator>
1069{
1070    VertexReorder(unsigned numVerts)
1071    {
1072        remap.resize(numVerts, Remapper::invalidIndex);
1073    }
1074   
1075};
1076}
1077
1078void VertexAccessOrderVisitor::optimizeOrder()
1079{
1080    for (GeometryList::iterator itr = _geometryList.begin(), end = _geometryList.end();
1081         itr != end;
1082         ++itr)
1083    {
1084        optimizeOrder(*(*itr));
1085    }
1086}
1087
1088template<typename DE>
1089inline void reorderDrawElements(DE& drawElements,
1090                                const vector<unsigned>& reorder)
1091{
1092    for (typename DE::iterator itr = drawElements.begin(), end = drawElements.end();
1093         itr != end;
1094         ++itr)
1095    {
1096        *itr = static_cast<typename DE::value_type>(reorder[*itr]);
1097    }
1098}
1099
1100void VertexAccessOrderVisitor::optimizeOrder(Geometry& geom)
1101{
1102    Array* vertArray = geom.getVertexArray();
1103    if (!vertArray)
1104        return;
1105    Geometry::PrimitiveSetList& primSets = geom.getPrimitiveSetList();
1106    GeometryArrayGatherer gatherer(geom);
1107    if (!gatherer._useDrawElements)
1108        return;
1109    VertexReorder vr(vertArray->getNumElements());
1110    for (Geometry::PrimitiveSetList::iterator itr = primSets.begin(),
1111             end = primSets.end();
1112         itr != end;
1113         ++itr)
1114    {
1115        PrimitiveSet* ps = itr->get();
1116        PrimitiveSet::Type type = ps->getType();
1117        if (type != PrimitiveSet::DrawElementsUBytePrimitiveType
1118            && type != PrimitiveSet::DrawElementsUShortPrimitiveType
1119            && type != PrimitiveSet::DrawElementsUIntPrimitiveType)
1120            return;
1121        ps->accept(vr);
1122    }
1123    Remapper remapper(vr.remap);
1124    gatherer.accept(remapper);
1125    for (Geometry::PrimitiveSetList::iterator itr = primSets.begin(),
1126             end = primSets.end();
1127         itr != end;
1128         ++itr)
1129    {
1130        PrimitiveSet* ps = itr->get();
1131        switch (ps->getType())
1132        {
1133        case PrimitiveSet::DrawElementsUBytePrimitiveType:
1134            reorderDrawElements(*static_cast<DrawElementsUByte*>(ps), vr.remap);
1135            break;
1136        case PrimitiveSet::DrawElementsUShortPrimitiveType:
1137            reorderDrawElements(*static_cast<DrawElementsUShort*>(ps), vr.remap);
1138            break;
1139        case PrimitiveSet::DrawElementsUIntPrimitiveType:
1140            reorderDrawElements(*static_cast<DrawElementsUInt*>(ps), vr.remap);
1141            break;
1142        default:
1143            break;
1144        }
1145    }
1146    geom.dirtyDisplayList();
1147}
1148}
Note: See TracBrowser for help on using the browser.