never executed always true always false
    1 {-|
    2 Module: Flaw.Visual.Geometry.CacheOptimization
    3 Description: Geometry cache optimization algorithms.
    4 License: MIT
    5 
    6 Two cache optimization algorithms are implemented so far.
    7 In preferred order of application:
    8 
    9 * 'optimizeGeometryIndicesLocality' reorders triangles trying to use post-transform
   10 vertex cache as efficiently as possible.
   11 * 'optimizeGeometryVerticesLocality' reorders vertices (remapping indices accordingly, but not reordering triangles)
   12 trying to optimize pre-transform vertex cache usage.
   13 -}
   14 
   15 module Flaw.Visual.Geometry.CacheOptimization
   16   ( optimizeGeometryIndicesLocality
   17   , optimizeGeometryVerticesLocality
   18   , optimizeGeometryLocality
   19   ) where
   20 
   21 import Control.Monad
   22 import Control.Monad.ST
   23 import qualified Data.Vector.Generic as VG
   24 import qualified Data.Vector.Generic.Mutable as VGM
   25 import qualified Data.Vector.Unboxed as VU
   26 import qualified Data.Vector.Unboxed.Mutable as VUM
   27 
   28 cacheWeights :: VU.Vector Float
   29 cacheWeights = VU.fromList $ 0.75 : 0.75 : 0.75 : [pwr (1 - (i - 3) / 28) | i <- [3..31]] where
   30   pwr i = i * sqrt i
   31 
   32 -- | Optimize usage of vertex post-transform cache by manipulating order of triangles.
   33 -- "Linear-Speed Vertex Cache Optimisation" by Tom Forsyth
   34 -- https://tomforsyth1000.github.io/papers/fast_vert_cache_opt.html
   35 optimizeGeometryIndicesLocality :: (VG.Vector v i, Integral i) => v i -> v i
   36 optimizeGeometryIndicesLocality indices = runST $ do
   37 
   38   let
   39     indicesCount = VG.length indices
   40     trianglesCount = indicesCount `quot` 3
   41     verticesCount = fromIntegral $ VG.maximum indices + 1
   42 
   43   -- count triangles for each vertex
   44   verticesTrianglesCounts <- VUM.replicate verticesCount 0
   45   let
   46     f i = when (i < indicesCount) $ do
   47       VGM.unsafeModify verticesTrianglesCounts (+ 1) $ fromIntegral $ indices VG.! i
   48       f $ i + 1
   49     in f 0
   50 
   51   -- create lists of triangles per vertex
   52   verticesTrianglesPtrs <- VUM.new verticesCount
   53   let
   54     f p i = when (i < verticesCount) $ do
   55       VGM.unsafeWrite verticesTrianglesPtrs i p
   56       vertexTrianglesCount <- VGM.unsafeRead verticesTrianglesCounts i
   57       f (p + vertexTrianglesCount) (i + 1)
   58     in f 0 0
   59   verticesTrianglesOffsets <- VU.freeze verticesTrianglesPtrs
   60   verticesTriangles <- VUM.new indicesCount
   61   let
   62     f i = when (i < indicesCount) $ do
   63       let v = fromIntegral $ indices VG.! i
   64       o <- VGM.unsafeRead verticesTrianglesPtrs v
   65       VGM.unsafeWrite verticesTriangles o (i `quot` 3)
   66       VGM.unsafeWrite verticesTrianglesPtrs v $ o + 1
   67       f $ i + 1
   68     in f 0
   69 
   70   -- post-transform vertex cache
   71   let
   72     cacheSize = VG.length cacheWeights
   73   cache <- VUM.replicate (cacheSize + 3) (-1)
   74 
   75   let
   76     vertexValenceBoost i = do
   77       vertexTrianglesCount <- VGM.unsafeRead verticesTrianglesCounts i
   78       return $ if vertexTrianglesCount > 0 then 2 / sqrt (fromIntegral vertexTrianglesCount) else 0
   79 
   80   -- initial vertex scores
   81   verticesScores <- VUM.new verticesCount
   82   let
   83     f i = when (i < verticesCount) $ do
   84       VUM.unsafeWrite verticesScores i =<< vertexValenceBoost i
   85       f $ i + 1
   86     in f 0
   87 
   88   -- initial triangle scores
   89   trianglesScores <- VUM.new trianglesCount
   90   let
   91     updateTriangleScore i = do
   92       a <- VGM.unsafeRead verticesScores $ fromIntegral $ indices VG.! (i * 3)
   93       b <- VGM.unsafeRead verticesScores $ fromIntegral $ indices VG.! (i * 3 + 1)
   94       c <- VGM.unsafeRead verticesScores $ fromIntegral $ indices VG.! (i * 3 + 2)
   95       VGM.unsafeWrite trianglesScores i $ a + b + c
   96   let
   97     f i = when (i < trianglesCount) $ do
   98       updateTriangleScore i
   99       f $ i + 1
  100     in f 0
  101 
  102   -- interval tree of triangles
  103   -- calculate size of the last level which is nearest power of two for triangles count
  104   let
  105     intervalTreeSize = let
  106       f i = if i < trianglesCount then f $ i * 2 else i
  107       in f 2
  108   intervalTree <- VUM.replicate (intervalTreeSize - 1) (-1)
  109   -- update node function
  110   let
  111     updateIntervalTreeNode i = do
  112       let
  113         lastLevel = i * 2 + 1 >= intervalTreeSize - 1
  114       a <-
  115         if lastLevel then let
  116           t = i * 2 + 1 - intervalTreeSize + 1
  117           in return $ if t < trianglesCount then t else (-1)
  118         else VGM.unsafeRead intervalTree (i * 2 + 1)
  119       b <-
  120         if lastLevel then let
  121           t = i * 2 + 2 - intervalTreeSize + 1
  122           in return $ if t < trianglesCount then t else (-1)
  123         else VGM.unsafeRead intervalTree (i * 2 + 2)
  124       as <- if a >= 0 then VGM.unsafeRead trianglesScores a else return 0
  125       bs <- if b >= 0 then VGM.unsafeRead trianglesScores b else return 0
  126       VGM.unsafeWrite intervalTree i $ if as > bs then a else b
  127   -- initial update
  128   let
  129     f i = when (i >= 0) $ do
  130       updateIntervalTreeNode i
  131       f $ i - 1
  132     in f $ intervalTreeSize - 2
  133 
  134   -- emit new indices
  135   newIndices <- VGM.new $ trianglesCount * 3
  136   let
  137     step triangleIndex = when (triangleIndex < trianglesCount) $ do
  138       -- get triangle with highest score
  139       triangle <- VGM.unsafeRead intervalTree 0
  140 
  141       -- set triangle score to 0, so it's not selected anymore
  142       VGM.unsafeWrite trianglesScores triangle 0
  143       -- emit indices
  144       let
  145         a = indices VG.! (triangle * 3)
  146         b = indices VG.! (triangle * 3 + 1)
  147         c = indices VG.! (triangle * 3 + 2)
  148       VGM.unsafeWrite newIndices (triangleIndex * 3) a
  149       VGM.unsafeWrite newIndices (triangleIndex * 3 + 1) b
  150       VGM.unsafeWrite newIndices (triangleIndex * 3 + 2) c
  151       -- remove triangle from triangle lists of its vertices
  152       let
  153         removeTriangleFromVertexTriangles v = do
  154           let
  155             vertexTrianglesOffset = verticesTrianglesOffsets VG.! v
  156           vertexTrianglesCount <- VGM.unsafeRead verticesTrianglesCounts v
  157           let
  158             f i tc = if i >= tc then return tc else do
  159               t <- VGM.unsafeRead verticesTriangles $ vertexTrianglesOffset + i
  160               if t == triangle then do
  161                 when (i < tc - 1) $
  162                   VGM.unsafeWrite verticesTriangles (vertexTrianglesOffset + i) =<< VGM.unsafeRead verticesTriangles (vertexTrianglesOffset + tc - 1)
  163                 f i (tc - 1)
  164               else f (i + 1) tc
  165             in VGM.unsafeWrite verticesTrianglesCounts v =<< f 0 vertexTrianglesCount
  166       removeTriangleFromVertexTriangles $ fromIntegral a
  167       removeTriangleFromVertexTriangles $ fromIntegral b
  168       removeTriangleFromVertexTriangles $ fromIntegral c
  169       -- make room in cache
  170       let
  171         f i = when (i >= 0) $ do
  172           VGM.unsafeWrite cache (i + 3) =<< VGM.unsafeRead cache i
  173           f $ i - 1
  174         in f $ cacheSize - 1
  175       -- put triangle's vertices on top
  176       VGM.unsafeWrite cache 0 $ fromIntegral a
  177       VGM.unsafeWrite cache 1 $ fromIntegral b
  178       VGM.unsafeWrite cache 2 $ fromIntegral c
  179       -- remove triangle's vertices from the rest of the cache (in case they were in the cache already)
  180       cacheEnd <- let
  181         f p i = if i >= cacheSize + 3 then return p else do
  182           v <- VGM.unsafeRead cache i
  183           if v == fromIntegral a || v == fromIntegral b || v == fromIntegral c then f p (i + 1)
  184           else do
  185             when (p < i) $ VGM.unsafeWrite cache p v
  186             f (p + 1) (i + 1)
  187         in f 3 3
  188       -- update cached vertices' and their's triangles' scores and nodes
  189       let
  190         f i = when (i < cacheEnd) $ do
  191           v <- VGM.unsafeRead cache i
  192           when (v >= 0) $ do
  193             valenceBoost <- vertexValenceBoost v
  194             VGM.unsafeWrite verticesScores v $ (if i < cacheSize then cacheWeights VG.! i else 0) + valenceBoost
  195             vertexTrianglesCount <- VGM.unsafeRead verticesTrianglesCounts v
  196             let
  197               vertexTrianglesOffset = verticesTrianglesOffsets VG.! v
  198               u j = when (j < vertexTrianglesCount) $ do
  199                 t <- VGM.unsafeRead verticesTriangles $ vertexTrianglesOffset + j
  200                 updateTriangleScore t
  201                 let
  202                   g k = do
  203                     updateIntervalTreeNode k
  204                     when (k > 0) $ g $ (k - 1) `quot` 2
  205                   in g $ (t + intervalTreeSize - 1 - 1) `quot` 2
  206                 u $ j + 1
  207               in u 0
  208         in f 0
  209       -- update removed triangle's nodes
  210       let
  211         g k = do
  212           updateIntervalTreeNode k
  213           when (k > 0) $ g $ (k - 1) `quot` 2
  214         in g $ (triangle + intervalTreeSize - 1 - 1) `quot` 2
  215 
  216       -- repeat
  217       step $ triangleIndex + 1
  218     in step 0
  219 
  220   VG.unsafeFreeze newIndices
  221 
  222 -- | Optimize usage of vertex pre-transform cache by manipulating order of vertices.
  223 optimizeGeometryVerticesLocality :: (VG.Vector va a, VG.Vector vi i, Integral i) => va a -> vi i -> (va a, vi i)
  224 optimizeGeometryVerticesLocality vertices indices = runST $ do
  225   let
  226     verticesCount = VG.length vertices
  227     indicesCount = VG.length indices
  228   vertexMap <- VUM.replicate verticesCount verticesCount
  229   newVertices <- VGM.new verticesCount
  230   newIndices <- VGM.new indicesCount
  231   newVerticesCount <- let
  232     f p i = if i >= indicesCount then return p else do
  233       let
  234         v = fromIntegral $ indices VG.! i
  235       remappedVertex <- VGM.unsafeRead vertexMap v
  236       if remappedVertex < verticesCount then do
  237         VGM.unsafeWrite newIndices i $ fromIntegral remappedVertex
  238         f p (i + 1)
  239       else do
  240         VGM.unsafeWrite vertexMap v p
  241         VGM.unsafeWrite newVertices p $ vertices VG.! v
  242         VGM.unsafeWrite newIndices i $ fromIntegral p
  243         f (p + 1) (i + 1)
  244     in f 0 0
  245   freezedNewVertices <- VG.unsafeFreeze $ VGM.slice 0 newVerticesCount newVertices
  246   freezedNewIndices <- VG.unsafeFreeze newIndices
  247   return (freezedNewVertices, freezedNewIndices)
  248 
  249 -- | Perform both optimization of vertices and indices.
  250 optimizeGeometryLocality :: (VG.Vector va a, VG.Vector vi i, Integral i) => va a -> vi i -> (va a, vi i)
  251 optimizeGeometryLocality vertices indices = optimizeGeometryVerticesLocality vertices (optimizeGeometryIndicesLocality indices)