From 4128bcf0737d58a222c1669c459f3f8b0e19b89b Mon Sep 17 00:00:00 2001
From: Pavel Krajcevski <pavel@cs.unc.edu>
Date: Tue, 25 Mar 2014 16:24:08 -0400
Subject: [PATCH] Template the number of buckets during error calc so the
 compiler can unroll...

---
 BPTCEncoder/src/RGBAEndpoints.cpp | 23 +++++++++++++++++++----
 BPTCEncoder/src/RGBAEndpoints.h   | 17 ++++++++++++++++-
 2 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/BPTCEncoder/src/RGBAEndpoints.cpp b/BPTCEncoder/src/RGBAEndpoints.cpp
index c792051..ca365af 100755
--- a/BPTCEncoder/src/RGBAEndpoints.cpp
+++ b/BPTCEncoder/src/RGBAEndpoints.cpp
@@ -217,16 +217,16 @@ uint32 RGBAVector::ToPixel(const uint32 channelMask, const int pBit) const {
 //
 ///////////////////////////////////////////////////////////////////////////////
 
+template<const uint8 nBuckets>
 double RGBACluster::QuantizedError(
   const RGBAVector &p1, const RGBAVector &p2,
-  uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec,
+  uint32 bitMask, const RGBAVector &errorMetricVec,
   const int pbits[2], uint8 *indices
 ) const {
 
   // nBuckets should be a power of two.
-  assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1)));
-
-  const uint8 indexPrec = (nBuckets == 3)? 3 : 8-CountBitsInMask(~(nBuckets - 1));
+  assert(!(nBuckets & (nBuckets - 1)));
+  const uint8 indexPrec = 8-CountBitsInMask(~(nBuckets - 1));
   
   typedef uint32 tInterpPair[2];
   typedef tInterpPair tInterpLevel[16];
@@ -295,6 +295,21 @@ double RGBACluster::QuantizedError(
   return totalError;
 }
 
+template double RGBACluster::QuantizedError<4>(  // explicit instantiation: nBuckets = 4
+  const RGBAVector &p1, const RGBAVector &p2,
+  uint32 bitMask, const RGBAVector &errorMetricVec,
+  const int pbits[2], uint8 *indices) const;  // template body is defined in this .cpp; the header only declares it
+
+template double RGBACluster::QuantizedError<8>(  // explicit instantiation: nBuckets = 8
+  const RGBAVector &p1, const RGBAVector &p2,
+  uint32 bitMask, const RGBAVector &errorMetricVec,
+  const int pbits[2], uint8 *indices) const;
+
+template double RGBACluster::QuantizedError<16>(  // explicit instantiation: nBuckets = 16
+  const RGBAVector &p1, const RGBAVector &p2,
+  uint32 bitMask, const RGBAVector &errorMetricVec,
+  const int pbits[2], uint8 *indices) const;
+
 uint32 RGBACluster::GetPrincipalAxis(RGBADir &axis, float *eigOne, float *eigTwo) const {
 
   // We use these vectors for calculating the covariance matrix...
diff --git a/BPTCEncoder/src/RGBAEndpoints.h b/BPTCEncoder/src/RGBAEndpoints.h
index cce0d01..eca39f0 100755
--- a/BPTCEncoder/src/RGBAEndpoints.h
+++ b/BPTCEncoder/src/RGBAEndpoints.h
@@ -71,6 +71,7 @@
 #include "Matrix4x4.h"
 
 #include <algorithm>
+#include <cassert>
 #include <cmath>
 #include <cfloat>
 #include <cstring>
@@ -164,11 +165,25 @@ public:
 
   // Returns the error if we were to quantize the colors right now with the
   // given number of buckets and bit mask.
+  template<const uint8 nBuckets>  // bucket count is a compile-time constant; body and the 4/8/16 instantiations are in RGBAEndpoints.cpp
   double QuantizedError(
     const RGBAVector &p1, const RGBAVector &p2,
-    uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec,
+    uint32 bitMask, const RGBAVector &errorMetricVec,
     const int pbits[2] = NULL, uint8 *indices = NULL) const;
 
+  double QuantizedError(  // overload taking nBuckets at run time; forwards to the matching template instantiation
+    const RGBAVector &p1, const RGBAVector &p2,
+    uint32 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec,
+    const int pbits[2] = NULL, uint8 *indices = NULL) const {
+    switch(nBuckets) {  // each case passes the bucket count as a template argument
+      case 4: return QuantizedError<4>(p1, p2, bitMask, errorMetricVec, pbits, indices);
+      case 8: return QuantizedError<8>(p1, p2, bitMask, errorMetricVec, pbits, indices);
+      case 16: return QuantizedError<16>(p1, p2, bitMask, errorMetricVec, pbits, indices);
+    }
+    assert(!"Unsupported num buckets");  // reached only when nBuckets is not 4, 8 or 16
+    return std::numeric_limits<double>::max();  // value returned when the assert is compiled out (NDEBUG)
+  }
+
   bool AllSamePoint() const { return m_Max == m_Min; }
 
   // Returns the principal axis for this point cluster.