From f12ee09f7e94b5a6b4e7dfc8e09faadae55206c8 Mon Sep 17 00:00:00 2001
From: Pavel Krajcevski <pavel@cs.unc.edu>
Date: Tue, 21 Jan 2014 14:46:25 -0500
Subject: [PATCH 1/3] Some formatting and rearrange the BPTC code to be more
 structured like the others

---
 BPTCEncoder/CMakeLists.txt                    |   22 +-
 BPTCEncoder/config/BC7Config.h.in             |   56 -
 BPTCEncoder/config/BPTCConfig.h.in            |   65 +
 .../{BC7Compressor.h => BPTCCompressor.h}     |   24 +-
 ...BC7CompressionMode.h => CompressionMode.h} |   24 +-
 ...essionModeSIMD.h => CompressionModeSIMD.h} |    0
 .../src/{BC7Compressor.cpp => Compressor.cpp} | 2132 +++++++++--------
 ...7CompressorSIMD.cpp => CompressorSIMD.cpp} |    0
 BPTCEncoder/src/ParallelStage.cpp             |    4 +-
 BPTCEncoder/src/ParallelStage.h               |   59 +-
 BPTCEncoder/src/RGBAEndpoints.cpp             |   90 +-
 Base/include/CompressionJob.h                 |   57 +-
 CLTool/src/tc.cpp                             |    2 +-
 CMakeLists.txt                                |   51 +-
 Core/src/CompressedImage.cpp                  |   55 +-
 Core/src/TexComp.cpp                          |   22 +-
 Core/src/ThreadGroup.cpp                      |    2 +-
 Core/src/WorkerQueue.cpp                      |    2 +-
 18 files changed, 1369 insertions(+), 1298 deletions(-)
 delete mode 100644 BPTCEncoder/config/BC7Config.h.in
 create mode 100644 BPTCEncoder/config/BPTCConfig.h.in
 rename BPTCEncoder/include/{BC7Compressor.h => BPTCCompressor.h} (91%)
 rename BPTCEncoder/src/{BC7CompressionMode.h => CompressionMode.h} (96%)
 rename BPTCEncoder/src/{BC7CompressionModeSIMD.h => CompressionModeSIMD.h} (100%)
 rename BPTCEncoder/src/{BC7Compressor.cpp => Compressor.cpp} (58%)
 rename BPTCEncoder/src/{BC7CompressorSIMD.cpp => CompressorSIMD.cpp} (100%)

diff --git a/BPTCEncoder/CMakeLists.txt b/BPTCEncoder/CMakeLists.txt
index 6b1b125..6581238 100644
--- a/BPTCEncoder/CMakeLists.txt
+++ b/BPTCEncoder/CMakeLists.txt
@@ -47,7 +47,7 @@ INCLUDE_DIRECTORIES(${FasTC_BINARY_DIR}/BPTCEncoder/include)
 
 IF(NOT "" STREQUAL "${AVPCLLIB_ROOT}")
   INCLUDE_DIRECTORIES(${AVPCLLIB_INCLUDE_DIR})
-  SET(FOUND_NVTT_BC7_EXPORT TRUE)
+  SET(FOUND_NVTT_BPTC_EXPORT TRUE)
 ENDIF()
 
 INCLUDE(CheckCXXSourceRuns)
@@ -57,9 +57,11 @@ IF( NOT HAS_INLINE_ASSEMBLY AND NOT HAS_INLINE_ASSEMBLY_WITH_FLAGS )
 ENDIF()
 
 # Check to see whether or not our compiler supports atomic operations
-IF( "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" )
+IF( "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR
+	"${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" )
   SET( COMPILER_CLANG True )
-ELSEIF( "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" )
+ELSEIF( "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR
+		"${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" )
   SET( COMPILER_GNU True )
 ENDIF()
 
@@ -96,19 +98,21 @@ IF( HAS_MSVC_ATOMICS OR HAS_GCC_ATOMICS )
 ENDIF()
 
 CONFIGURE_FILE(
-  "config/BC7Config.h.in"
-  "include/BC7Config.h"
+  "config/BPTCConfig.h.in"
+  "include/BPTCConfig.h"
 )
 
 SET( HEADERS
-  src/BC7CompressionMode.h
+  config/BPTCConfig.h.in
+  include/BPTCCompressor.h
+  src/CompressionMode.h
   src/BitStream.h
   src/RGBAEndpoints.h
   src/ParallelStage.h
 )
 
 SET( SOURCES
-  src/BC7Compressor.cpp
+  src/Compressor.cpp
   src/RGBAEndpoints.cpp
   src/ParallelStage.cpp
 )
@@ -132,12 +136,12 @@ IF( HAS_SSE_41 )
   SET( HEADERS
 	${HEADERS}
 	src/RGBAEndpointsSIMD.h
-	src/BC7CompressionModeSIMD.h
+	src/CompressionModeSIMD.h
   )
 
   SET( SOURCES
 	${SOURCES}
-	src/BC7CompressorSIMD.cpp
+	src/CompressorSIMD.cpp
 	src/RGBAEndpointsSIMD.cpp
   )
 ENDIF( HAS_SSE_41 )
diff --git a/BPTCEncoder/config/BC7Config.h.in b/BPTCEncoder/config/BC7Config.h.in
deleted file mode 100644
index 5a3ae81..0000000
--- a/BPTCEncoder/config/BC7Config.h.in
+++ /dev/null
@@ -1,56 +0,0 @@
-/* FasTC
- * Copyright (c) 2012 University of North Carolina at Chapel Hill. All rights reserved.
- *
- * Permission to use, copy, modify, and distribute this software and its documentation for educational, 
- * research, and non-profit purposes, without fee, and without a written agreement is hereby granted, 
- * provided that the above copyright notice, this paragraph, and the following four paragraphs appear 
- * in all copies.
- *
- * Permission to incorporate this software into commercial products may be obtained by contacting the 
- * authors or the Office of Technology Development at the University of North Carolina at Chapel Hill <otd@unc.edu>.
- *
- * This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill. 
- * The software program and documentation are supplied "as is," without any accompanying services from the 
- * University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill 
- * and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The 
- * end-user understands that the program was developed for research purposes and is advised not to rely 
- * exclusively on the program for any reason.
- *
- * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR 
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE 
- * USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE 
- * AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, 
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
- * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY 
- * OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
- * ENHANCEMENTS, OR MODIFICATIONS.
- *
- * Please send all BUG REPORTS to <pavel@cs.unc.edu>.
- *
- * The authors may be contacted via:
- *
- * Pavel Krajcevski
- * Dept of Computer Science
- * 201 S Columbia St
- * Frederick P. Brooks, Jr. Computer Science Bldg
- * Chapel Hill, NC 27599-3175
- * USA
- * 
- * <http://gamma.cs.unc.edu/FasTC/>
- */
-
-// BC7Config.h.in  -- This file contains variables that are introduced
-// explicitly by the CMake build process.
-
-// Do we have the proper popcnt instruction defined?
-#cmakedefine NO_INLINE_ASSEMBLY
-#cmakedefine HAS_SSE_POPCNT
-#cmakedefine HAS_SSE_41
-
-#cmakedefine HAS_ATOMICS
-#cmakedefine HAS_GCC_ATOMICS
-#cmakedefine HAS_MSVC_ATOMICS
-
-#cmakedefine FOUND_NVTT_BC7_EXPORT
diff --git a/BPTCEncoder/config/BPTCConfig.h.in b/BPTCEncoder/config/BPTCConfig.h.in
new file mode 100644
index 0000000..a3d2570
--- /dev/null
+++ b/BPTCEncoder/config/BPTCConfig.h.in
@@ -0,0 +1,65 @@
+/* FasTC
+ * Copyright (c) 2014 University of North Carolina at Chapel Hill.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for educational, research, and non-profit purposes, without
+ * fee, and without a written agreement is hereby granted, provided that the
+ * above copyright notice, this paragraph, and the following four paragraphs
+ * appear in all copies.
+ *
+ * Permission to incorporate this software into commercial products may be
+ * obtained by contacting the authors or the Office of Technology Development
+ * at the University of North Carolina at Chapel Hill <otd@unc.edu>.
+ *
+ * This software program and documentation are copyrighted by the University of
+ * North Carolina at Chapel Hill. The software program and documentation are
+ * supplied "as is," without any accompanying services from the University of
+ * North Carolina at Chapel Hill or the authors. The University of North
+ * Carolina at Chapel Hill and the authors do not warrant that the operation of
+ * the program will be uninterrupted or error-free. The end-user understands
+ * that the program was developed for research purposes and is advised not to
+ * rely exclusively on the program for any reason.
+ *
+ * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
+ * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
+ * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
+ * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
+ * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
+ * AN "AS IS" BASIS, AND THE UNIVERSITY  OF NORTH CAROLINA AT CHAPEL HILL AND
+ * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
+ * ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Please send all BUG REPORTS to <pavel@cs.unc.edu>.
+ *
+ * The authors may be contacted via:
+ *
+ * Pavel Krajcevski
+ * Dept of Computer Science
+ * 201 S Columbia St
+ * Frederick P. Brooks, Jr. Computer Science Bldg
+ * Chapel Hill, NC 27599-3175
+ * USA
+ * 
+ * <http://gamma.cs.unc.edu/FasTC/>
+ */
+
+// BPTCConfig.h.in  -- This file contains variables that are introduced
+// explicitly by the CMake build process.
+
+// Do we have the proper popcnt instruction defined?
+#cmakedefine NO_INLINE_ASSEMBLY
+#cmakedefine HAS_SSE_POPCNT
+#cmakedefine HAS_SSE_41
+
+#cmakedefine HAS_ATOMICS
+#cmakedefine HAS_GCC_ATOMICS
+#cmakedefine HAS_MSVC_ATOMICS
+// Name must match the SET() in CMakeLists.txt and the #ifdef in BPTCCompressor.h
+#cmakedefine FOUND_NVTT_BPTC_EXPORT
diff --git a/BPTCEncoder/include/BC7Compressor.h b/BPTCEncoder/include/BPTCCompressor.h
similarity index 91%
rename from BPTCEncoder/include/BC7Compressor.h
rename to BPTCEncoder/include/BPTCCompressor.h
index 4eb43fc..4d69741 100755
--- a/BPTCEncoder/include/BC7Compressor.h
+++ b/BPTCEncoder/include/BPTCCompressor.h
@@ -1,5 +1,5 @@
 /* FasTC
- * Copyright (c) 2012 University of North Carolina at Chapel Hill.
+ * Copyright (c) 2014 University of North Carolina at Chapel Hill.
  * All rights reserved.
  *
  * Permission to use, copy, modify, and distribute this software and its
@@ -73,15 +73,15 @@
 //
 //------------------------------------------------------------------------------
 
-#ifndef BPTCENCODER_INCLUDE_BC7COMPRESSOR_H_
-#define BPTCENCODER_INCLUDE_BC7COMPRESSOR_H_
+#ifndef BPTCENCODER_INCLUDE_BPTCCOMPRESSOR_H_
+#define BPTCENCODER_INCLUDE_BPTCCOMPRESSOR_H_
 
-#include "BC7Config.h"
+#include "BPTCConfig.h"
 #include "CompressionJob.h"
 
 #include <iosfwd>
 
-namespace BC7C {
+namespace BPTCC {
   // This is the error metric that is applied to our error measurement algorithm
   // in order to bias calculation towards results that are more in-line with
   // how the Human Visual System works. Uniform error means that each color
@@ -112,7 +112,7 @@ namespace BC7C {
   void SetQualityLevel(int q);
   int GetQualityLevel();
 
-  // Compress the image given as RGBA data to BC7 format. Width and Height are
+  // Compress the image given as RGBA data to BPTC format. Width and Height are
   // the dimensions of the image in pixels.
   void Compress(const FasTC::CompressionJob &);
 
@@ -123,10 +123,10 @@ namespace BC7C {
   void CompressWithStats(const FasTC::CompressionJob &, std::ostream *logStream);
 
 #ifdef HAS_SSE_41
-  // Compress the image given as RGBA data to BC7 format using an algorithm
+  // Compress the image given as RGBA data to BPTC format using an algorithm
   // optimized for SIMD enabled platforms. Width and Height are the dimensions
   // of the image in pixels.
-  void CompressImageBC7SIMD(const unsigned char* inBuf, unsigned char* outBuf,
+  void CompressImageBPTCSIMD(const unsigned char* inBuf, unsigned char* outBuf,
                             unsigned int width, unsigned int height);
 #endif
 
@@ -138,7 +138,7 @@ namespace BC7C {
   void CompressAtomic(FasTC::CompressionJobList &);
 #endif
 
-#ifdef FOUND_NVTT_BC7_EXPORT
+#ifdef FOUND_NVTT_BPTC_EXPORT
   // These functions take the same arguments as Compress and CompressWithStats,
   // but they use the NVTT compressor if it was supplied to CMake.
   void CompressNVTT(const FasTC::CompressionJob &);
@@ -146,9 +146,9 @@ namespace BC7C {
                              std::ostream *logStream);
 #endif
 
-  // Decompress the image given as BC7 data to R8G8B8A8 format. Width and Height
+  // Decompress the image given as BPTC data to R8G8B8A8 format. Width and Height
   // are the dimensions of the image in pixels.
   void Decompress(const FasTC::DecompressionJob &);
-}  // namespace BC7C
+}  // namespace BPTCC
 
-#endif  // BPTCENCODER_INCLUDE_BC7COMPRESSOR_H_
+#endif  // BPTCENCODER_INCLUDE_BPTCCOMPRESSOR_H_
diff --git a/BPTCEncoder/src/BC7CompressionMode.h b/BPTCEncoder/src/CompressionMode.h
similarity index 96%
rename from BPTCEncoder/src/BC7CompressionMode.h
rename to BPTCEncoder/src/CompressionMode.h
index 4dc6cf0..6eda945 100755
--- a/BPTCEncoder/src/BC7CompressionMode.h
+++ b/BPTCEncoder/src/CompressionMode.h
@@ -1,5 +1,5 @@
 /* FasTC
- * Copyright (c) 2012 University of North Carolina at Chapel Hill.
+ * Copyright (c) 2014 University of North Carolina at Chapel Hill.
  * All rights reserved.
  *
  * Permission to use, copy, modify, and distribute this software and its
@@ -73,13 +73,16 @@
 //
 //------------------------------------------------------------------------------
 
-#ifndef BPTCENCODER_SRC_BC7COMPRESSIONMODE_H_
-#define BPTCENCODER_SRC_BC7COMPRESSIONMODE_H_
+#ifndef BPTCENCODER_SRC_BPTCCOMPRESSIONMODE_H_
+#define BPTCENCODER_SRC_BPTCCOMPRESSIONMODE_H_
 
 #include "RGBAEndpoints.h"
 
-// Forward Declarations
 class BitStream;
+
+namespace BPTCC {
+
+// Forward Declarations
 struct VisitedState;
 const int kMaxEndpoints = 3;
 
@@ -90,7 +93,7 @@ static const int kPBits[4][2] = {
   { 1, 1 }
 };
 
-class BC7CompressionMode {
+class CompressionMode {
 
  public:
 
@@ -100,13 +103,13 @@ class BC7CompressionMode {
   // This initializes the compression variables used in order to compress a list
   // of clusters. We can increase the speed a tad by specifying whether or not
   // the block is opaque or not.
-  explicit BC7CompressionMode(int mode, bool opaque = true)
+  explicit CompressionMode(int mode, bool opaque = true)
     : m_IsOpaque(opaque)
     , m_Attributes(&(kModeAttributes[mode]))
     , m_RotateMode(0)
     , m_IndexMode(0)
   { }
-  ~BC7CompressionMode() { }
+  ~CompressionMode() { }
 
   // This function compresses a group of clusters into the passed bitstream. The
   // size of the clusters array is determined by the BC7 compression mode.
@@ -195,7 +198,7 @@ class BC7CompressionMode {
 
   // This returns the proper error metric even if we have rotation bits set
   RGBAVector GetErrorMetric() const {
-    const float *w = BC7C::GetErrorMetric();
+    const float *w = BPTCC::GetErrorMetric();
     switch(GetRotationMode()) {
       default:
       case 0: return RGBAVector(w[0], w[1], w[2], w[3]);
@@ -307,6 +310,7 @@ class BC7CompressionMode {
                             int &bestPBitCombo) const;
 };
 
-extern const uint32 kBC7InterpolationValues[4][16][2];
+extern const uint32 kInterpolationValues[4][16][2];
 
-#endif  // BPTCENCODER_SRC_BC7COMPRESSIONMODE_H_
+}  // namespace BPTCC
+#endif  // BPTCENCODER_SRC_BPTCCOMPRESSIONMODE_H_
diff --git a/BPTCEncoder/src/BC7CompressionModeSIMD.h b/BPTCEncoder/src/CompressionModeSIMD.h
similarity index 100%
rename from BPTCEncoder/src/BC7CompressionModeSIMD.h
rename to BPTCEncoder/src/CompressionModeSIMD.h
diff --git a/BPTCEncoder/src/BC7Compressor.cpp b/BPTCEncoder/src/Compressor.cpp
similarity index 58%
rename from BPTCEncoder/src/BC7Compressor.cpp
rename to BPTCEncoder/src/Compressor.cpp
index 1332cc4..de3ec0d 100755
--- a/BPTCEncoder/src/BC7Compressor.cpp
+++ b/BPTCEncoder/src/Compressor.cpp
@@ -73,8 +73,8 @@
 //
 //------------------------------------------------------------------------------
 
-#include "BC7Compressor.h"
-#include "BC7CompressionMode.h"
+#include "BPTCCompressor.h"
+#include "CompressionMode.h"
 
 #include "TexCompTypes.h"
 #include "BCLookupTables.h"
@@ -302,7 +302,9 @@ static void insert(T* buf, int bufSz, T newVal, int idx = 0) {
 template <typename T>
 static inline void swap(T &a, T &b) { T t = a; a = b; b = t; }
 
-const uint32 kBC7InterpolationValues[4][16][2] = {
+namespace BPTCC {
+
+const uint32 kInterpolationValues[4][16][2] = {
   { {64, 0}, {33, 31}, {0, 64}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0},
     {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} },
   { {64, 0}, {43, 21}, {21, 43}, {0, 64}, {0, 0}, {0, 0}, {0, 0}, {0, 0},
@@ -313,44 +315,44 @@ const uint32 kBC7InterpolationValues[4][16][2] = {
     {30, 34}, {26, 38}, {21, 43}, {17, 47}, {13, 51}, {9, 55}, {4, 60}, {0, 64}}
 };
 
-int BC7CompressionMode::MaxAnnealingIterations = 50;  // This is a setting.
+int CompressionMode::MaxAnnealingIterations = 50;  // This is a setting.
 
-BC7CompressionMode::Attributes
-BC7CompressionMode::kModeAttributes[kNumModes] = {
+CompressionMode::Attributes
+CompressionMode::kModeAttributes[kNumModes] = {
   // Mode 0
   { 0, 4, 3, 3, 0, 4, 0,
-    false, false, BC7CompressionMode::ePBitType_NotShared },
+    false, false, CompressionMode::ePBitType_NotShared },
 
   // Mode 1
   { 1, 6, 2, 3, 0, 6, 0,
-    false, false, BC7CompressionMode::ePBitType_Shared },
+    false, false, CompressionMode::ePBitType_Shared },
 
   // Mode 2
   { 2, 6, 3, 2, 0, 5, 0,
-    false, false, BC7CompressionMode::ePBitType_None },
+    false, false, CompressionMode::ePBitType_None },
 
   // Mode 3
   { 3, 6, 2, 2, 0, 7, 0,
-    false, false, BC7CompressionMode::ePBitType_NotShared },
+    false, false, CompressionMode::ePBitType_NotShared },
 
   // Mode 4
   { 4, 0, 1, 2, 3, 5, 6,
-    true,  true,   BC7CompressionMode::ePBitType_None },
+    true,  true,   CompressionMode::ePBitType_None },
 
   // Mode 5
   { 5, 0, 1, 2, 2, 7, 8,
-    true,  false, BC7CompressionMode::ePBitType_None },
+    true,  false, CompressionMode::ePBitType_None },
 
   // Mode 6
   { 6, 0, 1, 4, 0, 7, 7,
-    false, false, BC7CompressionMode::ePBitType_NotShared },
+    false, false, CompressionMode::ePBitType_NotShared },
 
   // Mode 7
   { 7, 6, 2, 2, 0, 5, 5,
-    false, false, BC7CompressionMode::ePBitType_NotShared },
+    false, false, CompressionMode::ePBitType_NotShared },
 };
 
-void BC7CompressionMode::ClampEndpointsToGrid(
+void CompressionMode::ClampEndpointsToGrid(
   RGBAVector &p1, RGBAVector &p2, int &bestPBitCombo
 ) const {
   const int nPbitCombos = GetNumPbitCombos();
@@ -390,7 +392,7 @@ void BC7CompressionMode::ClampEndpointsToGrid(
   p2 = bp2;
 }
 
-double BC7CompressionMode::CompressSingleColor(
+double CompressionMode::CompressSingleColor(
   const RGBAVector &p, RGBAVector &p1, RGBAVector &p2,
   int &bestPbitCombo
 ) const {
@@ -449,8 +451,8 @@ double BC7CompressionMode::CompressSingleColor(
       }
 
       const uint32 bpi = GetNumberOfBitsPerIndex() - 1;
-      const uint32 interpVal0 = kBC7InterpolationValues[bpi][1][0];
-      const uint32 interpVal1 = kBC7InterpolationValues[bpi][1][1];
+      const uint32 interpVal0 = kInterpolationValues[bpi][1][0];
+      const uint32 interpVal1 = kInterpolationValues[bpi][1][1];
 
       // Find the closest interpolated val that to the given val...
       uint32 bestChannelDist = 0xFF;
@@ -473,7 +475,7 @@ double BC7CompressionMode::CompressSingleColor(
       dist[ci] = std::max(bestChannelDist, dist[ci]);
     }
 
-    const float *errorWeights = BC7C::GetErrorMetric();
+    const float *errorWeights = BPTCC::GetErrorMetric();
     float error = 0.0;
     for(uint32 i = 0; i < kNumColorChannels; i++) {
       float e = static_cast<float>(dist[i]) * errorWeights[i];
@@ -565,7 +567,7 @@ struct VisitedState {
   int pBitCombo;
 };
 
-void BC7CompressionMode::PickBestNeighboringEndpoints(
+void CompressionMode::PickBestNeighboringEndpoints(
   const RGBACluster &cluster,
   const RGBAVector &p1, const RGBAVector &p2, const int curPbitCombo,
   RGBAVector &np1, RGBAVector &np2, int &nPbitCombo,
@@ -645,9 +647,6 @@ void BC7CompressionMode::PickBestNeighboringEndpoints(
 // the bytes interpreted as a float. This prevents two things: 1, a
 // division, and 2, a cast from an integer to a float.
 
-#define COMPILE_ASSERT(x) extern int __compile_assert_[static_cast<int>(x)];
-COMPILE_ASSERT(RAND_MAX == 0x7FFF)
-
 static inline float frand() {
   // RAND_MAX is 0x7FFF, which offers 15 bits
   // of precision. Therefore, we move the bits
@@ -663,7 +662,10 @@ static inline float frand() {
   return fltUnion.flt - 1.0f;
 }
 
-bool BC7CompressionMode::AcceptNewEndpointError(
+#define COMPILE_ASSERT(x) extern int __compile_assert_[static_cast<int>(x)];
+COMPILE_ASSERT(RAND_MAX == 0x7FFF)
+
+bool CompressionMode::AcceptNewEndpointError(
   double newError, double oldError, float temp
 ) const {
   // Always accept better endpoints.
@@ -677,7 +679,7 @@ bool BC7CompressionMode::AcceptNewEndpointError(
   return r < p;
 }
 
-double BC7CompressionMode::OptimizeEndpointsForCluster(
+double CompressionMode::OptimizeEndpointsForCluster(
   const RGBACluster &cluster,
   RGBAVector &p1, RGBAVector &p2,
   int *bestIndices,
@@ -771,7 +773,7 @@ double BC7CompressionMode::OptimizeEndpointsForCluster(
   return bestError;
 }
 
-double BC7CompressionMode::CompressCluster(
+double CompressionMode::CompressCluster(
   const RGBACluster &cluster,
   RGBAVector &p1, RGBAVector &p2,
   int *bestIndices,
@@ -851,7 +853,7 @@ double BC7CompressionMode::CompressCluster(
   typedef tInterpPair tInterpLevel[16];
 
   const tInterpLevel *interpVals =
-    kBC7InterpolationValues + (GetNumberOfBitsPerAlpha() - 1);
+    kInterpolationValues + (GetNumberOfBitsPerAlpha() - 1);
 
   const float weight = GetErrorMetric().a;
 
@@ -1062,7 +1064,7 @@ double BC7CompressionMode::CompressCluster(
   return rgbError + alphaError;
 }
 
-double BC7CompressionMode::CompressCluster(
+double CompressionMode::CompressCluster(
   const RGBACluster &cluster,
   RGBAVector &p1, RGBAVector &p2,
   int *bestIndices,
@@ -1244,7 +1246,7 @@ double BC7CompressionMode::CompressCluster(
   );
 }
 
-double BC7CompressionMode::Compress(
+double CompressionMode::Compress(
   BitStream &stream, const int shapeIdx, const RGBACluster *clusters
 ) {
 
@@ -1519,873 +1521,935 @@ double BC7CompressionMode::Compress(
   return totalErr;
 }
 
-namespace BC7C {
-  static ErrorMetric gErrorMetric = eErrorMetric_Uniform;
-  void SetErrorMetric(ErrorMetric e) { gErrorMetric = e; }
+static ErrorMetric gErrorMetric = eErrorMetric_Uniform;
+void SetErrorMetric(ErrorMetric e) { gErrorMetric = e; }
 
-  ALIGN_SSE const float kErrorMetrics[kNumErrorMetrics][kNumColorChannels] = {
-    { 1.0f, 1.0f, 1.0f, 1.0f },
-    { sqrtf(0.3f), sqrtf(0.56f), sqrtf(0.11f), 1.0f }
-  };
+ALIGN_SSE const float kErrorMetrics[kNumErrorMetrics][kNumColorChannels] = {
+  { 1.0f, 1.0f, 1.0f, 1.0f },
+  { sqrtf(0.3f), sqrtf(0.56f), sqrtf(0.11f), 1.0f }
+};
 
-  const float *GetErrorMetric() { return kErrorMetrics[GetErrorMetricEnum()]; }
-  ErrorMetric GetErrorMetricEnum() { return gErrorMetric; }
+const float *GetErrorMetric() { return kErrorMetrics[GetErrorMetricEnum()]; }
+ErrorMetric GetErrorMetricEnum() { return gErrorMetric; }
 
-  class BlockLogger {
-   public:
-    BlockLogger(uint64 blockIdx, std::ostream &os)
-      : m_BlockIdx(blockIdx), m_Stream(os) { }
-
-    template<typename T>
-    friend std::ostream &operator<<(const BlockLogger &bl, const T &v);
-
-    uint64 m_BlockIdx;
-    std::ostream &m_Stream;
-  };
+class BlockLogger {
+  public:
+  BlockLogger(uint64 blockIdx, std::ostream &os)
+    : m_BlockIdx(blockIdx), m_Stream(os) { }
 
   template<typename T>
-  std::ostream &operator<<(const BlockLogger &bl, const T &v) {
-    std::stringstream ss;
-    ss << bl.m_BlockIdx << ": " << v;
-    return bl.m_Stream << ss.str();
+  friend std::ostream &operator<<(const BlockLogger &bl, const T &v);
+
+  uint64 m_BlockIdx;
+  std::ostream &m_Stream;
+};
+
+template<typename T>
+std::ostream &operator<<(const BlockLogger &bl, const T &v) {
+  std::stringstream ss;
+  ss << bl.m_BlockIdx << ": " << v;
+  return bl.m_Stream << ss.str();
+}
+
+// Function prototypes
+static void CompressBC7Block(const uint32 *block, uint8 *outBuf);
+static void CompressBC7Block(
+  const uint32 *block, uint8 *outBuf, const BlockLogger &logStream
+);
+
+static int gQualityLevel = 50;
+void SetQualityLevel(int q) {
+  gQualityLevel = std::max(0, q);
+  const int kMaxIters = CompressionMode::kMaxAnnealingIterations;
+  CompressionMode::MaxAnnealingIterations =
+    std::min(kMaxIters, GetQualityLevel());
+}
+int GetQualityLevel() { return gQualityLevel; }
+
+// Returns true if the entire block is a single color.
+static bool AllOneColor(const uint32 block[16]) {
+  const uint32 pixel = block[0];
+  for(int i = 1; i < 16; i++) {
+    if( block[i] != pixel )
+      return false;
   }
 
-  // Function prototypes
-  static void CompressBC7Block(const uint32 *block, uint8 *outBuf);
-  static void CompressBC7Block(
-    const uint32 *block, uint8 *outBuf, const BlockLogger &logStream
-  );
+  return true;
+}
 
-  static int gQualityLevel = 50;
-  void SetQualityLevel(int q) {
-    gQualityLevel = std::max(0, q);
-    const int kMaxIters = BC7CompressionMode::kMaxAnnealingIterations;
-    BC7CompressionMode::MaxAnnealingIterations =
-      std::min(kMaxIters, GetQualityLevel());
-  }
-  int GetQualityLevel() { return gQualityLevel; }
+// Write out a transparent block.
+static void WriteTransparentBlock(BitStream &stream) {
+  // Use mode 6
+  stream.WriteBits(1 << 6, 7);
+  stream.WriteBits(0, 128-7);
+  assert(stream.GetBitsWritten() == 128);
+}
 
-  // Returns true if the entire block is a single color.
-  static bool AllOneColor(const uint32 block[16]) {
-    const uint32 pixel = block[0];
-    for(int i = 1; i < 16; i++) {
-      if( block[i] != pixel )
-        return false;
-    }
+// Compresses a single color optimally and outputs the result.
+static void CompressOptimalColorBC7(uint32 pixel, BitStream &stream) {
 
-    return true;
-  }
+  stream.WriteBits(1 << 5, 6);  // Mode 5
+  stream.WriteBits(0, 2);  // No rotation bits.
 
-  // Write out a transparent block.
-  static void WriteTransparentBlock(BitStream &stream) {
-    // Use mode 6
-    stream.WriteBits(1 << 6, 7);
-    stream.WriteBits(0, 128-7);
-    assert(stream.GetBitsWritten() == 128);
-  }
+  uint8 r = pixel & 0xFF;
+  uint8 g = (pixel >> 8) & 0xFF;
+  uint8 b = (pixel >> 16) & 0xFF;
+  uint8 a = (pixel >> 24) & 0xFF;
 
-  // Compresses a single color optimally and outputs the result.
-  static void CompressOptimalColorBC7(uint32 pixel, BitStream &stream) {
+  // Red endpoints
+  stream.WriteBits(Optimal7CompressBC7Mode5[r][0], 7);
+  stream.WriteBits(Optimal7CompressBC7Mode5[r][1], 7);
 
-    stream.WriteBits(1 << 5, 6);  // Mode 5
-    stream.WriteBits(0, 2);  // No rotation bits.
+  // Green endpoints
+  stream.WriteBits(Optimal7CompressBC7Mode5[g][0], 7);
+  stream.WriteBits(Optimal7CompressBC7Mode5[g][1], 7);
 
-    uint8 r = pixel & 0xFF;
-    uint8 g = (pixel >> 8) & 0xFF;
-    uint8 b = (pixel >> 16) & 0xFF;
-    uint8 a = (pixel >> 24) & 0xFF;
+  // Blue endpoints
+  stream.WriteBits(Optimal7CompressBC7Mode5[b][0], 7);
+  stream.WriteBits(Optimal7CompressBC7Mode5[b][1], 7);
 
-    // Red endpoints
-    stream.WriteBits(Optimal7CompressBC7Mode5[r][0], 7);
-    stream.WriteBits(Optimal7CompressBC7Mode5[r][1], 7);
+  // Alpha endpoints... are just the same.
+  stream.WriteBits(a, 8);
+  stream.WriteBits(a, 8);
 
-    // Green endpoints
-    stream.WriteBits(Optimal7CompressBC7Mode5[g][0], 7);
-    stream.WriteBits(Optimal7CompressBC7Mode5[g][1], 7);
+  // Color indices are 1 for each pixel...
+  // Anchor index is 0, so 1 bit for the first pixel, then
+  // 01 for each following pixel giving the sequence of 31 bits:
+  // ...010101011
+  stream.WriteBits(0xaaaaaaab, 31);
 
-    // Blue endpoints
-    stream.WriteBits(Optimal7CompressBC7Mode5[b][0], 7);
-    stream.WriteBits(Optimal7CompressBC7Mode5[b][1], 7);
+  // Alpha indices...
+  stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31);
+}
 
-    // Alpha endpoints... are just the same.
-    stream.WriteBits(a, 8);
-    stream.WriteBits(a, 8);
+static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]);
 
-    // Color indices are 1 for each pixel...
-    // Anchor index is 0, so 1 bit for the first pixel, then
-    // 01 for each following pixel giving the sequence of 31 bits:
-    // ...010101011
-    stream.WriteBits(0xaaaaaaab, 31);
+void GetBlock(const uint32 x, const uint32 y, const uint32 pixelsWide,
+              const uint32 *inPixels, uint32 block[16]) {
+  memcpy(block, inPixels + y*pixelsWide + x, 4 * sizeof(uint32));
+  memcpy(block + 4, inPixels + (y+1)*pixelsWide + x, 4 * sizeof(uint32));
+  memcpy(block + 8, inPixels + (y+2)*pixelsWide + x, 4 * sizeof(uint32));
+  memcpy(block + 12, inPixels + (y+3)*pixelsWide + x, 4 * sizeof(uint32));
+}
 
-    // Alpha indices...
-    stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31);
-  }
+// Compress an image using BC7 compression. Use the inBuf parameter to point
+// to an image in 4-byte RGBA format. The width and height parameters specify
+// the size of the image in pixels. The buffer pointed to by outBuf should be
+// large enough to store the compressed image. This implementation has a 4:1
+// compression ratio.
+void Compress(const FasTC::CompressionJob &cj) {
+  const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
+  const uint32 kBlockSz = GetBlockSize(FasTC::eCompressionFormat_BPTC);
+  uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz;
 
-  static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]);
+  uint32 startX = cj.XStart();
+  bool done = false;
 
-  void GetBlock(const uint32 x, const uint32 y, const uint32 pixelsWide,
-                const uint32 *inPixels, uint32 block[16]) {
-    memcpy(block, inPixels + y*pixelsWide + x, 4 * sizeof(uint32));
-    memcpy(block + 4, inPixels + (y+1)*pixelsWide + x, 4 * sizeof(uint32));
-    memcpy(block + 8, inPixels + (y+2)*pixelsWide + x, 4 * sizeof(uint32));
-    memcpy(block + 12, inPixels + (y+3)*pixelsWide + x, 4 * sizeof(uint32));
-  }
+  for(uint32 j = cj.YStart(); !done; j += 4) {
+    for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
 
-  // Compress an image using BC7 compression. Use the inBuf parameter to point
-  // to an image in 4-byte RGBA format. The width and height parameters specify
-  // the size of the image in pixels. The buffer pointed to by outBuf should be
-  // large enough to store the compressed image. This implementation has an 4:1
-  // compression ratio.
-  void Compress(const FasTC::CompressionJob &cj) {
-    const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
-    const uint32 kBlockSz = GetBlockSize(FasTC::eCompressionFormat_BPTC);
-    uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz;
-
-    uint32 startX = cj.XStart();
-    bool done = false;
-
-    for(uint32 j = cj.YStart(); !done; j += 4) {
-      for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
-
-        uint32 block[16];
-        GetBlock(i, j, cj.Width(), inPixels, block);
-        CompressBC7Block(block, outBuf);
+      uint32 block[16];
+      GetBlock(i, j, cj.Width(), inPixels, block);
+      CompressBC7Block(block, outBuf);
 
 #ifndef NDEBUG
-        const uint8 *inBlock = reinterpret_cast<const uint8 *>(block);
-        const uint8 *cmpblock = reinterpret_cast<const uint8 *>(outBuf);
-        uint32 unComp[16];
-        DecompressBC7Block(cmpblock, unComp);
-        const uint8* unCompData = reinterpret_cast<const uint8 *>(unComp);
+      const uint8 *inBlock = reinterpret_cast<const uint8 *>(block);
+      const uint8 *cmpblock = reinterpret_cast<const uint8 *>(outBuf);
+      uint32 unComp[16];
+      DecompressBC7Block(cmpblock, unComp);
+      const uint8* unCompData = reinterpret_cast<const uint8 *>(unComp);
 
-        double diffSum = 0.0;
-        for(int k = 0; k < 64; k+=4) {
-          double rdiff = sad(unCompData[k], inBlock[k]);
-          double gdiff = sad(unCompData[k+1], inBlock[k+1]);
-          double bdiff = sad(unCompData[k+2], inBlock[k+2]);
-          double adiff = sad(unCompData[k+3], inBlock[k+3]);
-          const double asrc = static_cast<double>(inBlock[k+3]);
-          const double adst = static_cast<double>(unCompData[k+3]);
-          double avga = ((asrc + adst)*0.5)/255.0;
-          diffSum += (rdiff + gdiff + bdiff + adiff) * avga;
-        }
-        double blockError = static_cast<double>(diffSum) / 64.0;
-        if(blockError > 5.0) {
-          fprintf(stderr, "WARNING: Block error very high"
-                          " at <%d, %d>: (%.2f)\n", i, j, blockError);
-        }
+      double diffSum = 0.0;
+      for(int k = 0; k < 64; k+=4) {
+        double rdiff = sad(unCompData[k], inBlock[k]);
+        double gdiff = sad(unCompData[k+1], inBlock[k+1]);
+        double bdiff = sad(unCompData[k+2], inBlock[k+2]);
+        double adiff = sad(unCompData[k+3], inBlock[k+3]);
+        const double asrc = static_cast<double>(inBlock[k+3]);
+        const double adst = static_cast<double>(unCompData[k+3]);
+        double avga = ((asrc + adst)*0.5)/255.0;
+        diffSum += (rdiff + gdiff + bdiff + adiff) * avga;
+      }
+      double blockError = static_cast<double>(diffSum) / 64.0;
+      if(blockError > 5.0) {
+        fprintf(stderr, "WARNING: Block error very high"
+                        " at <%d, %d>: (%.2f)\n", i, j, blockError);
+      }
 #endif
 
-        outBuf += kBlockSz;
-        done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
-      }
-      startX = 0;
+      outBuf += kBlockSz;
+      done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
     }
+    startX = 0;
   }
+}
 
 #ifdef HAS_ATOMICS
 #ifdef HAS_MSVC_ATOMICS
-  static uint32 TestAndSet(uint32 *x) {
-    return InterlockedExchange(x, 1);
-  }
+static uint32 TestAndSet(uint32 *x) {
+  return InterlockedExchange(x, 1);
+}
 
-  static uint32 FetchAndAdd(uint32 *x) {
-    return InterlockedIncrement(x)-1;
-  }
+static uint32 FetchAndAdd(uint32 *x) {
+  return InterlockedIncrement(x)-1;
+}
 #elif defined HAS_GCC_ATOMICS
-  static uint32 TestAndSet(uint32 *x) {
-    return __sync_lock_test_and_set(x, 1);
-  }
+static uint32 TestAndSet(uint32 *x) {
+  return __sync_lock_test_and_set(x, 1);
+}
 
-  static uint32 FetchAndAdd(uint32 *x) {
-    return __sync_fetch_and_add(x, 1);
-  }
+static uint32 FetchAndAdd(uint32 *x) {
+  return __sync_fetch_and_add(x, 1);
+}
 #endif
 
-  // Variables used for synchronization in threadsafe implementation.
-  void CompressAtomic(FasTC::CompressionJobList &cjl) {
-    uint32 jobIdx;
-    while((jobIdx = cjl.m_CurrentJobIndex) < cjl.GetNumJobs()) {
-      // !HACK! ... Microsoft has this defined
-      #undef GetJob
+// Worker loop for the threadsafe (atomics-based) path: claims blocks via FetchAndAdd.
+void CompressAtomic(FasTC::CompressionJobList &cjl) {
+  uint32 jobIdx;
+  while((jobIdx = cjl.m_CurrentJobIndex) < cjl.GetNumJobs()) {
+    // !HACK! ... Microsoft has this defined
+    #undef GetJob
 
-      const FasTC::CompressionJob *cj = cjl.GetJob(jobIdx);
-      const uint32 nBlocks = (cj->Height() * cj->Width()) / 16;
+    const FasTC::CompressionJob *cj = cjl.GetJob(jobIdx);
+    const uint32 nBlocks = (cj->Height() * cj->Width()) / 16;  // 16 pixels per block
 
-      // Help finish whatever texture we're compressing before we start again on
-      // my work...
-      uint32 blockIdx;
-      while((blockIdx = FetchAndAdd(&(cjl.m_CurrentBlockIndex))) < nBlocks &&
-            *(cjl.GetFinishedFlag(jobIdx)) == 0) {
-        unsigned char *out = cj->OutBuf() + (16 * blockIdx);
+    // Help finish whatever texture we're compressing before we start again on
+    // my work...
+    uint32 blockIdx;
+    while((blockIdx = FetchAndAdd(&(cjl.m_CurrentBlockIndex))) < nBlocks &&
+          *(cjl.GetFinishedFlag(jobIdx)) == 0) {
+      unsigned char *out = cj->OutBuf() + (16 * blockIdx);  // 16 output bytes per block
 
-        uint32 block[16];
-        uint32 x = cj->XStart() + 4 * (blockIdx % (cj->Width() / 4));
-        uint32 y = cj->YStart() + 4 * (blockIdx / (cj->Width() / 4));
-        const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj->InBuf());
-        GetBlock(x, y, cj->Width(), inPixels, block);
-        CompressBC7Block(block, out);
-      }
-
-      if(TestAndSet(cjl.GetFinishedFlag(jobIdx)) == 0) {
-        cjl.m_CurrentBlockIndex = 0;
-        cjl.m_CurrentJobIndex++;
-      }
-
-      // Wait until this texture finishes.
-      while(cjl.m_CurrentJobIndex == jobIdx) { }
+      uint32 block[16];
+      uint32 x = cj->XStart() + 4 * (blockIdx % (cj->Width() / 4));
+      uint32 y = cj->YStart() + 4 * (blockIdx / (cj->Width() / 4));
+      const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj->InBuf());
+      GetBlock(x, y, cj->Width(), inPixels, block);
+      CompressBC7Block(block, out);
     }
+
+    if(TestAndSet(cjl.GetFinishedFlag(jobIdx)) == 0) {  // first thread to set the flag advances the job
+      cjl.m_CurrentBlockIndex = 0;
+      cjl.m_CurrentJobIndex++;
+    }
+
+    // Wait until this texture finishes (busy-waits on the shared job index).
+    while(cjl.m_CurrentJobIndex == jobIdx) { }
   }
+}
 #endif  // HAS_ATOMICS
 
-  void CompressWithStats(const FasTC::CompressionJob &cj, std::ostream *logStream) {
-    const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
-    const uint32 kBlockSz = GetBlockSize(FasTC::eCompressionFormat_BPTC);
-    uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz;
+void CompressWithStats(const FasTC::CompressionJob &cj, std::ostream *logStream) {
+  const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
+  const uint32 kBlockSz = GetBlockSize(FasTC::eCompressionFormat_BPTC);  // bytes per compressed block
+  uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz;
 
-    uint32 startX = cj.XStart();
-    bool done = false;
-    for(uint32 j = cj.YStart(); !done; j += 4) {
-      for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
+  uint32 startX = cj.XStart();  // only the first row starts mid-row; reset to 0 after it
+  bool done = false;
+  for(uint32 j = cj.YStart(); !done; j += 4) {
+    for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
 
-        uint32 block[16];
-        GetBlock(i, j, cj.Width(), inPixels, block);
+      uint32 block[16];
+      GetBlock(i, j, cj.Width(), inPixels, block);
 
-        if(logStream) {
-          uint64 blockIdx = cj.CoordsToBlockIdx(i, j);
-          CompressBC7Block(block, outBuf, BlockLogger(blockIdx, *logStream));
-        } else {
-          CompressBC7Block(block, outBuf);
-        }
+      if(logStream) {  // statistics are only gathered when a log stream is supplied
+        uint64 blockIdx = cj.CoordsToBlockIdx(i, j);
+        CompressBC7Block(block, outBuf, BlockLogger(blockIdx, *logStream));
+      } else {
+        CompressBC7Block(block, outBuf);
+      }
 
 #ifndef NDEBUG
-        const uint8 *inBlock = reinterpret_cast<const uint8 *>(block);
-        const uint8 *cmpData = outBuf;
-        uint32 unComp[16];
-        DecompressBC7Block(cmpData, unComp);
-        const uint8* unCompData = reinterpret_cast<uint8 *>(unComp);
+      const uint8 *inBlock = reinterpret_cast<const uint8 *>(block);  // debug-only round trip check
+      const uint8 *cmpData = outBuf;
+      uint32 unComp[16];
+      DecompressBC7Block(cmpData, unComp);
+      const uint8* unCompData = reinterpret_cast<uint8 *>(unComp);
 
-        uint32 diffSum = 0;
-        for(uint32 k = 0; k < 64; k++) {
-          diffSum += sad(unCompData[k], inBlock[k]);
-        }
-        double blockError = static_cast<double>(diffSum) / 64.0;
-        if(blockError > 50.0) {
-          fprintf(stderr, "WARNING: Block error very high"
-                          " (%.2f)\n", blockError);
-        }
+      uint32 diffSum = 0;
+      for(uint32 k = 0; k < 64; k++) {  // 64 bytes = 16 pixels x 4 channels
+        diffSum += sad(unCompData[k], inBlock[k]);
+      }
+      double blockError = static_cast<double>(diffSum) / 64.0;
+      if(blockError > 50.0) {
+        fprintf(stderr, "WARNING: Block error very high"
+                        " (%.2f)\n", blockError);
+      }
 #endif
 
-        outBuf += 16;
-        done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
-      }
-
-      startX = 0;
+      outBuf += kBlockSz;  // same stride as the start offset above, not a literal 16
+      done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
     }
+
+    startX = 0;
+  }
+}
+
+static double CompressTwoClusters(
+  int shapeIdx,
+  const RGBACluster *clusters,
+  uint8 *outBuf,
+  bool opaque,
+  double *errors = NULL,
+  int *modeChosen = NULL
+) {
+
+  uint8 tempBuf1[16];
+  BitStream tmpStream1(tempBuf1, 128, 0);
+  double bestError =
+    CompressionMode(1, opaque).Compress(tmpStream1, shapeIdx, clusters);
+
+  if(errors) errors[1] = bestError;
+  if(modeChosen) *modeChosen = 1;
+
+  memcpy(outBuf, tempBuf1, 16);
+  if(bestError == 0.0) {
+    return 0.0;
   }
 
-  static double CompressTwoClusters(
-    int shapeIdx,
-    const RGBACluster *clusters,
-    uint8 *outBuf,
-    bool opaque,
-    double *errors = NULL,
-    int *modeChosen = NULL
-  ) {
+  uint8 tempBuf3[16];
+  BitStream tmpStream3(tempBuf3, 128, 0);
 
-    uint8 tempBuf1[16];
-    BitStream tmpStream1(tempBuf1, 128, 0);
-    double bestError =
-      BC7CompressionMode(1, opaque).Compress(tmpStream1, shapeIdx, clusters);
+  double error =
+    CompressionMode(3, opaque).Compress(tmpStream3, shapeIdx, clusters);
 
-    if(errors) errors[1] = bestError;
-    if(modeChosen) *modeChosen = 1;
-
-    memcpy(outBuf, tempBuf1, 16);
+  if(errors) errors[3] = error;
+  if(error < bestError) {
+    if(modeChosen) *modeChosen = 3;
+    bestError = error;
+    memcpy(outBuf, tempBuf3, 16);
     if(bestError == 0.0) {
       return 0.0;
     }
-
-    uint8 tempBuf3[16];
-    BitStream tmpStream3(tempBuf3, 128, 0);
-
-    double error =
-      BC7CompressionMode(3, opaque).Compress(tmpStream3, shapeIdx, clusters);
-
-    if(errors) errors[3] = error;
-    if(error < bestError) {
-      if(modeChosen) *modeChosen = 3;
-      bestError = error;
-      memcpy(outBuf, tempBuf3, 16);
-      if(bestError == 0.0) {
-        return 0.0;
-      }
-    }
-
-    // Mode 3 offers more precision for RGB data. Mode 7 is really only if we
-    // have alpha.
-    if(!opaque) {
-      uint8 tempBuf7[16];
-      BitStream tmpStream7(tempBuf7, 128, 0);
-
-      error =
-        BC7CompressionMode(7, opaque).Compress(tmpStream7, shapeIdx, clusters);
-
-      if(errors) errors[7] = error;
-      if(error < bestError) {
-        if(modeChosen) *modeChosen = 7;
-        memcpy(outBuf, tempBuf7, 16);
-        return error;
-      }
-    }
-
-    return bestError;
   }
 
-  static double CompressThreeClusters(
-    int shapeIdx,
-    const RGBACluster *clusters,
-    uint8 *outBuf,
-    bool opaque,
-    double *errors = NULL,
-    int *modeChosen = NULL
-  ) {
-    uint8 tempBuf0[16];
-    BitStream tmpStream0(tempBuf0, 128, 0);
-
-    uint8 tempBuf2[16];
-    BitStream tmpStream2(tempBuf2, 128, 0);
-
-    double error, bestError = DBL_MAX;;
-    if(shapeIdx < 16) {
-      bestError =
-        BC7CompressionMode(0, opaque).Compress(tmpStream0, shapeIdx, clusters);
-
-      if(errors) errors[0] = bestError;
-    } else {
-      if(errors) errors[0] = -1.0;
-    }
-
-    if(modeChosen) *modeChosen = 0;
-    memcpy(outBuf, tempBuf0, 16);
-    if(bestError == 0.0) {
-      return 0.0;
-    }
+  // Mode 3 offers more precision for RGB data. Mode 7 is really only if we
+  // have alpha.
+  if(!opaque) {
+    uint8 tempBuf7[16];
+    BitStream tmpStream7(tempBuf7, 128, 0);
 
     error =
-      BC7CompressionMode(2, opaque).Compress(tmpStream2, shapeIdx, clusters);
+      CompressionMode(7, opaque).Compress(tmpStream7, shapeIdx, clusters);
 
-    if(errors) errors[2] = error;
+    if(errors) errors[7] = error;
     if(error < bestError) {
-      if(modeChosen) *modeChosen = 2;
-      memcpy(outBuf, tempBuf2, 16);
+      if(modeChosen) *modeChosen = 7;
+      memcpy(outBuf, tempBuf7, 16);
       return error;
     }
-
-    return bestError;
   }
 
-  static void PopulateTwoClustersForShape(
-    const RGBACluster &points, int shapeIdx, RGBACluster *clusters
-  ) {
-    const uint16 shape = kShapeMask2[shapeIdx];
-    for(uint32 pt = 0; pt < kMaxNumDataPoints; pt++) {
+  return bestError;
+}
 
-      const RGBAVector &p = points.GetPoint(pt);
+static double CompressThreeClusters(
+  int shapeIdx,
+  const RGBACluster *clusters,
+  uint8 *outBuf,
+  bool opaque,
+  double *errors = NULL,
+  int *modeChosen = NULL
+) {
+  uint8 tempBuf0[16] = { 0 };  // zeroed: copied to outBuf below even when mode 0 is skipped
+  BitStream tmpStream0(tempBuf0, 128, 0);
 
-      if((1 << pt) & shape)
-        clusters[1].AddPoint(p);
-      else
-        clusters[0].AddPoint(p);
-    }
+  uint8 tempBuf2[16];
+  BitStream tmpStream2(tempBuf2, 128, 0);
+
+  double error, bestError = DBL_MAX;
+  if(shapeIdx < 16) {  // mode 0 is only attempted for shape indices below 16
+    bestError =
+      CompressionMode(0, opaque).Compress(tmpStream0, shapeIdx, clusters);
+
+    if(errors) errors[0] = bestError;
+  } else {
+    if(errors) errors[0] = -1.0;  // sentinel: mode 0 was not attempted
+  }
+
+  if(modeChosen) *modeChosen = 0;
+  memcpy(outBuf, tempBuf0, 16);
+  if(bestError == 0.0) {  // exact match, no need to try mode 2
+    return 0.0;
+  }
+
+  error =
+    CompressionMode(2, opaque).Compress(tmpStream2, shapeIdx, clusters);
+
+  if(errors) errors[2] = error;
+  if(error < bestError) {  // mode 2 wins: overwrite the mode 0 result
+    if(modeChosen) *modeChosen = 2;
+    memcpy(outBuf, tempBuf2, 16);
+    return error;
+  }
+
+  return bestError;
+}
+
+static void PopulateTwoClustersForShape(
+  const RGBACluster &points, int shapeIdx, RGBACluster *clusters
+) {
+  const uint16 shape = kShapeMask2[shapeIdx];
+  for(uint32 pt = 0; pt < kMaxNumDataPoints; pt++) {
+
+    const RGBAVector &p = points.GetPoint(pt);
+
+    if((1 << pt) & shape)
+      clusters[1].AddPoint(p);
+    else
+      clusters[0].AddPoint(p);
+  }
 
 #ifndef NDEBUG
-    const uint32 pbs1 = clusters[0].GetPointBitString();
-    const uint32 pbs2 = clusters[1].GetPointBitString();
-    assert(!(pbs1 & pbs2));
-    assert((pbs1 ^ pbs2) == 0xFFFF);
-    assert((shape & pbs2) == shape);
+  const uint32 pbs1 = clusters[0].GetPointBitString();
+  const uint32 pbs2 = clusters[1].GetPointBitString();
+  assert(!(pbs1 & pbs2));
+  assert((pbs1 ^ pbs2) == 0xFFFF);
+  assert((shape & pbs2) == shape);
 #endif
-  }
+}
 
-  static void PopulateThreeClustersForShape(
-    const RGBACluster &points, int shapeIdx, RGBACluster *clusters
-  ) {
-    for(uint32 pt = 0; pt < kMaxNumDataPoints; pt++) {
+static void PopulateThreeClustersForShape(
+  const RGBACluster &points, int shapeIdx, RGBACluster *clusters
+) {
+  for(uint32 pt = 0; pt < kMaxNumDataPoints; pt++) {
 
-      const RGBAVector &p = points.GetPoint(pt);
+    const RGBAVector &p = points.GetPoint(pt);
 
-      if((1 << pt) & kShapeMask3[shapeIdx][0]) {
-        if((1 << pt) & kShapeMask3[shapeIdx][1]) {
-          clusters[2].AddPoint(p);
-        } else {
-          clusters[1].AddPoint(p);
-        }
+    if((1 << pt) & kShapeMask3[shapeIdx][0]) {
+      if((1 << pt) & kShapeMask3[shapeIdx][1]) {
+        clusters[2].AddPoint(p);
       } else {
-        clusters[0].AddPoint(p);
+        clusters[1].AddPoint(p);
       }
+    } else {
+      clusters[0].AddPoint(p);
     }
+  }
 
 #ifndef NDEBUG
-    const uint32 pbs1 = clusters[0].GetPointBitString();
-    const uint32 pbs2 = clusters[1].GetPointBitString();
-    const uint32 pbs3 = clusters[2].GetPointBitString();
+  const uint32 pbs1 = clusters[0].GetPointBitString();
+  const uint32 pbs2 = clusters[1].GetPointBitString();
+  const uint32 pbs3 = clusters[2].GetPointBitString();
 
-    assert(!(pbs1 & pbs2));
-    assert(!(pbs3 & pbs2));
-    assert(!(pbs3 & pbs1));
+  assert(!(pbs1 & pbs2));
+  assert(!(pbs3 & pbs2));
+  assert(!(pbs3 & pbs1));
 #endif
+}
+
+static double EstimateTwoClusterError(RGBACluster &c) {
+  RGBAVector Min, Max, v;
+  c.GetBoundingBox(Min, Max);
+  v = Max - Min;
+  if(v * v == 0) {
+    return 0.0;
   }
 
-  static double EstimateTwoClusterError(RGBACluster &c) {
-    RGBAVector Min, Max, v;
-    c.GetBoundingBox(Min, Max);
-    v = Max - Min;
-    if(v * v == 0) {
-      return 0.0;
-    }
+  const float *w = BPTCC::GetErrorMetric();
 
-    const float *w = BC7C::GetErrorMetric();
-
-    double error = 0.0001;
+  double error = 0.0001;
 #ifdef USE_PCA_FOR_SHAPE_ESTIMATION
-    double eigOne = c.GetPrincipalEigenvalue();
-    double eigTwo = c.GetSecondEigenvalue();
-    if(eigOne != 0.0) {
-      error += eigTwo / eigOne;
-    } else {
-      error += 1.0;
-    }
+  double eigOne = c.GetPrincipalEigenvalue();
+  double eigTwo = c.GetSecondEigenvalue();
+  if(eigOne != 0.0) {
+    error += eigTwo / eigOne;
+  } else {
+    error += 1.0;
+  }
 #else
-    error += c.QuantizedError(Min, Max, 8,
-                              0xFFFFFFFF, RGBAVector(w[0], w[1], w[2], w[3]));
+  error += c.QuantizedError(Min, Max, 8,
+                            0xFFFFFFFF, RGBAVector(w[0], w[1], w[2], w[3]));
 #endif
-    return error;
+  return error;
+}
+
+static double EstimateThreeClusterError(RGBACluster &c) {
+  RGBAVector Min, Max, v;
+  c.GetBoundingBox(Min, Max);
+  v = Max - Min;
+  if(v * v == 0) {
+    return 0.0;
   }
 
-  static double EstimateThreeClusterError(RGBACluster &c) {
-    RGBAVector Min, Max, v;
-    c.GetBoundingBox(Min, Max);
-    v = Max - Min;
-    if(v * v == 0) {
-      return 0.0;
-    }
+  const float *w = BPTCC::GetErrorMetric();
 
-    const float *w = BC7C::GetErrorMetric();
-
-    double error = 0.0001;
+  double error = 0.0001;
 #ifdef USE_PCA_FOR_SHAPE_ESTIMATION
-    double eigOne = c.GetPrincipalEigenvalue();
-    double eigTwo = c.GetSecondEigenvalue();
+  double eigOne = c.GetPrincipalEigenvalue();
+  double eigTwo = c.GetSecondEigenvalue();
 
-    if(eigOne != 0.0) {
-      error += eigTwo / eigOne;
-    } else {
-      error += 1.0;
-    }
+  if(eigOne != 0.0) {
+    error += eigTwo / eigOne;
+  } else {
+    error += 1.0;
+  }
 #else
-    error += c.QuantizedError(Min, Max, 4,
-                              0xFFFFFFFF, RGBAVector(w[0], w[1], w[2], w[3]));
+  error += c.QuantizedError(Min, Max, 4,
+                            0xFFFFFFFF, RGBAVector(w[0], w[1], w[2], w[3]));
 #endif
-    return error;
+  return error;
+}
+
+static void CompressBC7Block(const uint32 *block, uint8 *outBuf) {
+  // All a single color?
+  if(AllOneColor(block)) {
+    BitStream bStrm(outBuf, 128, 0);
+    CompressOptimalColorBC7(*block, bStrm);
+    return;
   }
 
-  static void CompressBC7Block(const uint32 *block, uint8 *outBuf) {
-    // All a single color?
-    if(AllOneColor(block)) {
-      BitStream bStrm(outBuf, 128, 0);
-      CompressOptimalColorBC7(*block, bStrm);
+  RGBACluster blockCluster;
+  bool opaque = true;
+  bool transparent = true;
+
+  for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
+    RGBAVector p = RGBAVector(i, block[i]);
+    blockCluster.AddPoint(p);
+    if(fabs(p.a - 255.0f) > 1e-10)
+      opaque = false;
+
+    if(p.a > 0.0f)
+      transparent = false;
+  }
+
+  // The whole block is transparent?
+  if(transparent) {
+    BitStream bStrm(outBuf, 128, 0);
+    WriteTransparentBlock(bStrm);
+    return;
+  }
+
+  // First we must figure out which shape to use. To do this, simply
+  // see which shape has the smallest sum of minimum bounding spheres.
+  double bestError[2] = { DBL_MAX, DBL_MAX };
+  int bestShapeIdx[2] = { -1, -1 };
+  RGBACluster bestClusters[2][3];
+
+  for(unsigned int i = 0; i < kNumShapes2; i++) {
+    RGBACluster clusters[2];
+    PopulateTwoClustersForShape(blockCluster, i, clusters);
+
+    double err = 0.0;
+    for(int ci = 0; ci < 2; ci++) {
+      err += EstimateTwoClusterError(clusters[ci]);
+    }
+
+    // If it's small, we'll take it!
+    if(err < 1e-9) {
+      CompressTwoClusters(i, clusters, outBuf, opaque);
       return;
     }
 
-    RGBACluster blockCluster;
-    bool opaque = true;
-    bool transparent = true;
-
-    for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
-      RGBAVector p = RGBAVector(i, block[i]);
-      blockCluster.AddPoint(p);
-      if(fabs(p.a - 255.0f) > 1e-10)
-        opaque = false;
-
-      if(p.a > 0.0f)
-        transparent = false;
+    if(err < bestError[0]) {
+      bestError[0] = err;
+      bestShapeIdx[0] = i;
+      bestClusters[0][0] = clusters[0];
+      bestClusters[0][1] = clusters[1];
     }
+  }
 
-    // The whole block is transparent?
-    if(transparent) {
-      BitStream bStrm(outBuf, 128, 0);
-      WriteTransparentBlock(bStrm);
-      return;
-    }
+  // There are not 3 subset blocks that support alpha, so only check these
+  // if the entire block is opaque.
+  if(opaque) {
+    for(unsigned int i = 0; i < kNumShapes3; i++) {
 
-    // First we must figure out which shape to use. To do this, simply
-    // see which shape has the smallest sum of minimum bounding spheres.
-    double bestError[2] = { DBL_MAX, DBL_MAX };
-    int bestShapeIdx[2] = { -1, -1 };
-    RGBACluster bestClusters[2][3];
-
-    for(unsigned int i = 0; i < kNumShapes2; i++) {
-      RGBACluster clusters[2];
-      PopulateTwoClustersForShape(blockCluster, i, clusters);
+      RGBACluster clusters[3];
+      PopulateThreeClustersForShape(blockCluster, i, clusters);
 
       double err = 0.0;
-      for(int ci = 0; ci < 2; ci++) {
-        err += EstimateTwoClusterError(clusters[ci]);
+      for(int ci = 0; ci < 3; ci++) {
+        err += EstimateThreeClusterError(clusters[ci]);
       }
 
       // If it's small, we'll take it!
       if(err < 1e-9) {
-        CompressTwoClusters(i, clusters, outBuf, opaque);
+        CompressThreeClusters(i, clusters, outBuf, opaque);
         return;
       }
 
-      if(err < bestError[0]) {
-        bestError[0] = err;
-        bestShapeIdx[0] = i;
-        bestClusters[0][0] = clusters[0];
-        bestClusters[0][1] = clusters[1];
+      if(err < bestError[1]) {
+        bestError[1] = err;
+        bestShapeIdx[1] = i;
+        bestClusters[1][0] = clusters[0];
+        bestClusters[1][1] = clusters[1];
+        bestClusters[1][2] = clusters[2];
       }
     }
+  }
 
-    // There are not 3 subset blocks that support alpha, so only check these
-    // if the entire block is opaque.
-    if(opaque) {
-      for(unsigned int i = 0; i < kNumShapes3; i++) {
+  uint8 tempBuf1[16], tempBuf2[16];
 
-        RGBACluster clusters[3];
-        PopulateThreeClustersForShape(blockCluster, i, clusters);
+  BitStream tempStream1 (tempBuf1, 128, 0);
+  CompressionMode compressor(6, opaque);
+  double best = compressor.Compress(tempStream1, 0, &blockCluster);
+  if(best == 0.0f) {
+    memcpy(outBuf, tempBuf1, 16);
+    return;
+  }
 
-        double err = 0.0;
-        for(int ci = 0; ci < 3; ci++) {
-          err += EstimateThreeClusterError(clusters[ci]);
-        }
+  // Check modes 4 and 5 if the block isn't opaque...
+  if(!opaque) {
+    for(int mode = 4; mode <= 5; mode++) {
 
-        // If it's small, we'll take it!
-        if(err < 1e-9) {
-          CompressThreeClusters(i, clusters, outBuf, opaque);
+      BitStream tempStream2(tempBuf2, 128, 0);
+      CompressionMode compressorTry(mode, opaque);
+
+      double error = compressorTry.Compress(tempStream2, 0, &blockCluster);
+      if(error < best) {
+
+        best = error;
+
+        if(best == 0.0f) {
+          memcpy(outBuf, tempBuf2, 16);
           return;
-        }
-
-        if(err < bestError[1]) {
-          bestError[1] = err;
-          bestShapeIdx[1] = i;
-          bestClusters[1][0] = clusters[0];
-          bestClusters[1][1] = clusters[1];
-          bestClusters[1][2] = clusters[2];
+        } else {
+          memcpy(tempBuf1, tempBuf2, 16);
         }
       }
     }
+  }
 
-    uint8 tempBuf1[16], tempBuf2[16];
+  double error =
+    CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque);
+  if(error < best) {
 
-    BitStream tempStream1 (tempBuf1, 128, 0);
-    BC7CompressionMode compressor(6, opaque);
-    double best = compressor.Compress(tempStream1, 0, &blockCluster);
-    if(best == 0.0f) {
-      memcpy(outBuf, tempBuf1, 16);
+    best = error;
+    if(error == 0.0f) {
+      memcpy(outBuf, tempBuf2, 16);
+      return;
+    } else {
+      memcpy(tempBuf1, tempBuf2, 16);
+    }
+  }
+
+  if(opaque) {
+    const double newError =
+      CompressThreeClusters(bestShapeIdx[1],
+                            bestClusters[1],
+                            tempBuf2,
+                            opaque);
+    if(newError < best) {
+      memcpy(outBuf, tempBuf2, 16);
       return;
     }
+  }
 
-    // Check modes 4 and 5 if the block isn't opaque...
-    if(!opaque) {
-      for(int mode = 4; mode <= 5; mode++) {
+  memcpy(outBuf, tempBuf1, 16);
+}
 
-        BitStream tempStream2(tempBuf2, 128, 0);
-        BC7CompressionMode compressorTry(mode, opaque);
+static double EstimateTwoClusterErrorStats(
+  RGBACluster &c, double (&estimates)[2]
+) {
+  RGBAVector Min, Max, v;
+  c.GetBoundingBox(Min, Max);
+  v = Max - Min;
+  if(v * v == 0) {
+    estimates[0] = estimates[1] = 0.0;
+    return 0.0;
+  }
 
-        double error = compressorTry.Compress(tempStream2, 0, &blockCluster);
-        if(error < best) {
+  const float *w = BPTCC::GetErrorMetric();
 
-          best = error;
+  const double err1 = c.QuantizedError(
+    Min, Max, 8, 0xFFFCFCFC, RGBAVector(w[0], w[1], w[2], w[3])
+  );
 
-          if(best == 0.0f) {
-            memcpy(outBuf, tempBuf2, 16);
-            return;
+  if(err1 >= 0.0) {
+    estimates[0] = err1;
+  } else {
+    estimates[0] = std::min(estimates[0], err1);
+  }
+
+  const double err3 = c.QuantizedError(
+    Min, Max, 8, 0xFFFEFEFE, RGBAVector(w[0], w[1], w[2], w[3])
+  );
+
+  if(err3 >= 0.0) {
+    estimates[1] = err3;
+  } else {
+    estimates[1] = std::min(estimates[1], err3);
+  }
+
+  double error = 0.0001;
+#ifdef USE_PCA_FOR_SHAPE_ESTIMATION
+  double eigOne = c.GetPrincipalEigenvalue();
+  double eigTwo = c.GetSecondEigenvalue();
+  if(eigOne != 0.0) {
+    error += eigTwo / eigOne;
+  } else {
+    error += 1.0;
+  }
+#else
+  error += std::min(err1, err3);
+#endif
+  return error;
+}
+
+static double EstimateThreeClusterErrorStats(
+  RGBACluster &c, double (&estimates)[2]
+) {  // returns a combined error for c; estimates[0]/[1] receive err0/err2 below
+  RGBAVector Min, Max, v;
+  c.GetBoundingBox(Min, Max);
+  v = Max - Min;
+  if(v * v == 0) {  // bounding box has no extent (v * v presumably a dot product)
+    estimates[0] = estimates[1] = 0.0;
+    return 0.0;
+  }
+
+  const float *w = BPTCC::GetErrorMetric();
+  const double err0 = 0.0001 + c.QuantizedError(
+    Min, Max, 4, 0xFFF0F0F0, RGBAVector(w[0], w[1], w[2], w[3])
+  );
+  // NOTE(review): the else-branch only runs for err0 < 0 -- confirm QuantizedError can be negative.
+  if(err0 >= 0.0) {
+    estimates[0] = err0;
+  } else {
+    estimates[0] = std::min(estimates[0], err0);
+  }
+
+  const double err2 = 0.0001 + c.QuantizedError(
+    Min, Max, 4, 0xFFF8F8F8, RGBAVector(w[0], w[1], w[2], w[3])
+  );
+
+  if(err2 >= 0.0) {
+    estimates[1] = err2;
+  } else {
+    estimates[1] = std::min(estimates[1], err2);
+  }
+  // NOTE(review): err0/err2 already include 0.0001, so the non-PCA result below adds it twice.
+  double error = 0.0001;
+#ifdef USE_PCA_FOR_SHAPE_ESTIMATION
+  double eigOne = c.GetPrincipalEigenvalue();
+  double eigTwo = c.GetSecondEigenvalue();
+
+  if(eigOne != 0.0) {
+    error += eigTwo / eigOne;  // ratio of second to principal eigenvalue
+  } else {
+    error += 1.0;
+  }
+#else
+  error += std::min(err0, err2);  // the smaller of the two quantization estimates
+#endif
+  return error;
+}
+
+// Stores est for mode when no estimate is recorded yet (-1.0) or est is lower.
+static void UpdateErrorEstimate(double *estimates, uint32 mode, double est) {
+  assert(estimates);
+  assert(mode < CompressionMode::kNumModes);  // mode is unsigned, so no lower-bound check
+  if(estimates[mode] == -1.0 || est < estimates[mode]) {
+    estimates[mode] = est;
+  }
+}
+
+template<typename T>  // T: any value that can be streamed with operator<<
+static void PrintStat(const BlockLogger &lgr, const char *stat, const T &v) {
+  std::ostringstream line;  // build "<stat> -- <v>" plus a newline as one string
+  line << stat << " -- " << v << std::endl;
+  lgr << line.str();  // hand the finished line to the block logger
+}
+
+// Compress a single block but collect statistics as well...
+static void CompressBC7Block(
+  const uint32 *block, uint8 *outBuf, const BlockLogger &logStream
+) {
+
+  class RAIIStatSaver {
+  private:
+    const BlockLogger &m_Logger;
+
+    int *m_ModePtr;
+    double *m_Estimates;
+    double *m_Errors;
+
+  public:
+    RAIIStatSaver(const BlockLogger &logger)
+      : m_Logger(logger)
+      , m_ModePtr(NULL), m_Estimates(NULL), m_Errors(NULL) { }
+    void SetMode(int *modePtr) { m_ModePtr = modePtr; }
+    void SetEstimates(double *estimates) { m_Estimates = estimates; }
+    void SetErrors(double *errors) { m_Errors = errors; }
+
+    ~RAIIStatSaver() {
+
+      assert(m_ModePtr);
+      assert(m_Estimates);
+      assert(m_Errors);
+
+      PrintStat(m_Logger, kBlockStatString[eBlockStat_Mode], *m_ModePtr);
+
+      for(uint32 i = 0; i < CompressionMode::kNumModes; i++) {
+
+        PrintStat(m_Logger,
+                  kBlockStatString[eBlockStat_ModeZeroEstimate + i],
+                  m_Estimates[i]);
+        PrintStat(m_Logger,
+                  kBlockStatString[eBlockStat_ModeZeroError + i],
+                  m_Errors[i]);
+      }
+    }
+  };
+
+  int bestMode = 0;
+  double modeEstimate[CompressionMode::kNumModes];
+  double modeError[CompressionMode::kNumModes];
+
+  // reset global variables...
+  bestMode = 0;
+  for(uint32 i = 0; i < CompressionMode::kNumModes; i++) {
+    modeError[i] = modeEstimate[i] = -1.0;
+  }
+
+  RAIIStatSaver __statsaver__(logStream);
+  __statsaver__.SetMode(&bestMode);
+  __statsaver__.SetEstimates(modeEstimate);
+  __statsaver__.SetErrors(modeError);
+
+  // All a single color?
+  if(AllOneColor(block)) {
+    BitStream bStrm(outBuf, 128, 0);
+    CompressOptimalColorBC7(*block, bStrm);
+    bestMode = 5;
+
+    PrintStat(logStream, kBlockStatString[eBlockStat_Path], 0);
+    return;
+  }
+
+  RGBACluster blockCluster;
+  bool opaque = true;
+  bool transparent = true;
+
+  for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
+    RGBAVector p = RGBAVector(i, block[i]);
+    blockCluster.AddPoint(p);
+    if(fabs(p.a - 255.0f) > 1e-10) {
+      opaque = false;
+    }
+
+    if(p.a > 0.0f) {
+      transparent = false;
+    }
+  }
+
+  // The whole block is transparent?
+  if(transparent) {
+    BitStream bStrm(outBuf, 128, 0);
+    WriteTransparentBlock(bStrm);
+    bestMode = 6;
+
+    PrintStat(logStream, kBlockStatString[eBlockStat_Path], 1);
+    return;
+  }
+
+  // First, estimate the error it would take to compress a single line with
+  // mode 6...
+  {
+    RGBAVector Min, Max, v;
+    blockCluster.GetBoundingBox(Min, Max);
+    v = Max - Min;
+    if(v * v == 0) {
+      modeEstimate[6] = 0.0;
+    } else {
+      const float *w = GetErrorMetric();
+      const double err = 0.0001 + blockCluster.QuantizedError(
+        Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3])
+      );
+      UpdateErrorEstimate(modeEstimate, 6, err);
+
+#ifdef USE_PCA_FOR_SHAPE_ESTIMATION
+      double eigOne = blockCluster.GetPrincipalEigenvalue();
+      double eigTwo = blockCluster.GetSecondEigenvalue();
+      double error;
+      if(eigOne != 0.0) {
+        error = eigTwo / eigOne;
+      } else {
+        error = 1.0;
+      }
+
+      PrintStream(logStream, kBlockStatString[eBlockStat_SingleShapeEstimate], error);
+#endif
+    }
+  }
+
+  // First we must figure out which shape to use. To do this, simply
+  // see which shape has the smallest sum of minimum bounding spheres.
+  double bestError[2] = { DBL_MAX, DBL_MAX };
+  int bestShapeIdx[2] = { -1, -1 };
+  RGBACluster bestClusters[2][3];
+
+  for(unsigned int i = 0; i < kNumShapes2; i++) {
+    RGBACluster clusters[2];
+    PopulateTwoClustersForShape(blockCluster, i, clusters);
+
+    double err = 0.0;
+    double errEstimate[2] = { -1.0, -1.0 };
+    for(int ci = 0; ci < 2; ci++) {
+      double shapeEstimate[2] = { -1.0, -1.0 };
+      err += EstimateTwoClusterErrorStats(clusters[ci], shapeEstimate);
+
+      for(int ei = 0; ei < 2; ei++) {
+        if(shapeEstimate[ei] >= 0.0) {
+          if(errEstimate[ei] == -1.0) {
+            errEstimate[ei] = shapeEstimate[ei];
           } else {
-            memcpy(tempBuf1, tempBuf2, 16);
+            errEstimate[ei] += shapeEstimate[ei];
           }
         }
       }
     }
 
-    double error =
-      CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque);
-    if(error < best) {
-
-      best = error;
-      if(error == 0.0f) {
-        memcpy(outBuf, tempBuf2, 16);
-        return;
-      } else {
-        memcpy(tempBuf1, tempBuf2, 16);
-      }
-    }
-
-    if(opaque) {
-      const double newError =
-        CompressThreeClusters(bestShapeIdx[1],
-                              bestClusters[1],
-                              tempBuf2,
-                              opaque);
-      if(newError < best) {
-        memcpy(outBuf, tempBuf2, 16);
-        return;
-      }
-    }
-
-    memcpy(outBuf, tempBuf1, 16);
-  }
-
-  static double EstimateTwoClusterErrorStats(
-    RGBACluster &c, double (&estimates)[2]
-  ) {
-    RGBAVector Min, Max, v;
-    c.GetBoundingBox(Min, Max);
-    v = Max - Min;
-    if(v * v == 0) {
-      estimates[0] = estimates[1] = 0.0;
-      return 0.0;
-    }
-
-    const float *w = BC7C::GetErrorMetric();
-
-    const double err1 = c.QuantizedError(
-      Min, Max, 8, 0xFFFCFCFC, RGBAVector(w[0], w[1], w[2], w[3])
-    );
-
-    if(err1 >= 0.0) {
-      estimates[0] = err1;
-    } else {
-      estimates[0] = std::min(estimates[0], err1);
-    }
-
-    const double err3 = c.QuantizedError(
-      Min, Max, 8, 0xFFFEFEFE, RGBAVector(w[0], w[1], w[2], w[3])
-    );
-
-    if(err3 >= 0.0) {
-      estimates[1] = err3;
-    } else {
-      estimates[1] = std::min(estimates[1], err3);
-    }
-
-    double error = 0.0001;
 #ifdef USE_PCA_FOR_SHAPE_ESTIMATION
-    double eigOne = c.GetPrincipalEigenvalue();
-    double eigTwo = c.GetSecondEigenvalue();
-    if(eigOne != 0.0) {
-      error += eigTwo / eigOne;
-    } else {
-      error += 1.0;
-    }
-#else
-    error += std::min(err1, err3);
+    err /= 2.0;
 #endif
-    return error;
-  }
 
-  static double EstimateThreeClusterErrorStats(
-    RGBACluster &c, double (&estimates)[2]
-  ) {
-    RGBAVector Min, Max, v;
-    c.GetBoundingBox(Min, Max);
-    v = Max - Min;
-    if(v * v == 0) {
-      estimates[0] = estimates[1] = 0.0;
-      return 0.0;
+    if(errEstimate[0] != -1.0) {
+      UpdateErrorEstimate(modeEstimate, 1, errEstimate[0]);
     }
 
-    const float *w = BC7C::GetErrorMetric();
-    const double err0 = 0.0001 + c.QuantizedError(
-      Min, Max, 4, 0xFFF0F0F0, RGBAVector(w[0], w[1], w[2], w[3])
-    );
-
-    if(err0 >= 0.0) {
-      estimates[0] = err0;
-    } else {
-      estimates[0] = std::min(estimates[0], err0);
+    if(errEstimate[1] != -1.0) {
+      UpdateErrorEstimate(modeEstimate, 3, errEstimate[1]);
     }
 
-    const double err2 = 0.0001 + c.QuantizedError(
-      Min, Max, 4, 0xFFF8F8F8, RGBAVector(w[0], w[1], w[2], w[3])
-    );
-
-    if(err2 >= 0.0) {
-      estimates[1] = err2;
-    } else {
-      estimates[1] = std::min(estimates[1], err2);
+    if(err < bestError[0]) {
+      PrintStat(logStream, 
+        kBlockStatString[eBlockStat_TwoShapeEstimate], err
+      );
     }
 
-    double error = 0.0001;
-#ifdef USE_PCA_FOR_SHAPE_ESTIMATION
-    double eigOne = c.GetPrincipalEigenvalue();
-    double eigTwo = c.GetSecondEigenvalue();
+    // If it's small, we'll take it!
+    if(err < 1e-9) {
+      int modeChosen;
+      CompressTwoClusters(
+        i, clusters, outBuf, opaque, modeError, &modeChosen
+      );
+      bestMode = modeChosen;
 
-    if(eigOne != 0.0) {
-      error += eigTwo / eigOne;
-    } else {
-      error += 1.0;
-    }
-#else
-    error += std::min(err0, err2);
-#endif
-    return error;
-  }
-
-  static void UpdateErrorEstimate(double *estimates, uint32 mode, double est) {
-    assert(estimates);
-    assert(mode >= 0);
-    assert(mode < BC7CompressionMode::kNumModes);
-    if(estimates[mode] == -1.0 || est < estimates[mode]) {
-      estimates[mode] = est;
-    }
-  }
-
-  template<typename T>
-  static void PrintStat(const BlockLogger &lgr, const char *stat, const T &v) {
-    std::stringstream ss;
-    ss << stat << " -- " << v << std::endl;
-    lgr << ss.str();
-  }
-
-  // Compress a single block but collect statistics as well...
-  static void CompressBC7Block(
-    const uint32 *block, uint8 *outBuf, const BlockLogger &logStream
-  ) {
-
-    class RAIIStatSaver {
-    private:
-      const BlockLogger &m_Logger;
-
-      int *m_ModePtr;
-      double *m_Estimates;
-      double *m_Errors;
-
-    public:
-      RAIIStatSaver(const BlockLogger &logger)
-        : m_Logger(logger)
-        , m_ModePtr(NULL), m_Estimates(NULL), m_Errors(NULL) { }
-      void SetMode(int *modePtr) { m_ModePtr = modePtr; }
-      void SetEstimates(double *estimates) { m_Estimates = estimates; }
-      void SetErrors(double *errors) { m_Errors = errors; }
-
-      ~RAIIStatSaver() {
-
-        assert(m_ModePtr);
-        assert(m_Estimates);
-        assert(m_Errors);
-
-        PrintStat(m_Logger, kBlockStatString[eBlockStat_Mode], *m_ModePtr);
-
-        for(uint32 i = 0; i < BC7CompressionMode::kNumModes; i++) {
-
-          PrintStat(m_Logger,
-                    kBlockStatString[eBlockStat_ModeZeroEstimate + i],
-                    m_Estimates[i]);
-          PrintStat(m_Logger,
-                    kBlockStatString[eBlockStat_ModeZeroError + i],
-                    m_Errors[i]);
-        }
-      }
-    };
-
-    int bestMode = 0;
-    double modeEstimate[BC7CompressionMode::kNumModes];
-    double modeError[BC7CompressionMode::kNumModes];
-
-    // reset global variables...
-    bestMode = 0;
-    for(uint32 i = 0; i < BC7CompressionMode::kNumModes; i++) {
-      modeError[i] = modeEstimate[i] = -1.0;
-    }
-
-    RAIIStatSaver __statsaver__(logStream);
-    __statsaver__.SetMode(&bestMode);
-    __statsaver__.SetEstimates(modeEstimate);
-    __statsaver__.SetErrors(modeError);
-
-    // All a single color?
-    if(AllOneColor(block)) {
-      BitStream bStrm(outBuf, 128, 0);
-      CompressOptimalColorBC7(*block, bStrm);
-      bestMode = 5;
-
-      PrintStat(logStream, kBlockStatString[eBlockStat_Path], 0);
+      PrintStat(logStream, kBlockStatString[eBlockStat_Path], 2);
       return;
     }
 
-    RGBACluster blockCluster;
-    bool opaque = true;
-    bool transparent = true;
-
-    for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
-      RGBAVector p = RGBAVector(i, block[i]);
-      blockCluster.AddPoint(p);
-      if(fabs(p.a - 255.0f) > 1e-10) {
-        opaque = false;
-      }
-
-      if(p.a > 0.0f) {
-        transparent = false;
-      }
+    if(err < bestError[0]) {
+      bestError[0] = err;
+      bestShapeIdx[0] = i;
+      bestClusters[0][0] = clusters[0];
+      bestClusters[0][1] = clusters[1];
     }
+  }
 
-    // The whole block is transparent?
-    if(transparent) {
-      BitStream bStrm(outBuf, 128, 0);
-      WriteTransparentBlock(bStrm);
-      bestMode = 6;
+  // No three-subset mode supports alpha, so only check the three-subset
+  // shapes if the entire block is opaque.
+  if(opaque) {
+    for(unsigned int i = 0; i < kNumShapes3; i++) {
 
-      PrintStat(logStream, kBlockStatString[eBlockStat_Path], 1);
-      return;
-    }
-
-    // First, estimate the error it would take to compress a single line with
-    // mode 6...
-    {
-      RGBAVector Min, Max, v;
-      blockCluster.GetBoundingBox(Min, Max);
-      v = Max - Min;
-      if(v * v == 0) {
-        modeEstimate[6] = 0.0;
-      } else {
-        const float *w = GetErrorMetric();
-        const double err = 0.0001 + blockCluster.QuantizedError(
-          Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3])
-        );
-        UpdateErrorEstimate(modeEstimate, 6, err);
-
-#ifdef USE_PCA_FOR_SHAPE_ESTIMATION
-        double eigOne = blockCluster.GetPrincipalEigenvalue();
-        double eigTwo = blockCluster.GetSecondEigenvalue();
-        double error;
-        if(eigOne != 0.0) {
-          error = eigTwo / eigOne;
-        } else {
-          error = 1.0;
-        }
-
-        PrintStream(logStream, kBlockStatString[eBlockStat_SingleShapeEstimate], error);
-#endif
-      }
-    }
-
-    // First we must figure out which shape to use. To do this, simply
-    // see which shape has the smallest sum of minimum bounding spheres.
-    double bestError[2] = { DBL_MAX, DBL_MAX };
-    int bestShapeIdx[2] = { -1, -1 };
-    RGBACluster bestClusters[2][3];
-
-    for(unsigned int i = 0; i < kNumShapes2; i++) {
-      RGBACluster clusters[2];
-      PopulateTwoClustersForShape(blockCluster, i, clusters);
+      RGBACluster clusters[3];
+      PopulateThreeClustersForShape(blockCluster, i, clusters);
 
       double err = 0.0;
       double errEstimate[2] = { -1.0, -1.0 };
-      for(int ci = 0; ci < 2; ci++) {
+      for(int ci = 0; ci < 3; ci++) {
         double shapeEstimate[2] = { -1.0, -1.0 };
-        err += EstimateTwoClusterErrorStats(clusters[ci], shapeEstimate);
+        err += EstimateThreeClusterErrorStats(clusters[ci], shapeEstimate);
 
         for(int ei = 0; ei < 2; ei++) {
           if(shapeEstimate[ei] >= 0.0) {
@@ -2399,27 +2463,27 @@ namespace BC7C {
       }
 
 #ifdef USE_PCA_FOR_SHAPE_ESTIMATION
-      err /= 2.0;
+      err /= 3.0;
 #endif
 
       if(errEstimate[0] != -1.0) {
-        UpdateErrorEstimate(modeEstimate, 1, errEstimate[0]);
+        UpdateErrorEstimate(modeEstimate, 0, errEstimate[0]);
       }
 
       if(errEstimate[1] != -1.0) {
-        UpdateErrorEstimate(modeEstimate, 3, errEstimate[1]);
+        UpdateErrorEstimate(modeEstimate, 2, errEstimate[1]);
       }
 
-      if(err < bestError[0]) {
+      if(err < bestError[1]) {
         PrintStat(logStream, 
-          kBlockStatString[eBlockStat_TwoShapeEstimate], err
+          kBlockStatString[eBlockStat_ThreeShapeEstimate], err
         );
       }
 
       // If it's small, we'll take it!
       if(err < 1e-9) {
         int modeChosen;
-        CompressTwoClusters(
+        CompressThreeClusters(
           i, clusters, outBuf, opaque, modeError, &modeChosen
         );
         bestMode = modeChosen;
@@ -2428,257 +2492,210 @@ namespace BC7C {
         return;
       }
 
-      if(err < bestError[0]) {
-        bestError[0] = err;
-        bestShapeIdx[0] = i;
-        bestClusters[0][0] = clusters[0];
-        bestClusters[0][1] = clusters[1];
+      if(err < bestError[1]) {
+        bestError[1] = err;
+        bestShapeIdx[1] = i;
+        bestClusters[1][0] = clusters[0];
+        bestClusters[1][1] = clusters[1];
+        bestClusters[1][2] = clusters[2];
       }
     }
-
-    // There are not 3 subset blocks that support alpha, so only check these
-    // if the entire block is opaque.
-    if(opaque) {
-      for(unsigned int i = 0; i < kNumShapes3; i++) {
-
-        RGBACluster clusters[3];
-        PopulateThreeClustersForShape(blockCluster, i, clusters);
-
-        double err = 0.0;
-        double errEstimate[2] = { -1.0, -1.0 };
-        for(int ci = 0; ci < 3; ci++) {
-          double shapeEstimate[2] = { -1.0, -1.0 };
-          err += EstimateThreeClusterErrorStats(clusters[ci], shapeEstimate);
-
-          for(int ei = 0; ei < 2; ei++) {
-            if(shapeEstimate[ei] >= 0.0) {
-              if(errEstimate[ei] == -1.0) {
-                errEstimate[ei] = shapeEstimate[ei];
-              } else {
-                errEstimate[ei] += shapeEstimate[ei];
-              }
-            }
-          }
-        }
-
-#ifdef USE_PCA_FOR_SHAPE_ESTIMATION
-        err /= 3.0;
-#endif
-
-        if(errEstimate[0] != -1.0) {
-          UpdateErrorEstimate(modeEstimate, 0, errEstimate[0]);
-        }
-
-        if(errEstimate[1] != -1.0) {
-          UpdateErrorEstimate(modeEstimate, 2, errEstimate[1]);
-        }
-
-        if(err < bestError[1]) {
-          PrintStat(logStream, 
-            kBlockStatString[eBlockStat_ThreeShapeEstimate], err
-          );
-        }
-
-        // If it's small, we'll take it!
-        if(err < 1e-9) {
-          int modeChosen;
-          CompressThreeClusters(
-            i, clusters, outBuf, opaque, modeError, &modeChosen
-          );
-          bestMode = modeChosen;
-
-          PrintStat(logStream, kBlockStatString[eBlockStat_Path], 2);
-          return;
-        }
-
-        if(err < bestError[1]) {
-          bestError[1] = err;
-          bestShapeIdx[1] = i;
-          bestClusters[1][0] = clusters[0];
-          bestClusters[1][1] = clusters[1];
-          bestClusters[1][2] = clusters[2];
-        }
-      }
-    }
-
-    PrintStat(logStream, kBlockStatString[eBlockStat_Path], 3);
-
-    uint8 tempBuf1[16], tempBuf2[16];
-
-    BitStream tempStream1 (tempBuf1, 128, 0);
-    BC7CompressionMode compressor(6, opaque);
-    double best = compressor.Compress(tempStream1, 0, &blockCluster);
-    modeError[6] = best;
-    bestMode = 6;
-    if(best == 0.0f) {
-      memcpy(outBuf, tempBuf1, 16);
-      return;
-    }
-
-    // Check modes 4 and 5 if the block isn't opaque...
-    if(!opaque) {
-      for(int mode = 4; mode <= 5; mode++) {
-
-        BitStream tempStream2(tempBuf2, 128, 0);
-        BC7CompressionMode compressorTry(mode, opaque);
-
-        double error = compressorTry.Compress(tempStream2, 0, &blockCluster);
-        if(error < best) {
-
-          bestMode = mode;
-          best = error;
-
-          if(best == 0.0f) {
-            memcpy(outBuf, tempBuf2, 16);
-            return;
-          } else {
-            memcpy(tempBuf1, tempBuf2, 16);
-          }
-        }
-      }
-    }
-
-    int modeChosen;
-    double error = CompressTwoClusters(
-      bestShapeIdx[0], bestClusters[0], tempBuf2, opaque, modeError, &modeChosen
-    );
-    if(error < best) {
-
-      bestMode = modeChosen;
-      best = error;
-
-      if(error == 0.0f) {
-        memcpy(outBuf, tempBuf2, 16);
-        return;
-      } else {
-        memcpy(tempBuf1, tempBuf2, 16);
-      }
-    }
-
-    if(opaque) {
-      const double newError = CompressThreeClusters(
-        bestShapeIdx[1], bestClusters[1],
-        tempBuf2, opaque, modeError, &modeChosen
-      );
-      if(newError < best) {
-
-        bestMode = modeChosen;
-        memcpy(outBuf, tempBuf2, 16);
-        return;
-      }
-    }
-
-    memcpy(outBuf, tempBuf1, 16);
   }
 
-  static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]) {
+  PrintStat(logStream, kBlockStatString[eBlockStat_Path], 3);
 
-    BitStreamReadOnly strm(block);
+  uint8 tempBuf1[16], tempBuf2[16];
 
-    uint32 mode = 0;
-    while(!strm.ReadBit()) {
-      mode++;
-    }
+  BitStream tempStream1 (tempBuf1, 128, 0);
+  CompressionMode compressor(6, opaque);
+  double best = compressor.Compress(tempStream1, 0, &blockCluster);
+  modeError[6] = best;
+  bestMode = 6;
+  if(best == 0.0f) {
+    memcpy(outBuf, tempBuf1, 16);
+    return;
+  }
 
-    const BC7CompressionMode::Attributes *attrs =
-      BC7CompressionMode::GetAttributesForMode(mode);
-    const uint32 nSubsets = attrs->numSubsets;
+  // Check modes 4 and 5 if the block isn't opaque...
+  if(!opaque) {
+    for(int mode = 4; mode <= 5; mode++) {
 
-    uint32 idxMode = 0;
-    uint32 rotMode = 0;
-    uint32 shapeIdx = 0;
-    if ( nSubsets > 1 ) {
-      shapeIdx = strm.ReadBits(mode == 0? 4 : 6);
-    } else if( attrs->hasRotation ) {
-      rotMode = strm.ReadBits(2);
-      if( attrs->hasIdxMode ) {
-        idxMode = strm.ReadBit();
+      BitStream tempStream2(tempBuf2, 128, 0);
+      CompressionMode compressorTry(mode, opaque);
+
+      double error = compressorTry.Compress(tempStream2, 0, &blockCluster);
+      if(error < best) {
+
+        bestMode = mode;
+        best = error;
+
+        if(best == 0.0f) {
+          memcpy(outBuf, tempBuf2, 16);
+          return;
+        } else {
+          memcpy(tempBuf1, tempBuf2, 16);
+        }
       }
     }
+  }
 
-    assert(idxMode < 2);
-    assert(rotMode < 4);
-    assert(shapeIdx < ((mode == 0)? 16U : 64U));
+  int modeChosen;
+  double error = CompressTwoClusters(
+    bestShapeIdx[0], bestClusters[0], tempBuf2, opaque, modeError, &modeChosen
+  );
+  if(error < best) {
 
-    uint32 cp = attrs->colorChannelPrecision;
-    const uint32 shift = 8 - cp;
+    bestMode = modeChosen;
+    best = error;
 
-    uint8 eps[3][2][4];
-    for(uint32 ch = 0; ch < 3; ch++)
+    if(error == 0.0f) {
+      memcpy(outBuf, tempBuf2, 16);
+      return;
+    } else {
+      memcpy(tempBuf1, tempBuf2, 16);
+    }
+  }
+
+  if(opaque) {
+    const double newError = CompressThreeClusters(
+      bestShapeIdx[1], bestClusters[1],
+      tempBuf2, opaque, modeError, &modeChosen
+    );
+    if(newError < best) {
+
+      bestMode = modeChosen;
+      memcpy(outBuf, tempBuf2, 16);
+      return;
+    }
+  }
+
+  memcpy(outBuf, tempBuf1, 16);
+}
+
+static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]) {
+
+  BitStreamReadOnly strm(block);
+
+  uint32 mode = 0;
+  while(!strm.ReadBit()) {
+    mode++;
+  }
+
+  const CompressionMode::Attributes *attrs =
+    CompressionMode::GetAttributesForMode(mode);
+  const uint32 nSubsets = attrs->numSubsets;
+
+  uint32 idxMode = 0;
+  uint32 rotMode = 0;
+  uint32 shapeIdx = 0;
+  if ( nSubsets > 1 ) {
+    shapeIdx = strm.ReadBits(mode == 0? 4 : 6);
+  } else if( attrs->hasRotation ) {
+    rotMode = strm.ReadBits(2);
+    if( attrs->hasIdxMode ) {
+      idxMode = strm.ReadBit();
+    }
+  }
+
+  assert(idxMode < 2);
+  assert(rotMode < 4);
+  assert(shapeIdx < ((mode == 0)? 16U : 64U));
+
+  uint32 cp = attrs->colorChannelPrecision;
+  const uint32 shift = 8 - cp;
+
+  uint8 eps[3][2][4];
+  for(uint32 ch = 0; ch < 3; ch++)
+  for(uint32 i = 0; i < nSubsets; i++)
+  for(uint32 ep = 0; ep < 2; ep++)
+    eps[i][ep][ch] = strm.ReadBits(cp) << shift;
+
+  uint32 ap = attrs->alphaChannelPrecision;
+  const uint32 ash = 8 - ap;
+
+  if(ap == 0) {
     for(uint32 i = 0; i < nSubsets; i++)
     for(uint32 ep = 0; ep < 2; ep++)
-      eps[i][ep][ch] = strm.ReadBits(cp) << shift;
-
-    uint32 ap = attrs->alphaChannelPrecision;
-    const uint32 ash = 8 - ap;
-
-    if(ap == 0) {
-      for(uint32 i = 0; i < nSubsets; i++)
-      for(uint32 ep = 0; ep < 2; ep++)
-        eps[i][ep][3] = 0xFF;
-    } else {
-      for(uint32 i = 0; i < nSubsets; i++)
-      for(uint32 ep = 0; ep < 2; ep++)
-        eps[i][ep][3] = strm.ReadBits(ap) << ash;
-    }
-
-    // Handle pbits
-    switch(attrs->pbitType) {
-      case BC7CompressionMode::ePBitType_None:
-        // Do nothing.
-      break;
-
-      case BC7CompressionMode::ePBitType_Shared:
-
-        cp += 1;
-        ap += 1;
-
-        for(uint32 i = 0; i < nSubsets; i++) {
-
-          uint32 pbit = strm.ReadBit();
-
-          for(uint32 j = 0; j < 2; j++)
-          for(uint32 ch = 0; ch < kNumColorChannels; ch++) {
-            const uint32 prec = ch == 3? ap : cp;
-            eps[i][j][ch] |= pbit << (8-prec);
-          }
-        }
-      break;
-
-      case BC7CompressionMode::ePBitType_NotShared:
-
-        cp += 1;
-        ap += 1;
-
-        for(uint32 i = 0; i < nSubsets; i++)
-        for(uint32 j = 0; j < 2; j++) {
-
-          uint32 pbit = strm.ReadBit();
-
-          for(uint32 ch = 0; ch < kNumColorChannels; ch++) {
-            const uint32 prec = ch == 3? ap : cp;
-            eps[i][j][ch] |= pbit << (8-prec);
-          }
-        }
-      break;
-    }
-
-    // Quantize endpoints...
+      eps[i][ep][3] = 0xFF;
+  } else {
     for(uint32 i = 0; i < nSubsets; i++)
-    for(uint32 j = 0; j < 2; j++)
-    for(uint32 ch = 0; ch < kNumColorChannels; ch++) {
-      const uint32 prec = ch == 3? ap : cp;
-      eps[i][j][ch] |= eps[i][j][ch] >> prec;
+    for(uint32 ep = 0; ep < 2; ep++)
+      eps[i][ep][3] = strm.ReadBits(ap) << ash;
+  }
+
+  // Handle pbits
+  switch(attrs->pbitType) {
+    case CompressionMode::ePBitType_None:
+      // Do nothing.
+    break;
+
+    case CompressionMode::ePBitType_Shared:
+
+      cp += 1;
+      ap += 1;
+
+      for(uint32 i = 0; i < nSubsets; i++) {
+
+        uint32 pbit = strm.ReadBit();
+
+        for(uint32 j = 0; j < 2; j++)
+        for(uint32 ch = 0; ch < kNumColorChannels; ch++) {
+          const uint32 prec = ch == 3? ap : cp;
+          eps[i][j][ch] |= pbit << (8-prec);
+        }
+      }
+    break;
+
+    case CompressionMode::ePBitType_NotShared:
+
+      cp += 1;
+      ap += 1;
+
+      for(uint32 i = 0; i < nSubsets; i++)
+      for(uint32 j = 0; j < 2; j++) {
+
+        uint32 pbit = strm.ReadBit();
+
+        for(uint32 ch = 0; ch < kNumColorChannels; ch++) {
+          const uint32 prec = ch == 3? ap : cp;
+          eps[i][j][ch] |= pbit << (8-prec);
+        }
+      }
+    break;
+  }
+
+  // Expand endpoints to 8 bits by replicating their high bits...
+  for(uint32 i = 0; i < nSubsets; i++)
+  for(uint32 j = 0; j < 2; j++)
+  for(uint32 ch = 0; ch < kNumColorChannels; ch++) {
+    const uint32 prec = ch == 3? ap : cp;
+    eps[i][j][ch] |= eps[i][j][ch] >> prec;
+  }
+
+  // Figure out indices...
+  uint32 alphaIndices[kMaxNumDataPoints];
+  uint32 colorIndices[kMaxNumDataPoints];
+
+  int nBitsPerAlpha = attrs->numBitsPerAlpha;
+  int nBitsPerColor = attrs->numBitsPerIndex;
+
+  uint32 idxPrec = attrs->numBitsPerIndex;
+  for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
+    uint32 subset = GetSubsetForIndex(i, shapeIdx, nSubsets);
+
+    int idx = 0;
+    if(GetAnchorIndexForSubset(subset, shapeIdx, nSubsets) == i) {
+      idx = strm.ReadBits(idxPrec - 1);
+    } else {
+      idx = strm.ReadBits(idxPrec);
     }
+    colorIndices[i] = idx;
+  }
 
-    // Figure out indices...
-    uint32 alphaIndices[kMaxNumDataPoints];
-    uint32 colorIndices[kMaxNumDataPoints];
-
-    int nBitsPerAlpha = attrs->numBitsPerAlpha;
-    int nBitsPerColor = attrs->numBitsPerIndex;
-
-    uint32 idxPrec = attrs->numBitsPerIndex;
+  idxPrec = attrs->numBitsPerAlpha;
+  if(idxPrec == 0) {
+    memcpy(alphaIndices, colorIndices, sizeof(alphaIndices));
+  } else {
     for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
       uint32 subset = GetSubsetForIndex(i, shapeIdx, nSubsets);
 
@@ -2688,110 +2705,95 @@ namespace BC7C {
       } else {
         idx = strm.ReadBits(idxPrec);
       }
-      colorIndices[i] = idx;
+      alphaIndices[i] = idx;
     }
 
-    idxPrec = attrs->numBitsPerAlpha;
-    if(idxPrec == 0) {
-      memcpy(alphaIndices, colorIndices, sizeof(alphaIndices));
-    } else {
+    if(idxMode) {
       for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
-        uint32 subset = GetSubsetForIndex(i, shapeIdx, nSubsets);
-
-        int idx = 0;
-        if(GetAnchorIndexForSubset(subset, shapeIdx, nSubsets) == i) {
-          idx = strm.ReadBits(idxPrec - 1);
-        } else {
-          idx = strm.ReadBits(idxPrec);
-        }
-        alphaIndices[i] = idx;
+        swap(alphaIndices[i], colorIndices[i]);
       }
 
-      if(idxMode) {
-        for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
-          swap(alphaIndices[i], colorIndices[i]);
-        }
-
-        swap(nBitsPerAlpha, nBitsPerColor);
-      }
-    }
-
-    assert(strm.GetBitsRead() == 128);
-
-    // Get final colors by interpolating...
-    for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
-
-      const uint32 subset = GetSubsetForIndex(i, shapeIdx, nSubsets);
-      uint32 &pixel = outBuf[i];
-
-      pixel = 0;
-      for(int ch = 0; ch < 4; ch++) {
-        if(ch == 3 && nBitsPerAlpha > 0) {
-          uint32 i0 =
-            kBC7InterpolationValues[nBitsPerAlpha - 1][alphaIndices[i]][0];
-          uint32 i1 =
-            kBC7InterpolationValues[nBitsPerAlpha - 1][alphaIndices[i]][1];
-
-          const uint32 ep1 = static_cast<uint32>(eps[subset][0][3]);
-          const uint32 ep2 = static_cast<uint32>(eps[subset][1][3]);
-          const uint8 ip = (((ep1 * i0 + ep2 * i1) + 32) >> 6) & 0xFF;
-          pixel |= ip << 24;
-
-        } else {
-          uint32 i0 =
-            kBC7InterpolationValues[nBitsPerColor - 1][colorIndices[i]][0];
-          uint32 i1 =
-            kBC7InterpolationValues[nBitsPerColor - 1][colorIndices[i]][1];
-
-          const uint32 ep1 = static_cast<uint32>(eps[subset][0][ch]);
-          const uint32 ep2 = static_cast<uint32>(eps[subset][1][ch]);
-          const uint8 ip = (((ep1 * i0 + ep2 * i1) + 32) >> 6) & 0xFF;
-          pixel |= ip << (8*ch);
-        }
-      }
-
-      // Swap colors if necessary...
-      uint8 *pb = reinterpret_cast<uint8 *>(&pixel);
-      switch(rotMode) {
-        default:
-        case 0:
-          // Do nothing
-          break;
-
-        case 1:
-          swap(pb[0], pb[3]);
-          break;
-
-        case 2:
-          swap(pb[1], pb[3]);
-          break;
-
-        case 3:
-          swap(pb[2], pb[3]);
-          break;
-      }
+      swap(nBitsPerAlpha, nBitsPerColor);
     }
   }
 
-  // Convert the image from a BC7 buffer to a RGBA8 buffer
-  void Decompress(const FasTC::DecompressionJob &dj) {
+  assert(strm.GetBitsRead() == 128);
 
-    const uint8 *inBuf = dj.InBuf();
-    uint32 *outBuf = reinterpret_cast<uint32 *>(dj.OutBuf());
+  // Get final colors by interpolating...
+  for(uint32 i = 0; i < kMaxNumDataPoints; i++) {
 
-    for(unsigned int j = 0; j < dj.Height(); j += 4) {
-      for(unsigned int i = 0; i < dj.Width(); i += 4) {
+    const uint32 subset = GetSubsetForIndex(i, shapeIdx, nSubsets);
+    uint32 &pixel = outBuf[i];
 
-        uint32 pixels[16];
-        DecompressBC7Block(inBuf, pixels);
+    pixel = 0;
+    for(int ch = 0; ch < 4; ch++) {
+      if(ch == 3 && nBitsPerAlpha > 0) {
+        uint32 i0 =
+          kInterpolationValues[nBitsPerAlpha - 1][alphaIndices[i]][0];
+        uint32 i1 =
+          kInterpolationValues[nBitsPerAlpha - 1][alphaIndices[i]][1];
 
-        memcpy(outBuf + j*dj.Width() + i, pixels, 4 * sizeof(pixels[0]));
-        memcpy(outBuf + (j+1)*dj.Width() + i, pixels+4, 4 * sizeof(pixels[0]));
-        memcpy(outBuf + (j+2)*dj.Width() + i, pixels+8, 4 * sizeof(pixels[0]));
-        memcpy(outBuf + (j+3)*dj.Width() + i, pixels+12, 4 * sizeof(pixels[0]));
+        const uint32 ep1 = static_cast<uint32>(eps[subset][0][3]);
+        const uint32 ep2 = static_cast<uint32>(eps[subset][1][3]);
+        const uint8 ip = (((ep1 * i0 + ep2 * i1) + 32) >> 6) & 0xFF;
+        pixel |= ip << 24;
 
-        inBuf += 16;
+      } else {
+        uint32 i0 =
+          kInterpolationValues[nBitsPerColor - 1][colorIndices[i]][0];
+        uint32 i1 =
+          kInterpolationValues[nBitsPerColor - 1][colorIndices[i]][1];
+
+        const uint32 ep1 = static_cast<uint32>(eps[subset][0][ch]);
+        const uint32 ep2 = static_cast<uint32>(eps[subset][1][ch]);
+        const uint8 ip = (((ep1 * i0 + ep2 * i1) + 32) >> 6) & 0xFF;
+        pixel |= ip << (8*ch);
       }
     }
+
+    // Swap colors if necessary...
+    uint8 *pb = reinterpret_cast<uint8 *>(&pixel);
+    switch(rotMode) {
+      default:
+      case 0:
+        // Do nothing
+        break;
+
+      case 1:
+        swap(pb[0], pb[3]);
+        break;
+
+      case 2:
+        swap(pb[1], pb[3]);
+        break;
+
+      case 3:
+        swap(pb[2], pb[3]);
+        break;
+    }
   }
-}  // namespace BC7C
+}
+
+// Convert the image from a BC7 buffer to an RGBA8 buffer, 4x4 block by block.
+void Decompress(const FasTC::DecompressionJob &dj) {
+  // NOTE(review): assumes Width() and Height() are multiples of 4 -- confirm.
+  const uint8 *inBuf = dj.InBuf();
+  uint32 *outBuf = reinterpret_cast<uint32 *>(dj.OutBuf());
+  // Walk the image in 4x4 tiles, reading one block from inBuf per tile.
+  for(unsigned int j = 0; j < dj.Height(); j += 4) {
+    for(unsigned int i = 0; i < dj.Width(); i += 4) {
+      // Decode the current 16-byte block into 16 pixels.
+      uint32 pixels[16];
+      DecompressBC7Block(inBuf, pixels);
+      // Copy the four decoded rows into place; the row stride is dj.Width().
+      memcpy(outBuf + j*dj.Width() + i, pixels, 4 * sizeof(pixels[0]));
+      memcpy(outBuf + (j+1)*dj.Width() + i, pixels+4, 4 * sizeof(pixels[0]));
+      memcpy(outBuf + (j+2)*dj.Width() + i, pixels+8, 4 * sizeof(pixels[0]));
+      memcpy(outBuf + (j+3)*dj.Width() + i, pixels+12, 4 * sizeof(pixels[0]));
+      // Advance to the next 16-byte compressed block.
+      inBuf += 16;
+    }
+  }
+}
+
+}  // namespace BPTCC
diff --git a/BPTCEncoder/src/BC7CompressorSIMD.cpp b/BPTCEncoder/src/CompressorSIMD.cpp
similarity index 100%
rename from BPTCEncoder/src/BC7CompressorSIMD.cpp
rename to BPTCEncoder/src/CompressorSIMD.cpp
diff --git a/BPTCEncoder/src/ParallelStage.cpp b/BPTCEncoder/src/ParallelStage.cpp
index ad40b33..573ad22 100644
--- a/BPTCEncoder/src/ParallelStage.cpp
+++ b/BPTCEncoder/src/ParallelStage.cpp
@@ -47,7 +47,7 @@
 #include <string.h>
 
 /*
- const BC7ParallelStage stage;
+ const BPTCParallelStage stage;
  
  // This is the stream of data that will be used to read the block data.
  const unsigned char *const m_InBuf;
@@ -65,7 +65,7 @@
  uint32 m_NumBlocks;
  */    
 ParallelStage::ParallelStage(
-  BC7ParallelStage stage,
+  BPTCParallelStage stage,
   const unsigned char *inbuf,
   unsigned char *outbuf,
   uint32 numBlocks,
diff --git a/BPTCEncoder/src/ParallelStage.h b/BPTCEncoder/src/ParallelStage.h
index ca99c6f..6e9c892 100644
--- a/BPTCEncoder/src/ParallelStage.h
+++ b/BPTCEncoder/src/ParallelStage.h
@@ -1,30 +1,39 @@
 /* FasTC
- * Copyright (c) 2012 University of North Carolina at Chapel Hill. All rights reserved.
+ * Copyright (c) 2014 University of North Carolina at Chapel Hill.
+ * All rights reserved.
  *
- * Permission to use, copy, modify, and distribute this software and its documentation for educational,
- * research, and non-profit purposes, without fee, and without a written agreement is hereby granted,
- * provided that the above copyright notice, this paragraph, and the following four paragraphs appear
- * in all copies.
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for educational, research, and non-profit purposes, without
+ * fee, and without a written agreement is hereby granted, provided that the
+ * above copyright notice, this paragraph, and the following four paragraphs
+ * appear in all copies.
  *
- * Permission to incorporate this software into commercial products may be obtained by contacting the
- * authors or the Office of Technology Development at the University of North Carolina at Chapel Hill <otd@unc.edu>.
+ * Permission to incorporate this software into commercial products may be
+ * obtained by contacting the authors or the Office of Technology Development
+ * at the University of North Carolina at Chapel Hill <otd@unc.edu>.
  *
- * This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill.
- * The software program and documentation are supplied "as is," without any accompanying services from the
- * University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill
- * and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The
- * end-user understands that the program was developed for research purposes and is advised not to rely
- * exclusively on the program for any reason.
+ * This software program and documentation are copyrighted by the University of
+ * North Carolina at Chapel Hill. The software program and documentation are
+ * supplied "as is," without any accompanying services from the University of
+ * North Carolina at Chapel Hill or the authors. The University of North
+ * Carolina at Chapel Hill and the authors do not warrant that the operation of
+ * the program will be uninterrupted or error-free. The end-user understands
+ * that the program was developed for research purposes and is advised not to
+ * rely exclusively on the program for any reason.
  *
- * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE
- * USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
- * AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
+ * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
+ * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
  *
- * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING,
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY
- * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
- * OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+ * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
+ * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
+ * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
+ * AN "AS IS" BASIS, AND THE UNIVERSITY  OF NORTH CAROLINA AT CHAPEL HILL AND
+ * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
  * ENHANCEMENTS, OR MODIFICATIONS.
  *
  * Please send all BUG REPORTS to <pavel@cs.unc.edu>.
@@ -37,13 +46,13 @@
  * Frederick P. Brooks, Jr. Computer Science Bldg
  * Chapel Hill, NC 27599-3175
  * USA
- *
+ * 
  * <http://gamma.cs.unc.edu/FasTC/>
  */
 
 #include "TexCompTypes.h"
 
-enum BC7ParallelStage {
+enum BPTCParallelStage {
   eParallelStage_Uniform,
   eParallelStage_Partitioned,
   eParallelStage_Normal,
@@ -54,7 +63,7 @@ enum BC7ParallelStage {
 class ParallelStage {
  public:
   ParallelStage(
-    BC7ParallelStage stage,
+    BPTCParallelStage stage,
     const unsigned char *inbuf,
     unsigned char *outbuf,
     uint32 numBlocks,
@@ -66,7 +75,7 @@ class ParallelStage {
   
   ~ParallelStage();
 
-  const BC7ParallelStage m_Stage;
+  const BPTCParallelStage m_Stage;
 
   // Adds the block number to the list of blocks for this parallel stage
   void AddBlock(uint32 blockNum);
diff --git a/BPTCEncoder/src/RGBAEndpoints.cpp b/BPTCEncoder/src/RGBAEndpoints.cpp
index 8b2d5ef..48ca587 100755
--- a/BPTCEncoder/src/RGBAEndpoints.cpp
+++ b/BPTCEncoder/src/RGBAEndpoints.cpp
@@ -1,30 +1,39 @@
 /* FasTC
- * Copyright (c) 2012 University of North Carolina at Chapel Hill. All rights reserved.
+ * Copyright (c) 2014 University of North Carolina at Chapel Hill.
+ * All rights reserved.
  *
- * Permission to use, copy, modify, and distribute this software and its documentation for educational, 
- * research, and non-profit purposes, without fee, and without a written agreement is hereby granted, 
- * provided that the above copyright notice, this paragraph, and the following four paragraphs appear 
- * in all copies.
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for educational, research, and non-profit purposes, without
+ * fee, and without a written agreement is hereby granted, provided that the
+ * above copyright notice, this paragraph, and the following four paragraphs
+ * appear in all copies.
  *
- * Permission to incorporate this software into commercial products may be obtained by contacting the 
- * authors or the Office of Technology Development at the University of North Carolina at Chapel Hill <otd@unc.edu>.
+ * Permission to incorporate this software into commercial products may be
+ * obtained by contacting the authors or the Office of Technology Development
+ * at the University of North Carolina at Chapel Hill <otd@unc.edu>.
  *
- * This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill. 
- * The software program and documentation are supplied "as is," without any accompanying services from the 
- * University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill 
- * and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The 
- * end-user understands that the program was developed for research purposes and is advised not to rely 
- * exclusively on the program for any reason.
+ * This software program and documentation are copyrighted by the University of
+ * North Carolina at Chapel Hill. The software program and documentation are
+ * supplied "as is," without any accompanying services from the University of
+ * North Carolina at Chapel Hill or the authors. The University of North
+ * Carolina at Chapel Hill and the authors do not warrant that the operation of
+ * the program will be uninterrupted or error-free. The end-user understands
+ * that the program was developed for research purposes and is advised not to
+ * rely exclusively on the program for any reason.
  *
- * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR 
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE 
- * USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE 
- * AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
+ * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
+ * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
  *
- * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, 
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
- * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY 
- * OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
+ * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
+ * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
+ * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
+ * AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND
+ * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
  * ENHANCEMENTS, OR MODIFICATIONS.
  *
  * Please send all BUG REPORTS to <pavel@cs.unc.edu>.
@@ -46,27 +55,28 @@
 //
 // This code has been modified significantly from the original.
 
-//--------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 // Copyright 2011 Intel Corporation
 // All Rights Reserved
 //
-// Permission is granted to use, copy, distribute and prepare derivative works of this
-// software for any purpose and without fee, provided, that the above copyright notice
-// and this statement appear in all copies.  Intel makes no representations about the
-// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
-// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
-// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
-// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
-// assume any responsibility for any errors which may appear in this software nor any
+// Permission is granted to use, copy, distribute and prepare derivative works
+// of this software for any purpose and without fee, provided, that the above
+// copyright notice and this statement appear in all copies.  Intel makes no
+// representations about the suitability of this software for any purpose.  THIS
+// SOFTWARE IS PROVIDED "AS IS." INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES,
+// EXPRESS OR IMPLIED, AND ALL LIABILITY, INCLUDING CONSEQUENTIAL AND OTHER
+// INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, INCLUDING LIABILITY FOR
+// INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not assume
+// any responsibility for any errors which may appear in this software nor any
 // responsibility to update it.
 //
-//--------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 
-#include "BC7Config.h"
+#include "BPTCConfig.h"
 #include "RGBAEndpoints.h"
-#include "BC7Compressor.h"
-#include "BC7CompressionMode.h"
+#include "BPTCCompressor.h"
+#include "CompressionMode.h"
 
 #include <cassert>
 #include <cstdlib>
@@ -406,7 +416,11 @@ uint32 RGBACluster::GetPowerMethodIterations() {
   return m_PowerMethodIterations;
 }
 
-double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2], int *indices) const {
+double RGBACluster::QuantizedError(
+  const RGBAVector &p1, const RGBAVector &p2,
+  uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec,
+  const int pbits[2], int *indices
+) const {
 
   // nBuckets should be a power of two.
   assert(nBuckets == 3 || !(nBuckets & (nBuckets - 1)));
@@ -415,7 +429,9 @@ double RGBACluster::QuantizedError(const RGBAVector &p1, const RGBAVector &p2, u
   
   typedef uint32 tInterpPair[2];
   typedef tInterpPair tInterpLevel[16];
-  const tInterpLevel *interpVals = (nBuckets == 3)? kBC7InterpolationValues : kBC7InterpolationValues + (indexPrec - 1);
+  const tInterpLevel *interpVals =
+    (nBuckets == 3)? BPTCC::kInterpolationValues 
+    : BPTCC::kInterpolationValues + (indexPrec - 1);
 
   assert(indexPrec >= 2 && indexPrec <= 4);
 
diff --git a/Base/include/CompressionJob.h b/Base/include/CompressionJob.h
index 0dd4044..c6541fe 100755
--- a/Base/include/CompressionJob.h
+++ b/Base/include/CompressionJob.h
@@ -1,30 +1,39 @@
 /* FasTC
- * Copyright (c) 2012 University of North Carolina at Chapel Hill. All rights reserved.
+ * Copyright (c) 2014 University of North Carolina at Chapel Hill.
+ * All rights reserved.
  *
- * Permission to use, copy, modify, and distribute this software and its documentation for educational, 
- * research, and non-profit purposes, without fee, and without a written agreement is hereby granted, 
- * provided that the above copyright notice, this paragraph, and the following four paragraphs appear 
- * in all copies.
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for educational, research, and non-profit purposes, without
+ * fee, and without a written agreement is hereby granted, provided that the
+ * above copyright notice, this paragraph, and the following four paragraphs
+ * appear in all copies.
  *
- * Permission to incorporate this software into commercial products may be obtained by contacting the 
- * authors or the Office of Technology Development at the University of North Carolina at Chapel Hill <otd@unc.edu>.
+ * Permission to incorporate this software into commercial products may be
+ * obtained by contacting the authors or the Office of Technology Development
+ * at the University of North Carolina at Chapel Hill <otd@unc.edu>.
  *
- * This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill. 
- * The software program and documentation are supplied "as is," without any accompanying services from the 
- * University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill 
- * and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The 
- * end-user understands that the program was developed for research purposes and is advised not to rely 
- * exclusively on the program for any reason.
+ * This software program and documentation are copyrighted by the University of
+ * North Carolina at Chapel Hill. The software program and documentation are
+ * supplied "as is," without any accompanying services from the University of
+ * North Carolina at Chapel Hill or the authors. The University of North
+ * Carolina at Chapel Hill and the authors do not warrant that the operation of
+ * the program will be uninterrupted or error-free. The end-user understands
+ * that the program was developed for research purposes and is advised not to
+ * rely exclusively on the program for any reason.
  *
- * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR 
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE 
- * USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE 
- * AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
+ * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
+ * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
  *
- * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, 
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
- * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY 
- * OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
+ * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
+ * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
+ * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
+ * AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND
+ * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
  * ENHANCEMENTS, OR MODIFICATIONS.
  *
  * Please send all BUG REPORTS to <pavel@cs.unc.edu>.
@@ -58,7 +67,7 @@ namespace FasTC {
 
   // This structure defines a compression job. Here, width and height are the dimensions
   // of the image in pixels. inBuf contains the R8G8B8A8 data that is to be compressed, and
-  // outBuf will contain the compressed BC7 data.
+  // outBuf will contain the compressed BPTC data.
   //
   // Implicit sizes:
   //    inBuf - (width * height * 4) bytes
@@ -165,8 +174,8 @@ namespace FasTC {
     }
   };
   
-  // This struct mirrors that for a compression job, but is used to decompress a BC7 stream. Here, inBuf
-  // is a buffer of BC7 data, and outBuf is the destination where we will copy the decompressed R8G8B8A8 data
+  // This struct mirrors that for a compression job, but is used to decompress a BPTC stream. Here, inBuf
+  // is a buffer of BPTC data, and outBuf is the destination where we will copy the decompressed R8G8B8A8 data
   class DecompressionJob {
    private:
     const ECompressionFormat m_Format;
diff --git a/CLTool/src/tc.cpp b/CLTool/src/tc.cpp
index fb02d98..78f5fd5 100644
--- a/CLTool/src/tc.cpp
+++ b/CLTool/src/tc.cpp
@@ -341,7 +341,7 @@ int main(int argc, char **argv) {
     if(decompressedOutput[0] != '\0') {
       memcpy(basename, decompressedOutput, 256);
     } else if(format == FasTC::eCompressionFormat_BPTC) {
-      strcat(basename, "-bc7.png");
+      strcat(basename, "-bptc.png");
     } else if(format == FasTC::eCompressionFormat_PVRTC) {
       strcat(basename, "-pvrtc.png");
     } else if(format == FasTC::eCompressionFormat_DXT1) {
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 574333e..85cfdbb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,30 +1,39 @@
 # FasTC
-# Copyright (c) 2012 University of North Carolina at Chapel Hill. All rights reserved.
+# Copyright (c) 2014 University of North Carolina at Chapel Hill.
+# All rights reserved.
 #
-# Permission to use, copy, modify, and distribute this software and its documentation for educational, 
-# research, and non-profit purposes, without fee, and without a written agreement is hereby granted, 
-# provided that the above copyright notice, this paragraph, and the following four paragraphs appear 
-# in all copies.
+# Permission to use, copy, modify, and distribute this software and its
+# documentation for educational, research, and non-profit purposes, without
+# fee, and without a written agreement is hereby granted, provided that the
+# above copyright notice, this paragraph, and the following four paragraphs
+# appear in all copies.
 #
-# Permission to incorporate this software into commercial products may be obtained by contacting the 
-# authors or the Office of Technology Development at the University of North Carolina at Chapel Hill <otd@unc.edu>.
+# Permission to incorporate this software into commercial products may be
+# obtained by contacting the authors or the Office of Technology Development
+# at the University of North Carolina at Chapel Hill <otd@unc.edu>.
 #
-# This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill. 
-# The software program and documentation are supplied "as is," without any accompanying services from the 
-# University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill 
-# and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The 
-# end-user understands that the program was developed for research purposes and is advised not to rely 
-# exclusively on the program for any reason.
+# This software program and documentation are copyrighted by the University of
+# North Carolina at Chapel Hill. The software program and documentation are
+# supplied "as is," without any accompanying services from the University of
+# North Carolina at Chapel Hill or the authors. The University of North
+# Carolina at Chapel Hill and the authors do not warrant that the operation of
+# the program will be uninterrupted or error-free. The end-user understands
+# that the program was developed for research purposes and is advised not to
+# rely exclusively on the program for any reason.
 #
-# IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR 
-# DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE 
-# USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE 
-# AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
+# AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
+# OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
+# AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+# DAMAGE.
 #
-# THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, 
-# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
-# STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY 
-# OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
+# THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
+# DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
+# STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
+# AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND
+# THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
 # ENHANCEMENTS, OR MODIFICATIONS.
 #
 # Please send all BUG REPORTS to <pavel@cs.unc.edu>.
diff --git a/Core/src/CompressedImage.cpp b/Core/src/CompressedImage.cpp
index 42116c9..499d3ec 100644
--- a/Core/src/CompressedImage.cpp
+++ b/Core/src/CompressedImage.cpp
@@ -1,30 +1,39 @@
 /* FasTC
- * Copyright (c) 2012 University of North Carolina at Chapel Hill. All rights reserved.
+ * Copyright (c) 2014 University of North Carolina at Chapel Hill.
+ * All rights reserved.
  *
- * Permission to use, copy, modify, and distribute this software and its documentation for educational, 
- * research, and non-profit purposes, without fee, and without a written agreement is hereby granted, 
- * provided that the above copyright notice, this paragraph, and the following four paragraphs appear 
- * in all copies.
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for educational, research, and non-profit purposes, without
+ * fee, and without a written agreement is hereby granted, provided that the
+ * above copyright notice, this paragraph, and the following four paragraphs
+ * appear in all copies.
  *
- * Permission to incorporate this software into commercial products may be obtained by contacting the 
- * authors or the Office of Technology Development at the University of North Carolina at Chapel Hill <otd@unc.edu>.
+ * Permission to incorporate this software into commercial products may be
+ * obtained by contacting the authors or the Office of Technology Development
+ * at the University of North Carolina at Chapel Hill <otd@unc.edu>.
  *
- * This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill. 
- * The software program and documentation are supplied "as is," without any accompanying services from the 
- * University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill 
- * and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The 
- * end-user understands that the program was developed for research purposes and is advised not to rely 
- * exclusively on the program for any reason.
+ * This software program and documentation are copyrighted by the University of
+ * North Carolina at Chapel Hill. The software program and documentation are
+ * supplied "as is," without any accompanying services from the University of
+ * North Carolina at Chapel Hill or the authors. The University of North
+ * Carolina at Chapel Hill and the authors do not warrant that the operation of
+ * the program will be uninterrupted or error-free. The end-user understands
+ * that the program was developed for research purposes and is advised not to
+ * rely exclusively on the program for any reason.
  *
- * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR 
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE 
- * USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE 
- * AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
+ * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
+ * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
  *
- * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, 
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
- * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY 
- * OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
+ * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
+ * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
+ * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
+ * AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND
+ * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
  * ENHANCEMENTS, OR MODIFICATIONS.
  *
  * Please send all BUG REPORTS to <pavel@cs.unc.edu>.
@@ -51,7 +60,7 @@
 #include "Pixel.h"
 
 #include "TexCompTypes.h"
-#include "BC7Compressor.h"
+#include "BPTCCompressor.h"
 #include "PVRTCCompressor.h"
 #include "DXTCompressor.h"
 #include "ETCCompressor.h"
@@ -135,7 +144,7 @@ bool CompressedImage::DecompressImage(unsigned char *outBuf, unsigned int outBuf
 
     case FasTC::eCompressionFormat_BPTC: 
     { 
-      BC7C::Decompress(dj);
+      BPTCC::Decompress(dj);
     }
     break;
 
diff --git a/Core/src/TexComp.cpp b/Core/src/TexComp.cpp
index 96f5158..0353cfd 100644
--- a/Core/src/TexComp.cpp
+++ b/Core/src/TexComp.cpp
@@ -52,7 +52,7 @@
 
 #include "ETCCompressor.h"
 #include "DXTCompressor.h"
-#include "BC7Compressor.h"
+#include "BPTCCompressor.h"
 #include "CompressionFuncs.h"
 #include "Image.h"
 #include "ImageFile.h"
@@ -104,12 +104,12 @@ static  CompressionFuncWithStats ChooseFuncFromSettingsWithStats(const SCompress
 
     case FasTC::eCompressionFormat_BPTC:
     {
-#ifdef FOUND_NVTT_BC7_EXPORT
+#ifdef FOUND_NVTT_BPTC_EXPORT
       if(s.bUseNVTT)
-       return BC7C::CompressNVTTWithStats;
+       return BPTCC::CompressNVTTWithStats;
       else
 #endif
-       return BC7C::CompressWithStats;
+       return BPTCC::CompressWithStats;
     }
     break;
     
@@ -135,19 +135,19 @@ static CompressionFunc ChooseFuncFromSettings(const SCompressionSettings &s) {
   switch(s.format) {
     case FasTC::eCompressionFormat_BPTC:
     {
-      BC7C::SetQualityLevel(s.iQuality);
+      BPTCC::SetQualityLevel(s.iQuality);
 #ifdef HAS_SSE_41
       if(s.bUseSIMD) {
-        return BC7C::CompressImageBC7SIMD;
+        return BPTCC::CompressImageBPTCSIMD;
       }
 #endif
 
-#ifdef FOUND_NVTT_BC7_EXPORT
+#ifdef FOUND_NVTT_BPTC_EXPORT
       if(s.bUseNVTT)
-       return BC7C::CompressNVTT;
+       return BPTCC::CompressNVTT;
       else
 #endif
-       return BC7C::Compress;
+       return BPTCC::Compress;
     }
     break;
 
@@ -232,8 +232,8 @@ class AtomicThreadUnit : public TCCallable {
   virtual ~AtomicThreadUnit() { }
   virtual void operator()() {
     m_Barrier->Wait();
-    if(m_CmpFnc == BC7C::Compress) {
-      BC7C::CompressAtomic(m_CompressionJobList);
+    if(m_CmpFnc == BPTCC::Compress) {
+      BPTCC::CompressAtomic(m_CompressionJobList);
     }
     else {
       assert(!"I don't know what we're compressing...");
diff --git a/Core/src/ThreadGroup.cpp b/Core/src/ThreadGroup.cpp
index c74ed6c..4b90a46 100644
--- a/Core/src/ThreadGroup.cpp
+++ b/Core/src/ThreadGroup.cpp
@@ -42,7 +42,7 @@
  */
 
 #include "ThreadGroup.h"
-#include "BC7Compressor.h"
+#include "BPTCCompressor.h"
 
 #include <cstdlib>
 #include <cstdio>
diff --git a/Core/src/WorkerQueue.cpp b/Core/src/WorkerQueue.cpp
index e2df7af..ceafffb 100644
--- a/Core/src/WorkerQueue.cpp
+++ b/Core/src/WorkerQueue.cpp
@@ -49,7 +49,7 @@
 #include <cassert>
 #include <iostream>
 
-#include "BC7Compressor.h"
+#include "BPTCCompressor.h"
 
 using FasTC::CompressionJob;
 

From ea953979fe78a5e5430958195ef5d4b6b2e3aff4 Mon Sep 17 00:00:00 2001
From: Pavel Krajcevski <pavel@cs.unc.edu>
Date: Tue, 21 Jan 2014 15:04:39 -0500
Subject: [PATCH 2/3] Move bitstream to FasTC base lib

---
 BPTCEncoder/CMakeLists.txt                    |  1 -
 BPTCEncoder/src/CompressionMode.h             |  6 +-
 BPTCEncoder/src/Compressor.cpp                |  3 +
 Base/CMakeLists.txt                           |  1 +
 {BPTCEncoder/src => Base/include}/BitStream.h | 85 +++++++++++--------
 5 files changed, 58 insertions(+), 38 deletions(-)
 rename {BPTCEncoder/src => Base/include}/BitStream.h (54%)

diff --git a/BPTCEncoder/CMakeLists.txt b/BPTCEncoder/CMakeLists.txt
index 6581238..03c3758 100644
--- a/BPTCEncoder/CMakeLists.txt
+++ b/BPTCEncoder/CMakeLists.txt
@@ -106,7 +106,6 @@ SET( HEADERS
   config/BPTCConfig.h.in
   include/BPTCCompressor.h
   src/CompressionMode.h
-  src/BitStream.h
   src/RGBAEndpoints.h
   src/ParallelStage.h
 )
diff --git a/BPTCEncoder/src/CompressionMode.h b/BPTCEncoder/src/CompressionMode.h
index 6eda945..e38a88e 100755
--- a/BPTCEncoder/src/CompressionMode.h
+++ b/BPTCEncoder/src/CompressionMode.h
@@ -78,7 +78,9 @@
 
 #include "RGBAEndpoints.h"
 
-class BitStream;
+namespace FasTC {
+  class BitStream;
+}  // namespace FasTC
 
 namespace BPTCC {
 
@@ -113,7 +115,7 @@ class CompressionMode {
 
   // This function compresses a group of clusters into the passed bitstream. The
   // size of the clusters array is determined by the BC7 compression mode.
-  double Compress(BitStream &stream,
+  double Compress(FasTC::BitStream &stream,
                   const int shapeIdx, const RGBACluster *clusters);
 
   // This switch controls the quality of the simulated annealing optimizer. We
diff --git a/BPTCEncoder/src/Compressor.cpp b/BPTCEncoder/src/Compressor.cpp
index de3ec0d..f85f69c 100755
--- a/BPTCEncoder/src/Compressor.cpp
+++ b/BPTCEncoder/src/Compressor.cpp
@@ -79,7 +79,10 @@
 #include "TexCompTypes.h"
 #include "BCLookupTables.h"
 #include "RGBAEndpoints.h"
+
 #include "BitStream.h"
+using FasTC::BitStream;
+using FasTC::BitStreamReadOnly;
 
 #ifdef HAS_MSVC_ATOMICS
 #   include "Windows.h"
diff --git a/Base/CMakeLists.txt b/Base/CMakeLists.txt
index bee31e9..4db3c82 100644
--- a/Base/CMakeLists.txt
+++ b/Base/CMakeLists.txt
@@ -58,6 +58,7 @@ SET( SOURCES
 )
 
 SET( HEADERS
+  "include/BitStream.h"
   "include/TexCompTypes.h"
   "include/Image.h"
   "include/Color.h"
diff --git a/BPTCEncoder/src/BitStream.h b/Base/include/BitStream.h
similarity index 54%
rename from BPTCEncoder/src/BitStream.h
rename to Base/include/BitStream.h
index 3f30d6d..8fb6f9a 100755
--- a/BPTCEncoder/src/BitStream.h
+++ b/Base/include/BitStream.h
@@ -1,30 +1,39 @@
 /* FasTC
- * Copyright (c) 2012 University of North Carolina at Chapel Hill. All rights reserved.
+ * Copyright (c) 2012 University of North Carolina at Chapel Hill.
+ * All rights reserved.
  *
- * Permission to use, copy, modify, and distribute this software and its documentation for educational, 
- * research, and non-profit purposes, without fee, and without a written agreement is hereby granted, 
- * provided that the above copyright notice, this paragraph, and the following four paragraphs appear 
- * in all copies.
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for educational, research, and non-profit purposes, without
+ * fee, and without a written agreement is hereby granted, provided that the
+ * above copyright notice, this paragraph, and the following four paragraphs
+ * appear in all copies.
  *
- * Permission to incorporate this software into commercial products may be obtained by contacting the 
- * authors or the Office of Technology Development at the University of North Carolina at Chapel Hill <otd@unc.edu>.
+ * Permission to incorporate this software into commercial products may be
+ * obtained by contacting the authors or the Office of Technology Development
+ * at the University of North Carolina at Chapel Hill <otd@unc.edu>.
  *
- * This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill. 
- * The software program and documentation are supplied "as is," without any accompanying services from the 
- * University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill 
- * and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The 
- * end-user understands that the program was developed for research purposes and is advised not to rely 
- * exclusively on the program for any reason.
+ * This software program and documentation are copyrighted by the University of
+ * North Carolina at Chapel Hill. The software program and documentation are
+ * supplied "as is," without any accompanying services from the University of
+ * North Carolina at Chapel Hill or the authors. The University of North
+ * Carolina at Chapel Hill and the authors do not warrant that the operation of
+ * the program will be uninterrupted or error-free. The end-user understands
+ * that the program was developed for research purposes and is advised not to
+ * rely exclusively on the program for any reason.
  *
- * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR 
- * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE 
- * USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE 
- * AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE
+ * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+ * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA
+ * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
  *
- * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, 
- * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
- * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY 
- * OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
+ * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY
+ * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY 
+ * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON
+ * AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND
+ * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 
  * ENHANCEMENTS, OR MODIFICATIONS.
  *
  * Please send all BUG REPORTS to <pavel@cs.unc.edu>.
@@ -46,25 +55,28 @@
 //
 // This code has been modified significantly from the original.
 
-//--------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 // Copyright 2011 Intel Corporation
 // All Rights Reserved
 //
-// Permission is granted to use, copy, distribute and prepare derivative works of this
-// software for any purpose and without fee, provided, that the above copyright notice
-// and this statement appear in all copies.  Intel makes no representations about the
-// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
-// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
-// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
-// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
-// assume any responsibility for any errors which may appear in this software nor any
+// Permission is granted to use, copy, distribute and prepare derivative works
+// of this software for any purpose and without fee, provided, that the above
+// copyright notice and this statement appear in all copies.  Intel makes no
+// representations about the suitability of this software for any purpose.  THIS
+// SOFTWARE IS PROVIDED "AS IS." INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES,
+// EXPRESS OR IMPLIED, AND ALL LIABILITY, INCLUDING CONSEQUENTIAL AND OTHER
+// INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, INCLUDING LIABILITY FOR
+// INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not assume
+// any responsibility for any errors which may appear in this software nor any
 // responsibility to update it.
 //
-//--------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 
-#ifndef __BITSTREAM_H__
-#define __BITSTREAM_H__
+#ifndef __BASE_INCLUDE_BITSTREAM_H__
+#define __BASE_INCLUDE_BITSTREAM_H__
+
+namespace FasTC {
 
 class BitStream {
  public:
@@ -158,4 +170,7 @@ class BitStreamReadOnly {
   const unsigned char *m_CurByte;
   int m_NextBit;
 };
-#endif //__BITSTREAM_H__
+
+}  // namespace FasTC
+
+#endif //__BASE_INCLUDE_BITSTREAM_H__

From c37dca1068e019aa975c0fae0fa8a0f8525e2226 Mon Sep 17 00:00:00 2001
From: Pavel Krajcevski <pavel@cs.unc.edu>
Date: Tue, 21 Jan 2014 16:23:18 -0500
Subject: [PATCH 3/3] Split calculation of compression parameters from packing
 them.

---
 BPTCEncoder/src/CompressionMode.h |  40 ++++-
 BPTCEncoder/src/Compressor.cpp    | 249 +++++++++++++++---------------
 BPTCEncoder/src/RGBAEndpoints.cpp |   4 +-
 BPTCEncoder/src/RGBAEndpoints.h   |   8 +-
 4 files changed, 166 insertions(+), 135 deletions(-)

diff --git a/BPTCEncoder/src/CompressionMode.h b/BPTCEncoder/src/CompressionMode.h
index e38a88e..8b6f9fe 100755
--- a/BPTCEncoder/src/CompressionMode.h
+++ b/BPTCEncoder/src/CompressionMode.h
@@ -113,6 +113,30 @@ class CompressionMode {
   { }
   ~CompressionMode() { }
 
+  // These are all of the parameters required to define the data in a compressed
+  // BPTC block. The mode determines how these parameters will be translated
+  // into actual bits.
+  struct Params {
+    const uint16 m_ShapeIdx;  // Partition index written after the mode bits.
+    RGBAVector m_P1[kMaxNumSubsets], m_P2[kMaxNumSubsets];  // Endpoint pairs.
+    uint8 m_Indices[kMaxNumSubsets][kMaxNumDataPoints];  // 0xFF until set.
+    uint8 m_AlphaIndices[kMaxNumDataPoints];  // 0xFF until set.
+    uint8 m_PbitCombo[kMaxNumSubsets];  // 0xFF until set.
+    int8 m_RotationMode, m_IndexMode;  // -1 until set.
+    // Initializers are listed in declaration order (avoids -Wreorder).
+    explicit Params(uint32 shape) : m_ShapeIdx(static_cast<uint16>(shape)),
+      m_RotationMode(-1), m_IndexMode(-1) {
+      memset(m_Indices, 0xFF, sizeof(m_Indices));
+      memset(m_AlphaIndices, 0xFF, sizeof(m_AlphaIndices));
+      memset(m_PbitCombo, 0xFF, sizeof(m_PbitCombo));
+    }
+  };
+
+  // This outputs the parameters to the given bitstream based on the current
+  // compression mode. The first argument is not const because the mode and
+  // the value of the first index determine whether or not the indices need to
+  // be swapped. The final output bits will always be a valid BPTC block.
+  void Pack(Params &params, FasTC::BitStream &stream) const;
+
   // This function compresses a group of clusters into the passed bitstream. The
   // size of the clusters array is determined by the BC7 compression mode.
   double Compress(FasTC::BitStream &stream,
@@ -176,7 +200,7 @@ class CompressionMode {
   }
   int GetNumberOfSubsets() const { return m_Attributes->numSubsets; }
 
-  int GetNumberOfBitsPerIndex(int indexMode = -1) const {
+  int GetNumberOfBitsPerIndex(int8 indexMode = -1) const {
     if(indexMode < 0) indexMode = m_IndexMode;
     if(indexMode == 0)
       return m_Attributes->numBitsPerIndex;
@@ -184,7 +208,7 @@ class CompressionMode {
       return m_Attributes->numBitsPerAlpha;
   }
 
-  int GetNumberOfBitsPerAlpha(int indexMode = -1) const {
+  int GetNumberOfBitsPerAlpha(int8 indexMode = -1) const {
     if(indexMode < 0) indexMode = m_IndexMode;
     if(indexMode == 0)
       return m_Attributes->numBitsPerAlpha;
@@ -261,8 +285,8 @@ class CompressionMode {
   double OptimizeEndpointsForCluster(
     const RGBACluster &cluster,
     RGBAVector &p1, RGBAVector &p2,
-    int *bestIndices,
-    int &bestPbitCombo
+    uint8 *bestIndices,
+    uint8 &bestPbitCombo
   ) const;
 
   // This function performs the heuristic to choose the "best" neighboring
@@ -290,26 +314,26 @@ class CompressionMode {
   // then we choose the best p-bit combo and return it as well.
   double CompressSingleColor(const RGBAVector &p,
                              RGBAVector &p1, RGBAVector &p2,
-                             int &bestPbitCombo) const;
+                             uint8 &bestPbitCombo) const;
 
   // Compress the cluster using a generalized cluster fit. This figures out the
   // proper endpoints assuming that we have no alpha.
   double CompressCluster(const RGBACluster &cluster,
                          RGBAVector &p1, RGBAVector &p2,
-                         int *bestIndices, int &bestPbitCombo) const;
+                         uint8 *bestIndices, uint8 &bestPbitCombo) const;
 
   // Compress the non-opaque cluster using a generalized cluster fit, and place
   // the endpoints within p1 and p2. The color indices and alpha indices are
   // computed as well.
   double CompressCluster(const RGBACluster &cluster,
                          RGBAVector &p1, RGBAVector &p2,
-                         int *bestIndices, int *alphaIndices) const;
+                         uint8 *bestIndices, uint8 *alphaIndices) const;
 
   // This function takes two endpoints in the continuous domain (as floats) and
   // clamps them to the nearest grid points based on the compression mode (and
   // possible pbit values)
   void ClampEndpointsToGrid(RGBAVector &p1, RGBAVector &p2,
-                            int &bestPBitCombo) const;
+                            uint8 &bestPBitCombo) const;
 };
 
 extern const uint32 kInterpolationValues[4][16][2];
diff --git a/BPTCEncoder/src/Compressor.cpp b/BPTCEncoder/src/Compressor.cpp
index f85f69c..1c77c4d 100755
--- a/BPTCEncoder/src/Compressor.cpp
+++ b/BPTCEncoder/src/Compressor.cpp
@@ -356,7 +356,7 @@ CompressionMode::kModeAttributes[kNumModes] = {
 };
 
 void CompressionMode::ClampEndpointsToGrid(
-  RGBAVector &p1, RGBAVector &p2, int &bestPBitCombo
+  RGBAVector &p1, RGBAVector &p2, uint8 &bestPBitCombo
 ) const {
   const int nPbitCombos = GetNumPbitCombos();
   const bool hasPbits = nPbitCombos > 1;
@@ -397,11 +397,10 @@ void CompressionMode::ClampEndpointsToGrid(
 
 double CompressionMode::CompressSingleColor(
   const RGBAVector &p, RGBAVector &p1, RGBAVector &p2,
-  int &bestPbitCombo
+  uint8 &bestPbitCombo
 ) const {
   const uint32 pixel = p.ToPixel();
   float bestError = FLT_MAX;
-  bestPbitCombo = -1;
 
   for(int pbi = 0; pbi < GetNumPbitCombos(); pbi++) {
     const int *pbitCombo = GetPBitCombo(pbi);
@@ -453,7 +452,7 @@ double CompressionMode::CompressSingleColor(
         possValsL[i] |= (possValsL[i] >> nBits);
       }
 
-      const uint32 bpi = GetNumberOfBitsPerIndex() - 1;
+      const uint8 bpi = GetNumberOfBitsPerIndex() - 1;
       const uint32 interpVal0 = kInterpolationValues[bpi][1][0];
       const uint32 interpVal1 = kInterpolationValues[bpi][1][1];
 
@@ -685,8 +684,8 @@ bool CompressionMode::AcceptNewEndpointError(
 double CompressionMode::OptimizeEndpointsForCluster(
   const RGBACluster &cluster,
   RGBAVector &p1, RGBAVector &p2,
-  int *bestIndices,
-  int &bestPbitCombo
+  uint8 *bestIndices,
+  uint8 &bestPbitCombo
 ) const {
 
   const uint32 nBuckets = (1 << GetNumberOfBitsPerIndex());
@@ -731,7 +730,7 @@ double CompressionMode::OptimizeEndpointsForCluster(
 
     float temp = static_cast<float>(energy) / static_cast<float>(maxEnergy-1);
 
-    int indices[kMaxNumDataPoints];
+    uint8 indices[kMaxNumDataPoints];
     RGBAVector np1, np2;
     int nPbitCombo = 0;
 
@@ -779,8 +778,8 @@ double CompressionMode::OptimizeEndpointsForCluster(
 double CompressionMode::CompressCluster(
   const RGBACluster &cluster,
   RGBAVector &p1, RGBAVector &p2,
-  int *bestIndices,
-  int *alphaIndices
+  uint8 *bestIndices,
+  uint8 *alphaIndices
 ) const {
   assert(GetModeNumber() == 4 || GetModeNumber() == 5);
   assert(GetNumberOfSubsets() == 1);
@@ -796,7 +795,7 @@ double CompressionMode::CompressCluster(
             "detected much earlier.");
 
     const RGBAVector &p = cluster.GetPoint(0);
-    int dummyPbit = 0;
+    uint8 dummyPbit = 0;
     double bestErr = CompressSingleColor(p, p1, p2, dummyPbit);
 
     // We're assuming all indices will be index 1...
@@ -843,7 +842,7 @@ double CompressionMode::CompressCluster(
     rgbCluster.AddPoint(v);
   }
 
-  int dummyPbit = 0;
+  uint8 dummyPbit = 0;
   RGBAVector rgbp1, rgbp2;
   double rgbError = CompressCluster(
     rgbCluster, rgbp1, rgbp2, bestIndices, dummyPbit
@@ -1070,8 +1069,8 @@ double CompressionMode::CompressCluster(
 double CompressionMode::CompressCluster(
   const RGBACluster &cluster,
   RGBAVector &p1, RGBAVector &p2,
-  int *bestIndices,
-  int &bestPbitCombo
+  uint8 *bestIndices,
+  uint8 &bestPbitCombo
 ) const {
   // If all the points are the same in the cluster, then we need to figure out
   // what the best approximation to this point is....
@@ -1233,7 +1232,7 @@ double CompressionMode::CompressCluster(
   ClampEndpointsToGrid(p1, p2, bestPbitCombo);
 
   #ifdef _DEBUG
-    int pBitCombo = bestPbitCombo;
+    uint8 pBitCombo = bestPbitCombo;
     RGBAVector tp1 = p1, tp2 = p2;
     ClampEndpointsToGrid(tp1, tp2, pBitCombo);
 
@@ -1249,99 +1248,29 @@ double CompressionMode::CompressCluster(
   );
 }
 
-double CompressionMode::Compress(
-  BitStream &stream, const int shapeIdx, const RGBACluster *clusters
-) {
-
+void CompressionMode::Pack(Params &params, BitStream &stream) const {
+  
   const int kModeNumber = GetModeNumber();
   const int nPartitionBits = GetNumberOfPartitionBits();
   const int nSubsets = GetNumberOfSubsets();
 
+  
   // Mode #
   stream.WriteBits(1 << kModeNumber, kModeNumber + 1);
 
   // Partition #
-  assert((((1 << nPartitionBits) - 1) & shapeIdx) == shapeIdx);
-  stream.WriteBits(shapeIdx, nPartitionBits);
+  assert((((1 << nPartitionBits) - 1) & params.m_ShapeIdx) == params.m_ShapeIdx);
+  stream.WriteBits(params.m_ShapeIdx, nPartitionBits);
 
-  RGBAVector p1[kMaxNumSubsets], p2[kMaxNumSubsets];
-
-  int bestIndices[kMaxNumSubsets][kMaxNumDataPoints];
-  memset(bestIndices, 0xFF, sizeof(bestIndices));
-
-  int bestAlphaIndices[kMaxNumDataPoints];
-  memset(bestAlphaIndices, 0xFF, sizeof(bestAlphaIndices));
-
-  int bestPbitCombo[kMaxNumSubsets] = { -1, -1, -1 };
-  int bestRotationMode = -1, bestIndexMode = -1;
-
-  double totalErr = 0.0;
-  for(int cidx = 0; cidx < nSubsets; cidx++) {
-    int indices[kMaxNumDataPoints] = {0};
-
-    if(m_Attributes->hasRotation) {
-
-      assert(nSubsets == 1);
-
-      int alphaIndices[kMaxNumDataPoints];
-
-      double bestError = DBL_MAX;
-      for(int rotMode = 0; rotMode < 4; rotMode++) {
-
-        SetRotationMode(rotMode);
-        const int nIdxModes = kModeNumber == 4? 2 : 1;
-
-        for(int idxMode = 0; idxMode < nIdxModes; idxMode++) {
-
-          SetIndexMode(idxMode);
-
-          RGBAVector v1, v2;
-          double error = CompressCluster(
-            clusters[cidx], v1, v2, indices, alphaIndices
-          );
-
-          if(error < bestError) {
-            bestError = error;
-
-            memcpy(bestIndices[cidx], indices, sizeof(indices));
-            memcpy(bestAlphaIndices, alphaIndices, sizeof(alphaIndices));
-
-            bestRotationMode = rotMode;
-            bestIndexMode = idxMode;
-
-            p1[cidx] = v1;
-            p2[cidx] = v2;
-          }
-        }
-      }
-
-      totalErr += bestError;
-    } else {  // ! m_Attributes->hasRotation
-      // Compress this cluster
-      totalErr += CompressCluster(
-        clusters[cidx], p1[cidx], p2[cidx], indices, bestPbitCombo[cidx]
-      );
-
-      // Map the indices to their proper position.
-      int idx = 0;
-      for(int i = 0; i < 16; i++) {
-        int subs = GetSubsetForIndex(i, shapeIdx, GetNumberOfSubsets());
-        if(subs == cidx) {
-          bestIndices[cidx][i] = indices[idx++];
-        }
-      }
-    }
-  }
-
-  stream.WriteBits(bestRotationMode, m_Attributes->hasRotation? 2 : 0);
-  stream.WriteBits(bestIndexMode, m_Attributes->hasIdxMode? 1 : 0);
+  stream.WriteBits(params.m_RotationMode, m_Attributes->hasRotation? 2 : 0);
+  stream.WriteBits(params.m_IndexMode, m_Attributes->hasIdxMode? 1 : 0);
 
 #ifdef _DEBUG
   for(int i = 0; i < kMaxNumDataPoints; i++) {
 
     int nSet = 0;
     for(int j = 0; j < nSubsets; j++) {
-      if(bestIndices[j][i] >= 0)
+      if(params.m_Indices[j][i] < 255)
         nSet++;
     }
 
@@ -1358,14 +1287,14 @@ double CompressionMode::Compress(
     switch(GetPBitType()) {
       default:
       case ePBitType_None:
-        pixel1[i] = p1[i].ToPixel(qmask);
-        pixel2[i] = p2[i].ToPixel(qmask);
+        pixel1[i] = params.m_P1[i].ToPixel(qmask);
+        pixel2[i] = params.m_P2[i].ToPixel(qmask);
       break;
 
       case ePBitType_Shared:
       case ePBitType_NotShared:
-        pixel1[i] = p1[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[0]);
-        pixel2[i] = p2[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[1]);
+        pixel1[i] = params.m_P1[i].ToPixel(qmask, GetPBitCombo(params.m_PbitCombo[i])[0]);
+        pixel2[i] = params.m_P2[i].ToPixel(qmask, GetPBitCombo(params.m_PbitCombo[i])[1]);
       break;
     }
   }
@@ -1374,28 +1303,28 @@ double CompressionMode::Compress(
   // we need to swap EVERYTHING.
   for(int sidx = 0; sidx < nSubsets; sidx++) {
 
-    int anchorIdx = GetAnchorIndexForSubset(sidx, shapeIdx, nSubsets);
-    assert(bestIndices[sidx][anchorIdx] != -1);
+    int anchorIdx = GetAnchorIndexForSubset(sidx, params.m_ShapeIdx, nSubsets);
+    assert(params.m_Indices[sidx][anchorIdx] != 255);
 
-    const int nAlphaIndexBits = GetNumberOfBitsPerAlpha(bestIndexMode);
-    const int nIndexBits = GetNumberOfBitsPerIndex(bestIndexMode);
-    if(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1)) {
-      uint32 t = pixel1[sidx]; pixel1[sidx] = pixel2[sidx]; pixel2[sidx] = t;
+    const int nAlphaIndexBits = GetNumberOfBitsPerAlpha(params.m_IndexMode);
+    const int nIndexBits = GetNumberOfBitsPerIndex(params.m_IndexMode);
+    if(params.m_Indices[sidx][anchorIdx] >> (nIndexBits - 1)) {
+      std::swap(pixel1[sidx], pixel2[sidx]);
 
       int nIndexVals = 1 << nIndexBits;
       for(int i = 0; i < 16; i++) {
-        bestIndices[sidx][i] = (nIndexVals - 1) - bestIndices[sidx][i];
+        params.m_Indices[sidx][i] = (nIndexVals - 1) - params.m_Indices[sidx][i];
       }
 
       int nAlphaIndexVals = 1 << nAlphaIndexBits;
       if(m_Attributes->hasRotation) {
         for(int i = 0; i < 16; i++) {
-          bestAlphaIndices[i] = (nAlphaIndexVals - 1) - bestAlphaIndices[i];
+          params.m_AlphaIndices[i] = (nAlphaIndexVals - 1) - params.m_AlphaIndices[i];
         }
       }
     }
 
-    const bool rotated = (bestAlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1)) > 0;
+    const bool rotated = (params.m_AlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1)) > 0;
     if(m_Attributes->hasRotation && rotated) {
       uint8 * bp1 = reinterpret_cast<uint8 *>(&pixel1[sidx]);
       uint8 * bp2 = reinterpret_cast<uint8 *>(&pixel2[sidx]);
@@ -1403,13 +1332,13 @@ double CompressionMode::Compress(
 
       int nAlphaIndexVals = 1 << nAlphaIndexBits;
       for(int i = 0; i < 16; i++) {
-        bestAlphaIndices[i] = (nAlphaIndexVals - 1) - bestAlphaIndices[i];
+        params.m_AlphaIndices[i] = (nAlphaIndexVals - 1) - params.m_AlphaIndices[i];
       }
     }
 
-    assert(!(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1)));
+    assert(!(params.m_Indices[sidx][anchorIdx] >> (nIndexBits - 1)));
     assert(!m_Attributes->hasRotation ||
-           !(bestAlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1)));
+           !(params.m_AlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1)));
   }
 
   // Get the quantized values...
@@ -1459,7 +1388,7 @@ double CompressionMode::Compress(
   // Write out the best pbits..
   if(GetPBitType() != ePBitType_None) {
     for(int s = 0; s < nSubsets; s++) {
-      const int *pbits = GetPBitCombo(bestPbitCombo[s]);
+      const int *pbits = GetPBitCombo(params.m_PbitCombo[s]);
       stream.WriteBits(pbits[0], 1);
       if(GetPBitType() != ePBitType_Shared)
         stream.WriteBits(pbits[1], 1);
@@ -1468,14 +1397,14 @@ double CompressionMode::Compress(
 
   // If our index mode has changed, then we need to write the alpha indices
   // first.
-  if(m_Attributes->hasIdxMode && bestIndexMode == 1) {
+  if(m_Attributes->hasIdxMode && params.m_IndexMode == 1) {
 
     assert(m_Attributes->hasRotation);
 
     for(int i = 0; i < 16; i++) {
-      const int idx = bestAlphaIndices[i];
-      assert(GetAnchorIndexForSubset(0, shapeIdx, nSubsets) == 0);
-      assert(GetNumberOfBitsPerAlpha(bestIndexMode) == 2);
+      const int idx = params.m_AlphaIndices[i];
+      assert(GetAnchorIndexForSubset(0, params.m_ShapeIdx, nSubsets) == 0);
+      assert(GetNumberOfBitsPerAlpha(params.m_IndexMode) == 2);
       assert(idx >= 0 && idx < (1 << 2));
       assert(i != 0 ||
              !(idx >> 1) ||
@@ -1484,10 +1413,10 @@ double CompressionMode::Compress(
     }
 
     for(int i = 0; i < 16; i++) {
-      const int idx = bestIndices[0][i];
-      assert(GetSubsetForIndex(i, shapeIdx, nSubsets) == 0);
-      assert(GetAnchorIndexForSubset(0, shapeIdx, nSubsets) == 0);
-      assert(GetNumberOfBitsPerIndex(bestIndexMode) == 3);
+      const int idx = params.m_Indices[0][i];
+      assert(GetSubsetForIndex(i, params.m_ShapeIdx, nSubsets) == 0);
+      assert(GetAnchorIndexForSubset(0, params.m_ShapeIdx, nSubsets) == 0);
+      assert(GetNumberOfBitsPerIndex(params.m_IndexMode) == 3);
       assert(idx >= 0 && idx < (1 << 3));
       assert(i != 0 ||
              !(idx >> 2) ||
@@ -1496,10 +1425,10 @@ double CompressionMode::Compress(
     }
   } else {
     for(int i = 0; i < 16; i++) {
-      const int subs = GetSubsetForIndex(i, shapeIdx, nSubsets);
-      const int idx = bestIndices[subs][i];
-      const int anchorIdx = GetAnchorIndexForSubset(subs, shapeIdx, nSubsets);
-      const int nBitsForIdx = GetNumberOfBitsPerIndex(bestIndexMode);
+      const int subs = GetSubsetForIndex(i, params.m_ShapeIdx, nSubsets);
+      const int idx = params.m_Indices[subs][i];
+      const int anchorIdx = GetAnchorIndexForSubset(subs, params.m_ShapeIdx, nSubsets);
+      const int nBitsForIdx = GetNumberOfBitsPerIndex(params.m_IndexMode);
       assert(idx >= 0 && idx < (1 << nBitsForIdx));
       assert(i != anchorIdx ||
              !(idx >> (nBitsForIdx - 1)) ||
@@ -1509,9 +1438,9 @@ double CompressionMode::Compress(
 
     if(m_Attributes->hasRotation) {
       for(int i = 0; i < 16; i++) {
-        const int idx = bestAlphaIndices[i];
+        const int idx = params.m_AlphaIndices[i];
         const int anchorIdx = 0;
-        const int nBitsForIdx = GetNumberOfBitsPerAlpha(bestIndexMode);
+        const int nBitsForIdx = GetNumberOfBitsPerAlpha(params.m_IndexMode);
         assert(idx >= 0 && idx < (1 << nBitsForIdx));
         assert(i != anchorIdx ||
                !(idx >> (nBitsForIdx - 1)) ||
@@ -1521,6 +1450,80 @@ double CompressionMode::Compress(
     }
   }
   assert(stream.GetBitsWritten() == 128);
+}
+
+double CompressionMode::Compress(
+  BitStream &stream, const int shapeIdx, const RGBACluster *clusters
+) {
+  // Computes the parameters for each subset's cluster, packs them into the
+  // stream via Pack(), and returns the summed error of the cluster fits.
+  const int kModeNumber = GetModeNumber();
+  const int nSubsets = GetNumberOfSubsets();
+
+  Params params(shapeIdx);
+
+  double totalErr = 0.0;
+  for(int cidx = 0; cidx < nSubsets; cidx++) {
+    uint8 indices[kMaxNumDataPoints] = {0};
+
+    if(m_Attributes->hasRotation) {
+      // Modes with rotation are expected to use exactly one subset.
+      assert(nSubsets == 1);
+
+      uint8 alphaIndices[kMaxNumDataPoints];
+
+      double bestError = DBL_MAX;
+      for(int rotMode = 0; rotMode < 4; rotMode++) {
+
+        SetRotationMode(rotMode);
+        const int nIdxModes = kModeNumber == 4? 2 : 1;
+
+        for(int idxMode = 0; idxMode < nIdxModes; idxMode++) {
+
+          SetIndexMode(idxMode);
+
+          RGBAVector v1, v2;
+          const double error = CompressCluster(
+            clusters[cidx], v1, v2, indices, alphaIndices
+          );
+
+          if(error < bestError) {
+            bestError = error;
+            // Remember the best rotation/index-mode candidate seen so far.
+            memcpy(params.m_Indices[cidx], indices, sizeof(indices));
+            memcpy(params.m_AlphaIndices, alphaIndices, sizeof(alphaIndices));
+
+            params.m_RotationMode = static_cast<int8>(rotMode);
+            params.m_IndexMode = static_cast<int8>(idxMode);
+
+            params.m_P1[cidx] = v1;
+            params.m_P2[cidx] = v2;
+          }
+        }
+      }
+
+      totalErr += bestError;
+    } else {  // ! m_Attributes->hasRotation
+      // Compress this cluster
+      totalErr += CompressCluster(
+        clusters[cidx],
+        params.m_P1[cidx], params.m_P2[cidx],
+        indices, params.m_PbitCombo[cidx]
+      );
+
+      // Map the indices to their proper position.
+      int idx = 0;
+      for(int i = 0; i < 16; i++) {
+        const int subs = GetSubsetForIndex(i, shapeIdx, nSubsets);
+        if(subs == cidx) {
+          params.m_Indices[cidx][i] = indices[idx++];
+        }
+      }
+    }
+  }
+
+  Pack(params, stream);
+  assert(stream.GetBitsWritten() == 128);
   return totalErr;
 }
 
diff --git a/BPTCEncoder/src/RGBAEndpoints.cpp b/BPTCEncoder/src/RGBAEndpoints.cpp
index 48ca587..2ab07cf 100755
--- a/BPTCEncoder/src/RGBAEndpoints.cpp
+++ b/BPTCEncoder/src/RGBAEndpoints.cpp
@@ -419,7 +419,7 @@ uint32 RGBACluster::GetPowerMethodIterations() {
 double RGBACluster::QuantizedError(
   const RGBAVector &p1, const RGBAVector &p2,
   uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec,
-  const int pbits[2], int *indices
+  const int pbits[2], uint8 *indices
 ) const {
 
   // nBuckets should be a power of two.
@@ -457,7 +457,7 @@ double RGBACluster::QuantizedError(
     const uint8 *pb = (const uint8 *)(&pixel);
 
     float minError = FLT_MAX;
-    int bestBucket = -1;
+    uint8 bestBucket = 0;
     for(int j = 0; j < nBuckets; j++) {
 
       uint32 interp0 = (*interpVals)[j][0];
diff --git a/BPTCEncoder/src/RGBAEndpoints.h b/BPTCEncoder/src/RGBAEndpoints.h
index 418c0d6..b01e0ab 100755
--- a/BPTCEncoder/src/RGBAEndpoints.h
+++ b/BPTCEncoder/src/RGBAEndpoints.h
@@ -387,8 +387,12 @@ public:
     Min = m_Min, Max = m_Max;
   }
 
-  // Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask.
-  double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const;
+  // Returns the error if we were to quantize the colors right now with the
+  // given number of buckets and bit mask.
+  double QuantizedError(
+    const RGBAVector &p1, const RGBAVector &p2,
+    uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec,
+    const int pbits[2] = NULL, uint8 *indices = NULL) const;
 
   // Returns the principal axis for this point cluster.
   double GetPrincipalEigenvalue();