diff --git a/BPTCEncoder/include/BC7Compressor.h b/BPTCEncoder/include/BC7Compressor.h index f0a0924..e216f07 100755 --- a/BPTCEncoder/include/BC7Compressor.h +++ b/BPTCEncoder/include/BC7Compressor.h @@ -1,30 +1,39 @@ /* FasTC - * Copyright (c) 2012 University of North Carolina at Chapel Hill. All rights reserved. + * Copyright (c) 2012 University of North Carolina at Chapel Hill. + * All rights reserved. * - * Permission to use, copy, modify, and distribute this software and its documentation for educational, - * research, and non-profit purposes, without fee, and without a written agreement is hereby granted, - * provided that the above copyright notice, this paragraph, and the following four paragraphs appear - * in all copies. + * Permission to use, copy, modify, and distribute this software and its + * documentation for educational, research, and non-profit purposes, without + * fee, and without a written agreement is hereby granted, provided that the + * above copyright notice, this paragraph, and the following four paragraphs + * appear in all copies. * - * Permission to incorporate this software into commercial products may be obtained by contacting the - * authors or the Office of Technology Development at the University of North Carolina at Chapel Hill . + * Permission to incorporate this software into commercial products may be + * obtained by contacting the authors or the Office of Technology Development + * at the University of North Carolina at Chapel Hill . * - * This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill. - * The software program and documentation are supplied "as is," without any accompanying services from the - * University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill - * and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The - * end-user understands that the program was developed for research purposes and is advised not to rely - * exclusively on the program for any reason. + * This software program and documentation are copyrighted by the University of + * North Carolina at Chapel Hill. The software program and documentation are + * supplied "as is," without any accompanying services from the University of + * North Carolina at Chapel Hill or the authors. The University of North + * Carolina at Chapel Hill and the authors do not warrant that the operation of + * the program will be uninterrupted or error-free. The end-user understands + * that the program was developed for research purposes and is advised not to + * rely exclusively on the program for any reason. * - * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE - * USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE - * AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE + * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, + * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF + * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA + * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
* - * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY - * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY - * OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, + * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY + * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY + * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON + * AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND + * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, * ENHANCEMENTS, OR MODIFICATIONS. * * Please send all BUG REPORTS to . @@ -46,83 +55,92 @@ // // This code has been modified significantly from the original. -//-------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Copyright 2011 Intel Corporation // All Rights Reserved // -// Permission is granted to use, copy, distribute and prepare derivative works of this -// software for any purpose and without fee, provided, that the above copyright notice -// and this statement appear in all copies. Intel makes no representations about the -// suitability of this software for any purpose. THIS SOFTWARE IS PROVIDED "AS IS." -// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY, -// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, -// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not -// assume any responsibility for any errors which may appear in this software nor any +// Permission is granted to use, copy, distribute and prepare derivative works +// of this software for any purpose and without fee, provided, that the above +// copyright notice and this statement appear in all copies. Intel makes no +// representations about the suitability of this software for any purpose. THIS +// SOFTWARE IS PROVIDED "AS IS." INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, +// EXPRESS OR IMPLIED, AND ALL LIABILITY, INCLUDING CONSEQUENTIAL AND OTHER +// INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, INCLUDING LIABILITY FOR +// INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not assume +// any responsibility for any errors which may appear in this software nor any // responsibility to update it. // -//-------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ + +#ifndef BPTCENCODER_INCLUDE_BC7COMPRESSOR_H_ +#define BPTCENCODER_INCLUDE_BC7COMPRESSOR_H_ #include "BC7Config.h" #include "CompressionJob.h" class BlockStatManager; -namespace BC7C -{ +namespace BC7C { // This is the error metric that is applied to our error measurement algorithm - // in order to bias calculation towards results that are more in-line with - // how the Human Visual System works. 
Uniform error means that each color
-  // channel is treated equally. For a while, the widely accepted non-uniform metric
-  // has been to give red 30%, green 59% and blue 11% weight when computing the error
-  // between two pixels.
-  enum ErrorMetric
-  {
-    eErrorMetric_Uniform, // Treats r, g, and b channels equally
-    eErrorMetric_Nonuniform, // { 0.3, 0.59, 0.11 }
-
+  // in order to bias calculation towards results that are more in-line with
+  // how the Human Visual System works. Uniform error means that each color
+  // channel is treated equally. For a while, the widely accepted non-uniform
+  // metric has been to give red 30%, green 59% and blue 11% weight when
+  // computing the error between two pixels.
+  enum ErrorMetric {
+    eErrorMetric_Uniform,     // Treats r, g, and b channels equally
+    eErrorMetric_Nonuniform,  // { 0.3, 0.59, 0.11 }
+
     kNumErrorMetrics
   };

   // Sets the error metric to be the one specified.
   void SetErrorMetric(ErrorMetric e);

-  // Retreives a float4 pointer for the r, g, b, a weights for each color channel, in
-  // that order, based on the current error metric.
+  // Retrieves a float4 pointer for the r, g, b, a weights for each color
+  // channel, in that order, based on the current error metric.
   const float *GetErrorMetric();

   // Returns the enumeration for the current error metric.
   ErrorMetric GetErrorMetricEnum();

-  // Sets the number of steps that we use to perform simulated annealing. In general, a
-  // larger number produces better results. The default is set to 50. This metric works
-  // on a logarithmic scale -- twice the value will double the compute time, but only
-  // decrease the error by two times a factor.
+  // Sets the number of steps that we use to perform simulated annealing. In
+  // general, a larger number produces better results. The default is set to 50.
+  // This metric works on a logarithmic scale -- twice the value will double the
+  // compute time, but only decrease the error by a factor of two.
   void SetQualityLevel(int q);
   int GetQualityLevel();

-  // Compress the image given as RGBA data to BC7 format. Width and Height are the dimensions of
-  // the image in pixels.
+  // Compress the image given as RGBA data to BC7 format. Width and Height are
+  // the dimensions of the image in pixels.
   void Compress(const CompressionJob &);

-  // Perform a compression while recording all of the choices the compressor made into a
-  // list of statistics. We can use this to see whether or not certain heuristics are working, such as
-  // whether or not certain modes are being chosen more often than others, etc.
+  // Perform a compression while recording all of the choices the compressor
+  // made into a list of statistics. We can use this to see whether or not
+  // certain heuristics are working, such as whether or not certain modes are
+  // being chosen more often than others, etc.
   void CompressWithStats(const CompressionJob &, BlockStatManager &statManager);

 #ifdef HAS_SSE_41
-  // Compress the image given as RGBA data to BC7 format using an algorithm optimized for SIMD
-  // enabled platforms. Width and Height are the dimensions of the image in pixels.
-  void CompressImageBC7SIMD(const unsigned char* inBuf, unsigned char* outBuf, unsigned int width, unsigned int height);
+  // Compress the image given as RGBA data to BC7 format using an algorithm
+  // optimized for SIMD enabled platforms. Width and Height are the dimensions
+  // of the image in pixels.
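// Illustrative sketch (editor's addition, not part of this patch): how a
// caller might drive the BC7C interface declared above, assuming it has
// already built a CompressionJob describing its RGBA input and BC7 output
// buffers. Only functions declared in this header are used; the wrapper name
// is hypothetical.
void ExampleCompressTexture(const CompressionJob &job) {
  BC7C::SetErrorMetric(BC7C::eErrorMetric_Nonuniform);  // perceptual channel weights
  BC7C::SetQualityLevel(50);                            // default annealing step count
  BC7C::Compress(job);                                  // encode the job's RGBA data as BC7
}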
+ void CompressImageBC7SIMD(const unsigned char* inBuf, unsigned char* outBuf, + unsigned int width, unsigned int height); #endif #ifdef HAS_ATOMICS - // This is a threadsafe version of the compression function that is designed to compress a list of - // textures. If this function is called with the same argument from multiple threads, they will work - // together to compress all of the images in the list. + // This is a threadsafe version of the compression function that is designed + // to compress a list of textures. If this function is called with the same + // argument from multiple threads, they will work together to compress all of + // the images in the list. void CompressAtomic(CompressionJobList &); #endif - // Decompress the image given as BC7 data to R8G8B8A8 format. Width and Height are the dimensions of the image in pixels. + // Decompress the image given as BC7 data to R8G8B8A8 format. Width and Height + // are the dimensions of the image in pixels. void Decompress(const DecompressionJob &); -} +} // namespace BC7C + +#endif // BPTCENCODER_INCLUDE_BC7COMPRESSOR_H_ diff --git a/BPTCEncoder/src/BC7CompressionMode.h b/BPTCEncoder/src/BC7CompressionMode.h index a56531c..4dc6cf0 100755 --- a/BPTCEncoder/src/BC7CompressionMode.h +++ b/BPTCEncoder/src/BC7CompressionMode.h @@ -1,30 +1,39 @@ /* FasTC - * Copyright (c) 2012 University of North Carolina at Chapel Hill. All rights reserved. + * Copyright (c) 2012 University of North Carolina at Chapel Hill. + * All rights reserved. * - * Permission to use, copy, modify, and distribute this software and its documentation for educational, - * research, and non-profit purposes, without fee, and without a written agreement is hereby granted, - * provided that the above copyright notice, this paragraph, and the following four paragraphs appear - * in all copies. + * Permission to use, copy, modify, and distribute this software and its + * documentation for educational, research, and non-profit purposes, without + * fee, and without a written agreement is hereby granted, provided that the + * above copyright notice, this paragraph, and the following four paragraphs + * appear in all copies. * - * Permission to incorporate this software into commercial products may be obtained by contacting the - * authors or the Office of Technology Development at the University of North Carolina at Chapel Hill . + * Permission to incorporate this software into commercial products may be + * obtained by contacting the authors or the Office of Technology Development + * at the University of North Carolina at Chapel Hill . * - * This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill. - * The software program and documentation are supplied "as is," without any accompanying services from the - * University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill - * and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The - * end-user understands that the program was developed for research purposes and is advised not to rely - * exclusively on the program for any reason. + * This software program and documentation are copyrighted by the University of + * North Carolina at Chapel Hill. The software program and documentation are + * supplied "as is," without any accompanying services from the University of + * North Carolina at Chapel Hill or the authors. 
The University of North + * Carolina at Chapel Hill and the authors do not warrant that the operation of + * the program will be uninterrupted or error-free. The end-user understands + * that the program was developed for research purposes and is advised not to + * rely exclusively on the program for any reason. * - * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE - * USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE - * AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE + * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, + * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF + * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA + * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. * - * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY - * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY - * OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, + * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY + * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY + * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON + * AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND + * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, * ENHANCEMENTS, OR MODIFICATIONS. * * Please send all BUG REPORTS to . @@ -46,25 +55,26 @@ // // This code has been modified significantly from the original. -//-------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Copyright 2011 Intel Corporation // All Rights Reserved // -// Permission is granted to use, copy, distribute and prepare derivative works of this -// software for any purpose and without fee, provided, that the above copyright notice -// and this statement appear in all copies. Intel makes no representations about the -// suitability of this software for any purpose. THIS SOFTWARE IS PROVIDED "AS IS." -// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY, -// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, -// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not -// assume any responsibility for any errors which may appear in this software nor any +// Permission is granted to use, copy, distribute and prepare derivative works +// of this software for any purpose and without fee, provided, that the above +// copyright notice and this statement appear in all copies. Intel makes no +// representations about the suitability of this software for any purpose. 
THIS +// SOFTWARE IS PROVIDED "AS IS." INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, +// EXPRESS OR IMPLIED, AND ALL LIABILITY, INCLUDING CONSEQUENTIAL AND OTHER +// INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, INCLUDING LIABILITY FOR +// INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not assume +// any responsibility for any errors which may appear in this software nor any // responsibility to update it. // -//-------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ -#ifndef __BC7_COMPRESSIONMODE_SIMD_H__ -#define __BC7_COMPRESSIONMODE_SIMD_H__ +#ifndef BPTCENCODER_SRC_BC7COMPRESSIONMODE_H_ +#define BPTCENCODER_SRC_BC7COMPRESSIONMODE_H_ #include "RGBAEndpoints.h" @@ -80,8 +90,6 @@ static const int kPBits[4][2] = { { 1, 1 } }; -// Abstract class that outlines all of the different settings for BC7 compression modes -// Note that at the moment, we only support modes 0-3, so we don't deal with alpha channels. class BC7CompressionMode { public: @@ -89,36 +97,40 @@ class BC7CompressionMode { static const uint32 kMaxNumSubsets = 3; static const uint32 kNumModes = 8; - // This initializes the compression variables used in order to compress a list of clusters. - // We can increase the speed a tad by specifying whether or not the block is opaque or not. - explicit BC7CompressionMode(int mode, bool opaque = true) + // This initializes the compression variables used in order to compress a list + // of clusters. We can increase the speed a tad by specifying whether or not + // the block is opaque or not. + explicit BC7CompressionMode(int mode, bool opaque = true) : m_IsOpaque(opaque) , m_Attributes(&(kModeAttributes[mode])) , m_RotateMode(0) - , m_IndexMode(0) + , m_IndexMode(0) { } ~BC7CompressionMode() { } - // This function compresses a group of clusters into the passed bitstream. The size of the - // clusters array is determined by the BC7 compression mode. - double Compress(BitStream &stream, const int shapeIdx, const RGBACluster *clusters); + // This function compresses a group of clusters into the passed bitstream. The + // size of the clusters array is determined by the BC7 compression mode. + double Compress(BitStream &stream, + const int shapeIdx, const RGBACluster *clusters); - // This switch controls the quality of the simulated annealing optimizer. We will not make - // more than this many steps regardless of how bad the error is. Higher values will produce - // better quality results but will run slower. Default is 20. - static int MaxAnnealingIterations; // This is a setting - static const int kMaxAnnealingIterations = 256; // This is a limit + // This switch controls the quality of the simulated annealing optimizer. We + // will not make more than this many steps regardless of how bad the error is. + // Higher values will produce better quality results but will run slower. + // Default is 20. + static int MaxAnnealingIterations; // This is a setting + static const int kMaxAnnealingIterations = 256; // This is a limit - // P-bits are low-order bits that are shared across color channels. This enum says whether or not - // both endpoints share a p-bit or whether or not they even have a p-bit. + // P-bits are low-order bits that are shared across color channels. This enum + // says whether or not both endpoints share a p-bit or whether or not they + // even have a p-bit. 
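// Illustrative sketch (editor's addition, not part of this patch): the effect
// of a p-bit on one endpoint channel. As I read the BC7 format, the shared
// low-order bit is appended below the quantized channel bits, and the result
// is expanded back to 8 bits by replicating its high bits. The helper below is
// hypothetical and only meant to make the comment above concrete.
static inline unsigned char ExampleExpandWithPBit(unsigned int bits,
                                                  int precision, int pBit) {
  const unsigned int v = (bits << 1) | (unsigned int)(pBit & 1);  // p-bit becomes the LSB
  const int total = precision + 1;                                // effective precision
  const unsigned int hi = v << (8 - total);                       // move to the top of a byte
  return (unsigned char)(hi | (hi >> total));                     // replicate high bits downward
}
// With 6-bit endpoints and a p-bit (mode 1's layout), bits = 0x2D expands to
// 0xB7 when the p-bit is 1 and to 0xB5 when it is 0.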
enum EPBitType { ePBitType_Shared, ePBitType_NotShared, ePBitType_None }; - // These are all the per-mode attributes that can be set. They are specified in a table - // and we access them through the private m_Attributes variable. + // These are all the per-mode attributes that can be set. They are specified + // in a table and we access them through the private m_Attributes variable. static struct Attributes { int modeNumber; int numPartitionBits; @@ -139,7 +151,7 @@ class BC7CompressionMode { } private: - + const double m_IsOpaque; const Attributes *const m_Attributes; @@ -149,32 +161,36 @@ class BC7CompressionMode { void SetIndexMode(int mode) { m_IndexMode = mode; } void SetRotationMode(int mode) { m_RotateMode = mode; } - int GetRotationMode() const { return m_Attributes->hasRotation? m_RotateMode : 0; } + int GetRotationMode() const { + return m_Attributes->hasRotation? m_RotateMode : 0; + } int GetModeNumber() const { return m_Attributes->modeNumber; } - int GetNumberOfPartitionBits() const { return m_Attributes->numPartitionBits; } + int GetNumberOfPartitionBits() const { + return m_Attributes->numPartitionBits; + } int GetNumberOfSubsets() const { return m_Attributes->numSubsets; } - int GetNumberOfBitsPerIndex(int indexMode = -1) const { + int GetNumberOfBitsPerIndex(int indexMode = -1) const { if(indexMode < 0) indexMode = m_IndexMode; if(indexMode == 0) - return m_Attributes->numBitsPerIndex; + return m_Attributes->numBitsPerIndex; else - return m_Attributes->numBitsPerAlpha; + return m_Attributes->numBitsPerAlpha; } - int GetNumberOfBitsPerAlpha(int indexMode = -1) const { + int GetNumberOfBitsPerAlpha(int indexMode = -1) const { if(indexMode < 0) indexMode = m_IndexMode; if(indexMode == 0) - return m_Attributes->numBitsPerAlpha; + return m_Attributes->numBitsPerAlpha; else - return m_Attributes->numBitsPerIndex; + return m_Attributes->numBitsPerIndex; } // If we handle alpha separately, then we will consider the alpha channel // to be not used whenever we do any calculations... - int GetAlphaChannelPrecision() const { - return m_Attributes->alphaChannelPrecision; + int GetAlphaChannelPrecision() const { + return m_Attributes->alphaChannelPrecision; } // This returns the proper error metric even if we have rotation bits set @@ -192,24 +208,25 @@ class BC7CompressionMode { EPBitType GetPBitType() const { return m_Attributes->pbitType; } // This function creates an integer that represents the maximum values in each - // channel. We can use this to figure out the proper endpoint values for a given - // mode. + // channel. We can use this to figure out the proper endpoint values for a + // given mode. 
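// Illustrative sketch (editor's addition, not part of this patch): what the
// quantization mask computed below works out to, written without the
// arithmetic-shift trick. Each byte of the mask keeps the top `precision` bits
// of its channel. The helper and its name are hypothetical.
static inline unsigned int ExampleQuantizationMask(int colorPrec, int alphaPrec) {
  const unsigned int c = (0xFFu << (8 - colorPrec)) & 0xFFu;  // top colorPrec bits of a byte
  unsigned int mask = c | (c << 8) | (c << 16);               // same mask for r, g and b
  if (alphaPrec > 0) {
    mask |= ((0xFFu << (8 - alphaPrec)) & 0xFFu) << 24;       // alpha keeps its own precision
  }
  return mask;
}
// e.g. 5-bit color with no alpha (mode 2) gives 0x00F8F8F8, and 7-bit color
// with 8-bit alpha (mode 5) gives 0xFFFEFEFE.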
unsigned int GetQuantizationMask() const { const int maskSeed = 0x80000000; const uint32 alphaPrec = GetAlphaChannelPrecision(); + const uint32 cbits = m_Attributes->colorChannelPrecision - 1; + const uint32 abits = GetAlphaChannelPrecision() - 1; if(alphaPrec > 0) { return ( - (maskSeed >> (24 + m_Attributes->colorChannelPrecision - 1) & 0xFF) | - (maskSeed >> (16 + m_Attributes->colorChannelPrecision - 1) & 0xFF00) | - (maskSeed >> (8 + m_Attributes->colorChannelPrecision - 1) & 0xFF0000) | - (maskSeed >> (GetAlphaChannelPrecision() - 1) & 0xFF000000) + (maskSeed >> (24 + cbits) & 0xFF) | + (maskSeed >> (16 + cbits) & 0xFF00) | + (maskSeed >> (8 + cbits) & 0xFF0000) | + (maskSeed >> abits & 0xFF000000) ); - } - else { + } else { return ( - ((maskSeed >> (24 + m_Attributes->colorChannelPrecision - 1) & 0xFF) | - (maskSeed >> (16 + m_Attributes->colorChannelPrecision - 1) & 0xFF00) | - (maskSeed >> (8 + m_Attributes->colorChannelPrecision - 1) & 0xFF0000)) & + ((maskSeed >> (24 + cbits) & 0xFF) | + (maskSeed >> (16 + cbits) & 0xFF00) | + (maskSeed >> (8 + cbits) & 0xFF0000)) & (0x00FFFFFF) ); } @@ -234,7 +251,7 @@ class BC7CompressionMode { } // This performs simulated annealing on the endpoints p1 and p2 based on the - // current MaxAnnealingIterations. This is set by calling the function + // current MaxAnnealingIterations. This is set by calling the function // SetQualityLevel double OptimizeEndpointsForCluster( const RGBACluster &cluster, @@ -247,40 +264,49 @@ class BC7CompressionMode { // endpoints to p1 and p2 based on the compression mode (index precision, // endpoint precision etc) void PickBestNeighboringEndpoints( - const RGBACluster &cluster, - const RGBAVector &p1, const RGBAVector &p2, - const int curPbitCombo, - RGBAVector &np1, RGBAVector &np2, - int &nPbitCombo, - const VisitedState *visitedStates, - int nVisited, + const RGBACluster &cluster, + const RGBAVector &p1, const RGBAVector &p2, + const int curPbitCombo, + RGBAVector &np1, RGBAVector &np2, + int &nPbitCombo, + const VisitedState *visitedStates, + int nVisited, float stepSz = 1.0f ) const; - // This is used by simulated annealing to determine whether or not the newError - // (from the neighboring endpoints) is sufficient to continue the annealing process - // from these new endpoints based on how good the oldError was, and how long we've - // been annealing (temp) - bool AcceptNewEndpointError(double newError, double oldError, float temp) const; + // This is used by simulated annealing to determine whether or not the + // newError (from the neighboring endpoints) is sufficient to continue the + // annealing process from these new endpoints based on how good the oldError + // was, and how long we've been annealing (t) + bool AcceptNewEndpointError(double newError, double oldError, float t) const; - // This function figures out the best compression for the single color p, and places - // the endpoints in p1 and p2. If the compression mode supports p-bits, then we - // choose the best p-bit combo and return it as well. - double CompressSingleColor(const RGBAVector &p, RGBAVector &p1, RGBAVector &p2, int &bestPbitCombo) const; + // This function figures out the best compression for the single color p, and + // places the endpoints in p1 and p2. If the compression mode supports p-bits, + // then we choose the best p-bit combo and return it as well. + double CompressSingleColor(const RGBAVector &p, + RGBAVector &p1, RGBAVector &p2, + int &bestPbitCombo) const; - // Compress the cluster using a generalized cluster fit. 
This figures out the proper endpoints - // assuming that we have no alpha. - double CompressCluster(const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, int *bestIndices, int &bestPbitCombo) const; + // Compress the cluster using a generalized cluster fit. This figures out the + // proper endpoints assuming that we have no alpha. + double CompressCluster(const RGBACluster &cluster, + RGBAVector &p1, RGBAVector &p2, + int *bestIndices, int &bestPbitCombo) const; - // Compress the non-opaque cluster using a generalized cluster fit, and place the - // endpoints within p1 and p2. The color indices and alpha indices are computed as well. - double CompressCluster(const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, int *bestIndices, int *alphaIndices) const; + // Compress the non-opaque cluster using a generalized cluster fit, and place + // the endpoints within p1 and p2. The color indices and alpha indices are + // computed as well. + double CompressCluster(const RGBACluster &cluster, + RGBAVector &p1, RGBAVector &p2, + int *bestIndices, int *alphaIndices) const; - // This function takes two endpoints in the continuous domain (as floats) and clamps them - // to the nearest grid points based on the compression mode (and possible pbit values) - void ClampEndpointsToGrid(RGBAVector &p1, RGBAVector &p2, int &bestPBitCombo) const; + // This function takes two endpoints in the continuous domain (as floats) and + // clamps them to the nearest grid points based on the compression mode (and + // possible pbit values) + void ClampEndpointsToGrid(RGBAVector &p1, RGBAVector &p2, + int &bestPBitCombo) const; }; extern const uint32 kBC7InterpolationValues[4][16][2]; -#endif // __BC7_COMPRESSIONMODE_SIMD_H__ +#endif // BPTCENCODER_SRC_BC7COMPRESSIONMODE_H_ diff --git a/BPTCEncoder/src/BC7CompressionModeSIMD.h b/BPTCEncoder/src/BC7CompressionModeSIMD.h index b0f5603..60af9af 100755 --- a/BPTCEncoder/src/BC7CompressionModeSIMD.h +++ b/BPTCEncoder/src/BC7CompressionModeSIMD.h @@ -1,30 +1,39 @@ /* FasTC - * Copyright (c) 2012 University of North Carolina at Chapel Hill. All rights reserved. + * Copyright (c) 2012 University of North Carolina at Chapel Hill. + * All rights reserved. * - * Permission to use, copy, modify, and distribute this software and its documentation for educational, - * research, and non-profit purposes, without fee, and without a written agreement is hereby granted, - * provided that the above copyright notice, this paragraph, and the following four paragraphs appear - * in all copies. + * Permission to use, copy, modify, and distribute this software and its + * documentation for educational, research, and non-profit purposes, without + * fee, and without a written agreement is hereby granted, provided that the + * above copyright notice, this paragraph, and the following four paragraphs + * appear in all copies. * - * Permission to incorporate this software into commercial products may be obtained by contacting the - * authors or the Office of Technology Development at the University of North Carolina at Chapel Hill . + * Permission to incorporate this software into commercial products may be + * obtained by contacting the authors or the Office of Technology Development + * at the University of North Carolina at Chapel Hill . * - * This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill. 
- * The software program and documentation are supplied "as is," without any accompanying services from the - * University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill - * and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The - * end-user understands that the program was developed for research purposes and is advised not to rely - * exclusively on the program for any reason. + * This software program and documentation are copyrighted by the University of + * North Carolina at Chapel Hill. The software program and documentation are + * supplied "as is," without any accompanying services from the University of + * North Carolina at Chapel Hill or the authors. The University of North + * Carolina at Chapel Hill and the authors do not warrant that the operation of + * the program will be uninterrupted or error-free. The end-user understands + * that the program was developed for research purposes and is advised not to + * rely exclusively on the program for any reason. * - * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE - * USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE - * AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE + * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, + * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF + * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA + * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. * - * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY - * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY - * OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, + * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY + * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY + * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON + * AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND + * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, * ENHANCEMENTS, OR MODIFICATIONS. * * Please send all BUG REPORTS to . @@ -46,25 +55,26 @@ // // This code has been modified significantly from the original. -//-------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Copyright 2011 Intel Corporation // All Rights Reserved // -// Permission is granted to use, copy, distribute and prepare derivative works of this -// software for any purpose and without fee, provided, that the above copyright notice -// and this statement appear in all copies. Intel makes no representations about the -// suitability of this software for any purpose. 
THIS SOFTWARE IS PROVIDED "AS IS." -// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY, -// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, -// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not -// assume any responsibility for any errors which may appear in this software nor any +// Permission is granted to use, copy, distribute and prepare derivative works +// of this software for any purpose and without fee, provided, that the above +// copyright notice and this statement appear in all copies. Intel makes no +// representations about the suitability of this software for any purpose. THIS +// SOFTWARE IS PROVIDED "AS IS." INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, +// EXPRESS OR IMPLIED, AND ALL LIABILITY, INCLUDING CONSEQUENTIAL AND OTHER +// INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, INCLUDING LIABILITY FOR +// INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not assume +// any responsibility for any errors which may appear in this software nor any // responsibility to update it. // -//-------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ -#ifndef __BC7_COMPRESSIONMODE_H__ -#define __BC7_COMPRESSIONMODE_H__ +#ifndef BPTCENCODER_SRC_BC7COMPRESSIONMODESIMD_H_ +#define BPTCENCODER_SRC_BC7COMPRESSIONMODESIMD_H_ #include "TexCompTypes.h" #include "RGBAEndpointsSIMD.h" @@ -73,129 +83,158 @@ class BitStream; static const int kPBits[4][2] = { - { 0, 0 }, - { 0, 1 }, - { 1, 0 }, - { 1, 1 } + { 0, 0 }, + { 0, 1 }, + { 1, 0 }, + { 1, 1 } }; -// Abstract class that outlines all of the different settings for BC7 compression modes -// Note that at the moment, we only support modes 0-3, so we don't deal with alpha channels. class BC7CompressionModeSIMD { -public: + public: - static const int kMaxNumSubsets = 3; - static const int kNumModes = 8; + static const int kMaxNumSubsets = 3; + static const int kNumModes = 8; - enum EPBitType { - ePBitType_Shared, - ePBitType_NotShared, - ePBitType_None - }; + enum EPBitType { + ePBitType_Shared, + ePBitType_NotShared, + ePBitType_None + }; - BC7CompressionModeSIMD(int mode, double err) : m_EstimatedError(err), m_Attributes(&(kModeAttributes[mode])) { } - ~BC7CompressionModeSIMD() { } + BC7CompressionModeSIMD(int mode, double err) + : m_EstimatedError(err) + , m_Attributes(&(kModeAttributes[mode])) + { } + ~BC7CompressionModeSIMD() { } - static int NumUses[8]; - static void ResetNumUses() { memset(NumUses, 0, sizeof(NumUses)); } + static int NumUses[8]; + static void ResetNumUses() { memset(NumUses, 0, sizeof(NumUses)); } - double Compress(BitStream &stream, const int shapeIdx, const RGBAClusterSIMD *clusters) const; + double Compress(BitStream &stream, const int shapeIdx, + const RGBAClusterSIMD *clusters) const; - // This switch controls the quality of the simulated annealing optimizer. We will not make - // more than this many steps regardless of how bad the error is. Higher values will produce - // better quality results but will run slower. Default is 50. - static int MaxAnnealingIterations; // This is a setting + // This switch controls the quality of the simulated annealing optimizer. 
We + // will not make more than this many steps regardless of how bad the error is. + // Higher values will produce better quality results but will run slower. + // Default is 50. + static int MaxAnnealingIterations; // This is a setting -private: + private: - static struct Attributes { - int modeNumber; - int numPartitionBits; - int numSubsets; - int numBitsPerIndex; - int redChannelPrecision; - int greenChannelPrecision; - int blueChannelPrecision; - int alphaChannelPrecision; - EPBitType pbitType; - } kModeAttributes[kNumModes]; + static struct Attributes { + int modeNumber; + int numPartitionBits; + int numSubsets; + int numBitsPerIndex; + int redChannelPrecision; + int greenChannelPrecision; + int blueChannelPrecision; + int alphaChannelPrecision; + EPBitType pbitType; + } kModeAttributes[kNumModes]; -protected: - const Attributes *const m_Attributes; + protected: + const Attributes *const m_Attributes; - int GetModeNumber() const { return m_Attributes->modeNumber; } - int GetNumberOfPartitionBits() const { return m_Attributes->numPartitionBits; } - int GetNumberOfSubsets() const { return m_Attributes->numSubsets; } - int GetNumberOfBitsPerIndex() const { return m_Attributes->numBitsPerIndex; } + int GetModeNumber() const { return m_Attributes->modeNumber; } + int GetNumberOfPartitionBits() const { + return m_Attributes->numPartitionBits; + } + int GetNumberOfSubsets() const { return m_Attributes->numSubsets; } + int GetNumberOfBitsPerIndex() const { return m_Attributes->numBitsPerIndex; } - int GetRedChannelPrecision() const { return m_Attributes->redChannelPrecision; } - int GetGreenChannelPrecision() const { return m_Attributes->greenChannelPrecision; } - int GetBlueChannelPrecision() const { return m_Attributes->blueChannelPrecision; } - int GetAlphaChannelPrecision() const { return m_Attributes->alphaChannelPrecision; } + int GetRedChannelPrecision() const { + return m_Attributes->redChannelPrecision; + } - EPBitType GetPBitType() const { return m_Attributes->pbitType; } + int GetGreenChannelPrecision() const { + return m_Attributes->greenChannelPrecision; + } - // !SPEED! Add this to the attributes lookup table - void GetQuantizationMask(__m128i &mask) const { - const int maskSeed = 0x80000000; - mask = _mm_set_epi32( - (GetAlphaChannelPrecision() > 0)? (maskSeed >> (24 + GetAlphaChannelPrecision() - 1) & 0xFF) : 0xFF, - (maskSeed >> (24 + GetBlueChannelPrecision() - 1) & 0xFF), - (maskSeed >> (24 + GetGreenChannelPrecision() - 1) & 0xFF), - (maskSeed >> (24 + GetRedChannelPrecision() - 1) & 0xFF) - ); - } + int GetBlueChannelPrecision() const { + return m_Attributes->blueChannelPrecision; + } - int GetNumPbitCombos() const { - switch(GetPBitType()) { - case ePBitType_Shared: return 2; - case ePBitType_NotShared: return 4; - default: - case ePBitType_None: return 1; - } - } + int GetAlphaChannelPrecision() const { + return m_Attributes->alphaChannelPrecision; + } - const int *GetPBitCombo(int idx) const { - switch(GetPBitType()) { - case ePBitType_Shared: return (idx)? kPBits[3] : kPBits[0]; - case ePBitType_NotShared: return kPBits[idx % 4]; - default: - case ePBitType_None: return kPBits[0]; - } - } - - double OptimizeEndpointsForCluster(const RGBAClusterSIMD &cluster, RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, __m128i *bestIndices, int &bestPbitCombo) const; + EPBitType GetPBitType() const { return m_Attributes->pbitType; } - struct VisitedState { - RGBAVectorSIMD p1; - RGBAVectorSIMD p2; - int pBitCombo; - }; + // !SPEED! 
Add this to the attributes lookup table + void GetQuantizationMask(__m128i &mask) const { + const int maskSeed = 0x80000000; + const uint32 abits = 24 + GetAlphaChannelPrecision() - 1; + const uint32 rbits = 24 + GetRedChannelPrecision() - 1; + const uint32 gbits = 24 + GetGreenChannelPrecision() - 1; + const uint32 bbits = 24 + GetBlueChannelPrecision() - 1; + mask = _mm_set_epi32( + (GetAlphaChannelPrecision() > 0)? (maskSeed >> abits & 0xFF) : 0xFF, + (maskSeed >> rbits & 0xFF), + (maskSeed >> gbits & 0xFF), + (maskSeed >> bbits & 0xFF) + ); + } - void PickBestNeighboringEndpoints( - const RGBAClusterSIMD &cluster, - const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, - const int curPbitCombo, - RGBAVectorSIMD &np1, RGBAVectorSIMD &np2, - int &nPbitCombo, - const __m128 &stepVec - ) const; + int GetNumPbitCombos() const { + switch(GetPBitType()) { + case ePBitType_Shared: return 2; + case ePBitType_NotShared: return 4; + default: + case ePBitType_None: return 1; + } + } - bool AcceptNewEndpointError(float newError, float oldError, float temp) const; + const int *GetPBitCombo(int idx) const { + switch(GetPBitType()) { + case ePBitType_Shared: return (idx)? kPBits[3] : kPBits[0]; + case ePBitType_NotShared: return kPBits[idx % 4]; + default: + case ePBitType_None: return kPBits[0]; + } + } - double CompressSingleColor(const RGBAVectorSIMD &p, RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, int &bestPbitCombo) const; - double CompressCluster(const RGBAClusterSIMD &cluster, RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, __m128i *bestIndices, int &bestPbitCombo) const; + double OptimizeEndpointsForCluster(const RGBAClusterSIMD &cluster, + RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, + __m128i *bestIndices, + int &bestPbitCombo) const; - void ClampEndpointsToGrid(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, int &bestPBitCombo) const; + struct VisitedState { + RGBAVectorSIMD p1; + RGBAVectorSIMD p2; + int pBitCombo; + }; - int GetSubsetForIndex(int idx, const int shapeIdx) const; - int GetAnchorIndexForSubset(int subset, const int shapeIdx) const; + void PickBestNeighboringEndpoints( + const RGBAClusterSIMD &cluster, + const RGBAVectorSIMD &p1, const RGBAVectorSIMD &p2, + const int curPbitCombo, + RGBAVectorSIMD &np1, RGBAVectorSIMD &np2, + int &nPbitCombo, + const __m128 &stepVec + ) const; - double GetEstimatedError() const { return m_EstimatedError; } - const double m_EstimatedError; + bool AcceptNewEndpointError(float newError, float oldError, float temp) const; + + double CompressSingleColor(const RGBAVectorSIMD &p, + RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, + int &bestPbitCombo) const; + double CompressCluster(const RGBAClusterSIMD &cluster, + RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, + __m128i *bestIndices, + int &bestPbitCombo) const; + + void ClampEndpointsToGrid(RGBAVectorSIMD &p1, RGBAVectorSIMD &p2, + int &bestPBitCombo) const; + + int GetSubsetForIndex(int idx, const int shapeIdx) const; + int GetAnchorIndexForSubset(int subset, const int shapeIdx) const; + + double GetEstimatedError() const { return m_EstimatedError; } + const double m_EstimatedError; }; extern const __m128i kBC7InterpolationValuesSIMD[4][16][2]; extern const uint32 kBC7InterpolationValuesScalar[4][16][2]; -#endif // __BC7_COMPRESSIONMODE_H__ +#endif // BPTCENCODER_SRC_BC7COMPRESSIONMODESIMD_H_ diff --git a/BPTCEncoder/src/BC7Compressor.cpp b/BPTCEncoder/src/BC7Compressor.cpp index 1e80aac..1f4a4e1 100755 --- a/BPTCEncoder/src/BC7Compressor.cpp +++ b/BPTCEncoder/src/BC7Compressor.cpp @@ -1,30 +1,39 @@ /* FasTC - * Copyright (c) 2012 
University of North Carolina at Chapel Hill. All rights reserved. + * Copyright (c) 2012 University of North Carolina at Chapel Hill. + * All rights reserved. * - * Permission to use, copy, modify, and distribute this software and its documentation for educational, - * research, and non-profit purposes, without fee, and without a written agreement is hereby granted, - * provided that the above copyright notice, this paragraph, and the following four paragraphs appear - * in all copies. + * Permission to use, copy, modify, and distribute this software and its + * documentation for educational, research, and non-profit purposes, without + * fee, and without a written agreement is hereby granted, provided that the + * above copyright notice, this paragraph, and the following four paragraphs + * appear in all copies. * - * Permission to incorporate this software into commercial products may be obtained by contacting the - * authors or the Office of Technology Development at the University of North Carolina at Chapel Hill . + * Permission to incorporate this software into commercial products may be + * obtained by contacting the authors or the Office of Technology Development + * at the University of North Carolina at Chapel Hill . * - * This software program and documentation are copyrighted by the University of North Carolina at Chapel Hill. - * The software program and documentation are supplied "as is," without any accompanying services from the - * University of North Carolina at Chapel Hill or the authors. The University of North Carolina at Chapel Hill - * and the authors do not warrant that the operation of the program will be uninterrupted or error-free. The - * end-user understands that the program was developed for research purposes and is advised not to rely - * exclusively on the program for any reason. + * This software program and documentation are copyrighted by the University of + * North Carolina at Chapel Hill. The software program and documentation are + * supplied "as is," without any accompanying services from the University of + * North Carolina at Chapel Hill or the authors. The University of North + * Carolina at Chapel Hill and the authors do not warrant that the operation of + * the program will be uninterrupted or error-free. The end-user understands + * that the program was developed for research purposes and is advised not to + * rely exclusively on the program for any reason. * - * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE AUTHORS BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE - * USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE - * AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * IN NO EVENT SHALL THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL OR THE + * AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, + * OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF + * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF NORTH CAROLINA + * AT CHAPEL HILL OR THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. * - * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY - * STATUTORY WARRANTY OF NON-INFRINGEMENT. 
THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY - * OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, + * THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND THE AUTHORS SPECIFICALLY + * DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND ANY + * STATUTORY WARRANTY OF NON-INFRINGEMENT. THE SOFTWARE PROVIDED HEREUNDER IS ON + * AN "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL AND + * THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, * ENHANCEMENTS, OR MODIFICATIONS. * * Please send all BUG REPORTS to . @@ -46,22 +55,23 @@ // // This code has been modified significantly from the original. -//-------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Copyright 2011 Intel Corporation // All Rights Reserved // -// Permission is granted to use, copy, distribute and prepare derivative works of this -// software for any purpose and without fee, provided, that the above copyright notice -// and this statement appear in all copies. Intel makes no representations about the -// suitability of this software for any purpose. THIS SOFTWARE IS PROVIDED "AS IS." -// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY, -// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, -// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not -// assume any responsibility for any errors which may appear in this software nor any +// Permission is granted to use, copy, distribute and prepare derivative works +// of this software for any purpose and without fee, provided, that the above +// copyright notice and this statement appear in all copies. Intel makes no +// representations about the suitability of this software for any purpose. THIS +// SOFTWARE IS PROVIDED "AS IS." INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, +// EXPRESS OR IMPLIED, AND ALL LIABILITY, INCLUDING CONSEQUENTIAL AND OTHER +// INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, INCLUDING LIABILITY FOR +// INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not assume +// any responsibility for any errors which may appear in this software nor any // responsibility to update it. 
// -//-------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ #include "BC7Compressor.h" #include "BC7CompressionMode.h" @@ -81,7 +91,7 @@ #ifdef _MSC_VER # undef min # undef max -#endif // _MSC_VER +#endif // _MSC_VER #include #include @@ -162,50 +172,61 @@ static const uint16 kShapeMask2[kNumShapes2] = { }; static const int kAnchorIdx2[kNumShapes2] = { - 15,15,15,15,15,15,15,15, - 15,15,15,15,15,15,15,15, - 15, 2, 8, 2, 2, 8, 8,15, - 2, 8, 2, 2, 8, 8, 2, 2, - 15,15, 6, 8, 2, 8,15,15, - 2, 8, 2, 2, 2,15,15, 6, - 6, 2, 6, 8,15,15, 2, 2, - 15,15,15,15,15, 2, 2, 15 + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 2, 8, 2, 2, 8, 8, 15, + 2 , 8, 2, 2, 8, 8, 2, 2, + 15, 15, 6, 8, 2, 8, 15, 15, + 2 , 8, 2, 2, 2, 15, 15, 6, + 6 , 2, 6, 8, 15, 15, 2, 2, + 15, 15, 15, 15, 15, 2, 2, 15 }; static const uint32 kNumShapes3 = 64; static const uint16 kShapeMask3[kNumShapes3][2] = { - { 0xfecc, 0xf600 }, { 0xffc8, 0x7300 }, { 0xff90, 0x3310 }, { 0xecce, 0x00ce }, { 0xff00, 0xcc00 }, { 0xcccc, 0xcc00 }, { 0xffcc, 0x00cc }, { 0xffcc, 0x3300 }, - { 0xff00, 0xf000 }, { 0xfff0, 0xf000 }, { 0xfff0, 0xff00 }, { 0xcccc, 0x8888 }, { 0xeeee, 0x8888 }, { 0xeeee, 0xcccc }, { 0xffec, 0xec80 }, { 0x739c, 0x7310 }, - { 0xfec8, 0xc800 }, { 0x39ce, 0x3100 }, { 0xfff0, 0xccc0 }, { 0xfccc, 0x0ccc }, { 0xeeee, 0xee00 }, { 0xff88, 0x7700 }, { 0xeec0, 0xcc00 }, { 0x7730, 0x3300 }, - { 0x0cee, 0x00cc }, { 0xffcc, 0xfc88 }, { 0x6ff6, 0x0660 }, { 0xff60, 0x6600 }, { 0xcbbc, 0xc88c }, { 0xf966, 0xf900 }, { 0xceec, 0x0cc0 }, { 0xff10, 0x7310 }, - { 0xff80, 0xec80 }, { 0xccce, 0x08ce }, { 0xeccc, 0xec80 }, { 0x6666, 0x4444 }, { 0x0ff0, 0x0f00 }, { 0x6db6, 0x4924 }, { 0x6bd6, 0x4294 }, { 0xcf3c, 0x0c30 }, - { 0xc3fc, 0x03c0 }, { 0xffaa, 0xff00 }, { 0xff00, 0x5500 }, { 0xfcfc, 0xcccc }, { 0xcccc, 0x0c0c }, { 0xf6f6, 0x6666 }, { 0xaffa, 0x0ff0 }, { 0xfff0, 0x5550 }, - { 0xfaaa, 0xf000 }, { 0xeeee, 0x0e0e }, { 0xf8f8, 0x8888 }, { 0xfff0, 0x9990 }, { 0xeeee, 0xe00e }, { 0x8ff8, 0x8888 }, { 0xf666, 0xf000 }, { 0xff00, 0x9900 }, - { 0xff66, 0xff00 }, { 0xcccc, 0xc00c }, { 0xcffc, 0xcccc }, { 0xf000, 0x9000 }, { 0x8888, 0x0808 }, { 0xfefe, 0xeeee }, { 0xfffa, 0xfff0 }, { 0x7bde, 0x7310 } + {0xfecc, 0xf600}, {0xffc8, 0x7300}, {0xff90, 0x3310}, {0xecce, 0x00ce}, + {0xff00, 0xcc00}, {0xcccc, 0xcc00}, {0xffcc, 0x00cc}, {0xffcc, 0x3300}, + {0xff00, 0xf000}, {0xfff0, 0xf000}, {0xfff0, 0xff00}, {0xcccc, 0x8888}, + {0xeeee, 0x8888}, {0xeeee, 0xcccc}, {0xffec, 0xec80}, {0x739c, 0x7310}, + {0xfec8, 0xc800}, {0x39ce, 0x3100}, {0xfff0, 0xccc0}, {0xfccc, 0x0ccc}, + {0xeeee, 0xee00}, {0xff88, 0x7700}, {0xeec0, 0xcc00}, {0x7730, 0x3300}, + {0x0cee, 0x00cc}, {0xffcc, 0xfc88}, {0x6ff6, 0x0660}, {0xff60, 0x6600}, + {0xcbbc, 0xc88c}, {0xf966, 0xf900}, {0xceec, 0x0cc0}, {0xff10, 0x7310}, + {0xff80, 0xec80}, {0xccce, 0x08ce}, {0xeccc, 0xec80}, {0x6666, 0x4444}, + {0x0ff0, 0x0f00}, {0x6db6, 0x4924}, {0x6bd6, 0x4294}, {0xcf3c, 0x0c30}, + {0xc3fc, 0x03c0}, {0xffaa, 0xff00}, {0xff00, 0x5500}, {0xfcfc, 0xcccc}, + {0xcccc, 0x0c0c}, {0xf6f6, 0x6666}, {0xaffa, 0x0ff0}, {0xfff0, 0x5550}, + {0xfaaa, 0xf000}, {0xeeee, 0x0e0e}, {0xf8f8, 0x8888}, {0xfff0, 0x9990}, + {0xeeee, 0xe00e}, {0x8ff8, 0x8888}, {0xf666, 0xf000}, {0xff00, 0x9900}, + {0xff66, 0xff00}, {0xcccc, 0xc00c}, {0xcffc, 0xcccc}, {0xf000, 0x9000}, + {0x8888, 0x0808}, {0xfefe, 0xeeee}, {0xfffa, 0xfff0}, {0x7bde, 0x7310} }; -static const uint32 kWMValues[] = { 0x32b92180, 
0x32ba3080, 0x31103200, 0x28103c80, 0x32bb3080, 0x25903600, 0x3530b900, 0x3b32b180, 0x34b5b980 }; +static const uint32 kWMValues[] = { + 0x32b92180, 0x32ba3080, 0x31103200, 0x28103c80, + 0x32bb3080, 0x25903600, 0x3530b900, 0x3b32b180, 0x34b5b98 +}; static const uint32 kNumWMVals = sizeof(kWMValues) / sizeof(kWMValues[0]); static uint32 gWMVal = -1; static const int kAnchorIdx3[2][kNumShapes3] = { - { 3, 3,15,15, 8, 3,15,15, - 8, 8, 6, 6, 6, 5, 3, 3, - 3, 3, 8,15, 3, 3, 6,10, - 5, 8, 8, 6, 8, 5,15,15, - 8,15, 3, 5, 6,10, 8,15, - 15, 3,15, 5,15,15,15,15, - 3,15, 5, 5, 5, 8, 5,10, - 5,10, 8,13,15,12, 3, 3 }, + {3, 3, 15, 15, 8, 3, 15, 15, + 8 , 8, 6, 6, 6, 5, 3, 3, + 3 , 3, 8, 15, 3, 3, 6, 10, + 5 , 8, 8, 6, 8, 5, 15, 15, + 8 , 15, 3, 5, 6, 10, 8, 15, + 15, 3, 15, 5, 15, 15, 15, 15, + 3 , 15, 5, 5, 5, 8, 5, 10, + 5 , 10, 8, 13, 15, 12, 3, 3 }, - { 15, 8, 8, 3,15,15, 3, 8, - 15,15,15,15,15,15,15, 8, - 15, 8,15, 3,15, 8,15, 8, - 3,15, 6,10,15,15,10, 8, - 15, 3,15,10,10, 8, 9,10, - 6,15, 8,15, 3, 6, 6, 8, - 15, 3,15,15,15,15,15,15, - 15,15,15,15, 3,15,15, 8 } + {15, 8, 8, 3, 15, 15, 3, 8, + 15 , 15, 15, 15, 15, 15, 15, 8, + 15 , 8, 15, 3, 15, 8, 15, 8, + 3 , 15, 6, 10, 15, 15, 10, 8, + 15 , 3, 15, 10, 10, 8, 9, 10, + 6 , 15, 8, 15, 3, 6, 6, 8, + 15 , 3, 15, 15, 15, 15, 15, 15, + 15 , 15, 15, 15, 3, 15, 15, 8 } }; template @@ -213,10 +234,10 @@ static inline T sad(const T &a, const T &b) { return (a > b)? a - b : b - a; } -static uint8 GetSubsetForIndex(int idx, const int shapeIdx, const int nSubsets) { +static uint8 GetSubsetForIndex(int idx, const int shapeIdx, const int nSubs) { int subset = 0; - - switch(nSubsets) { + + switch(nSubs) { case 2: { subset = !!((1 << idx) & kShapeMask2[shapeIdx]); @@ -239,16 +260,17 @@ static uint8 GetSubsetForIndex(int idx, const int shapeIdx, const int nSubsets) return subset; } -static uint32 GetAnchorIndexForSubset(int subset, const int shapeIdx, const int nSubsets) { - +static uint32 GetAnchorIndexForSubset( + int subset, const int shapeIdx, const int nSubsets +) { + int anchorIdx = 0; switch(subset) { case 1: { if(nSubsets == 2) { anchorIdx = kAnchorIdx2[shapeIdx]; - } - else { + } else { anchorIdx = kAnchorIdx3[0][shapeIdx]; } } @@ -281,26 +303,56 @@ template static inline void swap(T &a, T &b) { T t = a; a = b; b = t; } const uint32 kBC7InterpolationValues[4][16][2] = { - { {64, 0}, {33, 31}, {0, 64}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, - { {64, 0}, {43, 21}, {21, 43}, {0, 64}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, - { {64, 0}, {55, 9}, {46, 18}, {37, 27}, {27, 37}, {18, 46}, {9, 55}, {0, 64}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, - { {64, 0}, {60, 4}, {55, 9}, {51, 13}, {47, 17}, {43, 21}, {38, 26}, {34, 30}, {30, 34}, {26, 38}, {21, 43}, {17, 47}, {13, 51}, {9, 55}, {4, 60}, {0, 64} } + { {64, 0}, {33, 31}, {0, 64}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, + {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, + { {64, 0}, {43, 21}, {21, 43}, {0, 64}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, + {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, + { {64, 0}, {55, 9}, {46, 18}, {37, 27}, {27, 37}, {18, 46}, {9, 55}, {0, 64}, + {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0} }, + { {64, 0}, {60, 4}, {55, 9}, {51, 13}, {47, 17}, {43, 21}, {38, 26}, {34, 30}, + {30, 34}, {26, 38}, {21, 43}, {17, 47}, {13, 51}, {9, 55}, {4, 60}, {0, 64}} }; -int 
BC7CompressionMode::MaxAnnealingIterations = 50; // This is a setting. +int BC7CompressionMode::MaxAnnealingIterations = 50; // This is a setting. -BC7CompressionMode::Attributes BC7CompressionMode::kModeAttributes[kNumModes] = { - { 0, 4, 3, 3, 0, 4, 0, false, false, BC7CompressionMode::ePBitType_NotShared }, - { 1, 6, 2, 3, 0, 6, 0, false, false, BC7CompressionMode::ePBitType_Shared }, - { 2, 6, 3, 2, 0, 5, 0, false, false, BC7CompressionMode::ePBitType_None }, - { 3, 6, 2, 2, 0, 7, 0, false, false, BC7CompressionMode::ePBitType_NotShared }, - { 4, 0, 1, 2, 3, 5, 6, true, true, BC7CompressionMode::ePBitType_None }, - { 5, 0, 1, 2, 2, 7, 8, true, false, BC7CompressionMode::ePBitType_None }, - { 6, 0, 1, 4, 0, 7, 7, false, false, BC7CompressionMode::ePBitType_NotShared }, - { 7, 6, 2, 2, 0, 5, 5, false, false, BC7CompressionMode::ePBitType_NotShared }, +BC7CompressionMode::Attributes +BC7CompressionMode::kModeAttributes[kNumModes] = { + // Mode 0 + { 0, 4, 3, 3, 0, 4, 0, + false, false, BC7CompressionMode::ePBitType_NotShared }, + + // Mode 1 + { 1, 6, 2, 3, 0, 6, 0, + false, false, BC7CompressionMode::ePBitType_Shared }, + + // Mode 2 + { 2, 6, 3, 2, 0, 5, 0, + false, false, BC7CompressionMode::ePBitType_None }, + + // Mode 3 + { 3, 6, 2, 2, 0, 7, 0, + false, false, BC7CompressionMode::ePBitType_NotShared }, + + // Mode 4 + { 4, 0, 1, 2, 3, 5, 6, + true, true, BC7CompressionMode::ePBitType_None }, + + // Mode 5 + { 5, 0, 1, 2, 2, 7, 8, + true, false, BC7CompressionMode::ePBitType_None }, + + // Mode 6 + { 6, 0, 1, 4, 0, 7, 7, + false, false, BC7CompressionMode::ePBitType_NotShared }, + + // Mode 7 + { 7, 6, 2, 2, 0, 5, 5, + false, false, BC7CompressionMode::ePBitType_NotShared }, }; -void BC7CompressionMode::ClampEndpointsToGrid(RGBAVector &p1, RGBAVector &p2, int &bestPBitCombo) const { +void BC7CompressionMode::ClampEndpointsToGrid( + RGBAVector &p1, RGBAVector &p2, int &bestPBitCombo +) const { const int nPbitCombos = GetNumPbitCombos(); const bool hasPbits = nPbitCombos > 1; const uint32 qmask = GetQuantizationMask(); @@ -316,17 +368,13 @@ void BC7CompressionMode::ClampEndpointsToGrid(RGBAVector &p1, RGBAVector &p2, in if(hasPbits) { qp1 = p1.ToPixel(qmask, GetPBitCombo(i)[0]); qp2 = p2.ToPixel(qmask, GetPBitCombo(i)[1]); - } - else { + } else { qp1 = p1.ToPixel(qmask); qp2 = p2.ToPixel(qmask); } - uint8 *pqp1 = (uint8 *)&qp1; - uint8 *pqp2 = (uint8 *)&qp2; - - RGBAVector np1 = RGBAVector(float(pqp1[0]), float(pqp1[1]), float(pqp1[2]), float(pqp1[3])); - RGBAVector np2 = RGBAVector(float(pqp2[0]), float(pqp2[1]), float(pqp2[2]), float(pqp2[3])); + RGBAVector np1 = RGBAVector(qp1, 0); + RGBAVector np2 = RGBAVector(qp2, 0); RGBAVector d1 = np1 - p1; RGBAVector d2 = np2 - p2; @@ -342,30 +390,34 @@ void BC7CompressionMode::ClampEndpointsToGrid(RGBAVector &p1, RGBAVector &p2, in p2 = bp2; } -double BC7CompressionMode::CompressSingleColor(const RGBAVector &p, RGBAVector &p1, RGBAVector &p2, int &bestPbitCombo) const { - +double BC7CompressionMode::CompressSingleColor( + const RGBAVector &p, RGBAVector &p1, RGBAVector &p2, + int &bestPbitCombo +) const { const uint32 pixel = p.ToPixel(); - float bestError = FLT_MAX; bestPbitCombo = -1; for(int pbi = 0; pbi < GetNumPbitCombos(); pbi++) { - const int *pbitCombo = GetPBitCombo(pbi); - + uint32 dist[4] = { 0x0, 0x0, 0x0, 0x0 }; - uint32 bestValI[kNumColorChannels] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; - uint32 bestValJ[kNumColorChannels] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + uint32 bestValI[kNumColorChannels]; + 
uint32 bestValJ[kNumColorChannels];
+    memset(bestValI, 0xFF, sizeof(bestValI));
+    memset(bestValJ, 0xFF, sizeof(bestValJ));

     for(uint32 ci = 0; ci < kNumColorChannels; ci++) {

       const uint8 val = (pixel >> (ci * 8)) & 0xFF;
-      int nBits = ci == 3? GetAlphaChannelPrecision() : m_Attributes->colorChannelPrecision;
+      int nBits = m_Attributes->colorChannelPrecision;
+      if(ci == 3) {
+        nBits = GetAlphaChannelPrecision();
+      }

       // If we don't handle this channel, then it must be the full value (alpha)
       if(nBits == 0) {
         bestValI[ci] = bestValJ[ci] = 0xFF;
-        dist[ci] = std::max(dist[ci], (uint32)((uint8)0xFF - val));
+        dist[ci] = std::max(dist[ci], static_cast<uint32>(0xFF - val));
         continue;
       }

@@ -396,8 +448,9 @@ double BC7CompressionMode::CompressSingleColor(const RGBAVector &
         possValsL[i] |= (possValsL[i] >> nBits);
       }

-      const uint32 interpVal0 = kBC7InterpolationValues[GetNumberOfBitsPerIndex() - 1][1][0];
-      const uint32 interpVal1 = kBC7InterpolationValues[GetNumberOfBitsPerIndex() - 1][1][1];
+      const uint32 bpi = GetNumberOfBitsPerIndex() - 1;
+      const uint32 interpVal0 = kBC7InterpolationValues[bpi][1][0];
+      const uint32 interpVal1 = kBC7InterpolationValues[bpi][1][1];

       // Find the closest interpolated val that to the given val...
       uint32 bestChannelDist = 0xFF;
@@ -423,7 +476,7 @@ double BC7CompressionMode::CompressSingleColor(const RGBAVector &
     const float *errorWeights = BC7C::GetErrorMetric();
     float error = 0.0;
     for(int i = 0; i < kNumColorChannels; i++) {
-      float e = float(dist[i]) * errorWeights[i];
+      float e = static_cast<float>(dist[i]) * errorWeights[i];
       error += e * e;
     }

@@ -432,8 +485,8 @@ double BC7CompressionMode::CompressSingleColor(const RGBAVector &
       bestPbitCombo = pbi;

       for(uint32 ci = 0; ci < kNumColorChannels; ci++) {
-        p1.c[ci] = float(bestValI[ci]);
-        p2.c[ci] = float(bestValJ[ci]);
+        p1.c[ci] = static_cast<float>(bestValI[ci]);
+        p2.c[ci] = static_cast<float>(bestValJ[ci]);
       }
     }
   }
@@ -441,84 +494,80 @@ double BC7CompressionMode::CompressSingleColor(const RGBAVector &
   return bestError;
 }

-// Fast random number generator. See more information at
-// http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
-static uint32 g_seed = uint32(time(NULL));
-static inline uint32 fastrand() {
-  g_seed = (214013 * g_seed + 2531011);
-  return (g_seed>>16) & RAND_MAX;
-}
+// Fast random number generator. See more information at
+// http://software.intel.com/en-us/articles/fast-random-number-
+// generator-on-the-intel-pentiumr-4-processor/
+static uint32 g_seed = static_cast<uint32>(time(NULL));
+static inline uint32 fastrand() {
+  g_seed = (214013 * g_seed + 2531011);
+  return (g_seed>>16) & RAND_MAX;
+}

 static const int kNumStepDirections = 8;
 static const RGBADir kStepDirections[kNumStepDirections] = {

-  // For pBit changes, we have 8 possible directions.
- RGBADir(RGBAVector(1.0f, 1.0f, 1.0f, 0.0f)), + RGBADir(RGBAVector(1.0f, 1.0f, 1.0f, 0.0f)), RGBADir(RGBAVector(-1.0f, 1.0f, 1.0f, 0.0f)), - RGBADir(RGBAVector(1.0f, -1.0f, 1.0f, 0.0f)), + RGBADir(RGBAVector(1.0f, -1.0f, 1.0f, 0.0f)), RGBADir(RGBAVector(-1.0f, -1.0f, 1.0f, 0.0f)), - RGBADir(RGBAVector(1.0f, 1.0f, -1.0f, 0.0f)), + RGBADir(RGBAVector(1.0f, 1.0f, -1.0f, 0.0f)), RGBADir(RGBAVector(-1.0f, 1.0f, -1.0f, 0.0f)), - RGBADir(RGBAVector(1.0f, -1.0f, -1.0f, 0.0f)), + RGBADir(RGBAVector(1.0f, -1.0f, -1.0f, 0.0f)), RGBADir(RGBAVector(-1.0f, -1.0f, -1.0f, 0.0f)) }; -static void ChangePointForDirWithoutPbitChange(RGBAVector &v, int dir, const float step[kNumColorChannels]) { +static void ChangePointForDirWithoutPbitChange( + RGBAVector &v, uint32 dir, const float step[kNumColorChannels] +) { if(dir % 2) { v.x -= step[0]; - } - else { + } else { v.x += step[0]; } if(((dir / 2) % 2)) { v.y -= step[1]; - } - else { + } else { v.y += step[1]; } if(((dir / 4) % 2)) { v.z -= step[2]; - } - else { + } else { v.z += step[2]; } if(((dir / 8) % 2)) { v.a -= step[3]; - } - else { + } else { v.a += step[3]; } } -static void ChangePointForDirWithPbitChange(RGBAVector &v, int dir, int oldPbit, const float step[kNumColorChannels]) { +static void ChangePointForDirWithPbitChange( + RGBAVector &v, uint32 dir, uint32 oldPbit, const float step[kNumColorChannels] +) { if(dir % 2 && oldPbit == 0) { v.x -= step[0]; - } - else if(!(dir % 2) && oldPbit == 1) { + } else if(!(dir % 2) && oldPbit == 1) { v.x += step[0]; } if(((dir / 2) % 2) && oldPbit == 0) { v.y -= step[1]; - } - else if(!((dir / 2) % 2) && oldPbit == 1) { + } else if(!((dir / 2) % 2) && oldPbit == 1) { v.y += step[1]; } if(((dir / 4) % 2) && oldPbit == 0) { v.z -= step[2]; - } - else if(!((dir / 4) % 2) && oldPbit == 1) { + } else if(!((dir / 4) % 2) && oldPbit == 1) { v.z += step[2]; } if(((dir / 8) % 2) && oldPbit == 0) { v.a -= step[3]; - } - else if(!((dir / 8) % 2) && oldPbit == 1) { + } else if(!((dir / 8) % 2) && oldPbit == 1) { v.a += step[3]; } } @@ -536,34 +585,33 @@ void BC7CompressionMode::PickBestNeighboringEndpoints( const VisitedState *visitedStates, int nVisited, float stepSz ) const { - // !SPEED! There might be a way to make this faster since we're working // with floating point values that are powers of two. We should be able // to just set the proper bits in the exponent and leave the mantissa to 0. float step[kNumColorChannels] = { - stepSz * float(1 << (8 - m_Attributes->colorChannelPrecision)), - stepSz * float(1 << (8 - m_Attributes->colorChannelPrecision)), - stepSz * float(1 << (8 - m_Attributes->colorChannelPrecision)), - stepSz * float(1 << (8 - GetAlphaChannelPrecision())) + stepSz * static_cast(1 << (8 - m_Attributes->colorChannelPrecision)), + stepSz * static_cast(1 << (8 - m_Attributes->colorChannelPrecision)), + stepSz * static_cast(1 << (8 - m_Attributes->colorChannelPrecision)), + stepSz * static_cast(1 << (8 - GetAlphaChannelPrecision())) }; if(m_IsOpaque) { step[(GetRotationMode() + 3) % kNumColorChannels] = 0.0f; } - // First, let's figure out the new pbit combo... if there's no pbit then we don't need - // to worry about it. + // First, let's figure out the new pbit combo... if there's no pbit then we + // don't need to worry about it. const bool hasPbits = GetPBitType() != ePBitType_None; if(hasPbits) { - // If there is a pbit, then we must change it, because those will provide the closest values - // to the current point. 
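
The step directions above are driven by a 4-bit value: bit k of dir decides whether channel k moves down or up by one quantization step, which is what the dir % 2, (dir / 2) % 2, (dir / 4) % 2, and (dir / 8) % 2 tests express. A minimal sketch of the same decoding, written with shifts and a plain float[4] instead of RGBAVector (the function name is illustrative):

// Bit k of dir selects the sign of the step applied to channel k:
// 1 subtracts step[k], 0 adds it. Passing dir = fastrand() % 16 visits
// all sixteen sign combinations, as the callers below do.
static void StepPoint(float v[4], unsigned int dir, const float step[4]) {
  for (int ch = 0; ch < 4; ++ch) {
    if ((dir >> ch) & 1u) {
      v[ch] -= step[ch];
    } else {
      v[ch] += step[ch];
    }
  }
}

The p-bit variant additionally gates each step on the old p-bit value, so the endpoint only moves in the direction that the flipped p-bit makes representable.
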
- if(GetPBitType() == ePBitType_Shared) + // If there is a pbit, then we must change it, because those will provide + // the closest values to the current point. + if(GetPBitType() == ePBitType_Shared) { nPbitCombo = (curPbitCombo + 1) % 2; - else { - // Not shared... p1 needs to change and p2 needs to change... which means that - // combo 0 gets rotated to combo 3, combo 1 gets rotated to combo 2 and vice - // versa... + } else { + // Not shared... p1 needs to change and p2 needs to change... which means + // that combo 0 gets rotated to combo 3, combo 1 gets rotated to combo 2 + // and vice versa... nPbitCombo = 3 - curPbitCombo; } @@ -580,10 +628,13 @@ void BC7CompressionMode::PickBestNeighboringEndpoints( RGBAVector &np = (pt)? np1 : np2; np = p; - if(hasPbits) - ChangePointForDirWithPbitChange(np, fastrand() % 16, GetPBitCombo(curPbitCombo)[pt], step); - else + if(hasPbits) { + const uint32 rdir = fastrand() % 16; + const uint32 pbit = GetPBitCombo(curPbitCombo)[pt]; + ChangePointForDirWithPbitChange(np, rdir, pbit, step); + } else { ChangePointForDirWithoutPbitChange(np, fastrand() % 16, step); + } for(uint32 i = 0; i < kNumColorChannels; i++) { np.c[i] = std::min(std::max(np.c[i], 0.0f), 255.0f); @@ -602,22 +653,21 @@ void BC7CompressionMode::PickBestNeighboringEndpoints( } // Fast generation of floats between 0 and 1. It generates a float -// whose exponent forces the value to be between 1 and 2, then it +// whose exponent forces the value to be between 1 and 2, then it // populates the mantissa with a random assortment of bits, and returns // the bytes interpreted as a float. This prevents two things: 1, a // division, and 2, a cast from an integer to a float. -#define COMPILE_ASSERT(x) extern int __compile_assert_[(int)(x)]; +#define COMPILE_ASSERT(x) extern int __compile_assert_[static_cast(x)]; COMPILE_ASSERT(RAND_MAX == 0x7FFF) -static inline float frand() { - const uint16 r = fastrand(); - +static inline float frand() { // RAND_MAX is 0x7FFF, which offers 15 bits // of precision. Therefore, we move the bits - // into the top of the 23 bit mantissa, and - // repeat the most significant bits of r in + // into the top of the 23 bit mantissa, and + // repeat the most significant bits of r in // the least significant of the mantissa + const uint16 r = fastrand(); const uint32 m = (r << 8) | (r >> 7); const union { uint32 fltAsInt; @@ -626,11 +676,13 @@ static inline float frand() { return fltUnion.flt - 1.0f; } -bool BC7CompressionMode::AcceptNewEndpointError(double newError, double oldError, float temp) const { - +bool BC7CompressionMode::AcceptNewEndpointError( + double newError, double oldError, float temp +) const { // Always accept better endpoints. 
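
The acceptance rule that follows is the usual simulated-annealing form: strict improvements are always kept, while a worse candidate survives with probability exp(0.1 * (oldError - newError) / temp), so acceptance becomes rarer as the error gap grows or the temperature drops toward zero. A standalone sketch of the same test, with std::rand() standing in for the frand() helper above and a strictly positive temperature assumed:

#include <cmath>
#include <cstdlib>

// Metropolis-style acceptance: always keep improvements, occasionally keep
// regressions, with the odds shrinking as temp approaches zero.
static bool AcceptMove(double newError, double oldError, double temp) {
  if (newError < oldError) {
    return true;
  }
  const double r = static_cast<double>(std::rand()) / RAND_MAX;  // frand() stand-in
  return r < std::exp((0.1 * (oldError - newError)) / temp);
}
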
- if(newError < oldError) + if(newError < oldError) { return true; + } const double p = exp((0.1f * (oldError - newError)) / temp); const double r = frand(); @@ -638,13 +690,23 @@ bool BC7CompressionMode::AcceptNewEndpointError(double newError, double oldError return r < p; } -double BC7CompressionMode::OptimizeEndpointsForCluster(const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, int *bestIndices, int &bestPbitCombo) const { - - const int nBuckets = (1 << GetNumberOfBitsPerIndex()); +double BC7CompressionMode::OptimizeEndpointsForCluster( + const RGBACluster &cluster, + RGBAVector &p1, RGBAVector &p2, + int *bestIndices, + int &bestPbitCombo +) const { + + const uint32 nBuckets = (1 << GetNumberOfBitsPerIndex()); const uint32 qmask = GetQuantizationMask(); - // Here we use simulated annealing to traverse the space of clusters to find the best possible endpoints. - double curError = cluster.QuantizedError(p1, p2, nBuckets, qmask, GetErrorMetric(), GetPBitCombo(bestPbitCombo), bestIndices); + // Here we use simulated annealing to traverse the space of clusters to find + // the best possible endpoints. + double curError = cluster.QuantizedError( + p1, p2, nBuckets, qmask, GetErrorMetric(), + GetPBitCombo(bestPbitCombo), bestIndices + ); + int curPbitCombo = bestPbitCombo; double bestError = curError; @@ -653,27 +715,21 @@ double BC7CompressionMode::OptimizeEndpointsForCluster(const RGBACluster &cluste if(GetPBitType() != ePBitType_None) { qp1 = p1.ToPixel(qmask, GetPBitCombo(bestPbitCombo)[0]); qp2 = p2.ToPixel(qmask, GetPBitCombo(bestPbitCombo)[1]); - } - else { + } else { qp1 = p1.ToPixel(qmask); qp2 = p2.ToPixel(qmask); } - uint8 *pqp1 = (uint8 *)&qp1; - uint8 *pqp2 = (uint8 *)&qp2; - - p1 = RGBAVector(float(pqp1[0]), float(pqp1[1]), float(pqp1[2]), float(pqp1[3])); - p2 = RGBAVector(float(pqp2[0]), float(pqp2[1]), float(pqp2[2]), float(pqp2[3])); + p1 = RGBAVector(qp1, 0); + p2 = RGBAVector(qp2, 0); RGBAVector bp1 = p1, bp2 = p2; - assert(curError == cluster.QuantizedError(p1, p2, nBuckets, qmask, GetErrorMetric(), GetPBitCombo(bestPbitCombo))); - int lastVisitedState = 0; VisitedState visitedStates[kMaxAnnealingIterations]; visitedStates[lastVisitedState].p1 = p1; - visitedStates[lastVisitedState].p2 = p2; + visitedStates[lastVisitedState].p2 = p2; visitedStates[lastVisitedState].pBitCombo = curPbitCombo; lastVisitedState++; @@ -681,15 +737,22 @@ double BC7CompressionMode::OptimizeEndpointsForCluster(const RGBACluster &cluste for(int energy = 0; bestError > 0 && energy < maxEnergy; energy++) { - float temp = float(energy) / float(maxEnergy-1); + float temp = static_cast(energy) / static_cast(maxEnergy-1); int indices[kMaxNumDataPoints]; RGBAVector np1, np2; int nPbitCombo = 0; - PickBestNeighboringEndpoints(cluster, p1, p2, curPbitCombo, np1, np2, nPbitCombo, visitedStates, lastVisitedState); + PickBestNeighboringEndpoints( + cluster, p1, p2, curPbitCombo, np1, np2, nPbitCombo, + visitedStates, lastVisitedState + ); + + double error = cluster.QuantizedError( + np1, np2, nBuckets, qmask, + GetErrorMetric(), GetPBitCombo(nPbitCombo), indices + ); - double error = cluster.QuantizedError(np1, np2, nBuckets, qmask, GetErrorMetric(), GetPBitCombo(nPbitCombo), indices); if(AcceptNewEndpointError(error, curError, temp)) { curError = error; p1 = np1; @@ -706,7 +769,7 @@ double BC7CompressionMode::OptimizeEndpointsForCluster(const RGBACluster &cluste lastVisitedState = 0; visitedStates[lastVisitedState].p1 = np1; - visitedStates[lastVisitedState].p2 = np2; + 
visitedStates[lastVisitedState].p2 = np2; visitedStates[lastVisitedState].pBitCombo = nPbitCombo; lastVisitedState++; @@ -721,18 +784,24 @@ double BC7CompressionMode::OptimizeEndpointsForCluster(const RGBACluster &cluste return bestError; } -double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, int *bestIndices, int *alphaIndices) const { - +double BC7CompressionMode::CompressCluster( + const RGBACluster &cluster, + RGBAVector &p1, RGBAVector &p2, + int *bestIndices, + int *alphaIndices +) const { assert(GetModeNumber() == 4 || GetModeNumber() == 5); assert(GetNumberOfSubsets() == 1); assert(cluster.GetNumPoints() == kMaxNumDataPoints); assert(m_Attributes->alphaChannelPrecision > 0); - // If all the points are the same in the cluster, then we need to figure out what the best - // approximation to this point is.... + // If all the points are the same in the cluster, then we need to figure out + // what the best approximation to this point is.... if(cluster.AllSamePoint()) { - assert(!"We should only be using this function in modes 4 & 5 that have a single subset, in which case single colors should have been detected much earlier."); + assert(!"We should only be using this function in modes 4 & 5 that have a" + "single subset, in which case single colors should have been" + "detected much earlier."); const RGBAVector &p = cluster.GetPoint(0); int dummyPbit = 0; @@ -784,21 +853,25 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto int dummyPbit = 0; RGBAVector rgbp1, rgbp2; - double rgbError = CompressCluster(rgbCluster, rgbp1, rgbp2, bestIndices, dummyPbit); + double rgbError = CompressCluster( + rgbCluster, rgbp1, rgbp2, bestIndices, dummyPbit + ); float a1 = alphaMin, a2 = alphaMax; double alphaError = DBL_MAX; typedef uint32 tInterpPair[2]; typedef tInterpPair tInterpLevel[16]; - const tInterpLevel *interpVals = kBC7InterpolationValues + (GetNumberOfBitsPerAlpha() - 1); + + const tInterpLevel *interpVals = + kBC7InterpolationValues + (GetNumberOfBitsPerAlpha() - 1); + const float weight = GetErrorMetric().a; const uint32 nBuckets = (1 << GetNumberOfBitsPerAlpha()); // If they're the same, then we can get them exactly. - if(a1 == a2) - { + if(a1 == a2) { const uint8 a1be = uint8(a1); const uint8 a2be = uint8(a2); @@ -808,25 +881,32 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto alphaIndices[i] = 0; alphaError = 0.0; - } - else { + } else { assert(GetModeNumber() == 4); - + // Mode 4 can be treated like the 6 channel of DXT1 compression. 
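
The Optimal6CompressDXT1 lookups below rebuild an 8-bit alpha value from a 6-bit endpoint by bit replication: (x << 2) | (x >> 4) moves the six bits into the high positions and repeats the top two bits in the low positions, so 0 still decodes to 0 and 63 decodes to 255. A small worked check of that expansion (the lookup table itself is not reproduced here):

#include <cassert>
#include <cstdint>

// Expand a 6-bit value (0..63) to 8 bits by replicating its top bits.
static uint8_t Expand6To8(uint8_t x6) {
  return static_cast<uint8_t>((x6 << 2) | (x6 >> 4));
}

int main() {
  assert(Expand6To8(0) == 0);     // 000000 -> 00000000
  assert(Expand6To8(63) == 255);  // 111111 -> 11111111
  assert(Expand6To8(32) == 130);  // 100000 -> 10000010
  return 0;
}
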
if(Optimal6CompressDXT1[a1be][0][0]) { - a1 = float((Optimal6CompressDXT1[a1be][1][1] << 2) | (Optimal6CompressDXT1[a1be][0][1] >> 4)); - a2 = float((Optimal6CompressDXT1[a2be][1][2] << 2) | (Optimal6CompressDXT1[a2be][0][1] >> 4)); - } - else { - a1 = float((Optimal6CompressDXT1[a1be][0][1] << 2) | (Optimal6CompressDXT1[a1be][0][1] >> 4)); - a2 = float((Optimal6CompressDXT1[a2be][0][2] << 2) | (Optimal6CompressDXT1[a2be][0][1] >> 4)); + a1 = static_cast( + (Optimal6CompressDXT1[a1be][1][1] << 2) | + (Optimal6CompressDXT1[a1be][0][1] >> 4)); + + a2 = static_cast( + (Optimal6CompressDXT1[a2be][1][2] << 2) | + (Optimal6CompressDXT1[a2be][0][1] >> 4)); + } else { + a1 = static_cast( + (Optimal6CompressDXT1[a1be][0][1] << 2) | + (Optimal6CompressDXT1[a1be][0][1] >> 4)); + + a2 = static_cast( + (Optimal6CompressDXT1[a2be][0][2] << 2) | + (Optimal6CompressDXT1[a2be][0][1] >> 4)); } if(m_IndexMode == 1) { for(uint32 i = 0; i < kMaxNumDataPoints; i++) alphaIndices[i] = 1; - } - else { + } else { for(uint32 i = 0; i < kMaxNumDataPoints; i++) alphaIndices[i] = 2; } @@ -834,14 +914,16 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto uint32 interp0 = (*interpVals)[alphaIndices[0] & 0xFF][0]; uint32 interp1 = (*interpVals)[alphaIndices[0] & 0xFF][1]; - const uint8 ip = (((uint32(a1) * interp0) + (uint32(a2) * interp1) + 32) >> 6) & 0xFF; - float pxError = weight * float((a1be > ip)? a1be - ip : ip - a1be); + const uint32 a1i = static_cast(a1); + const uint32 a2i = static_cast(a2); + + const uint8 ip = (((a1i * interp0) + (a2i * interp1) + 32) >> 6) & 0xFF; + float pxError = + weight * static_cast((a1be > ip)? a1be - ip : ip - a1be); pxError *= pxError; alphaError = 16 * pxError; } - } - else { - + } else { // (a1 != a2) float vals[1<<3]; memset(vals, 0, sizeof(vals)); @@ -849,7 +931,9 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto // Figure out initial positioning. for(uint32 i = 0; i < nBuckets; i++) { - vals[i] = alphaMin + (float(i)/float(nBuckets-1)) * (alphaMax - alphaMin); + const float fi = static_cast(i); + const float fb = static_cast(nBuckets - 1); + vals[i] = alphaMin + (fi/fb) * (alphaMax - alphaMin); } // Assign each value to a bucket @@ -864,7 +948,7 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto } } } - + float npts[1 << 3]; // Do k-means @@ -886,8 +970,9 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto } } - if(npts[i] > 0.0f) + if(npts[i] > 0.0f) { avg[i] /= npts[i]; + } } // Did we change anything? 
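
The convergence check that follows closes a small one-dimensional k-means: the buckets are seeded evenly between alphaMin and alphaMax, every alpha sample is assigned to its nearest bucket, each bucket moves to the mean of its samples, and the loop repeats until nothing changes. A compact sketch of one refinement round under the same scheme (the function name and the fixed 16-sample block size are illustrative):

#include <cmath>

// One round of 1-D k-means over 16 alpha samples; returns true if any
// center moved. Callers would loop until this returns false.
static bool RefineAlphaCenters(const float samples[16], float centers[8],
                               int nBuckets) {
  float sum[8] = { 0.0f };
  float count[8] = { 0.0f };

  // Assignment step: each sample joins its nearest center.
  for (int i = 0; i < 16; ++i) {
    int best = 0;
    for (int j = 1; j < nBuckets; ++j) {
      if (std::fabs(samples[i] - centers[j]) <
          std::fabs(samples[i] - centers[best])) {
        best = j;
      }
    }
    sum[best] += samples[i];
    count[best] += 1.0f;
  }

  // Update step: non-empty buckets move to the mean of their samples;
  // empty buckets stay put to avoid a divide by zero.
  bool changed = false;
  for (int j = 0; j < nBuckets; ++j) {
    if (count[j] > 0.0f) {
      const float mean = sum[j] / count[j];
      changed = changed || (mean != centers[j]);
      centers[j] = mean;
    }
  }
  return changed;
}
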
@@ -917,8 +1002,12 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto
     float asq = 0.0, bsq = 0.0, ab = 0.0;
     float ax(0.0), bx(0.0);
     for(uint32 i = 0; i < nBuckets; i++) {
-      float a = float(nBuckets - 1 - i) / float(nBuckets - 1);
-      float b = float(i) / float(nBuckets - 1);
+      const float fbi = static_cast<float>(nBuckets - 1 - i);
+      const float fb = static_cast<float>(nBuckets - 1);
+      const float fi = static_cast<float>(i);
+
+      float a = fbi / fb;
+      float b = fi / fb;

       float n = npts[i];
       float x = vals[i];

@@ -940,8 +1029,10 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto
     a2 = std::min(255.0f, std::max(0.0f, a2));

     // Quantize
-    const uint8 a1b = ::QuantizeChannel(uint8(a1), (((char)0x80) >> (GetAlphaChannelPrecision() - 1)));
-    const uint8 a2b = ::QuantizeChannel(uint8(a2), (((char)0x80) >> (GetAlphaChannelPrecision() - 1)));
+    const uint8 a1b = ::QuantizeChannel(
+      uint8(a1), (((char)0x80) >> (GetAlphaChannelPrecision() - 1)));
+    const uint8 a2b = ::QuantizeChannel(
+      uint8(a2), (((char)0x80) >> (GetAlphaChannelPrecision() - 1)));

     // Compute error
     alphaError = 0.0;
@@ -956,8 +1047,12 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto
         uint32 interp0 = (*interpVals)[j][0];
         uint32 interp1 = (*interpVals)[j][1];

-        const uint8 ip = (((uint32(a1b) * interp0) + (uint32(a2b) * interp1) + 32) >> 6) & 0xFF;
-        float pxError = weight * float((val > ip)? val - ip : ip - val);
+        uint32 a1i = static_cast<uint32>(a1b);
+        uint32 a2i = static_cast<uint32>(a2b);
+
+        const uint8 ip = (((a1i * interp0) + (a2i * interp1) + 32) >> 6) & 0xFF;
+        float pxError =
+          weight * static_cast<float>((val > ip)? val - ip : ip - val);
         pxError *= pxError;

         if(pxError < minError) {
@@ -979,10 +1074,14 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto
   return rgbError + alphaError;
 }

-double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, int *bestIndices, int &bestPbitCombo) const {
-
-  // If all the points are the same in the cluster, then we need to figure out what the best
-  // approximation to this point is....
+double BC7CompressionMode::CompressCluster(
+  const RGBACluster &cluster,
+  RGBAVector &p1, RGBAVector &p2,
+  int *bestIndices,
+  int &bestPbitCombo
+) const {
+  // If all the points are the same in the cluster, then we need to figure out
+  // what the best approximation to this point is....
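
Both the alpha fit above and the RGB fit later in CompressCluster solve the same small weighted least-squares problem: bucket i, holding n_i samples, decodes to a_i*p1 + b_i*p2 with a_i = (k-1-i)/(k-1) and b_i = i/(k-1), and minimizing the weighted squared error gives the 2x2 normal equations [asq ab; ab bsq][p1 p2]^T = [ax bx]^T, whose determinant is the asq * bsq - ab * ab term inverted as f in the code. A scalar sketch of that solve (names illustrative); the single-color special case just below sidesteps it entirely:

// Fit scalar endpoints e0, e1 to bucketed samples. count[i] samples sit in
// bucket i with mean value mean[i]; k is the number of buckets (>= 2).
static void FitEndpoints(const float mean[], const float count[], int k,
                         float &e0, float &e1) {
  float asq = 0.0f, bsq = 0.0f, ab = 0.0f, ax = 0.0f, bx = 0.0f;
  for (int i = 0; i < k; ++i) {
    const float a = static_cast<float>(k - 1 - i) / static_cast<float>(k - 1);
    const float b = static_cast<float>(i) / static_cast<float>(k - 1);
    const float n = count[i];
    const float x = mean[i] * n;  // weighted sum of the bucket's samples
    asq += n * a * a;
    bsq += n * b * b;
    ab  += n * a * b;
    ax  += x * a;
    bx  += x * b;
  }
  // Invert the 2x2 normal equations; assumes a nonzero determinant, which
  // holds once at least two distinct buckets are populated.
  const float f = 1.0f / (asq * bsq - ab * ab);
  e0 = f * (ax * bsq - bx * ab);
  e1 = f * (bx * asq - ax * ab);
}
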
if(cluster.AllSamePoint()) { const RGBAVector &p = cluster.GetPoint(0); double bestErr = CompressSingleColor(p, p1, p2, bestPbitCombo); @@ -991,17 +1090,19 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto for(uint32 i = 0; i < cluster.GetNumPoints(); i++) { bestIndices[i] = 1; } - return cluster.GetNumPoints() * bestErr; } - + const uint32 nBuckets = (1 << GetNumberOfBitsPerIndex()); #if 1 - RGBAVector avg = cluster.GetTotal() / float(cluster.GetNumPoints()); + RGBAVector avg = + cluster.GetTotal() / static_cast(cluster.GetNumPoints()); RGBADir axis; double eigOne; - ::GetPrincipalAxis(cluster.GetNumPoints(), cluster.GetPoints(), axis, eigOne, NULL); + ::GetPrincipalAxis( + cluster.GetNumPoints(), cluster.GetPoints(), axis, eigOne, NULL + ); float mindp = FLT_MAX, maxdp = -FLT_MAX; for(uint32 i = 0 ; i < cluster.GetNumPoints(); i++) { @@ -1009,7 +1110,7 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto if(dp < mindp) mindp = dp; if(dp > maxdp) maxdp = dp; } - + p1 = avg + mindp * axis; p2 = avg + maxdp * axis; #else @@ -1018,12 +1119,12 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto ClampEndpoints(p1, p2); - RGBAVector pts[1 << 4]; // At most 4 bits per index. + RGBAVector pts[1 << 4]; // At most 4 bits per index. uint32 numPts[1<<4]; assert(nBuckets <= 1 << 4); for(uint32 i = 0; i < nBuckets; i++) { - float s = (float(i) / float(nBuckets - 1)); + float s = (static_cast(i) / static_cast(nBuckets - 1)); pts[i] = (1.0f - s) * p1 + s * p2; } @@ -1035,19 +1136,17 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto bool fixed = false; while(!fixed) { - RGBAVector newPts[1 << 4]; // Assign each of the existing points to one of the buckets... for(uint32 i = 0; i < cluster.GetNumPoints(); i++) { - int minBucket = -1; float minDist = FLT_MAX; + for(uint32 j = 0; j < nBuckets; j++) { RGBAVector v = cluster.GetPoint(i) - pts[j]; float distSq = v * v; - if(distSq < minDist) - { + if(distSq < minDist) { minDist = distSq; minBucket = j; } @@ -1059,7 +1158,6 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto // Calculate new buckets based on centroids of clusters... for(uint32 i = 0; i < nBuckets; i++) { - numPts[i] = 0; newPts[i] = RGBAVector(0.0f); for(uint32 j = 0; j < cluster.GetNumPoints(); j++) { @@ -1072,7 +1170,7 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto // If there are no points in this cluster, then it should // remain the same as last time and avoid a divide by zero. if(0 != numPts[i]) - newPts[i] /= float(numPts[i]); + newPts[i] /= static_cast(numPts[i]); } // If we haven't changed, then we're done. @@ -1088,7 +1186,7 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto } } - // If there's only one bucket filled, then just compress for that single color... + // If there's only one bucket filled, then just compress for that single color int numBucketsFilled = 0, lastFilledBucket = -1; for(uint32 i = 0; i < nBuckets; i++) { if(numPts[i] > 0) { @@ -1106,28 +1204,33 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto for(uint32 i = 0; i < cluster.GetNumPoints(); i++) { bestIndices[i] = 1; } - return cluster.GetNumPoints() * bestErr; } - // Now that we know the index of each pixel, we can assign the endpoints based on a least squares fit - // of the clusters. 
For more information, take a look at this article by NVidia:
-  // http://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/dxtc/doc/cuda_dxtc.pdf
+  // Now that we know the index of each pixel, we can assign the endpoints based
+  // on a least squares fit of the clusters. For more information, take a look
+  // at this article by NVidia: http://developer.download.nvidia.com/compute/
+  // cuda/1.1-Beta/x86_website/projects/dxtc/doc/cuda_dxtc.pdf
   float asq = 0.0, bsq = 0.0, ab = 0.0;
   RGBAVector ax(0.0), bx(0.0);
   for(uint32 i = 0; i < nBuckets; i++) {
-    float a = float(nBuckets - 1 - i) / float(nBuckets - 1);
-    float b = float(i) / float(nBuckets - 1);
+    const float fbi = static_cast<float>(nBuckets - 1 - i);
+    const float fb = static_cast<float>(nBuckets - 1);
+    const float fi = static_cast<float>(i);
+
+    float a = fbi / fb;
+    float b = fi / fb;

     int n = numPts[i];
+    const float fn = static_cast<float>(n);
     RGBAVector x = pts[i];

-    asq += float(n) * a * a;
-    bsq += float(n) * b * b;
-    ab += float(n) * a * b;
+    asq += fn * a * a;
+    bsq += fn * b * b;
+    ab += fn * a * b;

-    ax += x * a * float(n);
-    bx += x * b * float(n);
+    ax += x * a * fn;
+    bx += x * b * fn;
   }

   float f = 1.0f / (asq * bsq - ab * ab);
@@ -1148,10 +1251,14 @@ double BC7CompressionMode::CompressCluster(const RGBACluster &cluster, RGBAVecto
   assert(bestPbitCombo >= 0);

-  return OptimizeEndpointsForCluster(cluster, p1, p2, bestIndices, bestPbitCombo);
+  return OptimizeEndpointsForCluster(
+    cluster, p1, p2, bestIndices, bestPbitCombo
+  );
 }

-double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const RGBACluster *clusters) {
+double BC7CompressionMode::Compress(
+  BitStream &stream, const int shapeIdx, const RGBACluster *clusters
+) {
   const int kModeNumber = GetModeNumber();
   const int nPartitionBits = GetNumberOfPartitionBits();

@@ -1163,14 +1270,15 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const
   // Partition #
   assert((((1 << nPartitionBits) - 1) & shapeIdx) == shapeIdx);
   stream.WriteBits(shapeIdx, nPartitionBits);
-
+
   RGBAVector p1[kMaxNumSubsets], p2[kMaxNumSubsets];
-  int bestIndices[kMaxNumSubsets][kMaxNumDataPoints] = {
-    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },
-    { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }
-  };
-  int bestAlphaIndices[kMaxNumDataPoints] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
+
+  int bestIndices[kMaxNumSubsets][kMaxNumDataPoints];
+  memset(bestIndices, 0xFF, sizeof(bestIndices));
+
+  int bestAlphaIndices[kMaxNumDataPoints];
+  memset(bestAlphaIndices, 0xFF, sizeof(bestAlphaIndices));
+
   int bestPbitCombo[kMaxNumSubsets] = { -1, -1, -1 };
   int bestRotationMode = -1, bestIndexMode = -1;

@@ -1195,7 +1303,10 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const
           SetIndexMode(idxMode);

           RGBAVector v1, v2;
-          double error = CompressCluster(clusters[cidx], v1, v2, indices, alphaIndices);
+          double error = CompressCluster(
+            clusters[cidx], v1, v2, indices, alphaIndices
+          );
+
           if(error < bestError) {
             bestError = error;

@@ -1212,10 +1323,11 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const
       }

       totalErr += bestError;
-    }
-    else {
+    } else {  // !
m_Attributes->hasRotation // Compress this cluster - totalErr += CompressCluster(clusters[cidx], p1[cidx], p2[cidx], indices, bestPbitCombo[cidx]); + totalErr += CompressCluster( + clusters[cidx], p1[cidx], p2[cidx], indices, bestPbitCombo[cidx] + ); // Map the indices to their proper position. int idx = 0; @@ -1247,25 +1359,25 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const // Get the quantization mask const uint32 qmask = GetQuantizationMask(); - //Quantize the points... + // Quantize the points... uint32 pixel1[kMaxNumSubsets], pixel2[kMaxNumSubsets]; for(int i = 0; i < nSubsets; i++) { switch(GetPBitType()) { default: - case ePBitType_None: - pixel1[i] = p1[i].ToPixel(qmask); - pixel2[i] = p2[i].ToPixel(qmask); + case ePBitType_None: + pixel1[i] = p1[i].ToPixel(qmask); + pixel2[i] = p2[i].ToPixel(qmask); break; - case ePBitType_Shared: - case ePBitType_NotShared: - pixel1[i] = p1[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[0]); - pixel2[i] = p2[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[1]); + case ePBitType_Shared: + case ePBitType_NotShared: + pixel1[i] = p1[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[0]); + pixel2[i] = p2[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[1]); break; } } - // If the anchor index does not have 0 in the leading bit, then + // If the anchor index does not have 0 in the leading bit, then // we need to swap EVERYTHING. for(int sidx = 0; sidx < nSubsets; sidx++) { @@ -1290,9 +1402,10 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const } } - if(m_Attributes->hasRotation && bestAlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1)) { - uint8 * bp1 = (uint8 *)(&pixel1[sidx]); - uint8 * bp2 = (uint8 *)(&pixel2[sidx]); + const bool rotated = bestAlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1); + if(m_Attributes->hasRotation && rotated) { + uint8 * bp1 = static_cast(&pixel1[sidx]); + uint8 * bp2 = static_cast(&pixel2[sidx]); uint8 t = bp1[3]; bp1[3] = bp2[3]; bp2[3] = t; int nAlphaIndexVals = 1 << nAlphaIndexBits; @@ -1302,12 +1415,15 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const } assert(!(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1))); - assert(!m_Attributes->hasRotation || !(bestAlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1))); + assert(!m_Attributes->hasRotation || + !(bestAlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1))); } // Get the quantized values... - uint8 r1[kMaxNumSubsets], g1[kMaxNumSubsets], b1[kMaxNumSubsets], a1[kMaxNumSubsets]; - uint8 r2[kMaxNumSubsets], g2[kMaxNumSubsets], b2[kMaxNumSubsets], a2[kMaxNumSubsets]; + uint8 r1[kMaxNumSubsets], g1[kMaxNumSubsets], + b1[kMaxNumSubsets], a1[kMaxNumSubsets]; + uint8 r2[kMaxNumSubsets], g2[kMaxNumSubsets], + b2[kMaxNumSubsets], a2[kMaxNumSubsets]; for(int i = 0; i < nSubsets; i++) { r1[i] = pixel1[i] & 0xFF; r2[i] = pixel2[i] & 0xFF; @@ -1357,7 +1473,8 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const } } - // If our index mode has changed, then we need to write the alpha indices first. + // If our index mode has changed, then we need to write the alpha indices + // first. 
if(m_Attributes->hasIdxMode && bestIndexMode == 1) { assert(m_Attributes->hasRotation); @@ -1367,7 +1484,9 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const assert(GetAnchorIndexForSubset(0, shapeIdx, nSubsets) == 0); assert(GetNumberOfBitsPerAlpha(bestIndexMode) == 2); assert(idx >= 0 && idx < (1 << 2)); - assert(i != 0 || !(idx >> 1) || !"Leading bit of anchor index is not zero!"); + assert(i != 0 || + !(idx >> 1) || + !"Leading bit of anchor index is not zero!"); stream.WriteBits(idx, (i == 0)? 1 : 2); } @@ -1377,18 +1496,21 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const assert(GetAnchorIndexForSubset(0, shapeIdx, nSubsets) == 0); assert(GetNumberOfBitsPerIndex(bestIndexMode) == 3); assert(idx >= 0 && idx < (1 << 3)); - assert(i != 0 || !(idx >> 2) || !"Leading bit of anchor index is not zero!"); + assert(i != 0 || + !(idx >> 2) || + !"Leading bit of anchor index is not zero!"); stream.WriteBits(idx, (i == 0)? 2 : 3); } - } - else { + } else { for(int i = 0; i < 16; i++) { const int subs = GetSubsetForIndex(i, shapeIdx, nSubsets); const int idx = bestIndices[subs][i]; const int anchorIdx = GetAnchorIndexForSubset(subs, shapeIdx, nSubsets); const int nBitsForIdx = GetNumberOfBitsPerIndex(bestIndexMode); assert(idx >= 0 && idx < (1 << nBitsForIdx)); - assert(i != anchorIdx || !(idx >> (nBitsForIdx - 1)) || !"Leading bit of anchor index is not zero!"); + assert(i != anchorIdx || + !(idx >> (nBitsForIdx - 1)) || + !"Leading bit of anchor index is not zero!"); stream.WriteBits(idx, (i == anchorIdx)? nBitsForIdx - 1 : nBitsForIdx); } @@ -1398,7 +1520,9 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const const int anchorIdx = 0; const int nBitsForIdx = GetNumberOfBitsPerAlpha(bestIndexMode); assert(idx >= 0 && idx < (1 << nBitsForIdx)); - assert(i != anchorIdx || !(idx >> (nBitsForIdx - 1)) || !"Leading bit of anchor index is not zero!"); + assert(i != anchorIdx || + !(idx >> (nBitsForIdx - 1)) || + !"Leading bit of anchor index is not zero!"); stream.WriteBits(idx, (i == anchorIdx)? nBitsForIdx - 1 : nBitsForIdx); } } @@ -1407,8 +1531,7 @@ double BC7CompressionMode::Compress(BitStream &stream, const int shapeIdx, const return totalErr; } -namespace BC7C -{ +namespace BC7C { static ErrorMetric gErrorMetric = eErrorMetric_Uniform; void SetErrorMetric(ErrorMetric e) { gErrorMetric = e; } @@ -1422,13 +1545,16 @@ namespace BC7C // Function prototypes static void CompressBC7Block(const uint32 *block, uint8 *outBuf); - static void CompressBC7Block(const uint32 *block, uint8 *outBuf, BlockStatManager &statManager); + static void CompressBC7Block( + const uint32 *block, uint8 *outBuf, BlockStatManager &statManager + ); static int gQualityLevel = 50; void SetQualityLevel(int q) { gQualityLevel = std::max(0, q); const int kMaxIters = BC7CompressionMode::kMaxAnnealingIterations; - BC7CompressionMode::MaxAnnealingIterations = std::min(kMaxIters, GetQualityLevel()); + BC7CompressionMode::MaxAnnealingIterations = + std::min(kMaxIters, GetQualityLevel()); } int GetQualityLevel() { return gQualityLevel; } @@ -1454,8 +1580,8 @@ namespace BC7C // Compresses a single color optimally and outputs the result. static void CompressOptimalColorBC7(uint32 pixel, BitStream &stream) { - stream.WriteBits(1 << 5, 6); // Mode 5 - stream.WriteBits(0, 2); // No rotation bits. + stream.WriteBits(1 << 5, 6); // Mode 5 + stream.WriteBits(0, 2); // No rotation bits. 
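
CompressOptimalColorBC7 hand-writes a complete mode 5 block. With the LSB-first BitStream used here, WriteBits(1 << 5, 6) emits five 0 bits and then a 1, which is how the decoder's "count zeros until a one" loop recovers mode 5, and the 31-bit constant 0xaaaaaaab written further down encodes "every pixel uses color index 1": one bit for the anchor pixel plus fifteen 2-bit values of 1. A small sketch that rebuilds that index field (names illustrative):

#include <cassert>
#include <cstdint>

// Pack the color-index field for a block whose 16 pixels all use index 1:
// pixel 0 is the anchor and stores its index in a single bit, the rest use
// two bits each, packed LSB-first.
static uint32_t AllOnesIndexField() {
  uint32_t field = 1;             // anchor pixel, 1 bit, value 1
  int bit = 1;
  for (int i = 1; i < 16; ++i) {  // remaining pixels, 2 bits each, value 1
    field |= 1u << bit;
    bit += 2;
  }
  return field;
}

int main() {
  // WriteBits(0xaaaaaaab, 31) only consumes the low 31 bits of the constant.
  assert(AllOnesIndexField() == (0xaaaaaaabu & 0x7fffffffu));
  return 0;
}
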
uint8 r = pixel & 0xFF; uint8 g = (pixel >> 8) & 0xFF; @@ -1477,7 +1603,7 @@ namespace BC7C // Alpha endpoints... are just the same. stream.WriteBits(a, 8); stream.WriteBits(a, 8); - + // Color indices are 1 for each pixel... // Anchor index is 0, so 1 bit for the first pixel, then // 01 for each following pixel giving the sequence of 31 bits: @@ -1485,31 +1611,29 @@ namespace BC7C stream.WriteBits(0xaaaaaaab, 31); // Alpha indices... - stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31); + stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31); } static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]); - // Compress an image using BC7 compression. Use the inBuf parameter to point to an image in - // 4-byte RGBA format. The width and height parameters specify the size of the image in pixels. - // The buffer pointed to by outBuf should be large enough to store the compressed image. This - // implementation has an 4:1 compression ratio. - void Compress(const CompressionJob &cj) - { + // Compress an image using BC7 compression. Use the inBuf parameter to point + // to an image in 4-byte RGBA format. The width and height parameters specify + // the size of the image in pixels. The buffer pointed to by outBuf should be + // large enough to store the compressed image. This implementation has an 4:1 + // compression ratio. + void Compress(const CompressionJob &cj) { const unsigned char *inBuf = cj.inBuf; unsigned char *outBuf = cj.outBuf; - for(uint32 j = 0; j < cj.height; j += 4) - { - for(uint32 i = 0; i < cj.width; i += 4) - { - // ExtractBlock(inBuf + i * 4, width, block); + for(uint32 j = 0; j < cj.height; j += 4) { + for(uint32 i = 0; i < cj.width; i += 4) { + CompressBC7Block((const uint32 *)inBuf, outBuf); #ifndef NDEBUG - uint8 *block = (uint8 *)outBuf; + uint8 *block = static_cast(outBuf); uint32 unComp[16]; DecompressBC7Block(block, unComp); - uint8* unCompData = (uint8 *)unComp; + uint8* unCompData = static_cast(unComp); double diffSum = 0.0; for(int k = 0; k < 64; k+=4) { @@ -1517,12 +1641,15 @@ namespace BC7C double gdiff = sad(unCompData[k+1], inBuf[k+1]); double bdiff = sad(unCompData[k+2], inBuf[k+2]); double adiff = sad(unCompData[k+3], inBuf[k+3]); - double avga = ((float(unCompData[k+3]) + float(inBuf[k+3]))*0.5)/255.0; + const float asrc = static_cast(inBuf[k+3]); + const float adst = static_cast(unCompData[k+3]); + double avga = ((asrc + adst)*0.5)/255.0; diffSum += (rdiff + gdiff + bdiff + adiff) * avga; } - double blockError = double(diffSum) / 64.0; + double blockError = static_cast(diffSum) / 64.0; if(blockError > 5.0) { - fprintf(stderr, "WARNING: Block error very high at <%d, %d>: (%.2f)\n", i, j, blockError); + fprintf(stderr, "WARNING: Block error very high" + " at <%d, %d>: (%.2f)\n", i, j, blockError); } #endif @@ -1553,19 +1680,19 @@ namespace BC7C // Variables used for synchronization in threadsafe implementation. void CompressAtomic(CompressionJobList &cjl) { - uint32 jobIdx; while((jobIdx = cjl.m_CurrentJobIndex) < cjl.GetNumJobs()) { - // !HACK! ... Microsoft has this defined #undef GetJob const CompressionJob *cj = cjl.GetJob(jobIdx); const uint32 nBlocks = (cj->height * cj->width) / 16; - // Help finish whatever texture we're compressing before we start again on my work... + // Help finish whatever texture we're compressing before we start again on + // my work... 
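
The loop below parcels out 4x4 blocks with a single atomic fetch-and-add, so worker threads claim disjoint block indices without a lock and stop when the counter passes nBlocks or the job's finished flag is raised. A minimal sketch of the same claiming pattern using std::atomic (the source's FetchAndAdd is a platform wrapper with the same return-then-increment semantics; compressBlock and the omitted finished-flag check are illustrative):

#include <atomic>
#include <cstdint>

// Each worker claims the next unprocessed block index until none remain.
static void WorkerLoop(std::atomic<uint32_t> &nextBlock, uint32_t nBlocks,
                       void (*compressBlock)(uint32_t)) {
  uint32_t blockIdx;
  while ((blockIdx = nextBlock.fetch_add(1)) < nBlocks) {
    compressBlock(blockIdx);  // reads in + 64 * blockIdx, writes out + 16 * blockIdx
  }
}
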
uint32 blockIdx; - while((blockIdx = FetchAndAdd(&(cjl.m_CurrentBlockIndex))) < nBlocks && *(cjl.GetFinishedFlag(jobIdx)) == 0) { + while((blockIdx = FetchAndAdd(&(cjl.m_CurrentBlockIndex))) < nBlocks && + *(cjl.GetFinishedFlag(jobIdx)) == 0) { unsigned char *out = cj->outBuf + (16 * blockIdx); const unsigned char *in = cj->inBuf + (64 * blockIdx); @@ -1578,10 +1705,10 @@ namespace BC7C } // Wait until this texture finishes. - while(cjl.m_CurrentJobIndex == jobIdx); + while(cjl.m_CurrentJobIndex == jobIdx) { } } } -#endif // HAS_ATOMICS +#endif // HAS_ATOMICS void CompressWithStats( const CompressionJob &cj, @@ -1589,26 +1716,26 @@ namespace BC7C ) { const unsigned char *inBuf = cj.inBuf; unsigned char *outBuf = cj.outBuf; - for(uint32 j = 0; j < cj.height; j += 4) - { - for(uint32 i = 0; i < cj.width; i += 4) - { - // ExtractBlock(inBuf + i * 4, width, block); + + for(uint32 j = 0; j < cj.height; j += 4) { + for(uint32 i = 0; i < cj.width; i += 4) { + CompressBC7Block((const uint32 *)inBuf, outBuf, statManager); #ifndef NDEBUG - uint8 *block = (uint8 *)outBuf; + uint8 *block = static_cast(outBuf); uint32 unComp[16]; DecompressBC7Block(block, unComp); - uint8* unCompData = (uint8 *)unComp; + uint8* unCompData = static_cast(unComp); int diffSum = 0; for(int i = 0; i < 64; i++) { diffSum += sad(unCompData[i], inBuf[i]); } - double blockError = double(diffSum) / 64.0; + double blockError = static_cast(diffSum) / 64.0; if(blockError > 50.0) { - fprintf(stderr, "WARNING: Block error very high (%.2f)\n", blockError); + fprintf(stderr, "WARNING: Block error very high" + " (%.2f)\n", blockError); } #endif @@ -1619,19 +1746,18 @@ namespace BC7C } static double CompressTwoClusters( - int shapeIdx, - const RGBACluster *clusters, - uint8 *outBuf, - bool opaque, + int shapeIdx, + const RGBACluster *clusters, + uint8 *outBuf, + bool opaque, double *errors = NULL, int *modeChosen = NULL ) { uint8 tempBuf1[16]; BitStream tmpStream1(tempBuf1, 128, 0); - BC7CompressionMode compressor1(1, opaque); - - double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters); + double bestError = + BC7CompressionMode(1, opaque).Compress(tmpStream1, shapeIdx, clusters); if(errors) errors[1] = bestError; if(modeChosen) *modeChosen = 1; @@ -1643,9 +1769,10 @@ namespace BC7C uint8 tempBuf3[16]; BitStream tmpStream3(tempBuf3, 128, 0); - BC7CompressionMode compressor3(3, opaque); - double error = compressor3.Compress(tmpStream3, shapeIdx, clusters); + double error = + BC7CompressionMode(3, opaque).Compress(tmpStream3, shapeIdx, clusters); + if(errors) errors[3] = error; if(error < bestError) { if(modeChosen) *modeChosen = 3; @@ -1655,18 +1782,20 @@ namespace BC7C return 0.0; } } - - // Mode 3 offers more precision for RGB data. Mode 7 is really only if we have alpha. - if(!opaque) - { + + // Mode 3 offers more precision for RGB data. Mode 7 is really only if we + // have alpha. 
+ if(!opaque) { uint8 tempBuf7[16]; BitStream tmpStream7(tempBuf7, 128, 0); - BC7CompressionMode compressor7(7, opaque); - error = compressor7.Compress(tmpStream7, shapeIdx, clusters); + + error = + BC7CompressionMode(7, opaque).Compress(tmpStream7, shapeIdx, clusters); + if(errors) errors[7] = error; if(error < bestError) { - if(modeChosen) *modeChosen = 7; - memcpy(outBuf, tempBuf7, 16); + if(modeChosen) *modeChosen = 7; + memcpy(outBuf, tempBuf7, 16); return error; } } @@ -1675,10 +1804,10 @@ namespace BC7C } static double CompressThreeClusters( - int shapeIdx, - const RGBACluster *clusters, - uint8 *outBuf, - bool opaque, + int shapeIdx, + const RGBACluster *clusters, + uint8 *outBuf, + bool opaque, double *errors = NULL, int *modeChosen = NULL ) { @@ -1688,26 +1817,25 @@ namespace BC7C uint8 tempBuf2[16]; BitStream tmpStream2(tempBuf2, 128, 0); - BC7CompressionMode compressor0(0, opaque); - BC7CompressionMode compressor2(2, opaque); - - double error, bestError; + double error, bestError = DBL_MAX;; if(shapeIdx < 16) { - bestError = compressor0.Compress(tmpStream0, shapeIdx, clusters); + bestError = + BC7CompressionMode(0, opaque).Compress(tmpStream0, shapeIdx, clusters); + if(errors) errors[0] = bestError; - } - else { - bestError = DBL_MAX; + } else { if(errors) errors[0] = -1.0; } - + if(modeChosen) *modeChosen = 0; memcpy(outBuf, tempBuf0, 16); if(bestError == 0.0) { return 0.0; } - error = compressor2.Compress(tmpStream2, shapeIdx, clusters); + error = + BC7CompressionMode(2, opaque).Compress(tmpStream2, shapeIdx, clusters); + if(errors) errors[2] = error; if(error < bestError) { if(modeChosen) *modeChosen = 2; @@ -1718,8 +1846,10 @@ namespace BC7C return bestError; } - static void PopulateTwoClustersForShape(const RGBACluster &points, int shapeIdx, RGBACluster *clusters) { - const uint16 shape = kShapeMask2[shapeIdx]; + static void PopulateTwoClustersForShape( + const RGBACluster &points, int shapeIdx, RGBACluster *clusters + ) { + const uint16 shape = kShapeMask2[shapeIdx]; for(uint32 pt = 0; pt < kMaxNumDataPoints; pt++) { const RGBAVector &p = points.GetPoint(pt); @@ -1730,29 +1860,42 @@ namespace BC7C clusters[0].AddPoint(p); } - assert(!(clusters[0].GetPointBitString() & clusters[1].GetPointBitString())); - assert((clusters[0].GetPointBitString() ^ clusters[1].GetPointBitString()) == 0xFFFF); - assert((shape & clusters[1].GetPointBitString()) == shape); +#ifndef NDEBUG + const uint32 pbs1 = clusters[0].GetPointBitString(); + const uint32 pbs2 = clusters[1].GetPointBitString(); + assert(!(pbs1 & pbs2)); + assert((pbs1 ^ pbs2) == 0xFFFF); + assert((shape & pbs2) == shape); +#endif } - static void PopulateThreeClustersForShape(const RGBACluster &points, int shapeIdx, RGBACluster *clusters) { + static void PopulateThreeClustersForShape( + const RGBACluster &points, int shapeIdx, RGBACluster *clusters + ) { for(uint32 pt = 0; pt < kMaxNumDataPoints; pt++) { const RGBAVector &p = points.GetPoint(pt); if((1 << pt) & kShapeMask3[shapeIdx][0]) { - if((1 << pt) & kShapeMask3[shapeIdx][1]) + if((1 << pt) & kShapeMask3[shapeIdx][1]) { clusters[2].AddPoint(p); - else + } else { clusters[1].AddPoint(p); - } - else + } + } else { clusters[0].AddPoint(p); + } } - assert(!(clusters[0].GetPointBitString() & clusters[1].GetPointBitString())); - assert(!(clusters[2].GetPointBitString() & clusters[1].GetPointBitString())); - assert(!(clusters[0].GetPointBitString() & clusters[2].GetPointBitString())); +#ifndef NDEBUG + const uint32 pbs1 = clusters[0].GetPointBitString(); + const uint32 pbs2 = 
clusters[1].GetPointBitString();
+    const uint32 pbs3 = clusters[2].GetPointBitString();
+
+    assert(!(pbs1 & pbs2));
+    assert(!(pbs3 & pbs2));
+    assert(!(pbs3 & pbs1));
+#endif
   }

   static double EstimateTwoClusterError(RGBACluster &c) {
@@ -1771,12 +1914,12 @@ namespace BC7C
     double eigTwo = c.GetSecondEigenvalue();
     if(eigOne != 0.0) {
       error += eigTwo / eigOne;
-    }
-    else {
+    } else {
       error += 1.0;
     }
 #else
-    error += c.QuantizedError(Min, Max, 8, 0xFFFFFFFF, RGBAVector(w[0], w[1], w[2], w[3]));
+    error += c.QuantizedError(Min, Max, 8,
+                              0xFFFFFFFF, RGBAVector(w[0], w[1], w[2], w[3]));
 #endif
     return error;
   }
@@ -1798,18 +1941,17 @@ namespace BC7C
     if(eigOne != 0.0) {
       error += eigTwo / eigOne;
-    }
-    else {
+    } else {
       error += 1.0;
     }
 #else
-    error += c.QuantizedError(Min, Max, 4, 0xFFFFFFFF, RGBAVector(w[0], w[1], w[2], w[3]));
+    error += c.QuantizedError(Min, Max, 4,
+                              0xFFFFFFFF, RGBAVector(w[0], w[1], w[2], w[3]));
 #endif
     return error;
   }

   static void CompressBC7Block(const uint32 *block, uint8 *outBuf) {
-
     // All a single color?
     if(AllOneColor(block)) {
       BitStream bStrm(outBuf, 128, 0);
@@ -1844,8 +1986,7 @@ namespace BC7C
     int bestShapeIdx[2] = { -1, -1 };
     RGBACluster bestClusters[2][3];

-    for(unsigned int i = 0; i < kNumShapes2; i++)
-    {
+    for(unsigned int i = 0; i < kNumShapes2; i++) {
       RGBACluster clusters[2];
       PopulateTwoClustersForShape(blockCluster, i, clusters);

@@ -1859,7 +2000,7 @@ namespace BC7C
        CompressTwoClusters(i, clusters, outBuf, opaque);
        return;
      }
-
+
      if(err < bestError[0]) {
        bestError[0] = err;
        bestShapeIdx[0] = i;
@@ -1896,7 +2037,7 @@ namespace BC7C
        }
      }
    }
-
+
    uint8 tempBuf1[16], tempBuf2[16];

    BitStream tempStream1 (tempBuf1, 128, 0);
@@ -1922,30 +2063,33 @@ namespace BC7C
        if(best == 0.0f) {
          memcpy(outBuf, tempBuf2, 16);
          return;
-        }
-        else {
+        } else {
          memcpy(tempBuf1, tempBuf2, 16);
        }
      }
    }
  }

-    double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque);
+    double error =
+      CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque);
     if(error < best) {
       best = error;

       if(error == 0.0f) {
         memcpy(outBuf, tempBuf2, 16);
         return;
-      }
-      else {
+      } else {
         memcpy(tempBuf1, tempBuf2, 16);
       }
     }

     if(opaque) {
-      if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque) < best) {
-
+      const double newError =
+        CompressThreeClusters(bestShapeIdx[1],
+                              bestClusters[1],
+                              tempBuf2,
+                              opaque);
+      if(newError < best) {
         memcpy(outBuf, tempBuf2, 16);
         return;
       }
@@ -1954,7 +2098,9 @@ namespace BC7C
     memcpy(outBuf, tempBuf1, 16);
   }

-  static double EstimateTwoClusterErrorStats(RGBACluster &c, double (&estimates)[2]) {
+  static double EstimateTwoClusterErrorStats(
+    RGBACluster &c, double (&estimates)[2]
+  ) {
     RGBAVector Min, Max, v;
     c.GetBoundingBox(Min, Max);
     v = Max - Min;
@@ -1965,17 +2111,25 @@ namespace BC7C
     const float *w = BC7C::GetErrorMetric();

-    const double err1 = c.QuantizedError(Min, Max, 8, 0xFFFCFCFC, RGBAVector(w[0], w[1], w[2], w[3]));
-    if(err1 >= 0.0)
-      estimates[0] = err1;
-    else
-      estimates[0] = std::min(estimates[0], err1);
+    const double err1 = c.QuantizedError(
+      Min, Max, 8, 0xFFFCFCFC, RGBAVector(w[0], w[1], w[2], w[3])
+    );

-    const double err3 = c.QuantizedError(Min, Max, 8, 0xFFFEFEFE, RGBAVector(w[0], w[1], w[2], w[3]));
-    if(err3 >= 0.0)
+    if(err1 >= 0.0) {
+      estimates[0] = err1;
+    } else {
+      estimates[0] = std::min(estimates[0], err1);
+    }
+
+    const double err3 = c.QuantizedError(
+      Min, Max, 8, 0xFFFEFEFE, RGBAVector(w[0], w[1], w[2], w[3])
+    );
+
+    if(err3 >= 0.0) {
       estimates[1] = err3;
-    else
+    } else {
      estimates[1] =
std::min(estimates[1], err3); + } double error = 0.0001; #ifdef USE_PCA_FOR_SHAPE_ESTIMATION @@ -1983,8 +2137,7 @@ namespace BC7C double eigTwo = c.GetSecondEigenvalue(); if(eigOne != 0.0) { error += eigTwo / eigOne; - } - else { + } else { error += 1.0; } #else @@ -1993,7 +2146,9 @@ namespace BC7C return error; } - static double EstimateThreeClusterErrorStats(RGBACluster &c, double (&estimates)[2]) { + static double EstimateThreeClusterErrorStats( + RGBACluster &c, double (&estimates)[2] + ) { RGBAVector Min, Max, v; c.GetBoundingBox(Min, Max); v = Max - Min; @@ -2003,28 +2158,34 @@ namespace BC7C } const float *w = BC7C::GetErrorMetric(); - const double err0 = 0.0001 + c.QuantizedError(Min, Max, 4, 0xFFF0F0F0, RGBAVector(w[0], w[1], w[2], w[3])); - if(err0 >= 0.0) - estimates[0] = err0; - else - estimates[0] = std::min(estimates[0], err0); + const double err0 = 0.0001 + c.QuantizedError( + Min, Max, 4, 0xFFF0F0F0, RGBAVector(w[0], w[1], w[2], w[3]) + ); - const double err2 = 0.0001 + c.QuantizedError(Min, Max, 4, 0xFFF8F8F8, RGBAVector(w[0], w[1], w[2], w[3])); - if(err2 >= 0.0) + if(err0 >= 0.0) { + estimates[0] = err0; + } else { + estimates[0] = std::min(estimates[0], err0); + } + + const double err2 = 0.0001 + c.QuantizedError( + Min, Max, 4, 0xFFF8F8F8, RGBAVector(w[0], w[1], w[2], w[3]) + ); + + if(err2 >= 0.0) { estimates[1] = err2; - else + } else { estimates[1] = std::min(estimates[1], err2); + } double error = 0.0001; #ifdef USE_PCA_FOR_SHAPE_ESTIMATION double eigOne = c.GetPrincipalEigenvalue(); double eigTwo = c.GetSecondEigenvalue(); - // printf("EigOne: %08.3f\tEigTwo: %08.3f\n", eigOne, eigTwo); if(eigOne != 0.0) { error += eigTwo / eigOne; - } - else { + } else { error += 1.0; } #else @@ -2043,7 +2204,9 @@ namespace BC7C } // Compress a single block but collect statistics as well... - static void CompressBC7Block(const uint32 *block, uint8 *outBuf, BlockStatManager &statManager) { + static void CompressBC7Block( + const uint32 *block, uint8 *outBuf, BlockStatManager &statManager + ) { class RAIIStatSaver { private: @@ -2055,8 +2218,9 @@ namespace BC7C double *m_Errors; public: - RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m) - , m_ModePtr(NULL), m_Estimates(NULL), m_Errors(NULL) { } + RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) + : m_BlockIdx(blockIdx), m_BSM(m) + , m_ModePtr(NULL), m_Estimates(NULL), m_Errors(NULL) { } void SetMode(int *modePtr) { m_ModePtr = modePtr; } void SetEstimates(double *estimates) { m_Estimates = estimates; } void SetErrors(double *errors) { m_Errors = errors; } @@ -2071,11 +2235,16 @@ namespace BC7C m_BSM.AddStat(m_BlockIdx, s); for(uint32 i = 0; i < BC7CompressionMode::kNumModes; i++) { - s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], m_Estimates[i]); + + s = BlockStat( + kBlockStatString[eBlockStat_ModeZeroEstimate + i], m_Estimates[i] + ); m_BSM.AddStat(m_BlockIdx, s); - s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], m_Errors[i]); - m_BSM.AddStat(m_BlockIdx, s); + s = BlockStat( + kBlockStatString[eBlockStat_ModeZeroError + i], m_Errors[i] + ); + m_BSM.AddStat(m_BlockIdx, s); } } }; @@ -2086,7 +2255,7 @@ namespace BC7C // reset global variables... 
bestMode = 0; - for(uint32 i = 0; i < BC7CompressionMode::kNumModes; i++){ + for(uint32 i = 0; i < BC7CompressionMode::kNumModes; i++) { modeError[i] = modeEstimate[i] = -1.0; } @@ -2105,7 +2274,7 @@ namespace BC7C BitStream bStrm(outBuf, 128, 0); CompressOptimalColorBC7(*block, bStrm); bestMode = 5; - + BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 0); statManager.AddStat(blockIdx, s); @@ -2119,11 +2288,13 @@ namespace BC7C for(uint32 i = 0; i < kMaxNumDataPoints; i++) { RGBAVector p = RGBAVector(i, block[i]); blockCluster.AddPoint(p); - if(fabs(p.a - 255.0f) > 1e-10) + if(fabs(p.a - 255.0f) > 1e-10) { opaque = false; + } - if(p.a > 0.0f) + if(p.a > 0.0f) { transparent = false; + } } // The whole block is transparent? @@ -2146,10 +2317,11 @@ namespace BC7C v = Max - Min; if(v * v == 0) { modeEstimate[6] = 0.0; - } - else { + } else { const float *w = GetErrorMetric(); - const double err = 0.0001 + blockCluster.QuantizedError(Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3])); + const double err = 0.0001 + blockCluster.QuantizedError( + Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3]) + ); UpdateErrorEstimate(modeEstimate, 6, err); #ifdef USE_PCA_FOR_SHAPE_ESTIMATION @@ -2158,8 +2330,7 @@ namespace BC7C double error; if(eigOne != 0.0) { error = eigTwo / eigOne; - } - else { + } else { error = 1.0; } @@ -2175,8 +2346,7 @@ namespace BC7C int bestShapeIdx[2] = { -1, -1 }; RGBACluster bestClusters[2][3]; - for(unsigned int i = 0; i < kNumShapes2; i++) - { + for(unsigned int i = 0; i < kNumShapes2; i++) { RGBACluster clusters[2]; PopulateTwoClustersForShape(blockCluster, i, clusters); @@ -2190,8 +2360,7 @@ namespace BC7C if(shapeEstimate[ei] >= 0.0) { if(errEstimate[ei] == -1.0) { errEstimate[ei] = shapeEstimate[ei]; - } - else { + } else { errEstimate[ei] += shapeEstimate[ei]; } } @@ -2211,21 +2380,25 @@ namespace BC7C } if(err < bestError[0]) { - BlockStat s = BlockStat(kBlockStatString[eBlockStat_TwoShapeEstimate], err); - statManager.AddStat(blockIdx, s); + BlockStat s = BlockStat( + kBlockStatString[eBlockStat_TwoShapeEstimate], err + ); + statManager.AddStat(blockIdx, s); } // If it's small, we'll take it! if(err < 1e-9) { int modeChosen; - CompressTwoClusters(i, clusters, outBuf, opaque, modeError, &modeChosen); + CompressTwoClusters( + i, clusters, outBuf, opaque, modeError, &modeChosen + ); bestMode = modeChosen; BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2); statManager.AddStat(blockIdx, s); return; } - + if(err < bestError[0]) { bestError[0] = err; bestShapeIdx[0] = i; @@ -2252,8 +2425,7 @@ namespace BC7C if(shapeEstimate[ei] >= 0.0) { if(errEstimate[ei] == -1.0) { errEstimate[ei] = shapeEstimate[ei]; - } - else { + } else { errEstimate[ei] += shapeEstimate[ei]; } } @@ -2267,20 +2439,24 @@ namespace BC7C if(errEstimate[0] != -1.0) { UpdateErrorEstimate(modeEstimate, 0, errEstimate[0]); } - + if(errEstimate[1] != -1.0) { UpdateErrorEstimate(modeEstimate, 2, errEstimate[1]); } if(err < bestError[1]) { - BlockStat s = BlockStat(kBlockStatString[eBlockStat_ThreeShapeEstimate], err); - statManager.AddStat(blockIdx, s); + BlockStat s = BlockStat( + kBlockStatString[eBlockStat_ThreeShapeEstimate], err + ); + statManager.AddStat(blockIdx, s); } // If it's small, we'll take it! 
if(err < 1e-9) { int modeChosen; - CompressThreeClusters(i, clusters, outBuf, opaque, modeError, &modeChosen); + CompressThreeClusters( + i, clusters, outBuf, opaque, modeError, &modeChosen + ); bestMode = modeChosen; BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2); @@ -2298,7 +2474,7 @@ namespace BC7C } } } - + BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 3); statManager.AddStat(blockIdx, s); @@ -2306,7 +2482,7 @@ namespace BC7C BitStream tempStream1 (tempBuf1, 128, 0); BC7CompressionMode compressor(6, opaque); - double best = compressor.Compress(tempStream1, 0, &blockCluster); + double best = compressor.Compress(tempStream1, 0, &blockCluster); modeError[6] = best; bestMode = 6; if(best == 0.0f) { @@ -2330,8 +2506,7 @@ namespace BC7C if(best == 0.0f) { memcpy(outBuf, tempBuf2, 16); return; - } - else { + } else { memcpy(tempBuf1, tempBuf2, 16); } } @@ -2339,23 +2514,28 @@ namespace BC7C } int modeChosen; - double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque, modeError, &modeChosen); + double error = CompressTwoClusters( + bestShapeIdx[0], bestClusters[0], tempBuf2, opaque, modeError, &modeChosen + ); if(error < best) { bestMode = modeChosen; best = error; - + if(error == 0.0f) { memcpy(outBuf, tempBuf2, 16); return; - } - else { + } else { memcpy(tempBuf1, tempBuf2, 16); } } if(opaque) { - if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque, modeError, &modeChosen) < best) { + const double newError = CompressThreeClusters( + bestShapeIdx[1], bestClusters[1], + tempBuf2, opaque, modeError, &modeChosen + ); + if(newError < best) { bestMode = modeChosen; memcpy(outBuf, tempBuf2, 16); @@ -2369,13 +2549,14 @@ namespace BC7C static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]) { BitStreamReadOnly strm(block); - + uint32 mode = 0; while(!strm.ReadBit()) { mode++; } - const BC7CompressionMode::Attributes *attrs = BC7CompressionMode::GetAttributesForMode(mode); + const BC7CompressionMode::Attributes *attrs = + BC7CompressionMode::GetAttributesForMode(mode); const uint32 nSubsets = attrs->numSubsets; uint32 idxMode = 0; @@ -2383,16 +2564,16 @@ namespace BC7C uint32 shapeIdx = 0; if ( nSubsets > 1 ) { shapeIdx = strm.ReadBits(mode == 0? 4 : 6); - } - else if( attrs->hasRotation ) { + } else if( attrs->hasRotation ) { rotMode = strm.ReadBits(2); - if( attrs->hasIdxMode ) + if( attrs->hasIdxMode ) { idxMode = strm.ReadBit(); + } } assert(idxMode < 2); assert(rotMode < 4); - assert(shapeIdx < uint32((mode == 0)? 16 : 64)); + assert(shapeIdx < ((mode == 0)? 
16 : 64)); uint32 cp = attrs->colorChannelPrecision; const uint32 shift = 8 - cp; @@ -2400,7 +2581,7 @@ namespace BC7C uint8 eps[3][2][4]; for(uint32 ch = 0; ch < 3; ch++) for(uint32 i = 0; i < nSubsets; i++) - for(uint32 ep = 0; ep < 2; ep++) + for(uint32 ep = 0; ep < 2; ep++) eps[i][ep][ch] = strm.ReadBits(cp) << shift; uint32 ap = attrs->alphaChannelPrecision; @@ -2408,12 +2589,11 @@ namespace BC7C if(ap == 0) { for(uint32 i = 0; i < nSubsets; i++) - for(uint32 ep = 0; ep < 2; ep++) + for(uint32 ep = 0; ep < 2; ep++) eps[i][ep][3] = 0xFF; - } - else { + } else { for(uint32 i = 0; i < nSubsets; i++) - for(uint32 ep = 0; ep < 2; ep++) + for(uint32 ep = 0; ep < 2; ep++) eps[i][ep][3] = strm.ReadBits(ap) << ash; } @@ -2441,10 +2621,10 @@ namespace BC7C break; case BC7CompressionMode::ePBitType_NotShared: - + cp += 1; ap += 1; - + for(uint32 i = 0; i < nSubsets; i++) for(uint32 j = 0; j < 2; j++) { @@ -2463,7 +2643,7 @@ namespace BC7C for(uint32 j = 0; j < 2; j++) for(uint32 ch = 0; ch < kNumColorChannels; ch++) { const uint32 prec = ch == 3? ap : cp; - eps[i][j][ch] |= eps[i][j][ch] >> prec; + eps[i][j][ch] |= eps[i][j][ch] >> prec; } // Figure out indices... @@ -2480,8 +2660,7 @@ namespace BC7C int idx = 0; if(GetAnchorIndexForSubset(subset, shapeIdx, nSubsets) == i) { idx = strm.ReadBits(idxPrec - 1); - } - else { + } else { idx = strm.ReadBits(idxPrec); } colorIndices[i] = idx; @@ -2490,16 +2669,14 @@ namespace BC7C idxPrec = attrs->numBitsPerAlpha; if(idxPrec == 0) { memcpy(alphaIndices, colorIndices, sizeof(alphaIndices)); - } - else { + } else { for(uint32 i = 0; i < kMaxNumDataPoints; i++) { uint32 subset = GetSubsetForIndex(i, shapeIdx, nSubsets); int idx = 0; if(GetAnchorIndexForSubset(subset, shapeIdx, nSubsets) == i) { idx = strm.ReadBits(idxPrec - 1); - } - else { + } else { idx = strm.ReadBits(idxPrec); } alphaIndices[i] = idx; @@ -2521,27 +2698,35 @@ namespace BC7C const uint32 subset = GetSubsetForIndex(i, shapeIdx, nSubsets); uint32 &pixel = outBuf[i]; - + pixel = 0; for(int ch = 0; ch < 4; ch++) { if(ch == 3 && nBitsPerAlpha > 0) { - uint32 i0 = kBC7InterpolationValues[nBitsPerAlpha - 1][alphaIndices[i]][0]; - uint32 i1 = kBC7InterpolationValues[nBitsPerAlpha - 1][alphaIndices[i]][1]; + uint32 i0 = + kBC7InterpolationValues[nBitsPerAlpha - 1][alphaIndices[i]][0]; + uint32 i1 = + kBC7InterpolationValues[nBitsPerAlpha - 1][alphaIndices[i]][1]; - const uint8 ip = (((uint32(eps[subset][0][3]) * i0) + (uint32(eps[subset][1][3]) * i1) + 32) >> 6) & 0xFF; + const uint32 ep1 = static_cast<uint32>(eps[subset][0][3]); + const uint32 ep2 = static_cast<uint32>(eps[subset][1][3]); + const uint8 ip = (((ep1 * i0) + (ep2 * i1) + 32) >> 6) & 0xFF; pixel |= ip << 24; - } - else { - uint32 i0 = kBC7InterpolationValues[nBitsPerColor - 1][colorIndices[i]][0]; - uint32 i1 = kBC7InterpolationValues[nBitsPerColor - 1][colorIndices[i]][1]; - const uint8 ip = (((uint32(eps[subset][0][ch]) * i0) + (uint32(eps[subset][1][ch]) * i1) + 32) >> 6) & 0xFF; + } else { + uint32 i0 = + kBC7InterpolationValues[nBitsPerColor - 1][colorIndices[i]][0]; + uint32 i1 = + kBC7InterpolationValues[nBitsPerColor - 1][colorIndices[i]][1]; + + const uint32 ep1 = static_cast<uint32>(eps[subset][0][ch]); + const uint32 ep2 = static_cast<uint32>(eps[subset][1][ch]); + const uint8 ip = (((ep1 * i0) + (ep2 * i1) + 32) >> 6) & 0xFF; pixel |= ip << (8*ch); } } // Swap colors if necessary... 
- uint8 *pb = (uint8 *)&pixel; + uint8 *pb = reinterpret_cast<uint8 *>(&pixel); switch(rotMode) { default: case 0: @@ -2568,21 +2753,16 @@ namespace BC7C unsigned char *outBuf = dj.outBuf; unsigned int blockIdx = 0; - // for(unsigned int j = 0; j < height; j += 4, outBuf += width * 3 * 4) - for(unsigned int j = 0; j < dj.height; j += 4) - { - for(unsigned int i = 0; i < dj.width; i += 4) - { + + for(unsigned int j = 0; j < dj.height; j += 4) { + for(unsigned int i = 0; i < dj.width; i += 4) { + uint32 pixels[16]; DecompressBC7Block(dj.inBuf + (16*(blockIdx++)), pixels); - memcpy(outBuf, pixels, 16 * sizeof(uint32)); - //memcpy(outBuf + (width * 4), pixels + 4, 4 * sizeof(uint32)); - //memcpy(outBuf + 2*(width * 4), pixels + 8, 4 * sizeof(uint32)); - //memcpy(outBuf + 3*(width * 4), pixels + 12, 4 * sizeof(uint32)); - //outBuf += 16; + memcpy(outBuf, pixels, sizeof(pixels)); outBuf += 64; } } } -} +} // namespace BC7C
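Note on the endpoint interpolation in DecompressBC7Block above: the two values i0 and i1 read from kBC7InterpolationValues weight the two endpoints, and the +32 followed by the shift by 6 rounds the result, i.e. the standard BC7 interpolation ((64 - w) * e0 + w * e1 + 32) >> 6. The snippet below is a minimal, self-contained sketch of that formula using the per-index weight tables from the BC7 specification; it assumes kBC7InterpolationValues[p - 1][idx] stores the pair (64 - w, w) for a p-bit index. The names Interpolate and kWeights2/3/4 are illustrative only and are not part of this patch.

#include <cassert>
#include <cstdint>

// Per-index BC7 weights; 64 means full weight on the second endpoint.
static const uint32_t kWeights2[4]  = { 0, 21, 43, 64 };
static const uint32_t kWeights3[8]  = { 0, 9, 18, 27, 37, 46, 55, 64 };
static const uint32_t kWeights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30,
                                        34, 38, 43, 47, 51, 55, 60, 64 };

// Interpolate one 8-bit channel between endpoints e0 and e1 using an index of
// `bits` precision (2, 3, or 4). Equivalent to (ep1 * i0 + ep2 * i1 + 32) >> 6
// in the decompressor when i0 = 64 - w and i1 = w.
static uint8_t Interpolate(uint8_t e0, uint8_t e1, uint32_t index, uint32_t bits) {
  assert(bits >= 2 && bits <= 4);
  const uint32_t *weights =
      (bits == 2) ? kWeights2 : (bits == 3) ? kWeights3 : kWeights4;
  const uint32_t w = weights[index];
  return static_cast<uint8_t>(((64 - w) * e0 + w * e1 + 32) >> 6);
}

For example, with 2-bit indices Interpolate(0, 255, 1, 2) uses w = 21 and yields (21 * 255 + 32) >> 6 = 84.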