From c6d7bdc670b752fe3914a05714598ed82f381b1c Mon Sep 17 00:00:00 2001 From: Pavel Krajcevski Date: Tue, 24 Sep 2013 20:35:36 -0400 Subject: [PATCH] Very preliminary compressor --- PVRTCEncoder/src/Compressor.cpp | 221 +++++++++++++++++++++++++++++++- 1 file changed, 217 insertions(+), 4 deletions(-) diff --git a/PVRTCEncoder/src/Compressor.cpp b/PVRTCEncoder/src/Compressor.cpp index b26c51e..c985f61 100644 --- a/PVRTCEncoder/src/Compressor.cpp +++ b/PVRTCEncoder/src/Compressor.cpp @@ -52,11 +52,85 @@ #include "PVRTCCompressor.h" +#include +#include +#include +#include + #include "Pixel.h" #include "Image.h" +#include "Block.h" namespace PVRTCC { + static uint32 Interleave(uint16 inx, uint16 iny) { + // Taken from: + // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN + + static const uint32 B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF}; + static const uint32 S[] = {1, 2, 4, 8}; + + uint32 x = static_cast(inx); + uint32 y = static_cast(iny); + + x = (x | (x << S[3])) & B[3]; + x = (x | (x << S[2])) & B[2]; + x = (x | (x << S[1])) & B[1]; + x = (x | (x << S[0])) & B[0]; + + y = (y | (y << S[3])) & B[3]; + y = (y | (y << S[2])) & B[2]; + y = (y | (y << S[1])) & B[1]; + y = (y | (y << S[0])) & B[0]; + + return x | (y << 1); + } + + template + static T Clamp(const T &v, const T &low, const T &high) { + return ::std::min(::std::max(low, v), high); + } + + template + static T Lookup(const T *vals, + uint32 x, uint32 y, + uint32 width, uint32 height, + const EWrapMode wrapMode) { + while(x >= width) { + if(wrapMode == eWrapMode_Wrap) { + x -= width; + } else { + x = width - 1; + } + } + + while(x < 0) { + if(wrapMode == eWrapMode_Wrap) { + x += width; + } else { + x = 0; + } + } + + while(y >= height) { + if(wrapMode == eWrapMode_Wrap) { + y -= height; + } else { + y = height - 1; + } + } + + while(y < 0) { + if(wrapMode == eWrapMode_Wrap) { + y += height; + } else { + y = 0; + } + } + + return vals[y * width + x]; + } + void Compress(const CompressionJob &dcj, bool bTwoBitMode, const EWrapMode wrapMode) { @@ -83,9 +157,7 @@ namespace PVRTCC { // image features, then reupscale and compute deltas. Use deltas to generate // initial A & B images followed by modulation data. img.ContentAwareDownscale(1, 1, eWrapMode_Wrap, true); - img.DebugOutput("DownscaledOnce"); img.ContentAwareDownscale(1, 1, eWrapMode_Wrap, false); - img.DebugOutput("DownscaledTwice"); Image downscaled = img; @@ -95,14 +167,155 @@ namespace PVRTCC { img.DebugOutput("Reconstruction"); // Compute difference... - Image difference = img; + int16 difference[dcj.height * dcj.width * 4]; for(uint32 j = 0; j < dcj.height; j++) { for(uint32 i = 0; i < dcj.width; i++) { for(uint32 c = 0; c < 4; c++) { - difference(i, j).Component(c) -= img(i, j).Component(c); + int16 o = original(i, j).Component(c); + int16 n = img(i, j).Component(c); + difference[j*dcj.width*4 + i*4 + c] = o - n; } } } + + // Go over the 7x7 texel blocks and extract bounding box diagonals for each + // block. We should be able to choose which diagonal we want... + const uint32 kKernelSz = 7; + int16 maxDiff[dcj.height * dcj.width / 4]; + int16 minDiff[dcj.height * dcj.width / 4]; + for(uint32 j = 2; j < dcj.height; j += 4) { + for(uint32 i = 2; i < dcj.width; i += 4) { + const uint32 startX = i - (kKernelSz / 2); + const uint32 startY = j - (kKernelSz / 2); + for(uint32 c = 0; c < 4; c++) { + int32 pos = 0; + int32 neg = 0; + for(uint32 y = startY; y < startY + kKernelSz; y++) { + for(uint32 x = startX; x < startX + kKernelSz; x++) { + int16 val = Lookup(difference, x*4 + c, y, dcj.width*4, dcj.height, wrapMode); + if(val > 0) { + pos += val; + } else { + neg += val; + } + } + } + + uint32 blockIdx = ((j-2)/4) * dcj.width + (i-2) + c; + assert(blockIdx < (dcj.width * dcj.height) / 4); + if(pos > -neg) { + maxDiff[blockIdx] = pos; + minDiff[blockIdx] = 0; + } else { + maxDiff[blockIdx] = 0; + minDiff[blockIdx] = neg; + } + } + } + } + + // Add maxDiff to image to get high signal, and lowdiff to image to + // get low signal... + Image imgA = downscaled; + Image imgB = downscaled; + + for(uint32 j = 0; j < dcj.height / 4; j++) { + for(uint32 i = 0; i < dcj.width / 4; i++) { + for(uint32 c = 0; c < 4; c++) { + uint8 &a = imgA(i, j).Component(c); + a = Clamp(a + maxDiff[j*dcj.width/4 + i*4 + c], 0, 255); + + uint8 &b = imgB(i, j).Component(c); + b = Clamp(b + minDiff[j*dcj.width/4 + i*4 + c], 0, 255); + } + } + } + + imgA.DebugOutput("ImageA"); + imgB.DebugOutput("ImageB"); + + // Determine modulation values... + Image upA = imgA; + Image upB = imgB; + + upA.BilinearUpscale(2, 2, wrapMode); + upB.BilinearUpscale(2, 2, wrapMode); + + assert(upA.GetHeight() == dcj.height && upA.GetWidth() == dcj.width); + assert(upB.GetHeight() == dcj.height && upB.GetWidth() == dcj.width); + + upA.DebugOutput("UpscaledA"); + upB.DebugOutput("UpscaledB"); + + // Choose the most appropriate modulation values for the two images... + std::vector modValues; + modValues.reserve(dcj.width * dcj.height); + for(uint32 j = 0; j < dcj.height; j++) { + for(uint32 i = 0; i < dcj.width; i++) { + uint8 &mv = modValues[j * dcj.width + i]; + + const Pixel pa = upA(i, j); + const Pixel pb = upB(i, j); + const Pixel po = original(i, j); + + // !FIXME! there are two modulation modes... we're only using one. + uint8 modSteps[4] = { 0, 3, 5, 8 }; + uint8 bestMod = 0; + uint32 bestError = 0xFFFFFFFF; + for(uint32 s = 0; s < 4; s++) { + uint32 error = 0; + for(uint32 c = 0; c < 4; c++) { + uint16 va = static_cast(pa.Component(c)); + uint16 vb = static_cast(pb.Component(c)); + uint16 vo = static_cast(po.Component(c)); + + uint16 lerpVal = modSteps[s]; + uint16 res = (va * (8 - lerpVal) + vb * lerpVal) / 8; + uint16 e = (res > vo)? res - vo : vo - res; + error += e * e; + } + + if(error < bestError) { + bestError = error; + bestMod = modSteps[s]; + } + } + + mv = bestMod; + } + } + + // Pack everything into a PVRTC blocks. + const uint32 blocksW = dcj.width / 4; + const uint32 blocksH = dcj.height / 4; + std::vector blocks; + for(uint32 j = 0; j < blocksH; j++) { + for(uint32 i = 0; i < blocksW; i++) { + Block b; + b.SetColorA(imgA(i, j)); + b.SetColorB(imgB(i, j)); + for(uint32 t = 0; t < 16; t++) { + uint32 x = i + (t%4); + uint32 y = j + (t/4); + b.SetLerpValue(t, modValues[y*dcj.width + x]); + } + blocks.push_back(b.Pack()); + } + } + + // Spit out the blocks... + for(uint32 j = 0; j < blocksH; j++) { + for(uint32 i = 0; i < blocksW; i++) { + + // The blocks are initially arranged in morton order. Let's + // linearize them... + uint32 idx = Interleave(j, i); + + uint32 offset = idx * PVRTCC::kBlockSize; + uint64 *outPtr = reinterpret_cast(dcj.outBuf + offset); + *outPtr = blocks[j * blocksW + i]; + } + } } } // namespace PVRTCC