Very preliminary compressor

2024-11-24 12:55:39 +01:00 · 2013-09-24 20:35:36 -04:00 · 2013-09-24 20:35:36 -04:00 · c6d7bdc670
commit c6d7bdc670
parent 8f4dcca4d7
1 changed files with 217 additions and 4 deletions
--- a/PVRTCEncoder/src/Compressor.cpp
+++ b/PVRTCEncoder/src/Compressor.cpp
@ -52,11 +52,85 @@
 #include "PVRTCCompressor.h"
 #include <algorithm>
 #include <cassert>
 #include <iostream>
 #include <vector>
 #include "Pixel.h"
 #include "Image.h"
 #include "Block.h"
 namespace PVRTCC {
  // Interleaves the bits of two 16-bit values into one 32-bit value: the bits
  // of inx land in the even bit positions (0, 2, 4, ...) and the bits of iny
  // in the odd bit positions (1, 3, 5, ...).
  //
  // Uses the "binary magic numbers" bit-spreading technique described at
  // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
  static uint32 Interleave(uint16 inx, uint16 iny) {
    // Each step doubles the gap between groups of bits: shift a copy up,
    // merge it in, then mask away the bits that ended up in the wrong place.
    // The steps run from the widest shift to the narrowest.
    static const uint32 kMasks[] = {
      0x00FF00FF, 0x0F0F0F0F, 0x33333333, 0x55555555
    };
    static const uint32 kShifts[] = {8, 4, 2, 1};

    uint32 evenBits = static_cast<uint32>(inx);
    uint32 oddBits = static_cast<uint32>(iny);

    for(uint32 step = 0; step < 4; step++) {
      evenBits = (evenBits | (evenBits << kShifts[step])) & kMasks[step];
      oddBits = (oddBits | (oddBits << kShifts[step])) & kMasks[step];
    }

    // Both values now occupy even positions only; move iny up by one so it
    // fills the odd positions.
    return (oddBits << 1) | evenBits;
  }
  // Restricts v to the range [low, high] and returns the result by value.
  // Only operator< is required of T. The lower bound is applied first and the
  // upper bound second, so if low is greater than high the result is high.
  template <typename T>
  static T Clamp(const T &v, const T &low, const T &high) {
    // Raise v to at least low...
    const T &raised = (low < v) ? v : low;
    // ...then cap it at high.
    return (high < raised) ? high : raised;
  }
  template <typename T>
  static T Lookup(const T *vals,
                  uint32 x, uint32 y,
                  uint32 width, uint32 height,
                  const EWrapMode wrapMode) {
    while(x >= width) {
      if(wrapMode == eWrapMode_Wrap) {
        x -= width;
      } else {
        x = width - 1;
      }
    }
    while(x < 0) {
      if(wrapMode == eWrapMode_Wrap) {
        x += width;
      } else {
        x = 0;
      }
    }
    while(y >= height) {
      if(wrapMode == eWrapMode_Wrap) {
        y -= height;
      } else {
        y = height - 1;
      }
    }
    while(y < 0) {
      if(wrapMode == eWrapMode_Wrap) {
        y += height;
      } else {
        y = 0;
      }
    }
    return vals[y * width + x];
  }
  // Compresses the image described by dcj and writes packed 64-bit blocks
  // into dcj.outBuf.
  //
  // NOTE(review): parts of the body are elided in this view (see the "@ ..."
  // markers), so where `img` and `original` come from is not shown here.
  //
  // Visible steps:
  //   1. Downscale img twice and keep the result as `downscaled`.
  //   2. Record the per-channel differences original - img (img is the image
  //      dumped as "Reconstruction").
  //   3. For every 4th texel starting at (2, 2), sum the positive and the
  //      negative differences inside a 7x7 window and keep whichever side has
  //      the larger magnitude (the other side is set to 0).
  //   4. Offset copies of `downscaled` by those sums to form imgA and imgB.
  //   5. Upscale imgA/imgB and pick, per texel, the modulation step with the
  //      lowest summed squared error against `original`.
  //   6. Pack colors and modulation values into blocks and store them at
  //      offsets derived from Interleave(j, i) in dcj.outBuf.
  //
  // dcj:         job description; width, height and outBuf are read here.
  // bTwoBitMode: not referenced in the visible part of the body.
  // wrapMode:    forwarded to Lookup and BilinearUpscale. The downscale calls
  //              below pass eWrapMode_Wrap directly instead.
  void Compress(const CompressionJob &dcj,
                bool bTwoBitMode,
                const EWrapMode wrapMode) {
@ -83,9 +157,7 @@ namespace PVRTCC {
    // image features, then reupscale and compute deltas. Use deltas to generate
    // initial A & B images followed by modulation data.
    img.ContentAwareDownscale(1, 1, eWrapMode_Wrap, true);
    img.DebugOutput("DownscaledOnce");
    img.ContentAwareDownscale(1, 1, eWrapMode_Wrap, false);
    img.DebugOutput("DownscaledTwice");
    Image downscaled = img;
@ -95,14 +167,155 @@ namespace PVRTCC {
    img.DebugOutput("Reconstruction");
    // Compute difference...
    // One int16 per channel per texel, laid out as [row][texel][channel].
    // NOTE(review): this is a runtime-sized stack array (a compiler extension,
    // not standard C++); large images may exhaust the stack -- confirm.
-    Image difference = img;
+    int16 difference[dcj.height * dcj.width * 4];
    for(uint32 j = 0; j < dcj.height; j++) {
      for(uint32 i = 0; i < dcj.width; i++) {
        for(uint32 c = 0; c < 4; c++) {
-          difference(i, j).Component(c) -= img(i, j).Component(c);
+          int16 o = original(i, j).Component(c);
          int16 n = img(i, j).Component(c);
          difference[j*dcj.width*4 + i*4 + c] = o - n;
        }
      }
    }
    // Go over the 7x7 texel blocks and extract bounding box diagonals for each
    // block. We should be able to choose which diagonal we want...
    const uint32 kKernelSz = 7;
    // NOTE(review): runtime-sized stack arrays again, see `difference` above.
    int16 maxDiff[dcj.height * dcj.width / 4];
    int16 minDiff[dcj.height * dcj.width / 4];
    for(uint32 j = 2; j < dcj.height; j += 4) {
      for(uint32 i = 2; i < dcj.width; i += 4) {
        // NOTE(review): kKernelSz / 2 is 3, so for i == 2 or j == 2 these
        // unsigned subtractions wrap around to a very large value; Lookup is
        // then relied upon to map that back into the image -- confirm that it
        // does so for every wrapMode.
        const uint32 startX = i - (kKernelSz / 2);
        const uint32 startY = j - (kKernelSz / 2);
        for(uint32 c = 0; c < 4; c++) {
          // Running sums of the positive and the negative differences of
          // channel c inside the window.
          int32 pos = 0;
          int32 neg = 0;
          for(uint32 y = startY; y < startY + kKernelSz; y++) {
            for(uint32 x = startX; x < startX + kKernelSz; x++) {
              // `difference` is addressed as a (width*4) x height grid so that
              // column x*4 + c selects channel c of texel x.
              // NOTE(review): because the channel is folded into the column,
              // an out-of-range column that Lookup clamps may resolve to a
              // different channel than c -- confirm.
              int16 val = Lookup(difference, x*4 + c, y, dcj.width*4, dcj.height, wrapMode);
              if(val > 0) {
                pos += val;
              } else {
                neg += val;
              }
            }
          }
          // With i = 2, 6, 10, ... the term (i-2) equals 4 * blockColumn, so
          // this is blockRow * width + blockColumn * 4 + c.
          uint32 blockIdx = ((j-2)/4) * dcj.width + (i-2) + c;
          assert(blockIdx < (dcj.width * dcj.height) / 4);
          // Keep only the dominant side; the int32 sums are narrowed to int16
          // on assignment.
          if(pos > -neg) {
            maxDiff[blockIdx] = pos;
            minDiff[blockIdx] = 0;
          } else {
            maxDiff[blockIdx] = 0;
            minDiff[blockIdx] = neg;       
          }
        }
      }
    }
    // Add maxDiff to image to get high signal, and lowdiff to image to
    // get low signal...
    Image imgA = downscaled;
    Image imgB = downscaled;
    for(uint32 j = 0; j < dcj.height / 4; j++) {
      for(uint32 i = 0; i < dcj.width / 4; i++) {
        for(uint32 c = 0; c < 4; c++) {
          // NOTE(review): the index used here is j*width/4 + i*4 + c, while
          // the loop above stored entries at blockRow*width + blockColumn*4
          // + c. The row terms differ by a factor of 4, so for j > 0 this
          // appears to read a different slot than was written -- confirm.
          uint8 &a = imgA(i, j).Component(c);
          a = Clamp<int16>(a + maxDiff[j*dcj.width/4 + i*4 + c], 0, 255);
          uint8 &b = imgB(i, j).Component(c);
          b = Clamp<int16>(b + minDiff[j*dcj.width/4 + i*4 + c], 0, 255);
        }
      }
    }
    imgA.DebugOutput("ImageA");
    imgB.DebugOutput("ImageB");
    // Determine modulation values...
    Image upA = imgA;
    Image upB = imgB;
    upA.BilinearUpscale(2, 2, wrapMode);
    upB.BilinearUpscale(2, 2, wrapMode);
    assert(upA.GetHeight() == dcj.height && upA.GetWidth() == dcj.width);
    assert(upB.GetHeight() == dcj.height && upB.GetWidth() == dcj.width);
    upA.DebugOutput("UpscaledA");
    upB.DebugOutput("UpscaledB");
    // Choose the most appropriate modulation values for the two images...
    // NOTE(review): reserve() only allocates capacity and leaves size() at 0,
    // yet the loop below writes through operator[]. That is out-of-bounds
    // access as far as std::vector is concerned; resize() looks like what was
    // intended -- confirm.
    std::vector<uint8> modValues;
    modValues.reserve(dcj.width * dcj.height);
    for(uint32 j = 0; j < dcj.height; j++) {
      for(uint32 i = 0; i < dcj.width; i++) {
        uint8 &mv = modValues[j * dcj.width + i];
        const Pixel pa = upA(i, j);
        const Pixel pb = upB(i, j);
        const Pixel po = original(i, j);
        // !FIXME! there are two modulation modes... we're only using one.
        // Candidate blend weights in eighths: 0 selects pa, 8 selects pb.
        uint8 modSteps[4] = { 0, 3, 5, 8 };
        uint8 bestMod = 0;
        uint32 bestError = 0xFFFFFFFF;
        for(uint32 s = 0; s < 4; s++) {
          // Sum of squared per-channel errors for this candidate weight.
          uint32 error = 0;
          for(uint32 c = 0; c < 4; c++) {
            uint16 va = static_cast<uint16>(pa.Component(c));
            uint16 vb = static_cast<uint16>(pb.Component(c));
            uint16 vo = static_cast<uint16>(po.Component(c));
            uint16 lerpVal = modSteps[s];
            uint16 res = (va * (8 - lerpVal) + vb * lerpVal) / 8;
            uint16 e = (res > vo)? res - vo : vo - res;
            error += e * e;
          }
          if(error < bestError) {
            bestError = error;
            bestMod = modSteps[s];
          }
        }
        // Note that the stored value is the weight itself (0, 3, 5 or 8), not
        // the index s of the chosen step.
        mv = bestMod;
      }
    }
    // Pack everything into a PVRTC blocks.
    const uint32 blocksW = dcj.width / 4;
    const uint32 blocksH = dcj.height / 4;
    std::vector<uint64> blocks;
    for(uint32 j = 0; j < blocksH; j++) {
      for(uint32 i = 0; i < blocksW; i++) {
        Block b;
        b.SetColorA(imgA(i, j));
        b.SetColorB(imgB(i, j));
        for(uint32 t = 0; t < 16; t++) {
          // NOTE(review): i and j are block coordinates here, but modValues
          // is indexed per texel. i + (t%4) / j + (t/4) therefore do not step
          // 4 texels per block; i*4 + (t%4) and j*4 + (t/4) look like what was
          // intended -- confirm.
          uint32 x = i + (t%4);
          uint32 y = j + (t/4);
          b.SetLerpValue(t, modValues[y*dcj.width + x]);
        }
        blocks.push_back(b.Pack());
      }
    }
    // Spit out the blocks...
    for(uint32 j = 0; j < blocksH; j++) {
      for(uint32 i = 0; i < blocksW; i++) {
        // The blocks are initially arranged in morton order. Let's
        // linearize them...
        // NOTE(review): `blocks` is filled row by row above and read back
        // with the row-major index j * blocksW + i, while the destination
        // offset comes from Interleave(j, i). That reads as row-major in,
        // bit-interleaved out -- the reverse of the comment above; confirm.
        // NOTE(review): Interleave takes uint16 arguments, so j and i are
        // narrowed from uint32 at this call.
        uint32 idx = Interleave(j, i);
        uint32 offset = idx * PVRTCC::kBlockSize;
        // NOTE(review): assumes dcj.outBuf + offset is suitably aligned for a
        // uint64 store and that the buffer is large enough -- confirm.
        uint64 *outPtr = reinterpret_cast<uint64 *>(dcj.outBuf + offset);
        *outPtr = blocks[j * blocksW + i];
      }
    }
  }
 }  // namespace PVRTCC