Fleshes out the worker queue implementation.

2024-12-01 01:04:17 +01:00 · 2012-09-21 18:14:38 -04:00 · 2012-09-21 18:14:38 -04:00 · c7bb6170f3
commit c7bb6170f3
parent 62ca4ffee0
3 changed files with 173 additions and 149 deletions
--- a/Core/src/TexComp.cpp
+++ b/Core/src/TexComp.cpp
@ -6,10 +6,13 @@
 #include <assert.h>
 #include "BC7Compressor.h"
 #include "WorkerQueue.h"
 #include "ThreadGroup.h"
 #include "ImageFile.h"
 #include "Image.h"
 template <typename T>
 static T min(const T &a, const T &b) {
  return (a < b)? a : b;
@ -125,12 +128,22 @@ static double CompressImageWithThreads(
 }
 static double CompressImageWithWorkerQueue(
-  const ImageFile &img,
+  const unsigned char *imgData,
  const unsigned int imgDataSz,
  const SCompressionSettings &settings,
  const CompressionFunc f,
  unsigned char *outBuf
 ) {
-  return 0.0;
+  WorkerQueue wq (
    settings.iNumThreads,
    settings.iJobSize,
    imgData,
    imgDataSz,
    f,
    outBuf
  );
  wq.Run();
 }
 bool CompressImageData(
@ -178,6 +191,9 @@ bool CompressImageData(
    double cmpMSTime = 0.0;
    if(settings.iNumThreads > 1) {
      if(settings.iJobSize > 0)
 	cmpMSTime = CompressImageWithWorkerQueue(data, dataSz, settings, f, cmpData);
      else
 	cmpMSTime = CompressImageWithThreads(data, dataSz, settings, f, cmpData);
    }
    else {
--- a/Core/src/WorkerQueue.cpp
+++ b/Core/src/WorkerQueue.cpp
@ -6,9 +6,22 @@
 #include <stdio.h>
 #include <boost/thread/thread.hpp>
-#include <boost/thread/barrier.hpp>
+
-#include <boost/thread/mutex.hpp>
+template <typename T>
-#include <boost/thread/condition_variable.hpp>
+static inline T max(const T &a, const T &b) {
  return (a > b)? a : b;
 }
 template <typename T>
 static inline T min(const T &a, const T &b) {
  return (a < b)? a : b;
 }
 template <typename T>
 static inline void clamp(T &x, const T &min, const T &max) {
  if(x < min) x = min;
  else if(x > max) x = max;
 }
 WorkerThread::WorkerThread(WorkerQueue * parent, uint32 idx) 
  : m_ThreadIdx(idx)
@ -28,159 +41,131 @@ void WorkerThread::operator()() {
    return;
  }
  while(1) {
    EAction action = m_Parent->AcceptThreadData(m_ThreadIdx);
    if(eAction_Quit == action) {
      break;
    }
    const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
    uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
    (*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
  }
  m_Parent->NotifyWorkerFinished();
  return;
 }
-#if 0
+WorkerQueue::WorkerQueue(
-ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned int inBufSz, CompressionFunc func, unsigned char *outBuf )
+  uint32 numThreads,
-  : m_StartBarrier(new boost::barrier(numThreads + 1))
+  uint32 jobSize,
-  , m_FinishMutex(new boost::mutex())
+  const uint8 *inBuf,
-  , m_FinishCV(new boost::condition_variable())
+  uint32 inBufSz,
-  , m_NumThreads(numThreads)
+  CompressionFunc func,
  uint8 *outBuf
 )
  : m_NumThreads(numThreads)
  , m_ActiveThreads(0)
-  , m_Func(func)
+  , m_JobSize(max(uint32(1), jobSize))
-  , m_ImageDataSz(inBufSz)
+  , m_InBufSz(inBufSz)
-  , m_ImageData(inBuf)
+  , m_InBuf(inBuf)
  , m_OutBuf(outBuf)
-  , m_ThreadState(eThreadState_Done)
+  , m_CompressionFunc(func)
  , m_ExitFlag(false)
 {
-  for(int i = 0; i < kMaxNumThreads; i++) {
+  clamp(m_NumThreads, uint32(1), kMaxNumWorkerThreads);
-    // Thread synchronization primitives
+
-    m_Threads[i].m_ParentCounterLock = m_FinishMutex;
+#ifndef NDEBUG
-    m_Threads[i].m_FinishCV = m_FinishCV;
+  if(m_InBufSz % 64) {
-    m_Threads[i].m_ParentCounter = &m_ThreadsFinished;
+    fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?");
    m_Threads[i].m_StartBarrier = m_StartBarrier;
    m_Threads[i].m_ParentExitFlag = &m_ExitFlag;
  }
 }
 ThreadGroup::~ThreadGroup() {
  delete m_StartBarrier;
  delete m_FinishMutex;
  delete m_FinishCV;
 }
 unsigned int ThreadGroup::GetCompressedBlockSize() {
  if(m_Func == BC7C::CompressImageBC7) return 16;
 #ifdef HAS_SSE_41
  if(m_Func == BC7C::CompressImageBC7SIMD) return 16;
 #endif
 }
-unsigned int ThreadGroup::GetUncompressedBlockSize() {
+void WorkerQueue::Run() {
  if(m_Func == BC7C::CompressImageBC7) return 64;
 #ifdef HAS_SSE_41
  if(m_Func == BC7C::CompressImageBC7SIMD) return 64;
 #endif
 }
-bool ThreadGroup::PrepareThreads() {
+  // Spawn a bunch of threads...
-
+  boost::unique_lock<boost::mutex> lock(m_Mutex);
  // Make sure that threads aren't running.
  if(m_ThreadState != eThreadState_Done) {
    return false;
  }
  // Have we already activated the thread group?
  if(m_ActiveThreads > 0) {
    m_ThreadState = eThreadState_Waiting;
    return true;
  }
  // We can assume that the image data is in block stream order
  // so, the size of the data given to each thread will be (nb*4)x4
  int numBlocks = m_ImageDataSz / 64;
  int blocksProcessed = 0;
  int blocksPerThread = (numBlocks/m_NumThreads) + ((numBlocks % m_NumThreads)? 1 : 0);
  // Currently no threads are finished...
  m_ThreadsFinished = 0;
  for(int i = 0; i < m_NumThreads; i++) {
-
+    WorkerThread t (this, i);
    if(m_ActiveThreads >= kMaxNumThreads)
      break;
    int numBlocksThisThread = blocksPerThread;
    if(blocksProcessed + numBlocksThisThread > numBlocks) {
      numBlocksThisThread = numBlocks - blocksProcessed;
    }
    CmpThread &t = m_Threads[m_ActiveThreads];
    t.m_Height = 4;
    t.m_Width = numBlocksThisThread * 4;
    t.m_CmpFunc = m_Func;
    t.m_OutBuf = m_OutBuf + (blocksProcessed * GetCompressedBlockSize());
    t.m_InBuf = m_ImageData + (blocksProcessed * GetUncompressedBlockSize());
    blocksProcessed += numBlocksThisThread;
    m_ThreadHandles[m_ActiveThreads] = new boost::thread(t);
    m_ActiveThreads++;
  }
-  m_ThreadState = eThreadState_Waiting;
+  // Wait for them to finish...
-  return true;
+  while(m_ActiveThreads > 0) {
-}
+    m_CV.wait(lock);
 bool ThreadGroup::Start() {
  if(m_ActiveThreads <= 0) {
    return false;
  }
-  if(m_ThreadState != eThreadState_Waiting) {
+  // Join them all together..
-    return false;
+  for(int i = 0; i < m_NumThreads; i++) {
  }
  m_StopWatch.Reset();
  m_StopWatch.Start();
  // Last thread to activate the barrier is this one.
  m_ThreadState = eThreadState_Running;
  m_StartBarrier->wait();
  return true;
 }
 bool ThreadGroup::CleanUpThreads() {
  // Are the threads currently running?
  if(m_ThreadState == eThreadState_Running) {
    // ... if so, wait for them to finish
    Join();
  }
  assert(m_ThreadState == eThreadState_Done || m_ThreadState == eThreadState_Waiting);
  // Mark all threads for exit
  m_ExitFlag = true;
  // Hit the barrier to signal them to go.
  m_StartBarrier->wait();
  // Clean up.
  for(int i = 0; i < m_ActiveThreads; i++) {
    m_ThreadHandles[i]->join();
    delete m_ThreadHandles[i];
  }
  // Reset active number of threads...
  m_ActiveThreads = 0;
  m_ExitFlag = false;
 }
-void ThreadGroup::Join() {
+void WorkerQueue::NotifyWorkerFinished() {
  {
    boost::lock_guard<boost::mutex> lock(m_Mutex);
    m_ActiveThreads--;
  }
  m_CV.notify_one();
 }
-  boost::unique_lock<boost::mutex> lock(*m_FinishMutex);
+WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
-  while(m_ThreadsFinished != m_ActiveThreads) {
+  if(threadIdx < 0 || threadIdx >= m_ActiveThreads) {
-    m_FinishCV->wait(lock);
+    return WorkerThread::eAction_Quit;
  }
-  m_StopWatch.Stop();
+  // How many blocks total do we have?
-  m_ThreadState = eThreadState_Done;
+  const uint32 totalBlocks = m_InBufSz / 64;
-  m_ThreadsFinished = 0;
+  
  // Make sure we have exclusive access...
  boost::lock_guard<boost::mutex> lock(m_Mutex);
  // If we've completed all blocks, then mark the thread for 
  // completion.
  if(m_NextBlock >= totalBlocks) {
    return WorkerThread::eAction_Quit;
  }
  // Otherwise, this thread's offset is the current block...
  m_Offsets[threadIdx] = m_NextBlock;
  // The number of blocks to process is either the job size
  // or the number of blocks remaining.
  int blocksProcessed = min(m_JobSize, totalBlocks - m_NextBlock);
  m_NumBlocks[threadIdx] = blocksProcessed;
  // Make sure the next block is updated.
  m_NextBlock += blocksProcessed;
  return WorkerThread::eAction_DoWork;
 }
 const uint8 *WorkerQueue::GetSrcForThread(const int threadIdx) const {
  assert(m_Offsets[threadIdx] >= 0);
  assert(threadIdx >= 0);
  assert(threadIdx < m_NumThreads);
  const uint32 inBufBlockSz = 16 * 4;
  return m_InBuf + m_Offsets[threadIdx] * inBufBlockSz;
 }
 uint8 *WorkerQueue::GetDstForThread(const int threadIdx) const {
  assert(m_Offsets[threadIdx] >= 0);
  assert(threadIdx >= 0);
  assert(threadIdx < m_NumThreads);
  const uint32 outBufBlockSz = 16;
  return m_OutBuf + m_Offsets[threadIdx] * outBufBlockSz;
 }
 uint32 WorkerQueue::GetNumBlocksForThread(const int threadIdx) const {
  assert(m_Offsets[threadIdx] >= 0);
  assert(threadIdx >= 0);
  assert(threadIdx < m_NumThreads);
  return m_NumBlocks[threadIdx];
 }
 #endif 
--- a/Core/src/WorkerQueue.h
+++ b/Core/src/WorkerQueue.h
@ -1,18 +1,18 @@
 #ifndef __TEXCOMP_WORKDER_QUEUE_H__
 #define __TEXCOMP_WORKDER_QUEUE_H__
 #include "TexCompTypes.h"
 #include "TexComp.h"
 // Forward declare...
 class WorkerQueue;
 namespace boost {
  class thread;
  class mutex;
  class barrier;
  class condition_variable;
 }
 // Necessary includes...
 #include "TexCompTypes.h"
 #include "TexComp.h"
 #include <boost/thread/mutex.hpp>
 #include <boost/thread/condition_variable.hpp>
 struct WorkerThread {
  friend class WorkerQueue;
 public:
@ -20,6 +20,13 @@ public:
  WorkerThread(WorkerQueue *, uint32 idx);
  void operator ()();
  enum EAction {
    eAction_DoWork,
    eAction_Quit,
    kNumWorkerThreadActions
  };
 private:
  uint32 m_ThreadIdx;
  WorkerQueue *const m_Parent;
@ -39,21 +46,37 @@ class WorkerQueue {
  ~WorkerQueue() { }
-  // Runs the 
+  // Runs the workers
  void Run();
 private:
-  static const int kMaxNumWorkerThreads = 256;
+  uint32 m_NumThreads;
-  int m_Offsets[kMaxNumWorkerThreads];
+  uint32 m_ActiveThreads;
  uint32 m_JobSize;
  uint32 m_InBufSz;
  const uint8 *m_InBuf;
  uint8 *m_OutBuf;
-  int GetOffsetForThread(const int threadIdx) const;
+  boost::condition_variable m_CV;
  boost::mutex m_Mutex;
  uint32 m_NextBlock;
  static const uint32 kMaxNumWorkerThreads = 256;
  uint32 m_Offsets[kMaxNumWorkerThreads];
  uint32 m_NumBlocks[kMaxNumWorkerThreads];
  boost::thread *m_ThreadHandles[kMaxNumWorkerThreads];
  const uint8 *GetSrcForThread(const int threadIdx) const;
  uint8 *GetDstForThread(const int threadIdx) const;
  uint32 GetNumBlocksForThread(const int threadIdx) const;
  const CompressionFunc m_CompressionFunc;
  CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; }
-  void SignalThreadReady(int threadIdx);
+  WorkerThread::EAction AcceptThreadData(uint32 threadIdx);
-  bool AcceptThreadData(int threadIdx) const;
+  void NotifyWorkerFinished();
 };
 #endif //__TEXCOMP_WORKDER_QUEUE_H__