Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f71d6503 authored by Dichen Zhang's avatar Dichen Zhang Committed by Android (Google) Code Review
Browse files

Merge "libjpegrecoverymap: add support for multi-threading"

parents 961656ae 72b6f30f
Loading
Loading
Loading
Loading
+228 −75
Original line number Diff line number Diff line
@@ -30,6 +30,11 @@
#include <sstream>
#include <string>
#include <cmath>
#include <condition_variable>
#include <deque>
#include <mutex>
#include <thread>
#include <unistd.h>

using namespace std;
using namespace photos_editing_formats::image_io;
@@ -62,6 +67,20 @@ static const st2086_metadata kSt2086Metadata = {
  1.0f,
};

// Set to 0 to force single-threaded processing (GetCPUCoreCount() then always returns 1).
#define CONFIG_MULTITHREAD 1

/*
 * Returns the number of processors currently online as reported by sysconf().
 *
 * The result is always >= 1: it is 1 when CONFIG_MULTITHREAD is disabled, and also when
 * sysconf() fails (it reports failure as -1) or returns a non-positive value.
 *
 * @return number of online CPU cores, never less than 1
 */
int GetCPUCoreCount() {
  int cpuCoreCount = 1;
#if CONFIG_MULTITHREAD
#if defined(_SC_NPROCESSORS_ONLN)
  const long onlineCores = sysconf(_SC_NPROCESSORS_ONLN);
#else
  // _SC_NPROC_ONLN must be defined...
  const long onlineCores = sysconf(_SC_NPROC_ONLN);
#endif
  // sysconf() returns long and uses -1 to signal an error; keep the safe default in that case
  // instead of handing a negative "core count" to callers.
  if (onlineCores > 0) {
    cpuCoreCount = static_cast<int>(onlineCores);
  }
#endif
  return cpuCoreCount;
}

/*
 * Helper function used for writing data to destination.
 *
@@ -626,6 +645,62 @@ status_t RecoveryMap::compressRecoveryMap(jr_uncompressed_ptr uncompressed_recov
  return NO_ERROR;
}

// Number of image rows per job when work is split across more than one thread; with a single
// thread the callers below enqueue the whole image height as one job instead.
const int kJobSzInRows = 16;
// Map-generation jobs are sized as kJobSzInRows / kMapDimensionScaleFactor rows, so the job size
// has to be a positive multiple of the scale factor for that division to be exact.
static_assert(kJobSzInRows > 0 && kJobSzInRows % kMapDimensionScaleFactor == 0,
              "align job size to kMapDimensionScaleFactor");

/*
 * Mutex-protected FIFO of (rowStart, rowEnd) row ranges used to hand work to worker threads.
 *
 * Typical use: enqueue jobs with enqueueJob(), call markQueueForEnd() once no more jobs will be
 * added, and have each worker loop on dequeueJob() until it returns false. reset() prepares the
 * queue for another round. The queue does not interpret the row values; the callers in this file
 * iterate rows as rowStart <= y < rowEnd.
 */
class JobQueue {
 public:
  /*
   * Removes the oldest job, blocking while the queue is empty and not yet marked for end.
   *
   * @param rowStart receives the job's first value; untouched when false is returned
   * @param rowEnd receives the job's second value; untouched when false is returned
   * @return true if a job was dequeued, false if the queue is empty and marked for end
   */
  bool dequeueJob(size_t& rowStart, size_t& rowEnd);

  /*
   * Appends a job to the back of the queue and wakes one waiting consumer.
   *
   * @param rowStart first value of the job
   * @param rowEnd second value of the job
   */
  void enqueueJob(size_t rowStart, size_t rowEnd);

  /*
   * Signals that no further jobs will be enqueued and wakes every waiting consumer so that
   * dequeueJob() can return false once the remaining jobs are drained.
   */
  void markQueueForEnd();

  /*
   * Drops all pending jobs and clears the end-of-queue mark.
   */
  void reset();

 private:
  struct Job {
    size_t rowStart;
    size_t rowEnd;
  };

  bool mQueuedAllJobs = false;
  std::deque<Job> mJobs;
  std::mutex mMutex;
  std::condition_variable mCv;
};

bool JobQueue::dequeueJob(size_t& rowStart, size_t& rowEnd) {
  std::unique_lock<std::mutex> lock{mMutex};
  // The predicate form re-checks the state after every wakeup, so spurious wakeups are harmless.
  mCv.wait(lock, [this] { return !mJobs.empty() || mQueuedAllJobs; });
  if (mJobs.empty()) {
    // Only reachable once the queue was marked for end and fully drained.
    return false;
  }
  const Job job = mJobs.front();
  mJobs.pop_front();
  rowStart = job.rowStart;
  rowEnd = job.rowEnd;
  return true;
}

void JobQueue::enqueueJob(size_t rowStart, size_t rowEnd) {
  {
    std::lock_guard<std::mutex> lock{mMutex};
    mJobs.push_back(Job{rowStart, rowEnd});
  }
  // Notify after releasing the lock so the woken consumer can acquire it immediately.
  mCv.notify_one();
}

void JobQueue::markQueueForEnd() {
  {
    std::lock_guard<std::mutex> lock{mMutex};
    mQueuedAllJobs = true;
  }
  // Consumers blocked on an empty queue are waiting for either a job or this flag; without this
  // notification they would sleep forever and the thread joining them would hang.
  mCv.notify_all();
}

void JobQueue::reset() {
  std::lock_guard<std::mutex> lock{mMutex};
  mJobs.clear();
  mQueuedAllJobs = false;
}

status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_image,
                                          jr_uncompressed_ptr uncompressed_p010_image,
                                          jr_metadata_ptr metadata,
@@ -697,34 +772,50 @@ status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_4
      return ERROR_JPEGR_INVALID_COLORGAMUT;
  }

  std::mutex mutex;
  float hdr_y_nits_max = 0.0f;
  double hdr_y_nits_avg = 0.0f;
  for (size_t y = 0; y < image_height; ++y) {
    for (size_t x = 0; x < image_width; ++x) {
  const int threads = std::clamp(GetCPUCoreCount(), 1, 4);
  size_t rowStep = threads == 1 ? image_height : kJobSzInRows;
  JobQueue jobQueue;

  std::function<void()> computeMetadata = [uncompressed_p010_image, hdrInvOetf,
                                           hdrGamutConversionFn, luminanceFn, hdr_white_nits,
                                           threads, &mutex, &hdr_y_nits_avg,
                                           &hdr_y_nits_max, &jobQueue]() -> void {
    size_t rowStart, rowEnd;
    float hdr_y_nits_max_th = 0.0f;
    double hdr_y_nits_avg_th = 0.0f;
    while (jobQueue.dequeueJob(rowStart, rowEnd)) {
      for (size_t y = rowStart; y < rowEnd; ++y) {
        for (size_t x = 0; x < uncompressed_p010_image->width; ++x) {
          Color hdr_yuv_gamma = getP010Pixel(uncompressed_p010_image, x, y);
          Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma);
          Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
          hdr_rgb = hdrGamutConversionFn(hdr_rgb);
          float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits;

      hdr_y_nits_avg += hdr_y_nits;
      if (hdr_y_nits > hdr_y_nits_max) {
        hdr_y_nits_max = hdr_y_nits;
          hdr_y_nits_avg_th += hdr_y_nits;
          if (hdr_y_nits > hdr_y_nits_max_th) {
            hdr_y_nits_max_th = hdr_y_nits;
          }
        }
      }
  hdr_y_nits_avg /= image_width * image_height;

  metadata->rangeScalingFactor = hdr_y_nits_max / kSdrWhiteNits;
  if (metadata->transferFunction == JPEGR_TF_PQ) {
    metadata->hdr10Metadata.maxFALL = hdr_y_nits_avg;
    metadata->hdr10Metadata.maxCLL = hdr_y_nits_max;
    }
    std::unique_lock<std::mutex> lock{mutex};
    hdr_y_nits_avg += hdr_y_nits_avg_th;
    hdr_y_nits_max = std::max(hdr_y_nits_max, hdr_y_nits_max_th);
  };

  for (size_t y = 0; y < map_height; ++y) {
    for (size_t x = 0; x < map_width; ++x) {
      Color sdr_yuv_gamma = sampleYuv420(uncompressed_yuv_420_image,
                                         kMapDimensionScaleFactor, x, y);
  std::function<void()> generateMap = [uncompressed_yuv_420_image, uncompressed_p010_image,
                                       metadata, dest, hdrInvOetf, hdrGamutConversionFn,
                                       luminanceFn, hdr_white_nits, &jobQueue]() -> void {
    size_t rowStart, rowEnd;
    while (jobQueue.dequeueJob(rowStart, rowEnd)) {
      for (size_t y = rowStart; y < rowEnd; ++y) {
        for (size_t x = 0; x < dest->width; ++x) {
          Color sdr_yuv_gamma =
              sampleYuv420(uncompressed_yuv_420_image, kMapDimensionScaleFactor, x, y);
          Color sdr_rgb_gamma = srgbYuvToRgb(sdr_yuv_gamma);
          Color sdr_rgb = srgbInvOetf(sdr_rgb_gamma);
          float sdr_y_nits = luminanceFn(sdr_rgb) * kSdrWhiteNits;
@@ -735,11 +826,52 @@ status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_4
          hdr_rgb = hdrGamutConversionFn(hdr_rgb);
          float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits;

      size_t pixel_idx =  x + y * map_width;
          size_t pixel_idx = x + y * dest->width;
          reinterpret_cast<uint8_t*>(dest->data)[pixel_idx] =
              encodeRecovery(sdr_y_nits, hdr_y_nits, metadata->rangeScalingFactor);
        }
      }
    }
  };

  std::vector<std::thread> workers;
  for (int th = 0; th < threads - 1; th++) {
    workers.push_back(std::thread(computeMetadata));
  }

  // compute metadata
  for (size_t rowStart = 0; rowStart < image_height;) {
    size_t rowEnd = std::min(rowStart + rowStep, image_height);
    jobQueue.enqueueJob(rowStart, rowEnd);
    rowStart = rowEnd;
  }
  jobQueue.markQueueForEnd();
  computeMetadata();
  std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); });
  workers.clear();
  hdr_y_nits_avg /= image_width * image_height;

  metadata->rangeScalingFactor = hdr_y_nits_max / kSdrWhiteNits;
  if (metadata->transferFunction == JPEGR_TF_PQ) {
    metadata->hdr10Metadata.maxFALL = hdr_y_nits_avg;
    metadata->hdr10Metadata.maxCLL = hdr_y_nits_max;
  }

  // generate map
  jobQueue.reset();
  for (int th = 0; th < threads - 1; th++) {
    workers.push_back(std::thread(generateMap));
  }

  rowStep = (threads == 1 ? image_height : kJobSzInRows) / kMapDimensionScaleFactor;
  for (size_t rowStart = 0; rowStart < map_height;) {
    size_t rowEnd = std::min(rowStart + rowStep, map_height);
    jobQueue.enqueueJob(rowStart, rowEnd);
    rowStart = rowEnd;
  }
  jobQueue.markQueueForEnd();
  generateMap();
  std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); });

  map_data.release();
  return NO_ERROR;
@@ -756,13 +888,17 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_
    return ERROR_JPEGR_INVALID_NULL_PTR;
  }

  dest->width = uncompressed_yuv_420_image->width;
  dest->height = uncompressed_yuv_420_image->height;
  ShepardsIDW idwTable(kMapDimensionScaleFactor);

  JobQueue jobQueue;
  std::function<void()> applyRecMap = [uncompressed_yuv_420_image, uncompressed_recovery_map,
                                       metadata, dest, &jobQueue, &idwTable]() -> void {
    const float hdr_ratio = metadata->rangeScalingFactor;
    size_t width = uncompressed_yuv_420_image->width;
    size_t height = uncompressed_yuv_420_image->height;

  dest->width = width;
  dest->height = height;
  size_t pixel_count = width * height;

    ColorTransformFn hdrOetf = nullptr;
    switch (metadata->transferFunction) {
      case JPEGR_TF_LINEAR:
@@ -776,12 +912,12 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_
        break;
      case JPEGR_TF_UNSPECIFIED:
        // Should be impossible to hit after input validation.
      return ERROR_JPEGR_INVALID_TRANS_FUNC;
        hdrOetf = identityConversion;
    }

  ShepardsIDW idwTable(kMapDimensionScaleFactor);

  for (size_t y = 0; y < height; ++y) {
    size_t rowStart, rowEnd;
    while (jobQueue.dequeueJob(rowStart, rowEnd)) {
      for (size_t y = rowStart; y < rowEnd; ++y) {
        for (size_t x = 0; x < width; ++x) {
          Color yuv_gamma_sdr = getYuv420Pixel(uncompressed_yuv_420_image, x, y);
          Color rgb_gamma_sdr = srgbYuvToRgb(yuv_gamma_sdr);
@@ -799,7 +935,7 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_
            recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y,
                                idwTable);
          }
      Color rgb_hdr = applyRecovery(rgb_sdr, recovery, metadata->rangeScalingFactor);
          Color rgb_hdr = applyRecovery(rgb_sdr, recovery, hdr_ratio);

          Color rgb_gamma_hdr = hdrOetf(rgb_hdr / metadata->rangeScalingFactor);
          uint32_t rgba1010102 = colorToRgba1010102(rgb_gamma_hdr);
@@ -808,6 +944,23 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_
          reinterpret_cast<uint32_t*>(dest->data)[pixel_idx] = rgba1010102;
        }
      }
    }
  };

  const int threads = std::clamp(GetCPUCoreCount(), 1, 4);
  std::vector<std::thread> workers;
  for (int th = 0; th < threads - 1; th++) {
    workers.push_back(std::thread(applyRecMap));
  }
  const int rowStep = threads == 1 ? uncompressed_yuv_420_image->height : kJobSzInRows;
  for (int rowStart = 0; rowStart < uncompressed_yuv_420_image->height;) {
    int rowEnd = std::min(rowStart + rowStep, uncompressed_yuv_420_image->height);
    jobQueue.enqueueJob(rowStart, rowEnd);
    rowStart = rowEnd;
  }
  jobQueue.markQueueForEnd();
  applyRecMap();
  std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); });
  return NO_ERROR;
}