Loading libs/jpegrecoverymap/recoverymap.cpp +228 −75 Original line number Diff line number Diff line Loading @@ -30,6 +30,11 @@ #include <sstream> #include <string> #include <cmath> #include <condition_variable> #include <deque> #include <mutex> #include <thread> #include <unistd.h> using namespace std; using namespace photos_editing_formats::image_io; Loading Loading @@ -62,6 +67,20 @@ static const st2086_metadata kSt2086Metadata = { 1.0f, }; #define CONFIG_MULTITHREAD 1 int GetCPUCoreCount() { int cpuCoreCount = 1; #if CONFIG_MULTITHREAD #if defined(_SC_NPROCESSORS_ONLN) cpuCoreCount = sysconf(_SC_NPROCESSORS_ONLN); #else // _SC_NPROC_ONLN must be defined... cpuCoreCount = sysconf(_SC_NPROC_ONLN); #endif #endif return cpuCoreCount; } /* * Helper function used for writing data to destination. * Loading Loading @@ -626,6 +645,62 @@ status_t RecoveryMap::compressRecoveryMap(jr_uncompressed_ptr uncompressed_recov return NO_ERROR; } const int kJobSzInRows = 16; static_assert(kJobSzInRows > 0 && kJobSzInRows % kMapDimensionScaleFactor == 0, "align job size to kMapDimensionScaleFactor"); class JobQueue { public: bool dequeueJob(size_t& rowStart, size_t& rowEnd); void enqueueJob(size_t rowStart, size_t rowEnd); void markQueueForEnd(); void reset(); private: bool mQueuedAllJobs = false; std::deque<std::tuple<size_t, size_t>> mJobs; std::mutex mMutex; std::condition_variable mCv; }; bool JobQueue::dequeueJob(size_t& rowStart, size_t& rowEnd) { std::unique_lock<std::mutex> lock{mMutex}; while (true) { if (mJobs.empty()) { if (mQueuedAllJobs) { return false; } else { mCv.wait(lock); } } else { auto it = mJobs.begin(); rowStart = std::get<0>(*it); rowEnd = std::get<1>(*it); mJobs.erase(it); return true; } } return false; } void JobQueue::enqueueJob(size_t rowStart, size_t rowEnd) { std::unique_lock<std::mutex> lock{mMutex}; mJobs.push_back(std::make_tuple(rowStart, rowEnd)); lock.unlock(); mCv.notify_one(); } void JobQueue::markQueueForEnd() { std::unique_lock<std::mutex> lock{mMutex}; mQueuedAllJobs = true; } void JobQueue::reset() { std::unique_lock<std::mutex> lock{mMutex}; mJobs.clear(); mQueuedAllJobs = false; } status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_image, jr_uncompressed_ptr uncompressed_p010_image, jr_metadata_ptr metadata, Loading Loading @@ -697,34 +772,50 @@ status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_4 return ERROR_JPEGR_INVALID_COLORGAMUT; } std::mutex mutex; float hdr_y_nits_max = 0.0f; double hdr_y_nits_avg = 0.0f; for (size_t y = 0; y < image_height; ++y) { for (size_t x = 0; x < image_width; ++x) { const int threads = std::clamp(GetCPUCoreCount(), 1, 4); size_t rowStep = threads == 1 ? image_height : kJobSzInRows; JobQueue jobQueue; std::function<void()> computeMetadata = [uncompressed_p010_image, hdrInvOetf, hdrGamutConversionFn, luminanceFn, hdr_white_nits, threads, &mutex, &hdr_y_nits_avg, &hdr_y_nits_max, &jobQueue]() -> void { size_t rowStart, rowEnd; float hdr_y_nits_max_th = 0.0f; double hdr_y_nits_avg_th = 0.0f; while (jobQueue.dequeueJob(rowStart, rowEnd)) { for (size_t y = rowStart; y < rowEnd; ++y) { for (size_t x = 0; x < uncompressed_p010_image->width; ++x) { Color hdr_yuv_gamma = getP010Pixel(uncompressed_p010_image, x, y); Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma); Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma); hdr_rgb = hdrGamutConversionFn(hdr_rgb); float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits; hdr_y_nits_avg += hdr_y_nits; if (hdr_y_nits > hdr_y_nits_max) { hdr_y_nits_max = hdr_y_nits; hdr_y_nits_avg_th += hdr_y_nits; if (hdr_y_nits > hdr_y_nits_max_th) { hdr_y_nits_max_th = hdr_y_nits; } } } hdr_y_nits_avg /= image_width * image_height; metadata->rangeScalingFactor = hdr_y_nits_max / kSdrWhiteNits; if (metadata->transferFunction == JPEGR_TF_PQ) { metadata->hdr10Metadata.maxFALL = hdr_y_nits_avg; metadata->hdr10Metadata.maxCLL = hdr_y_nits_max; } std::unique_lock<std::mutex> lock{mutex}; hdr_y_nits_avg += hdr_y_nits_avg_th; hdr_y_nits_max = std::max(hdr_y_nits_max, hdr_y_nits_max_th); }; for (size_t y = 0; y < map_height; ++y) { for (size_t x = 0; x < map_width; ++x) { Color sdr_yuv_gamma = sampleYuv420(uncompressed_yuv_420_image, kMapDimensionScaleFactor, x, y); std::function<void()> generateMap = [uncompressed_yuv_420_image, uncompressed_p010_image, metadata, dest, hdrInvOetf, hdrGamutConversionFn, luminanceFn, hdr_white_nits, &jobQueue]() -> void { size_t rowStart, rowEnd; while (jobQueue.dequeueJob(rowStart, rowEnd)) { for (size_t y = rowStart; y < rowEnd; ++y) { for (size_t x = 0; x < dest->width; ++x) { Color sdr_yuv_gamma = sampleYuv420(uncompressed_yuv_420_image, kMapDimensionScaleFactor, x, y); Color sdr_rgb_gamma = srgbYuvToRgb(sdr_yuv_gamma); Color sdr_rgb = srgbInvOetf(sdr_rgb_gamma); float sdr_y_nits = luminanceFn(sdr_rgb) * kSdrWhiteNits; Loading @@ -735,11 +826,52 @@ status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_4 hdr_rgb = hdrGamutConversionFn(hdr_rgb); float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits; size_t pixel_idx = x + y * map_width; size_t pixel_idx = x + y * dest->width; reinterpret_cast<uint8_t*>(dest->data)[pixel_idx] = encodeRecovery(sdr_y_nits, hdr_y_nits, metadata->rangeScalingFactor); } } } }; std::vector<std::thread> workers; for (int th = 0; th < threads - 1; th++) { workers.push_back(std::thread(computeMetadata)); } // compute metadata for (size_t rowStart = 0; rowStart < image_height;) { size_t rowEnd = std::min(rowStart + rowStep, image_height); jobQueue.enqueueJob(rowStart, rowEnd); rowStart = rowEnd; } jobQueue.markQueueForEnd(); computeMetadata(); std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); }); workers.clear(); hdr_y_nits_avg /= image_width * image_height; metadata->rangeScalingFactor = hdr_y_nits_max / kSdrWhiteNits; if (metadata->transferFunction == JPEGR_TF_PQ) { metadata->hdr10Metadata.maxFALL = hdr_y_nits_avg; metadata->hdr10Metadata.maxCLL = hdr_y_nits_max; } // generate map jobQueue.reset(); for (int th = 0; th < threads - 1; th++) { workers.push_back(std::thread(generateMap)); } rowStep = (threads == 1 ? image_height : kJobSzInRows) / kMapDimensionScaleFactor; for (size_t rowStart = 0; rowStart < map_height;) { size_t rowEnd = std::min(rowStart + rowStep, map_height); jobQueue.enqueueJob(rowStart, rowEnd); rowStart = rowEnd; } jobQueue.markQueueForEnd(); generateMap(); std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); }); map_data.release(); return NO_ERROR; Loading @@ -756,13 +888,17 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_ return ERROR_JPEGR_INVALID_NULL_PTR; } dest->width = uncompressed_yuv_420_image->width; dest->height = uncompressed_yuv_420_image->height; ShepardsIDW idwTable(kMapDimensionScaleFactor); JobQueue jobQueue; std::function<void()> applyRecMap = [uncompressed_yuv_420_image, uncompressed_recovery_map, metadata, dest, &jobQueue, &idwTable]() -> void { const float hdr_ratio = metadata->rangeScalingFactor; size_t width = uncompressed_yuv_420_image->width; size_t height = uncompressed_yuv_420_image->height; dest->width = width; dest->height = height; size_t pixel_count = width * height; ColorTransformFn hdrOetf = nullptr; switch (metadata->transferFunction) { case JPEGR_TF_LINEAR: Loading @@ -776,12 +912,12 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_ break; case JPEGR_TF_UNSPECIFIED: // Should be impossible to hit after input validation. return ERROR_JPEGR_INVALID_TRANS_FUNC; hdrOetf = identityConversion; } ShepardsIDW idwTable(kMapDimensionScaleFactor); for (size_t y = 0; y < height; ++y) { size_t rowStart, rowEnd; while (jobQueue.dequeueJob(rowStart, rowEnd)) { for (size_t y = rowStart; y < rowEnd; ++y) { for (size_t x = 0; x < width; ++x) { Color yuv_gamma_sdr = getYuv420Pixel(uncompressed_yuv_420_image, x, y); Color rgb_gamma_sdr = srgbYuvToRgb(yuv_gamma_sdr); Loading @@ -799,7 +935,7 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_ recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y, idwTable); } Color rgb_hdr = applyRecovery(rgb_sdr, recovery, metadata->rangeScalingFactor); Color rgb_hdr = applyRecovery(rgb_sdr, recovery, hdr_ratio); Color rgb_gamma_hdr = hdrOetf(rgb_hdr / metadata->rangeScalingFactor); uint32_t rgba1010102 = colorToRgba1010102(rgb_gamma_hdr); Loading @@ -808,6 +944,23 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_ reinterpret_cast<uint32_t*>(dest->data)[pixel_idx] = rgba1010102; } } } }; const int threads = std::clamp(GetCPUCoreCount(), 1, 4); std::vector<std::thread> workers; for (int th = 0; th < threads - 1; th++) { workers.push_back(std::thread(applyRecMap)); } const int rowStep = threads == 1 ? uncompressed_yuv_420_image->height : kJobSzInRows; for (int rowStart = 0; rowStart < uncompressed_yuv_420_image->height;) { int rowEnd = std::min(rowStart + rowStep, uncompressed_yuv_420_image->height); jobQueue.enqueueJob(rowStart, rowEnd); rowStart = rowEnd; } jobQueue.markQueueForEnd(); applyRecMap(); std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); }); return NO_ERROR; } Loading Loading
libs/jpegrecoverymap/recoverymap.cpp +228 −75 Original line number Diff line number Diff line Loading @@ -30,6 +30,11 @@ #include <sstream> #include <string> #include <cmath> #include <condition_variable> #include <deque> #include <mutex> #include <thread> #include <unistd.h> using namespace std; using namespace photos_editing_formats::image_io; Loading Loading @@ -62,6 +67,20 @@ static const st2086_metadata kSt2086Metadata = { 1.0f, }; #define CONFIG_MULTITHREAD 1 int GetCPUCoreCount() { int cpuCoreCount = 1; #if CONFIG_MULTITHREAD #if defined(_SC_NPROCESSORS_ONLN) cpuCoreCount = sysconf(_SC_NPROCESSORS_ONLN); #else // _SC_NPROC_ONLN must be defined... cpuCoreCount = sysconf(_SC_NPROC_ONLN); #endif #endif return cpuCoreCount; } /* * Helper function used for writing data to destination. * Loading Loading @@ -626,6 +645,62 @@ status_t RecoveryMap::compressRecoveryMap(jr_uncompressed_ptr uncompressed_recov return NO_ERROR; } const int kJobSzInRows = 16; static_assert(kJobSzInRows > 0 && kJobSzInRows % kMapDimensionScaleFactor == 0, "align job size to kMapDimensionScaleFactor"); class JobQueue { public: bool dequeueJob(size_t& rowStart, size_t& rowEnd); void enqueueJob(size_t rowStart, size_t rowEnd); void markQueueForEnd(); void reset(); private: bool mQueuedAllJobs = false; std::deque<std::tuple<size_t, size_t>> mJobs; std::mutex mMutex; std::condition_variable mCv; }; bool JobQueue::dequeueJob(size_t& rowStart, size_t& rowEnd) { std::unique_lock<std::mutex> lock{mMutex}; while (true) { if (mJobs.empty()) { if (mQueuedAllJobs) { return false; } else { mCv.wait(lock); } } else { auto it = mJobs.begin(); rowStart = std::get<0>(*it); rowEnd = std::get<1>(*it); mJobs.erase(it); return true; } } return false; } void JobQueue::enqueueJob(size_t rowStart, size_t rowEnd) { std::unique_lock<std::mutex> lock{mMutex}; mJobs.push_back(std::make_tuple(rowStart, rowEnd)); lock.unlock(); mCv.notify_one(); } void JobQueue::markQueueForEnd() { std::unique_lock<std::mutex> lock{mMutex}; mQueuedAllJobs = true; } void JobQueue::reset() { std::unique_lock<std::mutex> lock{mMutex}; mJobs.clear(); mQueuedAllJobs = false; } status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_image, jr_uncompressed_ptr uncompressed_p010_image, jr_metadata_ptr metadata, Loading Loading @@ -697,34 +772,50 @@ status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_4 return ERROR_JPEGR_INVALID_COLORGAMUT; } std::mutex mutex; float hdr_y_nits_max = 0.0f; double hdr_y_nits_avg = 0.0f; for (size_t y = 0; y < image_height; ++y) { for (size_t x = 0; x < image_width; ++x) { const int threads = std::clamp(GetCPUCoreCount(), 1, 4); size_t rowStep = threads == 1 ? image_height : kJobSzInRows; JobQueue jobQueue; std::function<void()> computeMetadata = [uncompressed_p010_image, hdrInvOetf, hdrGamutConversionFn, luminanceFn, hdr_white_nits, threads, &mutex, &hdr_y_nits_avg, &hdr_y_nits_max, &jobQueue]() -> void { size_t rowStart, rowEnd; float hdr_y_nits_max_th = 0.0f; double hdr_y_nits_avg_th = 0.0f; while (jobQueue.dequeueJob(rowStart, rowEnd)) { for (size_t y = rowStart; y < rowEnd; ++y) { for (size_t x = 0; x < uncompressed_p010_image->width; ++x) { Color hdr_yuv_gamma = getP010Pixel(uncompressed_p010_image, x, y); Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma); Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma); hdr_rgb = hdrGamutConversionFn(hdr_rgb); float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits; hdr_y_nits_avg += hdr_y_nits; if (hdr_y_nits > hdr_y_nits_max) { hdr_y_nits_max = hdr_y_nits; hdr_y_nits_avg_th += hdr_y_nits; if (hdr_y_nits > hdr_y_nits_max_th) { hdr_y_nits_max_th = hdr_y_nits; } } } hdr_y_nits_avg /= image_width * image_height; metadata->rangeScalingFactor = hdr_y_nits_max / kSdrWhiteNits; if (metadata->transferFunction == JPEGR_TF_PQ) { metadata->hdr10Metadata.maxFALL = hdr_y_nits_avg; metadata->hdr10Metadata.maxCLL = hdr_y_nits_max; } std::unique_lock<std::mutex> lock{mutex}; hdr_y_nits_avg += hdr_y_nits_avg_th; hdr_y_nits_max = std::max(hdr_y_nits_max, hdr_y_nits_max_th); }; for (size_t y = 0; y < map_height; ++y) { for (size_t x = 0; x < map_width; ++x) { Color sdr_yuv_gamma = sampleYuv420(uncompressed_yuv_420_image, kMapDimensionScaleFactor, x, y); std::function<void()> generateMap = [uncompressed_yuv_420_image, uncompressed_p010_image, metadata, dest, hdrInvOetf, hdrGamutConversionFn, luminanceFn, hdr_white_nits, &jobQueue]() -> void { size_t rowStart, rowEnd; while (jobQueue.dequeueJob(rowStart, rowEnd)) { for (size_t y = rowStart; y < rowEnd; ++y) { for (size_t x = 0; x < dest->width; ++x) { Color sdr_yuv_gamma = sampleYuv420(uncompressed_yuv_420_image, kMapDimensionScaleFactor, x, y); Color sdr_rgb_gamma = srgbYuvToRgb(sdr_yuv_gamma); Color sdr_rgb = srgbInvOetf(sdr_rgb_gamma); float sdr_y_nits = luminanceFn(sdr_rgb) * kSdrWhiteNits; Loading @@ -735,11 +826,52 @@ status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_4 hdr_rgb = hdrGamutConversionFn(hdr_rgb); float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits; size_t pixel_idx = x + y * map_width; size_t pixel_idx = x + y * dest->width; reinterpret_cast<uint8_t*>(dest->data)[pixel_idx] = encodeRecovery(sdr_y_nits, hdr_y_nits, metadata->rangeScalingFactor); } } } }; std::vector<std::thread> workers; for (int th = 0; th < threads - 1; th++) { workers.push_back(std::thread(computeMetadata)); } // compute metadata for (size_t rowStart = 0; rowStart < image_height;) { size_t rowEnd = std::min(rowStart + rowStep, image_height); jobQueue.enqueueJob(rowStart, rowEnd); rowStart = rowEnd; } jobQueue.markQueueForEnd(); computeMetadata(); std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); }); workers.clear(); hdr_y_nits_avg /= image_width * image_height; metadata->rangeScalingFactor = hdr_y_nits_max / kSdrWhiteNits; if (metadata->transferFunction == JPEGR_TF_PQ) { metadata->hdr10Metadata.maxFALL = hdr_y_nits_avg; metadata->hdr10Metadata.maxCLL = hdr_y_nits_max; } // generate map jobQueue.reset(); for (int th = 0; th < threads - 1; th++) { workers.push_back(std::thread(generateMap)); } rowStep = (threads == 1 ? image_height : kJobSzInRows) / kMapDimensionScaleFactor; for (size_t rowStart = 0; rowStart < map_height;) { size_t rowEnd = std::min(rowStart + rowStep, map_height); jobQueue.enqueueJob(rowStart, rowEnd); rowStart = rowEnd; } jobQueue.markQueueForEnd(); generateMap(); std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); }); map_data.release(); return NO_ERROR; Loading @@ -756,13 +888,17 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_ return ERROR_JPEGR_INVALID_NULL_PTR; } dest->width = uncompressed_yuv_420_image->width; dest->height = uncompressed_yuv_420_image->height; ShepardsIDW idwTable(kMapDimensionScaleFactor); JobQueue jobQueue; std::function<void()> applyRecMap = [uncompressed_yuv_420_image, uncompressed_recovery_map, metadata, dest, &jobQueue, &idwTable]() -> void { const float hdr_ratio = metadata->rangeScalingFactor; size_t width = uncompressed_yuv_420_image->width; size_t height = uncompressed_yuv_420_image->height; dest->width = width; dest->height = height; size_t pixel_count = width * height; ColorTransformFn hdrOetf = nullptr; switch (metadata->transferFunction) { case JPEGR_TF_LINEAR: Loading @@ -776,12 +912,12 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_ break; case JPEGR_TF_UNSPECIFIED: // Should be impossible to hit after input validation. return ERROR_JPEGR_INVALID_TRANS_FUNC; hdrOetf = identityConversion; } ShepardsIDW idwTable(kMapDimensionScaleFactor); for (size_t y = 0; y < height; ++y) { size_t rowStart, rowEnd; while (jobQueue.dequeueJob(rowStart, rowEnd)) { for (size_t y = rowStart; y < rowEnd; ++y) { for (size_t x = 0; x < width; ++x) { Color yuv_gamma_sdr = getYuv420Pixel(uncompressed_yuv_420_image, x, y); Color rgb_gamma_sdr = srgbYuvToRgb(yuv_gamma_sdr); Loading @@ -799,7 +935,7 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_ recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y, idwTable); } Color rgb_hdr = applyRecovery(rgb_sdr, recovery, metadata->rangeScalingFactor); Color rgb_hdr = applyRecovery(rgb_sdr, recovery, hdr_ratio); Color rgb_gamma_hdr = hdrOetf(rgb_hdr / metadata->rangeScalingFactor); uint32_t rgba1010102 = colorToRgba1010102(rgb_gamma_hdr); Loading @@ -808,6 +944,23 @@ status_t RecoveryMap::applyRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_ reinterpret_cast<uint32_t*>(dest->data)[pixel_idx] = rgba1010102; } } } }; const int threads = std::clamp(GetCPUCoreCount(), 1, 4); std::vector<std::thread> workers; for (int th = 0; th < threads - 1; th++) { workers.push_back(std::thread(applyRecMap)); } const int rowStep = threads == 1 ? uncompressed_yuv_420_image->height : kJobSzInRows; for (int rowStart = 0; rowStart < uncompressed_yuv_420_image->height;) { int rowEnd = std::min(rowStart + rowStep, uncompressed_yuv_420_image->height); jobQueue.enqueueJob(rowStart, rowEnd); rowStart = rowEnd; } jobQueue.markQueueForEnd(); applyRecMap(); std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); }); return NO_ERROR; } Loading