Revision 53f2e14fc0fc015621bb0f49ea216c2c513ee2eb authored by johannes hanika on 23 December 2016, 10:38:36 UTC, committed by johannes hanika on 23 December 2016, 10:38:36 UTC
1 parent 4d54ba5
Raw File
histogram.c
/*
    This file is part of darktable,
    copyright (c) 2014-2016 LebedevRI.

    darktable is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    darktable is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with darktable.  If not, see <http://www.gnu.org/licenses/>.
*/
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#if defined(__SSE__)
#include <xmmintrin.h>
#endif
#include <assert.h>
#include <stdlib.h>

#include "common/darktable.h"
#include "common/histogram.h"
#include "develop/imageop.h"

#define S(V, params) ((params->mul) * ((float)V))
#define P(V, params) (CLAMP((V), 0, (params->bins_count - 1)))
#define PU(V, params) (MIN((V), (params->bins_count - 1)))
#define PS(V, params) (P(S(V, params), params))

//------------------------------------------------------------------------------

inline static void histogram_helper_cs_RAW_helper_process_pixel_float(
    const dt_dev_histogram_collection_params_t *const histogram_params, const float *pixel, uint32_t *histogram)
{
  const uint32_t i = PS(*pixel, histogram_params);
  histogram[4 * i]++;
}

inline static void histogram_helper_cs_RAW(const dt_dev_histogram_collection_params_t *const histogram_params,
                                           const void *pixel, uint32_t *histogram, int j)
{
  const dt_histogram_roi_t *roi = histogram_params->roi;
  const float *input = (float *)pixel + roi->width * j + roi->crop_x;
  for(int i = 0; i < roi->width - roi->crop_width - roi->crop_x; i++, input++)
  {
    histogram_helper_cs_RAW_helper_process_pixel_float(histogram_params, input, histogram);
  }
}

//------------------------------------------------------------------------------

// WARNING: you must ensure that bins_count is big enough
inline static void histogram_helper_cs_RAW_helper_process_pixel_uint16(
    const dt_dev_histogram_collection_params_t *const histogram_params, const uint16_t *pixel, uint32_t *histogram)
{
  const uint16_t i = PU(*pixel, histogram_params);
  histogram[4 * i]++;
}

inline void dt_histogram_helper_cs_RAW_uint16(const dt_dev_histogram_collection_params_t *const histogram_params,
                                              const void *pixel, uint32_t *histogram, int j)
{
  const dt_histogram_roi_t *roi = histogram_params->roi;
  uint16_t *in = (uint16_t *)pixel + roi->width * j + roi->crop_x;

  // process pixels
  for(int i = 0; i < roi->width - roi->crop_width - roi->crop_x; i++, in++)
    histogram_helper_cs_RAW_helper_process_pixel_uint16(histogram_params, in, histogram);
}

//------------------------------------------------------------------------------

inline static void __attribute__((__unused__)) histogram_helper_cs_rgb_helper_process_pixel_float(
    const dt_dev_histogram_collection_params_t *const histogram_params, const float *pixel, uint32_t *histogram)
{
  const uint32_t R = PS(pixel[0], histogram_params);
  const uint32_t G = PS(pixel[1], histogram_params);
  const uint32_t B = PS(pixel[2], histogram_params);
  histogram[4 * R]++;
  histogram[4 * G + 1]++;
  histogram[4 * B + 2]++;
}

#if defined(__SSE2__)
inline static void histogram_helper_cs_rgb_helper_process_pixel_m128(
    const dt_dev_histogram_collection_params_t *const histogram_params, const float *pixel, uint32_t *histogram)
{
  const __m128 scale = _mm_set1_ps(histogram_params->mul);
  const __m128 val_min = _mm_setzero_ps();
  const __m128 val_max = _mm_set1_ps(histogram_params->bins_count - 1);

  assert(dt_is_aligned(pixel, 16));
  const __m128 input = _mm_load_ps(pixel);
  const __m128 scaled = _mm_mul_ps(input, scale);
  const __m128 clamped = _mm_max_ps(_mm_min_ps(scaled, val_max), val_min);

  const __m128i indexes = _mm_cvtps_epi32(clamped);

  __m128i values __attribute__((aligned(16)));
  _mm_store_si128(&values, indexes);

  const uint32_t *valuesi = (uint32_t *)(&values);

  histogram[4 * valuesi[0]]++;
  histogram[4 * valuesi[1] + 1]++;
  histogram[4 * valuesi[2] + 2]++;
}
#endif

inline static void histogram_helper_cs_rgb(const dt_dev_histogram_collection_params_t *const histogram_params,
                                           const void *pixel, uint32_t *histogram, int j)
{
  const dt_histogram_roi_t *roi = histogram_params->roi;
  float *in = (float *)pixel + 4 * (roi->width * j + roi->crop_x);

  // process aligned pixels with SSE
  for(int i = 0; i < roi->width - roi->crop_width - roi->crop_x; i++, in += 4)
  {
    if(darktable.codepath.OPENMP_SIMD)
      histogram_helper_cs_rgb_helper_process_pixel_float(histogram_params, in, histogram);
#if defined(__SSE2__)
    else if(darktable.codepath.SSE2)
      histogram_helper_cs_rgb_helper_process_pixel_m128(histogram_params, in, histogram);
#endif
    else
      dt_unreachable_codepath();
  }
}

//------------------------------------------------------------------------------

inline static void __attribute__((__unused__)) histogram_helper_cs_Lab_helper_process_pixel_float(
    const dt_dev_histogram_collection_params_t *const histogram_params, const float *pixel, uint32_t *histogram)
{
  const float Lv = pixel[0];
  const float av = pixel[1];
  const float bv = pixel[2];
  const float max = histogram_params->bins_count - 1;
  const uint32_t L = CLAMP(histogram_params->mul / 100.0f * (Lv), 0, max);
  const uint32_t a = CLAMP(histogram_params->mul / 256.0f * (av + 128.0f), 0, max);
  const uint32_t b = CLAMP(histogram_params->mul / 256.0f * (bv + 128.0f), 0, max);
  histogram[4 * L]++;
  histogram[4 * a + 1]++;
  histogram[4 * b + 2]++;
}

#if defined(__SSE2__)
inline static void histogram_helper_cs_Lab_helper_process_pixel_m128(
    const dt_dev_histogram_collection_params_t *const histogram_params, const float *pixel, uint32_t *histogram)
{
  const float fscale = histogram_params->mul;

  const __m128 shift = _mm_set_ps(0.0f, 128.0f, 128.0f, 0.0f);
  const __m128 scale = _mm_set_ps(fscale / 1.0f, fscale / 256.0f, fscale / 256.0f, fscale / 100.0f);
  const __m128 val_min = _mm_setzero_ps();
  const __m128 val_max = _mm_set1_ps(histogram_params->bins_count - 1);

  assert(dt_is_aligned(pixel, 16));
  const __m128 input = _mm_load_ps(pixel);
  const __m128 shifted = _mm_add_ps(input, shift);
  const __m128 scaled = _mm_mul_ps(shifted, scale);
  const __m128 clamped = _mm_max_ps(_mm_min_ps(scaled, val_max), val_min);

  const __m128i indexes = _mm_cvtps_epi32(clamped);

  __m128i values __attribute__((aligned(16)));
  _mm_store_si128(&values, indexes);

  const uint32_t *valuesi = (uint32_t *)(&values);

  histogram[4 * valuesi[0]]++;
  histogram[4 * valuesi[1] + 1]++;
  histogram[4 * valuesi[2] + 2]++;
}
#endif

inline static void histogram_helper_cs_Lab(const dt_dev_histogram_collection_params_t *const histogram_params,
                                           const void *pixel, uint32_t *histogram, int j)
{
  const dt_histogram_roi_t *roi = histogram_params->roi;
  float *in = (float *)pixel + 4 * (roi->width * j + roi->crop_x);

  // process aligned pixels with SSE
  for(int i = 0; i < roi->width - roi->crop_width - roi->crop_x; i++, in += 4)
  {
    if(darktable.codepath.OPENMP_SIMD)
      histogram_helper_cs_Lab_helper_process_pixel_float(histogram_params, in, histogram);
#if defined(__SSE2__)
    else if(darktable.codepath.SSE2)
      histogram_helper_cs_Lab_helper_process_pixel_m128(histogram_params, in, histogram);
#endif
    else
      dt_unreachable_codepath();
  }
}

//==============================================================================

void dt_histogram_worker(dt_dev_histogram_collection_params_t *const histogram_params,
                         dt_dev_histogram_stats_t *histogram_stats, const void *const pixel,
                         uint32_t **histogram, const dt_worker Worker)
{
  const int nthreads = omp_get_max_threads();

  const size_t bins_total = (size_t)4 * histogram_params->bins_count;
  const size_t buf_size = bins_total * sizeof(uint32_t);
  void *partial_hists = calloc(nthreads, buf_size);

  if(histogram_params->mul == 0) histogram_params->mul = (double)(histogram_params->bins_count - 1);

  const dt_histogram_roi_t *const roi = histogram_params->roi;

#ifdef _OPENMP
#pragma omp parallel for schedule(static) default(none) shared(partial_hists)
#endif
  for(int j = roi->crop_y; j < roi->height - roi->crop_height; j++)
  {
    uint32_t *thread_hist = (uint32_t *)partial_hists + bins_total * omp_get_thread_num();
    Worker(histogram_params, pixel, thread_hist, j);
  }

#ifdef _OPENMP
  *histogram = realloc(*histogram, buf_size);
  memset(*histogram, 0, buf_size);
  uint32_t *hist = *histogram;

#pragma omp parallel for schedule(static) default(none) shared(hist, partial_hists)
  for(size_t k = 0; k < bins_total; k++)
  {
    for(size_t n = 0; n < nthreads; n++)
    {
      const uint32_t *thread_hist = (uint32_t *)partial_hists + bins_total * n;
      hist[k] += thread_hist[k];
    }
  }
#else
  *histogram = realloc(*histogram, buf_size);
  memmove(*histogram, partial_hists, buf_size);
#endif
  free(partial_hists);

  histogram_stats->bins_count = histogram_params->bins_count;
  histogram_stats->pixels = (roi->width - roi->crop_width - roi->crop_x)
                            * (roi->height - roi->crop_height - roi->crop_y);
}

//------------------------------------------------------------------------------

void dt_histogram_helper(dt_dev_histogram_collection_params_t *histogram_params,
                         dt_dev_histogram_stats_t *histogram_stats, dt_iop_colorspace_type_t cst,
                         const void *pixel, uint32_t **histogram)
{
  switch(cst)
  {
    case iop_cs_RAW:
      dt_histogram_worker(histogram_params, histogram_stats, pixel, histogram, histogram_helper_cs_RAW);
      histogram_stats->ch = 1u;
      break;

    case iop_cs_rgb:
      dt_histogram_worker(histogram_params, histogram_stats, pixel, histogram, histogram_helper_cs_rgb);
      histogram_stats->ch = 3u;
      break;

    case iop_cs_Lab:
    default:
      dt_histogram_worker(histogram_params, histogram_stats, pixel, histogram, histogram_helper_cs_Lab);
      histogram_stats->ch = 3u;
      break;
  }
}

void dt_histogram_max_helper(const dt_dev_histogram_stats_t *const histogram_stats,
                             dt_iop_colorspace_type_t cst, uint32_t **histogram, uint32_t *histogram_max)
{
  if(*histogram == NULL) return;
  histogram_max[0] = histogram_max[1] = histogram_max[2] = histogram_max[3] = 0;
  uint32_t *hist = *histogram;
  switch(cst)
  {
    case iop_cs_RAW:
      for(int k = 0; k < 4 * histogram_stats->bins_count; k += 4)
        histogram_max[0] = histogram_max[0] > hist[k] ? histogram_max[0] : hist[k];
      break;

    case iop_cs_rgb:
      // don't count <= 0 pixels
      for(int k = 4; k < 4 * histogram_stats->bins_count; k += 4)
        histogram_max[0] = histogram_max[0] > hist[k] ? histogram_max[0] : hist[k];
      for(int k = 5; k < 4 * histogram_stats->bins_count; k += 4)
        histogram_max[1] = histogram_max[1] > hist[k] ? histogram_max[1] : hist[k];
      for(int k = 6; k < 4 * histogram_stats->bins_count; k += 4)
        histogram_max[2] = histogram_max[2] > hist[k] ? histogram_max[2] : hist[k];
      for(int k = 7; k < 4 * histogram_stats->bins_count; k += 4)
        histogram_max[3] = histogram_max[3] > hist[k] ? histogram_max[3] : hist[k];
      break;

    case iop_cs_Lab:
    default:
      // don't count <= 0 pixels in L
      for(int k = 4; k < 4 * histogram_stats->bins_count; k += 4)
        histogram_max[0] = histogram_max[0] > hist[k] ? histogram_max[0] : hist[k];

      // don't count <= -128 and >= +128 pixels in a and b
      for(int k = 5; k < 4 * (histogram_stats->bins_count - 1); k += 4)
        histogram_max[1] = histogram_max[1] > hist[k] ? histogram_max[1] : hist[k];
      for(int k = 6; k < 4 * (histogram_stats->bins_count - 1); k += 4)
        histogram_max[2] = histogram_max[2] > hist[k] ? histogram_max[2] : hist[k];
      break;
  }
}

// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh
// vim: shiftwidth=2 expandtab tabstop=2 cindent
// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
back to top