Content - c67350cc6e3bace22916294701870aae98e45f21 - ffda410/filter/singleton_removal.c

visit type:
Tip revision: d5a5c566e3ab7037e0960b1441613062ade661c9 authored by Alexander Kruppa on 29 March 2021, 19:23:48 UTC
Merge branch 'torture_redc_timing' into 'master'
Tip revision: d5a5c56
singleton_removal.c
#include "cado.h" // IWYU pragma: keep
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>         // for errno
#include <inttypes.h>      // for PRIu64, PRId64
#include <string.h>        // for strerror
#include <pthread.h>
#include "bit_vector.h" // BV_BITS
#include "macros.h"
#include "misc.h"       // for UMAX
#include "purge_matrix.h"
#include "singleton_removal.h"
#include "timing.h"     // seconds
#include "typedefs.h"      // for index_t, weight_t

/* If HAVE_SYNC_FETCH is not defined, we will use mutex for multithreaded
 * version of the code. May be too slow. */
#ifndef HAVE_SYNC_FETCH
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
#endif

/* Code for singletons removal --- monothread version */

#if 1
void singleton_removal_oneiter_mono (purge_matrix_ptr mat)
{
  for (uint64_t i = 0; i < mat->nrows_init; i++)
  {
    index_t h, *row_ptr;
    if (purge_matrix_is_row_active(mat, i))
    {
      for (row_ptr = mat->row_compact[i]; (h = *row_ptr++) != UMAX(h);)
      {
        if (mat->cols_weight[h] == 1)
        {
          /* mat->nrows and mat->ncols are updated by purge_matrix_delete_row.*/
          purge_matrix_delete_row (mat, i);
          break;
        }
      }
    }
  }
}
#else
/* idem, but only removes rows that contain a singleton at the beginning of
   the pass */
void singleton_removal_oneiter_mono (purge_matrix_ptr mat)
{
  char *T = malloc (mat->nrows_init);
  memset (T, 0, mat->nrows_init);
  for (uint64_t i = 0; i < mat->nrows_init; i++)
    {
      index_t h, *row_ptr;
      if (purge_matrix_is_row_active(mat, i))
	for (row_ptr = mat->row_compact[i]; (h = *row_ptr++) != UMAX(h);)
	  if (mat->cols_weight[h] == 1)
	    {
	      T[i] = 1; /* mark row i for deletion */
	      break;
	    }
    }
  for (uint64_t i = 0; i < mat->nrows_init; i++)
    if (T[i])
      purge_matrix_delete_row (mat, i);
  free (T);
}
#endif

/* Code for singletons removal --- multithread version */

typedef struct sing_rem_mt_data_s
{
  /* Read only */
  uint64_t begin, end;
  /* Read / Write */
  uint64_t sup_nrow, sup_ncol;
  purge_matrix_ptr mat;
} sing_rem_mt_data_t;

/* Hightest criticality for performance. I inline all myself. */
static void *
singleton_removal_mt_thread (void *arg)
{
  sing_rem_mt_data_t *data = (sing_rem_mt_data_t *) arg;
  purge_matrix_ptr mat = data->mat;

  data->sup_nrow = data->sup_ncol = 0;
  for (uint64_t i = data->begin; i < data->end; i++)
  {
    if (purge_matrix_is_row_active(mat, i))
    {
      index_t h, *row_ptr;
      for (row_ptr = mat->row_compact[i]; (h = *row_ptr++) != UMAX(h);)
      {
        if (UNLIKELY(mat->cols_weight[h] == 1)) /* We found a singleton */
        {
          for (row_ptr = mat->row_compact[i]; (h = *row_ptr++) != UMAX(h);)
          {
            weight_t *w = &(mat->cols_weight[h]);
            ASSERT(*w);
            /* Decrease only if not equal to the maximum value */
            /* If weight becomes 0, we just remove a column */
#ifdef HAVE_SYNC_FETCH
            if (*w < UMAX(*w) && !__sync_sub_and_fetch(w, 1))
              (data->sup_ncol)++;
#else /* else we use mutex to protect the subtraction on w */
            pthread_mutex_lock (&lock);
            if (*w < UMAX(*w) && !(--(*w)))
              (data->sup_ncol)++;
            pthread_mutex_unlock (&lock);
#endif
          }
          /* We do not free mat->row_compact[i] as it is freed later with
             my_malloc_free_all */
          purge_matrix_set_row_inactive (mat, i); /* mark as deleted */
          (data->sup_nrow)++;
          break;
        }
      }
    }
  }
  return NULL;
}

void
singleton_removal_oneiter_mt (purge_matrix_ptr mat, unsigned int nthreads)
{
  pthread_attr_t attr;
  pthread_t *threads = NULL;
  sing_rem_mt_data_t *th_data = NULL;
  uint64_t nrows_per_thread, k;
  int err;

  th_data = (sing_rem_mt_data_t *) malloc (nthreads*sizeof(sing_rem_mt_data_t));
  ASSERT_ALWAYS(th_data != NULL);
  threads = (pthread_t *) malloc (nthreads * sizeof(pthread_t));
  ASSERT_ALWAYS(threads != NULL);

  /* nrows_per_thread MUST be a multiple of BV_BITS to ensure that each address
   * of the bit_vector array is accessed by only one thread. */
  nrows_per_thread = (mat->nrows_init / nthreads) & ((uint64_t) ~(BV_BITS - 1));
  k = 0;
  for (unsigned int i = 0; i < nthreads; i++)
  {
    th_data[i].mat = mat;
    th_data[i].begin = k;
    k += nrows_per_thread;
    th_data[i].end = k;
  }
  th_data[nthreads-1].end = mat->nrows_init;

  pthread_attr_init (&attr);
  pthread_attr_setstacksize (&attr, 1 << 16);
  pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_JOINABLE);
  for (unsigned int i = 0; i < nthreads; i++)
  {
    if ((err = pthread_create(&threads[i], &attr, &singleton_removal_mt_thread,
                              (void *) &(th_data[i]))))
    {
      fprintf(stderr, "Error, pthread_create failed in singleton_removal_"
                      "oneiter_mt: %d. %s\n", err, strerror(errno));
      abort();
    }
  }
  for (unsigned int i = 0; i < nthreads; i++)
  {
    pthread_join (threads[i], NULL);
    mat->nrows -= th_data[i].sup_nrow;
    mat->ncols -= th_data[i].sup_ncol;
  }
  pthread_attr_destroy(&attr);
  free(threads);
  free(th_data);
}

/* Perform a complete singleton removal step:  call singleton_removal_oneiter_*
 * until there is no more singleton.
 * Return the excess at the end *
 */
int64_t
singleton_removal (purge_matrix_ptr mat, unsigned int nthreads, int verbose)
{
  int64_t excess;
  uint64_t oldnrows;
  unsigned int iter = 0;
  do
  {
    oldnrows = mat->nrows;
    excess = purge_matrix_compute_excess (mat);
    if (verbose >= 0) /* if no quiet */
    {
      if (iter == 0)
        fprintf(stdout, "Sing. rem.: begin with: ");
      else
        fprintf(stdout, "Sing. rem.:   iter %03u: ", iter);
      fprintf(stdout, "nrows=%" PRIu64 " ncols=%" PRIu64 " excess=%" PRId64 " "
                      "at %2.2lf\n", mat->nrows, mat->ncols, excess, seconds());
      fflush(stdout);
    }

    if (nthreads > 1)
        singleton_removal_oneiter_mt (mat, nthreads);
    else
        singleton_removal_oneiter_mono (mat);

    iter++;
  } while (oldnrows != mat->nrows);

  if (verbose >= 0) /* if no quiet */
    fprintf(stdout, "Sing. rem.:   iter %03u: No more singletons, finished at "
                    "%2.2lf\n", iter, seconds());

  return excess;
}
Browse the archive

https://gitlab.inria.fr/cado-nfs/cado-nfs