https://hal.archives-ouvertes.fr/hal-02128878
Raw File
Tip revision: 4201397494d9af8b687117e8ff4d85a8944f5c5a authored by Software Heritage on 11 June 2019, 10:15:02 UTC
hal: Deposit 298 in collection hal
Tip revision: 4201397
benchmark-fgemv.C
/* Copyright (c) FFLAS-FFPACK
 * ========LICENCE========
 * This file is part of the library FFLAS-FFPACK.
 *
 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
 * it under the terms of the  GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * ========LICENCE========
 */

//#include "goto-def.h"

// Declare that the call to openblas_set_num_threads is made here (in main), so it
// must not be repeated elsewhere in the call stack.
#define __FFLASFFPACK_OPENBLAS_NT_ALREADY_SET 1

#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
#include <givaro/modular-balanced.h>

#include "fflas-ffpack/config-blas.h"
#include "fflas-ffpack/fflas/fflas.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/args-parser.h"

#include "fflas-ffpack/utils/fflas_io.h"
#include "fflas-ffpack/utils/test-utils.h"

#include "fflas-ffpack/utils/timer.h"
#include "givaro/modular-integer.h"
#include "givaro/givcaster.h"

using namespace FFPACK;

using namespace std;
using namespace FFLAS;

/// Compile-time flag: does this field type need an explicit characteristic?
/// The primary template answers "no".
template <typename Field>
struct need_field_characteristic {
  static constexpr bool value = false;
};

/// Givaro::Modular over any element type needs a characteristic.
template <typename Element>
struct need_field_characteristic<Givaro::Modular<Element>> {
  static constexpr bool value = true;
};

/// Givaro::ModularBalanced over any element type needs a characteristic.
template <typename Element>
struct need_field_characteristic<Givaro::ModularBalanced<Element>> {
  static constexpr bool value = true;
};

/// Compile-time flag: is this domain type accepted by the benchmark?
/// Every type is accepted unless a specialisation below says otherwise.
template <typename Field>
struct compatible_data_type {
  static constexpr bool value = true;
};

/// Givaro::ZRing<float> is rejected.
template <>
struct compatible_data_type<Givaro::ZRing<float>> {
  static constexpr bool value = false;
};

/// Givaro::ZRing<double> is rejected.
template <>
struct compatible_data_type<Givaro::ZRing<double>> {
  static constexpr bool value = false;
};


/**
 * Initialises the operands of the benchmark: the matrix A, the input vector X
 * and the output vector Y.
 *
 * @param F     domain the elements belong to
 * @param Rand  generator handed to every frand call
 * @param A     matrix storage; row i starts at A + i*lda
 * @param X     input vector, k entries with stride incX
 * @param Y     output vector, m entries with stride incY
 * @param m     number of rows of A (and entries of Y)
 * @param k     number of columns of A (and entries of X)
 * @param incX  stride of X
 * @param incY  stride of Y
 * @param lda   leading dimension of A
 * @param NBK   first argument of the SPLITTER that cuts the m rows into blocks
 */
template <class Field, class RandIter, class Matrix, class Vector>
void fill_value(Field& F, RandIter& Rand, 
		Matrix& A, Vector& X, Vector& Y,
		size_t m, size_t k, size_t incX, size_t incY, size_t lda, int NBK){
  // TODO: replace by a 1D pfrand
  // The m rows of A are cut into blocks (Row strategy, Threads parameter, NBK);
  // one task per block calls frand on rows [iter.begin(), iter.end()).
  // NOTE(review): frand presumably fills its target with elements drawn from Rand,
  // and SYNCH_GROUP presumably waits for all the tasks -- confirm against the
  // FFLAS-FFPACK parallel DSL.
  SYNCH_GROUP(
	      FORBLOCK1D(iter, m, SPLITTER(NBK, CuttingStrategy::Row, StrategyParameter::Threads),
			 TASK(MODE(CONSTREFERENCE(F,Rand,A)),
			      {
                    frand(F, Rand, iter.end()-iter.begin(), k, A+iter.begin()*lda, lda);
			      }
			      );
			 );
	      );
  //FFLAS::pfrand(F,Rand, m,k,A,m/NBK);
  // X is handled as a k x 1 block with stride incX, Y as an m x 1 block with stride incY.
  FFLAS::frand(F,Rand, k,1,X,incX);
  FFLAS::fzero(F, m,1,Y,incY);
}

/**
 * Builds a generator of type Field::RandIter from (F, bitsize, seed) and uses
 * it to initialise A, X and Y through fill_value.
 *
 * @param F        domain of the operands
 * @param A        matrix storage with leading dimension lda
 * @param X        input vector, stride incX
 * @param Y        output vector, stride incY
 * @param m        number of rows of A
 * @param k        number of columns of A
 * @param NBK      block-count argument forwarded to fill_value
 * @param bitsize  second constructor argument of the generator
 * @param seed     third constructor argument of the generator
 */
template <class Field, class Matrix, class Vector>
void genData(Field& F, 
	     Matrix& A, Vector& X, Vector& Y,
	     size_t m, size_t k, size_t incX, size_t incY, size_t lda, int NBK,
	     int bitsize, uint64_t seed){
  typedef typename Field::RandIter Generator;
  Generator elementSource(F, bitsize, seed);

  fill_value(F, elementSource, A, X, Y, m, k, incX, incY, lda, NBK);
}

/**
 * Naive result check: recomputes A*X with the sequential fgemv and compares it,
 * entry by entry, with the vector Y produced by the benchmarked call.
 *
 * @param F     domain of the operands
 * @param m     number of rows of A, i.e. number of entries of Y
 * @param lda   leading dimension of A; it is also passed as the column count,
 *              so A is treated as a dense m x lda matrix
 * @param A     matrix operand
 * @param X     input vector, stride incX
 * @param incX  stride of X
 * @param Y     vector to validate, stride incY
 * @param incY  stride of Y
 * @return true when every entry of Y equals the reference entry, false otherwise
 */
template <class Field, class Matrix, class Vector>
bool check_result(Field& F, size_t m, size_t lda, Matrix& A, Vector& X, size_t incX, Vector& Y, size_t incY){
  // The reference vector is stored contiguously: it holds exactly m elements, so
  // writing it with stride incY would run past the end of the buffer when incY > 1.
  const size_t incRef = 1;
  typename Field::Element_ptr Y2 = FFLAS::fflas_new(F,m,1);
  FFLAS::fgemv(F, FFLAS::FflasNoTrans, m, lda, F.one, A, lda, X, incX, F.zero, Y2, incRef);

  // Entry j of the reference sits at Y2[j]; entry j of the checked vector sits at Y[j*incY].
  bool identical = true;
  for(size_t j=0; identical && j<m; ++j){
    identical = F.areEqual(Y2[j*incRef], Y[j*incY]);
  }

  FFLAS::fflas_delete(Y2);
  return identical;
}


/**
 * Runs the selected fgemv variant iters+1 times and accumulates the wall-clock
 * time of the last iters runs into time (the first run is an untimed warm-up).
 * After each run the result is validated with check_result; the benchmark stops
 * at the first mismatch.
 *
 * @param F          domain of the operands
 * @param p          variant: 1 = Parallel<Recursive,Threads>, 2 = Parallel<Row,Threads>,
 *                   3 = Parallel<Row,Grain>, any other value = sequential fgemv
 * @param A          m x k matrix with leading dimension lda
 * @param X          input vector, stride incX
 * @param Y          output vector, stride incY
 * @param m          number of rows of A
 * @param k          number of columns of A
 * @param lda        leading dimension of A
 * @param iters      number of timed runs
 * @param t          argument of the Threads-based parallel helpers
 * @param time       accumulator, incremented by the real time of each timed run
 * @param GrainSize  argument of the Grain-based parallel helper
 * @return true when every run passed check_result, false at the first failure
 */
template <class Field, class Matrix, class Vector>
bool benchmark_with_timer(Field& F, int p, Matrix& A, Vector& X, Vector& Y, size_t m, size_t k, size_t incX,
			  size_t incY, size_t lda, size_t iters, int t, double& time, size_t GrainSize){
  typedef CuttingStrategy::Row row;
  typedef CuttingStrategy::Recursive rec;
  typedef StrategyParameter::Threads threads;
  typedef StrategyParameter::Grain grain;

  Timer chrono;
  for (size_t i=0;i<=iters;++i){
    // Iteration 0 only warms up caches and threads; it is never timed.
    const bool timed = (i != 0);

    chrono.clear();
    if (timed) { chrono.start(); }

    // The column count handed to fgemv is k; lda is only the row stride of A.
    switch (p){
    case 1:{
      ParSeqHelper::Parallel<rec, threads>  H(t);
      FFLAS::fgemv(F, FFLAS::FflasNoTrans, m, k, F.one, A, lda, X, incX, F.zero, Y,  incY, H);
      break;
    }
    case 2:{
      ParSeqHelper::Parallel<row, threads>  H(t);
      FFLAS::fgemv(F, FFLAS::FflasNoTrans, m, k, F.one, A, lda, X, incX, F.zero, Y,  incY, H);
      break;
    }
    case 3:{
      ParSeqHelper::Parallel<row, grain>  H(GrainSize);
      FFLAS::fgemv(F, FFLAS::FflasNoTrans, m, k, F.one, A, lda, X, incX, F.zero, Y,  incY, H);
      break;
    }
    default:{
      // p == 0 and any unknown value fall back to the sequential routine.
      FFLAS::fgemv(F, FFLAS::FflasNoTrans, m, k, F.one, A, lda, X, incX, F.zero, Y,  incY);
      break;
    }
    }

    if (timed) { chrono.stop(); time+=chrono.realtime(); }

    // Validation happens outside the timed section.
    if(!check_result(F, m, lda,  A,  X, incX,  Y, incY)){
      return false;
    }
  }
  return true;
}

/**
 * Prints the outcome of a benchmark on std::cout.
 *
 * On success: the mean time per run, the Gflops figure computed from 2*m*k
 * operations per run, then the output of writeCommandString for the argument
 * table. On failure: the name of the field type and the value of p.
 *
 * @param F      domain used by the benchmark (only its type is reported)
 * @param pass   whether every run was validated
 * @param time   accumulated time of the timed runs
 * @param iters  number of timed runs
 * @param p      variant selector, echoed on failure
 * @param m      number of rows of the matrix
 * @param k      number of columns of the matrix
 * @param as     argument table handed to writeCommandString
 */
template <class Field, class arg>
void benchmark_disp(Field& F, bool pass, double& time, size_t iters, int p,  size_t m, size_t k, arg& as){
  if(!pass){
    std::cout<<"FAILED for "<<typeid(Field).name()<<std::endl;
    std::cout << "p:=" << p << ';'<<std::endl;
    return;
  }

  const double runs = double(iters);
  // 2*m*k operations per run, scaled down to 10^9 operations.
  const double gigaOps = 2.*double(m)/1000.*double(k)/1000.0/1000.0;

  std::cout << "Time: " << time / runs
            << " Gflops: " << gigaOps / time * runs;
  writeCommandString(std::cout, as) << std::endl;
}


/**
 * Runs one complete benchmark over the domain F: allocates the operands,
 * initialises them with genData, times the product with benchmark_with_timer,
 * reports through benchmark_disp and finally releases the operands.
 *
 * The matrix is stored with leading dimension k and both vectors use stride 1.
 *
 * @param F          domain of the operands
 * @param p          variant selector forwarded to benchmark_with_timer
 * @param m          number of rows of the matrix
 * @param k          number of columns of the matrix
 * @param NBK        block-count argument forwarded to genData
 * @param bitsize    forwarded to genData
 * @param seed       forwarded to genData
 * @param iters      number of timed runs
 * @param t          forwarded to benchmark_with_timer
 * @param as         argument table forwarded to benchmark_disp
 * @param GrainSize  forwarded to benchmark_with_timer
 */
template <class Field, class arg>
void benchmark_in_Field(Field& F, int p,  size_t m, size_t k, int NBK, int bitsize, uint64_t seed, size_t iters,
			int t, arg& as, size_t GrainSize){
  typedef typename Field::Element_ptr ElementPtr;

  const size_t lda = k;
  const size_t incX = 1;
  const size_t incY = 1;

  ElementPtr A = FFLAS::fflas_new(F,m,lda);
  ElementPtr X = FFLAS::fflas_new(F,k,incX);
  ElementPtr Y = FFLAS::fflas_new(F,m,incY);

  genData(F, A, X, Y, m, k, incX, incY, lda, NBK, bitsize, seed);

  double time = 0.0;
  const bool pass = benchmark_with_timer(F, p, A, X, Y, m, k, incX, incY, lda, iters, t, time, GrainSize);

  benchmark_disp(F, pass, time, iters, p, m, k, as);

  FFLAS::fflas_delete(A);
  FFLAS::fflas_delete(X);
  FFLAS::fflas_delete(Y);
}

/**
 * Benchmark entry for domains that are default-constructed (no characteristic).
 *
 * Two compile-time guards reject unsuitable instantiations: a type flagged by
 * need_field_characteristic, and a type rejected by compatible_data_type.
 * The default-constructed domain is then passed to benchmark_in_Field together
 * with all the other arguments.
 */
template <class Field,  class arg >
void benchmark_with_field(int p,  size_t m, size_t k, int NBK, int bitsize, uint64_t seed, size_t iters,
			  int t, arg& as, size_t GrainSize){
  // Modular-like types must go through the overload that takes a characteristic.
  static_assert(!need_field_characteristic<Field>::value,
		"A field characteristic should be provided for Non ZRing data type !");
  // ZRing over float or double is refused because it could give inconsistent results.
  static_assert(compatible_data_type<Field>::value,
		"The provided data type for ZRing is not compatible for the desired operation and could lead to inconsistent result !");

  Field domain;
  benchmark_in_Field(domain, p, m, k, NBK, bitsize, seed, iters, t, as, GrainSize);
}

/**
 * Benchmark entry for domains built from a characteristic: constructs
 * Field(q) and passes it to benchmark_in_Field with all the other arguments.
 *
 * @param q  value given to the Field constructor
 */
template <class Field, class arg>
void benchmark_with_field(const Givaro::Integer& q, int p,  size_t m, size_t k,
			  int NBK, int bitsize, uint64_t seed, size_t iters, int t,
			  arg& as, size_t GrainSize){
  Field domain(q);

  benchmark_in_Field(domain, p, m, k, NBK, bitsize, seed, iters, t, as, GrainSize);
}

/**
 * Entry point of the fgemv benchmark.
 *
 * Parses the command line, then runs the benchmark over ZRing<Integer> when
 * q == 0 and over ModularBalanced<double> built from q otherwise.
 *
 * @return 0 after the benchmark has run, 1 when a size option is negative
 */
int main(int argc, char** argv) {

#ifdef __FFLASFFPACK_OPENBLAS_NUM_THREADS
    openblas_set_num_threads(__FFLASFFPACK_OPENBLAS_NUM_THREADS);
#endif

  int p = 0;
  Givaro::Integer q = 131071;
  int b = 0;
  int NBK = -1;

  int t;
  PAR_BLOCK { t = NUM_THREADS; }

  // Options declared TYPE_INT are read and written as int by the argument
  // parser, so each of them is backed by a genuine int. Registering a size_t or
  // a uint64_t under TYPE_INT only touches part of the object.
  int mArg = 4000;
  int kArg = 4000;
  int itersArg = 3;
  int grainArg = 64;
  // The default seed is reduced to a non-negative int so that the value held in
  // the argument table is exactly the seed used by the run.
  const uint64_t drawnSeed = getSeed();
  int seedArg = static_cast<int>(drawnSeed & 0x7fffffffULL);

  Argument as[] = {
    { 'q', "-q Q", "Set the field characteristic (0 for the ring of integers).",   TYPE_INTEGER , &q },
    { 'b', "-b B", "Set the bitsize of input.",                                     TYPE_INT , &b },
    { 'p', "-p P", "0 for sequential, 1 for <Recursive,Thread>, 2 for <Row,Thread>, 3 for <Row,Grain>.",
                                                                                    TYPE_INT , &p },
    { 'm', "-m M", "Set the dimension m of the matrix.",                            TYPE_INT , &mArg },
    { 'k', "-k K", "Set the dimension k of the matrix.",                            TYPE_INT , &kArg },
    { 't', "-t T", "number of virtual threads to drive the partition.",             TYPE_INT , &t },
    { 'N', "-N N", "number of numa blocks per dimension for the numa placement",    TYPE_INT , &NBK },
    { 'i', "-i R", "Set number of repetitions.",                                    TYPE_INT , &itersArg },
    { 's', "-s S", "Sets seed.",                                                    TYPE_INT , &seedArg },
    { 'g', "-g G", "Sets GrainSize.",                                               TYPE_INT , &grainArg },
    END_OF_ARGUMENTS
  };

  parseArguments(argc,argv,as);

  // A negative size would wrap around to a huge unsigned value.
  if (mArg < 0 || kArg < 0 || itersArg < 0 || grainArg < 0) {
    std::cerr << "Error: -m, -k, -i and -g expect non-negative values." << std::endl;
    return 1;
  }

  const size_t m = static_cast<size_t>(mArg);
  const size_t k = static_cast<size_t>(kArg);
  const size_t iters = static_cast<size_t>(itersArg);
  const size_t GrainSize = static_cast<size_t>(grainArg);
  const uint64_t seed = static_cast<uint64_t>(seedArg);

  // Without an explicit -N, use as many blocks as threads.
  if (NBK==-1) NBK = t;

  if(q==0){
    PAR_BLOCK {
      //benchmark_with_field<Givaro::ZRing<int32_t>>( p,  m, k, NBK, b, seed, iters, t, as, GrainSize);
      benchmark_with_field<Givaro::ZRing<Givaro::Integer>>( p,  m, k, NBK, b, seed, iters, t, as, GrainSize);
    }
  }else{
    PAR_BLOCK {
      //benchmark_with_field<Givaro::Modular<float>>(q, p,  m, k, NBK, b, seed, iters, t, as, GrainSize);
      //benchmark_with_field<Givaro::Modular<double>>(q, p,  m, k, NBK, b, seed, iters, t, as, GrainSize);
      //benchmark_with_field<Givaro::Modular<int32_t>>(q, p,  m, k, NBK, b, seed, iters, t, as, GrainSize);

      //benchmark_with_field<Givaro::Modular<Givaro::Integer>>(q, p,  m, k, NBK, b, seed, iters, t, as, GrainSize);

      //benchmark_with_field<Givaro::ModularBalanced<float>>(q, p,  m, k, NBK, b, seed, iters, t, as, GrainSize);
      benchmark_with_field<Givaro::ModularBalanced<double>>(q, p,  m, k, NBK, b, seed, iters, t, as, GrainSize);
      //benchmark_with_field<Givaro::ModularBalanced<int32_t>>(q, p,  m, k, NBK, b, seed, iters, t, as, GrainSize);
    }
  }

  return 0;
}
/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
back to top