// https://hal.archives-ouvertes.fr/hal-02128878
// Raw File
// Tip revision: 4201397494d9af8b687117e8ff4d85a8944f5c5a authored by Software Heritage on 11 June 2019, 10:15:02 UTC
// hal: Deposit 298 in collection hal
// Tip revision: 4201397
// benchmark-fgemm-mp.C
/*
 * Copyright (C) FFLAS-FFPACK
 * Written by Pascal Giorgi <pascal.giorgi@lirmm.fr>
 *
 * This file is Free Software and part of FFLAS-FFPACK.
 *
 * ========LICENCE========
 * This file is part of the library FFLAS-FFPACK.
 *
 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
 * it under the terms of the  GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * ========LICENCE========
 *.
 */

// Declare that the call to openblas_set_num_threads is made in this file (see main),
// so that it is not repeated everywhere in the call stack.
#define __FFLASFFPACK_OPENBLAS_NT_ALREADY_SET 1


#if not defined(MG_DEFAULT)
#define MG_DEFAULT MG_ACTIVE
#endif
#if not defined(STD_RECINT_SIZE)
#define STD_RECINT_SIZE 8
#endif

#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
#include <typeinfo>
#include <vector>
#include <string>
using namespace std;

#include "fflas-ffpack/fflas/fflas.h"
#include "fflas-ffpack/utils/fflas_io.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/args-parser.h"
#include "givaro/modular-integer.h"
#include "givaro/givcaster.h"
#include "fflas-ffpack/paladin/parallel.h"
#ifdef BENCH_RECINT
#include "recint/recint.h"
#endif

#ifdef	BENCH_FLINT
#define __GMP_BITS_PER_MP_LIMB 64
extern "C" {
#include "flint/longlong.h"
#include "flint/long_extras.h"
#include "flint/fmpz_mat.h"
#include "flint/fmpz.h"
#include "flint/flint.h"
}
#endif


static size_t iters = 3 ;
static Givaro::Integer q = -1 ;
static unsigned long b = 512 ;
static size_t m = 512 ;
static size_t k = 512 ;
static size_t n = 512 ;
static int nbw = -1 ;
static size_t seed= time(NULL);
static Argument as[] = {
    { 'q', "-q Q", "Set the field characteristic (-1 for random).",         TYPE_INTEGER , &q },
    { 'b', "-b B", "Set the bitsize of the random characteristic.",         TYPE_INT , &b },
    { 'm', "-m M", "Set the dimension m of the matrix.",                    TYPE_INT , &m },
    { 'k', "-k K", "Set the dimension k of the matrix.",                    TYPE_INT , &k },
    { 'n', "-n N", "Set the dimension n of the matrix.",                    TYPE_INT , &n },
    { 'w', "-w N", "Set the number of winograd levels (-1 for random).",    TYPE_INT , &nbw },
    { 'i', "-i R", "Set number of repetitions.",                            TYPE_INT , &iters },
    { 's', "-s S", "Sets seed.",                            				TYPE_INT , &seed },
    END_OF_ARGUMENTS
};

template<typename Ints>
int tmain(){
    srand( (int)seed);
    srand48(seed);
    Givaro::Integer::seeding(seed);

    typedef Givaro::Modular<Ints> Field;
    Givaro::Integer p;
    FFLAS::Timer chrono, TimFreivalds;
    double time=0.,timev=0.;
#ifdef BENCH_FLINT
    double timeFlint=0.;
#endif
    for (size_t loop=0;loop<iters;loop++){
        Givaro::Integer::random_exact_2exp(p, b);
        Givaro::IntPrimeDom IPD;
        IPD.nextprimein(p);
        Ints ip; Givaro::Caster<Ints,Givaro::Integer>(ip,p);
        Givaro::Caster<Givaro::Integer,Ints>(p,ip); // to check consistency

        Field F(ip);
        size_t lda,ldb,ldc;
        lda=k;
        ldb=n;
        ldc=n;

        typename Field::RandIter Rand(F,seed);
        typename Field::Element_ptr A,B,C;
        A= FFLAS::fflas_new(F,m,lda);
        B= FFLAS::fflas_new(F,k,ldb);
        C= FFLAS::fflas_new(F,m,ldc);

        // 		for (size_t i=0;i<m;++i)
        // 			for (size_t j=0;j<k;++j)
        // 				Rand.random(A[i*lda+j]);
        // 		for (size_t i=0;i<k;++i)
        // 			for (size_t j=0;j<n;++j)
        // 				Rand.random(B[i*ldb+j]);
        // 		for (size_t i=0;i<m;++i)
        // 			for (size_t j=0;j<n;++j)
        // 				Rand.random(C[i*ldc+j]);

        PAR_BLOCK { FFLAS::pfrand(F,Rand, m,k,A,m/size_t(MAX_THREADS)); }
        PAR_BLOCK { FFLAS::pfrand(F,Rand, k,n,B,k/MAX_THREADS); }
        PAR_BLOCK { FFLAS::pfzero(F, m,n,C,m/MAX_THREADS); }


        Ints alpha,beta;
        alpha=F.one;
        beta=F.zero;


#ifdef	BENCH_FLINT
        // FLINT MUL //
        fmpz_t modp,tmp;
        fmpz_init(modp);
        fmpz_init(tmp);
        fmpz_set_mpz(modp, *(reinterpret_cast<const mpz_t*>(&p)));
        fmpz_mat_t AA,BB,CC,DD;
        fmpz_mat_init (AA, m, k);
        fmpz_mat_init (BB, k, n);
        fmpz_mat_init (CC, m, n);
        fmpz_mat_init (DD, m, n);
        fmpz_t aalpha, bbeta;
        fmpz_set_mpz(aalpha,*(reinterpret_cast<const mpz_t*>(&alpha)));
        fmpz_set_mpz(bbeta,*(reinterpret_cast<const mpz_t*>(&beta)));

        for (size_t i=0;i<m;++i)
            for (size_t j=0;j<k;++j)
                fmpz_set_mpz(fmpz_mat_entry(AA,i,j),*(reinterpret_cast<const mpz_t*>(A+i*lda+j)));
        for (size_t i=0;i<k;++i)
            for (size_t j=0;j<n;++j)
                fmpz_set_mpz(fmpz_mat_entry(BB,i,j),*(reinterpret_cast<const mpz_t*>(B+i*ldb+j)));
        for (size_t i=0;i<m;++i)
            for (size_t j=0;j<n;++j)
                fmpz_set_mpz(fmpz_mat_entry(CC,i,j),*(reinterpret_cast<const mpz_t*>(C+i*ldc+j)));
        chrono.clear();chrono.start();
        // DD= A.B
        fmpz_mat_mul(DD,AA,BB);
        // CC = beta.C
        fmpz_mat_scalar_mul_fmpz(CC,CC,bbeta);
        // CC = CC + DD.alpha
        fmpz_mat_scalar_addmul_fmpz(CC,DD,aalpha);
        // CC = CC mod p
        for (size_t i=0;i<m;++i)
            for (size_t j=0;j<n;++j)
                fmpz_mod(fmpz_mat_entry(CC,i,j),fmpz_mat_entry(CC,i,j),modp);

        chrono.stop();
        timeFlint+=chrono.usertime();
        fmpz_mat_clear(AA);
        fmpz_mat_clear(BB);
#endif
        //END FLINT CODE //
        using  FFLAS::CuttingStrategy::Recursive;
        using  FFLAS::StrategyParameter::TwoDAdaptive;
        // RNS MUL_LA
        chrono.clear();chrono.start();
        // 		PAR_BLOCK{
        //             FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc, SPLITTER(NUM_THREADS,Recursive,TwoDAdaptive) );
        // 		}
        {
            FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,FFLAS::ParSeqHelper::Sequential());
        }

        chrono.stop();
        time+=chrono.realtime();

        TimFreivalds.start();
        bool pass = FFLAS::freivalds(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,k, alpha, A, k, B, n, C,n);
        TimFreivalds.stop();
        timev+=TimFreivalds.usertime();
        if (!pass) {
            std::cout<<"FAILED"<<std::endl;
            std::cout << "p:=" << p << ';'<<std::endl;
            FFLAS::WriteMatrix (std::cout<<"A:=",F,m,k,A,lda)<<';'<<std::endl;
            FFLAS::WriteMatrix(std::cout<<"B:=",F,k,n,B,ldb)<<';'<<std::endl;
            FFLAS::WriteMatrix(std::cout<<"C:=",F,m,n,C,ldc)<<';'<<std::endl;
        }

        FFLAS::fflas_delete(A);
        FFLAS::fflas_delete(B);
        FFLAS::fflas_delete(C);

    }

    double Gflops=(2.*double(m)/1000.*double(n)/1000.*double(k)/1000.0) / time * double(iters);
    // 	Gflops*=p.bitsize()/16.;
    cout  << "Time: "<< (time/double(iters))
    <<" Gfops: "<<Gflops
    << " (total:" << time <<") "
    <<typeid(Ints).name()
    <<"  | perword: "<< (Gflops*double(p.bitsize()))/64. ;

    FFLAS::writeCommandString(std::cout << '|' << p << " (" << p.bitsize()<<")|", as) << "  | Freivalds: "<< timev/double(iters) << std::endl;

#ifdef BENCH_FLINT
    cout<<"Time FLINT: "<<timeFlint<<endl;
#endif
    return 0;
}

int main(int argc, char** argv){
  
#ifdef __FFLASFFPACK_OPENBLAS_NUM_THREADS
    openblas_set_num_threads(__FFLASFFPACK_OPENBLAS_NUM_THREADS);
#endif

    FFLAS::parseArguments(argc,argv,as);

    int r1 = tmain<Givaro::Integer>();

#ifdef BENCH_RECINT
    r1 += tmain<RecInt::rint<STD_RECINT_SIZE>>();
#endif
    return r1;
}

/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
// back to top