https://github.com/linbox-team/fflas-ffpack
Raw File
Tip revision: 432071cb5f212d683607ab8f8ff42d31b437fc4a authored by Clement Pernet on 30 July 2016, 17:26:20 UTC
preparing release
Tip revision: 432071c
benchmark-echelon.C
/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s

/* Copyright (c) FFLAS-FFPACK
* Written by Clement Pernet <clement.pernet@imag.fr>, from benchmark-pluq by Ziad Sultan
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
*
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the  GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
* ========LICENCE========
*/
#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
#include "fflas-ffpack/config-blas.h"
#include "fflas-ffpack/fflas/fflas.h"
#include <givaro/modular-balanced.h>
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/Matio.h"
#include "fflas-ffpack/utils/args-parser.h"

using namespace std;

typedef Givaro::ModularBalanced<double> Field;

// random generator function:                                    
ptrdiff_t myrandom (ptrdiff_t i) { return rand()%i;}

// pointer object to it:                                                                                               
ptrdiff_t (*p_myrandom)(ptrdiff_t) = myrandom;

typename Field::Element* construct_U(const Field& F, Field::RandIter& G, size_t n, size_t r, std::vector<size_t>& P, size_t commonseed, size_t seed)
{
	size_t lda = n;
	Field::Element *U=new Field::Element[n*n];
    
    FFLAS::ParSeqHelper::Parallel H;

	std::vector<size_t> E(r);
	PARFOR1D(i,r,H, E[i]=i; );
    
    
	srand48(commonseed);
	std::vector<size_t> Z(n);
	PARFOR1D(i,n,H, Z[i]=i;);
    
	P.resize(r);
	for(size_t i=0; i<r; ++i) {
		size_t index=lrand48() % Z.size();
		P[i] = Z[ index ];
		Z.erase(Z.begin()+index);
	}

	PARFOR1D(i,r,H,
             while( F.isZero( G.random(U[ P[i]*lda+P[i] ]) ) ) {}
             for(size_t j=P[i]+1;j<n;++j)
             	G.random(U[ P[i]*lda+j]);
             );
    
    return U;
}

typename Field::Element* construct_L(const Field& F, Field::RandIter& G, size_t m, size_t r, const std::vector<size_t>& P, size_t seed)
{
    FFLAS::ParSeqHelper::Parallel H;

	size_t lda = m;
	size_t taille=m*m;
	Field::Element * L= new Field::Element[taille];
	PARFOR1D(i,taille,H, F.init(L[i],F.zero); );
	
	std::vector<size_t> E(r);
	PARFOR1D(i,r,H, E[i]=i;);
	
	srand48(seed);
	std::vector<size_t> Z(m);
	PARFOR1D(i,m,H, Z[i]=i; );
    
	std::vector<size_t> Q(r);
	for(size_t i=0; i<r; ++i) {
		size_t index=lrand48() % Z.size();
		Q[i] = Z[ index ];
		Z.erase(Z.begin()+index);
	}
	
	for(size_t i=0; i<r; ++i) {
		size_t index=lrand48() % E.size();
		size_t perm = E[ index ];
		
		E.erase(E.begin()+index);
		F.init(L[Q[perm]*lda+P[perm]],F.one);
		for(size_t j=Q[perm]+1;j<m;++j)
			G.random(L[j*lda+P[perm]]);
	}
	return L;
}


typename Field::Element* M_randgen(const Field& F, typename Field::Element* L,typename Field::Element* U, size_t r, size_t m, size_t n)
{
	Field::Element alpha, beta;
	F.init(alpha,1.0);
	F.init(beta,0.0);
	size_t lda = n;
	Field::Element * A = new Field::Element[m*n];

	// Computing produit L * U (ideally should use parallel ftrmm

	/*
	  FFLAS::ftrmm(F,  FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, m,n,1.0, U, lda, L, lda);
	*/

	const FFLAS::CuttingStrategy meth = FFLAS::RECURSIVE;
	const FFLAS::StrategyParameter strat = FFLAS::THREE_D;
	typename FFLAS::ParSeqHelper::Parallel pWH (MAX_THREADS, meth, strat);
	PAR_BLOCK{
	FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
				m,n,r, alpha, L,r, U,
		      lda,beta,A,lda,pWH);
	}
	return L;
}

void verification_PLUQ(const Field & F, typename Field::Element * B, typename Field::Element * A,
		       size_t * P, size_t * Q, size_t m, size_t n, size_t R)
{

    FFLAS::ParSeqHelper::Parallel H;

	Field::Element * X = FFLAS::fflas_new<Field::Element>(m*n);
	Field::Element * L, *U;
	L = FFLAS::fflas_new<Field::Element>(m*R);
	U = FFLAS::fflas_new<Field::Element>(R*n);
	
	PARFOR1D (i,m*R,H, F.init(L[i], 0.0); );
	
	PARFOR1D (i,n*R,H, F.init(U[i], 0.0); );
	
	PARFOR1D (i,m*n,H, F.init(X[i], 0.0); );
	
	
	Field::Element zero,one;
	F.init(zero,0.0);
	F.init(one,1.0);
	PARFOR1D (i,R,H,
              for (size_t j=0; j<i; ++j)
              	F.assign ( *(U + i*n + j), zero);
              for (size_t j=i; j<n; ++j)
				F.assign (*(U + i*n + j), *(A+ i*n+j));
              );
	PARFOR1D (j,R,H,
              for (size_t i=0; i<=j; ++i )
              	F.assign( *(L+i*R+j), zero);
              F.assign(*(L+j*R+j), one);
              for (size_t i=j+1; i<m; i++)
              	F.assign( *(L + i*R+j), *(A+i*n+j));
              );
	
	PAR_BLOCK{
#pragma omp task shared(F, P, L)
		FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, R,0,m, L, R, P);
#pragma omp task shared(F, Q, U)
		FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, R,0,n, U, n, Q);
#pragma omp taskwait
		const FFLAS::CuttingStrategy method = FFLAS::THREE_D;
		typename FFLAS::ParSeqHelper::Parallel pWH (MAX_THREADS, method);
#pragma omp task shared(F, L, U, X)
		FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,R,
			      F.one, L,R, U,n, F.zero, X,n, pWH);

	}
	bool fail = false;
	//  PARFOR1D (size_t i=0; i<m; ++i)
	for(size_t i=0; i<m; ++i)
		for (size_t j=0; j<n; ++j)
			if (!F.areEqual (*(B+i*n+j), *(X+i*n+j))){
				std::cout << " Initial["<<i<<","<<j<<"] = " << (*(B+i*n+j))
					  << " Result"<<i<<","<<j<<"] = " << (*(X+i*n+j))
					  << std::endl;

				std::stringstream errs;
				errs << " B["<<i<<","<<j<<"] = " << (*(B+i*n+j))
				     << " X["<<i<<","<<j<<"] = " << (*(X+i*n+j))
				     << std::endl;
				std::cout << errs;
				fail=true;
				std::cout<<" end verification"<<std::endl;
				exit(1);
			}
	
	if (fail)
		std::cout<<"FAIL"<<std::endl;
	else
		std::cout<<"PASS"<<std::endl;
	
	FFLAS::fflas_delete( U);
	FFLAS::fflas_delete( L);
	FFLAS::fflas_delete( X);
}



int main(int argc, char** argv) {
	
	size_t iter = 3 ;
	int q = 131071 ;
	Field F(q);
	int m = 2000 ;
	int n = 2000 ;
	size_t r = 2000 ;
	int v = 0;
	//	int p=0;
	int t=MAX_THREADS;
	int NBK = -1;
	int  a=1;
	bool transform = false;
	Argument as[] = {
		{ 'q', "-q Q", "Set the field characteristic (-1 for random).",         TYPE_INT , &q },
		{ 'm', "-m M", "Set the row dimension of A.",      TYPE_INT , &m },
		{ 'n', "-n N", "Set the col dimension of A.",      TYPE_INT , &n },
		{ 'r', "-r R", "Set the rank of matrix A.",            TYPE_INT , &r },
		{ 'i', "-i I", "Set number of repetitions.",            TYPE_INT , &iter },
		{ 'v', "-v V", "Set 1 if need verification of result else 0.",            TYPE_INT , &v },
		{ 't', "-t T", "whether to compute the transformation matrix.", TYPE_BOOL , &transform },
		{ 'a', "-a A", "Algorithm for PLUQ decomposition: 0=LUdivine 1=PLUQ.", TYPE_INT , &a },
		{ 'b', "-b B", "number of numa blocks per dimension for the numa placement", TYPE_INT , &NBK },
		END_OF_ARGUMENTS
	};
	//		{ 'p', "-p P", "0 for sequential, 1 for 2D iterative,
//2 for 2D rec, 3 for 2D rec adaptive, 4 for 3D rc in-place, 5 for 3D rec, 6 for 3D rec adaptive.", TYPE_INT , &p },
        FFLAS::parseArguments(argc,argv,as);
	FFPACK::FFPACK_LU_TAG LuTag = a?FFPACK::FfpackTileRecursive:FFPACK::FfpackSlabRecursive;
	if (NBK==-1) NBK = t;
	typedef Field::Element Element;
	Element * A, * Acop;
	A = FFLAS::fflas_new(F,m,n,Alignment::CACHE_PAGESIZE);
	Acop = FFLAS::fflas_new(F,m,n,Alignment::CACHE_PAGESIZE);

	Field::Element * U = new Field::Element[n*n];
	// random seed                                                                                         
        ifstream f("/dev/urandom");
        size_t seed1, seed2, seed3,seed4;
        f.read(reinterpret_cast<char*>(&seed1), sizeof(seed1));
        f.read(reinterpret_cast<char*>(&seed2), sizeof(seed2));
        f.read(reinterpret_cast<char*>(&seed3), sizeof(seed3));
        f.read(reinterpret_cast<char*>(&seed4), sizeof(seed4));
	std::vector<size_t> Index_P(r);
	Field::RandIter GG(F, seed1);
	
    PAR_BLOCK{ pfrand(F,GG,m,n,A,m/NBK); }
    
       
       //       std::cout<<"Construct U"<<endl;
       U = construct_U(F,GG, n, r, Index_P, seed4, seed3);
       //       std::cout<<"Construct L"<<endl;
       A = construct_L(F,GG, m, r, Index_P, seed2);
       //       std::cout<<"randgen"<<endl;
       A = M_randgen(F, A, U, r, m, n);
       size_t R=0;
       FFLAS::Timer chrono;
       double time=0.0;
       
//       enum FFLAS::FFLAS_DIAG diag = FFLAS::FflasNonUnit;
       size_t maxP, maxQ;
       maxP = m;
       maxQ = n;
       
       size_t *P = FFLAS::fflas_new<size_t>(maxP);
       size_t *Q = FFLAS::fflas_new<size_t>(maxQ);
       
       FFLAS::ParSeqHelper::Parallel H;

       PARFOR1D(i,(size_t)m,H,
                for (size_t j=0; j<(size_t)n; ++j)
                	Acop[i*n+j]= (*(A+i*n+j));
                );
       
       for (size_t i=0;i<=iter;++i){
	       	       
	       PARFOR1D(j,maxP,H, P[j]=0; );
           
	       PARFOR1D(j,maxQ,H, Q[j]=0; );
	       
	       PARFOR1D(k,(size_t)m,H,
                    for (size_t j=0; j<(size_t)n; ++j)
                    	*(A+k*n+j) = *(Acop+k*n+j) ;  
                    );
           
	       chrono.clear();
	       
	       if (i) chrono.start();
// Added by AB 2014-12-15
//#ifdef __FFLASFFPACK_USE_OPENMP
	       r = RowEchelonForm(F,m,n,A,n,P,Q,transform,LuTag);
	       if (i) {chrono.stop(); time+=chrono.realtime();}
       }
  
	// -----------
	// Standard output for benchmark - Alexis Breust 2014/11/14
	#define CUBE(x) ((x)*(x)*(x))
       double gflop =  2.0/3.0*CUBE(double(r)/1000.0) +2*m/1000.0*n/1000.0*double(r)/1000.0  - double(r)/1000.0*double(r)/1000.0*(m+n)/1000;
       if (transform)
	       gflop += CUBE(double(r)/1000.0)/3.0 + double(r)/1000.0*double(r)/1000.0*double(n-r)/1000.0;
       std::cout << "Time: " << time / double(iter)  << " Gflops: " << gflop / time * double(iter-1);
       FFLAS::writeCommandString(std::cout, as) << std::endl;
       
       //verification
       if(v)
	       verification_PLUQ(F,Acop,A,P,Q,m,n,R);
       FFLAS::fflas_delete( A);
       FFLAS::fflas_delete( Acop);
       
       return 0;
}

back to top