https://github.com/linbox-team/fflas-ffpack
Raw File
Tip revision: 6cf3257580d8b93b1519468e97cf4739c1379be6 authored by Clement Pernet on 08 April 2016, 17:10:57 UTC
update Changelog for 2.2.1
Tip revision: 6cf3257
benchmark-pluq.C
/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
//#include "goto-def.h"

/* Copyright (c) FFLAS-FFPACK
* Written by Ziad Sultan <ziad.sultan@imag.fr>
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
*
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the  GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
* ========LICENCE========
*/

//#define __FFLASFFPACK_USE_OPENMP
//#define __FFLASFFPACK_USE_TBB

//#define __FFLASFFPACK_USE_DATAFLOW
//#define  __FFLASFFPACK_FORCE_SEQ
//#define WINOPAR_KERNEL
//#define CLASSIC_SEQ
#include "fflas-ffpack/fflas-ffpack-config.h"
#include <givaro/modular.h>
#include <givaro/givranditer.h>
#include <iostream>

#include "fflas-ffpack/config-blas.h"
#include "fflas-ffpack/fflas/fflas.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/fflas_randommatrix.h"
#include "fflas-ffpack/utils/Matio.h"
#include "fflas-ffpack/utils/args-parser.h"
#include "fflas-ffpack/ffpack/ffpack.h"

#ifdef __FFLASFFPACK_USE_KAAPI
#include "libkomp.h"
#endif

using namespace std;

//typedef Givaro::ModularBalanced<double> Field;
//typedef Givaro::ModularBalanced<float> Field;
typedef Givaro::ZRing<double> Field;
//typedef Givaro::UnparametricZRing<double> Field;

void verification_PLUQ(const Field & F, typename Field::Element * B, typename Field::Element * A,
					   size_t * P, size_t * Q, size_t m, size_t n, size_t R)
{
	
	FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> H;
	
	Field::Element_ptr X = FFLAS::fflas_new (F, m,n);
	Field::Element_ptr L, U;
	L = FFLAS::fflas_new(F, m,R);
	U = FFLAS::fflas_new(F, R,n);
	
	PARFOR1D (i, m*R,H, F.init(L[i], 0.0); );
	
	PARFOR1D (i,n*R,H, F.init(U[i], 0.0); );
	
	PARFOR1D (i,m*n,H, F.init(X[i], 0.0); );	
	
	Field::Element zero,one;
	F.init(zero,0.0);
	F.init(one,1.0);
	PARFOR1D (i,R,H,
              for (size_t j=0; j<i; ++j)
				  F.assign ( *(U + i*n + j), zero);
              for (size_t j=i; j<n; ++j)
				  F.assign (*(U + i*n + j), *(A+ i*n+j));
              );
	
	PARFOR1D (j,R,H, 
			  for (size_t i=0; i<=j; ++i )
				  F.assign( *(L+i*R+j), zero);
			  F.assign(*(L+j*R+j), one);
			  for (size_t i=j+1; i<m; i++)
				  F.assign( *(L + i*R+j), *(A+i*n+j));
			  );
	
	PAR_BLOCK{
		SYNCH_GROUP(
			
			TASK(MODE(CONSTREFERENCE(F,P,L)),
				 FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, R,0,m, L, R, P););
			TASK(MODE(CONSTREFERENCE(F,Q,U)),
				 FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, R,0,n, U, n, Q););
			WAIT;
			typename FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> pWH (MAX_THREADS);

			TASK(MODE(CONSTREFERENCE(F,U,L,X)),
				 FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,R,
							   F.one, L,R, U,n, F.zero, X,n, pWH););
					);
	}
	bool fail = false;
	for(size_t i=0; i<m; ++i)
		for (size_t j=0; j<n; ++j)
			if (!F.areEqual (*(B+i*n+j), *(X+i*n+j))){
				std::cout << " Initial["<<i<<","<<j<<"] = " << (*(B+i*n+j))
						  << " Result["<<i<<","<<j<<"] = " << (*(X+i*n+j))
						  << std::endl;
				fail=true;
			}
	
	if (fail)
		std::cout<<"FAIL"<<std::endl;
	else
		std::cout<<"PASS"<<std::endl;
	
	FFLAS::fflas_delete( U);
	FFLAS::fflas_delete( L);
	FFLAS::fflas_delete( X);
}

void Rec_Initialize(Field &F, Field::Element * C, size_t m, size_t n, size_t ldc)
{
	if(std::min(m,n) <= ldc/NUM_THREADS){
			for(size_t i=0; i<m; i++)
				FFLAS::fzero(F, 1, n, C+i*n, n);
	}
	else{
    size_t M2 = m >> 1;
	size_t N2 = n >> 1;
	typename Field::Element * C2 = C + N2;
	typename Field::Element * C3 = C + M2*ldc;
	typename Field::Element * C4 = C3 + N2;
	
	SYNCH_GROUP(
				TASK(MODE(CONSTREFERENCE(F)), Rec_Initialize(F,C,M2,N2, ldc););
				TASK(MODE(CONSTREFERENCE(F)), Rec_Initialize(F,C2,M2,n-N2, ldc););
				TASK(MODE(CONSTREFERENCE(F)), Rec_Initialize(F,C3,m-M2,N2, ldc););
				TASK(MODE(CONSTREFERENCE(F)), Rec_Initialize(F,C4,m-M2,n-N2, ldc););
		);
	}
}

int main(int argc, char** argv) {
	
	size_t iter = 3 ;
	int q = 131071 ;
	Field F(q);
	int m = 2000 ;
	int n = 2000 ;
	int r = 2000 ;
	int v = 0;
	int t=MAX_THREADS;
	int NBK = -1;
	bool par=true;
	Argument as[] = {
		{ 'q', "-q Q", "Set the field characteristic (-1 for random).",         TYPE_INT , &q },
		{ 'm', "-m M", "Set the row dimension of A.",      TYPE_INT , &m },
		{ 'n', "-n N", "Set the col dimension of A.",      TYPE_INT , &n },
		{ 'r', "-r R", "Set the rank of matrix A.",            TYPE_INT , &r },
		{ 'i', "-i I", "Set number of repetitions.",            TYPE_INT , &iter },
		{ 'v', "-v V", "Set 1 if need verification of result else 0.",            TYPE_INT , &v },
		{ 't', "-t T", "number of virtual threads to drive the partition.", TYPE_INT , &t },
		{ 'b', "-b B", "number of numa blocks per dimension for the numa placement", TYPE_INT , &NBK },
		{ 'p', "-p P", "whether to run or not the parallel PLUQ", TYPE_BOOL , &par },
		END_OF_ARGUMENTS
	};
	FFLAS::parseArguments(argc,argv,as);

	if (r > std::min(m,n)){
		std::cerr<<"Warning: rank can not be greater than min (m,n). It has been forced to min (m,n)"<<std::endl;
		r=std::min(m,n);
	}
	if (!par) t=1;NBK=1;
	if (NBK==-1) NBK = t;

	Field::Element_ptr A,  Acop;
	A = FFLAS::fflas_new(F,m,n);

	PAR_BLOCK{
		Rec_Initialize(F, A, m, n, n);
		//				FFLAS::pfzero(F,m,n,A,m/NBK);
		FFPACK::RandomMatrixWithRankandRandomRPM (F, A, n, r, m,n);
	}

	size_t R;
	FFLAS::Timer chrono;
	double *time=new double[iter];
    
	enum FFLAS::FFLAS_DIAG diag = FFLAS::FflasNonUnit;
	size_t maxP, maxQ;
	maxP = m;
	maxQ = n;
    
	size_t *P = FFLAS::fflas_new<size_t>(maxP);
	size_t *Q = FFLAS::fflas_new<size_t>(maxQ);
       
	FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive,FFLAS::StrategyParameter::TwoDAdaptive> H;
	   
	Acop = FFLAS::fflas_new(F,m,n);
	PARFOR1D(i,(size_t)m,H, 
			 FFLAS::fassign(F, n, A + i*n, 1, Acop + i*n, 1);
				 // for (size_t j=0; j<(size_t)n; ++j)
				 // 	Acop[i*n+j]= A[i*n+j];
			 );
	size_t BC;
	for (size_t i=0;i<=iter;++i){
		
		PARFOR1D(j,maxP,H, P[j]=0; );
		PARFOR1D(j,maxQ,H, Q[j]=0; );
		PARFOR1D(k,(size_t)m,H,
				 FFLAS::fassign(F, n, Acop + k*n, 1, A + k*n, 1);
					 // for (size_t j=0; j<(size_t)n; ++j)
					 //     F.assign( A[k*n+j] , Acop[k*n+j]) ;  
				 );
		chrono.clear();
		
		if (i) chrono.start();
		if (par){
			

			PAR_BLOCK{
				R = FFPACK::pPLUQ(F, diag, m, n, A, n, P, Q, t);
				BC = n/NUM_THREADS;
			}
		}
		else
			R = FFPACK::PLUQ(F, diag, m, n, A, n, P, Q);
		if (i) {chrono.stop(); time[i-1]=chrono.realtime();}
		
	}
	std::sort(time, time+iter);
	double meantime = time[iter/2];
	delete[] time;
		// -----------
		// Standard output for benchmark - Alexis Breust 2014/11/14
#define CUBE(x) ((x)*(x)*(x))
	double gflop =  2.0/3.0*CUBE(double(r)/1000.0) +2*m/1000.0*n/1000.0*double(r)/1000.0  - double(r)/1000.0*double(r)/1000.0*(m+n)/1000;
	std::cout << "Time: " << meantime
			  << " Gflops: " << gflop / meantime << " BC: "<<BC;
	FFLAS::writeCommandString(std::cout, as) << std::endl;
	
		//verification
	if(v)
		verification_PLUQ(F,Acop,A,P,Q,m,n,R);
	
	FFLAS::fflas_delete (P);
	FFLAS::fflas_delete (Q);
	FFLAS::fflas_delete (A);
	FFLAS::fflas_delete (Acop);
	
	return 0;
}

back to top