/* Copyright (c) FFLAS-FFPACK * Written by Clement Pernet , from benchmark-pluq by Ziad Sultan * ========LICENCE======== * This file is part of the library FFLAS-FFPACK. * * FFLAS-FFPACK is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * ========LICENCE======== */ // declare that the call to openblas_set_numthread will be made here, hence don't do it // everywhere in the call stack #define __FFLASFFPACK_OPENBLAS_NT_ALREADY_SET #include "fflas-ffpack/fflas-ffpack-config.h" #include #include "fflas-ffpack/config-blas.h" #include "fflas-ffpack/fflas/fflas.h" #include #include "fflas-ffpack/utils/timer.h" #include "fflas-ffpack/utils/args-parser.h" #include "fflas-ffpack/ffpack/ffpack.h" using namespace std; typedef Givaro::ModularBalanced Field; // random generator function: ptrdiff_t myrandom (ptrdiff_t i) { return rand()%i;} // pointer object to it: ptrdiff_t (*p_myrandom)(ptrdiff_t) = myrandom; typename Field::Element* construct_U(const Field& F, Field::RandIter& G, size_t n, size_t r, std::vector& P, size_t commonseed, size_t seed, int nt) { size_t lda = n; Field::Element *U=new Field::Element[n*n]; FFLAS::ParSeqHelper::Parallel H(nt); std::vector E(r); PARFOR1D(i,r,H, E[i]=i; ); srand48(commonseed); std::vector Z(n); PARFOR1D(i,n,H, Z[i]=i;); P.resize(r); for(size_t i=0; i& P, size_t seed, int nt) { FFLAS::ParSeqHelper::Parallel H(nt); size_t lda = m; size_t taille=m*m; Field::Element * L= new Field::Element[taille]; PARFOR1D(i,taille,H, { F.init(L[i]); F.assign(L[i],F.zero); } ); std::vector E(r); PARFOR1D(i,r,H, E[i]=i;); srand48(seed); std::vector Z(m); PARFOR1D(i,m,H, Z[i]=i; ); std::vector Q(r); for(size_t i=0; i pWH(nt); PAR_BLOCK{ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,r, alpha, L,r, U, lda,beta,A,lda,pWH); } return L; } void verification_PLUQ(const Field & F, typename Field::Element * B, typename Field::Element * A, size_t * P, size_t * Q, size_t m, size_t n, size_t R, int nt) { FFLAS::ParSeqHelper::Parallel H(nt); Field::Element * X = FFLAS::fflas_new(m*n); Field::Element * L, *U; L = FFLAS::fflas_new(m*R); U = FFLAS::fflas_new(R*n); PARFOR1D (i,m*R,H, { F.init(L[i]); F.assign(L[i], 0.0); } ); PARFOR1D (i,n*R,H, { F.init(U[i]); F.assign(U[i], 0.0); } ); PARFOR1D (i,m*n,H, { F.init(X[i]); F.assign(X[i], 0.0); } ); PARFOR1D (i,R,H, for (size_t j=0; j pWH (nt); #pragma omp task shared(F, L, U, X) FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,R, F.one, L,R, U,n, F.zero, X,n, pWH); } bool fail = false; // PARFOR1D (size_t i=0; i(&seed1), sizeof(seed1)); f.read(reinterpret_cast(&seed2), sizeof(seed2)); f.read(reinterpret_cast(&seed3), sizeof(seed3)); f.read(reinterpret_cast(&seed4), sizeof(seed4)); std::vector Index_P(r); Field::RandIter GG(F, seed1); PAR_BLOCK{ FFLAS::pfrand(F,GG,m,n,A,m/NBK); } // std::cout<<"Construct U"<(maxP); size_t *Q = FFLAS::fflas_new(maxQ); FFLAS::ParSeqHelper::Parallel H(nt); PARFOR1D(i,(size_t)m,H, for (size_t j=0; j<(size_t)n; ++j) Acop[i*n+j]= (*(A+i*n+j)); ); for (size_t i=0;i<=iter;++i){ PARFOR1D(j,maxP,H, P[j]=0; ); PARFOR1D(j,maxQ,H, Q[j]=0; ); PARFOR1D(k,(size_t)m,H, for (size_t j=0; j<(size_t)n; ++j) *(A+k*n+j) = *(Acop+k*n+j) ; ); //Additional tests for templated helper FFLAS::ParSeqHelper::Parallel parH(nt); chrono.clear(); if (i) chrono.start(); // Added by AB 2014-12-15 //#ifdef __FFLASFFPACK_USE_OPENMP PAR_BLOCK{ r = FFPACK::RowEchelonForm(F,m,n,A,n,P,Q,transform,LuTag,parH); } if (i) {chrono.stop(); time+=chrono.realtime();} } // ----------- // Standard output for benchmark - Alexis Breust 2014/11/14 #define CUBE(x) ((x)*(x)*(x)) double gflop = 2.0/3.0*CUBE(double(r)/1000.0) +2*m/1000.0*n/1000.0*double(r)/1000.0 - double(r)/1000.0*double(r)/1000.0*(m+n)/1000; if (transform) gflop += CUBE(double(r)/1000.0)/3.0 + double(r)/1000.0*double(r)/1000.0*double(n-r)/1000.0; std::cout << "Time: " << time / double(iter) << " Gfops: " << gflop / time * double(iter-1); FFLAS::writeCommandString(std::cout, as) << std::endl; //verification if(v) verification_PLUQ(F,Acop,A,P,Q,m,n,R,nt); FFLAS::fflas_delete( A); FFLAS::fflas_delete( Acop); return 0; } /* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ // vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s