// https://github.com/linbox-team/fflas-ffpack
// Raw File
// Tip revision: a7801a65e9972b71558322e43812f5a7e08bbb4d authored by Clement Pernet on 14 November 2017, 16:52:10 UTC
// fix parallel transpose
// Tip revision: a7801a6
// testeur_lqup.C
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
//--------------------------------------------------------------------------
//                        Test for the  lqup decomposition
//
//--------------------------------------------------------------------------
// Clement Pernet
//-------------------------------------------------------------------------

/*
 * Copyright (C) FFLAS-FFPACK
 * Written by Clément Pernet
 * This file is Free Software and part of FFLAS-FFPACK.
 *
 * ========LICENCE========
 * This file is part of the library FFLAS-FFPACK.
 *
 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
 * it under the terms of the  GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * ========LICENCE========
 *
 */

#include <iostream>
#include <iomanip>
using namespace std;
//#include "fflas-ffpack/field/modular-int.h"
//#include "fflas-ffpack/field/modular-positive.h"
#include "givaro/modular-balanced.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/fflas_io.h"
#include "fflas-ffpack/ffpack/ffpack.h"
#include "givaro/givintprime.h"


// Pull the FFPACK namespace into scope for the rest of this file.
using namespace FFPACK;

// Field the test runs over. Exactly one alias must be active; the
// commented-out alternatives are kept for quick switching.
//using Field = Givaro::Modular<double>;
using Field = ModularBalanced<double>;
//using Field = Givaro::Modular<float>;
//using Field = ModularBalanced<float>;
//using Field = Givaro::Modular<int>;
//using Field = GivaroZpz<int32_t>;
//using Field = GivaroGfq;

int main(int argc, char** argv){
 FFLAS::Timer tim;
	Givaro::IntPrimeDom IPD;
	uint64_t p;
	size_t M, N ;
	bool keepon = true;
	Givaro::Integer _p,tmp;
	Field::Element zero,one;
	cerr<<setprecision(10);
	size_t TMAX = 100;
	size_t PRIMESIZE = 23;

	if (argc > 1 )
		TMAX = atoi(argv[1]);
	if (argc > 2 )
		PRIMESIZE = atoi(argv[2]);

	FFLAS::FFLAS_TRANSPOSE ta;
	FFLAS::FFLAS_DIAG diag;
	size_t lda;

	Field::Element * A, *Abis, *X,* U, *L;
	size_t *P, *Q;
	while (keepon){
		srandom(_p);
		do{
			//		max = Integer::random(2);
			_p = random();//max % (2<<30);
			IPD.prevprime( tmp, (_p% (1<<PRIMESIZE)) );
			p =  tmp;

		}while( (p <= 2) );

		Field F( p);
		F.init(zero,0.0);
		F.init(one,1.0);
		Field::RandIter RValue( F );

		do{
			M = (size_t)  random() % TMAX;
			N = (size_t)  random() % TMAX;
		} while ((M == 0) || (N == 0));
		lda = N;
		if (random()%2)
			diag = FFLAS::FflasUnit;
		else
			diag = FFLAS::FflasNonUnit;


		if (random()%2){
			ta = FFLAS::FflasTrans;
			L = FFLAS::fflas_new<Field::Element>(M*N);
			U = FFLAS::fflas_new<Field::Element>(N*N);
			P = FFLAS::fflas_new<size_t>(M);
			Q = FFLAS::fflas_new<size_t>(N);
			for (size_t i=0; i<M; ++i) P[i] = 0;
			for (size_t i=0; i<N; ++i) Q[i] = 0;
		}
		else{
			ta = FFLAS::FflasNoTrans;
			L = FFLAS::fflas_new<Field::Element>(M*M);
			U = FFLAS::fflas_new<Field::Element>(M*N);
			P = FFLAS::fflas_new<size_t>(N);
			Q = FFLAS::fflas_new<size_t>(M);
			for (size_t i=0; i<N; ++i) P[i] = 0;
			for (size_t i=0; i<M; ++i) Q[i] = 0;
		}

		size_t R=0;
		Field::Element * G = FFLAS::fflas_new<Field::Element>(M*M);
		Field::Element * H = FFLAS::fflas_new<Field::Element>(M*N);
		size_t t;
		do{
			t = (size_t) random() % 10;
		} while ((!t)||(t==1));
		for (size_t i=0; i<M; ++i)
			if (!(random() % t))
				for (size_t j=0; j < M; ++j)
					RValue.random (*(G+i*M+j));
			else
				for (size_t j=0; j < M; ++j)
					F.assign(*(G+i*M+j), zero);



		for (size_t j=0; j < N; ++j)
			if (!(random() % t))
				for (size_t i=0; i<M; ++i)
					RValue.random (*(H+i*N+j));
			else
				for (size_t i=0; i<M; ++i)
					F.assign(*(H+i*N+j), zero);

// 		FFLAS::WriteMatrix (cerr<<"G = "<<endl,F,M,M,G,M);
// 		FFLAS::WriteMatrix (cerr<<"H = "<<endl,F,M,N,H,N);
		A = FFLAS::fflas_new<Field::Element>(M*N);
		FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M, N, M, one, G, M, H, N, zero, A, N);
		FFLAS::fflas_delete( G);
		FFLAS::fflas_delete( H);

		Abis = FFLAS::fflas_new<Field::Element>(M*N);
		for (size_t i=0; i<M*N; ++i)
			*(Abis+i) = *(A+i);

		X = FFLAS::fflas_new<Field::Element>(M*N);


		cout <<"p = "<<(size_t)p<<" M = "<<M
		     <<" N = "<<N
		     <<((diag==FFLAS::FflasUnit)?" Unit ":" Non Unit ")
		     <<((ta==FFLAS::FflasNoTrans)?"LQUP ( A ) ":"LQUP ( A^T ) ")
		     <<"....";


		tim.clear();
		tim.start();
		R = FFPACK::LUdivine (F, diag, ta, M, N, A, lda, P, Q);
		tim.stop();


		//FFLAS::WriteMatrix (cerr<<"Result = "<<endl,F,M,N,Abis,lda);

		if (ta == FFLAS::FflasNoTrans){

			for (size_t i=0; i<R; ++i){
				for (size_t j=0; j<i; ++j)
					F.assign ( *(U + i*N + j), zero);
				for (size_t j=i+1; j<N; ++j)
					F.assign (*(U + i*N + j), *(A+ i*N+j));
			}
			for (size_t i=R;i<M; ++i)
				for (size_t j=0; j<N; ++j)
					F.assign(*(U+i*N+j), zero);
			for ( size_t i=0; i<M; ++i ){
				size_t j=0;
				for (; j< ((i<R)?i:R) ; ++j )
					F.assign( *(L + i*M+j), *(A+i*N+j));
				for (; j<M; ++j )
					F.assign( *(L+i*M+j), zero);
			}

			//FFLAS::WriteMatrix (cerr<<"L = "<<endl,F,M,M,U,M);
			//FFLAS::WriteMatrix (cerr<<"U = "<<endl,F,M,N,U,N);
			FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
					M,0,(int) R, L, M, Q);
			for ( size_t  i=0; i<M; ++i )
				F.assign(*(L+i*(M+1)), one);

			if (diag == FFLAS::FflasNonUnit)
				for ( size_t  i=0; i<R; ++i )
					F.assign (*(U+i*(N+1)), *(A+i*(lda+1)));

			else{
				for (size_t i=0; i<R; ++i ){
					*(L+Q[i]*(M+1)) = *(A+Q[i]*lda+i);
					F.assign (*(U+i*(N+1)),one);
				}
			}

			FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
					M,0,(int) R, U, N, P);
			FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
					N,0,(int) R, U, N, Q);
			FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M,N,M, 1.0, L,M, U,N, 0.0, X,N);
			//FFLAS::fflas_delete( A);
		} else {

			for (size_t i=0; i<R; ++i){
				for (size_t j=0; j<i; ++j)
					F.assign ( *(L + i + j*N), zero);
				for (size_t j=i+1; j<M; ++j)
					F.assign (*(L + i + j*N), *(A+ i+j*N));
			}

			for (size_t i=R;i<N; ++i)
				for (size_t j=0; j<M; ++j)
					F.assign(*(L+i+j*N), zero);
			for ( size_t i=0; i<N; ++i ){
				size_t j=0;
				for (;  j< ((i<R)?i:R) ; ++j )
					F.assign( *(U + i+j*N), *(A+i+j*N));
				for (; j<N; ++j )
					F.assign( *(U+i+j*N), zero);
			}

			FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans,
					N,0,(int) R, U, N, Q);
			for (size_t i=0; i<N; ++i)
				F.assign (*(U+i*(N+1)),one);
			if (diag == FFLAS::FflasNonUnit)
				for ( size_t i=0; i<R; ++i )
					F.assign (*(L+i*(N+1)), *(A+i*(lda+1)));
			else{
				for ( size_t i=0; i<R; ++i ){
					*(U+Q[i]*(N+1)) = *(A+Q[i]+i*N);
					F.assign (*(L+i*(N+1)),one);
				}
			}
			// FFLAS::WriteMatrix (cerr<<"L = "<<endl,F,M,N,L,N);
// 			FFLAS::WriteMatrix (cerr<<"U = "<<endl,F,N,N,U,N);

			FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
					N,0,(int) R, L, N, P);
			FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
					M,0,(int) R, L, N, Q);
			FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M,N,N, 1.0, L,N, U,N, 0.0, X,N);
		}
		for (size_t i=0; i<M; ++i)
			for (size_t j=0; j<N; ++j)
				if (!F.areEqual (*(Abis+i*N+j), *(X+i*N+j))){
					cerr<<"error for i,j="<<i<<" "<<j<<" "<<*(Abis+i*N+j)<<" "<<*(X+i*N+j)<<endl;
					keepon = false;
				}

		//FFLAS::WriteMatrix (cerr<<"X = "<<endl,F,m,n,X,n);
		//FFLAS::WriteMatrix (cerr<<"B = "<<endl,F,m,n,B,n);

		if (keepon){
			cout<<"R = "<<R
			    <<" Passed "
			    <<(double(M*M)/1000.0*(double(N)-double(M)/3.0)/tim.usertime()/1000.0)<<"Mfops"<<endl;
			FFLAS::fflas_delete( A);
			FFLAS::fflas_delete( L);
			FFLAS::fflas_delete( U);
			FFLAS::fflas_delete( Abis);
			FFLAS::fflas_delete( X);
			FFLAS::fflas_delete( P);
			FFLAS::fflas_delete( Q);
		}
		else{
			cerr<<"Abis = "<<endl;
			FFLAS::WriteMatrix (cerr, F, M, N, Abis, N);
			cerr<<"X = "<<endl;
			FFLAS::WriteMatrix (cerr, F, M, N, X, N);
		}
	}
	cout<<endl;
	cerr<<"FAILED with p = "<<(size_t)p<<" M = "<<M<<" N = "<<N
	    <<" trans = "<<ta<<" diag = "<<diag<<endl;

	cerr<<"A:"<<endl;
	cerr<<M<<" "<<N<<" M"<<endl;
	for (size_t i=0; i<M; ++i)
		for (size_t j=0; j<N; ++j)
			if (*(Abis+i*lda+j))
				cerr<<i+1<<" "<<j+1<<" "<<((int) *(Abis+i*lda+j) )<<endl;
	cerr<<"0 0 0"<<endl<<endl;

	FFLAS::fflas_delete( A);
	FFLAS::fflas_delete( Abis);
	FFLAS::fflas_delete( L);
	FFLAS::fflas_delete( U);
	FFLAS::fflas_delete( X);
	FFLAS::fflas_delete( P);
	FFLAS::fflas_delete( Q);
}














// back to top