https://hal.archives-ouvertes.fr/hal-02128878
Raw File
Tip revision: 4201397494d9af8b687117e8ff4d85a8944f5c5a authored by Software Heritage on 11 June 2019, 10:15:02 UTC
hal: Deposit 298 in collection hal
Tip revision: 4201397
test-igemm.C
/*
 * Copyright (C) the FFLAS-FFPACK group
 * Written by Clément Pernet <clement.pernet@imag.fr>
 * This file is Free Software and part of FFLAS-FFPACK.
 *
 * ========LICENCE========
 * This file is part of the library FFLAS-FFPACK.
 *
 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
 * it under the terms of the  GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * ========LICENCE========
 *.
 */

//#define SIMD_INT

#include "fflas-ffpack/fflas-ffpack.h"
#include "fflas-ffpack/fflas/fflas_igemm/igemm.h"
#include "fflas-ffpack/utils/fflas_io.h"
#include "fflas-ffpack/utils/args-parser.h"
#include "fflas-ffpack/utils/timer.h"

#include <givaro/udl.h>

// COL_MAJOR true not supported in test. To be updated.
#define COL_MAJOR false
#define LEAD_GEN true
#define DISPLAY false
#define TRUST_FGEMM false

using namespace FFLAS;

int test_igemm(size_t m, size_t n, size_t k, enum FFLAS_TRANSPOSE tA, enum FFLAS_TRANSPOSE tB, int a_scal, int b_scal, bool timing)
{


    FFLAS::Timer tim;

    srand((unsigned int)time(NULL));
    typedef Givaro::Modular<Givaro::Integer> IField ;
    IField Z(1_ui64<<63);

    size_t ra = (tA==FflasNoTrans) ? m : k ;
    size_t ca = (tA==FflasNoTrans) ? k : m ;
    size_t rb = (tB==FflasNoTrans) ? k : n;
    size_t cb = (tB==FflasNoTrans) ? n : k;

    size_t lda = ca ;
    size_t ldb = cb ; // n
    size_t ldc = n  ; // n

#if COL_MAJOR
    size_t ldA = m;//+rand() % 3 ; // m
    size_t ldB = k;//+rand() % 3 ; // k
    size_t ldC = m;//+rand() % 3 ; // m
#else
    size_t ldA = ca ; // k
    size_t ldB = cb ; // n
    size_t ldC = n  ; // n
#endif

#if LEAD_GEN
    lda += rand() % 5;
    ldb += rand() % 5;
    ldc += rand() % 5;
    ldA += rand() % 5;
    ldB += rand() % 5;
    ldC += rand() % 5;
#endif


    int seed=0;
    typename IField::RandIter Rand(Z,seed);
    // typename IField::RandIter Rand(Z,seed);

    IField::Element_ptr A,B,C,D;
    C= FFLAS::fflas_new(Z,m,ldc);
    D= FFLAS::fflas_new(Z,m,n);
    A= FFLAS::fflas_new(Z,ra,lda);
    B= FFLAS::fflas_new(Z,rb,ldb) ;

    for (size_t i=0;i<ra;++i)
        for (size_t j=0;j<ca;++j)
            // Rand.random(A[i*lda+j]);
            A[i*lda+j] = rand() % 10;
    for (size_t i=0;i<rb;++i)
        for (size_t j=0;j<cb;++j)
            // Rand.random(B[i*ldb+j]);
            B[i*ldb+j] = rand() % 10;
    for (size_t i=0;i<m;++i)
        for (size_t j=0;j<n;++j)
            // Rand.random(C[i*ldc+j]);
            D[i*n+j]=C[i*ldc+j] = 0 ; //rand() % 10;

#if DISPLAY
    FFLAS::WriteMatrix(std::cout << "A:=", Z, ra, ca, A, lda, FflasMaple) <<';' <<std::endl;
    // FFLAS::WriteMatrix(std::cout << "A:=", Z, ra, ca, A, lda, FflasMaple,(tA==FflasTrans))  <<';'<<std::endl;
    FFLAS::WriteMatrix(std::cout << "B:=", Z, rb, cb, B, ldb,FflasMaple,false) <<';' <<std::endl;
    // FFLAS::WriteMatrix(std::cout << "B:=", Z, rb, cb, B, ldb,FflasMaple,(tB==FflasTrans)) <<';' <<std::endl;
#endif



    IField::Element alpha,beta ;
    alpha = (IField::Element) a_scal ;
    beta = (IField::Element) b_scal ;

    tim.clear(); tim.start();
#if TRUST_FGEMM
    FFLAS::fgemm(Z,(FFLAS::FFLAS_TRANSPOSE)tA,(FFLAS::FFLAS_TRANSPOSE)tB,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc);
#else
    if (!timing) {
        IField::Element_ptr ail,blj;
        IField::Element tmp;
        for (size_t i = 0; i < m; ++i)
            for (size_t j = 0; j < n; ++j){
                Z.mulin(*(C+i*ldc+j),beta);
                Z.assign (tmp, Z.zero);
                for ( size_t l = 0; l < k ; ++l ){
                    if ( tA == FflasNoTrans )
                        ail = A+i*lda+l;
                    else
                        ail = A+l*lda+i;
                    if ( tB == FflasNoTrans )
                        blj = B+l*ldb+j;
                    else
                        blj = B+j*ldb+l;
                    Z.axpyin (tmp, *ail, *blj);
                }
                Z.axpyin (*(C+i*ldc+j), alpha, tmp);
            }
    }
#endif
    tim.stop();
    if (timing) std::cout << "zgemm time: " << tim << std::endl;


#if DISPLAY
    FFLAS::WriteMatrix(std::cout << "C:=", Z, m, n, C, ldc,FflasMaple,false) <<';' <<std::endl;
    std::cout << ((tA == FflasTrans) ? "LinearAlgebra:-Transpose":"") << "(A).";
    std::cout << ((tB == FflasTrans) ? "LinearAlgebra:-Transpose":"") << "(B);" << std::endl;;
#endif

    std::cout << "---------------------------------------------" << std::endl;


    typedef Givaro::ZRing<int64_t> FField ;
    FField F ;

    FField::Element_ptr Ci,Ai,Bi;
#if COL_MAJOR
    Ci= FFLAS::fflas_new(F,ldC,n);
    Ai= FFLAS::fflas_new(F,ldA,k);
    Bi= FFLAS::fflas_new(F,ldB,n);

    for (size_t i=0;i<m;++i) {
        for (size_t j=0;j<k;++j) {
            F.init(Ai[j*ldA+i], A[i*lda+j]);
        }
    }
    for (size_t i=0;i<k;++i) {
        for (size_t j=0;j<n;++j) {
            F.init(Bi[j*ldB+i], B[i*ldb+j]);
        }
    }
    for (size_t i=0;i<m;++i) {
        for (size_t j=0;j<n;++j) {
            F.init(Ci[j*ldC+i], D[i*n+j]);
        }
    }
#else
    Ci= FFLAS::fflas_new(F,m,ldC);
    Ai= FFLAS::fflas_new(F,ra,ldA);
    Bi= FFLAS::fflas_new(F,rb,ldB);

    for (size_t i=0;i<ra;++i) {
        for (size_t j=0;j<ca;++j) {
            F.init(Ai[i*ldA+j], A[i*lda+j]);
        }
    }
    for (size_t i=0;i<rb;++i) {
        for (size_t j=0;j<cb;++j) {
            F.init(Bi[i*ldB+j], B[i*ldb+j]);
        }
    }
    for (size_t i=0;i<m;++i) {
        for (size_t j=0;j<n;++j) {
            F.init(Ci[i*ldC+j], D[i*n+j]);
        }
    }

#endif

#if DISPLAY
    FFLAS::WriteMatrix(std::cout << "A:=", F, ra, ca, Ai, ldA,FflasMaple,COL_MAJOR)  <<';'<<std::endl;
    // FFLAS::WriteMatrix(std::cout << "A:=", F, (int)ra, (int)ca, Ai,(int)ldA,FflasMaple,(tA==FflasTrans))  <<';'<<std::endl;
    FFLAS::WriteMatrix(std::cout << "B:=", F, rb, cb, Bi, ldB,FflasMaple,COL_MAJOR) <<';' <<std::endl;
    // FFLAS::WriteMatrix(std::cout << "B:=", F, rb, cb, Bi, ldB,FflasMaple,(tB==FflasTrans)) <<';' <<std::endl;
#endif

    FField::Element a,b ;
    a= (FField::Element) a_scal;
    b= (FField::Element) b_scal;
#if 0
    FFLAS::igemm_(FflasColMajor, tA, tB, m, n, k, a, Ai, ldA, Bi, ldB, b, Ci, ldC);
#else
    tim.clear(); tim.start();
    FFLAS::igemm_(FflasRowMajor,tA,tB, (int)m, (int)n, (int)k, a, Ai, (int)ldA, Bi, (int)ldB, b, Ci, (int)ldC);
    tim.stop();
    if (timing) std::cout << "igemm time: " << tim << std::endl;
#endif


#if DISPLAY
    FFLAS::WriteMatrix(std::cout << "C:=", F, m, n, Ci, ldC,FflasMaple,COL_MAJOR) <<';' <<std::endl;
    std::cout << ((tA == FflasTrans) ? "LinearAlgebra:-Transpose":"") << "(A).";
    std::cout << ((tB == FflasTrans) ? "LinearAlgebra:-Transpose":"") << "(B);" << std::endl;;
#endif

    bool pass = true ;
    if (!timing) {
#if DISPLAY
        for (size_t i = 0 ; i < m ; ++i) {
            for (size_t j = 0 ; j < n  ; ++j) {
                if (Ci[i*ldC+j] != (typename IField::Element) C[i*ldc+j]) {
                    pass = false;
                    std::cout << 'x' ;
                }
                else
                    std::cout << 'o' ;
            }
            std::cout << std::endl;
        }
#else
        for (size_t i = 0 ; i < m && pass; ++i) {
            for (size_t j = 0 ; j < n && pass ; ++j) {
                if (Ci[i*ldC+j] != (typename IField::Element) C[i*ldc+j]) {
                    pass = false;
                }
            }
        }
#endif
    }

    if (!pass) {
        std::cout << "***       *** " << std::endl;
        std::cout << "*** error *** " << std::endl;
        std::cout << "***       *** " << std::endl;
    }
    else {
        std::cout << "+++ pass  +++" << std::endl;
    }

    if  (timing) {
        FFLAS::Timer tom;
        // Givaro::Modular<double> G(65537);
        Givaro::ZRing<double> G;
        double af, bf ;
        G.init(af, alpha);
        G.init(bf, beta);

        double *Cf,*Af,*Bf;
        Cf= FFLAS::fflas_new(G,m,ldC);
        Af= FFLAS::fflas_new(G,ra,ldA);
        Bf= FFLAS::fflas_new(G,rb,ldB);

        for (size_t i=0;i<ra;++i) {
            for (size_t j=0;j<ca;++j) {
                G.init(Af[i*ldA+j], A[i*lda+j]);
            }
        }
        for (size_t i=0;i<rb;++i) {
            for (size_t j=0;j<cb;++j) {
                G.init(Bf[i*ldB+j], B[i*ldb+j]);
            }
        }
        for (size_t i=0;i<m;++i) {
            for (size_t j=0;j<n;++j) {
                G.init(Cf[i*ldC+j], D[i*n+j]);
            }
        }


        tom.clear(); tom.start();
        FFLAS::fgemm(G,(FFLAS::FFLAS_TRANSPOSE)tA,(FFLAS::FFLAS_TRANSPOSE)tB,m,n,k,af,Af,ldA,Bf,ldB,bf,Cf,ldC);
        tom.stop();
        std::cout << "fgemm time: " << tom << std::endl;

        FFLAS::fflas_delete(Af);
        FFLAS::fflas_delete(Bf);
        FFLAS::fflas_delete(Cf);

    }

    FFLAS::fflas_delete(A);
    FFLAS::fflas_delete(B);
    FFLAS::fflas_delete(C);

    FFLAS::fflas_delete(Ai);
    FFLAS::fflas_delete(Bi);
    FFLAS::fflas_delete(Ci);

    FFLAS::fflas_delete(D);

    return pass;
}

int main(int argc, char** argv)
{

    static size_t m = 9 ;
    static size_t n = 10 ;
    static size_t k = 11 ;
    static bool timing = false ;

    static Argument as[] = {
        { 'm', "-m m", "m.",      TYPE_INT , &m },
        { 'n', "-n n", "n.",      TYPE_INT , &n },
        { 'k', "-k k", "k.",      TYPE_INT , &k },
        { 't', "-timing", "Output timing"            , TYPE_NONE, &timing},
        END_OF_ARGUMENTS
    };

    FFLAS::parseArguments(argc,argv,as);


    for (int i = -1 ; i < 2 ; ++i) {
        for (int j = -1 ; j < 2 ; ++j) {
            std::cout << "===================================================" << std::endl;
            std::cout << " C = " << i << " A B + " << j << "C" << std::endl;
            test_igemm(m,n,k,FflasNoTrans, FflasNoTrans,i,j,timing);
            std::cout << " C = " << i << " A tB + " << j << "C" << std::endl;
            test_igemm(m,n,k,FflasTrans, FflasNoTrans,i,j,timing);
            std::cout << " C = " << i << " tA B + " << j << "C" << std::endl;
            test_igemm(m,n,k,FflasNoTrans, FflasTrans,i,j,timing);
            std::cout << " C = " << i << " tA tB + " << j << "C" << std::endl;
            test_igemm(m,n,k,FflasTrans, FflasTrans,i,j,timing);
        }
    }
    for (size_t a = 0 ; a < 4 ; ++a) {
        int i = rand() % 25 ;
        int j = rand() % 25 ;
        if (rand()%2) i = -i ;
        if (rand()%2) j = -j ;
        std::cout << " C = " << i << " A B + " << j << "C" << std::endl;
        test_igemm(m,n,k,FflasNoTrans, FflasNoTrans,i,j,timing);
        std::cout << " C = " << i << " A tB + " << j << "C" << std::endl;
        test_igemm(m,n,k,FflasTrans, FflasNoTrans,i,j,timing);
        std::cout << " C = " << i << " tA B + " << j << "C" << std::endl;
        test_igemm(m,n,k,FflasNoTrans, FflasTrans,i,j,timing);
        std::cout << " C = " << i << " tA tB + " << j << "C" << std::endl;
        test_igemm(m,n,k,FflasTrans, FflasTrans,i,j,timing);

    }


    return 0;
}

/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
back to top