https://hal.archives-ouvertes.fr/hal-02128878
Raw File
Tip revision: 4201397494d9af8b687117e8ff4d85a8944f5c5a authored by Software Heritage on 11 June 2019, 10:15:02 UTC
hal: Deposit 298 in collection hal
Tip revision: 4201397
benchmark-fgemm-rns.C
/* Copyright (c) FFLAS-FFPACK
 * ========LICENCE========
 * This file is part of the library FFLAS-FFPACK.
 *
 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
 * it under the terms of the  GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * ========LICENCE========
 */


// declare that the call to openblas_set_numthread will be made here, hence don't do it
// everywhere in the call stack
#define __FFLASFFPACK_OPENBLAS_NT_ALREADY_SET 1

#include "fflas-ffpack/fflas/fflas.h"

#include <iostream>

#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/args-parser.h"

#ifdef __FFLASFFPACK_USE_KAAPI
#include "libkomp.h"
#endif

using namespace FFLAS;

typedef FFPACK::rns_double RNS;
typedef FFPACK::RNSInteger<RNS> Field;
typedef Field::Element_ptr Element_ptr;
typedef Field::ConstElement_ptr ConstElement_ptr;

typedef StrategyParameter::Threads THREADS;
typedef StrategyParameter::Grain GRAIN;
typedef StrategyParameter::TwoD TWOD;
typedef StrategyParameter::TwoDAdaptive TWODA;
typedef StrategyParameter::ThreeD THREED;
typedef StrategyParameter::ThreeDAdaptive THREEDA;
typedef StrategyParameter::ThreeDInPlace THREEDIP;

typedef ParSeqHelper::Sequential PSeq;

template<typename ModParSeqTrait, typename FgemmParSeqTrait>
static inline void
bench_fgemm (const Field ZZ, const size_t m, const size_t n, const size_t k,
             ConstElement_ptr A, ConstElement_ptr B, Element_ptr C,
             int moduli_th, int fgemm_th, int nbw)
{
    PAR_BLOCK
    {
        typedef ParSeqHelper::Compose<ModParSeqTrait, FgemmParSeqTrait> RNSParSeqHelper;
        RNSParSeqHelper RNSParSeq (moduli_th, fgemm_th);

        typedef MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultTag, RNSParSeqHelper> RNSHelper;
        RNSHelper WH (ZZ, nbw, RNSParSeq);

        fgemm (ZZ, FflasNoTrans, FflasNoTrans, m, n, k, ZZ.one, A, k, B, n, ZZ.zero,
               C, n, WH);
    }
}

template<typename T>
static inline void
bench_do_it (const Field ZZ, const size_t m, const size_t n, const size_t k,
             ConstElement_ptr A, ConstElement_ptr B, Element_ptr C,
             int moduli_th, int p, int fgemm_th, int nbw)
{
    if (p == 0)
        bench_fgemm<T, PSeq> (ZZ, m, n, k, A, B, C, moduli_th, fgemm_th, nbw);
    else if (p == 1)
    {
        typedef ParSeqHelper::Parallel<CuttingStrategy::Block, THREADS> PPar;
        bench_fgemm<T, PPar> (ZZ, m, n, k, A, B, C, moduli_th, fgemm_th, nbw);
    }
    else if (p == 2)
    {
        typedef ParSeqHelper::Parallel<CuttingStrategy::Recursive, TWOD> PPar;
        bench_fgemm<T, PPar> (ZZ, m, n, k, A, B, C, moduli_th, fgemm_th, nbw);
    }
    else if (p == 3)
    {
        typedef ParSeqHelper::Parallel<CuttingStrategy::Recursive, TWODA> PPar;
        bench_fgemm<T, PPar> (ZZ, m, n, k, A, B, C, moduli_th, fgemm_th, nbw);
    }
    else if (p == 4)
    {
        typedef ParSeqHelper::Parallel<CuttingStrategy::Recursive, THREEDIP> PPar;
        bench_fgemm<T, PPar> (ZZ, m, n, k, A, B, C, moduli_th, fgemm_th, nbw);
    }
    else if (p == 5)
    {
        typedef ParSeqHelper::Parallel<CuttingStrategy::Recursive, THREED> PPar;
        bench_fgemm<T, PPar> (ZZ, m, n, k, A, B, C, moduli_th, fgemm_th, nbw);
    }
    else /* p == 6 */
    {
        typedef ParSeqHelper::Parallel<CuttingStrategy::Recursive, THREEDA> PPar;
        bench_fgemm<T, PPar> (ZZ, m, n, k, A, B, C, moduli_th, fgemm_th, nbw);
    }
}

int
main(int argc, char *argv[])
{
 
#ifdef __FFLASFFPACK_OPENBLAS_NUM_THREADS
    openblas_set_num_threads(__FFLASFFPACK_OPENBLAS_NUM_THREADS);
#endif

    size_t pbits = 20;
    size_t r = 8;
    size_t m = 1000;
    size_t k = 1000;
    size_t n = 1000;
    int nbw = -1;
    size_t iter = 3;
    int fgemm_th = MAX_THREADS;
    int p = 0;
    int moduli_th = 1;
    int q = 0;

    Argument as[] = {
        { 'r', "-r R", "Number of RNS moduli", TYPE_INT , &r},
        { 'b', "-b B", "Number of bits of the moduli (in [10, 26])", TYPE_INT,
            &pbits},
        { 'm', "-m M", "Row dimension of A", TYPE_INT, &m},
        { 'k', "-k K", "Col dimension of A", TYPE_INT, &k},
        { 'n', "-n N", "Col dimension of B", TYPE_INT, &n},
        { 'w', "-w N", "Number of Winograd levels (-1 means computed by the lib)",
            TYPE_INT, &nbw},
        { 'i', "-i R", "Number of repetitions", TYPE_INT, &iter},
        { 'p', "-p P", "0 for sequential, 1 for 2D iterative, 2 for 2D rec, "
            "3 for 2D rec adaptive, 4 for 3D rc in-place, "
            "5 for 3D rec, 6 for 3D rec adaptive.", TYPE_INT , &p },
        { 't', "-t T", "Number of threads for the fgemm computations.", TYPE_INT,
            &fgemm_th},
        { 'q', "-q Q", "Strategy parameter for the moduli: 0 for sequential, "
            "1 for threads", TYPE_INT , &q},
        { 'u', "-u U", "Number of threads for handling the moduli "
            "(depends on the value of q) .", TYPE_INT, &moduli_th},
        END_OF_ARGUMENTS
    };

    parseArguments (argc, argv, as);
    std::ostringstream oss;
    /* Check some command line parameters */
    if (fgemm_th <= 0 || moduli_th <= 0)
    {
        std::cerr << "Error, the number of threads must be positive" << std::endl;
        return 1;
    }
    if (pbits < 10 || pbits > 26)
    {
        std::cerr << "Error, the number of bits of the moduli must be in [10, 26]"
        << std::endl;
        return 1;
    }

    /* Some warnings */
    if (q == 0 && moduli_th > 1)
    {
        oss << "Warning, -u is set to 1 because -q is 0 (sequential)"
        << std::endl;
        moduli_th = 1;
    }
    if (p == 0 && fgemm_th > 1)
    {
        oss << "Warning, -t is set to 1 because -p is 0 (sequential)"
        << std::endl;
        fgemm_th = 1;
    }

    RNS rns (pbits, r);
    Field ZZ(rns);

    Timer chrono, TimFreivalds;
    double time=0.0, timev=0.0;

    Element_ptr A, B, C;

    Field::RandIter G(ZZ);

    A = fflas_new (ZZ, m, k, Alignment::CACHE_PAGESIZE);
    frand (ZZ, G, m*k, A, 0);
    B = fflas_new (ZZ, k, n, Alignment::CACHE_PAGESIZE);
    frand (ZZ, G, k*n, B, 0);
    C = fflas_new (ZZ, m, n, Alignment::CACHE_PAGESIZE);
    fzero (ZZ, m*n, C, 0);

    for (size_t i=0; i<=iter; ++i)
    {
        chrono.clear();
        if (i)
            chrono.start();

        if (q == 0) /* moduli are done sequentially */
            bench_do_it<PSeq> (ZZ, m, n, k, A, B, C, moduli_th, p, fgemm_th, nbw);
        else /* (q == 1) */
        {
            typedef ParSeqHelper::Parallel<CuttingStrategy::RNSModulus, THREADS> PPar;
            bench_do_it<PPar> (ZZ, m, n, k, A, B, C, moduli_th, p, fgemm_th, nbw);
        }

        if (i)
        {
            chrono.stop();
            time+=chrono.realtime();
        }

        TimFreivalds.clear();
        TimFreivalds.start();

        bool pass = freivalds (ZZ, FflasNoTrans, FflasNoTrans, m, n, k, ZZ.one, A, k, B, n, C,n);
        TimFreivalds.stop();
        timev += TimFreivalds.usertime();
        if (!pass)
            std::cout << "FAILED" << std::endl;
    }

    fflas_delete (A);
    fflas_delete (B);
    fflas_delete (C);

    // -----------
    // Standard output for benchmark
    double time_per_iter = time / double(iter);
    double Gflops = double(iter) * double (r) * (2. * double(m)/1000. * double(n)/1000. * double(k)/1000.0) / time;
    std::cout << "Time: " << time_per_iter << " Gflops: " << Gflops;
    writeCommandString (std::cout, as) << std::endl;
#if __FFLASFFPACK_DEBUG
    std::cout << "Freivalds vtime: " << timev / (double)iter << std::endl;
#endif
    std::cerr<<oss.str();
    return 0;
}

/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
back to top