https://hal.archives-ouvertes.fr/hal-02128878
Raw File
Tip revision: 4201397494d9af8b687117e8ff4d85a8944f5c5a authored by Software Heritage on 11 June 2019, 10:15:02 UTC
hal: Deposit 298 in collection hal
Tip revision: 4201397
fflas_pftrsm.inl
/* fflas/fflas_pftrsm.inl
 * Copyright (C) 2013 Ziad Sultan
 *
 * Written by Ziad Sultan  < Ziad.Sultan@imag.fr >
 * Time-stamp: <18 Dec 15 16:09:24 Jean-Guillaume.Dumas@imag.fr>
 *
 * ========LICENCE========
 * This file is part of the library FFLAS-FFPACK.
 *
 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
 * it under the terms of the  GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * ========LICENCE========
 *.
 */


#ifndef __FFLASFFPACK_fflas_pftrsm_INL
#define __FFLASFFPACK_fflas_pftrsm_INL

#define PTRSM_HYBRID_THRESHOLD 256

#include "fflas-ffpack/paladin/parallel.h"

namespace FFLAS {

    template<class Field, class Cut, class Param>
    inline typename Field::Element_ptr
    ftrsm( const Field& F,
           const FFLAS::FFLAS_SIDE Side,
           const FFLAS::FFLAS_UPLO UpLo,
           const FFLAS::FFLAS_TRANSPOSE TA,
           const FFLAS::FFLAS_DIAG Diag,
           const size_t m,
           const size_t n,
           const typename Field::Element alpha,
#ifdef __FFLAS__TRSM_READONLY
           typename Field::ConstElement_ptr
#else
           typename Field::Element_ptr
#endif
           A, const size_t lda,
           typename Field::Element_ptr B, const size_t ldb,
           TRSMHelper <StructureHelper::Iterative, ParSeqHelper::Parallel<Cut,Param> > & H)
    // const FFLAS::CuttingStrategy method,
    // const size_t numThreads)
    {
        typedef TRSMHelper<StructureHelper::Recursive,ParSeqHelper::Sequential> seqRecHelper;
        SYNCH_GROUP(
                    seqRecHelper SeqH(H);
                    if(Side == FflasRight){
                    FORBLOCK1D(iter, m, H.parseq,
                               TASK(MODE(READ(A[0]) CONSTREFERENCE(F, A, B, SeqH,H) READWRITE(B[iter.begin()*ldb])), ftrsm( F, Side, UpLo, TA, Diag, iter.end()-iter.begin(), n, alpha, A, lda, B + iter.begin()*ldb, ldb, SeqH));
                              );
                    } else {
                    FORBLOCK1D(iter, n, H.parseq,
                               //							  seqRecHelper SeqH(H);
                               TASK(MODE(READ(A[0]) CONSTREFERENCE(F, A, B, SeqH,H) READWRITE(B[iter.begin()])), ftrsm(F, Side, UpLo, TA, Diag, m, iter.end()-iter.begin(), alpha, A , lda, B + iter.begin(), ldb, SeqH));
                              );
                    }
                   );
        return B;
    }
    template<class Field, class Cut, class Param>
    inline typename Field::Element_ptr
    ftrsm( const Field& F,
           const FFLAS::FFLAS_SIDE Side,
           const FFLAS::FFLAS_UPLO UpLo,
           const FFLAS::FFLAS_TRANSPOSE TA,
           const FFLAS::FFLAS_DIAG Diag,
           const size_t m,
           const size_t n,
           const typename Field::Element alpha,
#ifdef __FFLAS__TRSM_READONLY
           typename Field::ConstElement_ptr
#else
           typename Field::Element_ptr
#endif
           A, const size_t lda,
           typename Field::Element_ptr B, const size_t ldb,
           TRSMHelper <StructureHelper::Hybrid, ParSeqHelper::Parallel<Cut, Param> > & H)
    // const FFLAS::CuttingStrategy method,
    // const size_t numThreads)
    {

        if(Side == FflasRight){

            size_t nt = H.parseq.numthreads();
            size_t nt_it,nt_rec;
            if (m/PTRSM_HYBRID_THRESHOLD < nt){
                nt_it = (int)ceil(double(m)/PTRSM_HYBRID_THRESHOLD);
                nt_rec = (int)ceil(double(nt)/double(nt_it));
            } else { nt_it = nt; nt_rec = 1;}
            //			ForStrategy1D<size_t> iter(m, ParSeqHelper::Parallel((size_t)nt_it,H.parseq.method));
            //			for (iter.begin(); ! iter.end(); ++iter) {
            //			SYNCH_GROUP(H.parseq.numthreads(),
            SYNCH_GROUP(
                        ParSeqHelper::Parallel<Cut,Param> psh(nt_rec);
                        TRSMHelper<StructureHelper::Recursive, ParSeqHelper::Parallel<Cut,Param> > SeqH (psh);
                        H.parseq.set_numthreads(nt_it);
                        FORBLOCK1D(iter, m, H.parseq,
                                   //				      std::cerr<<"trsm_rec nt = "<<nt_rec<<std::endl;
                                   TASK(MODE(READ(A) CONSTREFERENCE(F, A, B, SeqH) READWRITE(B[iter.begin()*ldb])),
                                        ftrsm( F, Side, UpLo, TA, Diag, iter.end()-iter.begin(), n, alpha, A, lda, B + iter.begin()*ldb, ldb, SeqH));
                                  );
                       );

        } else {

            size_t nt = H.parseq.numthreads();
            size_t nt_it=nt;
            size_t nt_rec=1;
            if (nt_it*PTRSM_HYBRID_THRESHOLD >= n){
                nt_it>>=1;
                nt_rec<<=1;
                while(nt_it*PTRSM_HYBRID_THRESHOLD >= n){
                    nt_it>>=1;
                    nt_rec<<=1;
                }
                nt_it<<=1;
                nt_rec>>=1;
            }

            // if ((int)n/PTRSM_HYBRID_THRESHOLD < nt){
            // 	nt_it = std::min(nt,(int)ceil(double(n)/PTRSM_HYBRID_THRESHOLD));
            // 	nt_rec = ceil(double(nt)/nt_it);
            // } else { nt_it = nt; nt_rec = 1;}

            //	ForStrategy1D<size_t> iter(n, ParSeqHelper::Parallel((size_t)nt_it,H.parseq.method));
            //				for (iter.begin(); ! iter.end(); ++iter) {


            // 				std::cerr<<"trsm_rec nt_it = "<<nt_it<<std::endl;
            // 				std::cerr<<"trsm_rec nt_rec = "<<nt_rec<<std::endl;

            SYNCH_GROUP(
                        ParSeqHelper::Parallel<Cut,Param> psh(nt_rec);
                        TRSMHelper<StructureHelper::Recursive, ParSeqHelper::Parallel<Cut,Param> > SeqH (psh);
                        H.parseq.set_numthreads(nt_it);
                        FORBLOCK1D(iter, n, H.parseq,
                                   TASK(MODE(READ(A) CONSTREFERENCE(F, A, B, SeqH) READWRITE(B[iter.begin()])), ftrsm( F, Side, UpLo, TA, Diag, m, iter.end()-iter.begin(), alpha, A , lda, B + iter.begin(), ldb, SeqH));
                                  );
                       );
        }
        return B;
        }

        } // FFLAS


#endif // __FFLASFFPACK_fflas_pftrsm_INL
        /* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
        // vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
back to top