https://hal.archives-ouvertes.fr/hal-02128878
Raw File
Tip revision: 4201397494d9af8b687117e8ff4d85a8944f5c5a authored by Software Heritage on 11 June 2019, 10:15:02 UTC
hal: Deposit 298 in collection hal
Tip revision: 4201397
fflas_helpers.inl
/*
 * Copyright (C) 2014 the FFLAS-FFPACK group
 *
 * Written by Clement Pernet <Clement.Pernet@imag.fr>
 *
 *
 * ========LICENCE========
 * This file is part of the library FFLAS-FFPACK.
 *
 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
 * it under the terms of the  GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * ========LICENCE========
 *.
 */

/** @file fflas/fflas_mmhelper.h
 * @brief  Matrix-Matrix Helper class
 */

#ifndef __FFLASFFPACK_fflas_fflas_mmhelper_INL
#define __FFLASFFPACK_fflas_fflas_mmhelper_INL

#include "fflas-ffpack/field/field-traits.h"
#include "fflas-ffpack/paladin/parallel.h"
#include "fflas-ffpack/utils/flimits.h"

#include <algorithm> // std::max

namespace FFLAS{ namespace Protected{
    /** \brief Computes the number of recursive levels to perform.
     *
     * \param m the common dimension in the product AxB
     */
    template<class Field>
    int WinogradSteps (const Field & F, const size_t & m);

}//Protected
}//FFLAS

namespace FFLAS {

    namespace Protected{
        template <class DFE> inline size_t min_types(const DFE& k) {return static_cast<size_t>(k);}
#if __FFLASFFPACK_SIZEOF_LONG == 4
        template <> inline size_t min_types(const double& k) {return static_cast<size_t>(std::min(k,double(std::numeric_limits<size_t>::max())));}
        template <> inline size_t min_types(const int64_t& k) {return static_cast<size_t>(std::min(k,int64_t(std::numeric_limits<size_t>::max())));}
#endif
        template <> inline size_t min_types(const RecInt::rint<6>& k) {return static_cast<size_t>(uint64_t(std::min(k,RecInt::rint<6>(uint64_t(std::numeric_limits<size_t>::max())))));}
        template <> inline size_t min_types(const RecInt::rint<7>& k) {return static_cast<size_t>(uint64_t(std::min(k,RecInt::rint<7>(uint64_t(std::numeric_limits<size_t>::max())))));}
        template <> inline size_t min_types(const RecInt::rint<8>& k) {return static_cast<size_t>(uint64_t(std::min(k,RecInt::rint<8>(uint64_t(std::numeric_limits<size_t>::max())))));}
        template <> inline size_t min_types(const RecInt::rint<9>& k) {return static_cast<size_t>(uint64_t(std::min(k,RecInt::rint<9>(uint64_t(std::numeric_limits<size_t>::max())))));}
        template <> inline size_t min_types(const RecInt::rint<10>& k) {return static_cast<size_t>(uint64_t(std::min(k,RecInt::rint<10>(uint64_t(std::numeric_limits<size_t>::max())))));}
        template <> inline size_t min_types(const Givaro::Integer& k) {return static_cast<size_t>(uint64_t(std::min(k,Givaro::Integer(uint64_t(std::numeric_limits<size_t>::max())))));}

        template <class T>
        inline bool unfit(T x){return false;}
        template <>
        inline bool unfit(int64_t x){return (x>limits<int32_t>::max());}
        template <size_t K>
        inline bool unfit(RecInt::rint<K> x){return (x > RecInt::rint<K>(limits<RecInt::rint<K-1>>::max()));}
        template <>
        inline bool unfit(RecInt::rint<6> x){return (x > limits<int32_t>::max());}
    }

    namespace MMHelperAlgo{
        struct Auto{};
        struct Classic{};
        struct Winograd{};
        struct WinogradPar{};
        struct Bini{};
    }

    template<class ModeT, class ParSeq>
    struct AlgoChooser{typedef MMHelperAlgo::Winograd value;};
    template<class ParSeq>
    struct AlgoChooser<ModeCategories::ConvertTo<ElementCategories::RNSElementTag>, ParSeq>{typedef MMHelperAlgo::Classic value;};

    template<class Field,
    typename AlgoTrait = MMHelperAlgo::Auto,
    typename ModeTrait = typename ModeTraits<Field>::value,
    typename ParSeqTrait = ParSeqHelper::Sequential >
    struct MMHelper;
    /*! FGEMM  Helper for Default and ConvertTo modes of operation
    */
    template<class Field,
    typename AlgoTrait,
    typename ParSeqTrait >
    struct MMHelper<Field, AlgoTrait, ModeCategories::DefaultTag, ParSeqTrait>
    {
        typedef MMHelper<Field,AlgoTrait, ModeCategories::DefaultTag,ParSeqTrait> Self_t;
        int recLevel ;
        ParSeqTrait parseq;

        MMHelper(){}
        MMHelper(const Field& F, size_t m, size_t k, size_t n, ParSeqTrait _PS) : recLevel(-1), parseq(_PS) {}
        MMHelper(const Field& F, int w, ParSeqTrait _PS=ParSeqTrait()) : recLevel(w), parseq(_PS) {}

        // copy constructor from other Field and Algo Traits
        template<class F2, typename AlgoT2, typename FT2, typename PS2>
        MMHelper(MMHelper<F2, AlgoT2, FT2, PS2>& WH) : recLevel(WH.recLevel), parseq(WH.parseq) {}

        friend std::ostream& operator<<(std::ostream& out, const Self_t& M)
        {
            return out <<"Helper: "
            <<typeid(AlgoTrait).name()<<' '
            <<typeid(ModeCategories::DefaultTag).name()<< ' '
            << M.parseq <<std::endl
            <<"  recLevel = "<<M.recLevel<<std::endl;
        }
    };
    template<class Field,
    typename AlgoTrait,
    typename Dest,
    typename ParSeqTrait>
    struct MMHelper<Field, AlgoTrait, ModeCategories::ConvertTo<Dest>, ParSeqTrait>
    {
        typedef MMHelper<Field,AlgoTrait, ModeCategories::ConvertTo<Dest>,ParSeqTrait> Self_t;
        int recLevel ;
        ParSeqTrait parseq;

        MMHelper(){}
        MMHelper(const Field& F, size_t m, size_t k, size_t n, ParSeqTrait _PS) : recLevel(-1), parseq(_PS) {}
        MMHelper(const Field& F, int w, ParSeqTrait _PS=ParSeqTrait()) : recLevel(w), parseq(_PS) {}

        // copy constructor from other Field and Algo Traits
        template<class F2, typename AlgoT2, typename FT2, typename PS2>
        MMHelper(MMHelper<F2, AlgoT2, FT2, PS2>& WH) : recLevel(WH.recLevel), parseq(WH.parseq) {}

        friend std::ostream& operator<<(std::ostream& out, const Self_t& M)
        {
            return out <<"Helper: "
            <<typeid(AlgoTrait).name()<<' '
            <<typeid(ModeCategories::ConvertTo<Dest>).name()<< ' '
            << M.parseq <<std::endl
            <<"  recLevel = "<<M.recLevel<<std::endl;
        }
    };
    // MMHelper for Delayed and Lazy Modes of operation
    template<class Field,
    typename AlgoTrait,
    typename ModeTrait,
    typename ParSeqTrait>
    struct MMHelper {
        typedef MMHelper<Field,AlgoTrait,ModeTrait,ParSeqTrait> Self_t;
        typedef typename associatedDelayedField<const Field>::type DelayedField_t;
        typedef typename associatedDelayedField<const Field>::field DelayedField;
        typedef typename DelayedField::Element DFElt;
        int recLevel ;
        DFElt FieldMin, FieldMax, Amin, Amax, Bmin, Bmax, Cmin, Cmax, Outmin, Outmax;
        DFElt MaxStorableValue;

        const DelayedField_t delayedField;
        ParSeqTrait parseq;
        void initC(){Cmin = FieldMin; Cmax = FieldMax;}
        void initA(){Amin = FieldMin; Amax = FieldMax;}
        void initB(){Bmin = FieldMin; Bmax = FieldMax;}
        void initOut(){Outmin = FieldMin; Outmax = FieldMax;}


        size_t MaxDelayedDim(DFElt beta)
        {
            if (MaxStorableValue < DFElt(0))
                //Infinte precision delayed field
                return std::numeric_limits<size_t>::max();

            DFElt absbeta;
            delayedField.init(absbeta);
            delayedField.assign(absbeta, beta);
            if (beta < 0) absbeta = -beta;
            // This cast is needed when Cmin base type is int8/16_t,
            // getting -Cmin returns a int, not the same base type.
            DFElt diff = MaxStorableValue - absbeta
            * std::max(static_cast<const DFElt&>(-Cmin), Cmax);
            DFElt AB = std::max(static_cast<const DFElt&>(-Amin), Amax)
            * std::max(static_cast<const DFElt&>(-Bmin), Bmax);
            if ((diff < DFElt(0u))||(AB<DFElt(0u))) return 0;


            DFElt kmax = diff/AB;
            return FFLAS::Protected::min_types<DFElt>(kmax);
            // if (kmax > std::numeric_limits<size_t>::max())
            // 	return std::numeric_limits<size_t>::max();
            // else
            // 	return kmax;
        }
        bool Aunfit(){ return Protected::unfit(std::max(static_cast<const DFElt&>(-Amin),Amax));}
        bool Bunfit(){ return Protected::unfit(std::max(static_cast<const DFElt&>(-Bmin),Bmax));}
        void setOutBounds(const size_t k, const DFElt alpha, const DFElt beta)
        {
            if (beta<0){
                Outmin = beta*Cmax;
                Outmax = beta*Cmin;
            } else {
                Outmin = beta*Cmin;
                Outmax = beta*Cmax;
            }
            if (alpha >0){
                Outmin += DFElt(k)*alpha*std::min(Amin*Bmax, Amax*Bmin);
                Outmax += DFElt(k)*alpha*std::max(Amin*Bmin, Amax*Bmax);
            }else{
                Outmin += DFElt(k)*alpha*std::max(Amin*Bmin, Amax*Bmax);
                Outmax += DFElt(k)*alpha*std::min(Amin*Bmax, Amax*Bmin);
            }
        }

        bool checkA(const Field& F, const FFLAS::FFLAS_TRANSPOSE ta, const size_t M, const size_t N,
                    typename Field::ConstElement_ptr A, const size_t lda )
        {
#ifdef __FFLASFFPACK_DEBUG
            for (size_t i=0; i<M;++i)
                for (size_t j=0; j<N;++j){
                    const typename Field::Element x = (ta == FFLAS::FflasNoTrans)? A[i*lda+j] : A[i+j*lda];
                    if (x > Amax || x < Amin){
                        std::cerr<<"Error in "<<Amin<<" < = A["<<i<<", "<<j<<"] ="<<x<<" <= "<<Amax<<std::endl;
                        return false;
                    }
                }
#endif
            return true;
        }

        bool checkB(const Field& F, const FFLAS::FFLAS_TRANSPOSE tb, const size_t M, const size_t N,
                    typename Field::ConstElement_ptr B, const size_t ldb)
        {
#ifdef __FFLASFFPACK_DEBUG
            for (size_t i=0; i<M;++i)
                for (size_t j=0; j<N;++j){
                    const typename Field::Element x = (tb == FFLAS::FflasNoTrans)? B[i*ldb+j] : B[i+j*ldb];
                    if (x > Bmax || x < Bmin){
                        std::cerr<<"Error in "<<Bmin<<" < = B["<<i<<", "<<j<<"] ="<<B[i*ldb+j]<<" <= "<<Bmax<<std::endl;
                        return false;
                    }
                }
#endif
            return true;
        }

        bool checkOut(const Field& F, const size_t M, const size_t N,
                      typename Field::ConstElement_ptr A, const size_t lda ){
#ifdef __FFLASFFPACK_DEBUG
            for (size_t i=0; i<M;++i)
                for (size_t j=0; j<N;++j)
                    if ((A[i*lda+j]>Outmax) || (A[i*lda+j]<Outmin)){
                        std::cerr<<"Error in "<<Outmin<<" <= Out["<<i<<", "<<j<<"] = "<<A[i*lda+j]<<" <= "<<Outmax<<std::endl;
                        return false;
                    }
#endif
            return true;
        }

        MMHelper(){}
        //TODO: delayedField constructor has a >0 characteristic even when it is a Double/FloatDomain
        // correct but semantically not satisfactory
        MMHelper(const Field& F, size_t m, size_t k, size_t n, ParSeqTrait _PS) :
            recLevel(-1),
            FieldMin((DFElt)F.minElement()), FieldMax((DFElt)F.maxElement()),
            Amin(FieldMin), Amax(FieldMax),
            Bmin(FieldMin), Bmax(FieldMax),
            Cmin(FieldMin), Cmax(FieldMax),
            Outmin(0), Outmax(0),
            MaxStorableValue ((DFElt)(limits<typename DelayedField::Element>::max())),
            delayedField(F),
            // delayedField((typename Field::Element)F.characteristic()),
            parseq(_PS)
        {
        }

        MMHelper(const Field& F, int w, ParSeqTrait _PS=ParSeqTrait()) :
            recLevel(w),
            FieldMin((DFElt)F.minElement()), FieldMax((DFElt)F.maxElement()),
            Amin(FieldMin), Amax(FieldMax),
            Bmin(FieldMin), Bmax(FieldMax),
            Cmin(FieldMin), Cmax(FieldMax),
            Outmin(0), Outmax(0),
            MaxStorableValue ((DFElt)(limits<typename DelayedField::Element>::max())),
            delayedField(F),
            parseq(_PS)
        {
        }

        // copy constructor from other Field and Algo Traits
        template<class F2, typename AlgoT2, typename FT2, typename PS2>
        MMHelper(MMHelper<F2, AlgoT2, FT2, PS2>& WH) :
            recLevel(WH.recLevel),
            FieldMin(WH.FieldMin), FieldMax(WH.FieldMax),
            Amin(WH.Amin), Amax(WH.Amax),
            Bmin(WH.Bmin), Bmax(WH.Bmax),
            Cmin(WH.Cmin), Cmax(WH.Cmax),
            Outmin(WH.Outmin), Outmax(WH.Outmax),
            MaxStorableValue(WH.MaxStorableValue),
            delayedField(WH.delayedField),
            parseq(WH.parseq)
        {
        }

        MMHelper(const Field& F, int w,
                 DFElt _Amin, DFElt _Amax,
                 DFElt _Bmin, DFElt _Bmax,
                 DFElt _Cmin, DFElt _Cmax,
                 ParSeqTrait _PS=ParSeqTrait()):
            recLevel(w), FieldMin((DFElt)F.minElement()), FieldMax((DFElt)F.maxElement()),
            Amin(_Amin), Amax(_Amax),
            Bmin(_Bmin), Bmax(_Bmax),
            Cmin(_Cmin), Cmax(_Cmax),
            Outmin(0),Outmax(0),
            MaxStorableValue(limits<typename DelayedField::Element>::max()),
            delayedField(F),
            parseq(_PS)
        {
        }

        friend std::ostream& operator<<(std::ostream& out, const Self_t& M)
        {
            return out <<"Helper: "
            <<typeid(AlgoTrait).name()<<' '
            <<typeid(ModeTrait).name()<< ' '
            << M.parseq <<std::endl
            <<" DelayedField = "<<typeid(DelayedField).name()<<std::endl
            <<"  recLevel = "<<M.recLevel<<std::endl
            <<"  FieldMin = "<<M.FieldMin<<" FieldMax = "<<M.FieldMax<<std::endl
            <<"  MaxStorableValue = "<< M.MaxStorableValue <<std::endl
            <<"  Amin = "<<M.Amin<<" Amax = "<<M.Amax<<std::endl
            <<"  Bmin = "<<M.Bmin<<" Bmax = "<<M.Bmax<<std::endl
            <<"  Cmin = "<<M.Cmin<<" Cmax = "<<M.Cmax<<std::endl
            <<"  Outmin = "<<M.Outmin<<" Outmax = "<<M.Outmax<<std::endl;
        }
    }; // MMHelper


    // to be used in the future, when Winograd's algorithm will be made generic wrt the ModeTrait
    // template <class Field, class AlgoT, class ParSeqH>
    // void copyOutBounds(const MMHelper<Field,AlgoT,ModeCategories::DelayedTag, ParSeqH> &Source,
    // 		   MMHelper<Field,AlgoT,ModeCategories::DelayedTag, ParSeqH> & Dest){
    // 	Dest.Outmax = Source.Outmax;
    // 	Dest.Outmin = Source.Outmin;
    // }
    // template <class Field, class AlgoT, class ParSeqH>
    // void copyOutBounds(const MMHelper<Field,AlgoT,ModeCategories::LazyTag, ParSeqH> &Source,
    // 		   MMHelper<Field,AlgoT,ModeCategories::LazyTag, ParSeqH> & Dest){
    // 	Dest.Outmax = Source.Outmax;
    // 	Dest.Outmin = Source.Outmin;
    // }
    // template <class MMH1, class MMH2>
    // void copyOutBounds(const MMH1 &Source, MMH2 & Dest){}
    /*! StructureHelper for ftrsm
    */
    namespace StructureHelper {
        struct Recursive{};
        struct Iterative{};
        struct Hybrid{};
    }

    /*! TRSM Helper
    */
    template<typename RecIterTrait = StructureHelper::Recursive, typename ParSeqTrait = ParSeqHelper::Sequential>
    struct TRSMHelper {
        ParSeqTrait parseq;
        template<class Cut,class Param>
        TRSMHelper(ParSeqHelper::Parallel<Cut,Param> _PS):parseq(_PS){}
        TRSMHelper(ParSeqHelper::Sequential _PS):parseq(_PS){}
        template<typename RIT, typename PST>
        TRSMHelper(TRSMHelper<RIT,PST>& _TH):parseq(_TH.parseq){}

        template<class Dom, class Algo=FFLAS::MMHelperAlgo::Winograd, class ModeT=typename FFLAS::ModeTraits<Dom>::value>
        FFLAS::MMHelper<Dom, Algo, ModeT, ParSeqTrait> pMMH (Dom& D, size_t m, size_t k, size_t n, ParSeqTrait p) const {
            return FFLAS::MMHelper<Dom, Algo, ModeT, ParSeqTrait>(D,m,k,n,p);
        }

        template<class Dom, class Algo=FFLAS::MMHelperAlgo::Winograd, class ModeT=typename FFLAS::ModeTraits<Dom>::value>
        FFLAS::MMHelper<Dom, Algo, ModeT, ParSeqTrait> pMMH (Dom& D, size_t m, size_t k, size_t n) const {
            return pMMH(D,m,k,n,this->parseq);
        }

    };




} // FFLAS
#endif
/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
back to top