// Archived copy retrieved from https://hal.archives-ouvertes.fr/hal-02128878
// (raw file; hal: Deposit 298 in collection hal).
// Tip revision: 4201397494d9af8b687117e8ff4d85a8944f5c5a (short: 4201397),
// authored by Software Heritage on 11 June 2019, 10:15:02 UTC.
// File name: fflas_freduce.h
/* fflas/fflas_freduce.h
 * Copyright (C) 2014 FFLAS FFPACK group
 *
 * Written by  Brice Boyer (briceboyer) <boyer.brice@gmail.com>
 *
 *
 * ========LICENCE========
 * This file is part of the library FFLAS-FFPACK.
 *
 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
 * it under the terms of the  GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * ========LICENCE========
 *.
 */

#ifndef __FFLASFFPACK_fflas_freduce_H
#define __FFLASFFPACK_fflas_freduce_H

#include "fflas-ffpack/fflas/fflas_simd.h"
#include "fflas-ffpack/field/field-traits.h"
#include "fflas-ffpack/utils/cast.h"

namespace FFLAS {

    /**
     * Compile-time flag keyed on an element type T.
     *
     * The primary template derives from std::false_type; the explicit
     * specialisations below derive from std::true_type.
     * NOTE(review): presumably read by the reduction kernels to decide whether
     * a SIMD modular-reduction path may be used for T -- confirm in the .inl.
     */
    template<class T>
    struct support_simd_mod : std::false_type {};

#if defined(__FFLASFFPACK_HAVE_SSE4_1_INSTRUCTIONS)
    // These specialisations exist only when the SSE4.1 configuration macro is defined.
    template<> struct support_simd_mod<float>  : std::true_type {};
    template<> struct support_simd_mod<double> : std::true_type {};
#if defined(__x86_64__)
    // int64_t is flagged only when additionally compiling for x86-64.
    template<> struct support_simd_mod<int64_t> : std::true_type {};
#endif  // __x86_64__

#endif // __FFLASFFPACK_HAVE_SSE4_1_INSTRUCTIONS

    /**
     * Compile-time flag keyed on an element type T: std::true_type for
     * float, double and int64_t, std::false_type for every other type.
     *
     * Note that support_simd_mod<T> being true implies support_fast_mod<T>
     * is true as well: every type specialised there is also specialised here.
     */
    template<class T>
    struct support_fast_mod : std::false_type {};

    template<> struct support_fast_mod<float>   : std::true_type {};
    template<> struct support_fast_mod<double>  : std::true_type {};
    template<> struct support_fast_mod<int64_t> : std::true_type {};

} // FFLAS

#include "fflas-ffpack/fflas/fflas_freduce.inl"

namespace FFLAS {

    /***************************/
    /*         LEVEL 1         */
    /***************************/

    template<class Field>
    void
    freduce (const Field & F, const size_t m,
             typename Field::ConstElement_ptr  B, const size_t incY,
             typename Field::Element_ptr A, const size_t incX)
    {
        return details::freduce (F,m,B,incY,A,incX,typename FieldTraits<Field>::category());
    }

    template<class Field>
    void
    freduce (const Field & F, const size_t m,
             typename Field::Element_ptr A, const size_t incX)
    {
        return details::freduce (F,m,A,incX,typename FieldTraits<Field>::category());
    }

    template<class Field>
    void
    freduce_constoverride(const Field & F, const size_t m,
                          typename Field::ConstElement_ptr A, const size_t incX)
    {
        return freduce(F, m, FFPACK::fflas_const_cast<typename Field::Element_ptr>(A), incX);
    }

    // CP: to be moved to a fflas_finit file, if ever needed
    /**
     * Level-1 finit, two-operand form: for i in [0, n) calls
     * F.init(X[i*incX], Y[i*incY]).
     *
     * The loop is driven by an element counter rather than by comparing
     * against an end pointer. Only the addresses of the n elements actually
     * accessed are formed, so a strided array that is exactly
     * (n-1)*inc + 1 elements long never has an out-of-range pointer
     * computed on it. A stride of zero re-uses the same element n times
     * instead of silently ending the loop.
     *
     * @param F    field providing init(dst, src).
     * @param n    number of elements to initialise.
     * @param Y    source elements (any pointer-like type supporting + and *).
     * @param incY distance, in elements, between consecutive sources.
     * @param X    destination elements.
     * @param incX distance, in elements, between consecutive destinations.
     */
    template<class Field, class ConstOtherElement_ptr>
    void
    finit (const Field& F, const size_t n,
           ConstOtherElement_ptr Y, const size_t incY,
           typename Field::Element_ptr X, const size_t incX)
    {
        if (incX == 1 && incY == 1) {
            // Contiguous case kept separate so the index needs no multiplication.
            for (size_t i = 0 ; i < n ; ++i) {
                F.init(*(X + i), *(Y + i));
            }
        }
        else {
            for (size_t i = 0 ; i < n ; ++i) {
                F.init(*(X + i*incX), *(Y + i*incY));
            }
        }
    }


    /**
     * Level-1 finit, in-place form: for i in [0, n) calls F.init(X[i*incX]).
     *
     * The loop counts elements instead of comparing against X + n*incX.
     * Only the addresses of the n elements actually accessed are formed,
     * which keeps the pointer arithmetic inside a strided array of exactly
     * (n-1)*incX + 1 elements. With incX == 0 the single element *X is
     * initialised n times rather than the loop being skipped.
     *
     * @param F    field providing the one-argument init.
     * @param n    number of elements to initialise.
     * @param X    elements to initialise.
     * @param incX distance, in elements, between consecutive entries.
     */
    template<class Field>
    void
    finit (const Field& F, const size_t n,
           typename Field::Element_ptr X, const size_t incX)
    {
        if (incX == 1) {
            // Contiguous case kept separate so the index needs no multiplication.
            for (size_t i = 0 ; i < n ; ++i) {
                F.init(*(X + i));
            }
        }
        else {
            for (size_t i = 0 ; i < n ; ++i) {
                F.init(*(X + i*incX));
            }
        }
    }

    /***************************/
    /*         LEVEL 2         */
    /***************************/


    /**
     * Level-2 in-place freduce over m rows of n elements, where row i
     * starts at A + i*lda.
     *
     * When n == lda the rows are adjacent in memory, so the whole matrix is
     * handed to the level-1 routine as one run of n*m elements; otherwise the
     * level-1 routine is called once per row with unit stride.
     *
     * @param F   field forwarded to the level-1 freduce.
     * @param m   number of rows.
     * @param n   number of elements per row.
     * @param A   first element of the matrix.
     * @param lda distance, in elements, between the starts of two rows.
     */
    template<class Field>
    void
    freduce (const Field& F, const size_t m , const size_t n,
             typename Field::Element_ptr A, const size_t lda)
    {
        const bool contiguous = (n == lda);
        if (contiguous) {
            freduce (F, n*m, A, 1);
            return;
        }
        for (size_t row = 0 ; row < m ; ++row) {
            freduce (F, n, A + row*lda, 1);
        }
    }
    /**
     * Task-based variant of the level-2 in-place freduce.
     *
     * FORBLOCK1D iterates over [0, m) in blocks produced by SPLITTER(numths).
     * For each block [iter.begin(), iter.end()) a TASK is issued that calls
     * the level-2 freduce on rowsize = iter.end() - iter.begin() rows,
     * starting at A + iter.begin()*lda and keeping the same n and lda.
     * Each task declares F as CONSTREFERENCE and the first element of its
     * row block as READWRITE.
     *
     * NOTE(review): SYNCH_GROUP / FORBLOCK1D / TASK are macros defined
     * elsewhere; presumably SYNCH_GROUP waits for all issued tasks before
     * control reaches the final return -- confirm against their definition.
     *
     * @param F      field forwarded to freduce.
     * @param m      number of rows.
     * @param n      number of elements per row.
     * @param A      first element of the matrix.
     * @param lda    distance, in elements, between the starts of two rows.
     * @param numths value handed to SPLITTER to control the row partition.
     */
    template<class Field>
    void
    pfreduce (const Field& F, const size_t m , const size_t n,
              typename Field::Element_ptr A, const size_t lda, const size_t numths)
    {
        SYNCH_GROUP(
                    FORBLOCK1D(iter, m, SPLITTER(numths),
                               size_t rowsize= iter.end()-iter.begin();
                               TASK(MODE(CONSTREFERENCE(F) READWRITE(A[iter.begin()*lda])),
                                    freduce (F, rowsize, n, A+iter.begin()*lda, lda);
                                   );
                              );
                   );
        return;
    }

    /**
     * Level-2 two-operand freduce over m rows of n elements. Row i of the
     * read-only operand starts at B + i*ldb and row i of the writable
     * operand starts at A + i*lda.
     *
     * When both matrices are stored without padding (n == lda and n == ldb)
     * the rows are adjacent in memory, so a single level-1 call over n*m
     * elements is issued -- the same shortcut the other level-2 routines in
     * this file take. Otherwise the level-1 routine is called once per row
     * with unit strides. Both paths visit exactly the same elements.
     *
     * @param F   field forwarded to the level-1 freduce.
     * @param m   number of rows.
     * @param n   number of elements per row.
     * @param B   first element of the read-only matrix.
     * @param ldb distance, in elements, between the starts of two rows of B.
     * @param A   first element of the writable matrix.
     * @param lda distance, in elements, between the starts of two rows of A.
     */
    template<class Field>
    void
    freduce (const Field& F, const size_t m , const size_t n,
             typename Field::ConstElement_ptr B, const size_t ldb,
             typename Field::Element_ptr A, const size_t lda)
    {
        if (n == lda && n == ldb) {
            freduce (F, n*m, B, 1, A, 1);
        }
        else {
            for (size_t i = 0 ; i < m ; ++i) {
                freduce (F, n, B + i*ldb, 1, A + i*lda, 1);
            }
        }
    }


    template<class Field>
    void
    freduce_constoverride(const Field & F, const size_t m, const size_t n,
                          typename Field::ConstElement_ptr A, const size_t lda)
    {
        return freduce(F, m, n,
                       FFPACK::fflas_const_cast<typename Field::Element_ptr>(A), lda);
    }

    // CP: to be moved to a fflas_finit field, if ever needed
    /**
     * Level-2 two-operand finit over m rows of n elements. Row i of the
     * source starts at B + i*ldb and row i of the destination starts at
     * A + i*lda.
     *
     * If neither matrix has padding between rows (n == lda and n == ldb) one
     * level-1 finit over n*m elements is issued; otherwise the level-1 finit
     * is called once per row with unit strides.
     *
     * @param F   field forwarded to the level-1 finit.
     * @param m   number of rows.
     * @param n   number of elements per row.
     * @param B   first element of the source matrix.
     * @param ldb distance, in elements, between the starts of two rows of B.
     * @param A   first element of the destination matrix.
     * @param lda distance, in elements, between the starts of two rows of A.
     */
    template<class Field, class OtherElement_ptr>
    void
    finit (const Field& F, const size_t m , const size_t n,
           const OtherElement_ptr B, const size_t ldb,
           typename Field::Element_ptr A, const size_t lda)
    {
        const bool packed = (n == lda) && (n == ldb);
        if (packed) {
            finit (F, n*m, B, 1, A, 1);
            return;
        }
        for (size_t row = 0 ; row < m ; ++row) {
            finit (F, n, B + row*ldb, 1, A + row*lda, 1);
        }
    }

    /**
     * Level-2 in-place finit over m rows of n elements, where row i starts
     * at A + i*lda.
     *
     * If the rows are adjacent in memory (n == lda) one level-1 finit over
     * n*m elements is issued; otherwise the level-1 finit is called once per
     * row with unit stride.
     *
     * @param F   field forwarded to the level-1 finit.
     * @param m   number of rows.
     * @param n   number of elements per row.
     * @param A   first element of the matrix.
     * @param lda distance, in elements, between the starts of two rows.
     */
    template<class Field>
    void
    finit (const Field& F, const size_t m , const size_t n,
           typename Field::Element_ptr A, const size_t lda)
    {
        const bool packed = (n == lda);
        if (packed) {
            finit (F, n*m, A, 1);
            return;
        }
        for (size_t row = 0 ; row < m ; ++row) {
            finit (F, n, A + row*lda, 1);
        }
    }

} // end of namespace FFLAS

//#include "fflas_freduce_mp.inl" moved to fflas.h

#endif // __FFLASFFPACK_fflas_freduce_H
/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
// back to top