https://github.com/Microsoft/CNTK
// cudabasetypes.h -- basic types used on both CUDA and PC side
//
// F. Seide, V-hansu

#pragma once

#ifdef __CUDA_ARCH__        // we are compiling under CUDA
#define ON_CUDA 1
#else
#define ON_CUDA 0           // TODO: this does not work for some combinations--fix this
#endif
#ifdef __device__           // CUDA headers are included: decorate functions for the device compiler
#define cudacode __device__
#define cudasharedcode __device__ __host__
#else                       // plain host build: the decorations expand to nothing
#define cudacode
#define cudasharedcode
#endif
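
// Usage sketch (not part of the original header; the function names are illustrative):
// when the CUDA headers define __device__, cudacode marks a device-only function and
// cudasharedcode marks one compiled for both device and host; in a plain host build
// both macros expand to nothing, so the same source compiles unchanged on the CPU.
//
//     cudacode       float squared (float x)                  { return x * x; }          // device-only under CUDA
//     cudasharedcode float axpy    (float a, float x, float y) { return a * x + y; }     // device and host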

#include <assert.h>

namespace msra { namespace cuda {

typedef size_t cuda_size_t; // TODO: verify that this is consistent between the CPU and CUDA compilers, or switch to a fixed-width type such as uint32_t

// we wrap CUDA pointers so that we don't accidentally use them in CPU code
template<typename T> class cuda_ptr
{
    T * p;  // device pointer; CUDA pointers have the same width as host pointers (e.g. a Win32 build uses 32-bit CUDA pointers)
public:
    void swap (cuda_ptr & other) { T * tmp = p; p = other.p; other.p = tmp; }
    cudacode T &       operator[] (size_t index)       { return p[index]; }
    cudacode const T & operator[] (size_t index) const { return p[index]; }
    cudasharedcode cuda_ptr operator+ (size_t index) const { return cuda_ptr (p + index); }
    cudasharedcode cuda_ptr operator- (size_t index) const { return cuda_ptr (p - index); }
    cuda_ptr (T * pp) : p (pp) {}
    T * get() const { return p; }
};
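
// Usage sketch (illustration only; assumes the CUDA runtime API, not part of this header):
// host code wraps a raw device allocation in a cuda_ptr so it cannot be dereferenced by
// accident--only the device-decorated operator[] reads through it.
//
//     float *rawp = NULL;
//     cudaMalloc ((void **) &rawp, 1024 * sizeof (float));   // raw device allocation
//     msra::cuda::cuda_ptr<float> devp (rawp);               // wrapped: host code cannot dereference it
//     msra::cuda::cuda_ptr<float> half = devp + 512;         // pointer arithmetic stays wrapped
//     cudaFree (devp.get());                                 // unwrap only where a raw pointer is required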

// reference to a vector (without allocation) that lives in CUDA RAM
// This can be directly passed by value to CUDA functions.
template<typename T> class vectorref
{
    cuda_ptr<T> p;      // pointer in CUDA space of this device
    cuda_size_t n;      // number of elements
public:
    cudasharedcode size_t size() const throw() { return n; }
    cudacode T &       operator[] (size_t i)       { return p[i]; }
    cudacode const T & operator[] (size_t i) const { return p[i]; }
    cuda_ptr<T> get() const throw() { return p; }
    cuda_ptr<T> reset (cuda_ptr<T> pp, size_t nn) throw() { p.swap (pp); n = nn; return pp; }   // swap in a new pointer/size; returns the previous pointer
    vectorref (cuda_ptr<T> pp, size_t nn) : p (pp), n (nn) { }
    vectorref() : p (0), n (0) { }
};
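
// Usage sketch (illustration only; 'scale' is a hypothetical kernel, not part of CNTK):
// a vectorref is just a device pointer plus an element count, so it can be passed to a
// kernel by value and indexed inside the kernel through operator[].
//
//     __global__ void scale (msra::cuda::vectorref<float> v, float factor)
//     {
//         size_t i = blockIdx.x * blockDim.x + threadIdx.x;
//         if (i < v.size())
//             v[i] *= factor;
//     }
//     // host side:  vectorref<float> vref (devp, 1024);  scale<<<4, 256>>> (vref, 2.0f);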

// reference to a matrix
template<typename T> class matrixref
{
protected:
    cuda_ptr<T> p;      // pointer in CUDA space of this device
    size_t numrows;     // rows()
    size_t numcols;     // cols()
    size_t colstride;   // height of a column = rows() rounded up to a multiple of 4
    // matrix is in column-major storage, each column padded to colstride elements
    cudasharedcode size_t locate (size_t i, size_t j) const
    {
        return j * colstride + i;
    }
    matrixref() : p (0), numrows (0), numcols (0), colstride (0) {}
public:
    matrixref(T* p, size_t numRows, size_t numCols, size_t colStride)
        : p(p), numrows(numRows), numcols(numCols), colstride(colStride)
    {
    }
    cuda_ptr<T> get() const throw() { return p; }
    cudasharedcode size_t rows() const throw() { return numrows; }
    cudasharedcode size_t cols() const throw() { return numcols; }
    cudasharedcode void reshape (const size_t newrows, const size_t newcols) { assert (rows() * cols() == newrows * newcols); numrows = newrows; numcols = newcols; }
    cudasharedcode size_t getcolstride() const throw() { return colstride; }
    cudacode T &       operator() (size_t i, size_t j)       { return p[locate(i,j)]; }
    cudacode const T & operator() (size_t i, size_t j) const { return p[locate(i,j)]; }
};
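
// Usage sketch (illustration only; assumes the CUDA runtime API): the matrix is stored
// column-major with each column padded to 'colstride' elements, so element (i,j) lives at
// offset j * colstride + i. E.g. a 6 x 3 matrix with colstride rounded up to 8 places
// element (2,1) at offset 1 * 8 + 2 = 10 and occupies colstride * cols() = 24 elements.
//
//     float *rawm = NULL;
//     size_t rows = 6, cols = 3;
//     size_t colstride = (rows + 3) / 4 * 4;                            // round rows up to a multiple of 4
//     cudaMalloc ((void **) &rawm, colstride * cols * sizeof (float));
//     msra::cuda::matrixref<float> mref (rawm, rows, cols, colstride);
//     // mref can be passed by value to a kernel and indexed there as mref(i,j)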
}}