//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "Basics.h"
#include "TensorShape.h"
#include <iterator>
namespace Microsoft { namespace MSR { namespace CNTK {
// Notes:
// * ConvolveGeometry represents the application of one or more rectangular "kernels" (all of the same size)
// to a rectangular input to produce a rectangular output.
// * A "cell" in the rectangular input is identified by a single coordinate called a "col" (for column).
// * A "cell" in the rectangular output is identified by a single coordinate called a "row".
// * The kernels may involve weights, in which case MpRowIwht indicates the starting index of the weights
// used for a given output cell.
// The overall idea of ConvolveGeometry is to precompute maps that can be used to apply convolutions of
// arbitrary configuration and dimensionality. With these maps, the generic implementation becomes very simple
// and invariant with respect to the convolution configuration and dimensionality. For specific cases, such as
// 2D/3D convolutions with full sharing, highly optimized implementations (e.g. cuDNN) are used instead.
// TODO: rename to ConvolutionGeometry
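// Usage sketch (illustrative only; the shapes are hypothetical and not taken from any particular model):
// a geometry for a 3x3 convolution over a single-channel 5x5 input, with stride 1, auto ("same") padding
// and 2 output feature maps, could be constructed as
//
//     auto geometry = std::make_shared<ConvolveGeometry>(
//         TensorShape(5, 5, 1),               // input shape (W x H x C)
//         TensorShape(3, 3, 1),               // kernel shape
//         TensorShape(2),                     // map count (number of output feature maps)
//         TensorShape(1),                     // stride
//         ConvolveGeometry::BoolVec{ true },  // sharing
//         ConvolveGeometry::BoolVec{ true },  // auto-padding
//         TensorShape(0), TensorShape(0));    // lower/upper pad (not used when auto-padding is enabled)
//
// geometry->OutputShape() is then 5 x 5 x 2, and the precomputed maps described below specify, for every
// output cell, which input cells and kernel weights it reads.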
class ConvolveGeometry final
{
public:
using IntVec = std::vector<int>;
using BoolVec = std::vector<bool>;
const TensorShape& InputShape() const { return m_inputShape; }
const TensorShape& OutputShape() const { return m_outputShape; }
const TensorShape& KernelShape() const { return m_kernelShape; }
const TensorShape& MapCount() const { return m_mapCount; }
const TensorShape& Stride() const { return m_stride; }
const BoolVec& Sharing() const { return m_sharing; }
const BoolVec& AutoPad() const { return m_autoPad; }
const TensorShape& LowerPad() const { return m_lowerPad; }
const TensorShape& UpperPad() const { return m_upperPad; }
// Maps from a "row" (index of output cell) to its base "col" (index of input cell). For a given row,
// the cols that contribute to it are { MpRowCol[row] + Indices[i0 + 1 + i] | 0 <= i < Indices[i0] },
// where i0 = MpRowIndices[row].
const IntVec& MpRowCol() const { return m_mpRowCol; }
// Maps from a "row" (index of output cell) to where to start in the weights array. Each run of weights
// consists of KernelSize weights.
const IntVec& MpRowIwht() const { return m_mpRowIwht; }
// Maps from a "row" (index of output cell) to its starting index in Runs. A run consists of:
// * skip count (to skip that many weights)
// * item count
// * relative indices into source (item count of these)
// * masks (all 1's or all 0's) (item count of these)
// For items that are masked out (0 mask), the index stored is the next valid index.
// This ensures that accessing the corresponding neuron value doesn't fault and that
// backprop operations write the correct value last (any previous writes won't change
// the value).
// NOTE: The first (zeroth) run is always the "full" kernel run. Also, MpRowRun can be empty,
// indicating that all values are zero (all outputs use the "full" kernel run).
const IntVec& MpRowRun() const { return m_mpRowRun; }
const IntVec& Runs() const { return m_runs; }
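// Sketch of how a generic (reference) forward convolution can consume these maps. Illustrative only:
// mpRowCol/mpRowIwht/mpRowRun/runs stand for the maps returned by the accessors above, and in/kernel/out
// are hypothetical dense buffers; the actual reference engines live elsewhere in the code base.
//
//     for (size_t row = 0; row < outputSize; row++)
//     {
//         int colBase = mpRowCol[row];
//         int ivBase  = mpRowIwht[row];
//         int i0      = mpRowRun.empty() ? 0 : mpRowRun[row];
//         int skip    = runs[i0++];              // skip count
//         int size    = runs[i0++];              // item count
//         int imask   = i0 + size;               // the masks follow the relative indices
//         float sum = 0;
//         for (int i = 0; i < size; i++)
//         {
//             if (runs[imask + i] == 0)          // masked out (kernel cell falls outside the input)
//                 continue;
//             sum += kernel[ivBase + skip + i] * in[colBase + runs[i0 + i]];
//         }
//         out[row] = sum;
//     }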
// Maps from a "row" (index of output cell) to its starting index in Indices. Note that "Runs" is intended
// for kernels that have weights, while "Indices" is intended for kernels that don't need to access weights.
// As a result, the encoding in Indices is simpler and more direct.
// A run in Indices consists of:
// * item count
// * relative indices into source (item count of these)
// NOTE: The first run of indices is always the "full" kernel run. Also, MpRowIndices can be empty,
// indicating that all values are zero (all outputs use the "full" kernel run).
// In addition, all items in Indices are valid source indices so no masking is required in subsequent computation.
const IntVec& MpRowIndices() const { return m_mpRowIndices; }
const IntVec& Indices() const { return m_indices; }
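// Sketch of how a kernel that does not need weights (e.g. max pooling) can consume MpRowCol/MpRowIndices/Indices.
// Illustrative only; in/out are hypothetical dense buffers.
//
//     for (size_t row = 0; row < outputSize; row++)
//     {
//         int colBase = mpRowCol[row];
//         int i0      = mpRowIndices.empty() ? 0 : mpRowIndices[row];
//         int count   = indices[i0];
//         float res = in[colBase + indices[i0 + 1]];
//         for (int i = 1; i < count; i++)
//             res = std::max(res, in[colBase + indices[i0 + 1 + i]]);
//         out[row] = res;
//     }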
// Number of kernels (equal to the total MapCount when sharing is enabled in every dimension).
size_t KernelCount() const { return m_kernelCount; }
ConvolveGeometry(const TensorShape& inputShape, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& stride,
const BoolVec& sharing, const BoolVec& autoPad, const TensorShape& lowerPad, const TensorShape& upperPad, const bool ceilOutDim = false)
: m_inputShape(inputShape), m_kernelShape(kernelShape), m_mapCount(mapCount), m_stride(stride), m_sharing(sharing),
m_autoPad(autoPad), m_lowerPad(lowerPad), m_upperPad(upperPad)
{
// Note: this ctor is a bit long so sit back and relax.
assert(m_inputShape.GetRank() == m_kernelShape.GetRank());
assert(m_mapCount.GetRank() == 1 || m_mapCount.GetRank() == m_inputShape.GetRank());
assert(m_stride.GetRank() == 1 || m_stride.GetRank() == m_inputShape.GetRank());
assert(m_sharing.size() == 1 || m_sharing.size() == m_inputShape.GetRank());
assert(m_autoPad.size() == 1 || m_autoPad.size() == m_inputShape.GetRank());
assert(m_lowerPad.GetRank() == 1 || m_lowerPad.GetRank() == m_inputShape.GetRank());
assert(m_upperPad.GetRank() == 1 || m_upperPad.GetRank() == m_inputShape.GetRank());
m_outputShape = ComputeOutputShape(m_inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad, ceilOutDim);
assert(m_inputShape.GetRank() == m_outputShape.GetRank());
size_t dimCount = inputShape.GetRank();
size_t kernelSize = kernelShape.GetNumElements();
// Compute the total number of kernels.
m_kernelCount = 1;
for (size_t i = 0; i < dimCount; i++)
m_kernelCount *= !GetSharing(i) ? m_outputShape[i] : GetMapCount(i);
// Compute the "Start" indices.
m_start.resize(dimCount);
m_startIndex = 0;
m_originIndex = 0;
for (int i = (int)dimCount - 1; i >= 0; i--)
{
assert((m_outputShape[i] % GetMapCount(i)) == 0);
int outPerMap = (int)(m_outputShape[i] / GetMapCount(i));
// Number of cells between first and last "centers", inclusive.
int cells = (int)((outPerMap - 1) * GetStride(i) + 1);
assert(m_inputShape[i] >= cells);
// Extra cells, to the left and right of "cells".
int extra = (int)m_inputShape[i] - cells;
assert(extra >= 0);
bool padded = GetAutoPad(i);
if (padded)
{
m_start[i] = extra / 2;
}
else
{
m_start[i] = ((int)m_kernelShape[i] - 1) / 2;
int lo = (int)m_lowerPad[m_lowerPad.size() == 1 ? 0 : i];
int hi = (int)m_upperPad[m_upperPad.size() == 1 ? 0 : i];
if (lo != 0 || hi != 0)
{
m_start[i] -= lo;
assert(m_start[i] >= 0);
assert(m_start[i] + cells + (int)m_kernelShape[i] - 1 == m_inputShape[i] + hi + lo);
}
}
m_startIndex = m_startIndex * (int)m_inputShape[i] + m_start[i];
m_originIndex = m_originIndex * (int)m_inputShape[i] + ((int)m_kernelShape[i] - 1) / 2;
}
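// Worked example (hypothetical shapes): for a 5x5x1 input with a 3x3x1 kernel, stride 1 and a single map,
// no padding yields a 3x3x1 output. Per spatial dim: outPerMap = 3, cells = 3, extra = 2, and since
// auto-padding is off, m_start[i] = (3 - 1) / 2 = 1, i.e. the first kernel center sits at input coordinate 1
// (so m_startIndex = 1 + 1 * 5 = 6). With auto-padding the output is 5x5x1: cells = 5, extra = 0,
// m_start[i] = 0 and m_startIndex = 0, i.e. the first kernel center sits at input coordinate 0 and the
// kernel hangs over the (implicit) padded border. In both cases m_originIndex = 6, the linear index of the
// kernel-center cell (1, 1, 0) when the kernel's corner is aligned with input cell 0.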
// Compute support, mapping from the index into the kernel to offset into source.
// Support consists of the column deltas of the kernels, as offsets from MpRowCol[row].
IntVec support(kernelSize);
std::vector<IntVec> kernelCoords(kernelSize);
for (int idx = 0; idx < kernelSize; idx++)
{
kernelCoords[idx].resize(dimCount);
int ivSrc = 0;
int factor = 1;
int cur = idx;
for (size_t i = 0; i < dimCount; i++)
{
assert(cur >= 0);
int d = (int)m_kernelShape[i];
assert(d > 0);
int coord = cur % d;
cur /= d;
kernelCoords[idx][i] = coord;
ivSrc += factor * coord;
factor *= (int)m_inputShape[i];
}
assert(cur == 0);
assert(ivSrc < m_inputShape.GetNumElements());
support[idx] = ivSrc - m_originIndex;
}
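// Continuing the worked example above (5x5x1 input, 3x3x1 kernel, m_originIndex = 6):
// support = { -6, -5, -4, -1, 0, 1, 4, 5, 6 }, i.e. the column offsets, relative to MpRowCol[row],
// of the nine kernel cells enumerated with the first dimension varying fastest.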
size_t outputSize = m_outputShape.GetNumElements();
// Compute the mappings (where row = output node index, col = source node index):
// * from row to the index of the first weight to use for that row.
// * from row to the first input col. The rest are col + _support[i].
m_mpRowIwht.resize(outputSize);
m_mpRowCol.resize(outputSize);
m_mpRowRun.resize(outputSize);
m_mpRowIndices.resize(outputSize);
// A "key" is an equivalence class of run/masks.
// Calculate the key for an interior cell (for using all of support - when all masks are 1's).
int keyInterior = 0;
for (size_t i = 0; i < dimCount; i++)
{
int width = (int)m_kernelShape[i];
keyInterior = keyInterior * width + (width - 1) / 2;
}
m_runs.resize(2 * kernelSize + 2, -1);
m_indices.resize(kernelSize + 1);
m_runs[0] = 0; // Skip count
m_runs[1] = (int)kernelSize; // Count of entries
m_indices[0] = (int)kernelSize;
for (size_t i = 0; i < kernelSize; i++)
{
m_runs[2 + i] = support[i];
m_indices[1 + i] = support[i];
}
// Working buffer for masks.
IntVec masks(kernelSize);
// Map from key to pair of starting locations in Runs and Indices.
std::map<int, std::pair<int, int>> mpkeystarts;
mpkeystarts[keyInterior] = std::make_pair(0, 0);
IntVec dkey(dimCount);
for (size_t row = 0; row < outputSize; row++)
{
// Compute the kernel number, column, and key.
// REVIEW alexeyk: Seems like there should be a simpler and faster way, without starting
// from scratch for each output (row)....
int kern = 0;
int col = 0;
int factorKern = 1;
int factorCol = 1;
int key = 0;
int cur = (int)row;
for (size_t i = 0; i < dimCount; i++)
{
int dim = (int)(m_outputShape[i] / GetMapCount(i));
int coord = cur % dim;
cur /= dim;
// Kernel
if (!GetSharing(i))
{
kern += factorKern * coord;
factorKern *= dim;
}
int maps = (int)GetMapCount(i);
if (maps > 1)
{
kern += factorKern * (cur % maps);
cur /= maps;
factorKern *= maps;
}
// Transform coord to input index space.
coord *= (int)GetStride(i);
coord += m_start[i];
col += factorCol * coord;
factorCol *= (int)m_inputShape[i];
int width = (int)m_kernelShape[i];
int half = (width - 1) / 2;
int min = coord - half;
int lim = min + width;
if (min < 0)
dkey[i] = min;
else if (lim > m_inputShape[i])
dkey[i] = lim - (int)m_inputShape[i];
else
dkey[i] = 0;
int dk = dkey[i] + half;
assert(0 <= dk);
assert(dk < width);
key = key * width + dk;
}
assert(cur == 0);
assert(0 <= kern);
assert(kern < m_kernelCount);
assert(0 <= col);
assert(col < m_inputShape.GetNumElements());
auto startsIter = mpkeystarts.find(key);
if (startsIter == mpkeystarts.end())
{
auto starts = std::make_pair((int)m_runs.size(), (int)m_indices.size());
mpkeystarts[key] = starts;
int indexCount = 0;
for (int idx = 0; idx < kernelSize; idx++)
{
const auto& coords = kernelCoords[idx];
int mask = 0;
for (int i = (int)dimCount; ; )
{
if (--i < 0)
{
// All OK.
mask = -1;
break;
}
int k = dkey[i] + coords[i];
if (k < 0)
break;
if (k >= m_kernelShape[i])
break;
}
assert(mask == 0 || mask == -1);
indexCount -= mask;
masks[idx] = mask;
}
int skip = 0;
while (masks[skip] == 0)
skip++;
int count = (int)kernelSize;
while (masks[count - 1] == 0)
count--;
count -= skip;
m_runs.push_back(skip); // Skip count
m_runs.push_back(count); // Count of entries
m_indices.push_back(indexCount);
for (int i = 0, iMin = 0; i < count; i++)
{
int index = support[skip + i];
int mask = masks[skip + i];
if (mask != 0)
{
// Add "index" to runs for this slot and any immediately preceeding
// slots that have mask == 0.
assert(iMin <= i);
assert(m_runs.size() == starts.first + 2 + iMin);
for (; iMin <= i; iMin++)
m_runs.push_back(index);
assert(iMin == i + 1);
assert(m_runs.size() == starts.first + 2 + iMin);
m_indices.push_back(index);
}
}
for (int i = 0; i < count; i++)
m_runs.push_back(masks[skip + i]);
assert(m_runs.size() == std::get<0>(starts) + 2 + 2 * count);
assert(m_indices.size() == std::get<1>(starts) + 1 + indexCount);
m_mpRowRun[row] = starts.first;
m_mpRowIndices[row] = starts.second;
}
else
{
m_mpRowRun[row] = (*startsIter).second.first;
m_mpRowIndices[row] = (*startsIter).second.second;
}
assert(0 <= kern);
assert(kern < m_kernelCount);
m_mpRowCol[row] = col;
m_mpRowIwht[row] = kern * (int)kernelSize;
}
}
size_t GetStride(size_t dim) const
{
assert(m_stride.size() == 1 || dim < m_stride.size());
return m_stride[m_stride.size() == 1 ? 0 : dim];
}
size_t GetMapCount(size_t dim) const
{
assert(m_mapCount.size() == 1 || dim < m_mapCount.size());
// If the whole map count tensor was specified explicitly - return requested component.
if (m_mapCount.size() > 1)
return m_mapCount[dim];
// If the map count tensor has rank 1, assume it represents the number of feature maps for the rightmost dimension.
if (dim == m_inputShape.size() - 1)
return m_mapCount[0];
return 1;
}
bool GetSharing(size_t dim) const
{
assert(m_sharing.size() == 1 || dim < m_sharing.size());
return m_sharing[m_sharing.size() == 1 ? 0 : dim];
}
bool GetAutoPad(size_t dim) const
{
assert(m_autoPad.size() == 1 || dim < m_autoPad.size());
return m_autoPad[m_autoPad.size() == 1 ? 0 : dim];
}
int GetLowerPad(size_t dim) const
{
if (!GetAutoPad(dim))
return (int)m_lowerPad[m_lowerPad.size() == 1 ? 0 : dim];
int kernSize = (int)m_kernelShape[dim];
int inpSize = (int)m_inputShape[dim];
int outSize = (int)m_outputShape[dim];
int stride = (int)GetStride(dim);
// Taken from computation in ConvolveGeometry ctor.
// Number of cells between first and last "centers", inclusive.
int cells = (outSize - 1) * stride + 1;
// Extra cells, to the left and right of "cells".
int extra = inpSize - cells;
int center = extra / 2;
return -(center - (kernSize - 1) / 2);
}
int GetUpperPad(size_t dim) const
{
if (!GetAutoPad(dim))
return (int)m_upperPad[m_upperPad.size() == 1 ? 0 : dim];
int kernSize = (int)m_kernelShape[dim];
int inpSize = (int)m_inputShape[dim];
int outSize = (int)m_outputShape[dim];
int stride = (int)GetStride(dim);
// Taken from computation in ConvolveGeometry ctor.
// Number of cells between first and last "centers", inclusive.
int cells = (outSize - 1) * stride + 1;
// Extra cells, to the left and right of "cells".
int extra = inpSize - cells;
int center = extra / 2;
return (kernSize - 1) - (kernSize - 1) / 2 - (extra - center);
}
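// Example of the auto-padding case: for an input dim of 10, kernel 3 and stride 2, the output dim is 5,
// so cells = (5 - 1) * 2 + 1 = 9, extra = 1 and center = 0, giving GetLowerPad() = 1 and GetUpperPad() = 0
// (the implicit padding can be asymmetric). With stride 1 the output dim is 10, extra = 0 and both pads are 1.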
// Computes output shape given input shape and other convolution parameters.
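// Per dimension (ignoring the map count factor): dimOut = floor((dimIn + pad - kernelDim) / stride + 1),
// or ceil when ceilOutDim is set, where pad is kernelDim - 1 with auto-padding and lowerPad + upperPad otherwise.
// For example, an input dim of 28 with kernel 5 and stride 2 yields 14 with auto-padding and 12 without.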
static TensorShape ComputeOutputShape(const TensorShape& inputShape, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& stride,
const BoolVec& sharing, const BoolVec& autoPad, const TensorShape& lowerPad, const TensorShape& upperPad, const bool ceilOutDim = false)
{
if (inputShape.GetRank() != kernelShape.GetRank())
InvalidArgument("Convolution input and kernel tensors must have the same rank.");
if (mapCount.GetRank() != 1 && inputShape.GetRank() != mapCount.GetRank())
InvalidArgument("Convolution map tensor must have rank 1 or the same as the input tensor.");
if (stride.GetRank() != 1 && inputShape.GetRank() != stride.GetRank())
InvalidArgument("Convolution stride tensor must have rank 1 or the same as the input tensor.");
if (sharing.size() != 1 && inputShape.GetRank() != sharing.size())
InvalidArgument("Convolution sharing tensor must have rank 1 or the same as the input tensor.");
if (autoPad.size() != 1 && inputShape.GetRank() != autoPad.size())
InvalidArgument("Convolution padding tensor must have rank 1 or the same as the input tensor.");
if (lowerPad.GetRank() != 1 && inputShape.GetRank() != lowerPad.GetRank())
InvalidArgument("Convolution lower pad tensor must have rank 1 or the same as the input tensor.");
if (upperPad.GetRank() != 1 && inputShape.GetRank() != upperPad.GetRank())
InvalidArgument("Convolution upper pad tensor must have rank 1 or the same as the input tensor.");
SmallVector<size_t> dimsOutput(inputShape.GetRank());
for (size_t i = 0; i < inputShape.GetRank(); i++)
{
assert(inputShape[i] >= 1);
if (kernelShape[i] > inputShape[i])
InvalidArgument("Convolution operation requires that kernel dim %d <= input dim %d.", (int)kernelShape[i], (int)inputShape[i]);
size_t delta = stride[stride.GetRank() == 1 ? 0 : i];
size_t dim = inputShape[i];
bool autoPadCur = autoPad[autoPad.size() == 1 ? 0 : i];
size_t lo = lowerPad[lowerPad.size() == 1 ? 0 : i];
size_t hi = upperPad[upperPad.size() == 1 ? 0 : i];
if (autoPadCur)
{
dim += kernelShape[i] - 1;
}
else
{
dim += lo + hi;
}
float preciseDimOut = (float)(dim - kernelShape[i]) / delta + 1;
size_t dimOut = static_cast<size_t>(ceilOutDim ? ceil(preciseDimOut) : floor(preciseDimOut));
// When LowerPad and/or UpperPad are specified (i.e. > 0), we insist that the kernel applications
// fill the entire space.
if (!autoPadCur && (lo > 0 || hi > 0))
{
size_t size = (dimOut - 1) * delta + kernelShape[i];
if (size != dim)
InvalidArgument("Convolution requires that kernel fills the entire space if auto-padding is disabled.");
}
if (mapCount.size() > 1)
dimOut *= mapCount[i];
else if (i == inputShape.GetRank() - 1)
dimOut *= mapCount[0];
dimsOutput[i] = dimOut;
}
auto dimsOut = TensorShape(dimsOutput);
// Check the output dimensions.
size_t mapCountTotal = mapCount.GetNumElements();
size_t sizeOut = dimsOut.GetNumElements();
assert((sizeOut % mapCountTotal) == 0);
UNUSED(mapCountTotal);
UNUSED(sizeOut);
return dimsOut;
}
// Computes input shape given output shape and other convolution parameters.
// Used in deconvolution operation.
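// Per dimension (after dividing out the map count): dimIn = (dimOut - 1) * stride + 1 with auto-padding,
// or (dimOut - 1) * stride + kernelDim - (lowerPad + upperPad) otherwise.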
static TensorShape ComputeInputShape(const TensorShape& outputShape, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& stride,
const BoolVec& sharing, const BoolVec& autoPad, const TensorShape& lowerPad, const TensorShape& upperPad, bool ceilOutDim = false)
{
UNUSED(ceilOutDim);
if (outputShape.GetRank() != kernelShape.GetRank())
InvalidArgument("Convolution output and kernel tensors must have the same rank.");
if (mapCount.GetRank() != 1 && outputShape.GetRank() != mapCount.GetRank())
InvalidArgument("Convolution map tensor must have rank 1 or the same as the output tensor.");
if (stride.GetRank() != 1 && outputShape.GetRank() != stride.GetRank())
InvalidArgument("Convolution stride tensor must have rank 1 or the same as the output tensor.");
if (sharing.size() != 1 && outputShape.GetRank() != sharing.size())
InvalidArgument("Convolution sharing tensor must have rank 1 or the same as the output tensor.");
if (autoPad.size() != 1 && outputShape.GetRank() != autoPad.size())
InvalidArgument("Convolution padding tensor must have rank 1 or the same as the output tensor.");
if (lowerPad.GetRank() != 1 && outputShape.GetRank() != lowerPad.GetRank())
InvalidArgument("Convolution lower pad tensor must have rank 1 or the same as the output tensor.");
if (upperPad.GetRank() != 1 && outputShape.GetRank() != upperPad.GetRank())
InvalidArgument("Convolution upper pad tensor must have rank 1 or the same as the output tensor.");
SmallVector<size_t> dimsInput(outputShape.GetRank());
for (size_t i = 0; i < outputShape.GetRank(); i++)
{
assert(outputShape[i] >= 1);
size_t delta = stride[stride.GetRank() == 1 ? 0 : i];
size_t dim = outputShape[i];
// Input dimension does not include output map count.
size_t curMapCount = 1;
if (mapCount.size() > 1)
curMapCount = mapCount[i];
else if (i == outputShape.GetRank() - 1)
curMapCount = mapCount[0];
assert((dim % curMapCount) == 0);
dim /= curMapCount;
bool autoPadCur = autoPad[autoPad.size() == 1 ? 0 : i];
size_t lo = lowerPad[lowerPad.size() == 1 ? 0 : i];
size_t hi = upperPad[upperPad.size() == 1 ? 0 : i];
size_t dimIn = (dim - 1) * delta;
// We need to be able to restore any input size from the output, not just the one
// that does not require padding. For example, if the output is 14, the stride is 2 and the
// desired input is 28, then the padded input will be 31. In this case, if auto-padding is enabled,
// the computed input will be 27, as (27 - 1) / 2 + 1 == 14.
if (autoPadCur)
dimIn += 1;
else
dimIn += (int64_t)kernelShape[i] - (lo + hi);
// When LowerPad and/or UpperPad are specified (i.e. > 0), we insist that the kernel applications
// fill the entire space.
if (!autoPadCur && (lo > 0 || hi > 0))
{
size_t size = (dimIn - kernelShape[i] + lo + hi) / delta + 1;
if (size != dim)
InvalidArgument("Convolution requires that kernel fills the entire space if auto-padding is disabled.");
}
dimsInput[i] = dimIn;
}
return TensorShape(dimsInput);
}
// Used in unit tests and during debugging.
operator std::string() const
{
std::ostringstream res;
res << "Input: " << (string)InputShape();
res << ", Output: " << (string)OutputShape();
res << ", Kernel: " << (string)KernelShape();
res << ", Map: " << (string)MapCount();
res << ", Stride: " << (string)Stride();
res << ", Sharing: (";
std::copy(begin(Sharing()), end(Sharing()) - 1, std::ostream_iterator<bool>(res, ", "));
res << Sharing().back() << ")";
res << ", AutoPad: (";
std::copy(begin(AutoPad()), end(AutoPad()) - 1, std::ostream_iterator<bool>(res, ", "));
res << AutoPad().back() << ")";
res << ", LowerPad: " << (string)LowerPad();
res << ", UpperPad: " << (string)UpperPad();
return res.str();
}
DISABLE_COPY_AND_MOVE(ConvolveGeometry);
private:
TensorShape m_inputShape;
TensorShape m_outputShape;
TensorShape m_kernelShape;
TensorShape m_mapCount;
TensorShape m_stride;
BoolVec m_sharing;
BoolVec m_autoPad;
TensorShape m_lowerPad;
TensorShape m_upperPad;
// There are several reasons why int type is used here rather than size_t:
// 1. Many of these vectors contain offsets which can be negative.
// 2. Most of these vectors will be copied into device memory (GPU) so the smaller the size - the better.
// Also, 64-bit operations are slower on GPU.
// 3. If you are still not convinced, we don't expect convolutions to be more than 2B in size anyway.
// See description to corresponding getter functions to understand what these are.
IntVec m_mpRowCol;
IntVec m_mpRowIwht;
IntVec m_mpRowRun;
IntVec m_runs;
IntVec m_mpRowIndices;
IntVec m_indices;
// Per-dimension coordinates of the first ("top-left-most") "kernel-center" cell in the source.
IntVec m_start;
// The same position as m_start, flattened into a single linear index into the source.
int m_startIndex;
// When the first kernel cell is aligned with the first source cell, this is the index of the input cell that
// is aligned with the "kernel-center" cell. Indices in "Runs" and "Indices" are relative to OriginIndex.
int m_originIndex;
size_t m_kernelCount;
};
using ConvolveGeometryPtr = std::shared_ptr<ConvolveGeometry>;
} } }