swh:1:snp:f50ab94432af916b5fb8b4ad831e8dddded77084
Raw File
Tip revision: a8e76a0261bd86abc2717bed593b592b93b53198 authored by Vadim Mazalov on 30 October 2018, 03:28:55 UTC
Enable NCCL Group start/end
Tip revision: a8e76a0
HalfGPUTests.cpp
//
// Copyright (c) Microsoft. All rights reserved.
// Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// Half data type GPU tests should go here
//
#include "stdafx.h"
#include "../../../Source/Math/GPUMatrix.h"
#include "../../../Source/Math/Matrix.h"
#include "../../../Source/Math/half.hpp"

using namespace Microsoft::MSR::CNTK;

namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {

// Tests from GPUMatrixCudaBlasTests.cpp
BOOST_AUTO_TEST_SUITE(Half_GPUMatrixSuite)

// Checks GPUMatrix<half>::MultiplyAndWeightedAdd (c = alpha * a * b + beta * c)
// on all-ones operands, where the expected result is easy to derive by hand.
BOOST_FIXTURE_TEST_CASE(GPUBlasMultiplyAndWeightedAdd, RandomSeedFixture)
{
    const half alpha = 2.0f;
    const half beta = 0.42f;

    GPUMatrix<half> lhs(12, 5, c_deviceIdZero);
    lhs.SetValue(1);
    GPUMatrix<half> rhs(5, 11, c_deviceIdZero);
    rhs.SetValue(1);
    GPUMatrix<half> accumulator(12, 11, c_deviceIdZero);
    accumulator.SetValue(1);

    // accumulator = alpha * lhs * rhs + beta * accumulator
    GPUMatrix<half>::MultiplyAndWeightedAdd(alpha, lhs, false, rhs, false, beta, accumulator);

    // Every entry of lhs * rhs sums five ones, so each output is 2 * 5 + 0.42 * 1 = 10.42.
    GPUMatrix<half> expected(12, 11, c_deviceIdZero);
    expected.SetValue(10.42f);
    BOOST_CHECK(accumulator.IsEqualTo(expected, c_epsilonFloatE4));
}

// Checks GPUMatrix<half>::Scale: a matrix filled with 4.2 scaled by 0.5
// must match a matrix filled with 2.1.
BOOST_FIXTURE_TEST_CASE(GPUBlasScale, RandomSeedFixture)
{
    const half factor = 0.5f;

    GPUMatrix<half> scaled(12, 53, c_deviceIdZero);
    scaled.SetValue(4.2f);
    GPUMatrix<half>::Scale(factor, scaled);

    GPUMatrix<half> expected(12, 53, c_deviceIdZero);
    expected.SetValue(2.1f);
    BOOST_CHECK(scaled.IsEqualTo(expected, c_epsilonFloatE4));
}

// Checks GPUMatrix<half>::InnerProduct of two 10x10 matrices filled with 2.
// Each inner product sums ten terms of 2 * 2, hence the expected value 40.
BOOST_FIXTURE_TEST_CASE(GPUBlasInnerProduct, RandomSeedFixture)
{
    GPUMatrix<half> lhs(10, 10, c_deviceIdZero);
    GPUMatrix<half> rhs(10, 10, c_deviceIdZero);
    GPUMatrix<half> product(1, 10, c_deviceIdZero);
    lhs.SetValue(2);
    rhs.SetValue(2);
    product.SetValue(2);

    // With the flag set, the result is compared against a 1x10 matrix.
    GPUMatrix<half>::InnerProduct(lhs, rhs, product, true);
    GPUMatrix<half> expected(1, 10, c_deviceIdZero);
    expected.SetValue(40);
    BOOST_CHECK(product.IsEqualTo(expected, c_epsilonFloatE4));

    // With the flag cleared, the result is compared against the transposed (10x1) expectation.
    GPUMatrix<half>::InnerProduct(lhs, rhs, product, false);
    BOOST_CHECK(product.IsEqualTo(expected.Transpose(), c_epsilonFloatE4));
}

// TODO: add tests for other CUDA BLAS methods?

BOOST_AUTO_TEST_SUITE_END()

// Tests from GPUMatrixTests.cpp

BOOST_AUTO_TEST_SUITE(Half_GPUMatrixSuite)

// Exercises Matrix<half> copy construction and AssignValuesOf between the CPU (-1),
// GPU 0 and, when a second GPU is detected, GPU 1. Apart from the final
// cross-GPU case, the test only requires that these operations complete.
BOOST_FIXTURE_TEST_CASE(MatrixCopyAssignAcrossDevices, RandomSeedFixture)
{
    bool multiGpu = false;
#ifndef CPUONLY
    auto gpuList = GetAllGpusData();
    multiGpu = (gpuList.size() > 1);
#endif
    std::array<half, 6> hostValues = { 1, 2, 3, 4, 5, 6 };

    // Copy-construct from a GPU-resident source.
    {
        Matrix<half> gpuSource(2, 3, hostValues.data(), c_deviceIdZero, matrixFlagNormal);
        Matrix<half> sameGpuCopy(gpuSource, c_deviceIdZero);
        if (multiGpu)
        {
            Matrix<half> otherGpuCopy(gpuSource, c_deviceIdZero + 1);
        }
        Matrix<half> cpuCopy(gpuSource, -1);
    }

    // Copy-construct from a CPU-resident source.
    {
        Matrix<half> cpuSource(2, 3, hostValues.data(), -1, matrixFlagNormal);
        Matrix<half> gpuCopy(cpuSource, c_deviceIdZero);
        if (multiGpu)
        {
            Matrix<half> otherGpuCopy(cpuSource, c_deviceIdZero + 1);
        }
        Matrix<half> cpuCopy(cpuSource, -1);
    }

    // AssignValuesOf from a GPU-resident source into empty matrices on each device.
    {
        Matrix<half> gpuSource(2, 3, hostValues.data(), c_deviceIdZero, matrixFlagNormal);
        Matrix<half> sameGpuTarget(c_deviceIdZero);
        sameGpuTarget.AssignValuesOf(gpuSource);
        if (multiGpu)
        {
            Matrix<half> otherGpuTarget(c_deviceIdZero + 1);
            otherGpuTarget.AssignValuesOf(gpuSource);
        }
        Matrix<half> cpuTarget(-1);
        cpuTarget.AssignValuesOf(gpuSource);
    }

    // Assigning into an already-sized matrix on a different GPU is currently expected to throw.
    if (multiGpu)
    {
        Matrix<half> onGpu0(2, 3, hostValues.data(), c_deviceIdZero, matrixFlagNormal);
        Matrix<half> onGpu1(2, 3, c_deviceIdZero + 1, onGpu0.GetMatrixType(), onGpu0.GetFormat());
        try
        {
            // TODO: fix this!
            onGpu1.AssignValuesOf(onGpu0);
            BOOST_TEST(false, "Expected AssignValuesOf to fail.");
        }
        catch (...)
        {
            // Any exception counts as the expected failure.
        }
    }
}

// Covers the GPUMatrix<half> constructors that take no matrix flag: device-only,
// rows/cols/device, and copy construction.
BOOST_FIXTURE_TEST_CASE(GPUMatrixConstructorNoFlag, RandomSeedFixture)
{
    // TODO: consider splitting into several tests
    GPUMatrix<half> emptyMatrix(c_deviceIdZero);
    BOOST_CHECK(emptyMatrix.IsEmpty());

    GPUMatrix<half> sized(12, 53, c_deviceIdZero);
    BOOST_CHECK_EQUAL(12, sized.GetNumRows());
    BOOST_CHECK_EQUAL(53, sized.GetNumCols());
    BOOST_CHECK_EQUAL(12 * 53, sized.GetNumElements());

    // Load two host values and read them back through CopyToArray().
    std::array<half, 2> hostValues = {1, 14};
    sized.SetValue(1, 2, c_deviceIdZero, hostValues.data());

    unique_ptr<half[]> readBack(sized.CopyToArray());
    BOOST_CHECK_EQUAL_COLLECTIONS(readBack.get(), readBack.get() + 2, hostValues.begin(), hostValues.end());

    // A copy-constructed matrix must compare equal to its source.
    GPUMatrix<half> clone(sized);
    BOOST_CHECK(sized.IsEqualTo(clone));
}

// Constructs a 2x3 GPUMatrix<half> from a host array with matrixFlagNormal and
// verifies that CopyToArray() returns the same six values in the same order.
BOOST_FIXTURE_TEST_CASE(GPUMatrixConstructorFlagNormal, RandomSeedFixture)
{
    std::array<half, 6> hostValues = {1, 2, 3, 4, 5, 6};
    GPUMatrix<half> matrix(2, 3, c_deviceIdZero, hostValues.data(), matrixFlagNormal);

    unique_ptr<half[]> readBack(matrix.CopyToArray());
    BOOST_CHECK_EQUAL_COLLECTIONS(readBack.get(), readBack.get() + 6, hostValues.begin(), hostValues.end());
}

// Checks the Eye and Zeros factories element by element on a 60x60 matrix.
BOOST_FIXTURE_TEST_CASE(GPUMatrixIdentityAndZero, RandomSeedFixture)
{
    // TODO: consider splitting into two separate tests?
    const int dim = 60;

    // Identity: ones where both indices match, zeros everywhere else.
    GPUMatrix<half> identity(GPUMatrix<half>::Eye(dim, c_deviceIdZero));
    unique_ptr<half[]> identityValues(identity.CopyToArray());
    for (int row = 0; row < dim; row++)
    {
        for (int col = 0; col < dim; col++)
        {
            BOOST_CHECK_CLOSE(identityValues[row * dim + col], row == col, 0.01);
        }
    }

    // Zeros: every element must be zero.
    GPUMatrix<half> zeros(GPUMatrix<half>::Zeros(dim, dim, c_deviceIdZero));
    unique_ptr<half[]> zeroValues(zeros.CopyToArray());
    for (int row = 0; row < dim; row++)
    {
        for (int col = 0; col < dim; col++)
        {
            BOOST_CHECK_CLOSE(zeroValues[row * dim + col], 0.0f, 0.01);
        }
    }
}

// Checks ElementWisePower (integer-valued and fractional exponent) and AssignAbsOf
// on a constant-valued matrix, comparing against host-side std::pow / the known value.
BOOST_FIXTURE_TEST_CASE(GPUMatrixElementWiseOperations, RandomSeedFixture)
{
    const half base = 3.0;
    const int rows = 16;
    const int cols = 23;

    GPUMatrix<half> input(rows, cols, c_deviceIdZero);
    input.SetValue(base);
    GPUMatrix<half> output(rows, cols, c_deviceIdZero);
    GPUMatrix<half> expected(rows, cols, c_deviceIdZero);

    // element-wise power, exponent 2
    half exponent = 2.0f;
    GPUMatrix<half>::ElementWisePower(exponent, input, output);
    expected.SetValue(std::pow(base, exponent));
    BOOST_CHECK(expected.IsEqualTo(output, c_epsilonFloatE4));

    // element-wise power, fractional exponent
    exponent = 0.234f;
    GPUMatrix<half>::ElementWisePower(exponent, input, output);
    expected.SetValue(std::pow(base, exponent));
    BOOST_CHECK(expected.IsEqualTo(output, c_epsilonFloatE4));

    // element-wise absolute value: |-base| must give back base
    input.SetValue(-base);
    output.AssignAbsOf(input);
    expected.SetValue(base);
    BOOST_CHECK(expected.IsEqualTo(output));

    // TODO: add other element wise operations?
}

// Checks the in-place unary operations (exp, log, tanh, abs, sqrt, sigmoid) on a
// constant-valued matrix against the same function evaluated on the host.
BOOST_FIXTURE_TEST_CASE(GPUMatrixInplaceOperations, RandomSeedFixture)
{
    const half value = 0.42f;
    const int rows = 16;
    const int cols = 23;

    GPUMatrix<half> actual(rows, cols, c_deviceIdZero);
    GPUMatrix<half> expected(rows, cols, c_deviceIdZero);

    // exp
    actual.SetValue(value);
    actual.InplaceExp();
    expected.SetValue(std::exp(value));
    BOOST_CHECK(expected.IsEqualTo(actual, c_epsilonFloatE4));

    // log
    actual.SetValue(value);
    actual.InplaceLog();
    expected.SetValue(std::log(value));
    BOOST_CHECK(expected.IsEqualTo(actual, c_epsilonFloatE4));

    // tanh
    actual.SetValue(value);
    actual.InplaceTanh();
    expected.SetValue(std::tanh(value));
    BOOST_CHECK(expected.IsEqualTo(actual, c_epsilonFloatE4));

    // abs, applied to the negated value
    actual.SetValue(-value);
    actual.InplaceAbs();
    expected.SetValue(value);
    BOOST_CHECK(expected.IsEqualTo(actual, c_epsilonFloatE4));

    // sqrt
    actual.SetValue(value);
    actual.InplaceSqrt();
    expected.SetValue(std::sqrt(value));
    BOOST_CHECK(expected.IsEqualTo(actual, c_epsilonFloatE4));

    // sigmoid: 1 / (1 + e^-x)
    actual.SetValue(value);
    actual.InplaceSigmoid();
    expected.SetValue(1 / (std::exp(-value) + 1));
    BOOST_CHECK(expected.IsEqualTo(actual, c_epsilonFloatE4));

    // TODO: there are two more inplace operations. Test these? compare to CPU results?
}

// Checks addition and subtraction on GPUMatrix<half>, both as binary operators and
// as compound assignments, with a matrix, a scalar, a column vector, a row vector
// and a 1x1 matrix as the right-hand operand.
// The fixtures are chosen so that m1 == m0 + 10 and m2 == m0 + m1 element-wise,
// which lets every step be checked against one of m0, m1 or m2.
BOOST_FIXTURE_TEST_CASE(GPUMatrixAddAndSub, RandomSeedFixture)
{
    std::array<half, 6> array0 = {1, 2, 3, 4, 5, 6};
    GPUMatrix<half> m0(2, 3, c_deviceIdZero, array0.data(), matrixFlagNormal);

    std::array<half, 6> array1 = {11, 12, 13, 14, 15, 16};
    GPUMatrix<half> m1(2, 3, c_deviceIdZero, array1.data(), matrixFlagNormal);

    std::array<half, 6> array2 = {12, 14, 16, 18, 20, 22};
    GPUMatrix<half> m2(2, 3, c_deviceIdZero, array2.data(), matrixFlagNormal);

    // All-tens operands: a 2x1 column (mc), a 1x3 row (mr) and a 1x1 matrix (ms),
    // each built from the leading elements of the same host array.
    std::array<half, 3> arrayCRS = {10, 10, 10};
    GPUMatrix<half> mc(2, 1, c_deviceIdZero, arrayCRS.data(), matrixFlagNormal);
    GPUMatrix<half> mr(1, 3, c_deviceIdZero, arrayCRS.data(), matrixFlagNormal);
    GPUMatrix<half> ms(1, 1, c_deviceIdZero, arrayCRS.data(), matrixFlagNormal);

    // matrix - matrix, then matrix += matrix
    GPUMatrix<half> m3 = m2 - m0;
    BOOST_CHECK(m3.IsEqualTo(m1));

    m3 += m0;
    BOOST_CHECK(m3.IsEqualTo(m2));

    // matrix + scalar, then matrix -= scalar
    m3 = m0 + 10;
    BOOST_CHECK(m3.IsEqualTo(m1));

    m3 -= 10;
    BOOST_CHECK(m3.IsEqualTo(m0));

    // matrix + matrix, then matrix -= matrix
    m3 = m1 + m0;
    BOOST_CHECK(m3.IsEqualTo(m2));

    m3 -= m0;
    BOOST_CHECK(m3.IsEqualTo(m1));

    // matrix - scalar, then matrix += scalar
    m3 = m1 - 10;
    BOOST_CHECK(m3.IsEqualTo(m0));

    m3 += 10;
    BOOST_CHECK(m3.IsEqualTo(m1));

    // column-vector operand: expected to shift every element by 10
    m3 -= mc;
    BOOST_CHECK(m3.IsEqualTo(m0));

    m3 += mc;
    BOOST_CHECK(m3.IsEqualTo(m1));

    // row-vector operand: expected to shift every element by 10
    m3 -= mr;
    BOOST_CHECK(m3.IsEqualTo(m0));

    m3 += mr;
    BOOST_CHECK(m3.IsEqualTo(m1));

    // 1x1 operand via AssignDifferenceOf, with m3 as both input and output
    m3.AssignDifferenceOf(m3, ms);
    BOOST_CHECK(m3.IsEqualTo(m0));
}

// Checks the vector and matrix norm helpers of GPUMatrix<half>.
// The 2x3 input is built from {1, 4, 2, 5, 3, 6}; the expected values below are
// consistent with column-major storage, i.e. columns (1,4), (2,5), (3,6) and
// rows (1,2,3), (4,5,6). The single host array is reused to stage every expected
// result, so statement order matters throughout.
BOOST_FIXTURE_TEST_CASE(GPUMatrixNorms, RandomSeedFixture)
{
    std::array<half, 6> array = {
        1, 4, 2,
        5, 3, 6};
    GPUMatrix<half> m0(2, 3, c_deviceIdZero, array.data(), matrixFlagNormal);

    // L1 norm with flag = true: expected 1x3 = per-column sums {5, 7, 9}
    GPUMatrix<half> m3(c_deviceIdZero);
    m0.VectorNorm1(m3, true);
    array[0] = 5;
    array[1] = 7;
    array[2] = 9;
    GPUMatrix<half> m2(1, 3, c_deviceIdZero, array.data(), matrixFlagNormal);
    BOOST_CHECK(m3.IsEqualTo(m2));

    // L1 norm with flag = false: expected 2x1 = per-row sums {6, 15}
    m0.VectorNorm1(m3, false);
    m2.Resize(2, 1);
    array[0] = 6;
    array[1] = 15;
    m2.SetValue(2, 1, m2.GetComputeDeviceId(), array.data(), matrixFlagNormal);
    BOOST_CHECK(m3.IsEqualTo(m2));

    // L2 norm with flag = true: sqrt(17), sqrt(29), sqrt(45)
    m0.VectorNorm2(m3, true);
    m2.Resize(1, 3);
    array[0] = 4.1231f;
    array[1] = 5.3852f;
    array[2] = 6.7082f;
    m2.SetValue(1, 3, m2.GetComputeDeviceId(), array.data(), matrixFlagNormal);
    BOOST_CHECK(m3.IsEqualTo(m2, c_epsilonFloat5E4));

    // L2 norm with flag = false: sqrt(14), sqrt(77)
    m0.VectorNorm2(m3, false);
    m2.Resize(2, 1);
    array[0] = 3.7417f;
    array[1] = 8.7750f;
    m2.SetValue(2, 1, m2.GetComputeDeviceId(), array.data(), matrixFlagNormal);
    BOOST_CHECK(m3.IsEqualTo(m2, c_epsilonFloat5E4));

    // Restore the host array to {1, 4, 2, 5, 3, 6} (the same contents as m0) for m00.
    array[0] = 1;
    array[2] = 2;
    array[4] = 3;
    array[1] = 4;
    array[3] = 5;
    array[5] = 6;
    GPUMatrix<half> m00(2, 3, c_deviceIdZero, array.data(), matrixFlagNormal);

    // VectorMax fills two outputs; only m3 is checked here, against the maxima.
    // NOTE(review): m1 presumably receives the indices of the maxima - confirm against GPUMatrix.h.
    GPUMatrix<half> m1(c_deviceIdZero);
    m00.VectorMax(m1, m3, true);
    m2.Resize(1, 3);
    array[0] = 4;
    array[1] = 5;
    array[2] = 6;
    m2.SetValue(1, 3, m2.GetComputeDeviceId(), array.data(), matrixFlagNormal);
    BOOST_CHECK(m3.IsEqualTo(m2));

    m00.VectorMax(m1, m3, false);
    m2.Resize(2, 1);
    array[0] = 3.;
    array[1] = 6;
    m2.SetValue(2, 1, m2.GetComputeDeviceId(), array.data(), matrixFlagNormal);
    BOOST_CHECK(m3.IsEqualTo(m2));

    // Infinity norm: for this all-positive input the expected values equal the maxima above.
    m0.VectorNormInf(m3, true);
    m2.Resize(1, 3);
    array[0] = 4;
    array[1] = 5;
    array[2] = 6;
    m2.SetValue(1, 3, m2.GetComputeDeviceId(), array.data(), matrixFlagNormal);
    BOOST_CHECK(m3.IsEqualTo(m2));

    m0.VectorNormInf(m3, false);
    m2.Resize(2, 1);
    array[0] = 3.;
    array[1] = 6;
    m2.SetValue(2, 1, m2.GetComputeDeviceId(), array.data(), matrixFlagNormal);
    BOOST_CHECK(m3.IsEqualTo(m2));

    // Reload m00 with {1, 4, 2, 5, 3, 6}; the device id is taken from m2.
    array[0] = 1;
    array[2] = 2;
    array[4] = 3;
    array[1] = 4;
    array[3] = 5;
    array[5] = 6;
    m00.SetValue(2, 3, m2.GetComputeDeviceId(), array.data(), matrixFlagNormal);
    BOOST_CHECK_EQUAL(6, m00.MatrixNormInf());

    // Whole-matrix norms: Frobenius = sqrt(1 + 4 + ... + 36) = sqrt(91) ~= 9.5394,
    // MatrixNormInf = largest element (6), MatrixNorm1 = sum of all elements (21).
    BOOST_CHECK(abs(m0.FrobeniusNorm() - 9.5394) < c_epsilonFloatE3); // HALF_PRECISION
    BOOST_CHECK(abs(m0.MatrixNormInf() - 6) < c_epsilonFloatE4);
    BOOST_CHECK_EQUAL(21, m00.MatrixNorm1());

    // MatrixNorm0 of an identity matrix is expected to equal its dimension
    // (the number of non-zero entries), for both a large and a small matrix.
    GPUMatrix<half> a = GPUMatrix<half>::Eye(4096, c_deviceIdZero);
    BOOST_CHECK_EQUAL(4096, a.MatrixNorm0());

    GPUMatrix<half> b = GPUMatrix<half>::Eye(5, c_deviceIdZero);
    BOOST_CHECK_EQUAL(5, b.MatrixNorm0());
}

// Draws a 768x50 uniform random matrix and checks that every sample lies within
// the requested bounds.
BOOST_FIXTURE_TEST_CASE(GPUMatrixRandomUniform, RandomSeedFixture)
{
    const half lowerBound = -0.035f;
    const half upperBound = 0.035f;
    auto samples = GPUMatrix<half>::RandomUniform(768, 50, c_deviceIdZero, lowerBound, upperBound, IncrementCounter());
    unique_ptr<half[]> values(samples.CopyToArray());

    const int sampleCount = 768 * 50;
    for (int idx = 0; idx < sampleCount; ++idx)
    {
        BOOST_CHECK_LE(values[idx], upperBound);
        // NV_TODO: change from GT to GE for now
        BOOST_CHECK_GE(values[idx], lowerBound); // HALF_PRECISION
    }
}

// Checks ColumnSlice on a 2x3 matrix: the slice (0, 2) must equal a 2x2 matrix built
// from the first four host values, and the slice (1, 2) one built from the last four.
BOOST_FIXTURE_TEST_CASE(GPUMatrixColumnSlice, RandomSeedFixture)
{
    std::array<half, 6> hostValues = {
        1, 4, 2,
        5, 3, 6};
    GPUMatrix<half> source(2, 3, c_deviceIdZero, hostValues.data(), matrixFlagNormal);

    // Expected leading slice: uses only hostValues[0..3].
    GPUMatrix<half> leadingExpected(2, 2, c_deviceIdZero, hostValues.data(), matrixFlagNormal);
    GPUMatrix<half> slice = source.ColumnSlice(0, 2);
    BOOST_CHECK(slice.IsEqualTo(leadingExpected));

    // Expected trailing slice: hostValues[2..5].
    std::array<half, 4> trailingValues = {hostValues[2], hostValues[3], hostValues[4], hostValues[5]};
    GPUMatrix<half> trailingExpected(2, 2, c_deviceIdZero, trailingValues.data(), matrixFlagNormal);

    slice = source.ColumnSlice(1, 2);
    BOOST_CHECK(slice.IsEqualTo(trailingExpected));
}

// Checks the row-slice helpers AssignRowSliceValuesOf, AddToRowSliceValuesOf and
// AddWithRowSliceValuesOf on a 5x3 matrix holding the values 1..15.
BOOST_FIXTURE_TEST_CASE(GPUMatrixRowSlice, RandomSeedFixture)
{
    std::array<half, 15> sourceValues = {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,
        10, 11, 12,
        13, 14, 15};
    GPUMatrix<half> source(5, 3, c_deviceIdZero, sourceValues.data(), matrixFlagNormal);

    // Expected 2x3 slice taken with start index 2 and count 2.
    std::array<half, 6> sliceValues = {
        3, 4, 8,
        9, 13, 14};
    GPUMatrix<half> sliceExpected(2, 3, c_deviceIdZero, sliceValues.data(), matrixFlagNormal);

    GPUMatrix<half> slice(c_deviceIdZero);
    slice.AssignRowSliceValuesOf(source, 2, 2);
    BOOST_CHECK(slice.IsEqualTo(sliceExpected));

    // The slice values placed at their original positions, zeros elsewhere.
    std::array<half, 15> paddedValues = {
        0, 0, 3,
        4, 0, 0,
        0, 8, 9,
        0, 0, 0,
        13, 14, 0};
    GPUMatrix<half> addExpected(5, 3, c_deviceIdZero, paddedValues.data(), matrixFlagNormal);

    // Adding the slice back into the source must match source + padded slice.
    addExpected += source;
    source.AddToRowSliceValuesOf(sliceExpected, 2, 2);
    BOOST_CHECK(addExpected.IsEqualTo(source));

    // Adding the 2x3 matrix onto the previously extracted slice doubles every element.
    slice.AddWithRowSliceValuesOf(sliceExpected, 0, 2);
    std::array<half, 6> doubledValues = {
        6, 8, 16,
        18, 26, 28};
    GPUMatrix<half> doubledExpected(2, 3, c_deviceIdZero, doubledValues.data(), matrixFlagNormal);
    BOOST_CHECK(slice.IsEqualTo(doubledExpected));
}

// Checks AssignKhatriRaoProductOf for a 3x4 and a 2x4 input against a precomputed
// 6x4 result. In the reference data, each run of six expected values holds the
// pairwise products of the matching runs of three and two input values.
BOOST_FIXTURE_TEST_CASE(GPUMatrixKhatriRaoProduct, RandomSeedFixture)
{
    std::array<half, 12> lhsValues = {
        0.8147f, 0.9058f, 0.1270f, 0.9134f,
        0.6324f, 0.0975f, 0.2785f, 0.5469f,
        0.9575f, 0.9649f, 0.1576f, 0.9706f};
    GPUMatrix<half> lhs(3, 4, c_deviceIdZero, lhsValues.data());

    std::array<half, 8> rhsValues = {
        0.9572f, 0.4854f, 0.8003f, 0.1419f,
        0.4218f, 0.9157f, 0.7922f, 0.9595f};
    GPUMatrix<half> rhs(2, 4, c_deviceIdZero, rhsValues.data());

    std::array<half, 24> expectedValues = {
        0.7798f, 0.8670f, 0.1215f, 0.3954f,
        0.4396f, 0.0616f, 0.7310f, 0.5061f,
        0.0781f, 0.1296f, 0.0897f, 0.0138f,
        0.1175f, 0.2307f, 0.4038f, 0.2550f,
        0.5008f, 0.8768f, 0.7644f, 0.1249f,
        0.7689f, 0.9258f, 0.1512f, 0.9313f};
    GPUMatrix<half> expected(6, 4, c_deviceIdZero, expectedValues.data());

    GPUMatrix<half> product(c_deviceIdZero);
    product.AssignKhatriRaoProductOf(lhs, rhs);

    BOOST_CHECK(product.IsEqualTo(expected, c_epsilonFloatE3)); // HALF_PRECISION
}

// Checks AddColumnReshapeProductOf (the column-wise reshaped product used for the
// KhatriRaoProduct gradient) against precomputed 2x2 results for both flag values.
BOOST_FIXTURE_TEST_CASE(GPUMatrixAddColumnReshapeProductOf, RandomSeedFixture)
{
    std::array<half, 12> lhsValues = {
        0.6557f, 0.0357f,
        0.8491f, 0.9340f,
        0.6787f, 0.7577f,
        0.7431f, 0.3922f,
        0.6555f, 0.1712f,
        0.7060f, 0.0318f,
    };
    GPUMatrix<half> lhs(6, 2, c_deviceIdZero, lhsValues.data());

    std::array<half, 6> rhsValues = {
        0.2769f, 0.0462f,
        0.0971f, 0.8235f,
        0.6948f, 0.3171f};
    GPUMatrix<half> rhs(3, 2, c_deviceIdZero, rhsValues.data());

    // Reference result when the last argument is false.
    std::array<half, 4> expectedValuesPlain = {
        0.2867f, 0.1266f,
        1.2913f, 0.4520f};
    GPUMatrix<half> expectedPlain(2, 2, c_deviceIdZero, expectedValuesPlain.data());

    // Reference result when the last argument is true.
    std::array<half, 4> expectedValuesFlagged = {
        0.2657f, 0.3636f,
        1.0923f, 0.6416f};
    GPUMatrix<half> expectedFlagged(2, 2, c_deviceIdZero, expectedValuesFlagged.data());

    // The operation accumulates into its target, so the target is zeroed before each call.
    GPUMatrix<half> accumulator(2, 2, c_deviceIdZero);
    accumulator.SetValue(0.0f);
    accumulator.AddColumnReshapeProductOf(lhs, rhs, false);
    BOOST_CHECK(accumulator.IsEqualTo(expectedPlain, c_epsilonFloatE4));

    accumulator.SetValue(0.0f);
    accumulator.AddColumnReshapeProductOf(lhs, rhs, true);
    BOOST_CHECK(accumulator.IsEqualTo(expectedFlagged, c_epsilonFloatE4));
}

// Cross-checks AssignInnerProductOf against AssignVectorNorm2Of: the square root of a
// matrix's inner product with itself must equal its L2 norm, for both flag values.
BOOST_FIXTURE_TEST_CASE(GPUMatrixInnerProduct, RandomSeedFixture)
{
    std::array<half, 6> hostValues = {
        1, 4, 2,
        5, 3, 6};
    GPUMatrix<half> input(2, 3, c_deviceIdZero, hostValues.data(), matrixFlagNormal);

    GPUMatrix<half> rootOfInnerProduct(c_deviceIdZero);
    GPUMatrix<half> norm2(c_deviceIdZero);

    // flag = true
    rootOfInnerProduct.AssignInnerProductOf(input, input, true);
    norm2.AssignVectorNorm2Of(input, true);
    rootOfInnerProduct.InplaceSqrt();
    BOOST_CHECK(rootOfInnerProduct.IsEqualTo(norm2));

    // flag = false
    rootOfInnerProduct.AssignInnerProductOf(input, input, false);
    norm2.AssignVectorNorm2Of(input, false);
    rootOfInnerProduct.InplaceSqrt();
    BOOST_CHECK(rootOfInnerProduct.IsEqualTo(norm2));
}

// Checks AssignRepeatOf: repeating a 2x3 matrix (1, 1) times reproduces it, and
// repeating it (3, 2) times yields the precomputed 6x6 tiling.
BOOST_FIXTURE_TEST_CASE(GPUMatrixAssignRepeatOf, RandomSeedFixture)
{
    std::array<half, 6> tileValues = {
        1, 2,
        6, 7,
        11, 12};
    GPUMatrix<half> tile(2, 3, c_deviceIdZero, tileValues.data(), matrixFlagNormal);

    // One repetition in each direction is the identity.
    GPUMatrix<half> repeated(c_deviceIdZero);
    repeated.AssignRepeatOf(tile, 1, 1);
    BOOST_CHECK(repeated.IsEqualTo(tile));

    std::array<half, 36> tiledValues = {
        1, 2, 1, 2, 1, 2,
        6, 7, 6, 7, 6, 7,
        11, 12, 11, 12, 11, 12,
        1, 2, 1, 2, 1, 2,
        6, 7, 6, 7, 6, 7,
        11, 12, 11, 12, 11, 12};
    GPUMatrix<half> tiledExpected(6, 6, c_deviceIdZero, tiledValues.data(), matrixFlagNormal);

    repeated.AssignRepeatOf(tile, 3, 2);
    BOOST_CHECK(repeated.IsEqualTo(tiledExpected));
}

// Round-trip check: multiplying by a 1x28 row vector with RowElementMultiplyWith and
// then dividing with RowElementDivideBy must restore the original matrix (within
// half-precision tolerance). The divisor is drawn from [1, 2], so it is never zero.
BOOST_FIXTURE_TEST_CASE(GPUMatrixRowElementOperations, RandomSeedFixture)
{
    GPUMatrix<half> original = GPUMatrix<half>::RandomUniform(20, 28, c_deviceIdZero, -1, 1, IncrementCounter());
    GPUMatrix<half> rowVector = GPUMatrix<half>::RandomUniform(1, 28, c_deviceIdZero, 1, 2, IncrementCounter());

    GPUMatrix<half> roundTripped(c_deviceIdZero);
    roundTripped.SetValue(original);
    roundTripped.RowElementMultiplyWith(rowVector);
    roundTripped.RowElementDivideBy(rowVector);

    BOOST_CHECK(original.IsEqualTo(roundTripped, c_epsilonFloatE3)); // HALF_PRECISION
}

// Round-trip check: multiplying by a 20x1 column vector with ColumnElementMultiplyWith
// and then dividing with ColumnElementDivideBy must restore the original matrix (within
// half-precision tolerance). The divisor is drawn from [1, 2], so it is never zero.
BOOST_FIXTURE_TEST_CASE(GPUMatrixColumnElementOperations, RandomSeedFixture)
{
    GPUMatrix<half> original = GPUMatrix<half>::RandomUniform(20, 28, c_deviceIdZero, -1, 1, IncrementCounter());
    GPUMatrix<half> columnVector = GPUMatrix<half>::RandomUniform(20, 1, c_deviceIdZero, 1, 2, IncrementCounter());

    GPUMatrix<half> roundTripped(c_deviceIdZero);
    roundTripped.SetValue(original);
    roundTripped.ColumnElementMultiplyWith(columnVector);
    roundTripped.ColumnElementDivideBy(columnVector);

    BOOST_CHECK(original.IsEqualTo(roundTripped, c_epsilonFloatE3)); // HALF_PRECISION
}

// Checks that resetting the curand object with the same seed makes RandomUniform
// reproduce the same matrix.
BOOST_FIXTURE_TEST_CASE(GPUMatrixCurandSeedingHalf, RandomSeedFixture)
{
    const half low = 0;
    const half high = 1;
    const unsigned long seedUsed = 1;
    const unsigned long seedIgnored = 4711;

    // The current GPUMatrix implementation uses a static RNG.

    // First draw after seeding.
    GPUMatrix<half>::ResetCurandObject(seedUsed, __FUNCTION__);
    auto firstDraw = GPUMatrix<half>::RandomUniform(16, 16, c_deviceIdZero, low, high, seedIgnored);

    // Second draw after re-seeding with the same value.
    GPUMatrix<half>::ResetCurandObject(seedUsed, __FUNCTION__);
    auto secondDraw = GPUMatrix<half>::RandomUniform(16, 16, c_deviceIdZero, low, high, seedIgnored);

    BOOST_CHECK(firstDraw.IsEqualTo(secondDraw));
}

// Runs two consecutive Adam updates on a two-element parameter vector and compares
// both the updated parameters and the optimizer state with precomputed values.
BOOST_FIXTURE_TEST_CASE(GPUMatrixAdam, RandomSeedFixture)
{
    GPUMatrix<half> optimizerState(c_deviceIdZero);
    GPUMatrix<half> grads(2, 1, c_deviceIdZero);
    GPUMatrix<half> params(2, 1, c_deviceIdZero);
    GPUMatrix<half> paramsExpected(2, 1, c_deviceIdZero);
    GPUMatrix<half> stateExpected(2, 2, c_deviceIdZero);

    half gradData[] = { 0.1, -0.1 };
    half paramData[] = { 0.1, 0.1 };
    grads.SetValue(2, 1, c_deviceIdZero, gradData, matrixFormatRowMajor);
    params.SetValue(2, 1, c_deviceIdZero, paramData, matrixFormatRowMajor);

    // Expectations after the first step.
    half paramsAfterStep1[] = { -0.05811338, 0.25811338 };
    half stateAfterStep1[] = { 1e-5, 0.01, 1e-5, -0.01 };
    paramsExpected.SetValue(2, 1, c_deviceIdZero, paramsAfterStep1, matrixFormatRowMajor);
    stateExpected.SetValue(2, 2, c_deviceIdZero, stateAfterStep1, matrixFormatRowMajor);
    optimizerState.Adam(grads, params, 0.1, 0.9, 0.999, 0.5, 1e-8, 0.1);

    BOOST_CHECK(params.IsEqualTo(paramsExpected, 1e-2));
    BOOST_CHECK(optimizerState.IsEqualTo(stateExpected, 1e-2));

    // Expectations after a second step with the same gradients.
    half paramsAfterStep2[] = { -0.27059249, 0.47059249 };
    half stateAfterStep2[] = { 2e-05, 0.019, 2e-05, -0.019 };
    paramsExpected.SetValue(2, 1, c_deviceIdZero, paramsAfterStep2, matrixFormatRowMajor);
    stateExpected.SetValue(2, 2, c_deviceIdZero, stateAfterStep2, matrixFormatRowMajor);
    optimizerState.Adam(grads, params, 0.1, 0.9, 0.999, 0.5, 1e-8, 0.1);

    BOOST_CHECK(params.IsEqualTo(paramsExpected, 1e-2));
    BOOST_CHECK(optimizerState.IsEqualTo(stateExpected, 1e-2));
}

// Same scenario as the plain Adam test but with a large epsilon (0.1) passed in
// place of 1e-8; the expected parameter updates are correspondingly smaller.
BOOST_FIXTURE_TEST_CASE(GPUMatrixAdamVarEpsilon, RandomSeedFixture)
{
    GPUMatrix<half> optimizerState(c_deviceIdZero);
    GPUMatrix<half> grads(2, 1, c_deviceIdZero);
    GPUMatrix<half> params(2, 1, c_deviceIdZero);
    GPUMatrix<half> paramsExpected(2, 1, c_deviceIdZero);
    GPUMatrix<half> stateExpected(2, 2, c_deviceIdZero);

    half gradData[] = { 0.1, -0.1 };
    half paramData[] = { 0.1, 0.1 };
    half epsilon = 0.1;
    grads.SetValue(2, 1, c_deviceIdZero, gradData, matrixFormatRowMajor);
    params.SetValue(2, 1, c_deviceIdZero, paramData, matrixFormatRowMajor);

    // Expectations after the first step.
    half paramsAfterStep1[] = { 0.0951532672, 0.1048467328 };
    half stateAfterStep1[] = { 1e-5, 0.01, 1e-5, -0.01 };
    paramsExpected.SetValue(2, 1, c_deviceIdZero, paramsAfterStep1, matrixFormatRowMajor);
    stateExpected.SetValue(2, 2, c_deviceIdZero, stateAfterStep1, matrixFormatRowMajor);
    optimizerState.Adam(grads, params, 0.1, 0.9, 0.999, 0.5, epsilon, 0.1);

    BOOST_CHECK(params.IsEqualTo(paramsExpected, 1e-3));
    BOOST_CHECK(optimizerState.IsEqualTo(stateExpected, 1e-3));

    // Expectations after a second step with the same gradients.
    half paramsAfterStep2[] = { 0.0860598361, 0.1139401639 };
    half stateAfterStep2[] = { 2e-05, 0.019, 2e-05, -0.019 };
    paramsExpected.SetValue(2, 1, c_deviceIdZero, paramsAfterStep2, matrixFormatRowMajor);
    stateExpected.SetValue(2, 2, c_deviceIdZero, stateAfterStep2, matrixFormatRowMajor);
    optimizerState.Adam(grads, params, 0.1, 0.9, 0.999, 0.5, epsilon, 0.1);

    BOOST_CHECK(params.IsEqualTo(paramsExpected, 1e-3));
    BOOST_CHECK(optimizerState.IsEqualTo(stateExpected, 1e-3));
}

// Checks AssignOneHot for a 2x2 index matrix with 6 classes:
//  - one-hot along axis 0 (shape 6 x 2 x 2),
//  - one-hot along axis 1 (shape 2 x 6 x 2),
//  - "dirty" indices (-1 and 7 are outside [0, 6)), which the expected data treats
//    as producing no set entry at all.
// Every result is a 12x2 matrix compared against a hand-built expectation.
BOOST_FIXTURE_TEST_CASE(GPUMatrixOneHot, RandomSeedFixture)
{
    GPUMatrix<half> result(c_deviceIdZero);
    const size_t num_class = 6;

    half data[4] = {1,2,3,4};
    GPUMatrix<half> m0(2, 2, c_deviceIdZero);
    m0.SetValue(2, 2, c_deviceIdZero, data, matrixFormatRowMajor);

    // Axis 0: each index occupies its own run of num_class entries in the flat buffer.
    half exp_data[24];
    memset(&exp_data[0], 0, sizeof(half) * 24);
    exp_data[1] = exp_data[9] = exp_data[14] = exp_data[22] = 1;
    GPUMatrix<half> exp(12, 2, c_deviceIdZero);
    exp.SetValue(12, 2, c_deviceIdZero, exp_data, matrixFormatColMajor);

    vector<size_t> shape(3);
    shape[0] = num_class; shape[1] = 2; shape[2] = 2;

    result.AssignOneHot(m0, shape, 0);

    BOOST_CHECK(result.GetNumCols() == 2);
    BOOST_CHECK(result.GetNumRows() == 12);
    BOOST_CHECK(result.IsEqualTo(exp, 1e-6));

    // Axis 1: same indices, class dimension in the middle of the shape.
    half exp_data2[24];
    memset(&exp_data2[0], 0, sizeof(half) * 24);
    exp_data2[2] = exp_data2[7] = exp_data2[16] = exp_data2[21] = 1;
    GPUMatrix<half> exp2(12, 2, c_deviceIdZero);
    exp2.SetValue(12, 2, c_deviceIdZero, exp_data2, matrixFormatColMajor);

    vector<size_t> shape2(3);
    shape2[0] = 2; shape2[1] = num_class; shape2[2] = 2;
    GPUMatrix<half> result2(c_deviceIdZero);
    result2.AssignOneHot(m0, shape2, 1);

    BOOST_CHECK(result2.GetNumCols() == 2);
    BOOST_CHECK(result2.GetNumRows() == 12);
    BOOST_CHECK(result2.IsEqualTo(exp2, 1e-6));

    // Dirty input: load the out-of-range indices into their own matrix. Previously
    // dirty_m was constructed but never used and m0 was overwritten instead.
    half dirty_data[4] = {1,-1,7,4};
    GPUMatrix<half> dirty_m(2, 2, c_deviceIdZero);
    dirty_m.SetValue(2, 2, c_deviceIdZero, dirty_data, matrixFormatRowMajor);

    // Only the valid indices 1 and 4 are expected to set an entry.
    half dirty_exp_data[24];
    memset(&dirty_exp_data[0], 0, sizeof(half) * 24);
    dirty_exp_data[1] = dirty_exp_data[22] = 1;
    GPUMatrix<half> dirty_exp(12, 2, c_deviceIdZero);
    dirty_exp.SetValue(12, 2, c_deviceIdZero, dirty_exp_data, matrixFormatColMajor);

    GPUMatrix<half> dirty_result(c_deviceIdZero);
    dirty_result.AssignOneHot(dirty_m, shape, 0);

    BOOST_CHECK(dirty_result.GetNumCols() == 2);
    BOOST_CHECK(dirty_result.GetNumRows() == 12);
    BOOST_CHECK(dirty_result.IsEqualTo(dirty_exp, 1e-6));
}

/*
// Disabled: broken because of half atomics
BOOST_FIXTURE_TEST_CASE(GPUMatrixScatterToIndices, RandomSeedFixture)
{
    const size_t row_elements = 2;

    half data[4] = {1,2,2,4};
    GPUMatrix<half> m0(2, 2, c_deviceIdZero);
    m0.SetValue(2, 2, c_deviceIdZero, data, matrixFormatRowMajor);

    half target[12];
    memset(&target[0], 0, sizeof(half) * 12);
    target[2] = target[3] = 4;
    target[4] = target[5] = 3;
    target[6] = target[7] = 2;
    target[8] = target[9] = 1;
    GPUMatrix<half> m1(row_elements, 6, c_deviceIdZero);
    m1.SetValue(row_elements, 6, c_deviceIdZero, target, matrixFormatColMajor);

    half m3_data[8];
    memset(&m3_data[0], 0, sizeof(half) * 8);
    m3_data[0] = 1;
    m3_data[1] = 2;
    m3_data[2] = 3;
    m3_data[3] = 4;
    m3_data[4] = 5;
    m3_data[5] = 6;
    m3_data[6] = 7;
    m3_data[7] = 8;
    GPUMatrix<half> m3(4, 2, c_deviceIdZero);
    m3.SetValue(4, 2, c_deviceIdZero, m3_data, matrixFormatColMajor);
    m1.ScatterToIndices(m3, m0, row_elements);

    half expect[12];
    memset(&expect[0], 0, sizeof(half) * 12);
    expect[2] = 5;
    expect[3] = 6;
    expect[4] = 11;
    expect[5] = 13;
    expect[6] = 2;
    expect[7] = 2;
    expect[8] = 8;
    expect[9] = 9;
    GPUMatrix<half> m_expect(row_elements, 6, c_deviceIdZero);
    m_expect.SetValue(row_elements, 6, c_deviceIdZero, expect, matrixFormatColMajor);

    BOOST_CHECK(m1.IsEqualTo(m_expect, 1e-6));
}
*/

// Checks GatherFromTarget: the 2x2 index matrix selects two-element columns of the
// 2x6 target, and the gathered columns are packed into a 4x2 result.
BOOST_FIXTURE_TEST_CASE(GPUMatrixGatherFromTarget, RandomSeedFixture)
{
    const size_t row_elements = 2;

    // Indices 1..4, supplied in row-major order.
    half indexData[4] = {1,2,3,4};
    GPUMatrix<half> indices(2, 2, c_deviceIdZero);
    indices.SetValue(2, 2, c_deviceIdZero, indexData, matrixFormatRowMajor);

    // Target columns 1..4 hold the constants 4, 3, 2, 1; columns 0 and 5 stay zero.
    half targetData[12];
    memset(&targetData[0], 0, sizeof(half) * 12);
    targetData[2] = targetData[3] = 4;
    targetData[4] = targetData[5] = 3;
    targetData[6] = targetData[7] = 2;
    targetData[8] = targetData[9] = 1;
    GPUMatrix<half> targetMatrix(row_elements, 6, c_deviceIdZero);
    targetMatrix.SetValue(row_elements, 6, c_deviceIdZero, targetData, matrixFormatColMajor);

    half expectedData[8];
    memset(&expectedData[0], 0, sizeof(half) * 8);
    expectedData[0] = expectedData[1] = 4;
    expectedData[2] = expectedData[3] = 2;
    expectedData[4] = expectedData[5] = 3;
    expectedData[6] = expectedData[7] = 1;
    GPUMatrix<half> expected(4, 2, c_deviceIdZero);
    expected.SetValue(4, 2, c_deviceIdZero, expectedData, matrixFormatColMajor);

    GPUMatrix<half> gathered(c_deviceIdZero);
    gathered.GatherFromTarget(indices, targetMatrix, row_elements);
    BOOST_CHECK(gathered.GetNumRows() == 4);
    BOOST_CHECK(gathered.GetNumCols() == 2);
    BOOST_CHECK(gathered.IsEqualTo(expected, 1e-6));
}

// Disabled test: checks that IsEqualTo reports a difference between an all-zeros and
// an all-ones column vector with 33553921 rows.
// NOTE(review): 33553921 == 65535 * 512 + 1, which looks chosen to exceed a GPU launch
// limit by one element - confirm before re-enabling.
#if 0 // Temporarily disabling
BOOST_FIXTURE_TEST_CASE(GPUMatrixLargeInequality, RandomSeedFixture)
{
    const int rows = 33553921;
    const int cols = 1;

    auto m0 = GPUMatrix<half>::Zeros(rows, cols, c_deviceIdZero);
    auto m1 = GPUMatrix<half>::Ones(rows, cols, c_deviceIdZero);

    BOOST_CHECK(!m1.IsEqualTo(m0, c_epsilonFloatE5));
}
#endif

BOOST_AUTO_TEST_SUITE_END()

// Tests from MatrixDataSynchronizationTests.cpp

BOOST_AUTO_TEST_SUITE(Half_GPUMatrixSuite)

// Requires GPU
// Checks, for each HalfMatrix constructor used here, that the freshly built matrix
// reports the expected data location (GPU for c_deviceIdZero, CPU for CPUDEVICE)
// and the requested dimensions.
BOOST_FIXTURE_TEST_CASE(MatrixDataSynchronization_DefaultBehaviorTestForConstructors, RandomSeedFixture)
{
    // Device-only constructors produce empty matrices.
    const HalfMatrix matrixA1(c_deviceIdZero);
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, matrixA1.GetCurrentMatrixLocation());
    BOOST_CHECK_EQUAL(0, matrixA1.GetNumCols());
    BOOST_CHECK_EQUAL(0, matrixA1.GetNumRows());

    const HalfMatrix matrixA2(CPUDEVICE);
    BOOST_CHECK_EQUAL(CurrentDataLocation::CPU, matrixA2.GetCurrentMatrixLocation());
    BOOST_CHECK_EQUAL(0, matrixA2.GetNumCols());
    BOOST_CHECK_EQUAL(0, matrixA2.GetNumRows());

    // Rows/cols/device constructor.
    const HalfMatrix matrixA3(13, 12, c_deviceIdZero);
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, matrixA3.GetCurrentMatrixLocation());
    BOOST_CHECK_EQUAL(12, matrixA3.GetNumCols());
    BOOST_CHECK_EQUAL(13, matrixA3.GetNumRows());

    // Host-array constructors. The array is value-initialized so the matrices are
    // never built from indeterminate memory; only location and shape are checked.
    half arr[5 * 45] = {};
    const HalfMatrix matrixA4(5, 45, arr, c_deviceIdZero, matrixFlagNormal);
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, matrixA4.GetCurrentMatrixLocation());
    BOOST_CHECK_EQUAL(45, matrixA4.GetNumCols());
    BOOST_CHECK_EQUAL(5, matrixA4.GetNumRows());

    const HalfMatrix matrixA5(45, 5, arr, CPUDEVICE, matrixFlagNormal);
    BOOST_CHECK_EQUAL(CurrentDataLocation::CPU, matrixA5.GetCurrentMatrixLocation());
    BOOST_CHECK_EQUAL(5, matrixA5.GetNumCols());
    BOOST_CHECK_EQUAL(45, matrixA5.GetNumRows());
}

// Requires GPU
// Checks how element access and explicit transfers change the reported data location:
//  - const reads (GetValue) leave a GPU matrix in the BOTH state,
//  - non-const operator() moves it to the CPU state,
//  - TransferFromDeviceToDevice ends in BOTH when its last argument is false and in
//    the destination-only state when it is true.
BOOST_FIXTURE_TEST_CASE(MatrixDataSynchronization_AccessPatternAndTransferTest, RandomSeedFixture)
{
    // Value-initialized so that the element reads below never touch indeterminate data.
    half arr[5 * 45] = {};
    const HalfMatrix matrixA(5, 45, arr, c_deviceIdZero, matrixFlagNormal);
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, matrixA.GetCurrentMatrixLocation());

    // GetValue calls operator() const, leaving the matrix in the BOTH state
    half x = matrixA.GetValue(0, 0);
    BOOST_CHECK_EQUAL(CurrentDataLocation::BOTH, matrixA.GetCurrentMatrixLocation());
    foreach_coord(i, j, matrixA)
    {
        x = matrixA.GetValue(i, j);
        BOOST_CHECK_EQUAL(CurrentDataLocation::BOTH, matrixA.GetCurrentMatrixLocation());
    }

    // 15 * 15 == 5 * 45, so the same host array is large enough for matrixB.
    // The device id is passed explicitly, matching the other host-array constructions
    // in this file; the original call omitted it, so matrixFlagNormal landed in the
    // device-id position.
    HalfMatrix matrixB(15, 15, arr, c_deviceIdZero, matrixFlagNormal);
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, matrixB.GetCurrentMatrixLocation());

    // non-const operator leaves it in CPU state so that writing to it is valid
    half& y = matrixB(1, 1);
    BOOST_CHECK_EQUAL(CurrentDataLocation::CPU, matrixB.GetCurrentMatrixLocation());
    matrixB(4, 2) = y;
    BOOST_CHECK_EQUAL(CurrentDataLocation::CPU, matrixB.GetCurrentMatrixLocation());
    foreach_coord (i, j, matrixB)
    {
        // y is a reference to element (1, 1), so this also writes through to the matrix.
        y = matrixB(i, j);
        matrixB(j, i) = y;
        BOOST_CHECK_EQUAL(CurrentDataLocation::CPU, matrixB.GetCurrentMatrixLocation());
    }

    // Transfers with the last argument false are expected to end in BOTH ...
    matrixB.TransferFromDeviceToDevice(CPUDEVICE, c_deviceIdZero, false);
    BOOST_CHECK_EQUAL(CurrentDataLocation::BOTH, matrixB.GetCurrentMatrixLocation());
    matrixB.TransferFromDeviceToDevice(c_deviceIdZero, CPUDEVICE, false);
    BOOST_CHECK_EQUAL(CurrentDataLocation::BOTH, matrixB.GetCurrentMatrixLocation());
    // ... and with it true, in the destination-only state.
    matrixB.TransferFromDeviceToDevice(CPUDEVICE, c_deviceIdZero, true);
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, matrixB.GetCurrentMatrixLocation());
    matrixB.TransferFromDeviceToDevice(c_deviceIdZero, CPUDEVICE, true);
    BOOST_CHECK_EQUAL(CurrentDataLocation::CPU, matrixB.GetCurrentMatrixLocation());
}

// Requires GPU
// Checks that operands pushed to the CPU state by element access report the GPU state
// again after taking part in a matrix product, and that the product is on the GPU.
BOOST_FIXTURE_TEST_CASE(MatrixDataSynchronization_GravitatingTowardsPreferredDevice, RandomSeedFixture)
{
    HalfMatrix lhs = HalfMatrix::RandomGaussian(64, 23, c_deviceIdZero, 0, 2, IncrementCounter());
    HalfMatrix rhs = HalfMatrix::Eye(23, c_deviceIdZero);

    // Both operands start out on the GPU.
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, lhs.GetCurrentMatrixLocation());
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, rhs.GetCurrentMatrixLocation());

    // Set the current matrix location by reading a value of the matrix (via non-const operator())
    half& element = lhs(1, 1);
    BOOST_CHECK_EQUAL(CurrentDataLocation::CPU, lhs.GetCurrentMatrixLocation());
    element = rhs(1, 1);
    BOOST_CHECK_EQUAL(CurrentDataLocation::CPU, rhs.GetCurrentMatrixLocation());

    // After the multiplication, all three matrices must report the GPU location.
    const HalfMatrix product = lhs * rhs;
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, lhs.GetCurrentMatrixLocation());
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, rhs.GetCurrentMatrixLocation());
    BOOST_CHECK_EQUAL(CurrentDataLocation::GPU, product.GetCurrentMatrixLocation());
}

BOOST_AUTO_TEST_SUITE_END()

// Tests from MatrixFileWriteReadTests.cpp

BOOST_AUTO_TEST_SUITE(Half_GPUMatrixSuite)

// Round-trips a random 43x10 GPUMatrix<half> through a text-mode File: the matrix is
// streamed out, the file position is rewound, and the matrix read back must match a
// copy of the original within c_epsilonFloatE5.
BOOST_FIXTURE_TEST_CASE(GPUMatrixFileWriteRead, RandomSeedFixture)
{
    GPUMatrix<half> written = GPUMatrix<half>::RandomUniform(43, 10, c_deviceIdZero, -26.3f, 30.2f, IncrementCounter());
    GPUMatrix<half> reference = written;

    std::wstring path(L"MGPU.txt");
    File stream(path, fileOptionsText | fileOptionsReadWrite);

    // Write, then rewind to the start for reading.
    stream << written;
    stream.SetPosition(0);

    GPUMatrix<half> readBack(c_deviceIdZero);
    stream >> readBack;

    BOOST_CHECK(reference.IsEqualTo(readBack, c_epsilonFloatE5));
}

BOOST_AUTO_TEST_SUITE_END()

}
} } }
back to top