https://github.com/jyhmiinlin/pynufft
Revision bff018fde8fff46d7c3da71222f8191b89aa4628 authored by Jyh-Miin Lin on 23 August 2020, 06:24:06 UTC, committed by GitHub on 23 August 2020, 06:24:06 UTC
1 parent 1b858aa
Raw File
Tip revision: bff018fde8fff46d7c3da71222f8191b89aa4628 authored by Jyh-Miin Lin on 23 August 2020, 06:24:06 UTC
Create codeql-analysis.yml
Tip revision: bff018f
benchmark_2D_mCoil.py
"""
1. Write a NUFFT_hsa benchmark using for-loop
2. Write a NUFFT_memsave benchmark using for-loop
3. Write a NUFFT_mCoil benchmark without for-loop


"""
import numpy 
import matplotlib.pyplot as pyplot
import scipy.misc
import scipy.io
from matplotlib import cm
import matplotlib
gray = cm.gray

import pkg_resources
DATA_PATH = pkg_resources.resource_filename('pynufft', './src/data/')   




# pyplot.imshow(numpy.abs(image[:,:,64]), label='original signal',cmap=gray)
# pyplot.show()

def benchmark(nufftobj, gx, maxiter, sense=1):
    import time
    t0= time.time()
    for pp in range(0, maxiter*sense):
        gy = nufftobj.forward(gx)
    t1 = time.time()
    for pp in range(0, maxiter*sense):
        gx2 = nufftobj.adjoint(gy)
    t2 = time.time()
    t_iter0 = time.time()
    for pp in range(0, maxiter*sense):
        pass
    t_iter1 = time.time()
    t_delta = t_iter1 - t_iter0 
    return (t1 - t0 )/maxiter, (t2 - t1)/maxiter, gy, gx2
        
def test_mCoil(sense_number):
    image = scipy.misc.ascent()[::2,::2]
    Nd = (256,256) # time grid, tuple
#     image = scipy.misc.imresize(image, Nd)*(1.0 + 0.0j)
    Kd = (512,512) # frequency grid, tuple
    Jd = (6,6) # interpolator 
#     om=       numpy.load(DATA_PATH+'om3D.npz')['arr_0']
    # om = numpy.random.randn(10000,3)*2
    # om = numpy.load('/home/sram/Cambridge_2012/DATA_MATLAB/Ciuciu/Trajectories_and_data_sparkling_radial/radial/')['arr_0']
    om = scipy.io.loadmat('/home/sram/Cambridge_2012/DATA_MATLAB/Ciuciu/Trajectories_and_data_sparkling_radial/sparkling/samples_sparkling_x8_64x3072.mat')['samples_sparkling']
    # om = scipy.io.loadmat('/home/sram/Cambridge_2012/DATA_MATLAB/Ciuciu/Trajectories_and_data_sparkling_radial/radial/samples_radial_x8_64x3072.mat')['samples_radial']
    om = om/numpy.max(om.real.ravel()) * numpy.pi
    print('om.shape, ', om.shape)
#     sense_number = 16
#     sense = numpy.ones(Nd + (sense_number,), dtype=numpy.complex64)
    m = om.shape[0]
    print(om.shape)
    from pynufft import NUFFT_cpu, NUFFT_hsa, NUFFT_hsa_legacy
        # from pynufft import NUFFT_memsave
    NufftObj_cpu = NUFFT_cpu()
    api = 'ocl'
    proc = 1
    NufftObj_hsa = NUFFT_hsa_legacy(api, proc, 0)
    NufftObj_radix1 = NUFFT_hsa(api, proc, 0)
    NufftObj_radix2 = NUFFT_hsa(api, proc, 0)
        
    import time
    t0=time.time()
    NufftObj_cpu.plan(om, Nd, Kd, Jd)
    t1 = time.time()
    NufftObj_hsa.plan(om, Nd, Kd, Jd)
    t12 = time.time()
    NufftObj_radix1.plan(om, Nd, Kd, Jd, batch = sense_number, radix = 1)
    t2 = time.time()
    NufftObj_radix2.plan(om, Nd, Kd, Jd, batch = sense_number, radix = 2)
    tc = time.time()
#     proc = 0 # GPU
#     proc = 1 # gpu
#     NufftObj_hsa.offload(API = 'ocl',   platform_number = proc, device_number = 0)
#     t22 = time.time()
#     NufftObj_radix1.offload(API = 'ocl',   platform_number = proc, device_number = 0)
    # NufftObj_radix1.offload(API = 'cuda',   platform_number = 0, device_number = 0)
#     t3 = time.time()
#     NufftObj_radix2.offload(API = 'ocl',   platform_number = proc, device_number = 0)
#     tp = time.time()
    if proc is 0:
        print('CPU')
    else:
        print('GPU')
    print('Number of samples = ', om.shape[0])
    print('planning time of CPU = ', t1 - t0)
    print('planning time of HSA = ', t12 - t1)
    print('planning time of MEM = ', t2 - t12)
    print('planning time of mCoil = ', tc - t2)
    
    
#     print('loading time of HSA = ', t22 - tc)
#     print('loading time of MEM = ', t3 - t22)
#     print('loading time of mCoil = ', tp - t3)
        
    gx_hsa = NufftObj_hsa.thr.to_device(image.astype(numpy.complex64))
    gx_memsave0 = NufftObj_radix1.thr.to_device(image.astype(numpy.complex64))
    gx_memsave = NufftObj_radix1.s2x(gx_memsave0)
    gx_mCoil0 = NufftObj_radix2.thr.to_device(image.astype(numpy.complex64))    
    gx_mCoil = NufftObj_radix2.s2x(gx_mCoil0)
    maxiter = 2
    tcpu_forward, tcpu_adjoint, ycpu, xcpu = benchmark(NufftObj_cpu, image, maxiter, sense_number)
    print('CPU', int(m), tcpu_forward, tcpu_adjoint)
    maxiter = 5
    thsa_forward, thsa_adjoint, yhsa, xhsa = benchmark(NufftObj_hsa, gx_hsa, maxiter, sense_number)
    print('CSR', int(m), thsa_forward, thsa_adjoint, )#numpy.linalg.norm(yhsa.get() - ycpu)/  numpy.linalg.norm( ycpu))
    tmem_forward, tmem_adjoint, ymem, xmem = benchmark(NufftObj_radix1, gx_memsave, maxiter)#, sense_number)
    print('radix-1' , int(m), tmem_forward, tmem_adjoint)
    
    tmCoil_forward, tmCoil_adjoint, ymCoil, xmCoil = benchmark(NufftObj_radix2, gx_mCoil, maxiter)
    print('radix-2' , int(m), tmCoil_forward, tmCoil_adjoint)    
    
    for ss in range(0, sense_number):
        erry = numpy.linalg.norm(ymCoil.get()[:,ss] - ycpu)/ numpy.linalg.norm( ycpu)
        errx = numpy.linalg.norm(xmCoil.get()[...,ss] - xcpu)/ numpy.linalg.norm( xcpu)
        if erry > 1e-6 or errx > 1e-6:
            print("degraded accuracy:", sense_number, erry, errx)
        else:
            print("Pass test for coil: ", ss)
    NufftObj_radix1.release()
    NufftObj_radix2.release()
    NufftObj_hsa.release()
    del NufftObj_radix1, NufftObj_hsa, NufftObj_radix2, NufftObj_cpu
    return tcpu_forward, tcpu_adjoint,  thsa_forward, thsa_adjoint, tmem_forward, tmem_adjoint,  tmCoil_forward, tmCoil_adjoint

import numpy
CPU_forward = ()
HSA_forward = ()
MEM_forward = ()
mCoil_forward = ()
CPU_adjoint = ()
HSA_adjoint = ()
MEM_adjoint = ()
mCoil_adjoint = ()
SENSE_NUM = ()

for sense_number in (64, 32, 16, 8, 4, 2, 1):
    print('SENSE = ', sense_number)
    t = test_mCoil(sense_number)
    CPU_forward += (t[0], )
    CPU_adjoint += (t[1], )
    HSA_forward  += (t[2], )
    HSA_adjoint  += (t[3], )
    MEM_forward  += (t[4], )
    MEM_adjoint  += (t[5], )
    mCoil_forward += (t[6], )
    mCoil_adjoint  += (t[7], )
    SENSE_NUM += (sense_number, )

CPU_forward = numpy.array(CPU_forward)
HSA_forward = numpy.array(HSA_forward)
MEM_forward = numpy.array(MEM_forward)
mCoil_forward = numpy.array(mCoil_forward)
CPU_adjoint = numpy.array(CPU_adjoint)
HSA_adjoint = numpy.array(HSA_adjoint)
MEM_adjoint = numpy.array(MEM_adjoint)
mCoil_adjoint = numpy.array(mCoil_adjoint)
SENSE_NUM = numpy.array(SENSE_NUM)


matplotlib.pyplot.subplot(1,3, 1)

matplotlib.pyplot.plot(SENSE_NUM, CPU_forward/HSA_forward, '*-', label='loop, CSR')
matplotlib.pyplot.plot(SENSE_NUM, CPU_forward/MEM_forward, 'D--', label='batched, radix-1')
matplotlib.pyplot.plot(SENSE_NUM, CPU_forward/mCoil_forward, 'x:', label='batched, radix-2')
matplotlib.pyplot.legend()
matplotlib.pyplot.ylabel('Acceleration')
matplotlib.pyplot.xlabel('Number of coils')
matplotlib.pyplot.title('Forward')

matplotlib.pyplot.subplot(1,3, 2)

matplotlib.pyplot.plot(SENSE_NUM, CPU_adjoint/HSA_adjoint, '*-', label='loop, CSR')
matplotlib.pyplot.plot(SENSE_NUM, CPU_adjoint/MEM_adjoint, 'D--', label='batched, radix-1')
matplotlib.pyplot.plot(SENSE_NUM, CPU_adjoint/mCoil_adjoint, 'x:', label='batched, radix-2')
matplotlib.pyplot.legend()
matplotlib.pyplot.ylabel('Acceleration')
matplotlib.pyplot.xlabel('Number of coils')
matplotlib.pyplot.title('Adjoint')

matplotlib.pyplot.subplot(1,3, 3)

matplotlib.pyplot.plot(SENSE_NUM, (CPU_adjoint+CPU_forward)/(HSA_adjoint + HSA_forward), '*-', label='loop, CSR')
matplotlib.pyplot.plot(SENSE_NUM, (CPU_adjoint+CPU_forward)/(MEM_adjoint + MEM_forward), 'D--', label='batched, radix-1')
matplotlib.pyplot.plot(SENSE_NUM, (CPU_adjoint+CPU_forward)/(mCoil_adjoint + mCoil_forward), 'x:', label='batched, radix-2')
matplotlib.pyplot.legend()
matplotlib.pyplot.ylabel('Acceleration')
matplotlib.pyplot.xlabel('Number of coils')
matplotlib.pyplot.title('Selfadjoint')
matplotlib.pyplot.show()


#     matplotlib.pyplot.imshow(xmCoil.get()[:,:].real)
#     matplotlib.pyplot.show()
#     matplotlib.pyplot.imshow(xcpu.real)
#     matplotlib.pyplot.show()
# print(tmem_forward, tmem_adjoint, numpy.linalg.norm(ymem.get() - ycpu)/ numpy.linalg.norm( ycpu))
back to top