""" An example of multiprocessing with NUFFT using PyCUDA backend, wrapped inside the atomic_NUFFT wrapper class. The two processes are running on two CPU cores. Each process creates one NUFFT and offloads the computations to GPU. nvidia-smi confirms that two python programs are using the GPU. """ import numpy from pynufft import NUFFT, helper import scipy.misc import matplotlib.pyplot import multiprocessing import os class atomic_NUFFT: def __init__(self, om, Nd, Kd, Jd, device_indx): """ This caches the parameters only. Any other GPU related stuffs are carried out in run() """ self.om = om self.Nd = Nd self.Kd = Kd self.Jd = Jd # self.API = API self.device_indx = device_indx def run(self, x, cpu_cores): """ In this method, the NUFFT are created and executed on a fixed CPU core. """ pid= os.getpid() print('pid=', pid) os.system("taskset -p -c %d-%d %d" % (cpu_cores[0], cpu_cores[1], pid)) """ Control the CPU affinity. Otherwise the process on one core can be switched to another core. """ # create NUFFT # NUFFT = NUFFT(self.API, ) # plan the NUFFT device_list = helper.device_list() self.NUFFT = NUFFT(device_list[self.device_indx]) self.NUFFT.plan(self.om, self.Nd, self.Kd, self.Jd) # send the image to device gx = self.NUFFT.to_device(x) # carry out 10000 forward transform for pp in range(0, 10000): gy = self.NUFFT._forward_device(gx) # return the object return gy.get() Nd = (256,256) Kd = (512,512) Jd = (6,6) om = numpy.random.randn(35636, 2) x = scipy.misc.ascent()[::2,::2] om1 = om[om[:,0]>0, :] om2 = om[om[:,0]<=0, :] # create pool pool = multiprocessing.Pool(2) # create the list to receive the return values results = [] # Now enter the first process # This is the standard multiprocessing Pool D = atomic_NUFFT(om1, Nd, Kd, Jd, 0) # async won't obstruct the next line of code result = pool.apply_async(D.run, args = (x, (0,3))) # the result is appended results.append(result) # Now enter the second process # This is the standard multiprocessing Pool D = atomic_NUFFT(om2, Nd, Kd, Jd, 0) # Non-obstructive result = pool.apply_async(D.run, args = (x, (4,7))) results.append(result) # closing the pool pool.close() pool.join() # results are appended # Now print the outputs result1 = results[0].get() result2 = results[1].get() # check CPU results NUFFT_cpu1 = NUFFT() NUFFT_cpu1.plan(om1, Nd, Kd, Jd) y1 = NUFFT_cpu1.forward(x) print('norm = ', numpy.linalg.norm(y1 - result1) / numpy.linalg.norm(y1)) NUFFT_cpu2 = NUFFT() NUFFT_cpu2.plan(om2, Nd, Kd, Jd) y2 = NUFFT_cpu2.forward(x) print('norm = ', numpy.linalg.norm(y2 - result2) / numpy.linalg.norm(y2))