https://github.com/gwastro/pycbc
Tip revision: bc03655f8a0522f0fc2781801aaeccae93c00ed2 authored by Duncan Brown on 10 September 2017, 14:10:05 UTC
Set for 1.8.1 release
Set for 1.8.1 release
Tip revision: bc03655
matchedfilter_cpu.py
# Copyright (C) 2012 Alex Nitz
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# =============================================================================
#
# Preamble
#
# =============================================================================
#
from __future__ import absolute_import
import numpy
from pycbc.opt import omp_libs, omp_flags
from pycbc import WEAVE_FLAGS
from weave import inline
from .simd_correlate import default_segsize, corr_parallel_code, corr_support
from .matchedfilter import _BaseCorrelator
batch_correlator_code = """
#pragma omp parallel for
for (int i=0; i<num_vectors; i++){
std::complex<float>* xp = (std::complex<float>*) x[i];
std::complex<float>* zp = (std::complex<float>*) z[i];
for (int j=0; j<size; j++){
float xr, yr, xi, yi, re, im;
xr = xp[j].real();
xi = xp[j].imag();
yr = y[j].real();
yi = y[j].imag();
re = xr*yr + xi*yi;
im = xr*yi - xi*yr;
zp[j] = std::complex<float>(re, im);
}
}
"""
def batch_correlate_execute(self, y):
num_vectors = self.num_vectors # pylint:disable=unused-variable
size = self.size # pylint:disable=unused-variable
x = numpy.array(self.x.data, copy=False) # pylint:disable=unused-variable
z = numpy.array(self.z.data, copy=False) # pylint:disable=unused-variable
y = numpy.array(y.data, copy=False)
inline(batch_correlator_code, ['x', 'y', 'z', 'size', 'num_vectors'],
extra_compile_args=[WEAVE_FLAGS] + omp_flags,
libraries=omp_libs)
support = """
#include <stdio.h>
#include <math.h>
"""
def correlate_numpy(x, y, z):
z.data[:] = numpy.conjugate(x.data)[:]
z *= y
code_batch = """
#pragma omp parallel for
for (int i=0; i<N; i++){
TYPE xr, yr, xi, yi, re, im;
xr = xa[i].real();
xi = xa[i].imag();
yr = ya[i].real();
yi = ya[i].imag();
re = xr*yr + xi*yi;
im = xr*yi - xi*yr;
za[i] = std::complex<TYPE>(re, im);
}
"""
single_codeb = code_batch.replace('TYPE', 'float')
double_codeb = code_batch.replace('TYPE', 'double')
def correlate_batch_inline(x, y, z):
if z.precision == 'single':
the_code = single_codeb
else:
the_code = double_codeb
za = numpy.array(z.ptr, copy=False) # pylint:disable=unused-variable
xa = numpy.array(x.ptr, copy=False) # pylint:disable=unused-variable
ya = numpy.array(y.ptr, copy=False) # pylint:disable=unused-variable
N = len(x) # pylint:disable=unused-variable
inline(the_code, ['xa', 'ya', 'za', 'N'],
extra_compile_args=[WEAVE_FLAGS] + omp_flags,
support_code = support,
libraries=omp_libs
)
code = """
#pragma omp parallel for
for (int i=0; i<N; i++){
TYPE xr, yr, xi, yi, re, im;
xr = xa[i].real();
xi = xa[i].imag();
yr = ya[i].real();
yi = ya[i].imag();
re = xr*yr + xi*yi;
im = xr*yi - xi*yr;
za[i] = std::complex<TYPE>(re, im);
}
"""
single_code = code.replace('TYPE', 'float')
double_code = code.replace('TYPE', 'double')
def correlate_inline(x, y, z):
if z.precision == 'single':
the_code = single_code
else:
the_code = double_code
za = numpy.array(z.data, copy=False) # pylint:disable=unused-variable
xa = numpy.array(x.data, copy=False) # pylint:disable=unused-variable
ya = numpy.array(y.data, copy=False) # pylint:disable=unused-variable
N = len(x) # pylint:disable=unused-variable
inline(the_code, ['xa', 'ya', 'za', 'N'],
extra_compile_args=[WEAVE_FLAGS] + omp_flags,
support_code = support,
libraries=omp_libs
)
#correlate = correlate_inline
correlate = correlate_inline
class CPUCorrelator(_BaseCorrelator):
def __init__(self, x, y, z):
self.x = numpy.array(x.data, copy=False)
self.y = numpy.array(y.data, copy=False)
self.z = numpy.array(z.data, copy=False)
self.arrlen = len(self.x)
self.code = corr_parallel_code
self.support = corr_support
self.segsize = default_segsize
def correlate(self):
htilde = self.x # pylint:disable=unused-variable
stilde = self.y # pylint:disable=unused-variable
qtilde = self.z # pylint:disable=unused-variable
arrlen = self.arrlen # pylint:disable=unused-variable
segsize = self.segsize # pylint:disable=unused-variable
inline(self.code, ['htilde', 'stilde', 'qtilde', 'arrlen', 'segsize'],
extra_compile_args = [WEAVE_FLAGS] + omp_flags,
#extra_compile_args = ['-mno-avx -mno-sse2 -mno-sse3 -mno-ssse3 -mno-sse4 -mno-sse4.1 -mno-sse4.2 -mno-sse4a -O2 -w'] + omp_flags,
#extra_compile_args = ['-msse3 -O3 -w'] + omp_flags,
libraries = omp_libs, support_code = self.support, auto_downcast = 1)
def _correlate_factory(x, y, z):
return CPUCorrelator