Revision 70c78234182d797294d831d9a66961bfb278bceb authored by heisterm on 13 January 2015, 09:45:27 UTC, committed by heisterm on 13 January 2015, 09:45:27 UTC
The example (attenuation_correction_example.py) has the same content as the tutorial; only the code is organised a little differently.
1 parent 06e4219
util.py
# -*- coding: UTF-8 -*-
#-------------------------------------------------------------------------------
# Name: util
# Purpose:
#
# Author: heistermann
#
# Created: 26.10.2011
# Copyright: (c) heistermann 2011
# Licence: <your licence>
#-------------------------------------------------------------------------------
#!/usr/bin/env python
"""
Utility functions
^^^^^^^^^^^^^^^^^
Module util provides a set of useful helpers which are currently not attributable
to any of the other modules.
.. autosummary::
:nosignatures:
:toctree: generated/
aggregate_in_time
from_to
"""
import numpy as np
import datetime as dt
from time import mktime
from scipy import interpolate
from scipy.ndimage import filters
from scipy.spatial import cKDTree
from scipy.stats import nanmean
import importlib
import warnings
import functools
warnings.simplefilter('once', DeprecationWarning)
def deprecated(replacement=None):
"""A decorator which can be used to mark functions as deprecated.
replacement is a callable that will be called with the same args
as the decorated function.
Author: Giampaolo Rodola' <g.rodola [AT] gmail [DOT] com>
License: MIT
>>> # Todo: warnings are sent to stderr instead of stdout
>>> # so they are not seen here
>>> from wradlib.util import deprecated
>>> @deprecated()
... def foo(x):
... return x
>>> ret = foo(1)
>>> #DeprecationWarning: foo is deprecated
>>> print ret
1
>>> def newfun(x):
... return 0
>>> @deprecated(newfun)
... def foo(x):
... return x
>>> ret = foo(1)
>>> #DeprecationWarning: foo is deprecated; use newfun instead
>>> print ret
1
>>>
"""
def outer(fun):
msg = "%s.%s is deprecated" % (fun.__module__,fun.__name__)
if replacement is not None:
msg += "; use %s instead" % getattr(replacement, "__name__", replacement)
@functools.wraps(fun)
def inner(*args, **kwargs):
warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
return fun(*args, **kwargs)
return inner
return outer
class OptionalModuleStub(object):
"""Stub class for optional imports.
Objects of this class are instantiated when optional modules are not
present on the user's machine.
This allows global imports of optional modules with the code only breaking
when actual attributes from this module are called.
"""
def __init__(self, name):
self.name = name
def __getattr__(self, name):
raise AttributeError, ('Module "'+ self.name +
'" is not installed.\n\n' +
'You tried to access function/module/attribute "' +
name + '"\nfrom module "' + self.name + '".\nThis '+
'module is optional right now in wradlib.\n' +
'You need to separately install this dependency.\n' +
'Please refer to http://wradlib.bitbucket.org/gettingstarted.html#optional-dependencies\n' +
'for further instructions.'
)
def import_optional(module):
"""Allowing for lazy loading of optional wradlib modules or dependencies.
This function removes the need to satisfy all dependencies of wradlib before
being able to work with it.
Parameters
----------
module : string
name of the module
Returns
-------
mod : object
if module is present, returns the module object, on ImportError
returns an instance of `OptionalModuleStub` which will raise an
AttributeError as soon as any attribute is accessed.
Examples
--------
Trying to import a module that exists makes the module available as normal.
You can even use an alias. You cannot use the '*' notation, or import only
select functions, but you can simulate most of the standard import syntax
behavior
>>> m = import_optional('math')
>>> m.log10(100)
2.0
Trying to import a module that does not exist produces no error. Only when
an attribute of it is accessed does the code raise an error
>>> m = import_optional('nonexistentmodule')
>>> m.log10(100)
Traceback (most recent call last):
...
AttributeError: Module "nonexistentmodule" is not installed.
<BLANKLINE>
You tried to access function/module/attribute "log10"
from module "nonexistentmodule".
This module is optional right now in wradlib.
You need to separately install this dependency.
Please refer to http://wradlib.bitbucket.org/gettingstarted.html#optional-dependencies
for further instructions.
"""
try:
mod = importlib.import_module(module)
except ImportError:
mod = OptionalModuleStub(module)
return mod
def aggregate_equidistant_tseries(tstart, tend, tdelta, tends_src, tdelta_src, src, method="sum", minpercvalid=100.):
"""Aggregates an equidistant time series to equidistant target time windows.
This function aggregates time series data under the assumption that the source
and the target time series are equidistant, and that the source time steps
regularly fit into the target time steps (no shifts at the boundaries).
This is the most trivial aggregation scenario.
However, the function allows for gaps in the source data. This means, we assume the
data to be equidistant (each item represents a time step with the same length),
but it does not need to be contiguous. NaN values in the source data are allowed,
too.
The output, though, will be a contiguous time series. This series will have NaN
values for those target time steps which were not sufficiently supported by source
data. The decision whether the support was "sufficient" is based on the argument
*minpercvalid*. This argument specifies the minimum percentage of valid source
time steps inside one target time step (valid meaning not NaN and not missing) which
is required to compute an aggregate. The default value of minpercvalid is 100 percent.
This means no gaps are allowed, and a target time step will be NaN if even a single
source time step is missing.
Aggregation methods at the moment are "sum" and "mean" of the source data.
Parameters
----------
tstart : isostring or datetime object
start of the target time series
tend : isostring or datetime object
end of the target time series
tdelta : integer
resolution of the target time series (seconds)
tends_src : sequence of isostrings or datetime objects
timestamps which define the END of the source time steps
tdelta_src : integer
resolution of the source data (seconds)
src : 1-d array of floats
source values
method : string
Method of aggregation (either "sum" or "mean")
minpercvalid : float
Minimum percentage of valid source values within the target interval that is
required to compute an aggregate target value. If set to 100 percent,
the target value will be NaN if even a single source value is missing or NaN.
If set to 90 percent, the target value will be NaN if more than 10 percent of
the source values are missing or NaN.
Returns
-------
tstarts : array of datetime objects
array of timestamps which defines the start of each target time step/window
tends : array of datetime objects
array of timestamps which defines the end of each target time step/window
agg : array of aggregated values
aggregated values for each target time step
Example
-------
>>> tstart = "2000-01-01 00:00:00"
>>> tend = "2000-01-02 00:00:00"
>>> tdelta = 3600*6
>>> tends_src = ["2000-01-01 02:00:00","2000-01-01 03:00:00","2000-01-01 04:00:00","2000-01-01 05:00:00","2000-01-01 12:00:00"]
>>> tdelta_src = 3600
>>> src = [1,1,1,1,1]
>>> tstarts, tends, agg = aggregate_equidistant_tseries(tstart, tend, tdelta, tends_src, tdelta_src, src, minpercvalid=50.)
>>> print agg
[ 4. nan nan nan]
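The same data aggregated with method="mean" (illustrative; exact output
formatting may vary with the numpy version, hence the skip):
>>> tstarts, tends, agg = aggregate_equidistant_tseries(tstart, tend, tdelta, tends_src, tdelta_src, src, method="mean", minpercvalid=50.) # doctest: +SKIP
>>> print agg # doctest: +SKIP
[ 1. nan nan nan]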
"""
# Check arguments and make sure they have the right type
src = np.array(src)
tstart = iso2datetime(tstart)
tend = iso2datetime(tend)
tends_src = np.array([iso2datetime(item) for item in tends_src])
twins = np.array(from_to(tstart, tend, tdelta))
tstarts = twins[:-1]
tends = twins[1:]
# Check consistency of timestamps and data
assert len(tends_src)==len(src), "Length of source timestamps tends_src must equal length of source data src."
# Check that source time steps are sorted correctly
assert np.all( np.sort(tends_src)==tends_src ), "The source time steps are not in chronological order."
# number of expected source time steps per target timestep
assert tdelta % tdelta_src == 0, "Target resolution %d is not a multiple of source resolution %d." % (tdelta, tdelta_src)
nexpected = tdelta / tdelta_src
# results container
agg = np.repeat(np.nan, len(tstarts))
inconsistencies=0
# iterate over target time windows
for i, begin in enumerate(tstarts):
end = tends[i]
# select source data that is between start and end of this interval
ixinside = np.where( (tends_src > begin) & (tends_src <= end) )[0]
# Time steps and array shape in that interval sized as if it had no gaps
tends_src_expected = np.array(from_to(begin, end, tdelta_src)[1:])
srcfull = np.repeat(np.nan, len(tends_src_expected))
# These are the indices of srcfull which actually have data according to src
validix = np.where(np.in1d(tends_src_expected, tends_src[ixinside]))[0]
if not len(validix)==len(ixinside):
# If we find time stamps in tends_src[ixinside] that are not
# contained in the expected contiguous time steps (tends_src_expected),
# we assume that the data is corrupt (this can have multiple causes,
# e.g. a wrong increment)
inconsistencies += 1
continue
srcfull[validix] = src[ixinside]
# valid source values found in target time window
nvalid = len(np.where(~np.isnan( srcfull ))[0])
if nvalid > nexpected:
# If we have more source time steps in target interval than expected
# something must be wrong.
inconsistencies += 1
continue
if float(nvalid) / float(nexpected) >= minpercvalid/100.:
if method=="sum":
agg[i] = np.nansum( srcfull )
elif method=="mean":
agg[i] = nanmean( srcfull )
else:
raise Exception("Aggregation method <%s> is not known." % method)
if inconsistencies>0:
print "WARNING: Inconsistent source times in %d target time intervals." % inconsistencies
return tstarts, tends, agg
def aggregate_in_time(src, dt_src, dt_trg, taxis=0, func='sum'):
"""Aggregate time series data to a coarser temporal resolution.
Parameters
----------
src : array of shape (..., original number of time steps,...)
This is the time series data which should be aggregated. The position
of the time dimension is indicated by the *taxis* argument. The number
of time steps corresponds to the length of the time dimension.
taxis : integer
This is the position of the time dimension in array *src*.
dt_src : array of datetime objects
Must be of length *original number of time steps + 1* because dt_src
defines the limits of the intervals corresponding to the time steps.
This means: dt_src[0] is the lower limit of time step 1, dt_src[1] is the
upper limit of time step 1 and the lower limit of time step 2 and so on.
dt_trg : array of datetime objects
Must be of length *number of output time steps + 1* analogously to dt_src.
This means: dt_trg[0] is the lower limit of output time step 1, dt_trg[1]
is the upper limit of output time step 1 and the lower limit of output
time step 2 and so on.
func : numpy function name, e.g. 'sum', 'mean'
Defines the way the data should be aggregated. The string must correspond
to a valid numpy function, e.g. 'sum', 'mean', 'min', 'max'.
Returns
-------
output : array of shape (..., len(dt_trg) - 1, ...)
The length of the time dimension of the output array depends on the array
*dt_trg* which defines the limits of the output time step intervals.
Examples
--------
>>> src = np.arange(8*4).reshape( (8,4) )
>>> print 'source time series:' # doctest: +SKIP
>>> print src
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]
[16 17 18 19]
[20 21 22 23]
[24 25 26 27]
[28 29 30 31]]
>>> dt_src = [dt.datetime.strptime('2008-06-02', '%Y-%m-%d' ) + dt.timedelta(hours=i) for i in range(9) ]
>>> print 'source time interval limits:' # doctest: +SKIP
>>> for tim in dt_src: print tim
2008-06-02 00:00:00
2008-06-02 01:00:00
2008-06-02 02:00:00
2008-06-02 03:00:00
2008-06-02 04:00:00
2008-06-02 05:00:00
2008-06-02 06:00:00
2008-06-02 07:00:00
2008-06-02 08:00:00
>>> print 'target time interval limits:' # doctest: +SKIP
>>> dt_trg = [dt.datetime.strptime('2008-06-02', '%Y-%m-%d' ) + dt.timedelta(seconds=i*3600*4) for i in range(4) ]
>>> for tim in dt_trg: print tim
2008-06-02 00:00:00
2008-06-02 04:00:00
2008-06-02 08:00:00
2008-06-02 12:00:00
>>> print 'target time series' # doctest: +SKIP
>>> print aggregate_in_time(src, dt_src, dt_trg, taxis=0, func='sum')
[[ 24. 28. 32. 36.]
[ 88. 92. 96. 100.]
[ nan nan nan nan]]
"""
## src, dt_src, dt_trg = np.array(src), np.array(dt_src), np.array(dt_trg)
dt_src, dt_trg = np.array(dt_src), np.array(dt_trg)
trg_shape = list(src.shape)
trg_shape[taxis] = len(dt_trg)-1
trg = np.repeat(np.nan, _shape2size(trg_shape)).reshape(trg_shape)
for i in range(len(dt_trg)-1):
trg_slice = [slice(0,j) for j in trg.shape]
trg_slice[taxis] = i
src_slice = [slice(0,src.shape[j]) for j in range(len(src.shape))]
src_slice[taxis] = np.where( np.logical_and(dt_src<=dt_trg[i+1], dt_src>=dt_trg[i]) )[0][:-1]
if len(src_slice[taxis])==0:
trg[trg_slice] = np.nan
else:
trg[trg_slice] = _get_func(func)(src[tuple(src_slice)], axis=taxis)
return trg
def sum_over_time_windows(src, dt_src, dt_trg, minpercvalid):
"""Returns the sums of time series <src> within the time windows dt_trg
"""
assert len(src)+1==len(dt_src), "Length of time series array <src> must be one less than datetime array <dt_src>."
try:
dt_src = [dt.datetime.strptime(dtime, "%Y-%m-%d %H:%M:%S") for dtime in dt_src]
except TypeError:
pass
try:
dt_trg = [dt.datetime.strptime(dtime, "%Y-%m-%d %H:%M:%S") for dtime in dt_trg]
except TypeError:
pass
# cast to arrays so that the elementwise datetime comparisons below work
src, dt_src, dt_trg = np.array(src), np.array(dt_src), np.array(dt_trg)
accum = np.repeat(np.nan, len(dt_trg)-1)
for i, tstart in enumerate(dt_trg[:-1]):
tend = dt_trg[i+1]
# accumulate gage data to target time windows
ix = np.where((dt_src>tstart) & (dt_src <= tend))[0] - 1
if len(src[ix])==0:
continue
# use float division (otherwise the fraction is truncated to zero) and
# require at least minpercvalid percent valid (non-NaN) source values
elif 1. - float(len(np.where(np.isnan( src[ix] ))[0])) / len(src[ix]) >= minpercvalid/100.:
accum[i] = np.nansum( src[ix] )
return accum
def mean_over_time_windows(src, dt_src, dt_trg, minbasepoints=1):
"""UNDER DEVELOPMENT: Aggregate time series data to a coarser temporal resolution.
Parameters
----------
src : array of shape (original number of time steps, ...)
This is the time series data which should be aggregated, with time
as the first dimension.
dt_src : array of datetime objects
Must be of length *original number of time steps + 1* because dt_src
defines the limits of the intervals corresponding to the time steps.
This means: dt_src[0] is the lower limit of time step 1, dt_src[1] is the
upper limit of time step 1 and the lower limit of time step 2 and so on.
dt_trg : array of datetime objects
Must be of length *number of output time steps + 1* analogously to dt_src.
This means: dt_trg[0] is the lower limit of output time step 1, dt_trg[1]
is the upper limit of output time step 1 and the lower limit of output
time step 2 and so on.
minbasepoints : integer
Minimum number of source time steps required inside a target window
(note: not yet evaluated by the current implementation).
Returns
-------
output : array of shape (..., len(dt_trg) - 1, ...)
The length of the time dimension of the output array depends on the array
*dt_trg* which defines the limits of the output time step intervals.
Examples
--------
Illustrative sketch only (the function is still under development, so the
call is skipped by doctest and no output is shown):
>>> src = np.arange(8*4).reshape((8,4)).astype("f4") # doctest: +SKIP
>>> dt_src = [dt.datetime.strptime('2008-06-02', '%Y-%m-%d') + dt.timedelta(hours=i) for i in range(9)] # doctest: +SKIP
>>> dt_trg = [dt.datetime.strptime('2008-06-02', '%Y-%m-%d') + dt.timedelta(hours=i*4) for i in range(3)] # doctest: +SKIP
>>> means = mean_over_time_windows(src, dt_src, dt_trg) # doctest: +SKIP
"""
# Convert input time steps to numpy arrays
dt_src, dt_trg = np.array(dt_src), np.array(dt_trg)
# Create a new container for the target data
trg_shape = list(src.shape)
trg_shape[0] = len(dt_trg)-1
trg = np.repeat(np.nan, _shape2size(trg_shape)).reshape(trg_shape)
for i in range(len(dt_trg)-1):
# width of window
width = float(_tdelta2seconds( dt_trg[i+1] - dt_trg[i] ))
# These are the intervals completely INSIDE the target time window
src_ix = np.where(np.logical_and(dt_src > dt_trg[i], dt_src < dt_trg[i+1]))[0]
intervals = dt_src[src_ix[1:]] - dt_src[src_ix[:-1]]
# check left edge
intervals = np.insert(intervals, 0, dt_src[src_ix[0]] - dt_trg[i])
if src_ix[0]>0:
src_ix = np.insert(src_ix, 0, src_ix[0]-1)
# check right edge
intervals = np.append(intervals, dt_trg[i+1] - dt_src[src_ix[-1]])
if src_ix[-1] < (len(dt_src)-1):
src_ix = np.append(src_ix, src_ix[-1]+1)
# convert to seconds
intervals = np.array([_tdelta2seconds(interval) for interval in intervals])
# compute weights
weights = intervals / width
# compute weighted mean
trg[i] = np.dot(np.transpose(src[src_ix]), weights)
return trg
def average_over_time_windows(src, dt_src, dt_trg, maxdist=3600, helper_interval=300, **ipargs):
"""UNDER DEVELOPMENT: Computes the average of a time series over given time windows.
This function computes the average values of an irregular time series ``src``
within given time windows ``dt_trg``. The datetimes of the original time series
are given by ``dt_src``. The idea of this function is to create regular helper
timesteps at an interval length given by ``helper_interval``. The values of
``src`` are then interpolated to these helper time steps, and the resulting
helper values are finally averaged over the given target time windows.
Parameters
----------
src : array of shape (number of source time steps, ...)
This is the time series data which should be averaged, with time
as the first dimension.
dt_src : array of datetime objects
Datetimes corresponding to the source values (same length as src).
dt_trg : array of datetime objects
Limits of the target time windows; must be of length
*number of output time steps + 1*.
maxdist : integer
Maximum distance (seconds) between a helper time step and the nearest
source time step; helper values further away are masked as NaN.
helper_interval : integer
Interval (seconds) of the regular helper time steps.
**ipargs :
Additional interpolation arguments (note: not yet passed on by the
current implementation).
Returns
-------
output : array of shape (..., len(dt_trg) - 1, ...)
The length of the time dimension of the output array depends on the array
*dt_trg* which defines the limits of the output time step intervals.
Examples
--------
Illustrative sketch only (the function is still under development, so the
call is skipped by doctest and no output is shown):
>>> src = np.arange(8*4).reshape((8,4)).astype("f4") # doctest: +SKIP
>>> dt_src = [dt.datetime.strptime('2008-06-02', '%Y-%m-%d') + dt.timedelta(hours=i) for i in range(8)] # doctest: +SKIP
>>> dt_trg = [dt.datetime.strptime('2008-06-02', '%Y-%m-%d') + dt.timedelta(hours=i*4) for i in range(3)] # doctest: +SKIP
>>> avg = average_over_time_windows(src, dt_src, dt_trg) # doctest: +SKIP
"""
# Convert input time steps to numpy arrays
dt_src, dt_trg = np.array(dt_src), np.array(dt_trg)
trg_secs = np.array([mktime(tstep.timetuple()) for tstep in dt_trg])
src_secs = np.array([mktime(tstep.timetuple()) for tstep in dt_src])
helper_secs = np.arange(trg_secs[0],trg_secs[-1],helper_interval)
# Interpolate to target points
f = interpolate.interp1d(src_secs, src, axis=0, bounds_error=False)
helpers = f(helper_secs)
# Mask those values as invalid which are more than maxdist from the next source point
tree = cKDTree(src_secs.reshape((-1,1)))
dists, ix = tree.query(helper_secs.reshape((-1,1)), k=1)
# deal with edges (in case of extrapolation, we apply nearest neighbour)
helpers = np.where(np.isnan(helpers), src[ix], helpers)
# mask out points which are too far from the next source point
helpers[np.where(dists>maxdist)[0]] = np.nan
# Create a new container for the target data
trg_shape = list(src.shape)
trg_shape[0] = len(dt_trg)-1
trg = np.repeat(np.nan, _shape2size(trg_shape)).reshape(trg_shape)
for i in range(len(dt_trg)-1):
# width of window
width = float(_tdelta2seconds( dt_trg[i+1] - dt_trg[i] ))
# select the helper time steps that fall inside the target time window
helper_ix = np.where(np.logical_and(helper_secs >= trg_secs[i], helper_secs <= trg_secs[i+1]))[0]
trg[i] = np.mean(helpers[helper_ix], axis=0)
return trg
def _get_func(funcname):
"""
Retrieve the numpy function with name <funcname>
Parameters
----------
funcname : string
"""
try:
func = getattr(np, funcname)
except AttributeError:
print '<'+funcname+'> is not a valid function in numpy...'
raise
return func
def _shape2size(shape):
"""
Compute the size which corresponds to a shape
"""
out=1
for item in shape:
out*=item
return out
def from_to(tstart, tend, tdelta):
"""Return a list of timesteps from <tstart> to <tend> of length <tdelta>
Parameters
----------
tstart : datetime isostring (%Y-%m-%d %H:%M:%S), e.g. 2000-01-01 15:34:12
or datetime object
tend : datetime isostring (%Y-%m-%d %H:%M:%S), e.g. 2000-01-01 15:34:12
or datetime object
tdelta : integer representing time interval in SECONDS
Returns
-------
output : list of datetime.datetime objects
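Examples
--------
>>> for tstep in from_to("2008-06-01 00:00:00", "2008-06-01 03:00:00", 3600): print tstep
2008-06-01 00:00:00
2008-06-01 01:00:00
2008-06-01 02:00:00
2008-06-01 03:00:00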
"""
if not isinstance(tstart, dt.datetime):
tstart = dt.datetime.strptime(tstart, "%Y-%m-%d %H:%M:%S")
if not isinstance(tend, dt.datetime):
tend = dt.datetime.strptime(tend, "%Y-%m-%d %H:%M:%S")
tdelta = dt.timedelta(seconds=tdelta)
tsteps = [tstart,]
tmptime = tstart
while True:
tmptime = tmptime + tdelta
if tmptime > tend:
break
else:
tsteps.append(tmptime)
return tsteps
def _tdelta2seconds(tdelta):
"""
Convert a dt.timedelta object to seconds
Parameters
----------
tdelta : a dt.timedelta object
"""
return tdelta.days * 86400 + tdelta.seconds
def _get_tdelta(tstart, tend, as_secs=False):
"""Returns the difference between two datetimes
"""
if not isinstance(tstart, dt.datetime):
tstart = dt.datetime.strptime(tstart, "%Y-%m-%d %H:%M:%S")
if not isinstance(tend, dt.datetime):
tend = dt.datetime.strptime(tend, "%Y-%m-%d %H:%M:%S")
if not as_secs:
return tend-tstart
else:
return _tdelta2seconds(tend-tstart)
def iso2datetime(iso):
"""Converts an ISO formatted time string to a datetime object."""
# in case the argument has been parsed to datetime before
if isinstance(iso, dt.datetime):
return iso
# sometimes isoformat separates date and time by a white space
iso = iso.replace(" ", "T")
try:
return dt.datetime.strptime(iso, "%Y-%m-%dT%H:%M:%S.%f")
except (ValueError, TypeError):
return dt.datetime.strptime(iso, "%Y-%m-%dT%H:%M:%S")
except:
print("Could not convert argument <%r> to datetime. Probably not an isostring. See following traceback:" % iso)
raise
def timestamp2index(ts, delta, refts, **kwargs):
"""Calculates the array index for a certain time in an equidistant
time-series given the reference time (where the index would be 0)
and the time discretization.
If any of the input parameters contains timezone information, all others
also need to contain timezone information.
Parameters
----------
ts : str or datetime-object
The timestamp to determine the index for
If it is a string, it will be converted to datetime using the
function iso2datetime
delta : str or timedelta object
The discretization of the time series (the amount of time that
elapsed between indices)
If used as a string, it needs to be given in the format
"keyword1=value1,keyword2=value2". Keywords must be understood
by the timedelta constructor (like days, hours,
minutes, seconds) and the values may only be integers.
refts : str or datetime-object
The reference timestamp (where the index would be 0)
If it is a string, it will be converted to datetime using the
function iso2datetime
Returns
-------
index : integer
The index of a discrete time series array of the given
parameters
Example
-------
>>> import datetime as dt
>>> timestr1, timestr2 = '2008-06-01T00:00:00', '2007-01-01T00:00:00'
>>> timestamp2index(timestr1, 'minutes=5', timestr2)
148896
>>> timestamp2index(timestr1, 'hours=1,minutes=5',timestr2)
11453
>>> timestamp2index(timestr1, dt.timedelta(hours=1, minutes=5), timestr2)
11453
"""
if not isinstance(ts, dt.datetime):
_ts = iso2datetime(ts, **kwargs)
else:
_ts = ts
if not isinstance(refts, dt.datetime):
_refts = iso2datetime(refts, **kwargs)
else:
_refts = refts
if not isinstance(delta, dt.timedelta):
kwargs = dict([(sp[0], int(sp[1]))
for sp in [item.split('=') for item in delta.split(',')]])
_dt = dt.timedelta(**kwargs)
else:
_dt = delta
return int(_tdelta2seconds(_ts - _refts) / _tdelta2seconds(_dt))
def _idvalid(data, isinvalid=[-99., 99, -9999., -9999], minval=None, maxval=None):
"""Identifies valid entries in an array and returns the corresponding indices
Invalid values are NaN and Inf. Other invalid values can be passed using the
isinvalid keyword argument.
Parameters
----------
data : array of floats
isinvalid : list of what is considered an invalid value
minval : float
minimum valid value; entries below are flagged invalid
maxval : float
maximum valid value; entries above are flagged invalid
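Examples
--------
>>> data = np.array([-99., 5., -9999., 0., np.nan, 7.])
>>> _idvalid(data)
array([1, 3, 5])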
"""
ix = np.ma.masked_invalid(data).mask
for el in isinvalid:
ix = np.logical_or(ix, np.ma.masked_where(data==el, data).mask)
if minval is not None:
ix = np.logical_or(ix, np.ma.masked_less(data, minval).mask)
if maxval is not None:
ix = np.logical_or(ix, np.ma.masked_greater(data, maxval).mask)
return np.where(np.logical_not(ix))[0]
def meshgridN(*arrs):
"""N-dimensional meshgrid
Just pass a sequence of coordinate arrays
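Example
-------
>>> xx, yy = meshgridN(np.array([1, 2, 3]), np.array([10, 20]))
>>> xx
array([[1, 1],
       [2, 2],
       [3, 3]])
>>> yy
array([[10, 20],
       [10, 20],
       [10, 20]])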
"""
arrs = tuple(arrs)
lens = map(len, arrs)
dim = len(arrs)
sz = 1
for s in lens:
sz*=s
ans = []
for i, arr in enumerate(arrs):
slc = [1]*dim
slc[i] = lens[i]
arr2 = np.asarray(arr).reshape(slc)
for j, sz in enumerate(lens):
if j!=i:
arr2 = arr2.repeat(sz, axis=j)
ans.append(arr2)
## return tuple(ans[::-1])
return tuple(ans)
def gridaspoints(*arrs):
"""Creates an N-dimensional grid form arrs and returns grid points sequence of point coordinate pairs
"""
# there is a small gotcha here.
# with the convention following the 2013-08-30 sprint in Potsdam it was
# agreed upon that arrays should have shapes (...,z,y,x) similar to the
# convention that polar data should be (...,time,scan,azimuth,range)
#
# Still coordinate tuples are given in the order (x,y,z) [and hopefully not
# more dimensions]. Therefore np.meshgrid must be fed the axis coordinates
# in shape order (z,y,x) and the result needs to be reversed in order
# for everything to work out.
grid = tuple([dim.ravel()
for dim in reversed(np.meshgrid(*arrs, indexing='ij'))])
return np.vstack(grid).transpose()
def issequence(x):
"""Test whether x is a sequence of numbers
"""
try:
# can we get a length on the object?
len(x)
except TypeError:
return False
# do all items qualify as real numbers?
return np.all(np.isreal(x))
def trapezoid(data, x1, x2, x3, x4):
"""
Applies the trapezoidal function described in Vulpiani et al., 2012, to determine
the degree of membership in the non-meteorological target class.
Parameters
----------
data : array
Array containing the data
x1 : float
x-value of the first vertex of the trapezoid
x2 : float
x-value of the second vertex of the trapezoid
x3 : float
x-value of the third vertex of the trapezoid
x4 : float
x-value of the fourth vertex of the trapezoid
Returns
-------
d : array
Array of values describing degree of membership in nonmeteorological target class.
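Examples
--------
Values below x1 and above x4 get zero membership, values between x2 and x3
get full membership, and the flanks are interpolated linearly (exact output
formatting may vary with the numpy version, hence the skip):
>>> print trapezoid(np.array([0.5, 1.5, 2.5, 3.5, 4.5]), 1., 2., 3., 4.) # doctest: +SKIP
[ 0.   0.5  1.   0.5  0. ]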
"""
d = np.ones(np.shape(data))
d[np.logical_or(data <= x1, data >= x4)] = 0
d[np.logical_and(data >= x2, data <= x3)] = 1
d[np.logical_and(data > x1, data < x2)] = (data[np.logical_and(data > x1, data < x2)] - x1)/float((x2-x1))
d[np.logical_and(data > x3, data < x4)] = (x4 - data[np.logical_and(data > x3, data < x4)])/float((x4-x3))
d[np.isnan(data)] = np.nan
return d
def maximum_intensity_projection(data, r=None, az=None, angle=None, elev=None, autoext=True):
"""
Computes the maximum intensity projection along an arbitrary cut through the
PPI from polar data.
Parameters
----------
data : array
Array containing polar data (azimuth, range)
r : array
Array containing range data
az : array
Array containing azimuth data
angle : float
Angle of the slice in degrees. Defaults to 0 and should be between 0 and 180:
0 means a horizontal slice, 90 means a vertical slice.
elev : float
Elevation angle of the scan in degrees. Defaults to 0.
autoext : True | False
This routine uses numpy.digitize to bin the data.
As this function needs bounds, we create one set of coordinates more than
would usually be provided by `r` and `az`.
Returns
-------
xs : array
meshgrid x array
ys : array
meshgrid y array
mip : array
Array containing the maximum intensity projection (range, range*2)
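Examples
--------
Illustrative sketch only (input values are random, so the call is skipped
by doctest and no output is shown):
>>> data = np.random.rand(360, 100) # doctest: +SKIP
>>> r = np.arange(100, dtype=np.float) * 1000. # doctest: +SKIP
>>> az = np.arange(360, dtype=np.float) # doctest: +SKIP
>>> xs, ys, mip = maximum_intensity_projection(data, r=r, az=az, angle=0., elev=1.) # doctest: +SKIP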
"""
from wradlib.georef import beam_height_n
# this may seem odd at first, but d1 and d2 are also used in several plotting
# functions and thus it may be easier to compare the functions
d1 = r
d2 = az
# providing 'reasonable defaults', based on the data's shape
if d1 is None:
d1 = np.arange(data.shape[1], dtype=np.float)
if d2 is None:
d2 = np.arange(data.shape[0], dtype=np.float)
if angle is None:
angle = 0.0
if elev is None:
elev = 0.0
if autoext:
# the ranges need to go 'one bin further', assuming some regularity
# we extend by the distance between the preceding bins.
x = np.append(d1, d1[-1]+(d1[-1]-d1[-2]))
# the angular dimension is supposed to be cyclic, so we just add the
# first element
y = np.append(d2, d2[0])
else:
# no autoext basically is only useful, if the user supplied the correct
# dimensions himself.
x = d1
y = d2
# roll data array to specified azimuth, assuming equidistant azimuth angles
ind = (d2 >= angle).nonzero()[0][0]
data = np.roll(data, ind, axis=0)
# build cartesian range array, add delta to last element to compensate for
# open bound (np.digitize)
dc = np.linspace( -np.max(d1), np.max(d1) + 0.0001, num=d1.shape[0]*2+1)
# get height values from polar data and build cartesian height array
# add delta to last element to compensate for open bound (np.digitize)
hp = np.zeros((y.shape[0], x.shape[0]))
hc = beam_height_n(x, elev)
hp[:] = hc
hc[-1] += 0.0001
# create meshgrid for polar data
xx, yy = np.meshgrid(x, y)
# create meshgrid for cartesian slices
xs, ys = np.meshgrid(dc,hc)
#xs, ys = np.meshgrid(dc,x)
# convert polar coordinates to cartesian
xxx = xx * np.cos(np.radians(90.-yy))
#yyy = xx * np.sin(np.radians(90.-yy))
# digitize coordinates according to cartesian range array
range_dig1 = np.digitize(xxx.ravel(), dc)
range_dig1.shape = xxx.shape
# digitize heights according polar height array
height_dig1 = np.digitize(hp.ravel(), hc)
# reshape accordingly
height_dig1.shape = hp.shape
# drop the last row and column of the digitized index arrays so their
# shape matches the (unextended) polar data array
range_dig1 = range_dig1[0:-1,0:-1]
height_dig1 = height_dig1[0:-1,0:-1]
# create height and range masks
height_mask = [(height_dig1 == i).ravel().nonzero()[0] for i in range(1, len(hc))]
range_mask = [(range_dig1 == i).ravel().nonzero()[0] for i in range(1, len(dc))]
# create mip output array, set outval to inf
mip = np.zeros((d1.shape[0], 2 * d1.shape[0]))
mip[:] = np.inf
# fill mip array,
# in some cases there are no values found in the specified range and height
# then we fill in nans and interpolate afterwards
for i in range(0, len(range_mask)):
mask1 = range_mask[i]
found = False
for j in range(0, len(height_mask)):
mask2 = np.intersect1d(mask1, height_mask[j])
# this is to catch the ValueError from the max() routine when calculating
# on empty array
try:
mip[j,i] = data.ravel()[mask2].max()
if not found:
found = True
except ValueError:
if found:
mip[j,i] = np.nan
# interpolate nans inside image, do not touch outvals
good = ~np.isnan(mip)
xp = good.ravel().nonzero()[0]
fp = mip[good]
x = np.isnan(mip).ravel().nonzero()[0]
mip[np.isnan(mip)] = np.interp(x, xp, fp)
# reset outval to nan
mip[mip == np.inf] = np.nan
return xs, ys, mip
if __name__ == '__main__':
print 'wradlib: Calling module <util> as main...'
def filter_window_polar(img,wsize,fun,scale,random=False):
r"""Apply a filter of an approximated square window of half size `fsize` on a given polar image `img`.
Parameters
----------
img : 2d array
Array of values to which the filter is to be applied
wsize : float
Half size of the window centred on the pixel [m]
fun : string
name of the 1d filter from scipy.ndimage.filters, e.g. "uniform"
for uniform_filter1d
scale : tuple of 2 floats
range [m] and azimuthal [radians] scale of the polar grid
random: bool
True to use a random azimuthal window size in order to avoid
long-term biases.
Returns
-------
output : array_like
an array with the same shape as `img`, containing the filter's results.
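Examples
--------
Illustrative sketch: a uniform filter with a 700 m half-size window on a
360 x 100 polar field with 1000 m range bins and 1 degree rays (input
values are random, so the call is skipped by doctest):
>>> img = np.random.rand(360, 100) # doctest: +SKIP
>>> filtered = filter_window_polar(img, 700., "uniform", (1000., np.radians(1.))) # doctest: +SKIP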
"""
rscale,ascale = scale
data_filtered = np.empty(img.shape,dtype=img.dtype)
fun = getattr(filters,"%s_filter1d" %(fun))
nbins = img.shape[-1]
ranges = np.arange(nbins)*rscale + rscale/2
asize = ranges*ascale
if random:
na = prob_round(wsize/asize).astype(int)
else:
na = np.fix(wsize/asize+0.5).astype(int)
na[na>20] = 20 # Maximum of adjacent azimuths (higher close to the origin) to increase performance
for n in np.unique(na):
index = ( na == n )
if n == 0:
data_filtered[:,index] = img[:,index]
else:
data_filtered[:,index] = fun(img[:,index],size=2*n+1,mode='wrap',axis=0)
nr = np.fix(wsize/rscale+0.5).astype(int)
data_filtered = fun(data_filtered,size=2*nr+1,axis=1)
return(data_filtered)
def prob_round(x, prec = 0):
"""Round the float number `x` to the lower or higher integer randomly following a binomial distribution """
fixup = np.sign(x) * 10**prec
x = x * fixup   # avoid modifying the caller's array in place
intx = x.astype(int)
rounded = intx + np.random.binomial(1, x-intx)
return rounded/fixup
def filter_window_cartesian(img,wsize,fun,scale):
r"""Apply a filter of an approximated square window of half size `fsize` on a given polar image `img`. This method is faster than filter_radius_polar()
Parameters
----------
img : 2d array
Array of values to which the filter is to be applied
wsize : float
Half size of the window centred on the pixel [m]
fun : string
name of the 2d filter from scipy.ndimage.filters
scale : tuple of 2 floats
x and y scale of the cartesian grid [m]
Returns
-------
output : array_like
an array with the same shape as `img`, containing the filter's results.
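Examples
--------
Illustrative sketch: uniform smoothing with a 300 m half-size window on a
100 m cartesian grid (input values are random, so the call is skipped by
doctest):
>>> img = np.random.rand(100, 100) # doctest: +SKIP
>>> smooth = filter_window_cartesian(img, 300., "uniform", np.array([100., 100.])) # doctest: +SKIP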
"""
fun = getattr(filters,"%s_filter" %(fun))
size = np.fix(wsize/np.asarray(scale)+0.5).astype(int)
data_filtered = fun(img, size)
return(data_filtered)