# Copyright 2016 James Hensman, Mark van der Wilk,
# Valentine Svensson, alexggmatthews,
# PabloLeon, fujiisoup
# Copyright 2017 Artem Artemev @awav
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from .. import misc
from ..core.errors import GPflowError
from ..core.compilable import Build
from .parameter import Parameter
class DataHolder(Parameter):
"""
DataHolder is similar to the Parameter with only difference that
default values for `fix_shape` and `trainable` options are opposite
to the Parameter's and it does not have prior and transform options.
By default shape of data holders is in floating mode and data holder
does not provide a trainable option at all.
:param value: Data input value. It can be a float, an integer,
a float or integer like list, numpy array or TensorFlow variable.
:param dtype: Type of new data holder.
:param fix_shape: Default value is `False` and indicates that shape
of internal tensor does not have specific shape, in other words,
it is None.
:param name: Name of the parameter.
:raises: ValueError exception if value is not valid.
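
    A minimal usage sketch; the top-level `gpflow.DataHolder` alias and the
    inherited `assign` method are assumed to be available here:

    ```
    import numpy as np
    import gpflow

    data = gpflow.DataHolder(np.ones((2, 3)))
    data.assign(np.zeros((5, 3)))  # allowed, since the shape is not fixed
    ```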
"""
def __init__(self, value, dtype=None, fix_shape=False, name=None):
self._dataholder_tensor = None
super().__init__(value=value, name=name, dtype=dtype, fix_shape=fix_shape)
@property
def trainable(self):
return False
@property
def parameter_tensor(self):
return self._dataholder_tensor
def set_trainable(self, value, graph=None):
        raise NotImplementedError('Data holder does not support the trainable option.')
def is_built(self, graph):
if graph is None:
raise ValueError('Graph is not specified.')
if self.graph is not None:
if self.graph is not graph:
return Build.NOT_COMPATIBLE_GRAPH
return Build.YES
return Build.NO
def as_pandas_table(self):
column_names = ['class', 'shape', 'fixed_shape', 'value']
column_values = [self.__class__.__name__, self.shape, self.fixed_shape, self.value]
column_values = [[value] for value in column_values]
df = misc.pretty_pandas_table([self.pathname], column_names, column_values)
return df
def _parameter_name(self):
return misc.tensor_name(self.tf_pathname, 'dataholder')
def _clear(self):
self.reset_name()
self._initial_value_tensor = None
self._dataholder_tensor = None
self._is_initialized_tensor = None
def _build(self):
tensor = self._build_parameter()
self._dataholder_tensor = tensor
self._is_initialized_tensor = tf.is_variable_initialized(tensor)
def _init_parameter_defaults(self):
self._initial_value_tensor = None
self._dataholder_tensor = None
self._is_initialized_tensor = None
def _init_parameter_attributes(self, _prior, _transform, _trainable):
pass
def _set_parameter_attribute(self, attr, value):
raise NotImplementedError('Data holder does not have parameter attributes.')
def _read_parameter_tensor(self, session):
return session.run(self._dataholder_tensor)
def _apply_transform(self, value):
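        # Data holders have no transform; the value passes through unchanged.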
return value
def _set_parameter_tensor(self, tensor):
self._dataholder_tensor = tensor
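
    # Plain attribute assignment: a data holder has no prior, transform or
    # trainable attributes for Parameter's `__setattr__` machinery to manage.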
def __setattr__(self, name, value):
object.__setattr__(self, name, value)
class Minibatch(DataHolder):
"""
Minibatch is a special case of data holders. As the name implies the minibatch
object provides shuffling and endless batching mechanism for input data.
Minibatch formes batches along zero axe of the input array.
Minibatch is shape agnostic at zero axe. Once you created a minibatch you can
vary size of the dataset, but feature shapes must be fixed.
CAVEAT: Minibatch is not auto-initializable. It means that whenever you switch
to another session, autoflow methods and optimizers will not be able to
intialize TensorFlow dataset iterator. You have to call `intialize` method
for Minibatch explicitly. Simple cases are not affected though.
```
with tf.Session() as session1:
mini = gpflow.Minibatch(data)
with tf.Session() as session2:
# mini.read_value(session=session2) # <<< fails.
mini.initialize(session=session2)
mini.read_value(session=session2) # <<< works fine.
```
:param value: Numpy array.
:param batch_size: Size of the batches.
    :param shuffle: If `True`, the input data is shuffled before batching.
    :param seed: Seed value for the TensorFlow random generator.
    :param dtype: Type of the new minibatch.
    :param name: Minibatch name.
    :raises: ValueError if the input value is not a numpy array or a list.
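
    A minimal usage sketch, assuming `data` is a numpy array with at least
    ten rows and that a default session is available to `read_value`:

    ```
    mini = gpflow.Minibatch(data, batch_size=10, seed=0)
    first = mini.read_value()   # first batch of 10 rows
    second = mini.read_value()  # next batch of 10 rows
    mini.set_batch_size(100)    # later reads return 100 rows per batch
    ```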
"""
def __init__(self, value, batch_size=1, shuffle=True,
seed=None, dtype=None, name=None):
if not misc.is_valid_param_value(value) or misc.is_tensor(value):
raise ValueError('The value must be either an array or a scalar.')
super().__init__(value, name=name, dtype=dtype)
self._batch_size = batch_size
self._shuffle = shuffle
self._seed = seed
@property
def batch_size(self):
return self._batch_size
@batch_size.setter
def batch_size(self, value):
        self.set_batch_size(value)
@property
def initializables(self):
return [self._iterator_tensor]
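
    # Feeds used when the iterator is initialized: the placeholder cache
    # receives the full data array, and the batch-size placeholder its size.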
@property
def initializable_feeds(self):
if self._dataholder_tensor is None:
return None
return {self._cache_tensor: self._value,
self._batch_size_tensor: self._batch_size}
@property
def seed(self):
return self._seed
@seed.setter
def seed(self, seed):
        if self.graph is not None and self.is_built_coherence() is Build.YES:
raise GPflowError('Minibatch seed cannot be changed when it is built.')
self._seed = seed
def set_batch_size(self, size, session=None):
self._batch_size = size
session = self.enquire_session(session)
if session is not None:
self.initialize(session=session, force=True)
def _clear(self):
self.reset_name()
self._cache_tensor = None
self._batch_size_tensor = None
self._dataholder_tensor = None
self._iterator_tensor = None
def _build(self):
initial_tensor = self._build_placeholder_cache()
self._cache_tensor = initial_tensor
self._dataholder_tensor = self._build_dataholder(initial_tensor)
def _build_placeholder_cache(self):
value = self._value
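        # Shapeless placeholder that caches the full data array; it is fed
        # via `initializable_feeds` each time the iterator is initialized.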
return tf.placeholder(dtype=value.dtype, shape=None, name='minibatch_init')
def _build_dataholder(self, initial_tensor):
if initial_tensor is None:
            raise GPflowError('Minibatch state corrupted.')
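        # Endless pipeline: slice the cached array along the zeroth axis,
        # repeat indefinitely, optionally shuffle, then batch with a size
        # taken from a placeholder so it can change at re-initialization.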
data = tf.data.Dataset.from_tensor_slices(initial_tensor)
data = data.repeat()
if self._shuffle:
shape = self._value.shape
data = data.shuffle(buffer_size=shape[0], seed=self._seed)
self._batch_size_tensor = tf.placeholder(tf.int64, shape=())
data = data.batch(batch_size=self._batch_size_tensor)
self._iterator_tensor = data.make_initializable_iterator()
name = self._parameter_name()
return self._iterator_tensor.get_next(name=name)
def _init_parameter_defaults(self):
self._cache_tensor = None
self._batch_size_tensor = None
self._dataholder_tensor = None
self._iterator_tensor = None
self._shuffle = True
self._batch_size = 1
self._seed = None
def _parameter_name(self):
name = 'minibatch'
if self.parent is self:
return misc.tensor_name(self.tf_pathname, name)
return name