https://github.com/xunhuang1995/SGAN
Tip revision: e235644b94eb5ba7a14e271a80c9064af16fa680 ("Fix a typo"), authored by Omid Poursaeed on 14 May 2017, 03:36:03 UTC
nn.py
"""
neural network stuff, intended to be used with Lasagne 0.1
"""
import numpy as np
import theano as th
import theano.tensor as T
import lasagne
from lasagne.layers import dnn
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from theano.ifelse import ifelse
# T.nnet.relu has some stability issues, this is better
def relu(x):
    return T.maximum(x, 0)
def lrelu(x, a=0.2):
    return T.maximum(x, a*x)
def centered_softplus(x):
    return T.nnet.softplus(x) - np.cast[th.config.floatX](np.log(2.))
def log_sum_exp(x, axis=1):
    # numerically stable log-sum-exp; as written (dimshuffle(0, 'x')) it
    # assumes a 2D input reduced over axis=1
    m = T.max(x, axis=axis)
    return m + T.log(T.sum(T.exp(x - m.dimshuffle(0, 'x')), axis=axis))
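# Hedged usage sketch (not part of the original file): per-row log-sum-exp
# of a batch of class logits.
#   logits = T.matrix('logits')                 # shape (batch, classes)
#   lse = log_sum_exp(logits)                   # shape (batch,)
#   f = th.function([logits], lse)
#   f(np.random.randn(4, 10).astype(th.config.floatX))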
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    # Adam optimizer (Kingma & Ba, 2014): returns a list of
    # (shared_variable, update_expression) pairs for th.function
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))  # timestep, shared across params
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))   # first moment estimate
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))  # second moment estimate
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)    # bias correction
        mg_hat = mg_t / (1. - mom2 ** t)  # bias correction
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates
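# Hedged usage sketch (not part of the original file): compile a training
# step that applies the Adam updates on each call, here for a tiny
# least-squares toy problem.
#   x = T.vector('x')
#   w = th.shared(np.zeros(3, dtype=th.config.floatX), name='w')
#   cost = T.sum(T.square(T.dot(x, w) - 1.))
#   train_fn = th.function([x], cost, updates=adam_updates([w], cost, lr=0.001))
#   for i in range(100):
#       train_fn(np.ones(3, dtype=th.config.floatX))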
def adam_conditional_updates(params, cost, mincost, lr=0.001, mom1=0.9, mom2=0.999): # if cost is less than mincost, don't do update
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, ifelse(cost < mincost, v, v_t)))
        updates.append((mg, ifelse(cost < mincost, mg, mg_t)))
        updates.append((p, ifelse(cost < mincost, p, p_t)))
    updates.append((t, ifelse(cost < mincost, t, t+1)))
    return updates
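# Usage is identical to adam_updates except for the extra threshold: whenever
# cost < mincost, every ifelse above returns the old value, so the moments,
# parameters and timestep are all left unchanged (a no-op step). Hedged sketch,
# reusing the names from the adam_updates example above:
#   train_fn = th.function([x], cost,
#                          updates=adam_conditional_updates([w], cost, mincost=0.1))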
class Deconv2DLayer(lasagne.layers.Layer):
    def __init__(self, incoming, target_shape, filter_size, stride=(2, 2), padding='half',
                 W=lasagne.init.Normal(0.05), b=lasagne.init.Constant(0.), nonlinearity=relu, **kwargs):
        super(Deconv2DLayer, self).__init__(incoming, **kwargs)
        self.target_shape = target_shape
        self.nonlinearity = (lasagne.nonlinearities.identity if nonlinearity is None else nonlinearity)
        self.filter_size = lasagne.layers.dnn.as_tuple(filter_size, 2)
        self.stride = lasagne.layers.dnn.as_tuple(stride, 2)
        self.padding = padding
        # filter shape is (input_channels, output_channels, rows, cols), as expected
        # by the gradient-of-conv op used in get_output_for; index the normalized
        # tuple so an integer filter_size also works
        self.W_shape = (incoming.output_shape[1], target_shape[1], self.filter_size[0], self.filter_size[1])
        self.W = self.add_param(W, self.W_shape, name="W")
        if b is not None:
            self.b = self.add_param(b, (target_shape[1],), name="b")
        else:
            self.b = None

    def get_output_for(self, input, **kwargs):
        # transposed convolution, implemented as the gradient of a convolution
        # with respect to its inputs
        op = T.nnet.abstract_conv.AbstractConv2d_gradInputs(imshp=self.target_shape, kshp=self.W_shape,
                                                            subsample=self.stride, border_mode=self.padding)
        activation = op(self.W, input, self.target_shape[2:])
        if self.b is not None:
            activation += self.b.dimshuffle('x', 0, 'x', 'x')
        return self.nonlinearity(activation)

    def get_output_shape_for(self, input_shape):
        return self.target_shape
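# Hedged usage sketch (shapes are illustrative, not from the original repo):
# upsample a 4x4 feature map to 8x8 with a stride-2 transposed convolution.
#   batch_size = 100
#   l_in = lasagne.layers.InputLayer(shape=(batch_size, 128, 4, 4))
#   l_up = Deconv2DLayer(l_in, target_shape=(batch_size, 64, 8, 8),
#                        filter_size=(5, 5), stride=(2, 2))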
class BatchNormLayer(lasagne.layers.Layer):
    def __init__(self, incoming, b=lasagne.init.Constant(0.), g=lasagne.init.Constant(1.), nonlinearity=relu, **kwargs):
        super(BatchNormLayer, self).__init__(incoming, **kwargs)
        self.nonlinearity = nonlinearity
        k = self.input_shape[1]  # number of channels / units to normalize
        if b is not None:
            self.b = self.add_param(b, (k,), name="b", regularizable=False)
        if g is not None:
            self.g = self.add_param(g, (k,), name="g", regularizable=False)
        # running averages of the batch statistics, used at test time
        self.avg_batch_mean = self.add_param(lasagne.init.Constant(0.), (k,), name="avg_batch_mean", regularizable=False, trainable=False)
        self.avg_batch_var = self.add_param(lasagne.init.Constant(1.), (k,), name="avg_batch_var", regularizable=False, trainable=False)
        if len(self.input_shape) == 4:
            self.axes_to_sum = (0, 2, 3)
            self.dimshuffle_args = ['x', 0, 'x', 'x']
        else:
            self.axes_to_sum = 0
            self.dimshuffle_args = ['x', 0]

    def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic:
            norm_features = (input - self.avg_batch_mean.dimshuffle(*self.dimshuffle_args)) / T.sqrt(1e-6 + self.avg_batch_var).dimshuffle(*self.dimshuffle_args)
        else:
            batch_mean = T.mean(input, axis=self.axes_to_sum).flatten()
            centered_input = input - batch_mean.dimshuffle(*self.dimshuffle_args)
            batch_var = T.mean(T.square(centered_input), axis=self.axes_to_sum).flatten()
            batch_stdv = T.sqrt(1e-6 + batch_var)
            norm_features = centered_input / batch_stdv.dimshuffle(*self.dimshuffle_args)
            # exponential moving averages of the batch statistics; the variance update
            # carries Bessel's correction n/(n-1). These updates are only stored on the
            # layer -- the caller must collect them into the training function.
            new_m = 0.9*self.avg_batch_mean + 0.1*batch_mean
            new_v = 0.9*self.avg_batch_var + T.cast((0.1*input.shape[0])/(input.shape[0]-1), th.config.floatX)*batch_var
            self.bn_updates = [(self.avg_batch_mean, new_m), (self.avg_batch_var, new_v)]
        if hasattr(self, 'g'):
            activation = norm_features*self.g.dimshuffle(*self.dimshuffle_args)
        else:
            activation = norm_features
        if hasattr(self, 'b'):
            activation += self.b.dimshuffle(*self.dimshuffle_args)
        if self.nonlinearity is not None:
            return self.nonlinearity(activation)
        else:
            return activation
def batch_norm(layer, b=lasagne.init.Constant(0.), g=lasagne.init.Constant(1.), **kwargs):
    """
    Apply batch normalization to an existing layer, moving the layer's
    nonlinearity so that it is applied after normalization.
    """
    nonlinearity = getattr(layer, 'nonlinearity', None)
    if nonlinearity is not None:
        layer.nonlinearity = lasagne.nonlinearities.identity
    else:
        nonlinearity = lasagne.nonlinearities.identity
    if hasattr(layer, 'b'):
        # the layer's bias is redundant with BatchNormLayer's shift, so drop it
        del layer.params[layer.b]
        layer.b = None
    return BatchNormLayer(layer, b, g, nonlinearity=nonlinearity, **kwargs)
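# Hedged usage sketch (layer names are illustrative): wrap a layer with
# batch_norm, build the cost, then collect the running-average updates that
# BatchNormLayer stashes in .bn_updates and pass them to the training function
# together with the optimizer updates.
#   l_top = batch_norm(lasagne.layers.DenseLayer(l_prev, num_units=256, nonlinearity=lrelu))
#   # ... build cost from lasagne.layers.get_output(l_top);
#   # updates = adam_updates(params, cost) ...
#   for layer in lasagne.layers.get_all_layers(l_top):
#       if hasattr(layer, 'bn_updates'):
#           updates += layer.bn_updates
#   train_fn = th.function([x], cost, updates=updates)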
class GaussianNoiseLayer(lasagne.layers.Layer):
    def __init__(self, incoming, sigma=0.1, **kwargs):
        super(GaussianNoiseLayer, self).__init__(incoming, **kwargs)
        self._srng = RandomStreams(lasagne.random.get_rng().randint(1, 2147462579))
        self.sigma = sigma

    def get_output_for(self, input, deterministic=False, use_last_noise=False, **kwargs):
        if deterministic or self.sigma == 0:
            return input
        else:
            if not use_last_noise:
                self.noise = self._srng.normal(input.shape, avg=0.0, std=self.sigma)
            return input + self.noise
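# Hedged usage sketch: inject input noise during training only; building the
# output with deterministic=True (as at test time) bypasses the noise. Here
# l_top denotes whatever network is stacked on top of l_noisy.
#   l_noisy = GaussianNoiseLayer(l_in, sigma=0.15)
#   train_out = lasagne.layers.get_output(l_top)                     # noisy
#   test_out = lasagne.layers.get_output(l_top, deterministic=True)  # clean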