https://github.com/mcdavid109/Google-Neural-Machine-Translation-GNMT-
Tip revision: fe3f442187b7fc460e287c78e9abb11e2cefd35a authored by Yifan Xu on 02 June 2017, 22:16:59 UTC
change readme
change readme
Tip revision: fe3f442
linear_modern.py
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Basic linear combinations that implicitly generate variables."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
# Module-level default for the `use_weight_normalization` argument of the
# linear helpers in this file; weight normalization is off unless a caller
# opts in.
use_weight_normalization_default = False


def linear(args, output_size, bias, bias_start=0.0, use_l2_loss=False,
           use_weight_normalization=use_weight_normalization_default,
           scope=None, timestep=-1, weight_initializer=None,
           orthogonal_scale_factor=1.1):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    All inputs are concatenated along dimension 1 and multiplied by a single
    "Matrix" variable of shape [sum of input widths, output_size]; an optional
    "Bias" variable of shape [output_size] is added to the result.

    Args:
      args: a 2D Tensor or a non-empty list/tuple of 2D, batch x n, Tensors.
      output_size: int, second dimension of the weight matrix.
      bias: boolean, whether to add a bias term or not.
      bias_start: starting value to initialize the bias; 0 by default.
      use_l2_loss: if True, an l2 regularizer with scale 1e-5 is attached to
        both the "Matrix" and the "Bias" variables.
      use_weight_normalization: if True, the matrix is passed through
        `weight_normalization` before use. Only allowed with a single input.
      scope: VariableScope for the created subgraph; defaults to "Linear".
      timestep: forwarded to `weight_normalization`; -1 by default.
      weight_initializer: accepted for interface compatibility; not used by
        this function (the matrix always uses the uniform unit scaling
        initializer).
      orthogonal_scale_factor: accepted for interface compatibility; not used
        by this function.

    Returns:
      A 2D Tensor with shape [batch x output_size] equal to
      sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

    Raises:
      ValueError: if `args` is None or an empty list/tuple, if weight
        normalization is requested with more than one input, or if some of
        the arguments has unspecified or wrong shape.
    """
    # The original `assert args` guard was disabled because it errored on
    # upgraded TensorFlow. Checking for None / an empty sequence validates the
    # same precondition without ever truth-testing a Tensor.
    if args is None or (isinstance(args, (list, tuple)) and not args):
        raise ValueError("`args` must be specified")
    if not isinstance(args, (list, tuple)):
        args = [args]

    if len(args) > 1 and use_weight_normalization:
        raise ValueError(
            'you can not use weight_normalization with multiple inputs '
            'because the euclidean norm will be incorrect -- besides, you '
            'should be using multiple integration instead!!!')

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2D arguments: %s" % str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
        total_arg_size += shape[1]

    l_regularizer = tf.contrib.layers.l2_regularizer(1e-5) if use_l2_loss else None

    # Now the computation.
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable(
            "Matrix", [total_arg_size, output_size],
            initializer=tf.uniform_unit_scaling_initializer(),
            regularizer=l_regularizer)
        if use_weight_normalization:
            # NOTE(review): `weight_normalization` is not defined or imported
            # in the visible part of this file -- confirm it is in scope.
            matrix = weight_normalization(matrix, timestep=timestep)

        if len(args) == 1:
            res = tf.matmul(args[0], matrix)
        else:
            # NOTE(review): `tf.concat(1, args)` is the pre-1.0 argument order
            # (concat_dim, values); TensorFlow >= 1.0 expects
            # `tf.concat(values, axis)`. Left as-is; confirm the targeted
            # TensorFlow version before changing it.
            res = tf.matmul(tf.concat(1, args), matrix)

        if not bias:
            return res
        bias_term = tf.get_variable(
            "Bias", [output_size],
            initializer=tf.constant_initializer(bias_start),
            regularizer=l_regularizer)
    return res + bias_term
def batch_timesteps_linear(input, output_size, bias, bias_start=0.0, use_l2_loss=False,
                           use_weight_normalization=use_weight_normalization_default,
                           scope=None, tranpose_input=True, timestep=-1):
    """Apply one shared linear map to the last axis of a 3D Tensor.

    A single "Matrix" variable of shape [input_size, output_size] is tiled
    along the leading axis and batch-multiplied with the input, so every
    slice along the first two axes is transformed by the same weights.

    Args:
      input: a 3D Tensor, documented by the original author as
        [timesteps, batch_size, input_size]. The last dimension must be
        statically known.
      output_size: int, second dimension of the weight matrix.
      bias: boolean, whether to add a bias term or not.
      bias_start: starting value to initialize the bias; 0 by default.
      use_l2_loss: if True, an l2 regularizer with scale 1e-5 is attached to
        the "Matrix" variable (the bias is created without a regularizer).
      use_weight_normalization: if True, the matrix is passed through
        `weight_normalization` before use.
      scope: VariableScope for the created subgraph; defaults to "Linear".
      tranpose_input: if True, the first two axes of `input` are swapped
        before the multiplication and swapped back on the result.
      timestep: accepted for interface compatibility; not used by this
        function.

    Returns:
      A 3D Tensor whose first two axes are laid out like those of `input`
      and whose last dimension is `output_size`.

    Raises:
      ValueError: if `input` is not 3D or its last dimension is not
        statically known.
    """
    # Validate the rank before transposing: a [1, 0, 2] permutation on a
    # non-3D tensor would otherwise fail inside TensorFlow before this
    # function gets the chance to report the problem itself.
    static_rank = input.get_shape().ndims
    if static_rank is not None and static_rank != 3:
        raise ValueError(
            'shape must be of size 3, you have inputted shape size of: %d'
            % static_rank)

    if tranpose_input:
        input = tf.transpose(input, [1, 0, 2])

    # as_list() raises ValueError itself when the rank is still unknown.
    shape_list = input.get_shape().as_list()
    leading_dim = shape_list[0]
    total_arg_size = shape_list[2]
    if total_arg_size is None:
        raise ValueError(
            "batch_timesteps_linear expects a statically known last "
            "dimension: %s" % str(shape_list))
    if leading_dim is None:
        # The static size is unknown (e.g. a variable-sized placeholder
        # axis); read it at run time instead of passing None to tf.tile.
        leading_dim = tf.shape(input)[0]

    l_regularizer = tf.contrib.layers.l2_regularizer(1e-5) if use_l2_loss else None

    # Now the computation.
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable(
            "Matrix", [total_arg_size, output_size],
            initializer=tf.uniform_unit_scaling_initializer(),
            regularizer=l_regularizer)
        if use_weight_normalization:
            # NOTE(review): `weight_normalization` is not defined or imported
            # in the visible part of this file -- confirm it is in scope.
            matrix = weight_normalization(matrix)

        # One copy of the matrix per leading index so it lines up with the
        # batched multiplication below.
        matrix = tf.tile(tf.expand_dims(matrix, 0), [leading_dim, 1, 1])

        # NOTE(review): `tf.batch_matmul` only exists in pre-1.0 TensorFlow
        # (later versions fold it into `tf.matmul`); left as-is, confirm the
        # targeted TensorFlow version before changing it.
        res = tf.batch_matmul(input, matrix)

        if bias:
            bias_term = tf.get_variable(
                "Bias", [output_size],
                initializer=tf.constant_initializer(bias_start))
            res = res + bias_term

    if tranpose_input:
        # Restore the caller's original axis order.
        res = tf.transpose(res, [1, 0, 2])
    return res