https://github.com/stwisdom/urnn
memory_problem.py
import cPickle
import gzip
import theano
import pdb
from fftconv import cufft, cuifft
import numpy as np
import theano.tensor as T
from theano.ifelse import ifelse
from models import *
from optimizations import *    
import argparse, timeit
# Allow bigger Theano graphs: Python's default recursion limit of 1000 is too
# small for deep graphs on modern machines (https://github.com/Theano/Theano/issues/689):
import sys 
sys.setrecursionlimit(50000)

def generate_data(time_steps, n_data, n_sequence):
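    # Copy-memory task: each input column is a random digit sequence (values
    # 1-8) of length n_sequence, then time_steps-1 blanks (zeros), a marker
    # symbol (9) telling the model to start recalling, then n_sequence more
    # blanks. The target is blank everywhere except the final n_sequence
    # steps, where the model must reproduce the original sequence.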
    seq = np.random.randint(1, high=9, size=(n_data, n_sequence))
    zeros1 = np.zeros((n_data, time_steps-1))
    zeros2 = np.zeros((n_data, time_steps))
    marker = 9 * np.ones((n_data, 1))
    zeros3 = np.zeros((n_data, n_sequence))

    x = np.concatenate((seq, zeros1, marker, zeros3), axis=1).astype('int32')
    y = np.concatenate((zeros3, zeros2, seq), axis=1).astype('int32')
    
    return x.T, y.T
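
# Illustrative shape check (a sketch, not part of the training script): with
# time_steps=100 and n_sequence=10, each example column has length
# time_steps + 2*n_sequence = 120:
#   x, y = generate_data(100, n_data=4, n_sequence=10)
#   assert x.shape == (120, 4) and y.shape == (120, 4)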

    
def main(n_iter, n_batch, n_hidden, time_steps, learning_rate, savefile, model,
         input_type, out_every_t, loss_function, w_impl='urnn', n_reflections=None,
         flag_telescope=True, flag_useGivensForLoop=False):

    # --- Set data params ----------------
    n_input = 10
    n_output = 9
    n_sequence = 10
    n_train = int(1e5)
    n_test = int(1e4)
    num_batches = int(n_train / n_batch)
  

    # --- Create data --------------------
    train_x, train_y = generate_data(time_steps, n_train, n_sequence)
    test_x, test_y = generate_data(time_steps, n_test, n_sequence)

    # train_x and train_y each have shape (time_steps + 2*n_sequence) x n_train
    # (e.g., 120 x n_train when time_steps=100)

    s_train_x = theano.shared(train_x)
    s_train_y = theano.shared(train_y)

    s_test_x = theano.shared(test_x)
    s_test_y = theano.shared(test_y)
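
    # Keeping the datasets in Theano shared variables keeps them resident on
    # the device, so mini-batches can be sliced via the 'givens' mappings
    # below instead of being transferred host-to-device on every call.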

    
    # --- Create theano graph and compute gradients ----------------------

    print 'Creating theano graph for model %s' % model

    # elementwise gradient-clipping threshold for the non-unitary baselines
    # (LSTM, IRNN, RNN)
    gradient_clipping = np.float32(1)

    if (model == 'LSTM'):           
        inputs, parameters, costs = LSTM(n_input, n_hidden, n_output, input_type=input_type,
                                         out_every_t=out_every_t, loss_function=loss_function)
        gradients = T.grad(costs[0], parameters)
        gradients = [T.clip(g, -gradient_clipping, gradient_clipping) for g in gradients]

    elif (model == 'complex_RNN'):
        if (w_impl == 'full'):
            # 'full' parameterizes the recurrent matrix W as a full
            # (unconstrained) matrix; see idx_project below, which tells
            # rms_prop to re-project W after each update
            inputs, parameters, costs = complex_RNN(n_input, n_hidden, n_output, input_type=input_type,
                                                    out_every_t=out_every_t, loss_function=loss_function,
                                                    output_type='real', fidx=None, flag_return_lin_output=False,
                                                    name_suffix='', x_spec=None, flag_feed_forward=False,
                                                    flag_use_mask=False, hidden_bias_mean=0.0, lam=0.0,
                                                    Wimpl='full')
        else:
            inputs, parameters, costs = complex_RNN(n_input, n_hidden, n_output, input_type=input_type,
                                                    out_every_t=out_every_t, loss_function=loss_function)
        gradients = T.grad(costs[0], parameters)

    elif (model == 'cue_RNN'):
        if flag_telescope:
            print "Using CUE-RNN with %d telescoping Householder reflections of dim. %d" % (n_reflections, n_hidden)
        else:
            print "Using CUE-RNN with %d full Householder reflections of dim. %d" % (n_reflections, n_hidden)
        # pass flag_telescope through (it was previously hardcoded to True,
        # which contradicted the printout above)
        inputs, parameters, costs = cue_RNN(n_input, n_hidden, n_output, input_type=input_type,
                                            out_every_t=out_every_t, loss_function=loss_function,
                                            n_reflections=n_reflections, flag_telescope=flag_telescope)
        gradients = T.grad(costs[0], parameters)

    elif (model == 'Givens_RNN'):
        inputs, parameters, costs = Givens_RNN(n_input, n_hidden, n_output, input_type=input_type,
                                               out_every_t=out_every_t, loss_function=loss_function,flag_useGivensForLoop=flag_useGivensForLoop)
        gradients = T.grad(costs[0], parameters)

    elif (model == 'IRNN'):
        inputs, parameters, costs = IRNN(n_input, n_hidden, n_output, input_type=input_type,
                                         out_every_t=out_every_t, loss_function=loss_function)
        gradients = T.grad(costs[0], parameters)
        gradients = [T.clip(g, -gradient_clipping, gradient_clipping) for g in gradients]

    elif (model == 'RNN'):
        inputs, parameters, costs = tanhRNN(n_input, n_hidden, n_output, input_type=input_type,
                                            out_every_t=out_every_t, loss_function=loss_function)
        gradients = T.grad(costs[0], parameters)
        gradients = [T.clip(g, -gradient_clipping, gradient_clipping) for g in gradients]
    
    else:
        print "Unsupported model:", model
        return

 
    # --- Compile theano functions --------------------------------------------------

    print 'Compiling theano functions...'

    index = T.iscalar('i')

    if (w_impl == 'full'):
        # index in `parameters` of the full recurrent matrix W; rms_prop
        # re-projects this parameter after each update (see optimizations.py)
        idx_project = [5]
    else:
        idx_project = None
    updates, rmsprop = rms_prop(learning_rate, parameters, gradients, idx_project)
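
    # For reference, a standard way to project a matrix back onto the unitary
    # group (a sketch; the actual projection is implemented inside rms_prop in
    # optimizations.py and is not shown here) is the polar decomposition:
    #   U, s, Vh = np.linalg.svd(W)
    #   W = np.dot(U, Vh)  # closest unitary matrix to W in Frobenius norm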

    # 'givens' substitutes slices of the shared datasets for the symbolic
    # inputs, so each call to train(index) reads mini-batch `index` directly
    # from the device-resident shared variables
    givens = {inputs[0]: s_train_x[:, n_batch * index: n_batch * (index + 1)],
              inputs[1]: s_train_y[:, n_batch * index: n_batch * (index + 1)]}

    givens_test = {inputs[0]: s_test_x,
                   inputs[1]: s_test_y}
    
   
    train = theano.function([index], costs[0], givens=givens, updates=updates)
    test = theano.function([], [costs[0], costs[1]], givens=givens_test)

    # --- Training Loop ---------------------------------------------------------------

    print 'Starting training loop...'

    train_loss = []
    test_loss = []
    test_acc = []
    best_params = [p.get_value() for p in parameters]
    best_rms = [r.get_value() for r in rmsprop]
    best_test_loss = 1e6
    for i in xrange(n_iter):
        # reshuffle the training set at the start of each epoch (the original
        # condition tested the constant n_iter % num_batches, so the shuffle
        # either ran every iteration or never)
        if (i % num_batches == 0):
            inds = np.random.permutation(n_train)
            data_x = s_train_x.get_value()
            s_train_x.set_value(data_x[:, inds])
            data_y = s_train_y.get_value()
            s_train_y.set_value(data_y[:, inds])

        ce = train(i % num_batches)
        train_loss.append(ce)
        print "Iteration:", i
        print "cross entropy:", ce
        print

        if (i % 50==0):
            ce, acc = test()
            print
            print "TEST"
            print "cross entropy:", ce
            print 
            test_loss.append(ce)
            test_acc.append(acc)

            if ce < best_test_loss:
                best_params = [p.get_value() for p in parameters]
                best_rms = [r.get_value() for r in rmsprop]
                best_test_loss = ce

            save_vals = {'parameters': [p.get_value() for p in parameters],
                         'rmsprop': [r.get_value() for r in rmsprop],
                         'train_loss': train_loss,
                         'test_loss': test_loss,
                         'test_acc': test_acc,
                         'best_params': best_params,
                         'best_rms': best_rms,
                         'best_test_loss': best_test_loss,
                         'model': model,
                         'time_steps': time_steps}

            with open(savefile, 'wb') as f:
                cPickle.dump(save_vals, f, cPickle.HIGHEST_PROTOCOL)
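
            # To restore a run later (a sketch; assumes the model is rebuilt
            # with the same code so `parameters` line up with the pickle):
            #   with open(savefile, 'rb') as f:
            #       ckpt = cPickle.load(f)
            #   for p, v in zip(parameters, ckpt['best_params']):
            #       p.set_value(v)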

            
if __name__=="__main__":
    parser = argparse.ArgumentParser(
        description="training a model")
    parser.add_argument("n_iter", type=int, default=20000)
    parser.add_argument("n_batch", type=int, default=20)
    parser.add_argument("n_hidden", type=int, default=512)
    parser.add_argument("time_steps", type=int, default=200)
    parser.add_argument("learning_rate", type=float, default=0.001)
    parser.add_argument("savefile")
    parser.add_argument("model", default='complex_RNN')
    parser.add_argument("input_type", default='categorical')
    parser.add_argument("out_every_t", default='False')
    parser.add_argument("loss_function", default='MSE')
    parser.add_argument("--n_reflections", default=8, help="number of reflections for CUE-RNN")
    parser.add_argument("--flag_telescope", default=True, help="whether to use telescoping reflections (True) or full reflections (False)")
    parser.add_argument("--flag_useGivensForLoop",default=False, help="if True, use a for loop instead of scan to do Givens rotations")
    parser.add_argument("w_impl", default='urnn')

    args = parser.parse_args()
    arg_dict = vars(args)

    # note: the flags are compared against the string 'True' (matching
    # out_every_t) because bool() of any non-empty string, including 'False',
    # is True
    kwargs = {'n_iter': arg_dict['n_iter'],
              'n_batch': arg_dict['n_batch'],
              'n_hidden': arg_dict['n_hidden'],
              'time_steps': arg_dict['time_steps'],
              'learning_rate': np.float32(arg_dict['learning_rate']),
              'savefile': arg_dict['savefile'],
              'model': arg_dict['model'],
              'input_type': arg_dict['input_type'],
              'out_every_t': 'True' == arg_dict['out_every_t'],
              'loss_function': arg_dict['loss_function'],
              'n_reflections': args.n_reflections,
              'flag_telescope': 'True' == args.flag_telescope,
              'flag_useGivensForLoop': 'True' == args.flag_useGivensForLoop,
              'w_impl': arg_dict['w_impl']}

    main(**kwargs)
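
# Example invocation (hypothetical values; the loss_function string must be
# one supported by models.py, e.g. cross entropy 'CE'):
#   python memory_problem.py 20000 20 128 100 0.001 memory.pkl complex_RNN \
#       categorical True CE urnn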