https://github.com/nbatfai/isaac
Tip revision: 56711cf763dbad59f0b089c56ec813f458a91a6b authored by Norbert Bátfai on 13 November 2015, 05:33:44 UTC
Update README
Update README
Tip revision: 56711cf
ql.hpp
#ifndef QL_HPP
#define QL_HPP
/**
* @brief ISAAC - deep Q learning with neural networks for predicting the next sentence of a conversation
*
* @file ql.hpp
* @author Norbert Bátfai <nbatfai@gmail.com>
* @version 0.0.1
*
* @section LICENSE
*
* Copyright (C) 2015 Norbert Bátfai, batfai.norbert@inf.unideb.hu
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @section DESCRIPTION
* SAMU
*
* The main purpose of this project is to allow the evaluation and
* verification of the results of the paper entitled "A disembodied
* developmental robotic agent called Samu Bátfai". It is our hope
* that Samu will be the ancestor of developmental robotics chatter
* bots that will be able to chat in natural language like humans do.
*
*/
#include <iostream>
#include <cstdarg>
#include <map>
#include <iterator>
#include <cmath>
#include <random>
#include <limits>
#include <fstream>
#include "nlp.hpp"
#include "qlc.h"
#ifndef Q_LOOKUP_TABLE
class Perceptron
{
public:
Perceptron ( int nof, ... )
{
n_layers = nof;
units = new double*[n_layers];
n_units = new int[n_layers];
va_list vap;
va_start ( vap, nof );
for ( int i {0}; i < n_layers; ++i )
{
n_units[i] = va_arg ( vap, int );
if ( i )
units[i] = new double [n_units[i]];
}
va_end ( vap );
weights = new double**[n_layers-1];
std::random_device init;
std::default_random_engine gen {init() };
std::uniform_real_distribution<double> dist ( -1.0, 1.0 );
for ( int i {1}; i < n_layers; ++i )
{
weights[i-1] = new double *[n_units[i]];
for ( int j {0}; j < n_units[i]; ++j )
{
weights[i-1][j] = new double [n_units[i-1]];
for ( int k {0}; k < n_units[i-1]; ++k )
{
weights[i-1][j][k] = dist ( gen );
}
}
}
}
Perceptron ( std::fstream & file )
{
file >> n_layers;
units = new double*[n_layers];
n_units = new int[n_layers];
for ( int i {0}; i < n_layers; ++i )
{
file >> n_units[i];
if ( i )
units[i] = new double [n_units[i]];
}
weights = new double**[n_layers-1];
for ( int i {1}; i < n_layers; ++i )
{
weights[i-1] = new double *[n_units[i]];
for ( int j {0}; j < n_units[i]; ++j )
{
weights[i-1][j] = new double [n_units[i-1]];
for ( int k {0}; k < n_units[i-1]; ++k )
{
file >> weights[i-1][j][k];
}
}
}
}
double sigmoid ( double x )
{
return 1.0/ ( 1.0 + exp ( -x ) );
}
double operator() ( double image [] )
{
units[0] = image;
for ( int i {1}; i < n_layers; ++i )
{
#ifdef CUDA_PRCPS
cuda_layer(i, n_units, units, weights);
#else
#pragma omp parallel for
for ( int j = 0; j < n_units[i]; ++j )
{
units[i][j] = 0.0;
for ( int k = 0; k < n_units[i-1]; ++k )
{
units[i][j] += weights[i-1][j][k] * units[i-1][k];
}
units[i][j] = sigmoid ( units[i][j] );
}
#endif
}
return sigmoid ( units[n_layers - 1][0] );
}
void learning ( double image [], double q, double prev_q )
{
double y[1] {q};
learning ( image, y );
}
void learning ( double image [], double y[] )
{
( *this ) ( image );
units[0] = image;
double ** backs = new double*[n_layers-1];
for ( int i {0}; i < n_layers-1; ++i )
{
backs[i] = new double [n_units[i+1]];
}
int i {n_layers-1};
for ( int j {0}; j < n_units[i]; ++j )
{
backs[i-1][j] = sigmoid ( units[i][j] ) * ( 1.0-sigmoid ( units[i][j] ) ) * ( y[j] - units[i][j] );
for ( int k {0}; k < n_units[i-1]; ++k )
{
weights[i-1][j][k] += ( 0.2* backs[i-1][j] *units[i-1][k] );
}
}
for ( int i {n_layers-2}; i >0 ; --i )
{
#pragma omp parallel for
for ( int j =0; j < n_units[i]; ++j )
{
double sum = 0.0;
for ( int l = 0; l < n_units[i+1]; ++l )
{
sum += 0.19*weights[i][l][j]*backs[i][l];
}
backs[i-1][j] = sigmoid ( units[i][j] ) * ( 1.0-sigmoid ( units[i][j] ) ) * sum;
for ( int k = 0; k < n_units[i-1]; ++k )
{
weights[i-1][j][k] += ( 0.19* backs[i-1][j] *units[i-1][k] );
}
}
}
for ( int i {0}; i < n_layers-1; ++i )
{
delete [] backs[i];
}
delete [] backs;
}
~Perceptron()
{
for ( int i {1}; i < n_layers; ++i )
{
for ( int j {0}; j < n_units[i]; ++j )
{
delete [] weights[i-1][j];
}
delete [] weights[i-1];
}
delete [] weights;
for ( int i {0}; i < n_layers; ++i )
{
if ( i )
delete [] units[i];
}
delete [] units;
delete [] n_units;
}
void save ( std::fstream & out )
{
out << " "
<< n_layers;
for ( int i {0}; i < n_layers; ++i )
out << " " << n_units[i];
for ( int i {1}; i < n_layers; ++i )
{
for ( int j {0}; j < n_units[i]; ++j )
{
for ( int k {0}; k < n_units[i-1]; ++k )
{
out << " "
<< weights[i-1][j][k];
}
}
}
}
private:
Perceptron ( const Perceptron & );
Perceptron & operator= ( const Perceptron & );
int n_layers;
int* n_units;
double **units;
double ***weights;
};
#endif
class QL
{
public:
QL ( )
{}
QL ( SPOTriplet triplet )
{}
~QL()
{
#ifndef Q_LOOKUP_TABLE
for ( std::map<SPOTriplet, Perceptron*>::iterator it=prcps.begin(); it!=prcps.end(); ++it )
delete it->second;
#endif
}
double f ( double u, int n )
{
#ifndef Q_LOOKUP_TABLE
const int N_e = 10;
#else
const int N_e = 10;
#endif
if ( n < N_e )
return 1.0;
else
return u;
}
#ifdef QNN_DEBUG
int get_action_count() const
{
return frqs.size();
}
int get_action_relevance() const
{
return relevance*100.0;
}
#endif
#ifndef Q_LOOKUP_TABLE
double max_ap_Q_sp_ap ( double image[] )
{
double q_spap;
double min_q_spap = -std::numeric_limits<double>::max();
for ( std::map<SPOTriplet, Perceptron*>::iterator it=prcps.begin(); it!=prcps.end(); ++it )
{
q_spap = ( * ( it->second ) ) ( image );
if ( q_spap > min_q_spap )
min_q_spap = q_spap;
}
return min_q_spap;
}
SPOTriplet argmax_ap_f ( std::string prg, double image[] )
{
double min_f = -std::numeric_limits<double>::max();
SPOTriplet ap;
#ifdef QNN_DEBUG
double sum {0.0}, rel;
double a = std::numeric_limits<double>::max(), b = -std::numeric_limits<double>::max();
#endif
for ( std::map<SPOTriplet, Perceptron*>::iterator it=prcps.begin(); it!=prcps.end(); ++it )
{
double q_spap = ( * ( it->second ) ) ( image );
double explor = f ( q_spap, frqs[it->first][prg] );
#ifdef QNN_DEBUG
sum += q_spap;
if ( q_spap > b )
b = q_spap;
if ( q_spap < a )
a = q_spap;
#endif
if ( explor >= min_f )
{
min_f = explor;
ap = it->first;
#ifdef QNN_DEBUG
rel = q_spap;
#endif
}
}
#ifdef QNN_DEBUG
relevance = ( rel - sum/ ( ( double ) prcps.size() ) ) / ( b-a );
#endif
return ap;
}
SPOTriplet operator() ( SPOTriplet triplet, std::string prg, double image[] )
{
// Here 'triplet' will also be used as a simplified state in further developments
// s' = triplet
// r' = reward
double reward =
3.0 * triplet.cmp ( prev_action ) - 1.5;
if ( prcps.find ( triplet ) == prcps.end() )
{
prcps[triplet] = new Perceptron ( 3, 256*256, 80, 1 );
//prcps[triplet] = new Perceptron ( 3, 256*256, 400, 1 );
}
SPOTriplet action = triplet;
if ( prev_reward > -std::numeric_limits<double>::max() )
{
++frqs[prev_action][prev_state];
double max_ap_q_sp_ap = max_ap_Q_sp_ap ( image );
for ( int z {0}; z<10; ++z )
{
double nn_q_s_a = ( *prcps[prev_action] ) ( prev_image );
double q_q_s_a = nn_q_s_a +
alpha ( frqs[prev_action][prev_state] ) *
( reward + gamma * max_ap_q_sp_ap - nn_q_s_a );
prcps[prev_action]->learning ( prev_image, q_q_s_a, nn_q_s_a );
std::cerr << "### "
<< q_q_s_a - nn_q_s_a
<< " "
<< q_q_s_a
<< " "
<< nn_q_s_a
<< std::endl;
}
action = argmax_ap_f ( prg, image );
}
prev_state = prg; // s <- s'
prev_reward = reward; // r <- r'
prev_action = action; // a <- a'
std::memcpy ( prev_image, image, 256*256*sizeof ( double ) );
return action;
}
#else
double max_ap_Q_sp_ap ( std::string prg )
{
double q_spap;
double min_q_spap = -std::numeric_limits<double>::max();
for ( std::map<SPOTriplet, std::map<std::string, double>>::iterator it=table_.begin(); it!=table_.end(); ++it )
{
q_spap = it->second[prg];
if ( q_spap > min_q_spap )
min_q_spap = q_spap;
}
return min_q_spap;
}
SPOTriplet argmax_ap_f ( std::string prg )
{
double q_spap;
double min_f = -std::numeric_limits<double>::max();
SPOTriplet ap;
for ( std::map<SPOTriplet, std::map<std::string, double>>::iterator it=table_.begin(); it!=table_.end(); ++it )
{
q_spap = it->second[prg];
double explor = f ( q_spap, frqs[it->first][prg] );
if ( explor > min_f )
{
min_f = explor;
ap = it->first;
}
}
return ap;
}
SPOTriplet operator() ( SPOTriplet triplet, std::string prg )
{
// s' = triplet
// r' = reward
double reward =
3.0*triplet.cmp ( prev_action ) - 1.5;
SPOTriplet action = triplet;
if ( prev_reward > -std::numeric_limits<double>::max() )
{
++frqs[prev_action][prev_state];
table_[triplet][prg] = table_[triplet][prg];
double max_ap_q_sp_ap = max_ap_Q_sp_ap ( prg );
table_[prev_action][prev_state] =
table_[prev_action][prev_state] +
alpha ( frqs[prev_action][prev_state] ) *
( reward + gamma * max_ap_q_sp_ap - table_[prev_action][prev_state] );
action = argmax_ap_f ( prg );
}
prev_state = prg; // s <- s'
prev_reward = reward; // r <- r'
prev_action = action; // a <- a'
return action;
}
#endif
double reward ( void )
{
return prev_reward;
}
double alpha ( int n )
{
return 1.0/ ( ( ( double ) n ) + 1.0 );
}
void save_prcps ( std::fstream & samuFile )
{
samuFile << prcps.size();
for ( std::map<SPOTriplet, Perceptron*>::iterator it=prcps.begin(); it!=prcps.end(); ++it )
{
std::cout << "Saving Samu: "
<< ( std::distance ( prcps.begin(), it ) * 100 ) / prcps.size()
<< "% (perceptrons)"
<< std::endl;
samuFile << " "
<< it->first;
it->second->save ( samuFile );
}
}
void save_frqs ( std::fstream & samuFile )
{
samuFile << std::endl
<< frqs.size();
for ( std::map<SPOTriplet, std::map<std::string, int>>::iterator it=frqs.begin(); it!=frqs.end(); ++it )
{
std::cout << "Saving Samu: "
<< ( std::distance ( frqs.begin(), it ) * 100 ) / frqs.size()
<< "% (frequency table)"
<< std::endl;
samuFile << " "
<< it->first
<< " "
<< it->second.size();
for ( std::map<std::string, int>::iterator itt=it->second.begin(); itt!=it->second.end(); ++itt )
{
samuFile << " "
<< itt->first
<< " "
<< itt->second;
}
}
}
void save ( std::string & fname )
{
std::fstream samuFile ( fname, std::ios_base::out );
save_prcps ( samuFile );
save_frqs ( samuFile );
samuFile.close();
}
void load_prcps ( std::fstream & file )
{
int prcpsSize {0};
file >> prcpsSize;
SPOTriplet t;
for ( int s {0}; s< prcpsSize; ++s )
{
std::cout << "Loading Samu: "
<< ( s * 100 ) / prcpsSize
<< "% (perceptrons)"
<< std::endl;
file >> t;
prcps[t] = new Perceptron ( file );
}
}
void load_frqs ( std::fstream & file )
{
int frqsSize {0};
file >> frqsSize;
int mapSize {0};
SPOTriplet t;
std::string p;
int n;
for ( int s {0}; s< frqsSize; ++s )
{
std::cout << "Loading Samu: "
<< ( s * 100 ) / frqsSize
<< "% (frequency table)"
<< std::endl;
file >> t;
file >> mapSize;
for ( int ss {0}; ss< mapSize; ++ss )
{
file >> p;
file >> n;
frqs[t][p] = n;
}
}
}
void load ( std::fstream & file )
{
load_prcps ( file );
load_frqs ( file );
}
private:
QL ( const QL & );
QL & operator= ( const QL & );
#ifdef Q_LOOKUP_TABLE
double gamma = .2;
#else
double gamma = .2;
#endif
#ifdef Q_LOOKUP_TABLE
std::map<SPOTriplet, std::map<std::string, double>> table_;
#else
std::map<SPOTriplet, Perceptron*> prcps;
#ifdef QNN_DEBUG
double relevance {0.0};
#endif
#endif
std::map<SPOTriplet, std::map<std::string, int>> frqs;
SPOTriplet prev_action;
std::string prev_state;
double prev_reward { -std::numeric_limits<double>::max() };
double prev_image [256*256];
};
#endif