Revision 63d8a43408637eef9a81e05ffd7e6ff3afa51947 authored by Robert B. Gramacy on 20 September 2006, 00:00:00 UTC, committed by Gabor Csardi on 20 September 2006, 00:00:00 UTC
1 parent 622e02d
Raw File
tree.cc
/******************************************************************************** 
 *
 * Bayesian Regression and Adaptive Sampling with Gaussian Process Trees
 * Copyright (C) 2005, University of California
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Questions? Contact Robert B. Gramacy (rbgramacy@ams.ucsc.edu)
 *
 ********************************************************************************/


extern "C" 
{
#include "matrix.h"
#include "gen_covar.h"
#include "all_draws.h"
#include "rand_pdf.h"
#include "rand_draws.h"
#include "lh.h"
#include "dopt.h"
#include "rhelp.h"
}

#include "tree.h"
#include "base.h"
#include "model.h"
#include "params.h"
#include <stdlib.h>
#include <assert.h>
#include <math.h>

// #define DEBUG
#define CPRUNEOP

TREE_OP tree_op;

/*
 * Tree:
 * 
 * the usual class constructor function
 */

Tree::Tree(double **X, int* p, unsigned int n, unsigned int d, 
	   double *Z, Rect *rect, Tree* parent, Model* model)
{
  this->rect = rect;
  this->model = model;

  /* data size */
  this->n = n;
  this->d = d;

  /* data storage */
  this->X = X; 
  this->p = p;
  XX = NULL; 
  pp = NULL;
  nn = 0;
  this->Z = Z;
  
  /* tree pointers */
  leftChild = NULL;
  rightChild = NULL;
  if(parent != NULL) depth = parent->depth+1;
  else depth = 0;
  this->parent = parent;

  /* changepoint (split) variables */
  var = 0; val = 0;

  /* output file for progress printing, and printing level */
  OUTFILE = model->Outfile(&verb);

  /* create the GP model */
  Base_Prior *prior = model->get_params()->BasePrior();
  base = prior->newBase(model);
  base->Init();
}


/*
 * Tree:
 * 
 * duplication constructor function only copies information about X (not XX)
 * then generates XX stuff from rect, and params.  Any "new" variables are i
 * also set to NULL values
 */

Tree::Tree(const Tree *told)
{
  /* simple non-pointer copies */
  d = told->d;
  n = told->n;
  
  /* tree parameters */
  var = told->var; 	
  val = told->val;
  depth = told->depth; 	
  parent = leftChild = rightChild = next = NULL;
  
  /* things that must be NULL 
   * becuase they point to other tree nodes */
  XX = NULL;
  pp = NULL;
  nn = 0;
  
  /* data */
  assert(told->rect); 	rect = new_dup_rect(told->rect);
  assert(told->X); 	X = new_dup_matrix(told->X,n,d);
  assert(told->Z); 	Z = new_dup_vector(told->Z, n);
  assert(told->p);	p = new_dup_ivector(told->p, n); 
  
  /* copy the core GP model: 
   * must pass in the new X and Z values because they 
   * are stored as pointers in the GP module */

  /* there should be a switch statement here, or
     maybe I should use a copy constructor */
  model = told->model;
  base = told->base->Dup(X, Z);
   
  OUTFILE = told->OUTFILE;
  
  /* recurse down the leaves */
  if(! told->isLeaf()) {
    leftChild =  new Tree(told->leftChild);
    rightChild =  new Tree(told->rightChild);
  }
}


/* 
 * ~Tree:
 * 
 * the usual class destructor function
 */

Tree::~Tree(void)
{
  delete base;
  delete_matrix(X);
  if(Z) free(Z);
  if(XX) delete_matrix(XX);
  if(p) free(p);
  if(pp) free(pp);
  if(leftChild) delete leftChild;
  if(rightChild) delete rightChild;
  if(rect) delete_rect(rect);
};



/* 
 * Add_XX:
 * 
 * deal with the new predictive data; figuring out which XX locations
 * (and pp) belong in this partition, return the count of XX determined
 * via matrix_constrained
 */

unsigned int Tree::add_XX(double **X_pred, unsigned int n_pred, unsigned int d_pred)
{
  assert(d_pred == d);
  assert(isLeaf());
  
  /* do not recompute XX if it has already been computed */
  if(XX) { 
    assert(pp); 
    warning("failed add_XX in leaf");
    return 0; 
  }
  
  int *p_pred = new_ivector(n_pred);
  nn = matrix_constrained(p_pred, X_pred, n_pred, d, rect);
  XX = new_matrix(nn, d);
  pp = new_ivector(nn);
  unsigned int k=0;
  for(unsigned int i=0; i<n_pred; i++)
    if(p_pred[i]) { pp[k] = i; dupv(XX[k], X_pred[i], d); k++; }
  free(p_pred);

  return nn;
}


/* 
 * new_XZ:
 * 
 * very similar to add_XX; 
 * delete old X&Z data, add put new X&Z data at this partition
 */

void Tree::new_XZ(double **X_new, double *Z_new, unsigned int n_new, unsigned int d_new)
{
  assert(d_new == d);
  assert(isLeaf());

  /* delete X if it has already been computed */
  assert(X); delete_matrix(X); X = NULL;
  assert(Z); free(Z); Z = NULL;
  assert(p); free(p); p = NULL;
  base->Clear();
  
  int *p_new = new_ivector(n_new);
  n = matrix_constrained(p_new, X_new, n_new, d, rect);
  assert(n > 0);
  X = new_matrix(n, d);
  Z = new_vector(n);
  p = new_ivector(n);
  unsigned int k=0;
  for(unsigned int i=0; i<n_new; i++) {
    if(p_new[i]) { 
      p[k] = i; 
      dupv(X[k], X_new[i], d); 
      Z[k] = Z_new[i];
      k++; 
    }
  }
  free(p_new);
  
  /* recompute for new data */
  Update();
  Compute();
}


/* 
 * new_data:
 * 
 * deal with the new data; figuring out which X locations (and p)
 * belong in this parition, and all partitions below it 
 * (this is a recursive function)
 */

void Tree::new_data(double **X_new, unsigned int n_new, unsigned int d_new, 
		double *Z_new, int *p_new)
{
  assert(d_new == d);
  delete_matrix(X);
  free(Z); free(p);
  Clear();
  
  /* put the new data in the node */
  n = n_new; X = X_new; Z = Z_new; p = p_new;
  
  /* prepare a leaf node*/
  if(isLeaf()) {
    Update();
    Compute();
    return;
  }
  
  /* deal with an internal node */
  assert(leftChild != NULL && rightChild != NULL);
  
  /* find partition indices */
  unsigned int plen, success; 
  double **Xc = NULL; 
  Rect *newRect = NULL;
  double *Zc = NULL;
  int *pnew = NULL; 
  
  /* data for left child */
  success = part_child(LEQ, &Xc, &pnew, &plen, &Zc, &newRect);
  assert(success);
  /* assert that the rectangles are equal */
  delete_rect(newRect);
  leftChild->new_data(Xc, plen, d_new, Zc, pnew);
  
  success = part_child(GT, &Xc, &pnew, &plen, &Zc, &newRect);
  assert(success); /* rectangles must be equal */
  delete_rect(newRect);
  rightChild->new_data(Xc, plen, d_new, Zc, pnew);
}


/*
 * delete_XX:
 *
 * free everything having to do with predictive locations
 */

void Tree::delete_XX(void)
{
  if(XX) delete_matrix(XX);
  if(pp) free(pp);
  pp = NULL;
  XX = NULL;
  base->ClearPred();
  nn = 0;
}


/*
 * predict:
 * 
 * prediction based on the current parameter settings: (predictive variables 
 * recomputed and/or initialised when appropriate)
 */

void Tree::Predict(double *ZZ, double *Zpred, double **Ds2xy, double *Ego, bool err, void *state)
{
  if(!n) warning("n = %d\n", n);
  assert(isLeaf() && n);
  if(Zpred == NULL && nn == 0) return;

  /* set the partition */
  if(nn > 0) base->UpdatePred(XX, nn, d, Ds2xy);

  /* ready the storage for predictions */
    double *z, *zz, **ds2xy, *ego;
  
  /* allocate necessary space for predictions */
  z = zz = NULL;
  if(Zpred) z = new_vector(n);
  if(nn > 0) zz = new_vector(nn);
  assert(z != NULL || zz != NULL);
  
  /* allocate space for Delta-sigma */
  ds2xy = NULL; if(Ds2xy) ds2xy = new_matrix(nn, nn);
  
  /* allocate space for EGO */
  ego = NULL; if(Ego) ego = new_vector(nn);

  /* predict */
  base->Predict(n, nn, z, zz, ds2xy, ego, err, state);
  
  /* copy predictive statistics to the right place in their respective full matrices */
  if(z) { copy_p_vector(Zpred, p, z, n); free(z); }
  if(zz) { copy_p_vector(ZZ, pp, zz, nn); free(zz); }
  if(ds2xy) { add_p_matrix(1.0, Ds2xy, pp, pp, 1.0, ds2xy, nn, nn); delete_matrix(ds2xy); }
  if(ego) { add_p_vector(1.0, Ego, pp, 1.0, ego, nn); free(ego); }

  /* multiple predictive draws predictions would be better fascilited 
   * if the following statement were moved outside this function */
  base->ClearPred();
}


/* 
 * getDepth:
 * 
 * return the node's depth
 */

unsigned int Tree::getDepth(void)
{
  return depth;
}


/*
 * isLeaf:
 * 
 * TRUE if the node is a leaf,
 * FALSE otherwise
 */

bool Tree::isLeaf(void) const
{
  assert(!(leftChild != NULL && rightChild == NULL));
  assert(!(leftChild == NULL && rightChild != NULL));
  if(leftChild == NULL && rightChild == NULL) return true;
  else return false;
}


/*
 * isRoot:
 * 
 * TRUE if the node is the root (parent == NULL),
 * FALSE otherwise
 */

bool Tree::isRoot(void)
{
  if(parent == NULL) return true;
  else return false;
}


/*
 * internals:
 * 
 * get a list of internal (non-leaf) nodes, where the first in
 * list is pointed to by the first pointer, and the last by the 
 * last pointer.  The length of the list is returned.
 */

unsigned int Tree::internals(Tree **first, Tree **last)
{
  if(isLeaf()) {
    *first = *last = NULL;
    return 0;
  }

  Tree *leftFirst, *leftLast, *rightFirst, *rightLast;
  leftFirst = leftLast = rightFirst = rightLast = NULL;
  
  int left_len = leftChild->internals(&leftFirst, &leftLast);
  int right_len = rightChild->internals(&rightFirst, &rightLast);
  
  if(left_len == 0) {
    this->next = rightFirst;
    *first = this;
    if(right_len > 0) {
      *last = rightLast;
      (*last)->next = NULL;
    } else {
      *last = this;
      (*last)->next = NULL;
    }
    return right_len + 1;
  } else {
    leftLast->next = rightFirst;
    this->next = leftFirst;
    *first = this;
    if(right_len == 0) *last = leftLast;
    else *last = rightLast;
    (*last)->next = NULL;
    return left_len + right_len + 1;
  }
}



/*
 * leaves:
 * 
 * get a list of leaf nodes, where the first in list is pointed to by the 
 * first pointer, and the last by the last pointer.  The length of the list 
 * is returned.
 */

unsigned int Tree::leaves(Tree **first, Tree **last)
{
  if(isLeaf()) {
    *first = this;
    *last = this;
    (*last)->next = NULL;
    return 1;
  }
  
  Tree *leftFirst, *leftLast, *rightFirst, *rightLast;
  leftFirst = leftLast = rightFirst = rightLast = NULL;
  
  int left_len = leftChild->leaves(&leftFirst, &leftLast);
  int right_len = rightChild->leaves(&rightFirst, &rightLast);
  
  leftLast->next = rightFirst;
  *first = leftFirst;
  *last = rightLast;
  return left_len + right_len;
}


/*
 * swapable:
 * 
 * get a list of swapable children , where the first in list is pointed to 
 * by the first pointer, and the last by the last pointer. The length of 
 * the list is returned.
 */

unsigned int Tree::swapable(Tree **first, Tree **last)
{
  if(isLeaf()) return 0;
  
  int len;
  Tree *leftFirst, *leftLast, *rightFirst, *rightLast;
  leftFirst = leftLast = rightFirst = rightLast = NULL;
  
  int left_len = leftChild->swapable(&leftFirst, &leftLast);
  int right_len = rightChild->swapable(&rightFirst, &rightLast);
  
  if(left_len == 0)  {
    if(right_len != 0) {
      *first = rightFirst;
      *last = rightLast;
    }
  } else if(right_len == 0) {
    *first = leftFirst;
    *last = leftLast;
  } else {
    assert(leftLast);
    leftLast->next = rightFirst;
    *first = leftFirst;
    *last = rightLast;
  }
  
  len = left_len + right_len;
  if(*last) (*last)->next = NULL;
  
  if(parent != NULL) {
    this->next = *first;
    *first = this;
    if(!(*last)) *last = this;
    len++;
  }
  
  return len;
}


/*
 * prunable:
 * 
 * get a list of prunable nodes, where the first in list is pointed to by the 
 * first pointer, and the last by the last pointer. The length of the list is returned.
 */

unsigned int Tree::prunable(Tree **first, Tree **last)
{
  if(isLeaf()) return 0;
  
  if(leftChild->isLeaf() && rightChild->isLeaf()) {
    *first = this;
    *last = this;
    (*last)->next = NULL;
    return 1;
  }
  
  Tree *leftFirst, *leftLast, *rightFirst, *rightLast;
  leftFirst = leftLast = rightFirst = rightLast = NULL;
  
  int left_len = leftChild->prunable(&leftFirst, &leftLast);
  int right_len = rightChild->prunable(&rightFirst, &rightLast);
  
  if(left_len == 0)  {
    if(right_len == 0) return 0;
    *first = rightFirst;
    *last = rightLast;
    return right_len;
  } else if(right_len == 0) {
    *first = leftFirst;
    *last = leftLast;
    return left_len;
  }
  
  leftLast->next = rightFirst;
  *first = leftFirst;
  *last = rightLast;
  return left_len + right_len;
}


/*
 * swapData:
 * 
 * swap all data between partition
 */

void Tree::swapData(Tree* t)
{
  /* grab the data from the old parent */
  assert(t);
  delete_matrix(X);		
  X = t->X;
  free(p); 			
  p = t->p;
  delete_XX();
  /*if(XX) delete_matrix(XX);*/ 	
  XX = t->XX;
  /*free(pp);*/ 			
  pp = t->pp;
  free(Z); 			
  Z = t->Z;
  delete_rect(rect);		
  rect = t->rect;
  n = t->n;
  nn = t->nn;
  
  /* create the new child data */
  unsigned int plen; 
  double **Xc;
  Rect *newRect;
  double *Zc;
  int *pnew; 
  
  FIND_OP op;
  if(t == rightChild) op = GT;
  else { assert(t == leftChild); op = LEQ; }
  
  assert(part_child(op, &Xc, &pnew, &plen, &Zc, &newRect));
  t->X = Xc;
  t->p = pnew;
  t->Z = Zc;
  t->rect = newRect;
  t->n = plen;
  
  assert(n == leftChild->n + rightChild->n);
  assert(nn == leftChild->nn + rightChild->nn);
  assert(t->n == t->leftChild->n + t->rightChild->n);
  assert(t->nn == t->leftChild->nn + t->rightChild->nn);
}


/* 
 * rotate_right:
 * 
 * rotate this child to the right
 */

void Tree::rotate_right(void)
{
  Tree *pt = this->parent;
  
  /* set the parent of the parent, and the root of the model */
  if(pt->parent != NULL) {
    if(pt->parent->leftChild == pt) pt->parent->leftChild = this;
    else pt->parent->rightChild = this;
  } else {
    assert(model->get_TreeRoot() == pt);
    model->set_TreeRoot(this);
  }
  this->parent = pt->parent;
  
  /* set the children */
  pt->leftChild = this->rightChild;
  pt->leftChild->parent = pt;
  this->rightChild = pt;
  pt->parent = this;
  
  /* take care of DEPTHS */
  (pt->depth)++;
  (this->depth)--;
  (this->leftChild)->adjustDepth(-1);
  (pt->rightChild)->adjustDepth(1);
  assert(pt->depth == this->depth + 1 && pt->depth >= 0);
  if(this->parent) 
    assert(this->depth == this->parent->depth + 1 && this->depth >= 0);
  else assert(this->depth == 0);
  
  /* take care of the DATA */
  this->swapData(pt);
  this->Clear();
  pt->Clear();
}


/* 
 * rotate_left:
 * 
 * rotate this child to the left
 */

void Tree::rotate_left(void)
{
  Tree *pt = this->parent;
  
  /* set the parent of the parent, and the root of the model */
  if(pt->parent != NULL) {
    if(pt->parent->rightChild == pt) pt->parent->rightChild = this;
    else pt->parent->leftChild = this;
  } else { /* this node is the root */
    assert(model->get_TreeRoot() == pt);
    model->set_TreeRoot(this);
  }
  this->parent = pt->parent;
  
  /* set the children */
  pt->rightChild = this->leftChild;
  pt->rightChild->parent = pt;
  this->leftChild = pt;
  pt->parent = this;
  
  /* take care of DEPTHS */
  (pt->depth)++;
  (this->depth)--;
  (this->rightChild)->adjustDepth(-1);
  (pt->leftChild)->adjustDepth(1);
  assert(pt->depth == this->depth + 1 && pt->depth >= 0);
  if(this->parent) 
    assert(this->depth == this->parent->depth + 1 && this->depth >= 0);
  else assert(this->depth == 0);
  
  /* take care of the DATA */
  this->swapData(pt);
  this->Clear();
  pt->Clear();
}


/* 
 * rotate:
 * 
 * attempt to rotate the split point of this INTERNAL node and its parent.
 */

bool Tree::rotate(void *state)
{
  tree_op = ROTATE;
  assert(!isLeaf());
  assert(parent);
  
  /* do the rotation (child becomes root, etc) */
  if(parent->rightChild == this) { /* this node is a rightChild */
    double alpha = pT_rotate(rightChild, parent->leftChild);
    if(runi(state) < alpha) rotate_left();
    else return(false);
  } else { /* this node is a leftChild */
    assert(parent->leftChild == this);
    double alpha = pT_rotate(leftChild, parent->rightChild);
    if(runi(state) < alpha) rotate_right();
    else return(false);
  }
  return(true);
}


/*
 * pT_rotate:
 * 
 * calculate the prior probablilty ratio for a rotate
 * when low and high are swapped
 */

double Tree::pT_rotate(Tree* low, Tree* high)
{
  unsigned int low_ni, low_nl, high_ni, high_nl, i, t_minpart;
  Tree** low_i = low->internalsList(&low_ni);
  Tree** low_l = low->leavesList(&low_nl);
  Tree** high_i = high->internalsList(&high_ni);
  Tree** high_l = high->leavesList(&high_nl);
 
  double t_alpha, t_beta;
  model->get_params()->get_T_params(&t_alpha, &t_beta, &t_minpart);
 
  double pT_log = 0;
  for(i=0; i<low_ni; i++) pT_log += log(t_alpha)-t_beta*log(1.0+low_i[i]->depth);
  for(i=0; i<low_nl; i++) pT_log += log(1-t_alpha*pow(1.0+low_l[i]->depth,0.0-t_beta));
  for(i=0; i<high_ni; i++) pT_log += log(t_alpha)-t_beta*log(1.0+high_i[i]->depth);
  for(i=0; i<high_nl; i++) pT_log += log(1-t_alpha*pow(1.0+high_l[i]->depth,0.0-t_beta));
  
  double pTstar_log = 0;
  for(i=0; i<low_ni; i++) pTstar_log += log(t_alpha)-t_beta*log((double)low_i[i]->depth);
  for(i=0; i<low_nl; i++) pTstar_log += log(1.0-t_alpha*pow((double)low_l[i]->depth,0.0-t_beta));
  for(i=0; i<high_ni; i++) pTstar_log += log(t_alpha)-t_beta*log(2.0+high_i[i]->depth);
  for(i=0; i<high_nl; i++) pTstar_log += log(1.0-t_alpha*pow(2.0+high_l[i]->depth,0.0-t_beta));
  
  free(low_i); free(low_l); free(high_i); free(high_l);
  
  double a = exp(pTstar_log - pT_log); 
  if(a >= 1.0) return 1.0;
  else return a;
}



/* 
 * swap:
 * 
 * attempt to swap the split point of this INTERNAL node and its parent, 
 * while keeping parameters in the lower partitions the same.
 */

bool Tree::swap(void *state)
{
  tree_op = SWAP;
  assert(!isLeaf());
  assert(parent);
  
  if(parent->var == var) {
    bool success =  rotate(state);
    if(success && verb >= 3) 
      myprintf(OUTFILE, "**ROTATE** @depth %d, var=%d, val=%g\n", 
	       depth, var, val);
    return success;
  }
  
  /* save old stuff */
  double parent_val = parent->val;
  int parent_var = parent->var;
  double old_val = val;
  int old_var = var;
  Tree* oldPLC = parent->leftChild;
  Tree* oldPRC = parent->rightChild;
  
  /* swapped tree */
  parent->val = old_val; val = parent_val;
  parent->var = old_var; var = parent_var;
  
  /* re-build the current child */
  parent->leftChild = parent->rightChild = NULL;
  bool success = parent->grow_children();
  assert(success);
  
  /* continue with new left and right children */
  success = parent->leftChild->match(oldPLC, state);
  if(parent->try_revert(success, oldPLC, oldPRC, parent_var, parent_val))
    { val = old_val; var = old_var; return false; }
  success = parent->rightChild->match(oldPRC, state);
  if(parent->try_revert(success, oldPLC, oldPRC, parent_var, parent_val))
    { val = old_val; var = old_var; return false; }
  
  /* posterior probabilities and acceptance ratio */
  assert(oldPRC->leavesN() + oldPLC->leavesN() == parent->leavesN());
  double pklast = oldPRC->leavesPosterior() + oldPLC->leavesPosterior();
  double pk = parent->leavesPosterior();
  double alpha = exp(pk-pklast);
  if(alpha > 1) alpha = 1;
  
  /* accept or reject? */
  if(runi(state) <= alpha) {
    /* if(verb >= 1) 
      myprintf(OUTFILE, "**SWAP** @depth %d: [%d,%g] <-> [%d,%g]\n", 
      depth, var, val, parent->var, parent->val);*/
    if(oldPRC) delete oldPRC;
    if(oldPRC) delete oldPLC;
    return true;
  } else {
    parent->try_revert(false, oldPLC, oldPRC, parent_var, parent_val);
    val = old_val; var = old_var;
    return false;
  }
}


/* 
 * change:
 * 
 * attempt to move the split point of an INTERNAL node.
 * keeping parameters in the lower partitions the same.
 */

bool Tree::change(void *state)
{
  tree_op = CHANGE;
  assert(!isLeaf());
  
  /* save old tree */
  double old_val = val;
  val = propose_val(state);
  Tree* oldLC = leftChild;
  Tree* oldRC = rightChild;
  leftChild = rightChild = NULL;
  
  /* new left child */
  unsigned int success = grow_child(&leftChild, LEQ);
  if(try_revert((bool)success && leftChild->wellSized(),
		oldLC, oldRC, var, old_val)) return false;
  /* new right child */
  success = grow_child(&rightChild, GT);
  if(try_revert((bool)success && rightChild->wellSized(),
		oldLC, oldRC, var, old_val)) return false;
  
  /* continue with new left and right children */
  success = leftChild->match(oldLC, state);
  if(try_revert(success, oldLC, oldRC, var, old_val)) return false;
  success = rightChild->match(oldRC, state);
  if(try_revert(success, oldLC, oldRC, var, old_val)) return false;
  
  /* posterior probabilities and acceptance ratio */
  assert(oldLC->leavesN() + oldRC->leavesN() == this->leavesN());
  double pklast = oldLC->leavesPosterior() + oldRC->leavesPosterior();
  double pk = leavesPosterior();
  double alpha = exp(pk-pklast);
  if(alpha > 1) alpha = 1;
  
  /* accept or reject? */
  if(runi(state) <= alpha) { /* accept */
    if(oldLC) delete oldLC;
    if(oldRC) delete oldRC;
    if(tree_op == CHANGE && verb >= 4) 
      myprintf(OUTFILE, "**CHANGE** @depth %d: var=%d, val=%g->%g, n=(%d,%d)\n", 
	       depth, var, old_val, val, leftChild->n, rightChild->n);
    else if(tree_op == CPRUNE && verb >= 1)
      myprintf(OUTFILE, "**CPRUNE** @depth %d: var=%d, val=%g->%g, n=(%d,%d)\n", 
	       depth, var, old_val, val, leftChild->n, rightChild->n);
    return true;
  } else { /* reject */
    try_revert(false, oldLC, oldRC, var, old_val);
    return false;
  }
}


/* 
 * match:
 * 
 * match the parameters of oldT with new partition
 * induced by THIS tree
 */

bool Tree::match(Tree* oldT, void *state)
{
  assert(oldT);
  
  if(oldT->isLeaf()) {
    base->Match(oldT->base);
    return true;
  } else {
    var = oldT->var;
    val = oldT->val;
    Clear();
    bool success = grow_children();
    if(success) { 
      success = leftChild->match(oldT->leftChild, state);
      if(!success) return false;
      success = rightChild->match(oldT->rightChild, state);
      if(!success) return false;
    } else { 
      if(tree_op != CHANGE) return false;
      
#ifdef CPRUNEOP
      /* growing failed becuase of <= MINPART, try CPRUNE */
      tree_op = CPRUNE;
      if(!oldT->rightChild->isLeaf()) return match(oldT->rightChild, state);
      else if(!oldT->leftChild->isLeaf()) return match(oldT->leftChild, state);
      else {
	if(runi(state) > 0.5) assert(match(oldT->leftChild, state));
	else assert(match(oldT->rightChild, state));
	return true;
      }
#endif
    }
	}
  return true;
}


/*
 * try_revert:
 * 
 * revert children and changepoint back to the way they were
 */

bool Tree::try_revert(bool success, Tree* oldLC, Tree* oldRC, 
		int old_var, double old_val)
{
  if(!success) {
    val = old_val;
    var = old_var;
    if(leftChild) delete leftChild;
    if(rightChild) delete rightChild;
    leftChild = oldLC;
    rightChild = oldRC;
    assert(leftChild && rightChild);
    return true;
  } else {
    return false;
  }
}


/*
 * propose_val:
 * 
 * given the old var/val pair, propose a new one 
 */

double Tree::propose_val(void *state)
{
  double min, max;
  Tree* root = model->get_TreeRoot();
  double **locs = root->X;
  unsigned int N = root->n;
  min = 1e300*1e300;
  max = -1e300*1e300;
  for(unsigned int i=0; i<N; i++) {
    double Xivar = locs[i][var];
    if(Xivar > val && Xivar < min) min = Xivar;
    else if(Xivar < val && Xivar > max) max = Xivar;
  }
  assert(val != min && val != max);
  
  double alpha = runi(state);
  
  if(alpha < 0.5) return min;
  else return max;
}

/*
 * leavesPosterior:
 * 
 * get the posterior probability of all 
 * leaf children of this node
 */

double Tree::leavesPosterior(void)
{
  Tree *first, *last;
  int numLeaves = leaves(&first, &last);
  assert(numLeaves > 0);
  double p = 0;
  while(first) {
    p += first->Posterior();
    first = first->next;
  }
  return p;
}


/*
 * leavesN:
 * 
 * get the partition sizes (n) at all
 * leaf children of this node
 */

unsigned int Tree::leavesN(void)
{
  Tree *first, *last;
  int numLeaves = leaves(&first, &last);
  assert(numLeaves > 0);
  unsigned int N = 0;
  while(first) {
    N += first->n;
    first = first->next;
  }
  return N;
}



/* 
 * prune:
 * 
 * attempt to remove both children of this PRUNABLE node by deterministically 
 * combining the D and NUGGET parameters of its children.
 */

bool Tree::prune(double ratio, void *state)
{
  tree_op = PRUNE;
  double q_bak, p_log, pk, pklast, alpha;
  
  /* sane prune ? */
  assert(leftChild && leftChild->isLeaf());
  assert(rightChild && rightChild->isLeaf());
  
  /* get the marginalized posterior of the current
   * leaves of this PRUNABLE node*/
  pklast = leavesPosterior();
  
  /* compute the backwards CHANGE probability */
  q_bak = split_prob();
  p_log = 0.0 - log((double) (model->get_TreeRoot())->n);
  
  /* compute corr and p(Delta_corr) for corr1 and corr2 */
  base->Combine(leftChild->base, rightChild->base, state);
  
  /* create covariance matrix, and compute posterior of new tree */
  Update();
  Compute();
  pk = this->Posterior();
  assert(n == leftChild->n + rightChild->n);
  assert(nn == leftChild->nn + rightChild->nn);
  
  /* prior ratio and acceptance ratio */
  alpha = ratio*exp(q_bak+pk-pklast-p_log);
  if(alpha > 1) alpha = 1;
  
  /* accept or reject? */
  if(runi(state) <= alpha) {
    if(verb >= 1) myprintf(OUTFILE, "**PRUNE** @depth %d: [%d,%g]\n", depth, var, val);
    delete leftChild; 
    delete rightChild;
    leftChild = rightChild = NULL;
    base->ClearPred();
    return true;
  } else {
    return false;
  }
}


/* 
 * grow:
 * 
 * attempt to add two children to this LEAF node by randomly choosing 
 * splitting criterion, along new d and nugget parameters
 */

bool Tree::grow(double ratio, void *state)
{
  tree_op = GROW;
  bool success;
  double q_fwd, pStar_log, pk, pklast, alpha;
  
  /* sane grow ? */
  assert(isLeaf());	
  
  /* propose the next tree, by choosing the split point */
  switch(base->BaseModel()){
  case GP: var = sample_seq(0, d-1, state);
    break;
  case MR_GP: var = sample_seq(1, d-1, state);
    break;
  default: error("bad base model in tree:grow, default to GP");
    var = sample_seq(0, d-1, state);
  }
  
  val = propose_split(&q_fwd, state);
  pStar_log = 0.0 - log((double) (model->get_TreeRoot())->n);
  
  /* grow the children; stop if partition too small */
  success = grow_children();
  if(!success) return false;
  
 
  /* propose new correlation paramers for the new leaves */
  base->Split(leftChild->base, rightChild->base, state);

  /* marginalized posteriors and acceptance ratio */
  pk = leftChild->Posterior() + rightChild->Posterior();
  pklast = this->Posterior();
  alpha = ratio*exp(pk-pklast+pStar_log)/q_fwd;

  if(alpha > 1) alpha = 1;
  
 

  /* accept or reject? */
  bool ret_val = true;
  if(runi(state) > alpha) {
    delete leftChild;
    delete rightChild;
    leftChild = rightChild = NULL;
    ret_val =  false;
  } else {
    Clear();
    if(verb >= 1) 
      myprintf(OUTFILE, "**GROW** @depth %d: [%d,%g], n=(%d,%d)\n", 
	       depth, var, val, leftChild->n, rightChild->n);
  }
  return ret_val;
}


/*
 * grow_children:
 * 
 * grow both left and right children based on splitpoint
 */

bool Tree::grow_children(void)
{
  unsigned int suc1 = grow_child(&leftChild, LEQ);
  if(!suc1 || !(leftChild->wellSized())) {
    if(leftChild) delete leftChild;
    leftChild = NULL;
    assert(rightChild == NULL);
    return false;
  }
  unsigned int suc2 = grow_child(&rightChild, GT);
  if(!suc2 || !(rightChild->wellSized())) {
    delete leftChild;
    if(rightChild) delete rightChild;
    leftChild = rightChild = NULL;
    return false;
  }
  assert(suc1 + suc2 == n);
  assert(leftChild->nn + rightChild->nn == nn);
  return true;
}


/*
 * part_child:
 * 
 * creates the data according to the current partition
 * the current var and val parameters, and the operation "op"
 */

int Tree::part_child(FIND_OP op, double ***Xc, int **pnew, unsigned int *plen,
		     double **Zc, Rect **newRect)
{
  unsigned int i,j;
  int *pchild = find_col(X, n, var, op, val, plen);
  if(*plen == 0) return 0;
  
  /* partition the data and predictive locations */
  *Xc = new_matrix(*plen,d);
  *Zc = new_vector(*plen); 
  *pnew = new_ivector(*plen);
  for(i=0; i<d; i++) for(j=0; j<*plen; j++) (*Xc)[j][i] = X[pchild[j]][i];
  for(j=0; j<*plen; j++) {
    (*Zc)[j] = Z[pchild[j]];
    (*pnew)[j] = p[pchild[j]];
  }
  if(pchild) free(pchild); 
  
  /* record the boundary of this partition */
  *newRect = new_rect(d);
  for(unsigned int i=0; i<d; i++) {
    (*newRect)->boundary[0][i] = rect->boundary[0][i];
    (*newRect)->boundary[1][i] = rect->boundary[1][i];
    (*newRect)->opl[i] = rect->opl[i];
    (*newRect)->opr[i] = rect->opr[i];
  }
  if(op == LEQ) { 
    (*newRect)->opr[var] = op;
    (*newRect)->boundary[1][var] = val; 
  }
  else { 
    (*newRect)->opl[var] = op;
    assert(op == GT); (*newRect)->boundary[0][var] = val; 
  }
  
  return (*plen);
}


/*
 * grow_child:
 * 
 * based on current val and var variables, create the corresponding 
 * leftChild partition returns the number of points in the grown region
 */

unsigned int Tree::grow_child(Tree** child, FIND_OP op)
{
  assert(!(*child));
	
  /* find partition indices */
  unsigned int plen; 
  double **Xc = NULL; 
  Rect *newRect = NULL;
  double *Zc = NULL;
  int *pnew = NULL; 
  
  unsigned int success = part_child(op, &Xc, &pnew, &plen, &Zc, &newRect);
  if(success == 0) return success;
  
  /* grow the Child */
  (*child) = new Tree(Xc, pnew, plen, d, Zc, newRect, this, model);
  return plen;
}


#ifdef DONTDOTHIS
/*
 * val_order_probs:
 * 
 * compute the discrete probability distribution over valid 
 * changepoint locations (UNIFORM)
 */

void Tree::val_order_probs(double **Xo, double **probs,
	unsigned int var, double **rX, unsigned int rn)
{
  unsigned int i;
  *Xo = new_vector(rn); 
  *probs = new_vector(rn);
  for(i=0; i<rn; i++) {
    (*Xo)[i] = rX[i][var];
    (*probs)[i] = 1.0/(rn); 
  }	 
}	 
#endif

//#ifdef DONTDOTHIS
/*
 * val_order_probs:
 *      
 * compute the discrete probability distribution over valid     
 * changepoint locations (TRIANGULAR)   
 */

void Tree::val_order_probs(double **Xo, double **probs,
			   unsigned int var, double **rX, unsigned int rn)
{
  unsigned int i;
  double mid = (rect->boundary[1][var] + rect->boundary[0][var]) / 2;
  double *XmMid = new_vector(rn); 
  *Xo = new_vector(rn); 
  for(i=0; i<rn; i++) {
    double diff = rX[i][var] - mid;
    XmMid[i] = (diff)*(diff);
  }
  int *o = order(XmMid, rn);
  for(i=0; i<rn; i++) (*Xo)[i] = rX[o[i]-1][var];
  *probs = new_vector(rn); 
  int * one2n = iseq(1,rn);
  double sum_left, sum_right;
  sum_left = sum_right = 0;
  for(i=0; i<rn; i++) { 
    (*probs)[i] = 1.0/one2n[i];
    if((*Xo)[i] < mid) sum_left += (*probs)[i]; 
    else sum_right += (*probs)[i];
  }
  double mult;
  if(sum_left > 0 && sum_right > 0) mult = 0.5;
  else mult = 1.0;
  for(i=0; i<rn; i++) { 
    if((*Xo)[i] < mid) (*probs)[i] = mult * (*probs)[i]/sum_left; 
    else (*probs)[i] = mult * (*probs)[i]/sum_right;
  }
  free(one2n);
  free(o);
  free(XmMid);
}
//#endif


/* 
 * propose_split:
 * 
 * draw a new split point for the current var-dimension
 */

double Tree::propose_split(double *p, void *state)
{
  double *Xo, *probs;
  double **locs;
  double val;
  unsigned int indx, N;
  Tree* root = model->get_TreeRoot();
  locs = root->X;
  N = root->n;
  val_order_probs(&Xo, &probs, var, locs, N);
  dsample(&val, &indx, 1, N, Xo, probs, state);
  *p = probs[indx];
  free(Xo); free(probs);
  return val;
}


/* 
 * split_prob:
 * 
 * compute the probability of the current split point
 * returns the log probability
 */

double Tree::split_prob()
{
  double *Xo, *probs; 
  double **locs;
  double p;
  unsigned int find_len, N;
  Tree* root = model->get_TreeRoot();
  locs = root->X;
  N = root->n;
  val_order_probs(&Xo, &probs, var, locs, N);
  int *indx = find(Xo, N, EQ, val, &find_len);
  assert(find_len >= 1 && indx[0] >= 0);
  p = log(probs[indx[0]]);
  free(Xo); free(probs); free(indx);
  return p;
}


/* 
 * getN:
 * 
 * return the number of input locations, N
 */

unsigned int Tree::getN(void)
{
  return n;
}



/* 
 * getNN:
 * 
 * return the number of predictive locations locations, NN
 */

unsigned int Tree::getNN(void)
{
  return nn;
}


/*
 * adjustDepth:
 * 
 * auto increment or decrement the depth of
 * a node (and its children) by int "a"
 */

void Tree::adjustDepth(int a)
{
  if(leftChild) leftChild->adjustDepth(a);
  if(rightChild) rightChild->adjustDepth(a);
  depth += a;
  assert(depth >= 0);
}


/* 
 * swapableList:
 * 
 * get an array containing the internal nodes of the tree t
 */

Tree** Tree::swapableList(unsigned int* len)
{
  Tree *first, *last;
  first = last = NULL;
  *len = swapable(&first, &last);
  if(*len == 0) return NULL;
  return first->buildTreeList(*len);
}



/* 
 * internalsList:
 * 
 * get an array containing the internal nodes of the tree t
 */

Tree** Tree::internalsList(unsigned int* len)
{
  Tree *first, *last;
  first = last = NULL;
  *len = internals(&first, &last);
  if(*len == 0) return NULL;
  return first->buildTreeList(*len);
}


/* 
 * leavesList:
 * 
 * get an array containing the leaves of the tree t
 */

Tree** Tree::leavesList(unsigned int* len)
{
  Tree *first, *last;
  first = last = NULL;
  *len = leaves(&first, &last);
  if(*len == 0) return NULL;
  return first->buildTreeList(*len);
}


/* 
 * prunableList:
 * 
 * get an array containing the prunable nodes of the tree t
 */

Tree** Tree::prunableList(unsigned int* len)
{
  Tree *first, *last;
  first = last = NULL;
  *len = prunable(&first, &last);
  if(*len == 0) return NULL;
  return first->buildTreeList(*len);
}


/* 
 * numLeaves:
 * 
 * get a count of the number of leaves in the tree t
 */

unsigned int Tree::numLeaves(void)
{
  Tree *first, *last;
  first = last = NULL;
  int len = leaves(&first, &last);
  return len;
}


/* 
 * numPrunable:
 * 
 * get a count of the number of prunable nodes of the tree t
 */

unsigned int Tree::numPrunable(void)
{
  Tree *first, *last;
  first = last = NULL;
  int len = prunable(&first, &last);
  return len;
}


/*
 * buildTreeList:
 * 
 * takes a pointer to the first element of a Tree list and a 
 * length parameter and builds an array style list
 */

Tree** Tree::buildTreeList(unsigned int len)
{
  unsigned int i;
  Tree* first = this;
  Tree** list = (Tree**) malloc(sizeof(Tree*) * (len));
  for(i=0; i<len; i++) {
    assert(first);
    list[i] = first;
    first = first->next;
  }
  return list;
}


/*
 * PrintTree:
 * 
 * print the tree out to the file in depth first order
 * -- the R CART tree structure format
 *  rect and scale are for unnnormalization of split point
 */

void Tree::PrintTree(FILE* outfile, double** rect, double scale, int root)
{
  if(isLeaf()) myprintf(outfile, "%d\t <leaf>\t", root);
  else myprintf(outfile, "%d\t %d\t ", root, var);
  myprintf(outfile, "%d\t 0\t %.4f\t ", n, base->Var());
  if(isLeaf()) {
    myprintf(outfile, "\"\"\t \"\"\t\n");
    return;
  }
  
  /* unnormalize the val */
  double vn = val / scale;
  vn = (rect[1][var] - rect[0][var])*vn + rect[0][var];
  
  myprintf(outfile, "\"<%-5g\"\t \">%-5g\"\t\n", vn, vn);
  leftChild->PrintTree(outfile, rect, scale, 2*root);
  rightChild->PrintTree(outfile, rect, scale, 2*root+1);
}


/*
 * dopt_from_XX:
 * 
 * return the indices of N d-optimal draws from XX (of size nn);
 */

unsigned int* Tree::dopt_from_XX(unsigned int N, void *state)
{
  assert(N <= nn);
  assert(XX);
  int *fi = new_ivector(N); 
  double ** Xboth = new_matrix(N+n, d);
  // dopt(Xboth, fi, X, XX, d, n, nn, N, d, nug, state);
  dopt(Xboth, fi, X, XX, d, n, nn, N, DOPT_D(d), DOPT_NUG(), state);
  unsigned int *fi_ret = new_uivector(N); 
  for(unsigned int i=0; i<N; i++) {
    fi_ret[i] = pp[fi[i]-1];
    for(unsigned int j=0; j<d; j++)
      assert(Xboth[n+i][j] == XX[fi[i]-1][j]);
  }
  free(fi);
  delete_matrix(Xboth);
  return fi_ret;
}


/*
 * wellSized:
 * 
 * return true if this node (leaf) is well sized (nonzero 
 * area and > t_minp points in the partition)
 */

bool Tree::wellSized(void)
{
  // return  (n >= *t_minp) && (Area() > 0) && (!Singular());
  return  (n >= model->get_params()->T_minp()) && (Area() > 0) && (!Singular());
}


/*
 * Singular:
 * 
 * return true return true iff X has a column
 * with all the same value.
 */

bool Tree::Singular(void)
{
  assert(X);
  unsigned int mn=0;
  if(base->BaseModel()==MR_GP) mn=1;

  for(unsigned int i=mn; i<d; i++) {
    double f = X[0][i];
    unsigned int j = 0;
    for(j=1; j<n; j++) if(f != X[j][i]) break;
    if(j == n) return true;
  }
  return false;
}


/*
 * Area:
 * 
 * return the area of this partition
 */

double Tree::Area(void)
{
  return rect_area(rect);
}


/*
 * GetRect:
 * 
 * return a pointer to the rectangle associated with this partition
 */

Rect* Tree::GetRect(void)
{
  return rect;
}


/*
 * get_pp:
 * 
 * return indices into the XX array
 */

int* Tree::get_pp(void)
{
  return pp;
}


/*
 * get_XX:
 * 
 * return the predictive data locations: XX
 */

double** Tree::get_XX(void)
{
  return XX;
}


/*
 * get_X:
 * 
 * return the data locations: X
 */

double** Tree::get_X(void)
{
  return X;
}


/*
 * get_Z:
 * 
 * return the data responses: Z
 */

double* Tree::get_Z(void)
{
  return Z;
}


/*
 * cut_branch:
 * 
 * cut the children (recursively) from the tree
 */

void Tree::cut_branch(void)
{
  if(!isLeaf()) {
    assert(leftChild != NULL && rightChild != NULL);
    delete leftChild;
    delete rightChild;
    leftChild = rightChild = NULL;
  }
  base->ClearPred();
  base->Init();
  Update();
  Compute();
}


/*
 * Outfile:
 * 
 * set outfile handle
 */

void Tree::Outfile(FILE *file, int verb)
{
  OUTFILE = file;
  this->verb = verb;
  if(leftChild) leftChild->Outfile(file, verb);
  if(rightChild) rightChild->Outfile(file, verb);
}


/* 
 * Height:
 *
 * compute the height of the the tree
 */

unsigned int Tree::Height(void)
{
  if(isLeaf()) return 1;
  
  unsigned int lh = leftChild->Height();
  unsigned int rh = rightChild->Height();
  if(lh > rh) return 1 + lh;
  else return 1 + rh;
}


/*
 * FullPosterior:
 *
 * Calculate the full posterior of the tree
 */

double Tree::FullPosterior(double alpha, double beta)
{
  double post;

  if(isLeaf()) {

    /* probability of not growing this branch */
    post = log(1.0 - alpha*pow(1.0+depth,0.0-beta));

    /* probability of the base model at this leaf */
    post += base->FullPosterior();

  } else {
    
    /* probability of growing here */
    post = log(alpha) - beta*log(1.0 + depth);

    /* probability of the children */
    post += leftChild->FullPosterior(alpha, beta);
    post += rightChild->FullPosterior(alpha, beta);

  }

  return post;
}


/*
 * Update:
 *
 * calls the GP function of the same name with
 * the data for this tree in th is partition
 */

void Tree::Update(void)
{
  base->Update(X, n, d, Z);
}


/*
 * Compute:
 *
 * do necessary computations the (GP) model at this 
 * node in the tree
 */

void Tree::Compute(void)
{
  assert(base);
  base->Compute();
}


/*
 * State:
 *
 * return string state information from the (GP) model
 * at this node in the tree
 */

char* Tree::State(void)
{
  assert(base);
  return base->State();
}


/*
 * Draw:
 *
 * draw from all of the conditional posteriors of the model(s)
 * (e.g. GP) attached to this leaf node
 */

bool Tree::Draw(void *state)
{
  assert(base);
  assert(isLeaf());
  return base->Draw(state);
}


/*
 * Clear:
 *
 * call the model (e.g. GP) clear function
 */

void Tree::Clear(void)
{
  base->Clear();
}


/*
 * ToggleLinear:
 *
 * make adjustments to toggle to the (limiting) linear
 * model (right now, this only makes sense for the
 * GP LLM)
 */

void Tree::ToggleLinear(void)
{
   if(! base->Linear()) {
      base->ToggleLinear();   
    } else {
      Update();
      Compute();
    }
}


/*
 * Linarea:
 *
 * get statistics from the model (e.g. GP) for calculating
 * the area of the domain under the LLM
 */

bool Tree::Linarea(unsigned int *sum_b, double *area)
{
    *sum_b = base->sum_b();
    *area = Area();
    return base->Linear();
}


/* 
 * Base:
 *
 * return the base model (e.g. gp)
 */

Base* Tree::GetBase(void)
{
  return base;
}
 

/*
 * Posterior:
 *
 * check to make sure the model (e.g., GP) is up to date
 * -- has correct data size --, if not then Update it,
 * and then copute the posterior pdf
 */

double Tree::Posterior(void)
{
  unsigned int basen = base->N();
  if(basen == 0) {
    Update();
    Compute();
  } else assert(basen == n);

  return base->Posterior();
}


/*
 * Trace:
 *
 * gathers trace statistics from the Base model
 * and writes them out to the specified file
 */

void Tree::Trace(unsigned int index, FILE* XXTRACEFILE)
{
  double *trace;
  unsigned int len;
  
  /* sanity checks */
  assert(XXTRACEFILE);
  if(!pp) return;

  /* get the trace */
  trace = base->Trace(&len);

  /* write to the XX trace file */
  for(unsigned int i=0; i<nn; i++) {
    myprintf(XXTRACEFILE, "%d %d ", pp[i]+1, index+1);
    printVector(trace, len, XXTRACEFILE);
  }

  /* discard the trace */
  if(trace) free(trace);
}
back to top