https://github.com/jttoivon/MODER
Raw File
Tip revision: c485231e5b468ae509306e1aaeebaa0f3004572d authored by Jarkko Toivonen on 31 March 2020, 18:10:18 UTC
Fixed indexing bug.
Tip revision: c485231
moder.cpp
/*

    MODER is a program to learn DNA binding motifs from SELEX datasets.
    Copyright (C) 2016, 2017  Jarkko Toivonen,
    Department of Computer Science, University of Helsinki

    MODER is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    MODER is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

*/

///////////////////////////////
//
// (c) Jarkko Toivonen
//
// email: jarkko.toivonen@cs.helsinki.fi
//
///////////////////////////////

#define TIMING

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "unknown"
#endif

#include "iupac.hpp"
#include "matrix.hpp"
#include "vectors.hpp"
#include "timing.hpp"
#include "matrix_tools.hpp"
#include "common.hpp"
#include "probabilities.hpp"
#include "parameters.hpp"
#include "orientation.hpp"
#include "kmer_tools.hpp"
#include "multinomial_helper.hpp"
#include "suffix_array_wrapper.hpp"

#include <sys/stat.h>
#include <cstdio>
#include <cmath>
#include <ctime>
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <fenv.h>

#include <libgen.h>   // for dirname and basename

#ifdef _OPENMP
#include <omp.h>
#endif

#include <string>
#include <iostream>
#include <fstream>
#include <numeric>
#include <queue>
#include <cfloat>

#include <boost/algorithm/string.hpp>
#include <boost/foreach.hpp>
#include <boost/tuple/tuple.hpp>
#include <boost/program_options.hpp>
namespace po = boost::program_options;

// clang does not seem to be able to handle 128 bit type (long double)
// in openmp constructs
#if defined(__clang__)
typedef double FloatType;
#else
typedef long double FloatType;
#endif

typedef boost::tuple<int,int> cob_combination_t;

typedef boost::multi_array<dmatrix, 2> cob_of_matrices;




bool use_palindromic_correction=false;
bool use_multimer=true;
bool use_meme_init=false;
bool get_full_flanks=false;  // make motifs for each model that have width 2*L-motif_width
bool use_rna = false;

int max_iter = 50;  // was 300
int minimum_distance_for_learning = 4;
int global_dmax = 10;
//int global_max_dist_for_deviation = -1;
//int global_max_dist_for_deviation = 4;
int global_max_dist_for_deviation = 1000;

double ic_threshold = 0.40;
double learning_fraction = 0.02;

int character_count=0;
int digram_count=0;

std::vector<double> background_frequencies(4);
std::vector<double> background_probabilities(4);
dmatrix background_frequency_matrix(4,4);   // for Markov background
dmatrix background_probability_matrix(4,4); // for Markov background


prior<double> pseudo_counts;

double cob_cutoff = 0.001;  // if an element in a cob table is smaller than this constant,
                           // the element is excluded. Note! This works for spaced dimers as well

bool adjust_seeds = true;
bool use_multinomial=true;
bool local_debug = true;
bool extra_debug = false;   // Even more printing
bool allow_extension = false;
bool use_dimers = true;
bool seeds_given = false;
bool no_unique = false;
bool use_output = false; // whether to write model parameters to files
bool maximize_overlapping_seeds=true;
bool require_directional_seed = false;
bool avoid_palindromes = false; // If tf1==tf2, the orientation is HH or TT and the PPM \tau_ht1,ht2,o,d, the
                                // probability of a sequence is the same in both directions.
                                // If we recognize this situation, we can try to escape from this palindromicity
                                // by using temporarily only a single strand.
int hamming_distance_overlapping_seeds_N=0;
int hamming_distance_overlapping_seeds_OR=2;
int hamming_radius = 1; // For learning the model from sequences in the hamming neighbourhood of the seed

std::string unbound; // Filename where to store the sequences that had greatest probability under background model
std::vector<std::string> names;
std::string outputdir = ".";

typedef std::string (*combine_seeds_func_ptr)(const std::string& seed1, const std::string& seed2, int d);

enum { COMBINE_SEEDS_OR=0, COMBINE_SEEDS_AND=1, COMBINE_SEEDS_N=2};
int default_combine_method = COMBINE_SEEDS_N;
combine_seeds_func_ptr combine_seeds_func;

struct combine_seeds_method_type {
  const char* name;
  combine_seeds_func_ptr function;
};

std::string
combine_seeds_OR(const std::string& seed1, const std::string& seed2, int d);

std::string
combine_seeds_AND(const std::string& seed1, const std::string& seed2, int d);

std::string
combine_seeds_masked(const std::string& seed1, const std::string& seed2, int d);

combine_seeds_method_type combine_seeds_methods[] =
  {
    { "UNION", combine_seeds_OR},
    { "INTERSECTION", combine_seeds_AND},
    { "N", combine_seeds_masked}
  };

template <typename T>
class BinOp
{
public:
  typedef const T& (*type)(const T&, const T&);
};


//template <typename T, typename BinaryOperation>
template <typename T>
class myaccumulate
{
public:
  typedef typename BinOp<T>::type BinaryOperation;

  typedef const T& (*func_ptr)(const T&, const T&);


  myaccumulate(T init, BinaryOperation op_) : result(init), op(op_) {}

  void 
  operator()(T v)
  {
    result = op(result, v);
  }

  T get() const { return result; }

  void
  reset(T t = T())
  { result=t; }

private:
  T result;
  BinaryOperation op;
};

std::string
combine_seeds_OR(const std::string& seed1, const std::string& seed2, int d)
{
  assert(d < 0);
  int k1 = seed1.length();
  int k2 = seed2.length();
  std::string result(k1 + k2 + d, '-');

  for (int i=0; i < k1 + d; ++i)
    result[i] = seed1[i];

  for (int i=k1+d; i < k1; ++i)
    result[i] = iupac_class.bits_to_char(iupac_class.char_to_bits(seed1[i]) | iupac_class.char_to_bits(seed2[i-k1-d]));
  for (int i=k1; i < k1+k2+d; ++i)
    result[i] = seed2[i-k1-d];

  return result;
}


std::string
combine_seeds_AND(const std::string& seed1, const std::string& seed2, int d)
{
  assert(d < 0);
  int k1 = seed1.length();
  int k2 = seed2.length();
  std::string result(k1 + k2 + d, '-');

  for (int i=0; i < k1 + d; ++i)
    result[i] = seed1[i];

  for (int i=k1+d; i < k1; ++i)
    result[i] = iupac_class.bits_to_char(iupac_class.char_to_bits(seed1[i]) & iupac_class.char_to_bits(seed2[i-k1-d]));
  for (int i=k1; i < k1+k2+d; ++i)
    result[i] = seed2[i-k1-d];

  return result;
}

std::string
combine_seeds_masked(const std::string& seed1, const std::string& seed2, int d)
{
  assert(d < 0);
  int k1 = seed1.length();
  int k2 = seed2.length();
  std::string result(k1 + k2 + d, 'N');
  int x = 0; // Make the gap wider by x from both sides
  for (int i=0; i < k1 + d - x; ++i)   // left flank
    result[i] = seed1[i];

  for (int i=k1 + x; i < k1+k2+d; ++i) // right flank
    result[i] = seed2[i-k1-d];

  return result;
}



// gives descending order on the first position of the triple
class comp_first_desc
{
public:

  bool operator()(boost::tuple<double, int, int> x, 
		  boost::tuple<double, int, int> y) const {
    return boost::get<0>(x) > boost::get<0>(y);
  }

};


class gapped_kmer_context
{
public:

  std::string
  make_string(const std::vector<std::string>& sequences)
  {
    std::string str1=join(sequences, '#');
    if (use_two_strands) {
      str1.append("#");
      str1 += join_rev(sequences, '#');
    }

    return str1;
  }

  gapped_kmer_context(const std::vector<std::string>& sequences) : sa(make_string(sequences), use_rna)
  {
    timer = 0.0;
  }


  unsigned long int
  count(const std::string& pattern) const
  {
    return sa.count_iupac(pattern);
  }

  // Finds a string with maximum count from a set of strings.
  // The set of strings is specified by an iupac sequence 's'
  boost::tuple<std::string,int>
  max(const std::string& s) const
  {
    TIME_START(s);
    int k = s.length();
    assert(is_iupac_string(s));

    //int max_count = 0;
    std::string arg_max=s;

    std::vector<int> v(k, 0);  // helper array

    std::vector<int> base(k, 0);
    //    std::vector<const char*> iupac_classes(k);
    std::vector<std::string> iupac_classes(k);
    std::string pattern(k, '-');
    int number_of_combinations = 1;
    for (int i=0; i < k; ++i) {
      iupac_classes[i] = iupac_class(s[i]);
      if (use_rna)
	std::replace(iupac_classes[i].begin(), iupac_classes[i].end(), 'T', 'U');
      int size = iupac_classes[i].length();
      base[i] = size - 1;
      pattern[i] = iupac_classes[i][0];            // Initialize pattern to the first sequence of iupac string
      number_of_combinations *= size;
    }
    v[k-1]=-1;  // Initialize
    // The loop goes through nucleotide sequences that belong to
    //given iupac sequence in alphabetical order
    std::multimap<unsigned long int, std::string> kmers;
    for (int j=0; j < number_of_combinations; ++j) {
      
      int i;
      for (i=k-1; v[i] == base[i]; --i) {
	v[i]=0;
	pattern[i] = iupac_classes[i][v[i]];
      }
      v[i]++;
      pattern[i] = iupac_classes[i][v[i]];

    /* Writes in numocc the number of occurrences of the substring 
       pattern[0..length-1] found in the text indexed by index. */

      unsigned long int number_of_occurrences;
      number_of_occurrences = count(pattern);
      kmers.insert(std::make_pair(number_of_occurrences, pattern));
      
      // if (number_of_occurrences > max_count) {
      // 	max_count = number_of_occurrences;
      // 	arg_max = pattern;
      // }
    }
    unsigned long int count=0;
    BOOST_REVERSE_FOREACH(boost::tie(count, pattern), kmers) {
      if (require_directional_seed and palindromic_index(pattern) <= 1) //is_palindromic(pattern))
	;
      else {
	break;
      }
	
    }
    timer += TIME_GET(s);

    return boost::make_tuple(pattern, count);
    //    return boost::make_tuple(arg_max, max_count);
  }

  // Allow mismatch ('N') in 'errors' positions in the subsequence s_orig[begin,begin+len)
  boost::tuple<std::string,int>
  max(const std::string& s_orig, int errors, int begin=0, int len=-1) const
  {
    assert(errors == 0 or errors == 1 or errors == 2);
    if (errors == 0)
      return max(s_orig);
    std::string s = s_orig;
    int k = s.length();
    if (len == -1)
      len = k;
    int max_count=0;
    std::string arg_max = s;
    char old_i = 'N';
    char old_j = 'N';
    std::string result;
    int count;
    int end = begin+len;
    if (errors == 1) {
      for (int i=begin; i < end; ++i) {
	if (s[i] == 'N')
	  continue;
	std::swap(s[i], old_i);
	boost::tie(result, count) = max(s);
	if (count > max_count) {
	  max_count = count;
	  arg_max = result;
	}
	std::swap(s[i], old_i);
      }
      return boost::make_tuple(arg_max, max_count);
    }
    else if (errors == 2) {
      for (int i=begin; i < end-1; ++i) {
	if (s[i] == 'N')
	  continue;
	std::swap(s[i], old_i);
	for (int j=i+1; j < end; ++j) {
	  if (s[j] == 'N')
	    continue;
	  std::swap(s[j], old_j);
	  boost::tie(result, count) = max(s);
	  if (count > max_count) {
	    max_count = count;
	    arg_max = result;
	  }
	  std::swap(s[j], old_j);
	}  // end for j
	std::swap(s[i], old_i);
      } // end for i
      return boost::make_tuple(arg_max, max_count);
    }
    return boost::make_tuple(std::string(""), 0);
  }



  ~gapped_kmer_context() 
  { 
    printf("Gapped kmer context spend %.2f seconds in total finding maxima\n", timer);
  }

private:
  suffix_array sa;
  mutable double timer;
};


// return -1 if Hamming distance is 0,
//        -2 if Hamming distance is greater than one,
//        pos of the mismatch if distance equals one
int
iupac_hamming_dist_helper(const std::string& str, const std::string& pattern)
{
  assert(str.length() == pattern.length());
  int result = -1;
  for (int i=0; i < str.length(); ++i) {
    if (not iupac_match(str[i], pattern[i])) {
      if (result == -1)
	result = i;
      else
	return -2;
    }
  }
  
  return result;
}


// The most significant bit in the bit vector corresponds to position 0 in strings.
// If length of strings is n, then
// result & 1 == 1 iff str[n-1] does not match pattern[n-1]. 
template <typename T>
T
iupac_mismatch_positions(const std::string& str, const std::string& pattern)
{
  assert(str.length() == pattern.length());
  assert(str.length() * 2 <= sizeof(T)*8);
  T result = 0;
  for (int i=0; i < str.length(); ++i) {
    result <<= 1;
    result |= (iupac_match(str[i], pattern[i]) ? static_cast<T>(0) : static_cast<T>(1));
  }
  
  return result;
}

template <typename T, typename F>
matrix<T>
log2(const matrix<F>& orig)
{
  matrix<T> result(orig.dim());
  int rows=orig.get_rows();
  int cols=orig.get_columns();
  for (int i=0; i < rows; ++i)
    for (int j=0; j < cols; ++j)
      result(i, j) = log2l(orig(i, j));

  return result;
}

template <typename T>
T
log_sum(std::priority_queue<T, std::vector<T>, std::greater<T> >& queue)
{
  do {
    T q = queue.top(); queue.pop();
    T p = queue.top(); queue.pop();
    assert (p >= q);
    queue.push(p + log2l(1+exp2l(q-p)));
  } while (queue.size() > 1);
  
  return queue.top();
}

template <typename T>
T
log_sum(std::vector<T>& v)
{
  T p = v.back(); v.pop_back();
  do {
    T q = v.back(); v.pop_back();
    if (q > q)
      std::swap(p, q);
    p = p + log2l(1+exp2l(q-p));
  } while (v.size() > 0);
  
  return p;
}

typedef boost::multi_array<FloatType, 4> array_4d_type; // for Z variable, indices are (i,k,dir,j)
typedef boost::multi_array<FloatType, 5> array_5d_type; // for Z variable, indices are (i,o,d,dir,j)


struct cob_params_t
{
  typedef boost::multi_array<double, 2>::extent_range range;

  cob_params_t(int tf1_, int tf2_, 
	       const boost::multi_array<double, 2>& dimer_lambdas_,
	       const boost::multi_array<std::string, 2>& dimer_seeds_,
	       const boost::multi_array<dmatrix, 2>& overlapping_dimer_PWM_,
	       const std::vector<std::string>& fixed_seeds_, const std::vector<int>& L_, int dmin_, int dmax_,
	       int max_dist_for_deviation_)
    :  tf1(tf1_), tf2(tf2_), dimer_lambdas(dimer_lambdas_), dimer_seeds(dimer_seeds_),
       overlapping_dimer_PWM(overlapping_dimer_PWM_), L(L_), dmin(dmin_), dmax(dmax_),
       max_dist_for_deviation(max_dist_for_deviation_)
  {
    k1 = fixed_seeds_[tf1].length();
    k2 = fixed_seeds_[tf2].length();
    //    assert(dmin == -std::min(k1, k2) + min_flank);
    
    number_of_orientations = use_rna ? 1 : 3;
    if (tf1 != tf2)
      ++number_of_orientations;  // is a hetero dimer
    
    expected_overlapping_dimer_PWMs.resize(boost::extents[number_of_orientations][range(dmin, max_dist_for_deviation+1)]);
    lines = L.size();
    dimer_w.resize(boost::extents[range(dmin, dmax+1)]);
    dimer_m.resize(boost::extents[lines][range(dmin, dmax+1)]);

    deviation.resize(boost::extents[number_of_orientations][range(dmin, max_dist_for_deviation+1)]);
    for (int o=0; o < number_of_orientations; ++o) {
      for (int d=dmin; d <= max_dist_for_deviation; ++d) {
	deviation[o][d] = dmatrix(4, k1 + k2 + d);  // This contains redundant flanks. Just to ease
	                                            // operating with matrices expected and observed
	                                            // which have the same dimensions.
      }
    }
  }

  std::string
  name() const 
  {
    return to_string("%i-%i", tf1, tf2);
  }

  void
  set_lengths() {
    myaccumulate<int> acc(0, static_cast<BinOp<int>::type>(std::max));
    for (int d=dmin; d < std::min(0, dmax+1); ++d) {
      dimer_w[d] = overlapping_dimer_PWM[0][d].get_columns();
      for (int i=0; i < lines; ++i) {
	dimer_m[i][d] = L[i] - dimer_w[d] + 1;
	acc(dimer_m[i][d]);
      }
    }  // end for d

    overlapping_dimer_m_max = acc.get();
    
    acc.reset(0);
    for (int d=0; d <= dmax; ++d){
      dimer_w[d] = k1 + d + k2;
      for (int i=0; i < lines; ++i) {
	dimer_m[i][d] = L[i] - dimer_w[d] + 1;  // One past maximum start pos for dimer
	acc(dimer_m[i][d]);
      }
    }  // end for d
      
    spaced_dimer_m_max = acc.get();
  }

  void
  initialize_Z(int lines) {
    int directions = use_two_strands ? 2 : 1;
    overlapping_dimer_Z.resize(boost::extents[lines][number_of_orientations][range(dmin,max_dist_for_deviation+1)][directions][overlapping_dimer_m_max]);
    spaced_dimer_Z.resize(boost::extents[lines][number_of_orientations][range(max_dist_for_deviation+1,dmax+1)][directions][spaced_dimer_m_max]);
  }

  void
  update_oriented_matrices(const std::vector<dmatrix>& fixed_PWM, const std::vector<std::string>& fixed_seed) {
    oriented_dimer_matrices.resize(4);
    oriented_dimer_seeds.resize(4);
    for (int o=0; o < number_of_orientations; ++o) {
      oriented_dimer_matrices[o] =
	get_matrices_according_to_hetero_orientation(o, fixed_PWM[tf1], fixed_PWM[tf2], use_rna);
      oriented_dimer_seeds[o] =
	get_seeds_according_to_hetero_orientation(o, fixed_seed[tf1], fixed_seed[tf2], use_rna);
    }
  }

  void
  compute_expected_matrices(const std::vector<dmatrix>& fixed_PWM) {
    // Compute expected matrices
    for (int o=0; o < number_of_orientations; ++o) {
      for (int d=dmin; d <= max_dist_for_deviation; ++d) {
	dmatrix expected = matrix_product(oriented_dimer_matrices[o].get<0>(), oriented_dimer_matrices[o].get<1>(), d);
	normalize_matrix_columns(expected);
	expected_overlapping_dimer_PWMs[o][d] = expected;
      } // end for d
    }   // end for o
  }

  void
  compute_deviation_matrices()
  {
    const int& w1 = k1;
    const int& w2 = k2;
    for (int o=0; o < number_of_orientations; ++o) {
      for (int d=dmin; d <= max_dist_for_deviation; ++d) {
	int first, last;
	// The interval [first,last] is either the overlap area or the gap.
	if (use_rna and o == RNA_TH) {
	  if (d < 0) {
	    first = w2 + d;
	    last = w2 - 1;
	  } else {
	    first = w2;
	    last = w2 + d - 1;
	  }
	}
	else {
	  if (d < 0) {
	    first = w1 + d;
	    last = w1 - 1;
	  } else {
	    first = w1;
	    last = w1 + d - 1;
	  }
	}
	for (int row = 0; row < 4; ++row) {
	  for (int column = first - 1; column <= last + 1; ++column) // The overlapping part with flanks of 1bp on both sides
	    deviation[o][d](row,column) = 
	      overlapping_dimer_PWM[o][d](row,column) - expected_overlapping_dimer_PWMs[o][d](row,column);
	}
      }  // end for d
	 
    }  // end for o
 
  }

  int tf1;
  int tf2;
  boost::multi_array<double, 2>      dimer_lambdas;
  boost::multi_array<std::string, 2> dimer_seeds;
  boost::multi_array<dmatrix, 2>     overlapping_dimer_PWM;
  boost::multi_array<dmatrix, 2>     deviation;

  //  const std::vector<std::string>&    fixed_seeds;

  int k1;
  int k2;
  const std::vector<int>& L;
  int lines;
  int dmin;
  int dmax;
  int max_dist_for_deviation;
  int number_of_orientations;

  int overlapping_dimer_m_max;                     // maximum width of a pwm
  int spaced_dimer_m_max;                     // maximum width of a pwm

  boost::multi_array<dmatrix, 2> expected_overlapping_dimer_PWMs;
  boost::multi_array<double, 1> dimer_w;
  boost::multi_array<double, 2> dimer_m;


  array_5d_type overlapping_dimer_Z;
  array_5d_type spaced_dimer_Z;

  std::vector<boost::tuple<dmatrix,dmatrix> > oriented_dimer_matrices;
  std::vector<boost::tuple<std::string,std::string> > oriented_dimer_seeds;
};


void
print_math_error(int retval)
{
  printf("Error! ");
  if (retval & FE_INVALID)
    printf("FE_INVALID ");
  if (retval & FE_DIVBYZERO)
    printf("FE_DIVBYZERO ");
  if (retval &FE_OVERFLOW )
    printf("FE_OVERFLOW ");
  if (retval & FE_UNDERFLOW)
    printf("FE_UNDERFLOW ");

}


// This computes the expected log likelihood of the COMPLETE data. that is: E log P(X,Z|total model)
double
complete_data_log_likelihood(const std::vector<dmatrix>& PWM, 
			     const std::vector<double>& q, 
			     const dmatrix& q2, 
			     const std::vector<double>& q_rev, 
			     const dmatrix& q2_rev, 
			     const std::vector<double>& lambda, 
			     double lambda_bg, std::vector<cob_params_t>& my_cob_params,
			     const array_4d_type& fixed_Z,
			     const std::vector<std::string>& sequences,
			     const std::vector<std::string>& sequences_rev,
			     const std::vector<int>& fixed_w,
			     const boost::multi_array<int, 2>& fixed_m)
{

  double result=0.0;
  int p=PWM.size();
  //  int L=sequences[0].length();
  int number_of_cobs = my_cob_params.size();

  FloatType l2 = log2l(2);
  FloatType log_lambda_bg = log2l(lambda_bg);
  std::vector<FloatType> log_q(4);
  std::vector<FloatType> log_q_rev(4);
  std::vector<double> log_lambda(p);
  std::vector<matrix<FloatType> > log_PWM;
  for (int i=0; i < 4; ++i) {
    log_q[i] = log2l(q[i]);
    log_q_rev[i] = log2l(q_rev[i]);
  }
  for (int i=0; i < p; ++i) {
    log_lambda[i] = log2l(lambda[i]);
    log_PWM.push_back(log2<FloatType>(PWM[i]));
  }
  boost::multi_array<matrix<FloatType>, 3> log_oriented_dimer_matrices(boost::extents[number_of_cobs][4][2]);
  typedef boost::multi_array<matrix<FloatType>, 2> cob_of_matrices_t;
  typedef cob_of_matrices_t::extent_range range;
  std::vector<cob_of_matrices_t> overlapping_models;
  for (int r=0; r < number_of_cobs; ++r) {
    int tf1 = my_cob_params[r].tf1;
    int tf2 = my_cob_params[r].tf2;
    int number_of_orientations = use_rna ? 1 : 3;
    if (tf1 != tf2)
      ++number_of_orientations;
    int dmin = my_cob_params[r].dmin;
    int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
    //    int dmax = my_cob_params[r].dmax;
    
    overlapping_models.push_back(cob_of_matrices_t(boost::extents[number_of_orientations][range(dmin, max_dist_for_deviation+1)]));
    for (int o=0; o < number_of_orientations; ++o) {
      log_oriented_dimer_matrices[r][o][0] = log2<FloatType>(my_cob_params[r].oriented_dimer_matrices[o].get<0>());
      log_oriented_dimer_matrices[r][o][1] = log2<FloatType>(my_cob_params[r].oriented_dimer_matrices[o].get<1>());
      for (int d=dmin; d <= max_dist_for_deviation; ++d) {
	overlapping_models[r][o][d] = log2<FloatType>(my_cob_params[r].expected_overlapping_dimer_PWMs[o][d] + my_cob_params[r].deviation[o][d]);
      }
    }
  }
#pragma omp parallel for shared(use_two_strands) reduction(+:result) schedule(static)
  for (int i=0; i < sequences.size(); ++i) {
    const std::string& line = sequences[i];
    const std::string& line_rev = sequences_rev[i];
    double temp = 0.0;
    double sum_of_Zs=0.0;

    /////////////////////
    // Monomer models
    
    for (int k=0; k < p; ++k) {
      for (int j=0; j < fixed_m[i][k]; ++j) {
	feclearexcept(FE_ALL_EXCEPT);
	temp +=
	  (compute_log_probability<FloatType>(line, line_rev, j, 1, log_PWM[k], log_q, q2)
	   + log_lambda[k] - log(fixed_m[i][k]) - l2)
	  * fixed_Z[i][k][0][j];
	if (use_two_strands)
	  temp +=
	    (compute_log_probability<FloatType>(line, line_rev, j, -1, log_PWM[k], log_q_rev, q2_rev)
	     + log_lambda[k] - log(fixed_m[i][k]) - l2)
	    * fixed_Z[i][k][1][j];
	int retval = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
	if (retval) {
	  print_math_error(retval);
	  printf("Sequence %i, monomer model %i, position %i\n", i, k, j);
	}
	sum_of_Zs += fixed_Z[i][k][0][j];
	if (use_two_strands)
	  sum_of_Zs += fixed_Z[i][k][1][j];
      }
    }

    for (int r=0; r < number_of_cobs; ++r) {
      int tf1 = my_cob_params[r].tf1;
      int tf2 = my_cob_params[r].tf2;
      int number_of_orientations = use_rna ? 1 : 3;
      if (tf1 != tf2)
	++number_of_orientations;
      int dmin = my_cob_params[r].dmin;
      int dmax = my_cob_params[r].dmax;
      int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
      for (int o=0; o < number_of_orientations; ++o) {
	for (int d=max_dist_for_deviation+1; d <= dmax; ++d) {       // spaced dimers
	  int m = my_cob_params[r].dimer_m[i][d];
	  if (m <= 0)
	    continue;
	  FloatType log_m = log2l(m);
	  for (int j=0; j < m; ++j) {
	    if (my_cob_params[r].dimer_lambdas[o][d] > 0.0) {
	      feclearexcept(FE_ALL_EXCEPT);
	      temp +=
		(compute_log_dimer_probability<FloatType>(line, line_rev,
							  j, +1,
							  log_oriented_dimer_matrices[r][o][0], 
							  log_oriented_dimer_matrices[r][o][0], 
							  d, log_q, q2)
		 + log2l(my_cob_params[r].dimer_lambdas[o][d]) - log_m - l2)
		* my_cob_params[r].spaced_dimer_Z[i][o][d][0][j];
	      if (use_two_strands)
		temp +=
		  (compute_log_dimer_probability<FloatType>(line, line_rev,
							    j, -1,
							    log_oriented_dimer_matrices[r][o][0], 
							    log_oriented_dimer_matrices[r][o][1], 
							    d, log_q_rev, q2_rev)
		   + log2l(my_cob_params[r].dimer_lambdas[o][d]) - log_m - l2)
		  * my_cob_params[r].spaced_dimer_Z[i][o][d][1][j];
	      int retval = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
	      if (retval) {
		print_math_error(retval);
		printf("Sequence %i, r=%i, spaced dimer model %s %i, position %i\n", i, r, orients[o], d, j);
	      }
	    }
	    sum_of_Zs += my_cob_params[r].spaced_dimer_Z[i][o][d][0][j];
	    if (use_two_strands)
	      sum_of_Zs += my_cob_params[r].spaced_dimer_Z[i][o][d][1][j];
	  } // end for j
	} // end for d

	for (int d=dmin; d <= max_dist_for_deviation; ++d) {        // overlapping dimers
	  int m = my_cob_params[r].dimer_m[i][d];
	  if (m <= 0)
	    continue;
	  FloatType log_m = log2l(m);
	  const matrix<FloatType>& model = overlapping_models[r][o][d];
	  for (int j=0; j < m; ++j) {
	    if (my_cob_params[r].dimer_lambdas[o][d] > 0.0) {
	      feclearexcept(FE_ALL_EXCEPT);
	      temp +=
		(compute_log_probability<FloatType>(line, line_rev, j, +1, model, log_q, q2)
		 + log2l(my_cob_params[r].dimer_lambdas[o][d]) - log_m - l2)
		* my_cob_params[r].overlapping_dimer_Z[i][o][d][0][j];
	      if (use_two_strands)
		temp +=
		  (compute_log_probability<FloatType>(line, line_rev, j, -1, model, log_q_rev, q2_rev)
		   + log2l(my_cob_params[r].dimer_lambdas[o][d]) - log_m - l2)
		  * my_cob_params[r].overlapping_dimer_Z[i][o][d][1][j];
	      int retval = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
	      if (retval) {
		print_math_error(retval);
		printf("Sequence %i, r=%i, overlapping dimer model %s %i, position %i\n", i, r, orients[o], d, j);
	      }
	    }
	    sum_of_Zs += my_cob_params[r].overlapping_dimer_Z[i][o][d][0][j];
	    if (use_two_strands)
	      sum_of_Zs += my_cob_params[r].overlapping_dimer_Z[i][o][d][1][j];
	  }
	}

      }  // end for o
    } // end for r

    feclearexcept(FE_ALL_EXCEPT);
    if (lambda_bg > 0.0)
      temp += (compute_log_background_probability<FloatType>(line, log_q, q2) + log_lambda_bg) * (1.0 - sum_of_Zs);
    int retval = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
    if (retval) {
      print_math_error(retval);
      printf("Sequence %i, background model\n", i);
    }
    result += temp;
  } // end for i

  return result;
}




////////////////////////////////////////////
//
// Helper functions to compute expectations
//
////////////////////////////////////////////

// This is for monomer cases

void
expectation_Z_dir_j(boost::multi_array<FloatType, 4>& Z, int i, int k,
		    const std::string& line,
		    const std::string& line_rev,
		    int m, FloatType log_lambda, const matrix<FloatType>& log_PWM, 
		    const std::vector<FloatType>& log_bg_model,
		    const std::vector<FloatType>& log_bg_model_rev,
		    const dmatrix& bg_model_markov, const dmatrix& bg_model_markov_rev)
{
  FloatType log_m = log2l((long double)m);
  FloatType log_lambda_per_pos = log_lambda - log_m;
  if (use_two_strands)
    log_lambda_per_pos -= log2l(2.0);
  for (int j=0; j < m; ++j) {
    FloatType p1 = compute_log_probability<FloatType>(line, line_rev, j, 1, log_PWM, log_bg_model, bg_model_markov);

    // f_j
    feclearexcept(FE_ALL_EXCEPT);
    Z[i][k][0][j]=p1 + log_lambda_per_pos;
    int retval = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
    if (retval) {
      print_math_error(retval);
      printf("Expectation, sequence %i, pos %i, monomer model %i\n", i, j, k);
    }
    if (use_two_strands) {
      FloatType p2 = compute_log_probability<FloatType>(line, line_rev, j, -1, log_PWM, log_bg_model_rev, bg_model_markov_rev);

      feclearexcept(FE_ALL_EXCEPT);
      Z[i][k][1][j]=p2 + log_lambda_per_pos;   // reverse complement
      int retval = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
      if (retval) {
	print_math_error(retval);
	printf("Expectation, sequence %i, pos %i, monomer model %i\n", i, j, k);
      }
    }
  }
}

//This is for overlapping dimer cases
void
expectation_Z_dir_j_overlapping(boost::multi_array<FloatType, 5>& Z, int i, int o, int d,
				const std::string& line,
				const std::string& line_rev,
				int m, FloatType log_lambda, const matrix<FloatType>& log_PWM, 
				const std::vector<FloatType>& log_bg_model,
				const std::vector<FloatType>& log_bg_model_rev,
				const dmatrix& bg_model_markov, const dmatrix& bg_model_markov_rev)
{
  
  if (not std::isfinite(log_lambda) or log_lambda == 0.0) { // the dimer does not fit on this line
    for (int j=0; j < m; ++j) {
      Z[i][o][d][0][j]=0.0;
      if (use_two_strands)
	Z[i][o][d][1][j]=0.0;
    }
    return;
  }
  
  FloatType log_m = log2l(m);
  FloatType log_lambda_per_pos = log_lambda - log_m;
  if (use_two_strands)
    log_lambda_per_pos -= log2l(2.0);
  for (int j=0; j < m; ++j) {
    FloatType p1 = compute_log_probability<FloatType>(line, line_rev, j, 1, log_PWM, log_bg_model, bg_model_markov);

    // f_j
    feclearexcept(FE_ALL_EXCEPT);
    Z[i][o][d][0][j]=p1 + log_lambda_per_pos;
    int retval = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
    if (retval) {
      print_math_error(retval);
      printf("Expectation, sequence %i, pos %i, overlapping model %s %i\n", i, j, orients[o], d);
    }

    if (use_two_strands) {
      FloatType p2 = compute_log_probability<FloatType>(line, line_rev, j, -1, log_PWM, log_bg_model_rev, bg_model_markov_rev);

      feclearexcept(FE_ALL_EXCEPT);
      Z[i][o][d][1][j]=p2 + log_lambda_per_pos;   // reverse complement
      int retval = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
      if (retval) {
        print_math_error(retval);
        printf("Expectation, sequence %i, pos %i, overlapping model %s %i\n", i, j, orients[o], d);
      }

    }
  }
}

//This is for spaced dimer cases
void
expectation_Z_dir_j_spaced(boost::multi_array<FloatType, 5>& Z, int i,
			   int o, int d,
			   const std::string& line,
			   const std::string& line_rev,
			   int m, FloatType log_lambda,
			   const matrix<FloatType>& log_PWM1,
			   const matrix<FloatType>& log_PWM2,
			   const std::vector<FloatType>& log_bg_model,
			   const std::vector<FloatType>& log_bg_model_rev,
			   const dmatrix& bg_model_markov, const dmatrix& bg_model_markov_rev)
{
  if (not std::isfinite(log_lambda) or log_lambda == 0.0) {
    for (int j=0; j < m; ++j) {
      Z[i][o][d][0][j]=0.0;
      if (use_two_strands)
	Z[i][o][d][1][j]=0.0;
    }
    return;
  }
  FloatType log_m = log2l(m);
  FloatType log_lambda_per_pos = log_lambda - log_m;
  if (use_two_strands)
    log_lambda_per_pos -= log2l(2.0);
  for (int j=0; j < m; ++j) {
    FloatType p1 = compute_log_dimer_probability<FloatType>(line, line_rev, j, 1, log_PWM1, log_PWM2, d, log_bg_model, bg_model_markov);


    // f_j
    feclearexcept(FE_ALL_EXCEPT);
    Z[i][o][d][0][j]=p1 + log_lambda_per_pos;
    int retval = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
    if (retval) {
      print_math_error(retval);
      printf("Expectation, sequence %i, pos %i, spaced model %s %i\n", i, j, orients[o], d);
    }
    if (use_two_strands) {
      FloatType p2 = compute_log_dimer_probability<FloatType>(line, line_rev, j, -1, log_PWM1, log_PWM2, d, log_bg_model_rev, bg_model_markov_rev);

      feclearexcept(FE_ALL_EXCEPT);
      Z[i][o][d][1][j]=p2 + log_lambda_per_pos;   // reverse complement
      int retval = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
      if (retval) {
        print_math_error(retval);
        printf("Expectation, sequence %i, pos %i, spaced model %s %i\n", i, j, orients[o], d);
      }
    }
  }
}


typedef std::priority_queue<FloatType, std::vector<FloatType>, std::greater<FloatType> > queue_type;
//typedef std::vector<FloatType> queue_type;

void
sum_Z_dir_j(const boost::multi_array<FloatType, 5>& Z, int i, int o, int d, int m, queue_type& queue)
{
  for (int j=0; j < m; ++j) {
    if (Z[i][o][d][0][j] != 0.0) {
      queue.push(Z[i][o][d][0][j]);
    }
  }
  if (use_two_strands) {
    for (int j=0; j < m; ++j) {
      if (Z[i][o][d][1][j] != 0.0) {
	queue.push(Z[i][o][d][1][j]);
      }
    }
  }
  return;
}

void
sum_Z_dir_j(const boost::multi_array<FloatType, 4>& Z, int i, int k, int m, queue_type& queue)
{
  for (int j=0; j < m; ++j) {
    if (Z[i][k][0][j] != 0.0) {
      queue.push(Z[i][k][0][j]);
    }
  }
  if (use_two_strands) {
    for (int j=0; j < m; ++j) {
      if (Z[i][k][1][j] != 0.0) {
	queue.push(Z[i][k][1][j]);
      }
    }
  }
  return;
}




void
normalize_Z_dir_j(boost::multi_array<FloatType, 4>& Z, int i, int k, int m, FloatType divisor)
{
  for (int j=0; j < m; ++j) {
    if (Z[i][k][0][j] != 0.0)
      Z[i][k][0][j] = exp2l(Z[i][k][0][j] - divisor);
    

    assert(Z[i][k][0][j] >= 0);
    assert(Z[i][k][0][j] <= 1);
  }

  if (use_two_strands) {
    for (int j=0; j < m; ++j) {
      if (Z[i][k][1][j] != 0.0)
	Z[i][k][1][j] = exp2l(Z[i][k][1][j] - divisor);
	  
      assert(Z[i][k][1][j] >= 0);
      assert(Z[i][k][1][j] <= 1);
    }
  }
}

void
normalize_Z_dir_j(boost::multi_array<FloatType, 5>& Z, int i, int o, int d, int m, FloatType divisor)
{
  for (int j=0; j < m; ++j) {
    if (Z[i][o][d][0][j] != 0.0)
      Z[i][o][d][0][j] = exp2l(Z[i][o][d][0][j] - divisor);

    assert(Z[i][o][d][0][j] >= 0);
    assert(Z[i][o][d][0][j] <= 1);
  }

  if (use_two_strands) {
    for (int j=0; j < m; ++j) {
      if (Z[i][o][d][1][j] != 0.0)
	Z[i][o][d][1][j] = exp2l(Z[i][o][d][1][j] - divisor);
	  
      assert(Z[i][o][d][1][j] >= 0);
      assert(Z[i][o][d][1][j] <= 1);
    }
  }
}


void
get_new_weights(int j1, int dir, double z, int w, const std::string& seed,
		const std::string& line_orig, const std::string& line_rev_orig,
		dmatrix& weights, std::vector<double>& pred_flank, std::vector<double>& succ_flank,
		bool force_multinomial, bool compute_flanks)
{
  assert(seed.length() == w);
  typedef myuint128 bitstring_t;
  const std::string& line = dir == 1 ? line_orig : line_rev_orig;
  int L = line.length();
  if (dir == -1)
    j1 = L - j1 - w;
  bitstring_t mismatches = iupac_mismatch_positions<bitstring_t>(line.substr(j1, w), seed);
  int hd = mypopcount(mismatches);
  bitstring_t positions = 0;
  if (not force_multinomial or hd < hamming_radius)
    positions = ~static_cast<bitstring_t>(0);  // update all
  else if (hd == hamming_radius)  // update only mismatch positions
    positions = mismatches;
  else
    positions = 0;   // update nothing
  //  printf("HD is %i %i %s %s\n", hd, mismatches, print_bitvector(mismatches).c_str(), print_bitvector(positions).c_str());
  bitstring_t mask = static_cast<bitstring_t>(1)<<(w-1);
  for (int pos=0; pos < w; ++pos, mask>>=1) {
    if (positions & mask)
      weights(to_int(line[j1+pos]), pos) += z; // update columns of pwm marked by bit vector positions
  }
  if (allow_extension and compute_flanks and positions == ~0) {     // If dir == -1, then the caller must swap pred_flank and succ_flank
    if (j1 != 0)
      pred_flank[to_int(line[j1-1])] += z;
    if (j1 + w < L)
      succ_flank[to_int(line[j1+w])] += z;
  }
}

void
get_new_spaced_dimer_weights(int j1, int dir, double z, int d,
			     int w1, int w2,
			     const std::string& seed1, const std::string& seed2,
			     const std::string& line_orig, const std::string& line_rev_orig,
			     dmatrix& weights1, dmatrix& weights2, 
			     bool force_multinomial)
{
  assert(seed1.length() == w1);
  assert(seed2.length() == w2);
  assert(z >= 0);
  assert(z < 1.0);
  assert(w1 == weights1.get_columns());
  assert(w2 == weights2.get_columns());
  const std::string& line = dir == 1 ? line_orig : line_rev_orig;
  int L = line.length();
  int dimer_len = w1 + d + w2;
  if (dir == -1)
    j1 = L - j1 - dimer_len;
  int j2 = j1 + d + w1;  // position of the second leg

  // first part
  code_t mismatches = iupac_mismatch_positions<code_t>(line.substr(j1, w1), seed1);
  int hd = mypopcount(mismatches);
  code_t positions = 0;
  if (not force_multinomial or hd < hamming_radius)
    positions = ~0;  // update all
  else if (hd == hamming_radius)  // update only mismatch positions
    positions = mismatches;
  else
    positions = 0;   // update nothing
  code_t mask = 1ull<<(w1-1);
  for (int pos=0; pos < w1; ++pos, mask>>=1) {
    if (positions & mask)
      weights1(to_int(line[j1+pos]), pos) += z; // update all columns
  }


  // second part
  code_t mismatches2 = iupac_mismatch_positions<code_t>(line.substr(j2, w2), seed2);
  int hd2 = mypopcount(mismatches2);
  code_t positions2 = 0;
  if (not force_multinomial or hd2 < hamming_radius)
    positions2 = ~0;  // update all
  else if (hd2 == hamming_radius)  // update only mismatch positions
    positions2 = mismatches2;
  else
    positions2 = 0;   // update nothing
  mask=1ull<<(w2-1);
  for (int pos=0; pos < w2; ++pos,mask>>=1) {
    if (positions2 & mask)
      weights2(to_int(line[j2+pos]), pos) += z; // update all columns
  }

  
}

void
get_new_gap_weights(int j1, int dir, double z, int d,
		    int w1, int w2,
		    const std::string& seed1, const std::string& seed2,
		    const std::string& line_orig, const std::string& line_rev_orig,
		    dmatrix& weights1,
		    bool force_multinomial)//, bool compute_flanks)
{
  assert(seed1.length() == w1);
  assert(seed2.length() == w2);
  assert(z >= 0);
  assert(z < 1.0);
  const std::string& line = dir == 1 ? line_orig : line_rev_orig;
  int L = line.length();
  int dimer_len = w1 + d + w2;
  if (dir == -1)
    j1 = L - j1 - dimer_len;

  // first part
  std::string seed = seed1 + std::string(d, 'N') + seed2;
  seed[w1-1] = 'N';  // flanks of the gap are also N
  seed[w1+d] = 'N';
  typedef myuint128 bitstring_t;
  assert(seed.length() == dimer_len);
  bitstring_t mismatches = iupac_mismatch_positions<bitstring_t>(line.substr(j1, dimer_len), seed);
  int hd = mypopcount(mismatches);
  bitstring_t positions = 0;
  if (not force_multinomial or hd <= hamming_radius)
    positions = ~static_cast<bitstring_t>(0);  // update all
  else
    positions = 0;   // update nothing
  int first = w1-1;  // last position of the first half-site
  int last = w1+d;   // first position of the second half-site
  bitstring_t mask = static_cast<bitstring_t>(1)<<(d+w2);
  for (int pos=first; pos <= last; ++pos, mask>>=1) {
    if (positions & mask)
      weights1(to_int(line[j1+pos]), pos) += z; // update all columns
  }

}




void
get_new_weights_with_flanks(int j1, int dir, double z, int w, int Lmax, const std::string& seed,
		const std::string& line_orig, const std::string& line_rev_orig,
		dmatrix& weights,
		bool force_multinomial, bool compute_flanks)
{
  assert(seed.length() == w);
  typedef myuint128 bitstring_t;
  const std::string& line = dir == 1 ? line_orig : line_rev_orig;
  int L = line.length();
  if (dir == -1)
    j1 = L - j1 - w;
  int motif_pos = Lmax - w;        // Motif pos inside matrix 'weights' which has width 2*Lmax-w
  int seq_pos = motif_pos - j1; // Position of sequence inside matrix 'weights'
  
  bitstring_t mismatches = iupac_mismatch_positions<bitstring_t>(line.substr(j1, w), seed);
  int hd = mypopcount(mismatches);
  //  assert(hamming_distance(line.substr(j1, w), seed) == hd);
  bitstring_t positions = 0;
  if (not force_multinomial or hd < hamming_radius)
    positions = ~static_cast<bitstring_t>(0);  // update all
  else if (hd == hamming_radius)  // update only mismatch positions
    positions = mismatches;
  else
    positions = 0;   // update nothing
  //  printf("HD is %i %i %s %s\n", hd, mismatches, print_bitvector(mismatches).c_str(), print_bitvector(positions).c_str());
  bitstring_t mask = static_cast<bitstring_t>(1)<<(w-1);
  for (int pos=0; pos < w; ++pos, mask>>=1) {
    if (positions & mask)
      weights(to_int(line[j1+pos]), motif_pos + pos) += z; // update columns of pwm marked by bit vector positions
  }

  if (compute_flanks and (not force_multinomial or hd < hamming_radius)) {
    // Left flank
    for (int i = 0; i < j1; ++i)
      weights(to_int(line[i]), seq_pos + i) += z;
    // Right flank
    for (int i = j1+w; i < L; ++i)
      weights(to_int(line[i]), seq_pos + i) += z;
  }
}



void
get_new_spaced_dimer_weights_with_flanks(int j1, int dir, double z, int d,
					 int w1, int w2, int Lmax,
		    const std::string& seed1, const std::string& seed2,
		    const std::string& line_orig, const std::string& line_rev_orig,
		    dmatrix& weights,
		    bool force_multinomial,
		    bool compute_flanks)
{
  assert(seed1.length() == w1);
  assert(seed2.length() == w2);
  assert(z >= 0);
  assert(z < 1.0);
  const std::string& line = dir == 1 ? line_orig : line_rev_orig;
  int L = line.length();
  int dimer_len = w1 + d + w2;
  if (dir == -1)
    j1 = L - j1 - dimer_len;
  int motif_pos = Lmax - dimer_len;        // Motif pos inside matrix 'weights' which has width 2*Lmax-w
  int seq_pos = motif_pos - j1; // Position of sequence inside matrix 'weights'

  // first part
  std::string seed = seed1 + std::string(d, 'N') + seed2;
  seed[w1-1] = 'N';  // flanks of the gap are also N
  seed[w1+d] = 'N';
  typedef myuint128 bitstring_t;
  assert(seed.length() == dimer_len);
  bitstring_t mismatches = iupac_mismatch_positions<bitstring_t>(line.substr(j1, dimer_len), seed);
  int hd = mypopcount(mismatches);
  bitstring_t positions = 0;
  //  bitstring_t gap_positions = ((static_cast<bitstring_t>(1) << d) - 1) << w2;
  if (not force_multinomial or hd < hamming_radius)
    positions = ~static_cast<bitstring_t>(0);  // update all, including the gap
  else if (hd == hamming_radius)  // update only mismatch positions
    positions = mismatches;
  else
    positions = 0;   // update nothing
  bitstring_t mask = static_cast<bitstring_t>(1)<<(dimer_len-1);
  for (int pos=0; pos < dimer_len; ++pos, mask>>=1) {
    if (positions & mask)
      weights(to_int(line[j1+pos]), motif_pos + pos) += z; // update all columns
  }
  if (compute_flanks and (not force_multinomial or hd < hamming_radius)) {
    // Left flank
    for (int i = 0; i < j1; ++i)
      weights(to_int(line[i]), seq_pos + i) += z;
    // Right flank
    for (int i = j1+dimer_len; i < L; ++i)
      weights(to_int(line[i]), seq_pos + i) += z;
  }
}


boost::tuple<std::vector<dmatrix>, std::vector<cob_of_matrices> >
get_models_with_flanks(const std::vector<std::string>& sequences,
		       const std::vector<std::string>& sequences_rev,
		       const std::vector<std::string>& fixed_seed,
		       const std::vector<int>& fixed_w,
		       int Lmax,
		       const boost::multi_array<int, 2>& fixed_m,
		       const std::vector<cob_params_t>& my_cob_params,
		       const array_4d_type& fixed_Z)
{
  typedef boost::multi_array<double, 2>::extent_range range;

  int number_of_cobs = my_cob_params.size();
  int fixed_p = fixed_seed.size();
  int lines = sequences.size();
  

  std::vector<dmatrix> flank_fixed_PWM;

  std::vector<cob_of_matrices> flank_dimer_PWM;
      


  // Initialize the models to zero

  // spaced models
  for (int r = 0; r < number_of_cobs; ++r) {
    int no = my_cob_params[r].number_of_orientations;
    //    int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
    int dmax = my_cob_params[r].dmax;
    flank_dimer_PWM.push_back(cob_of_matrices(boost::extents[no][range(my_cob_params[r].dmin,dmax+1)]));
  }

  // Fixed models
  for (int k=0; k < fixed_p; ++k) { 
    flank_fixed_PWM.push_back(dmatrix(4, 2*Lmax-fixed_w[k]));
  }


  // Overlapping dimer models
  for (int r = 0; r < number_of_cobs; ++r) {
    const int& dmin = my_cob_params[r].dmin;
    const int& dmax = my_cob_params[r].dmax;
    const int& no = my_cob_params[r].number_of_orientations;
    for (int o=0; o < no; ++o) {
      for (int d=dmin; d <= dmax; ++d) {
	  flank_dimer_PWM[r][o][d] = dmatrix(4, 2*Lmax - my_cob_params[r].dimer_w[d]);
      }
    }
  }

  int dirs[2] = {1, -1};
  int maxdir = use_two_strands ? 2 : 1;

  ////////////////////////////
  //
  // Monomer models
  //
  ////////////////////////////
  
  // Signal from monomeric models
  for (int k=0; k < fixed_p; ++k) {
    dmatrix pwm(4, 2*Lmax-fixed_w[k]);
    // This requires at least gcc 4.9.
    // clang (at least not 3.8) does not support 'declare reduction' even
    // though it defines _OPENMP to 201307 (that is openmp 4.0).
#if defined(_OPENMP) && (_OPENMP >= 201307) && !defined(__clang__)
#pragma omp declare reduction( + : dmatrix : omp_out+=omp_in ) initializer(omp_priv(omp_orig.dim()))
#pragma omp declare reduction( + : std::vector<double, std::allocator<double> > : omp_out+=omp_in ) initializer(omp_priv(std::vector<double>(omp_orig.size())))
#pragma omp parallel for shared(lines,sequences,use_two_strands) reduction(+:pwm) schedule(static)
#endif
    for (int i = 0; i < lines; ++i) {
      const std::string& line = sequences[i];
      const std::string& line_rev = sequences_rev[i];
      for (int j1=0; j1 < fixed_m[i][k]; ++j1) {  // iterates through start positions
	for (int dir=0; dir < maxdir; ++dir) {
	  double z = fixed_Z[i][k][dir][j1];
	  get_new_weights_with_flanks(j1, dirs[dir], z, fixed_w[k], Lmax, fixed_seed[k], line, line_rev, pwm, 
			use_multinomial, true);
	}
      }
    }  // end for lines
    flank_fixed_PWM[k] = pwm;
  } // for k, fixed PWMs


	
  ////////////////////////////
  //
  // Overlapping dimer
  //
  ////////////////////////////
	
  // #pragma omp parallel for schedule(static)
  for (int r = 0; r < number_of_cobs; ++r) {
    for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
      for (int d=my_cob_params[r].dmin; d < std::min(0, my_cob_params[r].dmax+1); ++d) {
	if (my_cob_params[r].dimer_lambdas[o][d] == 0.0)
	  continue;

	dmatrix pwm(4, 2*Lmax - my_cob_params[r].dimer_w[d]);
	// This requires gcc 4.9
	// clang (at least not 3.8) does not support 'declare reduction' even
	// though it defines _OPENMP to 201307 (that is openmp 4.0).
#if defined(_OPENMP) && (_OPENMP >= 201307) && !defined(__clang__)
#pragma omp declare reduction( + : dmatrix : omp_out+=omp_in ) initializer(omp_priv(omp_orig.dim()))
#pragma omp declare reduction( + : std::vector<double, std::allocator<double> > : omp_out+=omp_in ) initializer(omp_priv(std::vector<double>(omp_orig.size())))	
#pragma omp parallel for reduction(+:pwm) schedule(static)
#endif
	for (int i = 0; i < lines; ++i) {
	  const std::string& line = sequences[i];
	  const std::string& line_rev = sequences_rev[i];

	  for (int j1=0; j1 < my_cob_params[r].dimer_m[i][d]; ++j1) {  // iterates through start positions
	    for (int dir=0; dir < maxdir; ++dir) {
	      double z = my_cob_params[r].overlapping_dimer_Z[i][o][d][dir][j1];
	      get_new_weights_with_flanks(j1, dirs[dir], z, my_cob_params[r].dimer_w[d], Lmax, my_cob_params[r].dimer_seeds[o][d], 
					  line, line_rev, pwm, use_multinomial, true);
	    }
	  } // for j1
	}  // for i
	flank_dimer_PWM[r][o][d] = pwm;
      } // for d, overlapping dimer PWMs
    } // for o, overlapping dimer PWMs

  } // end for r


      





      
  ////////////////////////////
  //
  // spaced dimer
  //
  ////////////////////////////


  for (int r = 0; r < number_of_cobs; ++r) {
    int tf1 = my_cob_params[r].tf1;
    int tf2 = my_cob_params[r].tf2;
    // temps for spaced
    int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
    for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
      //      for (int d=0; d <= dmax; ++d) {
      for (int d=0; d <= max_dist_for_deviation; ++d) {
	int dimer_len = fixed_w[tf1] + d + fixed_w[tf2];
	dmatrix pwm(4, 2*Lmax - dimer_len);

	pwm.fill_with(0.0);

	// This requires gcc 4.9
    // clang (at least not 3.8) does not support 'declare reduction' even
    // though it defines _OPENMP to 201307 (that is openmp 4.0).
#if defined(_OPENMP) && (_OPENMP >= 201307) && !defined(__clang__)
#pragma omp declare reduction( + : dmatrix : omp_out+=omp_in ) initializer(omp_priv(omp_orig.dim()))
#pragma omp declare reduction( + : std::vector<double> : omp_out+=omp_in ) initializer(omp_priv(std::vector<double>(omp_orig.size())))
#pragma omp parallel for reduction(+:pwm) schedule(static)
#endif	    
	for (int i = 0; i < lines; ++i) {
	  const std::string& line = sequences[i];
	  const std::string& line_rev = sequences_rev[i];

	  for (int dir=0; dir < maxdir; ++dir) {

	    for (int j1=0; j1 < my_cob_params[r].dimer_m[i][d]; ++j1) {  // iterates through start positions
	      double z = my_cob_params[r].overlapping_dimer_Z[i][o][d][dir][j1];
	      get_new_spaced_dimer_weights_with_flanks(j1, dirs[dir], z, d, fixed_w[tf1], fixed_w[tf2], Lmax,
						       my_cob_params[r].oriented_dimer_seeds[o].get<0>(), 
						       my_cob_params[r].oriented_dimer_seeds[o].get<1>(),
						       line, line_rev, 
						       pwm,
						       use_multinomial, true);
	    } // for j1
	  }  // for dir
	  
	} // for i in lines
	flank_dimer_PWM[r][o][d] = pwm;
      } // for d, spaced dimer PWMs
    } // for o, spaced dimer PWMs

  } // for end r

	    
  for (int k=0; k < fixed_p; ++k) {
    if (use_pseudo_counts)
      pseudo_counts.add(flank_fixed_PWM[k]);
    normalize_matrix_columns(flank_fixed_PWM[k]);
    //    write_matrix(stdout, flank_fixed_PWM[k], to_string("Flank fixed matrix %i:\n", k), "%.6f");
  }
  
  for (int r = 0; r < number_of_cobs; ++r) {
    int dmin = my_cob_params[r].dmin;
    int dmax = my_cob_params[r].dmax;
    int no = my_cob_params[r].number_of_orientations;
    for (int o=0; o < no; ++o) {
      for (int d=dmin; d <= dmax; ++d) {
	if (my_cob_params[r].dimer_lambdas[o][d] == 0.0)
	  continue;
	if (use_pseudo_counts)
	  pseudo_counts.add(flank_dimer_PWM[r][o][d]);
	normalize_matrix_columns(flank_dimer_PWM[r][o][d]);
      }
    }
    
  }

  return boost::make_tuple(flank_fixed_PWM, flank_dimer_PWM);

} // end of get_models_with_flanks





double
reestimate_dimer_lambdas(const int lines,
			 const int number_of_orientations,
			 const int dmin, const int dmax,
			 const boost::multi_array<double, 2>& dimer_m,
			 const array_5d_type& dimer_Z,
			 boost::multi_array<double, 2>& dimer_lambdas
)
{
  double total_sum = 0.0;
  for (int o=0; o < number_of_orientations; ++o) {
    for (int d=dmin; d <= dmax; ++d) {
      if (dimer_lambdas[o][d] == 0.0)
	continue;
      double k_sum = 0.0;
#pragma omp parallel for shared(use_two_strands) reduction(+:k_sum) schedule(static)
      for (int i = 0; i < lines; ++i) {
	for (int j=0; j < dimer_m[i][d]; ++j) {
	  k_sum += dimer_Z[i][o][d][0][j];
	  if (use_two_strands)
	    k_sum += dimer_Z[i][o][d][1][j];
	}
      }  // end for i
      double temp = k_sum / lines;
      dimer_lambdas[o][d] = temp;
      total_sum += temp;
    } // end for d
  } // end for o

  return total_sum;
}

FloatType
reestimate_PWM_lambdas(int lines, int p, const boost::multi_array<int, 2>& m, const array_4d_type& Z, std::vector<double>& lambda)
{
  FloatType total_sum = 0.0;
  for (int k=0; k < p; ++k) {
    FloatType k_sum = 0.0;
#pragma omp parallel for shared(lines,m,use_two_strands,Z,k) reduction(+:k_sum) schedule(static)
    for (int i = 0; i < lines; ++i) {
      for (int j=0; j < m[i][k]; ++j) 
	k_sum += Z[i][k][0][j];
      if (use_two_strands)
	for (int j=0; j < m[i][k]; ++j) 
	  k_sum += Z[i][k][1][j];
    }  // end for i
    lambda[k] = k_sum / lines;
    total_sum += lambda[k];
  } // end for k

  return total_sum;
}

void
add_columns(std::vector<double>& v, const dmatrix& m)
{
  assert(v.size() == 4);
  for (int j=0; j < 4; ++j) { // for every row
    v[j] += sum(m.row(j));
    //    if (use_two_strands)
    //      v[4-j-1] += sum(m.row(j));
  }
}

std::string
create_overlapping_seed(const std::string& seed1, const std::string& seed2, int d, const gapped_kmer_context& my_gapped_kmer_context)
{
  int k1 = seed1.length();
  int k2 = seed2.length();

  std::string pattern;
  int hd;
  hd = hamming_distance_overlapping_seeds_N;   // this is zero
  pattern = combine_seeds_func(seed1, seed2, d);

  assert(pattern.size() == k1 + k2 + d);
  std::string overlapping_seed;
  if (maximize_overlapping_seeds) {
    int begin = k1 + d;
    int len = -d;
    overlapping_seed = my_gapped_kmer_context.max(pattern, hd, begin, len).get<0>();
  }
  else
    overlapping_seed = pattern;
  assert(overlapping_seed.size() == k1 + k2 + d);
  return  overlapping_seed;
}


// Here a sequence is considered as background if the posterior probability of background model
// is higher than the posterior probability of any other model
void
print_background_sequences(const std::vector<std::string>& sequences, 
			   const array_4d_type& fixed_Z, 
			   const std::vector<cob_params_t>& my_cob_params,
			   const std::vector<dmatrix>& fixed_PWM,
			   const std::vector<double>& bg_model,
			   const dmatrix& bg_model_markov)
{
  int lines = sequences.size();
  int L=sequences[0].length();
  int fixed_p = fixed_PWM.size();
  int number_of_cobs = my_cob_params.size();
  std::vector<int> fixed_w(fixed_p);
  std::vector<int> fixed_m(fixed_p);
  for (int k=0; k < fixed_p; ++k) {
    fixed_w[k] = fixed_PWM[k].get_columns();
    fixed_m[k] = L-fixed_w[k]+1;
  }
  FILE* fp = fopen(unbound.c_str(), "w");
  assert(fp != NULL);
  for (int i=0; i < lines; ++i) {
    const std::string& line = sequences[i];

    // First compute the proportion of background as the complement of the sum of other models
    double total_sum = 0;
    for (int k=0; k < fixed_p; ++k) {
      double p = 0.0;
      for (int dir=0; dir < 2; ++dir) {
	for (int j=0; j < fixed_m[k]; ++j) {
	  p += fixed_Z[i][k][dir][j];
	}
      }
      total_sum += p;
    }
    for (int r=0; r < number_of_cobs; ++r) {
      int number_of_orientations = my_cob_params[r].number_of_orientations;
      int dmin = my_cob_params[r].dmin;
      int dmax = my_cob_params[r].dmax;
      int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
      for (int o=0; o < number_of_orientations; ++o) {

	for (int d=dmin; d <= max_dist_for_deviation; ++d) {
	  double p = 0.0;
	  for (int dir=0; dir < 2; ++dir) {
	    for (int j=0; j < my_cob_params[r].dimer_m[i][d]; ++j) {
	      p += my_cob_params[r].overlapping_dimer_Z[i][o][d][dir][j];
	    }
	  }
	  total_sum += p;
	}  // end for d

	for (int d=max_dist_for_deviation+1; d <= dmax; ++d) {
	  double p = 0.0;
	  for (int dir=0; dir < 2; ++dir) {
	    for (int j=0; j < my_cob_params[r].dimer_m[i][d]; ++j) {
	      p += my_cob_params[r].spaced_dimer_Z[i][o][d][dir][j];
	    }
	  }
	  total_sum += p;
	}  // end for d

      }  // end for o
    }  // end for r



    double background = 1.0 - total_sum;
    // Is the posterior probability of any monomeric model higher than the posterior probability of the bg model
    for (int k=0; k < fixed_p; ++k) {
      double p = 0.0;
      for (int dir=0; dir < 2; ++dir) {
	for (int j=0; j < fixed_m[k]; ++j) {
	  p += fixed_Z[i][k][dir][j];
	}
      }
      if (p >= background)
	goto my_exit;
    }
    // Is the posterior probability of any dimeric model higher than the posterior probability of the bg model
    for (int r=0; r < number_of_cobs; ++r) {
      int number_of_orientations = my_cob_params[r].number_of_orientations;
      int dmin = my_cob_params[r].dmin;
      int dmax = my_cob_params[r].dmax;
      int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
      for (int o=0; o < number_of_orientations; ++o) {

	// dimeric models with deviation
	for (int d=dmin; d <= max_dist_for_deviation; ++d) {
	  double p = 0.0;
	  for (int dir=0; dir < 2; ++dir) {                                      // KORJAA TÄMÄ, USE TWO STRANDS
	    for (int j=0; j < my_cob_params[r].dimer_m[i][d]; ++j) {
	      p += my_cob_params[r].overlapping_dimer_Z[i][o][d][dir][j];
	    }
	  }
	  if (p >= background)
	    goto my_exit;
	}  // end for d

	// dimeric models without deviation
	for (int d=max_dist_for_deviation+1; d <= dmax; ++d) {
	  double p = 0.0;
	  for (int dir=0; dir < 2; ++dir) {
	    for (int j=0; j < my_cob_params[r].dimer_m[i][d]; ++j) {
	      p += my_cob_params[r].spaced_dimer_Z[i][o][d][dir][j];
	    }
	  }
	  if (p >= background)
	    goto my_exit;
	}  // end for d

      }  // end for o
    }  // end for r

    fprintf(fp, "%s\n", line.c_str());
    my_exit:
    ;
  } // end for i in lines
  fclose(fp);
}


int
get_number_of_parameters(std::vector<cob_params_t>& my_cob_params, std::vector<dmatrix> fixed_PWM)
{
  int fixed_p=fixed_PWM.size();                // Number of fixed models
  int number_of_parameters = 0;
  number_of_parameters += fixed_p;              // pwm lambdas
  for (int k=0; k < fixed_p; ++k) {
    int w = fixed_PWM[k].get_columns();
    number_of_parameters += 3*w;       // pwm parameters
  }
  number_of_parameters += 3;                    // background
  for (int r=0; r < my_cob_params.size(); ++r) {
    int number_of_orientations = my_cob_params[r].number_of_orientations;
    int dmin = my_cob_params[r].dmin;
    int dmax = my_cob_params[r].dmax;
    int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
    number_of_parameters += number_of_orientations * (dmax-dmin+1);   // lambda parameters
    for (int d=dmin; d <= max_dist_for_deviation; ++d) {
      for (int o=0; o < number_of_orientations; ++o) {
	if (my_cob_params[r].dimer_lambdas[o][d] != 0.0)
	  number_of_parameters += 4 * (2+abs(d));     // correction matrices
      }
    }
  }
  // Background lambda is not counted since it is the complement of the rest of the lambdas
  return number_of_parameters;
}

dmatrix
reverse_complement_markov_model(const dmatrix& m)
{
  assert(m.get_columns() == 4);
  assert(m.get_rows() == 4);
  dmatrix result(m.dim());   // same shape
  for (int i=0; i < 4; ++i) {
    for (int j=0; j < 4; ++j) {
      result(i, j) = m(3-j, 3-i);
    }
  }
  return result;
}

bool
is_almost_palindrome(const dmatrix& m)
{
  dmatrix m_rev = reverse_complement(m);
  return distance(m, m_rev) < 0.001;
}

// uses the zoops model (zero or one occurrence per sequence)
// the error rate lambda[p] equals the quantity 1-gamma of the paper
std::vector<dmatrix>
multi_profile_em_algorithm(const std::vector<std::string>& sequences,
			   std::vector<dmatrix> fixed_PWM,
			   const std::vector<bool>& keep_monomer_fixed,
			   dmatrix& bg_model_markov, 
			   std::vector<double> bg_model,
			   double background_lambda,
			   std::vector<double> fixed_lambda, std::vector<std::string> fixed_seed, 
			   std::vector<cob_params_t>& my_cob_params,
			   double epsilon, double extension_ic_threshold,
			   const gapped_kmer_context& my_gapped_kmer_context)
{
  printf("\nIn multi_profile_em_algorithm:\n");

  int number_of_cobs = my_cob_params.size();
  int directions = use_two_strands ? 2 : 1;

  int fixed_p=fixed_PWM.size();                // Number of fixed models

  assert(fixed_p == fixed_lambda.size());
  assert(fixed_p == keep_monomer_fixed.size());


  typedef BinOp<int>::type func_ptr;
  //typedef const int& (*func_ptr)(const int&, const int&);

  typedef boost::multi_array<dmatrix, 2> cob_of_matrices;

  typedef boost::multi_array<FloatType, 4> array_4d_type;  // for Z variable, indices are (i,k,dir,j)

  std::vector<std::string> sequences_rev(sequences.size());
  for (int i=0; i < sequences.size(); ++i)
    sequences_rev[i] = reverse_complement(sequences[i]);
  
  
  double first_maximum_log_likelihood = 0; //maximum_log_likelihood(fixed_PWM, bg_model, fixed_lambda, 
  //		       background_lambda, my_cob_params, sequences);
  double mll = first_maximum_log_likelihood;

  int lines = sequences.size();
  std::vector<int> fixed_w(fixed_p);
  boost::multi_array<int, 2> fixed_m(boost::extents[lines][fixed_p]);
  std::vector<int> L(lines);
  
  
  std::vector<std::vector<double> > pred_flank(fixed_p);
  std::vector<std::vector<double> > succ_flank(fixed_p);
  std::vector<double> pred_ic(fixed_p);
  std::vector<double> succ_ic(fixed_p);


  std::vector<dmatrix> flank_fixed_PWM;
  std::vector<cob_of_matrices> flank_dimer_PWM;


  typedef boost::multi_array<double, 2>::extent_range range;

  std::vector<double> fixed_av_ic(fixed_p);
  const int max_extension_round = 5;
  int extension_round=0;
  int round;
  std::vector<int> iterations;
  while (true) {
    printf("===================================================\n");
    if (local_debug) {
      printf("Extension round %i\n", extension_round);
    }

    

    //////////////
    //
    // Fixed models
    //
    //////////////

    int Lmin = std::numeric_limits<int>::max();
    int Lmax = std::numeric_limits<int>::min();
    for (int i=0; i < lines; ++i) {
      L[i] = sequences[i].length();
      if (L[i] < Lmin)
	Lmin = L[i];
      if (L[i] > Lmax)
	Lmax = L[i];
    }

    myaccumulate<int> acc(0, static_cast<func_ptr>(std::max));   // The cast is needed because std::max is overloaded

    // Set lengths
    acc.reset(0);
    for (int k=0; k < fixed_p; ++k) {
      fixed_w[k] = fixed_PWM[k].get_columns();
      for (int i=0; i < lines; ++i) {
	fixed_m[i][k] = L[i]-fixed_w[k]+1;
	acc(fixed_m[i][k]);
      }
    }

    int fixed_m_max=acc.get();                     // maximum number of motif starting positions in a sequence



    array_4d_type fixed_Z(boost::extents[lines][fixed_p][directions][fixed_m_max]);
    
    ///////////////////////////
    //
    // Overlapping dimer models
    //
    ///////////////////////////
    
    for (int r=0; r < my_cob_params.size(); ++r) {   
      my_cob_params[r].set_lengths();
      my_cob_params[r].initialize_Z(lines);
    }


    std::vector<double> fixed_dist(fixed_p, DBL_MAX);   // Distances between matrices in consecutive rounds
                                                  // is the convergence criterion

    std::vector<boost::multi_array<double, 2> > deviation_dist;
    for (int r=0; r < my_cob_params.size(); ++r) {
      int number_of_orientations = my_cob_params[r].number_of_orientations;
      int dmin = my_cob_params[r].dmin;
      int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
      boost::multi_array<double, 2> temp(boost::extents[number_of_orientations][range(dmin, max_dist_for_deviation+1)]);
      //fill_with(temp, DBL_MAX); 
      deviation_dist.push_back(temp);
    }

    int number_of_parameters = get_number_of_parameters(my_cob_params, fixed_PWM);
    if (local_debug)
      printf("Total number of parameters is %i\n", number_of_parameters);


    for (int r=0; r < my_cob_params.size(); ++r) {
      my_cob_params[r].update_oriented_matrices(fixed_PWM, fixed_seed);
      my_cob_params[r].compute_expected_matrices(fixed_PWM);
    }


    //    std::vector<double> spaced_dimer_dist(spaced_dimer_p, DBL_MAX);   
    bool convergence_criterion_reached = false;
    for (round = 0; round < max_iter and not convergence_criterion_reached; ++round) {

      if (local_debug)
	printf("Round %i\n", round);


      // Print seeds
      printf("Fixed seeds are %s\n", print_vector(fixed_seed).c_str());
      if (use_multinomial and local_debug) {

	
	for (int r=0; r < my_cob_params.size(); ++r) {
	  const cob_params_t& cp = my_cob_params[r];
	  if (local_debug) 
	    print_cob(stdout, cp.dimer_seeds, 
		      to_string("Seeds of overlapping dimer cases %s:\n", cp.name().c_str()), "%s");

	  boost::multi_array<int, 2> seed_counts(get_shape(cp.dimer_seeds));
	  boost::multi_array<double, 2> exp_counts(get_shape(cp.dimer_seeds));

	  std::map<int, int> sites;
	  MY_FOREACH(d, cp.dimer_seeds[0]) {
	    int dimer_len = cp.dimer_w[d];
	    for (int i = 0; i < lines; ++i)
		sites[d] += L[i] - dimer_len + 1;
	  }
	  MY_FOREACH(o, cp.dimer_seeds) {
	    MY_FOREACH(d, cp.dimer_seeds[o]) {
	      int dimer_len = cp.dimer_w[d];
	      double p = pow(4, -dimer_len);
	      seed_counts[o][d] = my_gapped_kmer_context.count(cp.dimer_seeds[o][d]);
	      exp_counts[o][d] = directions * sites[d] * p;
	    }
	  }

	  if (local_debug) {
	    print_cob(stdout, seed_counts, to_string("Seed counts %s:\n", cp.name().c_str()), "%i");
	    print_cob(stdout, exp_counts, to_string("Exp seed counts %s:\n", cp.name().c_str()), "%.2f");
	    printf("\n");
	  }
	}
      }


      /////////////////////////////////
      //
      // compute expectations
      //
      /////////////////////////////////


      std::vector<double> bg_model_rev(bg_model.rbegin(), bg_model.rend()); // use this model when considering the reverse strand
      dmatrix bg_model_markov_rev = reverse_complement_markov_model(bg_model_markov);
      std::vector<FloatType> log_bg_model(4);
      std::vector<FloatType> log_bg_model_rev(4);
      
      FloatType log_background_lambda = log2l(background_lambda);

      std::vector<FloatType> log_fixed_lambda(fixed_p);
      std::vector<matrix<FloatType> > log_fixed_PWM;
      for (int i=0; i < 4; ++i) {
	log_bg_model[i] = log2l(bg_model[i]);
	log_bg_model_rev[i] = log2l(bg_model_rev[i]);
      }
      for (int i=0; i < fixed_p; ++i) {
	log_fixed_lambda[i] = log2l(fixed_lambda[i]);
	log_fixed_PWM.push_back(log2<FloatType>(fixed_PWM[i]));
      }

      int mindmin = std::numeric_limits<int>::max();
      int maxdmax = std::numeric_limits<int>::min();
      for (int r=0; r < number_of_cobs; ++r) {
	mindmin = std::min(mindmin, my_cob_params[r].dmin);
	maxdmax = std::max(maxdmax, my_cob_params[r].dmax);
      }
      
      typedef boost::multi_array<matrix<FloatType>, 2> cob_of_matrices_t;
      typedef cob_of_matrices_t::extent_range range;
      std::vector<cob_of_matrices_t> log_overlapping_models;
      boost::multi_array<matrix<FloatType>, 3> log_oriented_dimer_matrices(boost::extents[number_of_cobs][4][2]);
      boost::multi_array<FloatType, 3> log_dimer_lambda(boost::extents[number_of_cobs][4][range(mindmin,maxdmax+1)]);
      for (int r=0; r < number_of_cobs; ++r) {
	//int tf1 = my_cob_params[r].tf1;
	//int tf2 = my_cob_params[r].tf2;
	int number_of_orientations = my_cob_params[r].number_of_orientations;
	int dmin = my_cob_params[r].dmin;
	int dmax = my_cob_params[r].dmax;
	int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
    
	log_overlapping_models.push_back(cob_of_matrices_t(boost::extents[number_of_orientations][range(dmin, max_dist_for_deviation+1)]));
	for (int o=0; o < number_of_orientations; ++o) {
	  log_oriented_dimer_matrices[r][o][0] = log2<FloatType>(my_cob_params[r].oriented_dimer_matrices[o].get<0>());
	  log_oriented_dimer_matrices[r][o][1] = log2<FloatType>(my_cob_params[r].oriented_dimer_matrices[o].get<1>());
	  for (int d=dmin; d <= max_dist_for_deviation; ++d) {
	    log_overlapping_models[r][o][d] = log2<FloatType>(my_cob_params[r].expected_overlapping_dimer_PWMs[o][d] + my_cob_params[r].deviation[o][d]);
	  }
	  for (int d=dmin; d <= dmax; ++d)
	    if (my_cob_params[r].dimer_lambdas[o][d] != 0.0)
	      log_dimer_lambda[r][o][d] = log2l(my_cob_params[r].dimer_lambdas[o][d]);
	    else
	      log_dimer_lambda[r][o][d] = 0.0;
	}
      }

#pragma omp parallel for shared(lines,sequences,bg_model,bg_model_markov,use_two_strands) schedule(static)
      for (int i=0; i < lines; ++i) {
	const std::string& line = sequences[i];
	const std::string& line_rev = sequences_rev[i];
	//printf("Processing sequence %i\n", i);

	// f_0
	FloatType log_background = compute_log_background_probability<FloatType>(line, log_bg_model, bg_model_markov);
	log_background += log_background_lambda;

	// Monomer models
	for (int k=0; k < fixed_p; ++k)
	  expectation_Z_dir_j(fixed_Z, i, k, line, line_rev, fixed_m[i][k], log_fixed_lambda[k], log_fixed_PWM[k], 
			      log_bg_model, log_bg_model_rev, bg_model_markov, bg_model_markov_rev);

	// Dimer models
	for (int r=0; r < my_cob_params.size(); ++r) {
	  int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
	  // Overlapping dimer models
	  for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	    for (int d=my_cob_params[r].dmin; d <= max_dist_for_deviation; ++d) {
	      expectation_Z_dir_j_overlapping(my_cob_params[r].overlapping_dimer_Z, i, o, d, line, line_rev, 
					      my_cob_params[r].dimer_m[i][d], log_dimer_lambda[r][o][d],
					      log_overlapping_models[r][o][d],
					      log_bg_model, log_bg_model_rev, 
					      bg_model_markov, bg_model_markov_rev);
	    }
	  }
	  // Spaced dimer models
	  for (int o=0; o < my_cob_params[r].number_of_orientations; ++o)
	    for (int d=max_dist_for_deviation+1; d <= my_cob_params[r].dmax; ++d){
	      expectation_Z_dir_j_spaced(my_cob_params[r].spaced_dimer_Z, i, o, d, line, line_rev,
					 my_cob_params[r].dimer_m[i][d], log_dimer_lambda[r][o][d],
					 log_oriented_dimer_matrices[r][o][0],
					 log_oriented_dimer_matrices[r][o][1],
					 log_bg_model, log_bg_model_rev, 
					 bg_model_markov, bg_model_markov_rev);
	    }
	} // end for r

	// compute the sum
	
	std::priority_queue<FloatType, std::vector<FloatType>, std::greater<FloatType> > queue;
	//std::vector<FloatType> queue;
	
	queue.push(log_background);
	//queue.push_back(log_background);
	
	// Fixed models
	for (int k=0; k < fixed_p; ++k) 
	  sum_Z_dir_j(fixed_Z, i, k, fixed_m[i][k], queue);

	for (int r=0; r < my_cob_params.size(); ++r) {
	  int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
	  // Overlapping dimer models
	  for (int o=0; o < my_cob_params[r].number_of_orientations; ++o)
	    for (int d=my_cob_params[r].dmin; d <= max_dist_for_deviation; ++d)
	      sum_Z_dir_j(my_cob_params[r].overlapping_dimer_Z, i, o, d, my_cob_params[r].dimer_m[i][d], queue);

	  // Spaced dimer models
	  for (int o=0; o < my_cob_params[r].number_of_orientations; ++o)
	    for (int d=max_dist_for_deviation+1; d <= my_cob_params[r].dmax; ++d)
	      sum_Z_dir_j(my_cob_params[r].spaced_dimer_Z, i, o, d, my_cob_params[r].dimer_m[i][d], queue);
	}

	FloatType normalizing_constant = log_sum(queue);
	//	FloatType normalizing_constant = fixed_sum + overlapping_sum + spaced_sum + background;
	//	printf("Dividing term is %e\n", sum);
	

	// normalize
	
	// Fixed models
	for (int k=0; k < fixed_p; ++k) 
	  normalize_Z_dir_j(fixed_Z, i, k, fixed_m[i][k], normalizing_constant);

	for (int r=0; r < my_cob_params.size(); ++r) {
	  int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
	  // Overlapping dimer models
	  for (int o=0; o < my_cob_params[r].number_of_orientations; ++o)
	    for (int d=my_cob_params[r].dmin; d <= max_dist_for_deviation; ++d)
	      normalize_Z_dir_j(my_cob_params[r].overlapping_dimer_Z, i, o, d, my_cob_params[r].dimer_m[i][d], normalizing_constant);

	  // Spaced dimer models
	  for (int o=0; o < my_cob_params[r].number_of_orientations; ++o)
	    for (int d=1+max_dist_for_deviation; d <= my_cob_params[r].dmax; ++d)
	      normalize_Z_dir_j(my_cob_params[r].spaced_dimer_Z, i, o, d, my_cob_params[r].dimer_m[i][d], normalizing_constant);
	}

      } // for i in lines




      ////////////////////////
      //
      // do the maximization
      //
      ////////////////////////

    
      // re-estimate PWM models

      // Initialize weights, pred_flank, succ_flank
      
      // These are used just to compute the background by subtracting signal from the whole data
      std::vector<double> fixed_signal_sum(4, 0.0);   // These are used to learn new PWM models
      std::vector<double> fixed2_signal_sum(4, 0.0);  // These are not used to learn new PWM models
      std::vector<double> overlapping_signal_sum(4, 0.0);
      std::vector<double> gap_signal_sum(4, 0.0);     // For gap in spaced dimers with d\in[0,max_dist_for_deviation]
      
      dmatrix dinucleotide_signal(4, 4);

      std::vector<dmatrix> new_fixed_PWM;
      std::vector<dmatrix> new_fixed_PWM2;   // These are not used to learn new PWMs

      
      std::vector<cob_of_matrices> overlapping_dimer_weights;
      std::vector<cob_of_matrices> gap_weights;  // covers the area of [0,max_dist_for_deviation] + 1 flank on each side
      std::vector<cob_of_matrices> new_deviation;
      std::vector<boost::multi_array<bool, 2> > empty_gap;
      
      std::vector<boost::tuple<dmatrix,dmatrix> > spaced_dimer_weights_sum;
      std::vector<boost::tuple<dmatrix,dmatrix> > spaced_dimer_weights2_sum;



      // Initialize the models to zero


      // Fixed models
      for (int k=0; k < fixed_p; ++k) { 
	new_fixed_PWM.push_back(dmatrix(4, fixed_w[k]));
	new_fixed_PWM2.push_back(dmatrix(4, fixed_w[k]));
	if (allow_extension) {
	  pred_flank[k].assign(4, 0.0);
	  succ_flank[k].assign(4, 0.0);
	}
      }

      // spaced models
      for (int r = 0; r < number_of_cobs; ++r) {
	int width1 = fixed_w[my_cob_params[r].tf1];
	int width2 = fixed_w[my_cob_params[r].tf2];
	spaced_dimer_weights_sum.push_back(boost::make_tuple(dmatrix(4, width1), 
							     dmatrix(4, width2)));
	spaced_dimer_weights2_sum.push_back(boost::make_tuple(dmatrix(4, width1), 
							      dmatrix(4, width2)));
      }

      // Overlapping dimer models
      for (int r = 0; r < number_of_cobs; ++r) {
	const int& dmin = my_cob_params[r].dmin;
	int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
	const int& no = my_cob_params[r].number_of_orientations;
	const int& k1 = my_cob_params[r].k1;
	const int& k2 = my_cob_params[r].k2;
	overlapping_dimer_weights.push_back(cob_of_matrices(boost::extents[no][range(my_cob_params[r].dmin,max_dist_for_deviation+1)]));
	gap_weights.push_back(cob_of_matrices(boost::extents[no][range(0, max_dist_for_deviation+1)]));
	new_deviation.push_back(cob_of_matrices(boost::extents[no][range(dmin,max_dist_for_deviation+1)]));
	
	empty_gap.push_back(boost::multi_array<bool, 2>(boost::extents[no][range(dmin, max_dist_for_deviation+1)]));
	for (int o=0; o < no; ++o) {
	  for (int d=dmin; d <= max_dist_for_deviation; ++d) {
	    empty_gap[r][o][d] = false;
	    new_deviation[r][o][d] = dmatrix(4, k1 + k2 + d);  // This contains redundant flanks. Just to ease things
	    if (d < 0)
	      overlapping_dimer_weights[r][o][d] = dmatrix(4, my_cob_params[r].dimer_w[d]);
	    else
	      gap_weights[r][o][d] = dmatrix(4, my_cob_params[r].dimer_w[d]);
	  }
	}
      }




      ////////////////////////////////////
      //
      // re-estimate mixing parameters
      //
      ////////////////////////////////////

      double total_sum=0.0;

      // Fixed models
      total_sum += reestimate_PWM_lambdas(lines, fixed_p, fixed_m, fixed_Z, fixed_lambda);  // modifies fixed_lambda


      for (int r = 0; r < number_of_cobs; ++r) {	
	// Overlapping dimers
	total_sum += reestimate_dimer_lambdas(lines,
					      my_cob_params[r].number_of_orientations,
					      my_cob_params[r].dmin, my_cob_params[r].max_dist_for_deviation,
					      my_cob_params[r].dimer_m,
					      my_cob_params[r].overlapping_dimer_Z,
					      my_cob_params[r].dimer_lambdas);              // Modifies dimer_lambdas


	// Spaced dimers
	total_sum += reestimate_dimer_lambdas(lines,
					      my_cob_params[r].number_of_orientations,
					      my_cob_params[r].max_dist_for_deviation+1, my_cob_params[r].dmax,
					      my_cob_params[r].dimer_m,
					      my_cob_params[r].spaced_dimer_Z,
					      my_cob_params[r].dimer_lambdas);                             // Modifies dimer_lambdas
      } // end for r

      assert(total_sum - 1.0 < 0.000001);
      
      background_lambda = 1.0 - total_sum;    // weight for background model

      assert(background_lambda <= 1.0);

      if (background_lambda < 0) {   // to correct rounding errors
	assert(fabs(background_lambda - 0.0) < 0.001);
	background_lambda=0.0;
      }
	

      //minimum_distance_for_learning 

      std::vector<bool> is_fixed_pwm_part_of_cob(fixed_p, false);
      for (int r=0; r < my_cob_params.size(); ++r) {
	using boost::multi_array_types::index_range;
	int dmax = my_cob_params[r].dmax;
	if (dmax >= minimum_distance_for_learning) {
	  boost::multi_array<double, 2> subarray =
	    my_cob_params[r].dimer_lambdas[ boost::indices[index_range()][(long int)minimum_distance_for_learning <= index_range()] ];
	  if (sum(subarray) >= learning_fraction) {
	    is_fixed_pwm_part_of_cob[my_cob_params[r].tf1] = true;
	    is_fixed_pwm_part_of_cob[my_cob_params[r].tf2] = true;
	  }
	}
      }
      printf("Is monomer pwm learnt purely modularly: %s\n", print_vector(is_fixed_pwm_part_of_cob).c_str());




      ///////////////////////////////////////////////////////////////////////////////////
      //
      // Reestimate models
      //
      ///////////////////////////////////////////////////////////////////////////////////


      std::vector<double> dummy(4, 0.0);   // For flanks that are not really computed

      
      ////////////////////////////
      //
      // Monomeric models
      //
      ////////////////////////////
      int dirs[2] = {1, -1};
      int maxdir = use_two_strands ? 2 : 1;
      
      // Signal from monomeric models
      for (int k=0; k < fixed_p; ++k) {
	dmatrix pwm(4, fixed_w[k]);
	std::vector<double>& signal = is_fixed_pwm_part_of_cob[k] ? fixed2_signal_sum : fixed_signal_sum;

	dmatrix temp_dinucleotide_signal(4, 4);
	std::vector<double> temp_signal(4, 0.0);

	int w = fixed_w[k];
	// This requires at least gcc 4.9
	// clang (at least not 3.8) does not support 'declare reduction' even
	// though it defines _OPENMP to 201307 (that is openmp 4.0).
#if defined(_OPENMP) && (_OPENMP >= 201307) && !defined(__clang__)
#pragma omp declare reduction( + : dmatrix : omp_out+=omp_in ) initializer(omp_priv(omp_orig.dim()))
#pragma omp declare reduction( + : std::vector<double, std::allocator<double> > : omp_out+=omp_in ) initializer(omp_priv(std::vector<double>(omp_orig.size())))
#pragma omp parallel for shared(lines,sequences,use_two_strands) reduction(+:pwm,temp_dinucleotide_signal,temp_signal) schedule(static)
#endif
	for (int i = 0; i < lines; ++i) {
	  const std::string& line = sequences[i];
	  const std::string& line_rev = sequences_rev[i];
	  for (int j1=0; j1 < fixed_m[i][k]; ++j1) {  // iterates through start positions
	    for (int dir=0; dir < maxdir; ++dir) {
	      double z = fixed_Z[i][k][dir][j1];
	      get_new_weights(j1, dirs[dir], z, fixed_w[k], fixed_seed[k], line, line_rev, pwm, 
			      dummy, dummy, use_multinomial, false);

	      for (int pos=0; pos < w; ++pos)
		temp_signal[to_int(line[j1+pos])] += z;
	      
	      if (use_markov_background) {
		for (int pos2=0; pos2 < w-1; ++pos2)
		  temp_dinucleotide_signal(to_int(line[j1+pos2]), to_int(line[j1+pos2+1])) += z;
	      }
	    }
	  }
	}  // end for lines
	signal += temp_signal;
	dinucleotide_signal += temp_dinucleotide_signal;

	if (is_fixed_pwm_part_of_cob[k])
	  new_fixed_PWM2[k] += pwm;
	else
	  new_fixed_PWM[k] += pwm;

      } // for k, fixed PWMs


	
      ////////////////////////////
      //
      // Overlapping dimer
      //
      ////////////////////////////
	
      for (int r = 0; r < number_of_cobs; ++r) {
	std::vector<double>& signal = overlapping_signal_sum;
	
	for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	  for (int d=my_cob_params[r].dmin; d < std::min(0, my_cob_params[r].dmax+1); ++d) {
	    if (my_cob_params[r].dimer_lambdas[o][d] == 0.0)
	      continue;

	    //	    bool local_use_two_strands;
	    // if (avoid_palindromes and use_two_strands and (o == HH or o==TT) and
	    // 	is_almost_palindrome(my_cob_params[r].deviation[o][d])) {
	    //   local_use_two_strands = false;
	    //   printf("Avoiding palindrome %s %d iteration %i\n", orients[o], d, round);
	    // }
	    // else
	    //   local_use_two_strands = use_two_strands;
	    
	    int w = my_cob_params[r].dimer_w[d];
	    dmatrix temp_dinucleotide_signal(4, 4);
	    std::vector<double> temp_signal(4, 0.0);

	    dmatrix pwm(overlapping_dimer_weights[r][o][d].dim());
	    // This requires gcc 4.9
	    // clang (at least not 3.8) does not support 'declare reduction' even
	    // though it defines _OPENMP to 201307 (that is openmp 4.0).
#if defined(_OPENMP) && (_OPENMP >= 201307) && !defined(__clang__)
#pragma omp declare reduction( + : dmatrix : omp_out+=omp_in ) initializer(omp_priv(omp_orig.dim()))
#pragma omp declare reduction( + : std::vector<double, std::allocator<double> > : omp_out+=omp_in ) initializer(omp_priv(std::vector<double>(omp_orig.size())))	
#pragma omp parallel for reduction(+:pwm,temp_dinucleotide_signal,temp_signal) schedule(static)
#endif
	    for (int i = 0; i < lines; ++i) {
	      const std::string& line = sequences[i];
	      const std::string& line_rev = sequences_rev[i];

	      for (int j1=0; j1 < my_cob_params[r].dimer_m[i][d]; ++j1) {  // iterates through start positions
		for (int dir=0; dir < maxdir; ++dir) {
		  double z = my_cob_params[r].overlapping_dimer_Z[i][o][d][dir][j1];
		  get_new_weights(j1, dirs[dir], z, my_cob_params[r].dimer_w[d], my_cob_params[r].dimer_seeds[o][d], 
				  line, line_rev, pwm, dummy, dummy, use_multinomial, false);

		  for (int pos=0; pos < w; ++pos)
		    temp_signal[to_int(line[j1+pos])] += z;
	      
		  if (use_markov_background) {
		    for (int pos2=0; pos2 < w-1; ++pos2)
		      temp_dinucleotide_signal(to_int(line[j1+pos2]), to_int(line[j1+pos2+1])) += z;
		  }
		} // for dir
	      } // for j1
	    }  // for i
	    signal += temp_signal;
	    dinucleotide_signal += temp_dinucleotide_signal;
	    overlapping_dimer_weights[r][o][d] = pwm;
	  } // for d, overlapping dimer PWMs
	} // for o, overlapping dimer PWMs

      } // end for r


      
	////////////////////////////
	//
	// Spaced dimer
	//
	////////////////////////////


      for (int r = 0; r < number_of_cobs; ++r) {

	for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	  int tf1 = my_cob_params[r].tf1;
	  int tf2 = my_cob_params[r].tf2;
	  if (use_rna and o == RNA_TH) {
	    std::swap(tf1, tf2);
	  }
	  // temps for spaced
	  dmatrix m1(4, fixed_w[tf1]);
	  dmatrix m2(4, fixed_w[tf2]);
	  for (int d=0; d <= my_cob_params[r].dmax; ++d) {

	    m1.fill_with(0.0);
	    m2.fill_with(0.0);

	    int w1 = fixed_w[tf1];
	    int w2 = fixed_w[tf2];
	    std::vector<double>& signal = d >= minimum_distance_for_learning ? fixed_signal_sum : fixed2_signal_sum;
	    
	    std::vector<double> temp_signal(4, 0.0);
	    dmatrix temp_dinucleotide_signal(4, 4);
	    // This requires gcc 4.9
	    // clang (at least not 3.8) does not support 'declare reduction' even
	    // though it defines _OPENMP to 201307 (that is openmp 4.0).
#if defined(_OPENMP) && (_OPENMP >= 201307) && !defined(__clang__)
#pragma omp declare reduction( + : dmatrix : omp_out+=omp_in ) initializer(omp_priv(omp_orig.dim()))
#pragma omp declare reduction( + : std::vector<double> : omp_out+=omp_in ) initializer(omp_priv(std::vector<double>(omp_orig.size())))
#pragma omp parallel for reduction(+:m1,m2, temp_dinucleotide_signal,temp_signal) schedule(static)
#endif	    
	    for (int i = 0; i < lines; ++i) {
	      const std::string& line = sequences[i];
	      const std::string& line_rev = sequences_rev[i];


	      for (int dir=0; dir < maxdir; ++dir) {
		int first = dir == 0 ? 0 : w2 + d;
		int second = dir == 0 ? w1+d : 0;
		for (int j1=0; j1 < my_cob_params[r].dimer_m[i][d]; ++j1) {  // iterates through start positions

		  double z = d <= my_cob_params[r].max_dist_for_deviation ?
		    my_cob_params[r].overlapping_dimer_Z[i][o][d][dir][j1] : 
		    my_cob_params[r].spaced_dimer_Z[i][o][d][dir][j1];
		  get_new_spaced_dimer_weights(j1, dirs[dir], z, d, fixed_w[tf1], fixed_w[tf2], 
					       my_cob_params[r].oriented_dimer_seeds[o].get<0>(), 
					       my_cob_params[r].oriented_dimer_seeds[o].get<1>(),
					       line, line_rev, 
					       m1, m2,
					       use_multinomial);

		  for (int pos=0; pos < w1; ++pos)
		    temp_signal[to_int(line[j1+first+pos])] += z;

		  for (int pos=0; pos < w2; ++pos)
		    temp_signal[to_int(line[j1+second+pos])] += z;

  
		  if (use_markov_background) {
		    // first part
		    for (int pos2=0; pos2 < w1-1; ++pos2)
		      temp_dinucleotide_signal(to_int(line[j1+first+pos2]), to_int(line[j1+first+pos2+1])) += z;
		    // second part
		    for (int pos2=0; pos2 < w2-1; ++pos2)
		      temp_dinucleotide_signal(to_int(line[j1+second+pos2]), to_int(line[j1+second+pos2+1])) += z;
		  }

		}
	      }

	    } // for i in lines
	    signal += temp_signal;
	    dinucleotide_signal += temp_dinucleotide_signal;
	    boost::tuple<dmatrix,dmatrix>& temp = 
	      d >= minimum_distance_for_learning ? spaced_dimer_weights_sum[r] : spaced_dimer_weights2_sum[r];

	    if (use_rna) {
	      switch (o) {
	      case HT:
		temp.get<0>() += m1;
		temp.get<1>() += m2;
		break;
	      case RNA_TH:
		temp.get<0>() += m2;
		temp.get<1>() += m1;
		break;
	      default:
		printf("Unknown orientation. Exiting!\n");
		exit(1);
	      }	      
	    }
	    else {
	      switch (o) {
	      case HT:
		temp.get<0>() += m1;
		temp.get<1>() += m2;
		break;
	      case HH:
		temp.get<0>() += m1;
		temp.get<1>() += reverse_complement(m2);
		break;
	      case TT:
		temp.get<0>() += reverse_complement(m1);
		temp.get<1>() += m2;
		break;
	      case TH:
		temp.get<0>() += reverse_complement(m1);
		temp.get<1>() += reverse_complement(m2);
		break;
	      }	      
	    }
	    
	  } // for d, spaced dimer PWMs
	} // for o, spaced dimer PWMs

      } // for end r


      // Learn the 'fixed' models from spaced dimers
      

      for (int r = 0; r < number_of_cobs; ++r) {      // Combine all the dimer half sites into fixed PWMs
	// from spaced models with minimum_distance_for_learning >= d
	new_fixed_PWM[my_cob_params[r].tf1] += spaced_dimer_weights_sum[r].get<0>();     
	new_fixed_PWM[my_cob_params[r].tf2] += spaced_dimer_weights_sum[r].get<1>();

	// from spaced models with 0 <= d < minimum_distance_for_learning, and from monomer models
	new_fixed_PWM2[my_cob_params[r].tf1] += spaced_dimer_weights2_sum[r].get<0>();   
	new_fixed_PWM2[my_cob_params[r].tf2] += spaced_dimer_weights2_sum[r].get<1>();
      } // end for r



      
      	////////////////////////////
	//
	// gap
	//
	////////////////////////////


      for (int r = 0; r < number_of_cobs; ++r) {
	int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;

	for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	  int tf1 = my_cob_params[r].tf1;
	  int tf2 = my_cob_params[r].tf2;
	  if (use_rna and o == RNA_TH) {
	    std::swap(tf1, tf2);
	  }
	  
	  for (int d=0; d <= max_dist_for_deviation; ++d) {
	    if (my_cob_params[r].dimer_lambdas[o][d] == 0.0)
	      continue;
	    
	    int dimer_len = fixed_w[tf1] + d + fixed_w[tf2];
	    dmatrix m1(4, dimer_len);

	    m1.fill_with(0.0);
	    //	    bool local_use_two_strands;
	    // if (avoid_palindromes and use_two_strands and (o == HH or o==TT) and
	    // 	is_almost_palindrome(my_cob_params[r].deviation[o][d])) {
	    //   local_use_two_strands = false;
	    //   printf("Avoiding palindrome %s %d iteration %i\n", orients[o], d, round);
	    // }
	    // else
	    //   local_use_two_strands = use_two_strands;


	    int w1 = fixed_w[tf1];
	    int w2 = fixed_w[tf2];
	    std::vector<double> temp_signal(4, 0.0);
	    dmatrix temp_dinucleotide_signal(4, 4);
	    // This requires gcc 4.9
	    // clang (at least not 3.8) does not support 'declare reduction' even
	    // though it defines _OPENMP to 201307 (that is openmp 4.0).
#if defined(_OPENMP) && (_OPENMP >= 201307) && !defined(__clang__)
#pragma omp declare reduction( + : dmatrix : omp_out+=omp_in ) initializer(omp_priv(omp_orig.dim()))
#pragma omp declare reduction( + : std::vector<double> : omp_out+=omp_in ) initializer(omp_priv(std::vector<double>(omp_orig.size())))
#pragma omp parallel for reduction(+:m1, temp_dinucleotide_signal, temp_signal) schedule(static)
#endif	    
	    for (int i = 0; i < lines; ++i) {
	      const std::string& line = sequences[i];
	      const std::string& line_rev = sequences_rev[i];

	      for (int dir=0; dir < maxdir; ++dir) {
		int first = dir == 0 ? w1 : w2;  // in the second strand the binding sites are in different order, first gap pos 
		int last = first+d;              // last gap pos
		for (int j1=0; j1 < my_cob_params[r].dimer_m[i][d]; ++j1) {  // iterates through start positions
		  double z = my_cob_params[r].overlapping_dimer_Z[i][o][d][dir][j1];
		  get_new_gap_weights(j1, dirs[dir], z, d, fixed_w[tf1], fixed_w[tf2], 
				      my_cob_params[r].oriented_dimer_seeds[o].get<0>(), 
				      my_cob_params[r].oriented_dimer_seeds[o].get<1>(),
				      line, line_rev, 
				      m1,
				      use_multinomial);
		  for (int pos=first; pos < last; ++pos)
		    temp_signal[to_int(line[j1+pos])] += z;

		} // for j1
	      } // for dir
	    } // for i in lines
	    gap_signal_sum += temp_signal;
	    gap_weights[r][o][d] = m1;

	  } // for d, spaced dimer PWMs
	} // for o, spaced dimer PWMs

      } // for end r


      
      /////////////////////////
      //
      // Re-estimate background
      //
      /////////////////////////
      
      std::vector<double> total_signal_sum(4, 0.0);

	
      total_signal_sum += fixed_signal_sum;
      if (local_debug)
	printf("fixed signal: %s\n", print_vector(fixed_signal_sum).c_str());

      // These weren't used to learning fixed PWMs.
      // Only to form background model by subtracting signal from full data
      total_signal_sum += fixed2_signal_sum;
      if (local_debug)
	printf("fixed2 signal: %s\n", print_vector(fixed2_signal_sum).c_str());


      total_signal_sum += overlapping_signal_sum;
      if (local_debug)
	printf("overlapping signal: %s\n", print_vector(overlapping_signal_sum).c_str());

      total_signal_sum += gap_signal_sum;
      if (local_debug)
	printf("gap signal: %s\n", print_vector(gap_signal_sum).c_str());
      
      //      if (use_two_strands)  
		//	total_signal_sum /= 2.0;  // This should not be used
      if (local_debug)
	printf("Total signal is %s\n", print_vector(total_signal_sum).c_str());
      dvector bg_temp = background_frequencies;    // always counted only in single direction!!!!!
      dmatrix bg_markov_temp = background_frequency_matrix;
      if (local_debug)
	printf("Data distribution is %s\n", print_vector(bg_temp).c_str());

      //recompute the background probabilities, without motif occurences
      for (int i=0; i < 4; ++i) {
	bg_model[i] = bg_temp[i] - total_signal_sum[i];
	assert(bg_model[i] >= 0);
      }

      bg_model_markov = bg_markov_temp - dinucleotide_signal;
      if (local_debug) {
	printf("Unnormalized background distribution (intermed): %s\n", print_vector(bg_model).c_str());
	write_matrix(stdout, bg_model_markov, "Unnormalized dinucleotide background:\n", "%.6f");
      }
      if (use_pseudo_counts) {
	pseudo_counts.add(bg_model);
	pseudo_counts.add(bg_model_markov);
      }
      normalize_vector(bg_model);
      normalize_matrix_rows(bg_model_markov);

      if (local_debug) {
	printf("Background distribution (intermed): %s\n", print_vector(bg_model).c_str());
	write_matrix(stdout, bg_model_markov, "Dinucleotide background:\n", "%.6f");
      }


      
      ///////////////////////////////////////////////////////////////////////////
      //
      // Normalize PWMs, compute ICs for PWM positions and for flanking positions
      //
      ///////////////////////////////////////////////////////////////////////////

      printf("\n");

      // Monomer models
      for (int k=0; k < fixed_p; ++k) {
        if (local_debug)
	  write_matrix(stdout, new_fixed_PWM[k], to_string("Unnormalized fixed matrix %i:\n", k).c_str(), "%.6f");

	if (use_pseudo_counts)
	  pseudo_counts.add(new_fixed_PWM[k]);
	normalize_matrix_columns(new_fixed_PWM[k]);

	assert(is_column_stochastic_matrix(new_fixed_PWM[k]));

	if (local_debug) {
	  write_matrix(stdout, new_fixed_PWM[k], to_string("Intermediate fixed matrix %i:\n", k).c_str(), "%.6f");
	  std::vector<double> ic(fixed_w[k]);
	  for (int i=0; i < fixed_w[k]; ++i)
	    ic[i] = information_content(new_fixed_PWM[k].column(i), bg_model);
	  printf("Information content by columns\n");
	  printf("%s\n", print_vector(ic, "\t", 2).c_str());
	}

	
	if (allow_extension) {
	  if (use_pseudo_counts) {
	    pseudo_counts.add(pred_flank[k]);
	    pseudo_counts.add(succ_flank[k]);
	  }
	  normalize_vector(pred_flank[k]);
	  normalize_vector(succ_flank[k]);
	}
	
      }

      // Overlapping dimer models
      for (int r = 0; r < number_of_cobs; ++r) {
	for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	  for (int d=my_cob_params[r].dmin; d < std::min(0, my_cob_params[r].dmax+1); ++d) {
	    if (use_pseudo_counts)
	      pseudo_counts.add(overlapping_dimer_weights[r][o][d]);
	    normalize_matrix_columns(overlapping_dimer_weights[r][o][d]);
	    assert(is_column_stochastic_matrix(overlapping_dimer_weights[r][o][d]));
	  }
	}
      } // end for r

      // gap models
      for (int r = 0; r < number_of_cobs; ++r) {
	int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
	for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	  for (int d=0; d <= max_dist_for_deviation; ++d) {
	    double mysum = sum(gap_weights[r][o][d]);
	    if (extra_debug) {
	      write_matrix(stdout, gap_weights[r][o][d],
			   to_string("Observed unnormalized gap matrix %s %s %i:\n",
				     my_cob_params[r].name().c_str(), orients[o], d).c_str(), "%.6f");
	      printf("Sum is %e\n", mysum);
	    }
	    if (mysum == 0.0)
	      empty_gap[r][o][d]=true;
	    if (use_pseudo_counts)
	      pseudo_counts.add(gap_weights[r][o][d]);
	    normalize_matrix_columns(gap_weights[r][o][d]);
	    assert(is_column_stochastic_matrix(gap_weights[r][o][d]));
	  }
	}
      } // end for r


      
      for (int k=0; k < fixed_p; ++k) {
	if (keep_monomer_fixed[k]) {
	  new_fixed_PWM[k] = fixed_PWM[k];
	}	  
      }

      
      ///////////////
      //
      // Adjust seeds
      //
      ///////////////
      
      for (int k=0; k < fixed_p; ++k) {
	fixed_seed[k] = string_giving_max_score(new_fixed_PWM[k], use_rna);
      }
      
      if (use_multinomial and adjust_seeds) {

	for (int r = 0; r < number_of_cobs; ++r) {
	  int number_of_orientations = my_cob_params[r].number_of_orientations;
	  int dmin = my_cob_params[r].dmin;
	  int dmax = my_cob_params[r].dmax;
	  int tf1 = my_cob_params[r].tf1;
	  int tf2 = my_cob_params[r].tf2;
	  for (int o=0; o < number_of_orientations; ++o) {
	    std::string seed1, seed2;
	    boost::tie(seed1, seed2) = //my_cob_params[r].oriented_dimer_seeds[0]; 
	      get_seeds_according_to_hetero_orientation(o, fixed_seed[tf1], fixed_seed[tf2], use_rna);
	    for (int d=dmin; d < std::min(0, dmax+1); ++d) {
	      my_cob_params[r].dimer_seeds[o][d] = create_overlapping_seed(seed1, seed2, d, my_gapped_kmer_context);
	    }  // end for d

	  }  // end for o
	} // end for r

      } // end if use_multinomial


      for (int r=0; r < my_cob_params.size(); ++r) {
	my_cob_params[r].update_oriented_matrices(new_fixed_PWM, fixed_seed);
	my_cob_params[r].compute_expected_matrices(new_fixed_PWM);
      }
      
      //////////////////////
      //
      // Prune dimeric cases
      //
      //////////////////////
      
      for (int r = 0; r < number_of_cobs; ++r) {
	const int& w1 = my_cob_params[r].k1;
	//	const int& w2 = my_cob_params[r].k2;
	for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	  for (int d=my_cob_params[r].dmin; d < std::min(0, my_cob_params[r].dmax+1); ++d) {
	    if (my_cob_params[r].dimer_lambdas[o][d] != 0.0) {
	      double ic = average_information_content(overlapping_dimer_weights[r][o][d].cut(0, w1+d-1, 4, 2-d));
	      double lambda = my_cob_params[r].dimer_lambdas[o][d];
	      bool too_weak = ic < ic_threshold;
	      // bool too_faraway = hamming_distance(my_cob_params[r].dimer_seeds[o][d], 
	      // 					  string_giving_max_score(overlapping_dimer_weights[r][o][d])) >= hamming_threshold;
	      bool too_small = lambda < cob_cutoff;
	      //	      if (too_weak or (use_multinomial and too_faraway) or too_small) {
	      if (too_weak or too_small) {
		my_cob_params[r].dimer_lambdas[o][d] = 0.0;
		if (local_debug) {
		  printf("Excluded dimer case %s %s %i:", my_cob_params[r].name().c_str(), orients[o], d);
		  if (too_weak)
		    printf(" Too weak (ic=%f)", ic);
		  if (too_small)
		    printf(" Too small (lambda=%f)", lambda);
		  printf("\n");
		  //			 too_faraway ? "Too far away" : "");
		}
	      }
	    }
	  }  // end for d
	 for (int d=0; d <= my_cob_params[r].dmax; ++d) {
	   if (my_cob_params[r].dimer_lambdas[o][d] != 0.0) {
	     double lambda = my_cob_params[r].dimer_lambdas[o][d];
	     bool too_small = lambda < cob_cutoff;
	     if (too_small) {
		my_cob_params[r].dimer_lambdas[o][d] = 0.0;
		if (local_debug) {
		  printf("Excluded dimer case %s %s %i: Too small (%f)\n", my_cob_params[r].name().c_str(), orients[o], d, lambda);
		}
	     }
	   }
	 } // end for d
	 
	}  // end for o
      }  // end for r

      
      /////////////////////////////////
      //
      // print overlapping dimer models
      //
      /////////////////////////////////
      
      if (extra_debug) {
	for (int r = 0; r < number_of_cobs; ++r) {
	  for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	    for (int d=my_cob_params[r].dmin; d < std::min(0, my_cob_params[r].dmax+1); ++d) {
	      if (my_cob_params[r].dimer_lambdas[o][d] != 0.0) {
		std::vector<double> ic(my_cob_params[r].dimer_w[d]);
		for (int i=0; i < my_cob_params[r].dimer_w[d]; ++i) {
		  ic[i] = information_content(overlapping_dimer_weights[r][o][d].column(i), bg_model);
		}
		printf("\n");
		write_matrix(stdout, overlapping_dimer_weights[r][o][d],
			     to_string("Observed overlapping dimer case matrix %s %s %i:\n", 
				       my_cob_params[r].name().c_str(), orients[o], d).c_str(), "%.6f");
		printf("Information content of overlapping dimer by columns\n");
		printf("%s\n", print_vector(ic, "\t", 2).c_str());
	      }
	    }
	  }
	} // end for r
      }
      
      /////////////////////////////////
      //
      // print gap dimer models
      //
      /////////////////////////////////

      if (extra_debug) {
	for (int r = 0; r < number_of_cobs; ++r) {
	  int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
	  for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	    for (int d=0; d <= max_dist_for_deviation; ++d) {
	      if (my_cob_params[r].dimer_lambdas[o][d] != 0.0) {
		std::vector<double> ic(my_cob_params[r].dimer_w[d]);
		for (int i=0; i < my_cob_params[r].dimer_w[d]; ++i) {
		  ic[i] = information_content(gap_weights[r][o][d].column(i), bg_model);
		}
		printf("\n");
		write_matrix(stdout, gap_weights[r][o][d],
			     to_string("Observed gap matrix %s %s %i:\n", 
				       my_cob_params[r].name().c_str(), orients[o], d).c_str(), "%.6f");
		printf("Information content of gap by columns\n");
		printf("%s\n", print_vector(ic, "\t", 2).c_str());
	      }
	    }
	  }
	} // end for r
      }

      
      ////////////////////////////
      //
      // Reestimate the deviations
      //
      ////////////////////////////
      
      for (int r = 0; r < number_of_cobs; ++r) {
	const int& w1 = my_cob_params[r].k1;
	//	const int& w2 = my_cob_params[r].k2;
	for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	  for (int d=my_cob_params[r].dmin; d < std::min(0, my_cob_params[r].dmax+1); ++d) {
	    if (my_cob_params[r].dimer_lambdas[o][d] != 0.0) {
	      for (int row = 0; row < 4; ++row) {
		for (int column = w1 + d - 1; column < w1 + 1; ++column) // The overlapping part with flanks of 1bp on both sides
		  new_deviation[r][o][d](row,column) = 
		    overlapping_dimer_weights[r][o][d](row,column) - my_cob_params[r].expected_overlapping_dimer_PWMs[o][d](row,column);
	      }
	    }
	  }  // end for d
	 for (int d=0; d <= my_cob_params[r].max_dist_for_deviation; ++d) {
	   if (my_cob_params[r].dimer_lambdas[o][d] != 0.0) {
	     if (empty_gap[r][o][d])
	       continue;   // The deviation array stays empty
	     int first, last;
	     // The interval [first,last] is the gap.
	     first = w1;
	     last = w1 + d - 1;
	     for (int row = 0; row < 4; ++row) {
	       for (int column = first - 1; column <= last + 1; ++column) // The gap part with flanks of 1bp on both sides
		 new_deviation[r][o][d](row,column) = 
		   gap_weights[r][o][d](row,column) - my_cob_params[r].expected_overlapping_dimer_PWMs[o][d](row,column);
	     } 
	   }
	 } // end for d
	}  // end for o
      }  // end for r

      
      int number_of_parameters = get_number_of_parameters(my_cob_params, fixed_PWM);
      if (local_debug)
	printf("Total number of parameters is %i\n", number_of_parameters);

      

      // Print reestimated mixing parameters
      if (local_debug ) {
	printf("\n");
	printf("Intermediate background lambda is %f\n", background_lambda);
	printf("Intermediate fixed lambdas are %s\n", print_vector(fixed_lambda).c_str());
	if (use_dimers) {
	  for (int r = 0; r < number_of_cobs; ++r) {
	    print_cob(stdout, my_cob_params[r].dimer_lambdas, to_string("Intermediate dimer lambdas %s:\n", my_cob_params[r].name().c_str()), "%e");
	    printf("Intermediate sum of dimer lambdas %s is %.4f\n", my_cob_params[r].name().c_str(),
		   sum(my_cob_params[r].dimer_lambdas));
	  }
	}
      }




      assert(background_lambda >= 0);
      assert(background_lambda <= 1);

      if (allow_extension) {
	for (int k=0; k < fixed_p; ++k) {
	  pred_ic[k] = information_content(pred_flank[k], bg_model);
	  succ_ic[k] = information_content(succ_flank[k], bg_model);
	  printf("Preceeding flank for matrix %i was %s with ic %.2f\n", k,
		 print_vector(pred_flank[k]).c_str(), 
		 pred_ic[k]);
	  printf("Succeeding flank for matrix %i was %s with ic %.2f\n", k,
		 print_vector(succ_flank[k]).c_str(), 
		 succ_ic[k]);
	}
      }
      
     // Print deviation tables
      if (extra_debug) {
	for (int r = 0; r < number_of_cobs; ++r) {
	  int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
	  for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	    for (int d=my_cob_params[r].dmin; d <= max_dist_for_deviation; ++d) {
	      if (my_cob_params[r].dimer_lambdas[o][d] != 0.0) {
		write_matrix(stdout, new_deviation[r][o][d], 
			     to_string("Deviation matrix %s %s %i:\n", my_cob_params[r].name().c_str(),
				       orients[o], d).c_str(), "%.6f");
	      }
	    }
	  }
	}  // end for r
      }


      ///////////////////////
      // Compute distances between old and new models

      typedef BinOp<double>::type my_func;
      myaccumulate<double> acc(0, static_cast<my_func>(std::max));

      printf("\n");


      for (int k=0; k < fixed_p; ++k)
	fixed_dist[k]=distance(new_fixed_PWM[k], fixed_PWM[k]);
      if (local_debug)
	printf("Fixed distances are %s\n", print_vector(fixed_dist).c_str());
      acc(max_element(fixed_dist));

      // Compute distances between 'bridging' table with this round and the previous round
      for (int r = 0; r < number_of_cobs; ++r) {      
	int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
	const int& w1 = my_cob_params[r].k1;
	for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
	  for (int d=my_cob_params[r].dmin; d < std::min(0, my_cob_params[r].dmax+1); ++d) {
	    int first;
	    //int last;
	    // The interval [first,last] is either the overlap area or the gap.
	    if (d < 0) {
	      first = w1 + d;
	      //last = w1 - 1;
	    } else {
	      first = w1;
	      //last = w1 + d - 1;
	    }
	    if (my_cob_params[r].dimer_lambdas[o][d] != 0.0)
	      //	      deviation_dist[r][o][d]=distance(new_deviation[r][o][d], my_cob_params[r].deviation[o][d]);
	      deviation_dist[r][o][d]=distance(overlapping_dimer_weights[r][o][d].cut(0, first-1, 4, 2-d),
					       my_cob_params[r].overlapping_dimer_PWM[o][d].cut(0, first-1, 4, 2-d));
	    else
	      deviation_dist[r][o][d]=0.0;
	  } // for d
	  for (int d=0; d <= max_dist_for_deviation; ++d) {
	    int first;
	    //int last;
	    // The interval [first,last] is either the overlap area or the gap.
	    if (d < 0) {
	      first = w1 + d;
	      //last = w1 - 1;
	    } else {
	      first = w1;
	      //last = w1 + d - 1;
	    }
	    if (my_cob_params[r].dimer_lambdas[o][d] != 0.0) {
	      deviation_dist[r][o][d]=distance(gap_weights[r][o][d].cut(0, first-1, 4, 2+d),
					       my_cob_params[r].overlapping_dimer_PWM[o][d].cut(0, first-1, 4, 2+d));
	      overlapping_dimer_weights[r][o][d] = my_cob_params[r].expected_overlapping_dimer_PWMs[o][d];

	      // The next line is doing a little too much, since the flanks of overlapping_dimer_PWM aren't used anywhere
	      overlapping_dimer_weights[r][o][d].inject(gap_weights[r][o][d].cut(0, first-1, 4, 2+d), 0, first-1);
	    }
	    else
	      deviation_dist[r][o][d]=0.0;
	  } // for d
	}
	if (local_debug and my_cob_params[r].dmin < 0) {
	  print_cob(stdout, deviation_dist[r], to_string("Deviation %s distances are\n", 
		    my_cob_params[r].name().c_str()), "%f");
	  printf("Max distance in deviation %s is %f\n", my_cob_params[r].name().c_str(),
		 max_element(deviation_dist[r]));
	}
	acc(max_element(deviation_dist[r]));
      }

      double total_maximum_distance = acc.get();
      if (local_debug)
        printf("Total maximum distance is %f\n\n", total_maximum_distance);


      ///////////////////////
      // Replace old models

      
      fixed_PWM = new_fixed_PWM;              // Replace old fixed models
      
      for (int r = 0; r < number_of_cobs; ++r) {
	my_cob_params[r].deviation = new_deviation[r];
	my_cob_params[r].overlapping_dimer_PWM = overlapping_dimer_weights[r];  // This is needed only to compute the distance for convergence
      }

      if (local_debug) {
	for (int k=0; k < fixed_p; ++k)
	  fixed_av_ic[k] = average_information_content(fixed_PWM[k], bg_model);
	printf("Intermediate average information content of fixed models: %s\n", print_vector(fixed_av_ic).c_str());
      }

      bg_model_rev = std::vector<double>(bg_model.rbegin(), bg_model.rend()); // use this model when considering the reverse strand
      bg_model_markov_rev = reverse_complement_markov_model(bg_model_markov);

      mll = complete_data_log_likelihood(fixed_PWM,
					 bg_model, bg_model_markov,
					 bg_model_rev, bg_model_markov_rev,
					 fixed_lambda, background_lambda, my_cob_params, fixed_Z, sequences, sequences_rev, fixed_w, fixed_m);
      if (local_debug)
	printf("Log likelihood is %f\n", mll);

      bool deviation_converged = true;
      for (int r=0; r < my_cob_params.size(); ++r) {
	if (max_element(deviation_dist[r]) >= epsilon) {
	  deviation_converged = false;
	  break;
	}
      }

      convergence_criterion_reached = 
	max_element(fixed_dist) < epsilon and deviation_converged;

      if ((convergence_criterion_reached or round+1 >= max_iter) and get_full_flanks)
	boost::tie(flank_fixed_PWM, flank_dimer_PWM) =
	  get_models_with_flanks(sequences,
				 sequences_rev,
				 fixed_seed,
				 fixed_w,
				 Lmax,
				 fixed_m,
				 my_cob_params,
				 fixed_Z);

      if (local_debug) {
 	printf("---------------------------------------------------\n");
        fflush(stdout);
      }
    } // for round
    iterations.push_back(round);

    if (unbound.length() != 0)
      print_background_sequences(sequences, fixed_Z, my_cob_params, fixed_PWM, bg_model, bg_model_markov);
    
    ++extension_round;
      

    if (not allow_extension or extension_round == max_extension_round)
      break;

    bool all_below = true; 
    for (int k=0; k < fixed_p; ++k) {
      if (pred_ic[k] >= extension_ic_threshold or succ_ic[k] >= extension_ic_threshold) {
	all_below = false;
	break;
      }
    }
    if (all_below)
      break;
    for (int k=0; k < fixed_p; ++k) {
      char nucs[] = "ACGT";
      if (pred_ic[k] >= extension_ic_threshold) {
	fixed_PWM[k].insert_column(0, pred_flank[k]);
	fixed_seed[k].insert(0, 1, nucs[arg_max(pred_flank[k])]);  // THIS SHOULD BE CORRECTED. HOW IS NEW BASE DECIDED, NOW MAX
      }
      if (succ_ic[k] >= extension_ic_threshold) {
	fixed_PWM[k].insert_column(fixed_PWM[k].get_columns(), succ_flank[k]);
	fixed_seed[k].insert(fixed_seed[k].length(), 1, nucs[arg_max(succ_flank[k])]);  // THIS SHOULD BE CORRECTED
      }
      if (pred_ic[k] >= extension_ic_threshold or succ_ic[k] >= extension_ic_threshold) {
	write_matrix(stdout, fixed_PWM[k], to_string("Extended matrix %i:\n", k).c_str(), "%.6f");
      }
    }    
  } // while extension_round  

  /******************************************************************************************/

  printf("%s\n", std::string(40, '*').c_str());
  printf("Results:\n");

  
  // Print deviations
  for (int r = 0; r < number_of_cobs; ++r) {
    int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
    for (int o=0; o < my_cob_params[r].number_of_orientations; ++o) {
      for (int d=my_cob_params[r].dmin; d <= max_dist_for_deviation; ++d) {
	if (my_cob_params[r].dimer_lambdas[o][d] != 0.0) {
	  write_matrix(stdout, my_cob_params[r].deviation[o][d], 
		       to_string("Deviation matrix %s %s %i:\n", my_cob_params[r].name().c_str(),
				 orients[o], d).c_str(), "%.6f");
	}
      }
    }
  }  // end for r

  // Print flanks

  if (get_full_flanks) {
    for (int k=0; k < fixed_p; ++k) {
      write_matrix(stdout, flank_fixed_PWM[k], to_string("Flank fixed matrix %i:\n", k), "%.6f");
    }
  
    for (int r = 0; r < number_of_cobs; ++r) {
      int dmin = my_cob_params[r].dmin;
      int dmax = my_cob_params[r].dmax;
      int no = my_cob_params[r].number_of_orientations;
      int tf1 = my_cob_params[r].tf1;
      int tf2 = my_cob_params[r].tf2;
      for (int o=0; o < no; ++o) {
	for (int d=dmin; d <= dmax; ++d) {
	  if (my_cob_params[r].dimer_lambdas[o][d] == 0.0)
	    continue;
	  write_matrix(stdout, flank_dimer_PWM[r][o][d], to_string("Flank dimer case matrix %i-%i %s %i:\n", tf1,tf2, orients[o], d), "%.6f");
	}
      }
    
    }
  }
  
  printf("Maximum log likelihood in the beginning: %i\n", 
	 (int)first_maximum_log_likelihood);
  printf("Maximum log likelihood in the end: %i\n", (int)mll);

  printf("EM-algorithm took %s = %d iterations \n", print_vector(iterations, "+", 0).c_str(), sum(iterations));
  printf("\n");
  printf("Background lambda is %f\n", background_lambda);
  printf("Fixed lambdas are %s\n", print_vector(fixed_lambda).c_str());
  if (use_output) {
    std::string file = to_string("%s/monomer_weights.txt", outputdir.c_str());
    FILE* fp = fopen(file.c_str(), "w");
    if (fp == NULL) {
      fprintf(stderr, "Couldn't open file %s for writing\n", file.c_str());
      exit(1);
    }
    fprintf(fp, "%s\n", print_vector(names, ",", 0).c_str());
    fprintf(fp, "%s\n", print_vector(fixed_lambda, ",", 6).c_str());
    fclose(fp);
  }


  double total_dimer_lambda=0.0;
  if (use_dimers) {
    for (int r = 0; r < number_of_cobs; ++r) {
      int max_dist_for_deviation = my_cob_params[r].max_dist_for_deviation;
      const cob_params_t& cp = my_cob_params[r];
      print_cob(stdout, cp.dimer_lambdas, 
		to_string("Dimer lambdas %s:\n", cp.name().c_str()), 
		"%.5f");
      if (use_output)
	write_cob_file(to_string("%s/%s-%s.cob", outputdir.c_str(), names[cp.tf1].c_str(), names[cp.tf2].c_str()), cp.dimer_lambdas);
      printf("Sum of dimer lambdas of cob table %s is %.4f\n", 
	     cp.name().c_str(),
	     sum(cp.dimer_lambdas));
      total_dimer_lambda += sum(cp.dimer_lambdas);
      std::vector<std::string> excluded;
      MY_FOREACH(o, cp.dimer_lambdas) {
	MY_FOREACH(d, cp.dimer_lambdas[o]) {
	  if (d <= max_dist_for_deviation) {
	    if (cp.dimer_lambdas[o][d] == 0.0) {
	      excluded.push_back(to_string("%s %i", orients[o], d));
	    } else {
	      if (use_output) {
		write_matrix_file(to_string("%s/%s-%s.%s.%i.dev", outputdir.c_str(), names[cp.tf1].c_str(), names[cp.tf2].c_str(),
					    orients[o], d),
				  cp.deviation[o][d]);
	      }
	    }
	  }
	  else { // d>=0
 	    if (cp.dimer_lambdas[o][d] == 0.0) {
              excluded.push_back(to_string("%s %i", orients[o], d));
	    }
	  }
	}
      }
      printf("Cob %s excluded cases: %s\n", cp.name().c_str(), print_vector(excluded).c_str());
    }
  }

  assert(fabs(background_lambda + sum(fixed_lambda) + total_dimer_lambda) - 1.0 < 0.001);

  printf("\n");

  printf("Background distribution: %s\n", print_vector(bg_model).c_str());
  printf("Fixed seeds are %s\n", print_vector(fixed_seed).c_str());

  for (int k=0; k < fixed_p; ++k) {
    write_matrix(stdout, fixed_PWM[k], to_string("Fixed matrix %i:\n", k), "%.6f");
    if (use_output)
      write_matrix_file(to_string("%s/%s.pfm", outputdir.c_str(), names[k].c_str()), fixed_PWM[k]);
    fixed_av_ic[k] = average_information_content(fixed_PWM[k], bg_model);
  }
  printf("Average information content in fixed PWMs: %s\n", print_vector(fixed_av_ic).c_str());

  printf("%s\n", std::string(40, '*').c_str());

  
  return fixed_PWM;

} // end multi_profile_em_algorithm





bool
ends_with(const std::string& s, const std::string& ending)
{
  int k=ending.length();
  if (s.length() < k)
    return false;

  return s.substr(s.length()-k, k) == ending;
}

std::string
replace_ending(std::string s, const std::string& old_ending, const std::string& new_ending)
{
  int k = old_ending.length();
  assert(s.length() >= k);

  s.replace(s.length()- k, k, new_ending);
  return s;
}


// print a help message of positional parameters,
// using boost's program options library
void
print_positional_options(const po::positional_options_description& popt,
			 const po::options_description& o)
{
  printf("Positional parameters:\n");
  int count = popt.max_total_count();
  for (int i=0; i < count; ++i) {
    const std::string& name = popt.name_for_position(i);
    const std::string& desc =  o.find(name, false).description();
    printf("  %-22s%s\n", name.c_str(), desc.c_str());
  }
}

/*
// Not used currently
class mytempfile
{
public:

  mytempfile() 
  {
    char templ[] = "/tmp/mytempXXXXXX";
    fd = mkstemp(templ);
    //    f = fdopen(fd);
    name = templ;
    reference_count = new int(1);
    //    *reference_count = 1;
  }

  mytempfile(const mytempfile& orig) {
    fd = orig.fd;
    reference_count = orig.reference_count;
    name = orig.name;
    ++*reference_count;
  }

  ~mytempfile() 
  {
    --*reference_count;
    if (*reference_count == 0) {
      //    fclose(f);
      close(fd);
      remove(name.c_str());
      delete reference_count;
    }
  }

  std::string name;
  //  FILE* f;
private:


  // NOT IMPLEMENTED
  mytempfile&
  operator==(const mytempfile& rhs)
  {
    return *this;
  }

  int fd;
  int* reference_count;
};


// Not used!!!
void
pwm_list_to_logos(const std::string& result_filename, const std::vector<dmatrix>& matrices)
{
  int p = matrices.size();
  std::string append_command = "convert ";
  std::vector<mytempfile> output;
  bool use_spacek = true;  // Use Jussi's program to create logos

  std::string logo_program;
  if (use_spacek)
    logo_program = "spacek40 --logo";
  else
    logo_program = "to_logo.sh";

  for (int i=0; i < p; ++i) {
    mytempfile input;
    output.push_back(mytempfile());

    write_matrix_file(input.name, matrices[i]);

    std::string logo_command = to_string("%s %s %s > /dev/null", 
					 logo_program.c_str(), input.name.c_str(), output[i].name.c_str());
    printf("%s\n", logo_command.c_str());

    int code = system(logo_command.c_str());
    //error(code != 0, "system returned non-zero result");
    if (code != 0)
      return;
    append_command += " ";
    append_command += output[i].name;
    if (use_spacek)
      append_command += ".png";
  }
  append_command += " ";
  if (use_spacek)
    append_command += " -append ";
  else
    append_command += " +append ";
  append_command += result_filename;
  printf("%s\n", append_command.c_str());
  int code = system(append_command.c_str());
  //error(code != 0, "system returned non-zero result");
  if (code != 0)
    return;

}
*/

dmatrix
multinomial1_multimer_bernoulli_corrected(const std::string& seed, const std::vector<std::string>& sequences)
{
  int lines = sequences.size();
  int L = sequences[0].length();
  int k = seed.length();
  int seed_count;
  int multinomial_count;
  dmatrix multinomial1_motif;
  boost::tie(multinomial1_motif, seed_count, multinomial_count) = find_snips_multimer_helper(seed, sequences, use_rna);
  double mean = lines * (use_two_strands ? 2 : 1) * (L-k+1) * pow(4, -k);
      
  dmatrix mean_matrix(4, k);
  mean_matrix.fill_with(mean);
  dmatrix result;
  result = multinomial1_motif - mean_matrix;
  
  result.apply(cut);

  return result;
}

cob_params_t
create_cob(cob_combination_t cob_combination,
	   const std::vector<std::string>& fixed_seeds,
	   const std::vector<dmatrix>& fixed_M,
	   double dimer_lambda_fraction,
	   const std::vector<std::string>& sequences,
	   const std::vector<int>& L,
	   const gapped_kmer_context& my_gapped_kmer_context, int dmin, int dmax, int max_dist_for_deviation)
{
  int tf1, tf2;
  boost::tie(tf1, tf2) = cob_combination;
  //  int L = sequences[0].length();
  int number_of_orientations = use_rna ? 1 : 3;
  if (tf1 != tf2)
    ++number_of_orientations;  // hetero dimer
  
  const int number_of_spaced_dimer_cases = use_dimers ? (dmax + 1) * number_of_orientations : 0;
  const int number_of_overlapping_dimer_cases = use_dimers ? (-dmin) * number_of_orientations : 0;
    

  //typedef typename double_cob_type::extent_range range;
  typedef boost::multi_array<double, 2>::extent_range range;
  boost::multi_array<double, 2> dimer_lambdas(boost::extents[number_of_orientations][range(dmin,dmax+1)]);
  int temp = std::min(0, dmax+1);
  boost::multi_array<std::string, 2> dimer_seeds(boost::extents[number_of_orientations][range(dmin,temp)]);

  boost::multi_array<dmatrix, 2> overlapping_dimer_PWM(boost::extents[number_of_orientations][range(dmin, max_dist_for_deviation+1)]);


  if (use_dimers) {
    // These were given on the command line
    double uniform_lambda = dimer_lambda_fraction / (number_of_overlapping_dimer_cases + number_of_spaced_dimer_cases);
    
    // Fill dimer matrix with the rest of overlapping cases
    for (int o=0; o < number_of_orientations; ++o) {
      std::string seed1, seed2;
      boost::tie(seed1, seed2) = get_seeds_according_to_hetero_orientation(o, fixed_seeds[tf1], fixed_seeds[tf2], use_rna);
      for (int d=dmin; d < std::min(0, dmax+1); ++d) {
	if (dimer_seeds[o][d] != "")                  // value already set
	  continue;
	dimer_seeds[o][d] = create_overlapping_seed(seed1, seed2, d, my_gapped_kmer_context);
	dimer_lambdas[o][d] = uniform_lambda;
      }
    }
 
    // Fill dimer matrix with the rest of spaced cases
    for (int o=0; o < number_of_orientations; ++o) {
      for (int d=0; d <= dmax; ++d) {
	if (dimer_lambdas[o][d] != 0.0)                  // value already set
	  continue;
	dimer_lambdas[o][d] = uniform_lambda;
	//	spaced_dimer_lambda.push_back(uniform_lambda);
      }
    }

  } // end if use_dimers


  //  if (seeds_given) {
  std::vector<boost::tuple<dmatrix,dmatrix> > oriented_dimer_matrices(4);
  for (int o=0; o < number_of_orientations; ++o) {
    oriented_dimer_matrices[o] = get_matrices_according_to_hetero_orientation(o, fixed_M[tf1], fixed_M[tf2], use_rna);
    
    for (int d=dmin; d < std::min(0, dmax+1); ++d) {
      std::string seed = dimer_seeds[o][d];
      overlapping_dimer_PWM[o][d] = multinomial1_multimer_bernoulli_corrected(seed, sequences);
      //       = find_snips_multimer(seed, sequences);
      if (use_pseudo_counts)
	pseudo_counts.add(overlapping_dimer_PWM[o][d]);
      normalize_matrix_columns(overlapping_dimer_PWM[o][d]);
    }  // end for d
    for (int d=0; d <= max_dist_for_deviation; ++d) {
      overlapping_dimer_PWM[o][d] =
	normalize_matrix_columns_copy(matrix_product(oriented_dimer_matrices[o].get<0>(),
						     oriented_dimer_matrices[o].get<1>(),
						     d));


	}
  } // end for o
  //  }

  cob_params_t cb(tf1, tf2, dimer_lambdas, dimer_seeds, overlapping_dimer_PWM, fixed_seeds, L, dmin, dmax, max_dist_for_deviation);

  return cb;
}


dmatrix
get_meme_init_pwm(const std::string& seed)
{
  double x = 0.5;
  int k = seed.length();
  dmatrix result(4, k);
  for (int c=0; c < k; ++c) {
    int seed_row = to_int(seed[c]);
    for (int r=0; r < 4; ++r) {
      result(r,c) = (r == seed_row ? x : x / 3);
    }
  }

  return result;
}

std::vector<cob_combination_t>
get_all_cob_combinations(int fixed_p)
{
  std::vector<cob_combination_t> cob_combinations;
  for (int i=0; i < fixed_p; ++i) {
    for (int j=i; j < fixed_p; ++j) {
      cob_combinations.push_back(boost::make_tuple(i,j));
    }
  }
  return cob_combinations;
}

void
reverse_complement_sequences(std::vector<std::string>& sequences)
{
  int lines = sequences.size();
  for (int i=0; i < lines; ++i)
    sequences[i] = reverse_complement(sequences[i]);
}

void
reverse_complement_rna_sequences(std::vector<std::string>& sequences)
{
  int lines = sequences.size();
  for (int i=0; i < lines; ++i)
    sequences[i] = reverse_complement_rna(sequences[i]);
}

int main(int argc, char* argv[])
{
  TIME_START(t);
  WALL_TIME_START(t2);
#ifdef FE_NOMASK_ENV
  feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);   // These exceptions cause trap to occur
#endif

  if (argc > 1) {
    printf("Command line was: ");
    print_command_line(argc, argv);
  }
  
  use_two_strands = true;
  bool dmin_given = false;
  bool dmax_given = false;
  bool max_dist_for_deviation_given = false;
  bool use_reverse_strand=false;
  int unique_param = -1; // either -1, 0, 1, ...
  std::vector<int> dmina;
  std::vector<int> dmaxa;
  std::vector<int> max_dist_for_deviationa;
  using namespace boost;
  using std::string;
  using std::cout;
  double epsilon = 0.01;
  double extension_threshold = 2.00;
  std::vector<cob_combination_t> cob_combinations;
  string prior_parameter = "dirichlet";

  std::vector<std::string> sequences;
  std::vector<bool> keep_monomer_fixed;
  
  combine_seeds_func = combine_seeds_methods[default_combine_method].function;

  int number_of_threads=1;
#if defined(_OPENMP)
  {
    char* p = getenv("OMP_NUM_THREADS"); 
    if (p)
      number_of_threads = atoi(p);
  }
#endif

  
  // Declare the supported options.
  po::options_description hidden("Allowed options");
  hidden.add_options()
    ("sequencefile",                           "Name of the sequence file containing sequences")
    ("parameterlist", po::value<string>(), "Seeds, or names of the initial matrix files, separated by commas")
    ;

  po::options_description nonhidden("Allowed options");
  nonhidden.add_options()
    ("help",                           "Produce help message")
    ("matrices",                          "Matrix filenames are given as parameters instead of seeds")
    ("keep-monomer-fixed",            po::value<std::string>(), "A list of indices of monomers to keep fixed during EM algorithms. You can also specify 'all' as parameter, default: no monomers are fixed.")
    ("disable-multinomial",                    m("Use plain alignment of sequences instead of the multinomial model", not use_multinomial).c_str())
    ("directional-seed",                    m("Are overlapping seeds required to be non-palindromic", require_directional_seed).c_str())
    ("hamming-radius", po::value<int>(),     m("Hamming radius", hamming_radius).c_str())
    ("meme-init",                    m("Derive the initial PWM from the seed using MEME style initialization instead of multinomial style", use_meme_init).c_str())
    ("flanks",  m("Get full flanks for each model", get_full_flanks).c_str())

    ("max-iter", po::value<int>(),   m("Maximum number of iterations", max_iter).c_str())
    ("epsilon", po::value<double>(),        m("Epsilon defines convergence of EM (double). Elementwise maximum distance between two consequent matrices", epsilon).c_str())

    ("cob", po::value<string>(),     "List of cob tables wanted. Example format: --cob '0-0,1-1,0-1', "
                                     "where the numbers refer to the indices of the list of monomers. "
                                     "Note, that the indexing starts from zero. "
                                     "Use option '--cob all' to create all combinations.")
    ("cob-cutoff", po::value<double>(),        m("Cob cutoff constant", cob_cutoff).c_str())

    ("dmin", po::value<std::string>(),   "Smallest negative distance in dimer, comma separated list or a single global value, default: half of the length of the shorter monomer")
    ("dmax", po::value<std::string>(),   m("Maximum positive distance in dimer, comma separated list or a single global value", global_dmax).c_str())
    ("max-gap-learned", po::value<std::string>(),   m("Maximum positive distance in dimer for which the gap is learned, comma separated list or a single global value", global_max_dist_for_deviation).c_str())
    ("minimum-distance-for-learning", po::value<int>(),   m("Minimum distance in dimer cases for a site to be used for monomer model learning", minimum_distance_for_learning).c_str())
    ("ic-threshold", po::value<double>(),   m("Information content threshold for an overlapping dimer to be accepted", ic_threshold).c_str())
    ("min-fraction-for-learning", po::value<double>(),   m("The monomers are learned from the dimeric cases alone, when the fraction of dimeric cases "
							   "with distance >=minimum-distance-for-learning is at least this threshold", learning_fraction).c_str())
    
    // positional-background is not implemented currently
    //    ("positional-background",        m("Use positional model for background", use_positional_background).c_str())

    // not implemented
    //("markov-model",                 m("Use markov model for background", use_markov_background).c_str())

    // not used currently ("extension-threshold", po::value<double>(),   m("Information content threshold for extending models. Default: no extension", extension_threshold).c_str())
    //("fixed-lambdalist", po::value<string>(),     "Comma separated list of mixing parameters, one for each matrix")


    ("unbound", po::value<string>(&unbound), "File to store the unbound sequences")
    ("output", m("Write model parameters to files", use_output).c_str())
    ("names", po::value<string>(),     "Names for the monomer models. Comma separated list. Default: TF0,TF1, ...")
    ("outputdir", po::value<string>(), m("Directory where to put the learned matrices", outputdir).c_str())
    ("number-of-threads", po::value<int>(), m("Number of parallel threads", number_of_threads).c_str())
    ("prior", po::value<std::string>(), m("Choose either addone or dirichlet prior, or none for no pseudo-counts", prior_parameter).c_str())
    ("single-strand", m("Only consider binding sites in forward strand", not use_two_strands).c_str())
    ("reverse-strand", m("Take reverse complement of each input strand. Implies single strand.", use_reverse_strand).c_str())
    ("rna", m("Assume input sequences are RNA (experimental)", use_rna).c_str())
    ("unique", po::value<std::string>(), "Uniqueness of sequences. Either off, unique, 1, 2, 3, ..., default: off")
    ("quiet", m("Don't print intermediate results", not local_debug).c_str())
    ;

  po::options_description desc("Allowed options");
  desc.add(hidden).add(nonhidden);

  po::positional_options_description popt;
  popt.add("sequencefile", 1);
  popt.add("parameterlist", 1);

  
  string seqsfile;
  std::vector<std::string> fixedmatrixfilelist;
  std::vector<std::string> fixedseedlist;

  double background_lambda;
  std::vector<double> fixed_lambda;
  std::vector<double> overlapping_dimer_lambda;
  std::vector<double> spaced_dimer_lambda;



  po::variables_map vm;
  int fixed_p = 0; // number of fixed models
  boost::multi_array<std::string, 2> dimer_seeds;

   ////////////////////////////////
  // parse command line parameters
  ////////////////////////////////

  try {
    po::store(po::command_line_parser(argc, argv).
	      options(desc).positional(popt).run(), vm);
    po::notify(vm);

    // are all positional parameters present
    bool all_positional = vm.count("sequencefile") && vm.count("parameterlist");

    if (vm.count("help") || not all_positional) {
      cout << basename(argv[0]) << " version " << PACKAGE_VERSION << std::endl;
      cout << "Usage:\n" 
	   << basename(argv[0]) << " [options] sequencefile seedlist\n"
	   << basename(argv[0]) << " [options] --matrices sequencefile matrixfilelist\n";
      print_positional_options(popt, hidden);
      cout << nonhidden << "\n";
      return 1;
    }

    if (vm.count("rna")) 
      use_rna = true;

    if (vm.count("reverse-strand")) {
      use_two_strands = false;
      use_reverse_strand = true;
    }

    if (vm.count("markov-model")) 
      use_markov_background = true;

    if (vm.count("single-strand")) 
      use_two_strands = false;

    if (vm.count("unique")) {
      std::string arg = vm["unique"].as< string >();
      if (arg == "off")
	unique_param = -1;
      else if (arg == "unique")
	unique_param = 0;
      else {
	unique_param = atoi(arg);
	error(unique_param <= 0, "Argument to --unique must be one of the following: off, unique, 1, 2, 3, ...");
      }
    }
    

    if (vm.count("quiet")) 
      local_debug = false;

    if (vm.count("flanks")) 
      get_full_flanks = true;

    if (vm.count("positional-background")) 
      use_positional_background = true;

    if (vm.count("disable-multinomial")) 
      use_multinomial = false;
 
    if (vm.count("directional-seed")) 
      require_directional_seed = true;

    if (vm.count("meme-init")) 
      use_meme_init = true;

    if (vm.count("output")) 
      use_output = true;

    if (vm.count("outputdir")) {
      outputdir = vm["outputdir"].as< string >();
      use_output = true;
    }

    if (vm.count("prior")) {
      prior_parameter = vm["prior"].as< string >();
      if (prior_parameter == "addone" || prior_parameter == "dirichlet")
	use_pseudo_counts=true;
      else if (prior_parameter == "none") {
	use_pseudo_counts=false;
      }
      else {
	error(true, to_string("Invalid prior: %s.\nAllowed options are addone, dirichlet, or none\n", prior_parameter.c_str()));
      }
    }   

#if defined(_OPENMP)
    if (vm.count("number-of-threads")) {
      number_of_threads = vm["number-of-threads"].as< int >();
    }
    omp_set_num_threads(number_of_threads);
#endif

    if (vm.count("hamming-radius"))
      hamming_radius = vm["hamming-radius"].as< int >();
    error(hamming_radius <= 0, "hamming-radius must be positive integer.");

    if (vm.count("max-iter"))
      max_iter = vm["max-iter"].as< int >();
    error(max_iter <= 0, "max-iter must be positive integer.");

    if (vm.count("cob-cutoff"))
      cob_cutoff = vm["cob-cutoff"].as< double >();
    error(cob_cutoff <= 0.0 or cob_cutoff >= 1.0, "cob-cutoff must be between 0 and 1.");

    if (vm.count("minimum-distance-for-learning"))
      minimum_distance_for_learning = vm["minimum-distance-for-learning"].as< int >();
    error(minimum_distance_for_learning < 0, "minimum-distance-for-learning must be non-negative.");

    if (vm.count("min-fraction-for-learning"))
      learning_fraction = vm["min-fraction-for-learning"].as< double >();
    error(learning_fraction < 0.0 or learning_fraction > 1.0, "min-fraction-for-learning must be between 0 and 1.");

    if (vm.count("ic-threshold"))
      ic_threshold = vm["ic-threshold"].as< double >();
    error(ic_threshold < 0.0 or ic_threshold > 1.0, "ic-threshold must be between 0 and 1.");

    seqsfile   = vm["sequencefile"].as< string >();

    if (vm.count("overlap-maximize")) {
      hamming_distance_overlapping_seeds_N = vm["overlap-maximize"].as< int >();
      maximize_overlapping_seeds = true;
    }
    error(hamming_distance_overlapping_seeds_N < 0 or hamming_distance_overlapping_seeds_N > 2, "overlap maximize must be either 0, 1 or 2.");


    if (vm.count("overlap-combine")) {
      std::string method = vm["overlap-combine"].as<std::string>();
      if (method == combine_seeds_methods[COMBINE_SEEDS_OR].name)
	combine_seeds_func = combine_seeds_methods[COMBINE_SEEDS_OR].function;
      else if (method == combine_seeds_methods[COMBINE_SEEDS_AND].name)
	combine_seeds_func = combine_seeds_methods[COMBINE_SEEDS_AND].function;
      else if (method == combine_seeds_methods[COMBINE_SEEDS_N].name)
	combine_seeds_func = combine_seeds_methods[COMBINE_SEEDS_N].function;
      else {
	error(true, "The overlap-combine method must be either union, intersection or N.");
      }

    }

    // Command line parameters for PWM models

    if (vm.count("parameterlist")) {
      if (vm.count("matrices")) {         // matrices were given
	fixedmatrixfilelist = split(vm["parameterlist"].as< string >(), ',');
	fixed_p = fixedmatrixfilelist.size();   // number of monomer models
	seeds_given=false;
      } else {                         // Initial seed were given
	fixedseedlist = split(vm["parameterlist"].as< string >(), ',');
	BOOST_FOREACH(std::string& seed, fixedseedlist) {
	  boost::to_upper(seed);
	  error(not is_iupac_string(seed), "Seed must be an IUPAC string");
	}
	seeds_given=true;
	fixed_p = fixedseedlist.size();   // number of models
      }
    }

    if (vm.count("cob")) {
      if (vm["cob"].as<std::string>() == "all")
	cob_combinations = get_all_cob_combinations(fixed_p);
      else {
	std::vector<std::string> cob_tables = split(vm["cob"].as<std::string>(), ',');
	BOOST_FOREACH(std::string s, cob_tables) {
	  std::vector<std::string> temp = split(s, '-');
	  error(temp.size() != 2,
		to_string("Cob combinations given to --cob should be pairs of integers separated by '-'. Error in %s", s.c_str()));

	  int tf1 = atoi(temp[0]);
	  int tf2 = atoi(temp[1]);
	  error(tf1 < 0 or tf1 >= fixed_p, to_string("TF index pair in %s out of range.", s.c_str()));
	  error(tf2 < 0 or tf2 >= fixed_p, to_string("TF index pair in %s out of range.", s.c_str()));
	  cob_combinations.push_back(make_tuple(tf1, tf2));
	}
      }
    }
    int number_of_cobs = cob_combinations.size();
    std::string cob_combinations_string;
    for (int i=0; i < number_of_cobs; ++i) {
      std::string c = to_string("%i-%i", cob_combinations[i].get<0>(), cob_combinations[i].get<1>());
      if (i == 0)
	cob_combinations_string = c;
      else
	cob_combinations_string.append("," + c);
    }
    printf("Cob combinations are %s\n", cob_combinations_string.c_str());
    if (vm.count("dmin")) {
      std::vector<std::string> dmin_array = split(vm["dmin"].as<std::string>(), ',');

      if (dmin_array.size() == 1 and number_of_cobs > 0) {
	int value = atoi(dmin_array[0]);
	error(value > 0, "dmin values must be non-positive");
	dmina.assign(number_of_cobs, value);
      }
      else {
	error(dmin_array.size() != number_of_cobs, "Different number of cob tables and dmin values");
	BOOST_FOREACH(std::string s, dmin_array) {
	  int value = atoi(s);
	  error(value > 0, "dmin values must be non-positive");
	  dmina.push_back(value);
	}
      }
      dmin_given = true;
    }

    if (vm.count("dmax")) {
      std::vector<std::string> dmax_array = split(vm["dmax"].as<std::string>(), ',');

      if (dmax_array.size() == 1 and number_of_cobs > 0) {
	int value = atoi(dmax_array[0]);
	error(value < 0, "dmax values must be non-negative");
	dmaxa.assign(number_of_cobs, value);
      }
      else {
	error(dmax_array.size() != number_of_cobs, "Different number of cob tables and dmax values");
	BOOST_FOREACH(std::string s, dmax_array) {
	  int value = atoi(s);
	  error(value < 0, "dmax values must be non-negative");
	  dmaxa.push_back(value);
	}
      }
      dmax_given = true;
    }

    if (vm.count("max-gap-learned")) {
      std::vector<std::string> max_dist_for_deviation_array = split(vm["max-gap-learned"].as<std::string>(), ',');

      if (max_dist_for_deviation_array.size() == 1 and number_of_cobs > 0) {
	int value = atoi(max_dist_for_deviation_array[0]);
	//error(value < 0, "dmax values must be non-negative");
	max_dist_for_deviationa.assign(number_of_cobs, value);
      }
      else {
	error(max_dist_for_deviation_array.size() != number_of_cobs,
	      "Different number of cob tables and max-gap-learned values");
	BOOST_FOREACH(std::string s, max_dist_for_deviation_array) {
	  int value = atoi(s);
	  //error(value < 0, "dmax values must be non-negative");
	  max_dist_for_deviationa.push_back(value);
	}
      }
      max_dist_for_deviation_given = true;
    }

    if (vm.count("names")) {         // Names given for the monomer models/seeds
      std::vector<std::string> namelist = split(vm["names"].as< string >(), ',');
      error(namelist.size() != fixed_p, "For each seed/pfm a name should be given");
      names = namelist;
      // BOOST_FOREACH(std::string& seed, fixedseedlist) {
      // }
    }
    else {
      names.clear();
      for (int i=0; i < fixed_p; ++i)
	names.push_back(to_string("TF%i", i));
    }

    fixed_lambda.assign(fixed_p, 0.0);  // Will be assigned later

    if (vm.count("keep-monomer-fixed")) {
      std::string param = vm["keep-monomer-fixed"].as< std::string >();
      if (param == "all") 
	keep_monomer_fixed.assign(fixed_p, true);
      else {
	keep_monomer_fixed.assign(fixed_p, false);
	std::vector<std::string> param_list = split(param, ',');
	BOOST_FOREACH(std::string s, param_list) {
	  int tf = atoi(s);
	  if (tf < 0 || tf >= fixed_p) {
	    fprintf(stderr, "Parameter %s to option --keep-monomer-fixed has to be an integer in the range [0,%i)\n", s.c_str(), fixed_p);
	    exit(1);
	  }
	  keep_monomer_fixed[tf] = true;
	}
      }
    }
    else
      keep_monomer_fixed.assign(fixed_p, false);

    if (vm.count("epsilon")) {
      epsilon    = vm["epsilon"].as< double >();
      error(epsilon <= 0.0, "Epsilon must be positive real number.");
    }

    if (vm.count("extension-threshold")) {
      extension_threshold = vm["extension-threshold"].as< double >();
      allow_extension = true;
      error(extension_threshold <= 0.0, "Epsilon must be positive real number.");
    }

  }
  catch (std::exception& e) {
    std::cerr << e.what() << std::endl;
    std::cerr << desc << "\n";
    exit(1);
  }


  char hostname[200+1];
  hostname[200] = 0;
  gethostname(hostname, 200);
  time_t now;
  time(&now);
  printf("Starting program at %s", asctime(localtime(&now))); 
  printf("MODER version %s\n", PACKAGE_VERSION);
  printf("Running on host: %s\n", hostname);
  printf("Using %i openmp threads.\n", number_of_threads);
  
  if (combine_seeds_func == combine_seeds_OR)
    printf("Using OR combine function.\n");
  else if (combine_seeds_func == combine_seeds_AND)
    printf("Using AND combine function.\n");
  else if (combine_seeds_func == combine_seeds_masked)
    printf("Using masked (N) combine function.\n");
  else {
    error(true, "Error: combine_seeds_func not initialized.\n");
  }
  
  double background_lambda_fraction = 0.5;
  double PWM_lambda_fraction = cob_combinations.size() == 0 ? 0.5 : 0.3;
  double per_PWM_lambda_fraction = PWM_lambda_fraction / fixed_p;
  double dimer_lambda_fraction = 
    1.0 - (background_lambda_fraction + fixed_p * per_PWM_lambda_fraction);


  background_lambda = background_lambda_fraction;

  for (int k=0; k < fixed_p; ++k) {
    fixed_lambda[k] = per_PWM_lambda_fraction;
  }

  if (use_rna) {
    orients = rna_orients;
    use_two_strands = false;
  }
  
  int lines, bad_lines;


  std::vector<std::string> parts = split(seqsfile, '.');
  std::string extension = boost::to_lower_copy(parts.back());

  if (extension == "fasta" or extension == "fa") {
    printf("Reading from fasta file %s\n", seqsfile.c_str());
    boost::tie(lines, bad_lines) = read_fasta_sequences(seqsfile, sequences);
  }
  else if (extension == "fastq") {
    printf("Reading from fastq file %s\n", seqsfile.c_str());
    boost::tie(lines, bad_lines) = read_fastq_sequences(seqsfile, sequences);
  }
  else {
    printf("Reading from sequence-per-line file %s\n", seqsfile.c_str());
    boost::tie(lines, bad_lines) = read_sequences(seqsfile, sequences);
  }

  if (use_rna)
    check_data(sequences, "ACGU");
  else    
    check_data(sequences);
  
  if (use_reverse_strand) {
    if (use_rna)
      reverse_complement_rna_sequences(sequences);
    else
      reverse_complement_sequences(sequences);
  }

  printf("Read %zu good lines from file %s\n", sequences.size(), seqsfile.c_str());
  printf("Discarded %i bad lines\n", bad_lines);
  if (unique_param >= 0) {
    TIME_START(s1);
    sequences = remove_duplicate_reads_faster(sequences, unique_param);
    TIME_PRINT("\tRemoving Hamming duplicates took %.2f seconds.\n", s1);
  }
  printf("Using %zu sequences\n", sequences.size());
  std::vector<int> L(lines);
  int Lmin = std::numeric_limits<int>::max();
  int Lmax = std::numeric_limits<int>::min();
  for (int i=0; i < lines; ++i) {
    L[i] = sequences[i].length();
    if (L[i] < Lmin)
      Lmin = L[i];
    if (L[i] > Lmax)
      Lmax = L[i];
  }
  printf("Minimum sequence length is %i\n", Lmin);
  printf("Maximum sequence length is %i\n", Lmax);
  printf("Use markov model for background: %s\n", 
	 yesno(use_markov_background));
  printf("Use positional model for background: %s\n", 
	 yesno(use_positional_background));
  printf("Use RNA alphabet: %s\n", yesno(use_rna));
  printf("Use two dna strands: %s\n", yesno(use_two_strands));
  printf("Avoid palindromes: %s\n", yesno(avoid_palindromes));
  printf("Use reverse strand: %s\n", yesno(use_reverse_strand));
  printf("Use multinomial model: %s\n", yesno(use_multinomial));
  if (use_multinomial)
    printf("Hamming radius is %i\n", hamming_radius);  
  printf("Use pseudo counts: %s\n", yesno(use_pseudo_counts));
  printf("Get full flanks: %s\n", yesno(get_full_flanks));
  printf("Allow extension: %s\n", yesno(allow_extension));
  if (allow_extension)
    printf("extension threshold: %.2f\n", extension_threshold);
  printf("Minimum distance for learning is %i\n", minimum_distance_for_learning);
  printf("Minimum fraction for learning is %f\n", learning_fraction);
  printf("Information content threshold is %f\n", ic_threshold);
  printf("Cob cutoff constant is %f\n", cob_cutoff);
  
  
  printf("Epsilon is %g\n", epsilon);
  printf("Maximum number of iterations is %i\n", max_iter);
  std::vector<int> character_frequencies;
  
  boost::tie(background_frequencies, background_frequency_matrix, character_frequencies) = count_background(sequences, use_rna);
  int CG = background_frequencies[1] + background_frequencies[2];
  printf("CG content: %lg\n", (double)CG/sum(background_frequencies));
  printf("Sequences contain %i characters\n", character_count);

  //if (use_markov_background) {
  printf("Background frequency matrix:\n");
  background_frequency_matrix.print(10);
  printf("Di-nucleotide count is %i\n", digram_count);
  background_probability_matrix = background_frequency_matrix;
  normalize_matrix_rows(background_probability_matrix);
  printf("Background probility matrix:\n");
  background_probability_matrix.print(10);
  assert(is_row_stochastic_matrix(background_probability_matrix));
    //}

  // normalize background vector
  background_probabilities = background_frequencies;
  normalize_vector(background_probabilities);
  
  printf("Empirical background probability: ");
  printf("[%g,%g,%g,%g]\n", 
	 background_probabilities[0],background_probabilities[1],
	 background_probabilities[2],background_probabilities[3]);

  printf("Using background probability: ");
  printf("[%g,%g,%g,%g]\n", background_probabilities[0],background_probabilities[1],
	 background_probabilities[2],background_probabilities[3]);

  if (prior_parameter == "addone")
    pseudo_counts.use_add_one(0.000001);
  else if (prior_parameter == "dirichlet")
    pseudo_counts.use_dirichlet(0.01, background_probabilities);

  if (use_pseudo_counts) {
    printf("Use prior: %s\n", prior_parameter.c_str());
    printf("\twith probabilities: ");
    std::vector<double> d = pseudo_counts.get();
    for (int i=0; i < 4; ++i)
      printf("%.4e ", d[i]);
    printf("\n");
  }

  //  positional_background = count_positional_background(sequences);

  //if (use_pseudo_counts)
  //  add_pseudo_counts(positional_background);
  normalize_matrix_columns(positional_background);
  assert(is_column_stochastic_matrix(positional_background));
  // assert(is_palindromic_matrix(positional_background));
  // write_matrix(stdout, positional_background, "Positional background probility matrix:\n", "%10lf");

  printf("Keep monomer fixed: %s\n", print_vector(keep_monomer_fixed).c_str());

  // initial motifs
  std::vector<dmatrix> fixed_M(fixed_p);

  if (seeds_given) {
    printf("Initial fixed seeds are %s\n", print_vector(fixedseedlist).c_str());

    char forbidden_char = use_rna ? 'T' : 'U';
    for (int k=0; k < fixed_p; ++k) {
      error(fixedseedlist[k].length() > Lmin, "Seed is longer than the sequences");
      int count = std::count(fixedseedlist[k].begin(), fixedseedlist[k].end(), forbidden_char);
      if (count > 0) {
	fprintf(stderr, "Nucleotide %c forbidden in current alphabet\n", forbidden_char);
	exit(1);
      }
      if (not use_meme_init) {
	fixed_M[k] = multinomial1_multimer_bernoulli_corrected(fixedseedlist[k], sequences);
	//fixed_M[k] = find_snips_multimer_helper(fixedseedlist[k], sequences).get<0>();
	write_matrix(stdout, fixed_M[k], to_string("Unnormalized initial fixed matrix %i from seed %s:\n", 
						   k, fixedseedlist[k].c_str()), "%.6f");
	if (use_pseudo_counts)
	  pseudo_counts.add(fixed_M[k]);
	normalize_matrix_columns(fixed_M[k]);
	if (use_multinomial and adjust_seeds)
	  fixedseedlist[k] = string_giving_max_score(fixed_M[k], use_rna);
      }
      else
	fixed_M[k] = get_meme_init_pwm(fixedseedlist[k]);
      write_matrix(stdout, fixed_M[k], to_string("Initial fixed matrix %i from seed %s:\n", 
						 k, fixedseedlist[k].c_str()), "%.6f");
    }
  } else {
    fixedseedlist.resize(fixed_p);
    for (int k=0; k < fixed_p; ++k) {
      fixed_M[k] = read_matrix_file(fixedmatrixfilelist[k]);
      error(fixed_M[k].get_columns() > Lmin, "Matrix is wider than the sequences");
      if (use_pseudo_counts)
	pseudo_counts.add(fixed_M[k]);
      normalize_matrix_columns(fixed_M[k]);
      write_matrix(stdout, fixed_M[k], to_string("Initial matrix %i from file %s:\n", k, fixedmatrixfilelist[k].c_str()), "%.6f");
      assert(is_column_stochastic_matrix(fixed_M[k]));
      fixedseedlist[k] = string_giving_max_score(fixed_M[k], use_rna);
      //if (use_multinomial)
      //	assert(M[k].get_columns() == seedlist[k].length());
    }
    printf("Initial fixed seeds are %s\n", print_vector(fixedseedlist).c_str());

    printf("Read %i matrices\n", fixed_p);
    
  }



  gapped_kmer_context my_gapped_kmer_context(sequences);

  if (outputdir != ".")
    mkdir(outputdir.c_str(), S_IRWXU);
  //pwm_list_to_logos(to_string("%s/init.png", outputdir.c_str()), M);

  if (cob_combinations.size() > 0)
    dimer_lambda_fraction /= cob_combinations.size();

    
  std::vector<cob_params_t> my_cob_params;
  int i=0;
  BOOST_FOREACH(cob_combination_t cob_combination, cob_combinations) {
    int tf1, tf2;
    boost::tie(tf1, tf2) = cob_combination;
    int dmin, dmax;
    int max_dist_for_deviation;   // The deviation tables will be learned for distances in [dmin,max_dist_for_deviation]
    int k1 = fixedseedlist[tf1].length();
    int k2 = fixedseedlist[tf2].length();
    int mink = std::min(k1, k2);

    if (dmin_given)
      dmin = dmina[i];
    else
      dmin = -mink/2;

    if (dmax_given)
      dmax = dmaxa[i];
    else
      dmax = global_dmax;

    if (max_dist_for_deviation_given)
      max_dist_for_deviation = max_dist_for_deviationa[i];
    else
      max_dist_for_deviation = global_max_dist_for_deviation;
    
    // Make sure these are within reasonable limits
    dmin = std::max(-mink + 1, dmin);
    //    dmax = std::min(dmax, Lmax - k1 - k2);
    dmax = std::min(dmax, Lmin - k1 - k2);
    error(dmax < dmin, "Requested dimeric cases do not fit into the input sequence");
    max_dist_for_deviation = std::min(max_dist_for_deviation, dmax);
    cob_params_t cp = create_cob(cob_combination, fixedseedlist, fixed_M, dimer_lambda_fraction, 
				 sequences, L, my_gapped_kmer_context, dmin, dmax, max_dist_for_deviation);
    cp.update_oriented_matrices(fixed_M, fixedseedlist);
    cp.compute_expected_matrices(fixed_M);
    cp.compute_deviation_matrices();
    printf("Maximum distance for gap learning for cob case %s is %i\n", cp.name().c_str(), max_dist_for_deviation);

    for (int o=0; o < cp.number_of_orientations; ++o) {
      for (int d=cp.dmin; d < std::min(0, cp.dmax+1); ++d) {
	write_matrix(stdout, cp.deviation[o][d], 
		       to_string("Initial deviation matrix %s %s %i:\n", cp.name().c_str(),
				 orients[o], d).c_str(), "%.6f");
      }
    }

    my_cob_params.push_back(cp);
    ++i;
  }

  double dimer_lambdas_sum = 0.0;
  for (int r=0; r < my_cob_params.size(); ++r)
    dimer_lambdas_sum += sum(my_cob_params[r].dimer_lambdas);
  double temp_sum = sum(fixed_lambda) + dimer_lambdas_sum + background_lambda;
  assert(fabs(temp_sum - 1.0) < 0.001);



  printf("Initial fixed lambdas are %s\n", print_vector(fixed_lambda).c_str());  

  if (use_dimers) {
    for (int r=0; r < my_cob_params.size(); ++r) {
      print_cob(stdout, my_cob_params[r].dimer_lambdas, 
		to_string("Initial dimer lambdas %s:\n", my_cob_params[r].name().c_str()), "%.5f");
      printf("Initial sum of dimer lambdas %s is %.2f\n", my_cob_params[r].name().c_str(), sum(my_cob_params[r].dimer_lambdas));
    }
  }
  
  printf("Initial background lambda is %.4f\n", background_lambda);


  // run the algorithm
  std::vector<dmatrix> res=
    multi_profile_em_algorithm(sequences, 
			       fixed_M,
			       keep_monomer_fixed,
			       background_probability_matrix, background_probabilities, 
			       background_lambda, 
			       fixed_lambda, fixedseedlist, 
			       my_cob_params,
			       epsilon, extension_threshold, my_gapped_kmer_context);
  
  TIME_PRINT("Whole program took %.2f seconds cpu-time\n", t);
  WALL_TIME_PRINT("Whole program took %.2f seconds wall-time\n", t2);

  if (false)
    SSS("dummy");   // Just make sure that code for SSS is included in the executable. For debugging purpose.
  return 0;
}
back to top