Content - 7eef112e73f26d724b7c8b3e7c44e769b8f3ee90 - 9b150e1/matrix_tools.cpp

visit type:
Tip revision: c485231e5b468ae509306e1aaeebaa0f3004572d authored by Jarkko Toivonen on 31 March 2020, 18:10:18 UTC
Fixed indexing bug.
Tip revision: c485231
matrix_tools.cpp
/*

    MODER is a program to learn DNA binding motifs from SELEX datasets.
    Copyright (C) 2016, 2017  Jarkko Toivonen,
    Department of Computer Science, University of Helsinki

    MODER is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    MODER is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

*/
#include "matrix_tools.hpp"
#include "common.hpp"

std::vector<double>
parse_row(char* tmp)
{
  std::vector<double> v;
  int m;
  double d;
  while (true) {
    if (sscanf(tmp, "%lg%n", &d, &m) != 1)
      break;
    v.push_back(d);
    tmp += m;
  }
  return v;
}

std::vector<double>
read_row(FILE* fp)
{
  char* line=NULL;
  size_t n;
  if (getline(&line, &n, fp) == -1)
    return std::vector<double>();
  std::vector<double> v = parse_row(line);
  free(line);

  return v;
}

dmatrix
dimensionless_read(FILE* fp, char* line)
{
  std::vector<double> v = parse_row(line);
  free(line);

  int cols = v.size();
  if (cols == 0)
    throw fileformat_error("Couldn't read matrix elements");

  std::vector<std::vector<double> > rows;
  rows.push_back(v);

  while (true) {
    v = read_row(fp);
    if (v.size() == cols)
      rows.push_back(v);
    else 
      break;
  }
  
  matrix<double> m(rows.size(), cols);
  for (int i=0; i < rows.size(); ++i)
    m.set_row(i, rows[i]);
  return m;

}

matrix<double>
read_matrix(FILE* fp)
{
  assert(fp != NULL);
  int rows, cols;
  double d;
  char* line=NULL;
  size_t n;
  if (getline(&line, &n, fp) == -1)
    throw fileformat_error("Couldn't read matrix elements");

  int count=sscanf(line, "%ix%i\n", &rows, &cols);
  if (count != 2)
    return dimensionless_read(fp, line);
  free(line);

  assert(count==2);
  //printf("%ix%i\n", rows, cols);

  matrix<double> m(rows, cols);
  for (int i=0; i<rows; ++i)
    for (int j=0; j<cols; ++j) {
      if (fscanf(fp, "%lg", &d) != 1)
	throw fileformat_error("Couldn't read matrix elements");
      m(i,j)=d;
    }
  return m;
}



void
write_matrix(FILE* fp, const matrix<double>& m, const std::string& tag, 
	     std::string format, bool dimensions)
{
  fprintf(fp, "%s", tag.c_str());

  int r = m.get_rows();
  int c = m.get_columns();
  if (dimensions)
    fprintf(fp, "%ix%i\n", r, c);

  if (format != "") 
    m.print3(fp, format);
  else
    m.print3(fp, "%10lf", "\t");

}

void
write_matrix_file(const std::string& matrixfile, const dmatrix& M, std::string format)
{
  FILE* fp=fopen(matrixfile.c_str(), "w");
  if (fp == NULL) {
    fprintf(stderr, "Couldn't create matrixfile %s\n", matrixfile.c_str());
    perror("write_matrix_file");
    exit(1);
  }
  write_matrix(fp, M, "", format, false); // write used to file
  fclose(fp);
  
  // This should be conditional to some log level request
  //printf("Wrote resulting matrix to file %s\n", matrixfile.c_str());
}

dmatrix
read_matrix_file(const std::string& matrixfile)
{
  FILE* fp = fopen(matrixfile.c_str(), "r");
  if (fp == NULL) {
    fprintf(stderr, "Couldn't open file %s\n", matrixfile.c_str());
    exit(1);
  }
  matrix<double> M = read_matrix(fp);
  fclose(fp);
  return M;
}

    




// void
// normalize_matrix(matrix<double>& m)
// {
//   // sums of rows should be 1
//   for (int i=0; i < m.get_rows(); ++i) {
//     double sum = 0;
//     for (int j=0; j < m.get_columns(); ++j)
//       sum += m(i,j);
    
//     for (int j=0; j < m.get_columns(); ++j)
//       m(i,j) /= sum;
//   }

// }

// void
// normalize_matrix2(matrix<double>& m)
// {
//   // sums of cols should be 1
//   for (int i=0; i < m.get_columns(); ++i) {
//     double sum = 0;
//     for (int j=0; j < m.get_rows(); ++j)
//       sum += m(j,i);
    
//     for (int j=0; j < m.get_rows(); ++j)
//       m(j,i) /= sum;
//   }

// }

void
normalize_whole_matrix(matrix<double>& m)
{
  // sums of cols should be 1
  double sum = 0;
  for (int i=0; i < m.get_columns(); ++i) {
    for (int j=0; j < m.get_rows(); ++j)
      sum += m(j,i);
  }
    
  for (int i=0; i < m.get_columns(); ++i) {
    for (int j=0; j < m.get_rows(); ++j)
      m(j,i) /= sum;
  }

}

void
normalize_matrix_rows(matrix<double>& m)
{
  // sums of rows should be 1
  for (int i=0; i < m.get_rows(); ++i) {
    double sum = 0;
    for (int j=0; j < m.get_columns(); ++j)
      sum += m(i,j);
    
    for (int j=0; j < m.get_columns(); ++j)
      m(i,j) /= sum;
  }
}

void
normalize_matrix_columns(matrix<double>& m)
{
  // sums of cols should be 1
  for (int i=0; i < m.get_columns(); ++i) {
    double sum = 0;
    for (int j=0; j < m.get_rows(); ++j)
      sum += m(j,i);
    
    for (int j=0; j < m.get_rows(); ++j)
      m(j,i) /= sum;
  }
}

dmatrix
normalize_matrix_columns_copy(matrix<double> m)
{
  normalize_matrix_columns(m);
  return m;
}

dmatrix
normalize_matrix_rows_copy(matrix<double> m)
{
  normalize_matrix_rows(m);
  return m;
}

bool
is_stochastic_matrix(const matrix<double>& m)
{
  //const double delta = 0.000001;
  const double delta = 0.00001;
  for (int i=0; i < m.get_rows(); ++i) {
    double sum = 0;
    for (int j=0; j < m.get_columns(); ++j)
      sum += m(i,j);
    if (fabs(1.0 - sum) >= delta)  // isn't close enough to 1
      return false;
  }
  return true;
}

bool
is_column_stochastic_matrix(const matrix<double>& m)
{
  return is_stochastic_matrix(transpose(m));
}

bool
is_row_stochastic_matrix(const matrix<double>& m)
{
  return is_stochastic_matrix(m);
}

bool
is_palindromic_matrix(const matrix<double>& m)
{
  int rows=m.get_rows();
  int cols=m.get_columns();
  assert(rows==4);

  int ret=1;

  int middle=static_cast<int>(ceil(cols/2.0));
  for (int j=0; j < middle; ++j) {
    ret *= (m(0, j) == m(3, cols-j-1));
    ret *= (m(1, j) == m(2, cols-j-1));
  }

  return ret;
}

dmatrix
matrix_sum(const dmatrix& m1, const dmatrix& m2, int d)
{
  int width = m1.get_columns() + d + m2.get_columns();

  dmatrix result(4, width);

  result.inject(m1, 0, 0);

  for (int i=0; i < m2.get_columns(); ++i)
    for (int c=0; c < 4; ++c)
      result(c, m1.get_columns()+d+i) += m2(c, i);

  return result;
}

dmatrix
matrix_product(const dmatrix& m1, const dmatrix& m2, int d)
{
  int width = m1.get_columns() + d + m2.get_columns();

  dmatrix result(4, width);
  result.fill_with(1.0);
  result.inject(m1, 0, 0);

  for (int i=0; i < m2.get_columns(); ++i)
    for (int c=0; c < 4; ++c)
      result(c, m1.get_columns()+d+i) *= m2(c, i);

  return result;
}
Browse the archive

https://github.com/jttoivon/MODER