https://github.com/jttoivon/MODER
Raw File
Tip revision: c485231e5b468ae509306e1aaeebaa0f3004572d authored by Jarkko Toivonen on 31 March 2020, 18:10:18 UTC
Fixed indexing bug.
Tip revision: c485231
iupac.hpp
/*

    MODER is a program to learn DNA binding motifs from SELEX datasets.
    Copyright (C) 2016, 2017  Jarkko Toivonen,
    Department of Computer Science, University of Helsinki

    MODER is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    MODER is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

*/
#include <string>
#include <cassert>
#include <vector>

typedef std::vector<double> dvector;

extern std::string iupac_chars;

class iupac_class_type
{
public:

  iupac_class_type()
  {
    for (int i=0; i < 256; ++i)
      char_to_class[i] = 0;

    int size = sizeof(char_classes)/sizeof(char_class_t);
    for (int i=0; i < size; ++i) {
      char_to_class[(unsigned char)char_classes[i].c] = char_classes[i].str;

      char_to_bits_[(unsigned char)char_bits[i].c] = char_bits[i].bits;
      bits_to_char_[char_bits[i].bits] = char_bits[i].c;
    }
  }

  int
  char_to_bits(char c) {
    return char_to_bits_[(unsigned char)c];
  }
  
  char
  bits_to_char(int i) {
    return bits_to_char_[i];
  }

  bool
  is_iupac_code(char c) const
  {
    return char_to_class[(unsigned char)c] != 0;
  }

  // function from iupac code to the corresponding subset of {A,C,G,T}
  const char*
  operator()(char c)
  {
    assert(char_to_class[(unsigned char)c] != 0);
    
    return char_to_class[(unsigned char)c];
  }

private:
  const char* char_to_class[256];  // If null then character is not an iupac character

  int char_to_bits_[256];
  char bits_to_char_[16];
  
  typedef struct {char c; const char* str;} char_class_t;
  typedef struct {char c; int bits;}             char_bits_t;

  static char_class_t char_classes[16];
  static char_bits_t  char_bits[16];
};

static iupac_class_type iupac_class;

dvector
iupac_probability(char c);

bool
iupac_match(char c, char char_class);

std::string
complement_set(char char_class);


bool
is_iupac_string(const std::string& str);

bool
iupac_string_match(const std::string& str, const std::string& pattern);

char complement(char c);

char complement_rna(char c);
back to top