https://github.com/orex/openbabel
Raw File
Tip revision: cb7416912e97477501e77b143f904bd39501836f authored by No Author on 27 November 2001, 18:50:36 UTC
This commit was manufactured by cvs2svn to create branch 'openeye'.
Tip revision: cb74169
pdb.cpp
/**********************************************************************
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
***********************************************************************/

#include "mol.h"
#include "version.h"
#include "typer.h"
#include "resdata.h"

namespace OpenEye {

extern OEAtomTyper atomtyper;

class OEResidueData : public OEGlobalDataBase
{
    int                                _resnum;
    vector<string>                     _resname;
    vector<vector<string> >            _resatoms;
    vector<vector<pair<string,int> > > _resbonds;

	//variables used only for parsing resdata.txt
    vector<string>                     _vatmtmp;
    vector<pair<string,int> >          _vtmp;
public:
    OEResidueData();
    bool SetResName(string &);
    int  LookupBO(string &);
    int  LookupBO(string &,string&);
    bool LookupType(string &,string&,int&);
    bool AssignBonds(OEMol &,OEBitVec &);
	void ParseLine(char*);
};

static bool ParseAtomRecord(char *, OEMol &,int);
static bool ParseConectRecord(char *,OEMol &);

static OEResidueData resdat;

bool ReadPDB(istream &ifs,OEMol &mol,char *title)
{
  resdat.Init();
  int chainNum = 1;
  char buffer[BUFF_SIZE];
  OEBitVec bs;

  mol.BeginModify();
  while (ifs.getline(buffer,BUFF_SIZE) && !EQn(buffer,"END",3))
  {
    if (EQn(buffer,"TER",3)) chainNum++;
    if (EQn(buffer,"ATOM",4) || EQn(buffer,"HETATM",6))
      {
	ParseAtomRecord(buffer,mol,chainNum);
	if (EQn(buffer,"ATOM",4))     
	  bs.SetBitOn(mol.NumAtoms());
      }

    if (EQn(buffer,"CONECT",6))       
      ParseConectRecord(buffer,mol);
  }
  
  resdat.AssignBonds(mol,bs);
  /*assign hetatm bonds based on distance*/
  mol.ConnectTheDots();

  mol.EndModify();
  mol.SetAtomTypesPerceived();
  atomtyper.AssignImplicitValence(mol);

  if (!mol.NumAtoms()) return(false);
  return(true);
}

bool ReadTerTermPDB(istream &ifs,OEMol &mol,char *title)
{
  resdat.Init();
  int chainNum = 1;
  char buffer[BUFF_SIZE];
  OEBitVec bs;

  mol.BeginModify();
  while (ifs.getline(buffer,BUFF_SIZE) && !EQn(buffer,"END",3) && !EQn(buffer,"TER",3))
  {
    if (EQn(buffer,"ATOM",4) || EQn(buffer,"HETATM",6))
      {
	ParseAtomRecord(buffer,mol,chainNum);
	if (EQn(buffer,"ATOM",4))     
	  bs.SetBitOn(mol.NumAtoms());
      }

    if (EQn(buffer,"CONECT",6))       
      ParseConectRecord(buffer,mol);
  }
  
  resdat.AssignBonds(mol,bs);
  /*assign hetatm bonds based on distance*/
  mol.ConnectTheDots();

  mol.EndModify();
  mol.SetAtomTypesPerceived();
  atomtyper.AssignImplicitValence(mol);

  if (!mol.NumAtoms()) return(false);
  return(true);
}

bool ReadPDB(vector<string> &vpdb,OEMol &mol,char *title)
{
  resdat.Init();
  int chainNum = 1;
  char buffer[BUFF_SIZE];
  vector<string>::iterator i;
  OEBitVec bs;

  mol.BeginModify();
  
  for (i = vpdb.begin();i != vpdb.end();i++)
    {
      strcpy(buffer,i->c_str());
      if (EQn(buffer,"END",3)) break;

      if (EQn(buffer,"TER",3)) chainNum++;
      if (EQn(buffer,"ATOM",4) || EQn(buffer,"HETATM",6))
	{
	  ParseAtomRecord(buffer,mol,chainNum);
	  if (EQn(buffer,"ATOM",4))     
	    bs.SetBitOn(mol.NumAtoms());
	}

      if (EQn(buffer,"CONECT",6))       
	ParseConectRecord(buffer,mol);
    }
  
  resdat.AssignBonds(mol,bs);
  /*assign hetatm bonds based on distance*/

  mol.EndModify();

  if (!mol.NumAtoms()) return(false);
  return(true);
}

bool WriteDelphiPDB(ostream &ofs,OEMol &mol)
{
  OEAtom *atom;
  vector<OEAtom*>::iterator i;
  char buffer[BUFF_SIZE];
  
  for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
    {
      sprintf(buffer,"ATOM   %4d %4s %3s    %1d    %8.3f%8.3f%8.3f%6.2f %6.3f",
	      atom->GetIdx(),
	      etab.GetSymbol(atom->GetAtomicNum()),
	      "UNK",0,
	      atom->GetX(),atom->GetY(),atom->GetZ(),
	      etab.GetVdwRad(atom->GetAtomicNum()),
	      atom->GetPartialCharge());
      ofs << buffer << endl;
    }

  int k,bo,count;
  int bond[10];
  OEAtom *nbr;
  vector<OEBond*>::iterator j;
  for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
    {
      count=0;
      memset(bond,'\0',sizeof(int)*5);
      bond[count++] = atom->GetIdx();
      for (nbr = atom->BeginNbrAtom(j);nbr;nbr = atom->NextNbrAtom(j))
	{
	  bo = ((*j)->GetBO() < 4) ?(*j)->GetBO():1;
	  for (k = 0;k < bo;k++)
	    bond[count++] = nbr->GetIdx();
	}
      sprintf(buffer,"CONECT  %3d  %3d  %3d  %3d  %3d",
	      bond[0],bond[1],bond[2],bond[3],bond[4]);

      ofs << buffer << endl;
    }

  ofs << "TER" << endl;
  return(true);
}

static bool ParseAtomRecord(char *buffer, OEMol &mol,int chainNum)
/* ATOMFORMAT "(i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2,1x,i3)" */
{
  string sbuf = &buffer[6];
  if (sbuf.size() < 48) return(false);

  bool hetatm = (EQn(buffer,"HETATM",6)) ? true : false;

  /* serial number */
  string serno = sbuf.substr(0,5);
  //SerialNum(the_atom) = atoi(tmp_str);

  /* atom name */
  string atmid = sbuf.substr(6,4);

  //trim spaces on the right and left sides
  while (!atmid.empty() && atmid[0] == ' ') 
    atmid = atmid.substr(1,atmid.size()-1);

  while (!atmid.empty() && atmid[atmid.size()-1] == ' ') 
    atmid = atmid.substr(0,atmid.size()-1);

  /* residue name */
  
  string resname = sbuf.substr(11,3);
  if (resname == "   ") 
    resname = "UNK";
  else
  {
    while (!resname.empty() && resname[0] == ' ') 
      resname = resname.substr(1,resname.size()-1);

    while (!resname.empty() && resname[resname.size()-1] == ' ') 
      resname = resname.substr(0,resname.size()-1);
  }  

  /* residue sequence number */

  string resnum = sbuf.substr(16,4);

  /* X, Y, Z */
  string xstr = sbuf.substr(24,8);
  string ystr = sbuf.substr(32,8);
  string zstr = sbuf.substr(40,8);

  string type;

  if (EQn(buffer,"ATOM",4))
  {
    type = atmid.substr(0,1);
    if (isdigit(type[0]))
       type = atmid.substr(1,1);

    if (resname.substr(0,2) == "AS" || resname[0] == 'N')
    {
      if (atmid == "AD1") type = "O";
      if (atmid == "AD2") type = "N";
    }
    if (resname.substr(0,3) == "HIS" || resname[0] == 'H')
    {
      if (atmid == "AD1" || atmid == "AE2") type = "N";
      if (atmid == "AE1" || atmid == "AD2") type = "C";
    }
    if (resname.substr(0,2) == "GL" || resname[0] == 'Q')
    {
      if (atmid == "AE1") type = "O";
      if (atmid == "AE2") type = "N";
    }
  }
  else //must be hetatm record
  {
    if (isalpha(atmid[0])) type = atmid.substr(0,1);
    else                   type = atmid.substr(1,1);
    if (atmid == resname)
      {
	type = atmid;
	if (type.size() == 2) type[1] = tolower(type[1]);
      }
    else
    if (resname == "ADR" || resname == "COA" || resname == "FAD" ||
	resname == "GPG" || resname == "NAD" || resname == "NAL" ||
	resname == "NDP")
      {
	if (type.size() > 1)
	  type = type.substr(0,1);
	//type.erase(1,type.size()-1);
      }
    else
      if (isdigit(type[0]))
	{
	  type = type.substr(1,1);
	  //type.erase(0,1);
	  //if (type.size() > 1) type.erase(1,type.size()-1);
	}
      else
	if (type.size() > 1 && isdigit(type[1]))
	  type = type.substr(0,1);
    //type.erase(1,1);
	else
	  if (type.size() > 1 && isalpha(type[1]) && isupper(type[1]))
	    type[1] = tolower(type[1]);

  }

  OEAtom atom;
  Vector v(atof(xstr.c_str()),atof(ystr.c_str()),atof(zstr.c_str()));
  atom.SetVector(v);
  
  atom.SetAtomicNum(etab.GetAtomicNum(type.c_str()));
  atom.SetType(type);

  int        rnum = atoi(resnum.c_str());
  OEResidue *res  = (mol.NumResidues() > 0) ? mol.GetResidue(mol.NumResidues()-1) : NULL;
  if (res == NULL || res->GetName() != resname || res->GetNum() != rnum)
  {
      vector<OEResidue*>::iterator ri;
      for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri))
          if (res->GetName() == resname && res->GetNum() == rnum)
              break;

      if (res == NULL)
      {
          res = mol.NewResidue();
          res->SetChainNum(chainNum);
          res->SetName(resname);
          res->SetNum(rnum);
      }
  }

  if (!mol.AddAtom(atom)) 
      return(false);
  else
  {
      OEAtom *atom = mol.GetAtom(mol.NumAtoms());

      res->AddAtom(atom);
      res->SetSerialNum(atom, atoi(serno.c_str()));
      res->SetAtomID(atom, atmid);
      res->SetHetAtom(atom, hetatm);

      return(true);
  }
}

static bool ParseConectRecord(char *buffer,OEMol &mol)
{
  vector<string> vs;
  
  buffer[70] = '\0';
  tokenize(vs,buffer);
  if (vs.empty()) return(false);
  vs.erase(vs.begin());
  int con1,con2,con3,con4;
  con1 = con2 = con3 = con4 = 0;
  int start = atoi(vs[0].c_str());

  if (vs.size() > 1) con1 = atoi(vs[1].c_str());
  if (vs.size() > 2) con2 = atoi(vs[2].c_str());
  if (vs.size() > 3) con3 = atoi(vs[3].c_str());
  if (vs.size() > 4) con4 = atoi(vs[4].c_str());
  if (!con1) return(false);

  OEAtom *a1,*a2;
  OEResidue *r1,*r2;
  vector<OEAtom*>::iterator i,j;
  for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
    {
      r1 = a1->GetResidue();
      if (r1->GetSerialNum(a1) == start)
	  for (a2 = mol.BeginAtom(j);a2;a2 = mol.NextAtom(j))
	    {
	      r2 = a2->GetResidue();
	      if (con1 && r2->GetSerialNum(a2) == con1) 
		mol.AddBond(a1->GetIdx(),a2->GetIdx(),1);
	      if (con2 && r2->GetSerialNum(a2) == con2) 
		mol.AddBond(a1->GetIdx(),a2->GetIdx(),1);
	      if (con3 && r2->GetSerialNum(a2) == con3) 
		mol.AddBond(a1->GetIdx(),a2->GetIdx(),1);
	      if (con4 && r2->GetSerialNum(a2) == con4) 
		mol.AddBond(a1->GetIdx(),a2->GetIdx(),1);
	    }
    }

  return(true);
}

OEResidueData::OEResidueData()
{
  _init = false;
  _dir = "";
  _envvar = "OE_DIR";
  _filename = "residue.txt";
  _subdir = "data";
  _dataptr = ResidueData;
}

bool OEResidueData::AssignBonds(OEMol &mol,OEBitVec &bv)
{
  OEAtom *a1,*a2;
  OEResidue *r1,*r2;
  vector<OEAtom*>::iterator i,j;
  
  //assign alpha peptide bonds
  for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
    if (bv.BitIsOn(a1->GetIdx()))
      {
	r1 = a1->GetResidue();
	if (!(r1->GetAtomID(a1) == "C")) continue;
	for (j=i,a2 = mol.NextAtom(j);a2;a2 = mol.NextAtom(j))
	  {
	    r2 = a2->GetResidue();
	    if (!(r2->GetAtomID(a2) == "N")) continue;
	    if (r1->GetNum() < r2->GetNum()-1) break;
	    if (r1->GetChainNum() == r2->GetChainNum())
	      {
		mol.AddBond(a1->GetIdx(),a2->GetIdx(),1);
		break;
	      }
	  }
      }

  Vector v;
  int bo,skipres=0;
  string rname = "";
  //assign other residue bonds
  for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
    {
      r1 = a1->GetResidue();
      if (skipres && r1->GetNum() == skipres) continue;

      if (r1->GetName() != rname)
	{
	  skipres = SetResName(r1->GetName()) ? 0 : r1->GetNum(); 
	  rname = r1->GetName();
	}
      //assign bonds for each atom
      for (j=i,a2 = mol.NextAtom(j);a2;a2 = mol.NextAtom(j))
	{
	  r2 = a2->GetResidue();
	  if (r1->GetNum() != r2->GetNum()) break;
	  if (r1->GetName() != r2->GetName()) break;

	  if ((bo = LookupBO(r1->GetAtomID(a1),r2->GetAtomID(a2))))
	    {
	      v = a1->GetVector() - a2->GetVector();
	      if (v.length_2() < 3.5) //float check by distance
		  mol.AddBond(a1->GetIdx(),a2->GetIdx(),bo);
	    }
	}
    }

  int hyb;
  string type;

  //types and hybridization
  for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
    {
      if (a1->IsOxygen() && !a1->GetValence())
	{
	  a1->SetType("O3");
	  continue;
	}

      if (a1->IsHydrogen())
	{
	  a1->SetType("H");
	  continue;
	}

      r1 = a1->GetResidue();
      if (skipres && r1->GetNum() == skipres) continue;

      if (r1->GetName() != rname)
	{
	  skipres = SetResName(r1->GetName()) ? 0 : r1->GetNum(); 
	  rname = r1->GetName();
	}

      //***valence rule for O-
      if (a1->IsOxygen() && a1->GetValence() == 1)
	{
	  OEBond *bond;
	  bond = *(a1->BeginBonds());
	  if (bond->GetBO() == 2)
	    {
	      a1->SetType("O2"); a1->SetHyb(2);
	    }
	  if (bond->GetBO() == 1)
	    {
	      a1->SetType("O-"); a1->SetHyb(3);
	      a1->SetFormalCharge(-1);
	    }
	}
      else
	if (LookupType(r1->GetAtomID(a1),type,hyb))
	  {
	    a1->SetType(type);
	    a1->SetHyb(hyb);
	  }
	else // try to figure it out by bond order ???
	  {
	  }
    }

  return(true);
}

void OEResidueData::ParseLine(char *buffer)
{
   int bo;
    string s;
    vector<string> vs;

	tokenize(vs,buffer);
	if (!vs.empty())
	  {
	    if (vs[0] == "BOND")
	      {
			s = (vs[1] < vs[2]) ? vs[1] + " " + vs[2] : 
			vs[2] + " " + vs[1];
			bo = atoi(vs[3].c_str());
			_vtmp.push_back(pair<string,int> (s,bo));
	      }

	    if (vs[0] == "ATOM" && vs.size() == 4)
	      {
			_vatmtmp.push_back(vs[1]);
			_vatmtmp.push_back(vs[2]);
			_vatmtmp.push_back(vs[3]);
	      }

	    if (vs[0] == "RES")	_resname.push_back(vs[1]);

	    if (vs[0]== "END")
	      {
			_resatoms.push_back(_vatmtmp);
			_resbonds.push_back(_vtmp);
			_vtmp.clear();
			_vatmtmp.clear();
	      }
	  }
}

bool OEResidueData::SetResName(string &s)
{
  unsigned int i;
    for (i = 0;i < _resname.size();i++)
	if (_resname[i] == s)
	{
	    _resnum = i;
	    return(true);
	}

    _resnum = -1;
    return(false);
}

int OEResidueData::LookupBO(string &s)
{
    if (_resnum == -1) return(0);
    
    unsigned int i;
    for (i = 0;i < _resbonds[_resnum].size();i++)
	if (_resbonds[_resnum][i].first == s)
	    return(_resbonds[_resnum][i].second);

    return(0);
}

int OEResidueData::LookupBO(string &s1,string &s2)
{
    if (_resnum == -1) return(0);
    string s;

    s = (s1 < s2) ? s1 + " " + s2 : s2 + " " + s1;

    unsigned int i;
    for (i = 0;i < _resbonds[_resnum].size();i++)
	if (_resbonds[_resnum][i].first == s)
	    return(_resbonds[_resnum][i].second);

    return(0);
}

bool OEResidueData::LookupType(string &atmid,string &type,int &hyb)
{
    if (_resnum == -1) return(false);

    string s;
    vector<string>::iterator i;

    for (i = _resatoms[_resnum].begin();i != _resatoms[_resnum].end();i+=3)
      if (atmid == *i)
	{
	  i++;	  type = *i;
	  i++;	  hyb = atoi((*i).c_str());
	  return(true);
	}

    return(false);
}

bool ReadBox(vector<string> &vbox, OEMol &mol,char *)
{
  char buffer[BUFF_SIZE];
  vector<string> vs;
  vector<string>::iterator i,j;
  OEAtom atom;

  mol.BeginModify();
  
  for (i = vbox.begin();i != vbox.end();i++)
  {
    strcpy(buffer,i->c_str());

    if (EQn(buffer,"END",3)) break;
    if (EQn(buffer,"ATOM",4))
      {
	string sbuf = &buffer[6]; 
	/* X, Y, Z */
	string x = sbuf.substr(24,8);
	string y = sbuf.substr(32,8);
	string z = sbuf.substr(40,8);
	Vector v(atof(x.c_str()),atof(y.c_str()),atof(z.c_str()));
	atom.SetVector(v);
	if (!mol.AddAtom(atom)) return(false);
      }
    
    if (EQn(buffer,"CONECT",6))
      {
	tokenize(vs,buffer);
	if (!vs.empty() && vs.size() > 2)
	  for (j = vs.begin(),j+=2;j != vs.end();j++)
	    mol.AddBond(atoi(vs[1].c_str()),atoi((*j).c_str()),1);
      }
  }

  mol.EndModify();
  
  return(true);
}

bool WritePDB(ostream &ofs,OEMol &mol)
{
  unsigned int i;
  char buffer[BUFF_SIZE];
  char type_name[10], padded_name[10];
  char the_res[10];
  int res_num;

  sprintf(buffer,"HEADER    PROTEIN");
  ofs << buffer << endl;

  if (strlen(mol.GetTitle()) > 0) 
    sprintf(buffer,"COMPND    %s ",mol.GetTitle());
  else   sprintf(buffer,"COMPND    UNNAMED");
  ofs << buffer << endl;

  sprintf(buffer,"AUTHOR    GENERATED BY BABEL %s",BABEL_VERSION);
  ofs << buffer << endl;

  OEAtom *atom;
  OEResidue *res;
  for (i = 1; i <= mol.NumAtoms(); i++)
  {
    atom = mol.GetAtom(i);
    strcpy(type_name,etab.GetSymbol(atom->GetAtomicNum()));
    if (strlen(type_name) > 1) type_name[1] = toupper(type_name[1]);

    if (atom->HasResidue())
      {
	res = atom->GetResidue();
	strcpy(the_res,(char*)res->GetName().c_str());
	strcpy(type_name,(char*)res->GetAtomID(atom).c_str());
	res_num = res->GetNum();
      }
    else
      {
	strcpy(the_res,"UNK");
	sprintf(padded_name,"%2s",type_name);
	strcpy(type_name,padded_name);
	res_num = 1;
      }
    sprintf(buffer,"ATOM   %4d  %-4s%3s%3s%3d   %9.3f%8.3f%8.3f  1.00  0.00 \n",
	    i,
	    type_name,
	    the_res,
	    "",
	    res_num,
	    atom->GetX(),
	    atom->GetY(),
	    atom->GetZ());
    ofs << buffer;
  }

  OEAtom *nbr;
  vector<OEBond*>::iterator k;
  for (i = 1; i <= mol.NumAtoms(); i ++)
  {
    atom = mol.GetAtom(i);
    if (atom->GetValence())
      {
	sprintf(buffer,"CONECT %5d",i);
	ofs << buffer;
	for (nbr = atom->BeginNbrAtom(k);nbr;nbr = atom->NextNbrAtom(k))
	  {
	    sprintf(buffer,"%5d",nbr->GetIdx());
	    ofs << buffer;
	  }
	ofs << endl;
      }
  }
  sprintf(buffer,"MASTER        0    0    0    0    0    0    0    0 ");
  ofs << buffer;
  sprintf(buffer,"%4d    0 %4d    0",mol.NumAtoms(),mol.NumAtoms());          
  ofs << buffer << endl;
  sprintf(buffer,"END"); ofs << buffer << endl;
  return(true);
}

}
back to top