https://github.com/delalamo/af2_conformations
Revision 17a1b97b3c81ed0172184043dac1d7d63d7ae191 authored by Diego del Alamo on 22 November 2021, 16:11:39 UTC, committed by GitHub on 22 November 2021, 16:11:39 UTC
1 parent 0fd5e0e
Raw File
Tip revision: 17a1b97b3c81ed0172184043dac1d7d63d7ae191 authored by Diego del Alamo on 22 November 2021, 16:11:39 UTC
Fixing typos in README
Tip revision: 17a1b97
util.py
import os
import numpy as np

from typing import List, NoReturn

from alphafold.data import pipeline
from alphafold.data import templates
from alphafold.data.tools import hhsearch

def mk_mock_template(
    seq: str
  ) -> dict:
  
  r"""Generates mock templates that will not influence prediction
  Taken from ColabFold version 62d7558c91a9809712b022faf9d91d8b183c328c

  Parameters
  ----------
  seq: Query sequence

  Returns
  ----------
  Dictionary with blank/empty/meaningless features

  """

  # Define constants
  lentype = templates.residue_constants.atom_type_num
  lseq = len( seq )

  # Since alphafold's model requires a template input
  # We create a blank example w/ zero input, confidence -1
  aatypes = np.array(
      templates.residue_constants.sequence_to_onehot(
          "-" * lseq,
          templates.residue_constants.HHBLITS_AA_TO_ID
        )
    )

  return {
      'template_all_atom_positions': np.zeros( ( lseq, lentype, 3 ) )[ None ],
      'template_all_atom_masks': np.zeros( ( lseq, lentype ) )[ None ],
      'template_sequence': [ f'none'.encode() ],
      'template_aatype': aatypes[ None ],
      'template_confidence_scores': np.full( lseq, -1 )[ None ],
      'template_domain_names': [ f'none'.encode() ],
      'template_release_date': [ f'none'.encode() ]
  }

###############################

def mk_template(
    seq: str,
    a3m_lines = str,
    path = str
  ) -> dict:
  
  r""" Parses templates into features

  Parameters
  ----------
  seq : Query sequence
  a3m_lines : Lines form MMSeqs2 alignment
  path : Path to templates fetched using MMSeqs2

  Returns
  ----------
  Dictionary with features

  """

  result = hhsearch.HHSearch(
      binary_path="hhsearch",
      databases=[ f"{ path }/pdb70" ]
  ).query( a3m_lines )

  return templates.TemplateHitFeaturizer(
      mmcif_dir=path,
      max_template_date="2100-01-01",
      max_hits=20,
      kalign_binary_path="kalign",
      release_dates_path=None,
      obsolete_pdbs_path=None
    ).get_templates(
      query_sequence=seq,
      query_pdb_code=None,
      query_release_date=None,
      hits=pipeline.parsers.parse_hhr( result )
    )

###############################

def setup_features(
    seq: str,
    a3m_lines: list,
    tfeatures_in: dict
  ) -> dict:
  
  r""" Set up features for alphafold

  Parameters
  ----------
  seq : Sequence (string)
  a3m_lines : Sequence alignment lines
  tfeatures_in : Template features

  Returns
  ----------
  Alphafold features object

  """

  msa = pipeline.parsers.parse_a3m( a3m_lines )

  # Assemble the dictionary of input features
  return {
      **pipeline.make_sequence_features(
            sequence = seq,
            description = "none",
            num_res = len( seq )
          ), **pipeline.make_msa_features(
            msas = [ msa ],
            #deletion_matrices = [ delmat ]
          ), **tfeatures_in
        }
back to top