https://github.com/jferrie3/FusionProteinEnsemble
Tip revision: 9a8ffe946a20d8efb6c4eb531b22dd96e7431e28 authored by jferrie3 on 13 January 2022, 17:50:16 UTC
Update Files
Update Files
Tip revision: 9a8ffe9
FastFloppyLinker.py
#FORMAT OF ORIGINAL FLOPPY TAIL [KLEIGER, G ET. AL. "RAPID E2-E3 ASSEMBLY AND DISASSEMBLY ENABLE PROCESSICE UBIQUITYLATION OF CULLIN-RING UBIQUITIN LIGASE SUBSTRATES" CELL. 139(5): 957-968 2009]
##CENTROID MODE [NOT DISCLOSED POTENTIALLY RAMA ONLY]
###19 CYCLES: RANDOM: SMALL_180(40%), SHEAR_180(40%), FRAGMENT_3MER(20%)
###20TH CYCLE: MINIMIZATION
####5000 TOTAL CYCLES -> RECOVER LOW
##FULL-ATOM MODE [SCORE12]
###START BY REPACKING ALL IN TAIL/VICINITY OF TAIL FOLLOWED BY MINIMIZATION
###14 CYCLES: RANDOM: SMALL_4(50%), SHEAR_4(50%) FOLLOWED BY SINGLE ROTAMER_TRIALS
###15TH CYCLE:MINIMIZATION
###30TH CYCLE:REPACK THEN MINIMIZATION
####3000 CYCLES -> RECOVER LOW
# Parsing Arguements
import argparse
parser = argparse.ArgumentParser(description='Program')
parser.add_argument('-in', '--Input_FASTA_File', action='store', type=str, required=False,
help='Name of the text file containing the FASTA sequence of the protein of interest. Carot should not be in same line as sequence, UniProt format preferred.')
parser.add_argument('-ftnstruct', '--Number_of_FloppyTail_Structures', action='store', type=str, required=False, default=400,
help='Number of structures to sample during FloppyTail portion. Default = 400')
parser.add_argument('-t_frag', '--Three_Mer_Frag_Library', action='store', type=str, required=False,
help='Name of the file containing the three-mer fragment library generated by disorder corrected method')
parser.add_argument('-linkers', '--Linker_File', action='store', type=str, required=False,
help='File containing the starting and ending residue number for the linkers in the fusion protein')
parser.add_argument('-loops', '--Loops_File', action='store', type=str, required=False,
help='File containing the starting and ending residue numbers for the loops in the fusion protein')
parser.add_argument('-m_num', '--Multimer_Number', action='store', type=int, required=False, default=1,
help='Number of multimers whose linkers need to be sampled. Only supply for multimers not being run under symmetry')
parser.add_argument('-m_anchor', '--Multimer_Anchor', action='store', type=str, required=False,
help='Side of the fusion which forms the multimer. Options N or C for N or C-terminal fusion')
parser.add_argument('-symm', '--Symmetry_File', action='store', type=str, required=False,
help='Supply the symmetry file if you want the protein to be run under symmetry (all moves perfectly mirrored)')
parser.add_argument('-cycles', '--FloppyTail_Cycles', action='store', type=str, required=False, default=1,
help='Number of sampling cycles within each stage of FloppyTail, identical to increase_cycles flag in C++ ClassicAbInitio and AbRelax. Default 0')
parser.add_argument('-refinesubset', '--Refine_Subset', action='store', type=int, required=False, default=0,
help='Only subjects the lowest X% of structures to Relax refinement where X is specified as input following flag. Default 0')
parser.add_argument('-relnstruct', '--Number_of_Relax_Structures', action='store', type=str, required=False, default=0,
help='Number of independent full-atom Relax sampling trajectories from a single AbInitio structure. Default 1')
parser.add_argument('-diso', '--Disorder_Probability_Prediction_File', action='store', type=str, required=False,
help='File containing per residue disorder probability prediction in RaptorX format. Generally acquired from RaptorX prediciton.')
parser.add_argument('-inpdb', '--Input_PDB_File', action='store', type=str, required=False,
help='Name of the text file containing the PDB structure of the protein of interest. All residues are required, missing residues are not constructed')
parser.add_argument('-pymol', '--PYMOL', action='store_true', required=False,
help='Running with this flag will utilize PyMOL Mover for visualization')
parser.add_argument('-rg', '--RG', action='store_true', required=False,
help='Ability to provide radius of gyration via Ferrie et. al. JPCB 2020 rg score term')
parser.add_argument('-clusterid', '--Cluster_ID', action='store', type=int, required=False,
help='Batch submit script ID for output record keeping')
parser.add_argument('-verbose', '--Verbose', action='store_true', required=False,
help='Will unmute information coming from Rosetta')
parser.add_argument('-dumpall', '--DumpAll', action='store_true', required=False,
help='Dumps both the pre-relaxed and relaxed structures')
args = parser.parse_args()
# The Start-Up
from pyrosetta import * #Comment
if args.Verbose:
init(extra_options = '-extra_patch_fa ../OGQ_SidechainConjugation.txt ../TMR_SidechainConjugation.txt')
else:
init(extra_options = '-mute all -extra_patch_fa ../OGQ_SidechainConjugation.txt ../TMR_SidechainConjugation.txt')
#init()
from pyrosetta.rosetta.core.scoring import *
from pyrosetta.rosetta.core.scoring.methods import *
from pyrosetta.rosetta.core.scoring.methods import EnergyMethodOptions
from pyrosetta.rosetta.protocols.grafting import *
from pyrosetta.rosetta.protocols.simple_moves import *
from pyrosetta.rosetta.protocols.moves import *
from pyrosetta.rosetta.core.fragment import *
from pyrosetta.rosetta.protocols.minimization_packing import *
from math import exp, log, pi, sqrt
import random
from random import randint
from random import random as rnd
import sys
import os
import numpy as np
from numpy.random import choice
import re
import scipy.optimize as op
from scipy.interpolate import interp1d
import fcntl
## Imports from Parser and Defaults
if args.Cluster_ID:
ftnstruct = 1
elif args.Number_of_FloppyTail_Structures:
ftnstruct = int(args.Number_of_FloppyTail_Structures)
else:
ftnstruct = 400
if args.FloppyTail_Cycles:
cycles = int(args.FloppyTail_Cycles)
else:
cycles = 1
if args.Number_of_Relax_Structures:
relnstruct = int(args.Number_of_Relax_Structures)
else:
relnstruct = 0
if args.Refine_Subset:
refine_number = int((args.Refine_Subset/100)*int(ftnstruct))
else:
refine_number = 0
# Importing sequence from FASTA
if args.Input_FASTA_File:
fasta_file = open(args.Input_FASTA_File, 'r')
fasta_lines = fasta_file.readlines()
fasta_counter = 0
fasta_sequence = ' '
for fasta_line in fasta_lines:
if '>' not in fasta_line:
if fasta_counter == 0:
if '\n' in fasta_line:
fasta_sequence = fasta_line.split('\n')[0]
else:
fasta_sequence = fasta_line
fasta_counter = 1
else:
if '\n' in fasta_line:
fasta_sequence = fasta_sequence + fasta_line.split('\n')[0]
else:
fasta_sequence = fasta_sequence + fasta_line
# Determining the Per Residue Disorder Probability and Number of Segments
if args.Disorder_Probability_Prediction_File:
disorder_dtypes = [('res_num', np.float_), ('AA', np.unicode_, 2), ('ast', np.unicode_, 1), ('disprob', np.float_)]
disorder_dat = np.genfromtxt(args.Disorder_Probability_Prediction_File, dtype=disorder_dtypes, delimiter=' ', skip_header=3)
diso_dat = np.empty((len(disorder_dat),1))
diso_segments = []
diso_cutoff = 0.5
seg_cutoff = 2
seg_res_counter = 0
per_residue_disorder = 0
diso_current = disorder_dat[0][3]
segment_break_list = []
segment_switch = False
for seg_residue in range(len(disorder_dat)):
if disorder_dat[seg_residue][3] >= diso_cutoff and diso_current >= diso_cutoff:
seg_res_counter = 0
elif disorder_dat[seg_residue][3] < diso_cutoff and diso_current < diso_cutoff:
seg_res_counter = 0
elif disorder_dat[seg_residue][3] < diso_cutoff and diso_current >= diso_cutoff:
seg_res_counter = seg_res_counter + 1
elif disorder_dat[seg_residue][3] >= diso_cutoff and diso_current < diso_cutoff:
seg_res_counter = seg_res_counter + 1
if seg_res_counter > 1:
seg_res_counter = 0
diso_current = disorder_dat[seg_residue][3]
if diso_current >= diso_cutoff:
transition_id = 'order' # meaning all residues prior to end are ordered
else:
transition_id = 'disorder' # meaning all residues prior to end are disordered
segment_break_list.append((seg_residue-9, transition_id))
per_residue_disorder = per_residue_disorder + (disorder_dat[seg_residue][3]/float(len(disorder_dat)))
if disorder_dat[len(disorder_dat)-1][3] >= diso_cutoff:
segment_break_list.append((len(disorder_dat), 'disorder'))
else:
segment_break_list.append((len(disorder_dat), 'order'))
# Adding PTMs
def MakePTMMutations(pose, fasta_sequence):
ptm_mutation_list = re.findall("\[(.*?)\]", fasta_sequence)
residue_mutation_list = []
pose_sequence = pose.sequence()
residue_number = 0
ptm_number = 0
count_residue = True
for res in fasta_sequence:
if res == '[':
count_residue = False
residue_mutation_list.append(residue_number)
if count_residue == True:
residue_number +=1
if res == ']':
count_residue = True
print(ptm_mutation_list)
print(residue_mutation_list)
for res_ptm_idx, res_ptm in enumerate(ptm_mutation_list):
mutate = MutateResidue(residue_mutation_list[res_ptm_idx], res_ptm)
mutate.apply(pose)
return pose
# The Poses
p=Pose()
if args.Input_FASTA_File:
p = pose_from_sequence(fasta_sequence, "centroid")
if args.Input_PDB_File:
p = pose_from_pdb(str(args.Input_PDB_File))
if args.Input_FASTA_File:
if p.sequence() != fasta_sequence:
if '_p:' in fasta_sequence:
p = MakePTMMutations(p, fasta_sequence)
## Write the FoldTree so to account for the multimerization domain
if args.Multimer_Anchor:
ft = FoldTree()
if args.Multimer_Anchor == 'C':
c_term_residue = p.total_residue() / args.Multimer_Number
c_term_residue_list = []
for multimer_number in range(args.Multimer_Number):
c_term_res = multimer_number (multimer_number * p.total_residue() / args.Multimer_Number)
c_term_residue_list.append(c_term_res)
if multimer_number == 0:
ft.add_edge(c_term_res, 1, -1)
else:
ft.add_edge(c_term_res, c_term_residue_list[multimer_number - 1] + 1, -1)
ft.add_edge(c_term_residue_list[multimer_number - 1], c_term_res, 1)
# Setup the MoveMaps
cenmap = MoveMap()
cenmap.set_bb(True)
fullmap = MoveMap()
fullmap.set_bb(True)
fullmap.set_chi(True)
cenmap_min = MoveMap()
cenmap_min.set_bb(True)
fullmap_min = MoveMap()
fullmap_min.set_bb(True)
fullmap_min.set_chi(True)
relaxmap = MoveMap()
relaxmap.set_bb(True)
relaxmap.set_chi(True)
sampled_residues = []
if args.Disorder_Probability_Prediction_File:
cenmap.set_bb(False)
fullmap.set_bb(False)
fullmap.set_chi(False)
for res_seg_idx, res_seg_item in enumerate(segment_break_list):
start_res = 1
if res_seg_idx > 0:
start_res = segment_break_list[res_seg_idx-1][0] + 1
end_res = res_seg_item[0] + 1
if res_seg_item[1] == 'disorder':
for res_idx in range(start_res,end_res+1):
cenmap.set_bb(res_idx, True)
fullmap.set_bb(res_idx, True)
fullmap.set_chi(res_idx, True)
sampled_residues.append(res_idx)
else:
continue
# Only sample residues within the linker
if args.Linker_File:
cenmap.set_bb(False)
fullmap.set_bb(False)
fullmap.set_chi(False)
linker_file = open(args.Linker_File, 'r')
linker_lines = linker_file.readlines()
linker_residues = []
for linker_line in linker_lines:
start_res = int(linker_line.split(' ')[0])
end_res = int(linker_line.split(' ')[1])
### Need to add the length of a monomer to each of the linker residue numbers to get it to sample all linkers
for multimer_number in range(args.Multimer_Number):
start_res_m = int(start_res + (multimer_number * p.total_residue() / args.Multimer_Number))
end_res_m = int(end_res + (multimer_number * p.total_residue() / args.Multimer_Number))
for residue in range(start_res, end_res + 1):
cenmap.set_bb(residue, True)
fullmap.set_bb(residue, True)
fullmap.set_chi(residue, True)
sampled_residues.append(residue)
linker_residues.append(residue)
# Only sample residues within the loops
loop_residues = []
if args.Loops_File:
loops_file = open(args.Loops_File, 'r')
loops_lines = loops_file.readlines()
for loops_line in loops_lines:
start_res = int(loops_line.split(' ')[0])
end_res = int(loops_line.split(' ')[1])
### Need to add the length of a monomer to each of the linker residue numbers to get it to sample all linkers
for multimer_number in range(args.Multimer_Number):
start_res_m = int(start_res + (multimer_number * p.total_residue() / args.Multimer_Number))
end_res_m = int(end_res + (multimer_number * p.total_residue() / args.Multimer_Number))
for residue in range(start_res, end_res + 1):
loop_residues.append(residue)
sampled_residues.append(residue)
'''
if args.Start_Residue and args.End_Residue:
cenmap.set_bb(False)
fullmap.set_bb(False)
fullmap.set_chi(False)
### Need to add the length of a monomer to each of the linker residue numbers to get it to sample all linkers
for multimer_number in range(args.Multimer_Number):
start_res = int(args.Start_Residue + (multimer_number * p.total_residue() / args.Multimer_Number))
end_res = int(args.End_Residue + (multimer_number * p.total_residue() / args.Multimer_Number))
for residue in range(start_res, end_res + 1):
cenmap.set_bb(residue, True)
fullmap.set_bb(residue, True)
fullmap.set_chi(residue, True)
'''
## If protein is multimeric and is using a symmetry operation
if args.Symmetry_File:
symmetrize = pyrosetta.rosetta.protocols.symmetry.SetupForSymmetryMover(args.Symmetry_File)
symmetrize.apply(p)
## Make Remaining Necessary Poses
starting_p = Pose()
starting_p.assign(p)
restore_chi_p = Pose()
restore_chi_p.assign(p)
pvdw = Pose()
pvdwc = Pose()
pcen = Pose()
# The Score Functions
# Adding an RG Constraint
if args.RG:
#### Computing the Mean Hydrophobicity per Residue
scaling_info_holder = np.zeros([len(segment_break_list),5]) # Need spots for segH,segQ,frac_pos,frac_neg,seq_len -> scalvQ,scalvH,scalv,rad_gyr,seq_len => scalvQ -> sf_rg_term_potential
##### Kyte, J. Doolitte, R.F. J. Mol. Biol. (1982) 157, 105-132
##### Normalization proposed in Uversky, V.N. et. al. Proteins: Struc. Func. Gen. (2000), 41, 415-427
aa_hydro_idx = {'I':4.5, 'V':4.2, 'L':3.8, 'F':2.8, 'C':2.5, 'M':1.9, 'A':1.8, 'G':-0.4, 'T':-0.7, 'W':-0.9, 'S':-0.8, 'Y':-1.3, 'P':-1.6, 'H':-3.2, 'E':-3.5, 'Q':-3.5, 'D':-3.5, 'N':-3.5, 'K':-3.9, 'R':-4.5}
for diso_segment_idx, diso_segment_item in enumerate(segment_break_list):
start_res = 1
if diso_segment_idx > 0:
start_res = segment_break_list[diso_segment_idx-1][0] + 1
end_res = diso_segment_item[0] + 1
seq_len = end_res-start_res
pro_len = p.total_residue()
scaling_info_holder[diso_segment_idx][4] = seq_len
sequence_avg_H = 0
for res_idx in range(start_res-1, end_res-1, 1):
window_start = 0
window_end = 0
window_H_avg = 0
if res_idx < 5:
window_start = 0
window_end = res_idx + 1
elif pro_len - res_idx < 5:
window_start = res_idx
window_end = pro_len
else:
window_start = res_idx - 2
window_end = res_idx + 3
window = p.sequence()[window_start:window_end]
for window_res_idx, window_res_item in enumerate(window):
window_H_avg = window_H_avg + ((aa_hydro_idx[str(window_res_item)]+4.5)/9.0)
window_H_avg = window_H_avg/(len(window))
sequence_avg_H = sequence_avg_H + window_H_avg/scaling_info_holder[diso_segment_idx][4]
scaling_info_holder[diso_segment_idx][0] = sequence_avg_H
#### Computing the Mean Net Charge per Residue
for diso_segment_idx, diso_segment_item in enumerate(segment_break_list):
start_res = 1
if diso_segment_idx > 0:
start_res = segment_break_list[diso_segment_idx-1][0] + 1
end_res = diso_segment_item[0] + 1
seq_len = end_res-start_res
scaling_info_holder[diso_segment_idx][4] = seq_len
sequence_Q = 0
frac_pos = 0 # fraction of positively charged residues Arg Lys
frac_neg = 0 # fraction of negatively charged residues Asp Glu
for res_idx in range(start_res-1, end_res-1, 1):
res_id = p.sequence()[res_idx]
if res_id == 'E' or res_id == 'D':
frac_neg = frac_neg + 1/seq_len
sequence_Q = sequence_Q - 1/seq_len
elif res_id == 'K' or res_id == 'R':
frac_pos = frac_pos + 1/seq_len
sequence_Q = sequence_Q + 1/seq_len
else:
continue
sequence_Q = np.sqrt(sequence_Q**2)
scaling_info_holder[diso_segment_idx][1] = sequence_Q
scaling_info_holder[diso_segment_idx][2] = frac_pos
scaling_info_holder[diso_segment_idx][3] = frac_neg
#### Calculations from Hofmann, H. et al. PNAS (2012) 109, 40, 16155-16160
##### Compute charge and hydrophobicity scaling options
###### Constants
scalv = 0
con_a = 0.394
con_z = 0.09
con_x0 = 0.114
con_c = 1.72
con_d = 0.9
###### Calculations
###### Folded vs Unfolded
###### From Uversky, if sequence_Q < 2.785*sequence_avg_H - 1.151 -> Folded, but fails for synuclein
for QH_set_idx in range(len(scaling_info_holder)):
if 'order' in segment_break_list[QH_set_idx]:
scaling_info_holder[QH_set_idx][0] = 0.33
scaling_info_holder[QH_set_idx][1] = 0.33
else:
scalv_Q = (1.0/3.0) + con_a*((1+np.exp(con_x0-scaling_info_holder[QH_set_idx][1]/con_z))**(-1))
scaling_info_holder[QH_set_idx][1] = scalv_Q
scalv_H = (1.0/3.0) + con_a*((1+np.exp((con_x0+con_c*scaling_info_holder[QH_set_idx][0]-con_d)/con_z))**(-1))
scaling_info_holder[QH_set_idx][0] = scalv_H
###### Determining the appropriate scaling value
####### Constants
elemchar = 1.602*10**(-19)
ss_eo = 8.854*10**(-12)
ss_er = 78.7 #permittivity of water at 298 K
ss_kb = 1.38*10**(-23) # boltzmann constant
ss_T = 298 # in Kelvin
ss_I = 0.15 # in Molar
ss_kappa = 1/(0.304*np.sqrt(ss_I)) # in nanometers
ss_lB = (elemchar**2)/(4*np.pi*ss_eo*ss_er*ss_kb*ss_T)
####### Calculation
for QH_set_idx in range(len(scaling_info_holder)):
scalvstar = ((4*np.pi*ss_lB*((scaling_info_holder[QH_set_idx][2]-scaling_info_holder[QH_set_idx][3])**2))/(ss_kappa**2)) - ((np.pi*ss_lB*((scaling_info_holder[QH_set_idx][2]+scaling_info_holder[QH_set_idx][3])**2))/(ss_kappa))
if scaling_info_holder[QH_set_idx][2] == 0:
if scaling_info_holder[QH_set_idx][3] == 0:
scalv = scaling_info_holder[QH_set_idx][0]
else:
scalv = scaling_info_holder[QH_set_idx][1]
elif scaling_info_holder[QH_set_idx][3] == 0:
if scaling_info_holder[QH_set_idx][2] == 0:
scalv = scaling_info_holder[QH_set_idx][0]
else:
scalv = scaling_info_holder[QH_set_idx][1]
elif scalvstar < 0:
scalv = scaling_info_holder[QH_set_idx][1]
else:
scalv = scaling_info_holder[QH_set_idx][0]
scaling_info_holder[QH_set_idx][2] = scalv
##### Computing the Expected Radius of Gyration
rg_b = 0.38 # in nanometers
rg_lp = 0.53 # in nanometers
for QH_set_idx in range(len(scaling_info_holder)):
rad_gyr = (np.sqrt((2*rg_lp*rg_b)/((2*scaling_info_holder[QH_set_idx][2]+1)*(2*scaling_info_holder[QH_set_idx][2]+2)))*(scaling_info_holder[QH_set_idx][4]**scaling_info_holder[QH_set_idx][2]))*10 # in Angstroms
scaling_info_holder[QH_set_idx][3] = rad_gyr
## Making the Actual Potential Energy Function
## Constants for Probability
gamma = 1.1615
## Concocting the Potential
def pr_saw(var_a, input_vars):
r, rg, g, delta = input_vars
return (var_a[0]*4*np.pi/rg)*((r/rg)**(2+g))*np.exp(-var_a[1]*((r/rg)**delta))
def solve_pr_saw(var_a, *input_vars):
r, rg, g, delta = input_vars
return (np.sum((var_a[0]*4*np.pi/rg)*((r/rg)**(2+g))*np.exp(-var_a[1]*((r/rg)**delta)))-1, np.sum((var_a[0]*4*np.pi/rg)*((r/rg)**(2+g))*np.exp(-var_a[1]*((r/rg)**delta))*(r**2))-rg**2)
rg_sf_term_potential_list = []
for QH_set_idx in range(len(scaling_info_holder)):
var_g = (gamma-1)/scaling_info_holder[QH_set_idx][2]
var_delta = 1/(1-scaling_info_holder[QH_set_idx][2])
r_set=np.arange(0.0,7*scaling_info_holder[QH_set_idx][3],0.01)
saw_inputs = (r_set, scaling_info_holder[QH_set_idx][3], var_g, var_delta)
a0 = [1.0, 1.0]
a1 = op.fsolve(solve_pr_saw,a0,args=saw_inputs)
rg_sf_term_potential = interp1d(r_set, (1-(pr_saw(a1, saw_inputs)/np.max(pr_saw(a1, saw_inputs)))))
rg_sf_term_potential_list.append(rg_sf_term_potential)
## Making the Actual Score Term
from pyrosetta.rosetta.core.scoring.methods import ContextIndependentOneBodyEnergy ## newer versions make this pyrosetta.rosetta
@pyrosetta.EnergyMethod() ## for newer versions make pyrosetta.EnergyMethod()
class SeqCorrRgMethod(WholeStructureEnergy):
"""A scoring method that using a predicted radius of gyration from the
primary sequence to construct a polymer-scaled potential
"""
def __init__(self):
"""Construct LengthScoreMethod."""
WholeStructureEnergy.__init__(self, self.creator())
def finalize_total_energy(self, pose, sfxn, emap):
"""Calculate energy of res of pose and emap"""
pose = pose ## for newer versions remove line
e_val = 0
for segment_idx in range(len(segment_break_list)):
if segment_break_list[segment_idx][1] == 'order':
continue
else:
r_xyz = np.zeros([int(scaling_info_holder[segment_idx][4]), 3])
rg_sq = np.zeros([int(scaling_info_holder[segment_idx][4]), 1])
start_res = 1
if segment_idx > 0:
start_res = segment_break_list[segment_idx-1][0] + 1
end_res = segment_break_list[segment_idx][0] + 1
for res_num in range(start_res, end_res, 1):
for res_xyz in range(len(r_xyz[0])):
r_xyz[res_num-start_res][res_xyz] = pose.residue(res_num).nbr_atom_xyz()[res_xyz]
r_cen_mass = np.average(r_xyz, axis=0)
for res_num in range(len(r_xyz)):
rg_sq[res_num] = (np.linalg.norm(r_xyz[res_num] - r_cen_mass))**2
rg_val = np.sqrt(np.average(rg_sq))
if rg_val < 7*scaling_info_holder[segment_idx][3]:
rg_sf_term_potential = rg_sf_term_potential_list[segment_idx]
e_val = e_val + float(rg_sf_term_potential(rg_val))
else:
rg_sf_term_potential = rg_sf_term_potential_list[segment_idx]
e_val = e_val + float(rg_sf_term_potential(6.9*scaling_info_holder[segment_idx][3]))
emap.set(self.scoreType, e_val) ## for newer versions remove .get()
new_rg_score = SeqCorrRgMethod.scoreType
## VDW Repulsive Score Function
sf_stage_0 = create_score_function('score0')
if args.RG:
sf_stage_0.set_weight(new_rg_score, (400/24)*5)
## Centroid Score Functions
sf_stage_1 = create_score_function('cen_std')
sf_stage_1.set_weight(rama, 1.0)
sf_stage_1.set_weight(cenpack, 1.0)
sf_stage_1.set_weight(hbond_lr_bb, 1.0)
sf_stage_1.set_weight(hbond_sr_bb, 1.0)
if args.RG:
sf_stage_1.set_weight(new_rg_score, (400/24)*5)
## Full Atom Score Functions
sf_stage_2 = create_score_function('ref2015')
if args.RG:
sf_stage_2.set_weight(new_rg_score, (400/24)*500)
sf_relax = create_score_function('ref2015_cart')
## Radius of Gyration Reference Score Function
sfrg = ScoreFunction()
sfrg.set_weight(rg, 1.0)
# The Movers
## Fragment Movers
# Importing the Fragment Files and Constructing the Fragment Mover
if args. Three_Mer_Frag_Library:
fragset3 = ConstantLengthFragSet(3)
fragset3.read_fragment_file(args.Three_Mer_Frag_Library)
fragmover3 = ClassicFragmentMover(fragset3, cenmap)
## Minimization Movers
vdwmin = MinMover()
vdwmin.movemap(cenmap)
vdwmin.score_function(sf_stage_0)
vdwmin.min_type('linmin')
cenmin = MinMover()
cenmin.movemap(cenmap)
cenmin.score_function(sf_stage_1)
cenmin.min_type('linmin')
fullmin = MinMover()
fullmin.movemap(fullmap)
fullmin.score_function(sf_stage_2)
fullmin.min_type('linmin')
## SIDECHAIN MANAGER FOR SIDECHAIN CONJUGATION
class SidechainManager(pyrosetta.rosetta.protocols.moves.Mover):
"""Helps for switching between FA and CEN"""
def __init__(self, pose):
"""Actions to perform for initialization"""
self.modified_residues = []
self.chi_angles = []
for res_i in range(p.total_residue()):
res_name = p.residue(res_i + 1).name()
if res_name == 'ASP:TMR' or res_name == 'CYS:OGQ':
self.modified_residues.append([res_name, res_i + 1, p.residue(res_i + 1).chi()])
def apply(self, pose):
if pose.residue(self.modified_residues[0][1]).name() == self.modified_residues[0][0]:
for modified_residue in self.modified_residues:
mutate_to = modified_residue[0].split(':')[0]
make_mutation = pyrosetta.rosetta.protocols.simple_moves.MutateResidue(modified_residue[1], mutate_to)
make_mutation.apply(pose)
else:
for modified_residue in self.modified_residues:
mutate_to = modified_residue[0].split(':')[0] + '_p:' + modified_residue[0].split(':')[1]
make_mutation = pyrosetta.rosetta.protocols.simple_moves.MutateResidue(modified_residue[1], mutate_to)
make_mutation.apply(pose)
pose.residue(modified_residue[1]).set_all_chi(modified_residue[2])
## SIDECHAIN MANAGER FOR SIDECHAIN CONJUGATION
class SidechainRestorer(pyrosetta.rosetta.protocols.moves.Mover):
"""Helps for switching between FA and CEN"""
def __init__(self, restore_pose, vect_bool):
"""Actions to perform for initialization"""
self.restore_pose = restore_pose
self.vect_bool = vect_bool
self.chi_mover = pyrosetta.rosetta.protocols.simple_moves.sidechain_moves.SetChiMover()
def apply(self, pose):
if pose.total_residue() == self.restore_pose.total_residue():
for res_i in range(1, pose.total_residue()+1):
if self.vect_bool[res_i] == False:
#pose.residue(res_i).set_all_chi(self.restore_pose.residue(res_i).chi())
chi_set = self.restore_pose.residue(res_i).chi()
for chi_idx,chi_angle in enumerate(chi_set):
self.chi_mover.angle(chi_angle)
self.chi_mover.chinum(chi_idx+1)
self.chi_mover.resnum(res_i)
self.chi_mover.apply(pose)
else:
continue
## BACKRUB PROTOCOL FOR LOOPS
class BackrubLoop(pyrosetta.rosetta.protocols.moves.Mover):
"""A customized version of the Backrub protocol that operates in PyRosetta"""
def __init__(self, pose, residues, is_fullatom = True):
"""Actions to perform for initialization"""
rosetta.protocols.moves.Mover.__init__(self)
## PROTOCOL OPTIONS
self.pose = pose
self.is_fullatom = is_fullatom
self.residues = residues
if self.is_fullatom == False:
self.prob_rotamer_only = 0
self.prob_rotamer_post_backrub = 0
self.prob_rotamer_pbr = 0
else:
self.prob_rotamer_only = 0.05 #0.25
self.prob_rotamer_post_backrub = 0.25
self.prob_rotamer_pbr = 0.1 # 0.25
self.num_moves = 10
## SCORE FUNCTION
if self.is_fullatom == False:
self.score_function = create_score_function('cen_std')
self.score_function.set_weight(rama, 1.0)
self.score_function.set_weight(cenpack, 1.0)
self.score_function.set_weight(hbond_lr_bb, 1.0)
self.score_function.set_weight(hbond_sr_bb, 1.0)
else:
self.score_function = create_score_function('ref2015')
## MONTE CARLO OBJECTS
self.mc1 = MonteCarlo(pose, self.score_function, 5.0)
self.mc2 = MonteCarlo(pose, self.score_function, 10.0)
## SEETING UP THE BASE BACKRUB MOVER
starting_pose = Pose()
starting_pose.assign(pose)
self.backrub_mover = pyrosetta.rosetta.protocols.backrub.BackrubMover()
self.backrub_mover.set_max_angle_disp_4(3.0)
self.backrub_mover.set_max_angle_disp_7(1.5)
self.backrub_mover.set_max_angle_disp_slope(-0.03)
self.backrub_mover.set_max_atoms(80)
self.backrub_mover.test_move(pose)
pose.assign(starting_pose)
self.residue_1 = []
self.residue_2 = []
self.backrub_seg_ids = []
### IDENTIFYING MOVABLE SEGMENTS
for res1 in self.residues:
for res2 in self.residues:
self.residue_1.append(res1)
self.residue_2.append(res2)
self.res1ca = AtomID(2,res1)
self.res2ca = AtomID(2,res2)
self.backrub_seg_ids.append(self.backrub_mover.segment_id(self.res1ca,self.res2ca))
self.active_backrub_seg_ids = [i for i, seg_id in enumerate(self.backrub_seg_ids) if seg_id != 0]
### SETTING WEIGHTING TO BIAS SAMPLING OF LARGE MOTIONS
self.sample_weights = []
for res_i in self.active_backrub_seg_ids:
self.sample_weights.append(abs(self.residue_1[res_i] - self.residue_2[res_i]))
total_weights = sum(self.sample_weights)
for weight_idx,weight_i in enumerate(self.sample_weights):
self.sample_weights[weight_idx] = weight_i / total_weights
### PERFORM INTIAL SAMPLING TO FINALIZE INITILAIZATION
self.backrub_mover.set_next_segment_id(self.backrub_seg_ids[choice(self.active_backrub_seg_ids, p=self.sample_weights)])
self.backrub_mover.apply(pose)
### SETTING THE TASK
if self.is_fullatom == True:
self.task = standard_packer_task(pose)
self.task.restrict_to_repacking()
if args.Disorder_Probability_Prediction_File or args.Linker_File or args.Loops_File:
task_residues = pyrosetta.rosetta.utility.vector1_bool()
for resi in range(1, p.total_residue() + 1):
if resi in sampled_residues:
task_residues.append(1)
else:
task_residues.append(0)
self.task.restrict_to_residues(task_residues)
### SETTING UP THE SIDECHAIN MOVER
self.sidechainMC = pyrosetta.rosetta.protocols.simple_moves.sidechain_moves.SidechainMover()
self.sidechainMC.set_task(self.task)
self.sidechainMC.set_change_chi_without_replacing_residue(True)
#sidechainMC.set_prob_random_pert_current(0.0) # 0.8
#sidechainMC.set_prob_uniform(0.55) # 0.2
#sidechainMC.set_prob_withinrot(0.0) # 0.0
self.packed_residue_list = self.sidechainMC.packed_residues()
## CONFIGURING THE ASSOCIATED SIDECHAIN MOVER
def backrub_sidechain_mover(self, pose):
last_backrub_seg_id = self.backrub_seg_ids.index(self.backrub_mover.last_segment_id())
side_move_start_res = self.residue_1[last_backrub_seg_id]
side_move_end_res = self.residue_2[last_backrub_seg_id]
is_pack_res = False
res_attempt = 0
sc_res_num = 0
while is_pack_res == False:
if res_attempt < 10:
res_attempt = res_attempt + 1
#print(res_attempt)
sc_res_num = randint(side_move_start_res,side_move_end_res)
if sc_res_num in self.packed_residue_list:
is_pack_res = True
else:
sc_res_num_rand = randint(1,len(self.packed_residue_list))
sc_res_num = self.packed_residue_list[sc_res_num_rand]
is_pack_res = True
self.sidechainMC.next_resnum(sc_res_num)
self.sidechainMC.apply(pose)
def n_moves(self, moves):
self.num_moves = moves
def score_fxn(self, score_fxn):
self.score_function = score_fxn
self.mc1 = MonteCarlo(self.pose, self.score_function, 5.0)
self.mc2 = MonteCarlo(self.pose, self.score_function, 10.0)
def apply(self, pose):
self.mc2.reset(pose)
for i in range(self.num_moves):
if rnd() < self.prob_rotamer_only:
self.sidechainMC.apply(pose)
else:
self.mc1.reset(pose)
self.backrub_mover.set_next_segment_id(self.backrub_seg_ids[choice(self.active_backrub_seg_ids, p=self.sample_weights)])
self.backrub_mover.apply(pose)
self.mc1.boltzmann(pose)
if rnd() < self.prob_rotamer_post_backrub:
self.backrub_sidechain_mover(pose)
if rnd() < self.prob_rotamer_pbr:
self.backrub_sidechain_mover(pose)
self.mc2.boltzmann(pose)
def inter_fluor_distance(pose):
modified_residues = []
for res_i in range(pose.total_residue()):
res_name = pose.residue(res_i + 1).name()
if res_name == 'ASP:TMR':
atom1 = pose.residue(res_i + 1).atom(pose.residue(res_i + 1).atom_index('O4')).xyz()
atom1_xyz = np.array((atom1[0], atom1[1], atom1[2]))
atom2 = pose.residue(res_i + 1).atom(pose.residue(res_i + 1).atom_index('C6')).xyz()
atom2_xyz = np.array((atom2[0], atom2[1], atom2[2]))
mp_atom = (atom1_xyz + atom2_xyz) / 2
modified_residues.append([res_name, res_i + 1, mp_atom])
elif res_name == 'CYS:OGQ':
atom1 = pose.residue(res_i + 1).atom(pose.residue(res_i + 1).atom_index('O2')).xyz()
atom1_xyz = np.array((atom1[0], atom1[1], atom1[2]))
atom2 = pose.residue(res_i + 1).atom(pose.residue(res_i + 1).atom_index('C6')).xyz()
atom2_xyz = np.array((atom2[0], atom2[1], atom2[2]))
mp_atom = (atom1_xyz + atom2_xyz) / 2
modified_residues.append([res_name, res_i + 1, mp_atom])
else:
continue
if len(modified_residues) == 2:
distance = np.linalg.norm(modified_residues[0][2] - modified_residues[1][2])
return distance
else:
return 0.0
## Phi-Psi Movers
vdw_small_mover = SmallMover(cenmap, 1.0, 1)
vdw_shear_mover = ShearMover(cenmap, 1.0, 1)
vdw_small_mover.angle_max(180)
vdw_small_mover.angle_max("H", 180)
vdw_small_mover.angle_max("E", 180)
vdw_small_mover.angle_max("L", 180)
vdw_shear_mover.angle_max(180)
vdw_shear_mover.angle_max("H", 180)
vdw_shear_mover.angle_max("E", 180)
vdw_shear_mover.angle_max("L", 180)
random_stage_0 = RandomMover()
random_stage_0.add_mover(vdw_small_mover, 4)
random_stage_0.add_mover(vdw_shear_mover, 4)
vdwrepeat = RepeatMover(random_stage_0, 5)
cen_small_mover = SmallMover(cenmap, 0.8, 1)
cen_shear_mover = ShearMover(cenmap, 0.8, 1)
cen_small_mover.angle_max(180)
cen_small_mover.angle_max("H", 180)
cen_small_mover.angle_max("E", 180)
cen_small_mover.angle_max("L", 180)
cen_shear_mover.angle_max(180)
cen_shear_mover.angle_max("H", 180)
cen_shear_mover.angle_max("E", 180)
cen_shear_mover.angle_max("L", 180)
random_stage_1 = RandomMover()
random_stage_1.add_mover(cen_small_mover, 4)
random_stage_1.add_mover(cen_shear_mover, 4)
if args. Three_Mer_Frag_Library:
random_stage_1.add_mover(fragmover3, 2)
full_small_mover = SmallMover(fullmap, 0.8, 1)
full_shear_mover = ShearMover(fullmap, 0.8, 1)
full_small_mover.angle_max(4)
full_small_mover.angle_max("H", 4)
full_small_mover.angle_max("E", 4)
full_small_mover.angle_max("L", 4)
full_shear_mover.angle_max(4)
full_shear_mover.angle_max("H", 4)
full_shear_mover.angle_max("E", 4)
full_shear_mover.angle_max("L", 4)
full_random = RandomMover()
full_random.add_mover(full_small_mover, 4)
full_random.add_mover(full_shear_mover, 4)
## Packing/Rotamer Movers
## Converting the Pose
switch = SwitchResidueTypeSetMover('fa_standard')
switch_cen = SwitchResidueTypeSetMover('centroid')
### The Task Operations
if p.is_centroid() == True:
switch.apply(p)
else:
sidechain_switch = SidechainManager(p)
sidechain_switch.apply(starting_p)
switch_cen.apply(starting_p)
fulltask = standard_packer_task(p)
fulltask.restrict_to_repacking()
if args.Disorder_Probability_Prediction_File or args.Linker_File or args.Loops_File:
task_residues = pyrosetta.rosetta.utility.vector1_bool()
for resi in range(1, p.total_residue() + 1):
if resi in sampled_residues:
task_residues.append(1)
else:
task_residues.append(0)
fulltask.restrict_to_residues(task_residues)
sidechain_restore = SidechainRestorer(restore_chi_p, task_residues)
if args.Loops_File:
loop_backrub = BackrubLoop(p, loop_residues)
full_random.add_mover(loop_backrub, 2)
sidechain_switch.apply(p)
switch_cen.apply(p)
if args.Loops_File:
loop_backrub_vdw = BackrubLoop(p, loop_residues, False)
loop_backrub_vdw.score_fxn(sf_stage_0)
random_stage_0.add_mover(loop_backrub_vdw, 1)
vdwrepeat = RepeatMover(random_stage_0, 7)
loop_backrub_centroid = BackrubLoop(p, loop_residues, False)
random_stage_1.add_mover(loop_backrub_centroid, 2)
### The Rotamer Movers
fullpack = PackRotamersMover(sf_stage_2, fulltask)
fullrottrial = RotamerTrialsMover(sf_stage_2, fulltask)
## Sequence Movers
sequence_stage_0 = SequenceMover()
sequence_stage_0.add_mover(vdwrepeat)
sequence_stage_0.add_mover(vdwmin)
## Setting up the Monte-Carlo
mc_stage_0 = MonteCarlo(p, sf_stage_0, 10.0)
mc_stage_1 = MonteCarlo(p, sf_stage_1, 1.0)
switch.apply(p)
sidechain_switch.apply(p)
sidechain_restore.apply(p)
mc_stage_2 = MonteCarlo(p, sf_stage_2, 1.0)
sidechain_switch.apply(p)
switch_cen.apply(p)
## Setting up Trial Movers
trial_stage_0 = TrialMover(sequence_stage_0, mc_stage_0)
trial_stage_1a = TrialMover(random_stage_1, mc_stage_1)
trial_stage_1b = TrialMover(cenmin, mc_stage_1)
trial_stage_2a = TrialMover(full_random, mc_stage_2)
trial_stage_2b = TrialMover(fullmin, mc_stage_2)
trial_stage_2c = TrialMover(fullrottrial, mc_stage_2)
trial_stage_0.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats)
trial_stage_1a.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats)
trial_stage_1b.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats)
trial_stage_2a.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats)
trial_stage_2b.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats)
trial_stage_2c.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats)
## Setting up Repeat Movers
stage_0 = RepeatMover(trial_stage_0, 100)
stage_1a = RepeatMover(trial_stage_1a, 19)
stage_1b = RepeatMover(trial_stage_1b, 1)
stage_2a = RepeatMover(trial_stage_2a, 14)
stage_2b = RepeatMover(trial_stage_2b, 1)
stage_2c = RepeatMover(trial_stage_2c, 1)
# Setting up FastRelax
relax = rosetta.protocols.relax.FastRelax()
relax.min_type('lbfgs_armijo_nonmonotone')
relax.dualspace(True)
relax.set_scorefxn(sf_relax)
relax.max_iter(200)
relax.set_movemap(fullmap)
if args.PYMOL:
pmm = PyMOLMover()
# The Simulation and Output
for i in range(ftnstruct):
p = Pose()
p.assign(starting_p)
for resi in linker_residues:
p.set_phi(resi, 180)
p.set_psi(resi, 180)
if args.PYMOL:
pmm.apply(p)
mc_stage_0.reset(p)
stage_0.apply(p)
if args.PYMOL:
pmm.apply(p)
mc_stage_1.reset(p)
stage_1_last_j_score = 0
stage_1_last_j = 0
for j in range(cycles*250):
stage_1a.apply(p)
mc_stage_1.recover_low(p)
if args.PYMOL:
pmm.apply(p)
stage_1b.apply(p)
mc_stage_1.recover_low(p)
if args.PYMOL:
pmm.apply(p)
if j % 50 == 0:
sf_stage_1.show(p)
print('Current Rg: ' + str(sfrg(p)))
## Move on to the next sampling step if sampling has converged
if j == 0:
stage_1_last_j_score = sf_stage_1.score(p)
stage_1_last_j = j
else:
if stage_1_last_j_score != sf_stage_1.score(p):
stage_1_last_j_score = sf_stage_1.score(p)
stage_1_last_j = j
else:
if j-stage_1_last_j > 3:
break
mc_stage_1.recover_low(p)
if args.PYMOL:
pmm.apply(p)
switch.apply(p)
sidechain_switch.apply(p)
sidechain_restore.apply(p)
mc_stage_2.reset(p)
stage_1_last_k_score = 0
stage_1_last_k = 0
for k in range(cycles*100):
stage_2a.apply(p)
mc_stage_2.recover_low(p)
if args.PYMOL:
pmm.apply(p)
stage_2b.apply(p)
mc_stage_2.recover_low(p)
if args.PYMOL:
pmm.apply(p)
stage_2a.apply(p)
mc_stage_2.recover_low(p)
if args.PYMOL:
pmm.apply(p)
stage_2c.apply(p)
mc_stage_2.recover_low(p)
if args.PYMOL:
pmm.apply(p)
stage_2b.apply(p)
mc_stage_2.recover_low(p)
if args.PYMOL:
pmm.apply(p)
if k % 25 == 0:
sf_stage_2.show(p)
print('Current Rg: ' + str(sfrg(p)))
## Move on to the next sampling step if sampling has converged
if k == 0:
stage_1_last_k_score = sf_stage_2.score(p)
stage_1_last_k = k
else:
if stage_1_last_k_score != sf_stage_2.score(p):
stage_1_last_k_score = sf_stage_2.score(p)
stage_1_last_k = k
else:
break
mc_stage_2.recover_low(p)
if args.PYMOL:
pmm.apply(p)
if args.DumpAll:
if args.Cluster_ID:
pdb_out = "FastFloppyLinker_out_" + str(args.Cluster_ID) + ".pdb"
outf = open("FastFloppyLinker.sc", 'a')
fcntl.flock (outf.fileno(), fcntl.LOCK_EX)
outf.seek(0, 2)
outf.write("%s\t%.3f\t%.3f\n" % (pdb_out, sf_stage_2(p), round(inter_fluor_distance(p), 2)))
outf.close()
else:
outf = open("FloppyTail.sc", 'a')
pdb_out = "FloppyTail_out_%i.pdb" %i
outf.write("%s\t%.3f\t%.3f\n" % (pdb_out, sf_stage_2(p), round(inter_fluor_distance(p), 2)))
p.dump_pdb(pdb_out)
relax.apply(p) # THIS IS NEW AND NOT IN THE ORIGINAL FASTFLOPPYTAIL
if args.Cluster_ID:
pdb_out = "FastFloppyLinker_Relaxed_out_" + str(args.Cluster_ID) + ".pdb"
outf = open("FastFloppyLinker_Relaxed.sc", 'a')
fcntl.flock (outf.fileno(), fcntl.LOCK_EX)
outf.seek(0, 2)
outf.write("%s\t%.3f\t%.3f\n" % (pdb_out, sf_stage_2(p), round(inter_fluor_distance(p), 2)))
outf.close()
else:
outf = open("FloppyTail_Relaxed.sc", 'a')
pdb_out = "FloppyTail_Relaxed_out_%i.pdb" %i
outf.write("%s\t%.3f\t%.3f\n" % (pdb_out, sf_stage_2(p), round(inter_fluor_distance(p), 2)))
p.dump_pdb(pdb_out)
outf.close()