#FORMAT OF ORIGINAL FLOPPY TAIL [KLEIGER, G ET. AL. "RAPID E2-E3 ASSEMBLY AND DISASSEMBLY ENABLE PROCESSICE UBIQUITYLATION OF CULLIN-RING UBIQUITIN LIGASE SUBSTRATES" CELL. 139(5): 957-968 2009] ##CENTROID MODE [NOT DISCLOSED POTENTIALLY RAMA ONLY] ###19 CYCLES: RANDOM: SMALL_180(40%), SHEAR_180(40%), FRAGMENT_3MER(20%) ###20TH CYCLE: MINIMIZATION ####5000 TOTAL CYCLES -> RECOVER LOW ##FULL-ATOM MODE [SCORE12] ###START BY REPACKING ALL IN TAIL/VICINITY OF TAIL FOLLOWED BY MINIMIZATION ###14 CYCLES: RANDOM: SMALL_4(50%), SHEAR_4(50%) FOLLOWED BY SINGLE ROTAMER_TRIALS ###15TH CYCLE:MINIMIZATION ###30TH CYCLE:REPACK THEN MINIMIZATION ####3000 CYCLES -> RECOVER LOW # Parsing Arguements import argparse parser = argparse.ArgumentParser(description='Program') parser.add_argument('-in', '--Input_FASTA_File', action='store', type=str, required=False, help='Name of the text file containing the FASTA sequence of the protein of interest. Carot should not be in same line as sequence, UniProt format preferred.') parser.add_argument('-ftnstruct', '--Number_of_FloppyTail_Structures', action='store', type=str, required=False, default=400, help='Number of structures to sample during FloppyTail portion. Default = 400') parser.add_argument('-t_frag', '--Three_Mer_Frag_Library', action='store', type=str, required=False, help='Name of the file containing the three-mer fragment library generated by disorder corrected method') parser.add_argument('-linkers', '--Linker_File', action='store', type=str, required=False, help='File containing the starting and ending residue number for the linkers in the fusion protein') parser.add_argument('-loops', '--Loops_File', action='store', type=str, required=False, help='File containing the starting and ending residue numbers for the loops in the fusion protein') parser.add_argument('-m_num', '--Multimer_Number', action='store', type=int, required=False, default=1, help='Number of multimers whose linkers need to be sampled. Only supply for multimers not being run under symmetry') parser.add_argument('-m_anchor', '--Multimer_Anchor', action='store', type=str, required=False, help='Side of the fusion which forms the multimer. Options N or C for N or C-terminal fusion') parser.add_argument('-symm', '--Symmetry_File', action='store', type=str, required=False, help='Supply the symmetry file if you want the protein to be run under symmetry (all moves perfectly mirrored)') parser.add_argument('-cycles', '--FloppyTail_Cycles', action='store', type=str, required=False, default=1, help='Number of sampling cycles within each stage of FloppyTail, identical to increase_cycles flag in C++ ClassicAbInitio and AbRelax. Default 0') parser.add_argument('-refinesubset', '--Refine_Subset', action='store', type=int, required=False, default=0, help='Only subjects the lowest X% of structures to Relax refinement where X is specified as input following flag. Default 0') parser.add_argument('-relnstruct', '--Number_of_Relax_Structures', action='store', type=str, required=False, default=0, help='Number of independent full-atom Relax sampling trajectories from a single AbInitio structure. Default 1') parser.add_argument('-diso', '--Disorder_Probability_Prediction_File', action='store', type=str, required=False, help='File containing per residue disorder probability prediction in RaptorX format. Generally acquired from RaptorX prediciton.') parser.add_argument('-inpdb', '--Input_PDB_File', action='store', type=str, required=False, help='Name of the text file containing the PDB structure of the protein of interest. All residues are required, missing residues are not constructed') parser.add_argument('-pymol', '--PYMOL', action='store_true', required=False, help='Running with this flag will utilize PyMOL Mover for visualization') parser.add_argument('-rg', '--RG', action='store_true', required=False, help='Ability to provide radius of gyration via Ferrie et. al. JPCB 2020 rg score term') parser.add_argument('-clusterid', '--Cluster_ID', action='store', type=int, required=False, help='Batch submit script ID for output record keeping') parser.add_argument('-verbose', '--Verbose', action='store_true', required=False, help='Will unmute information coming from Rosetta') parser.add_argument('-dumpall', '--DumpAll', action='store_true', required=False, help='Dumps both the pre-relaxed and relaxed structures') args = parser.parse_args() # The Start-Up from pyrosetta import * #Comment if args.Verbose: init(extra_options = '-extra_patch_fa ../OGQ_SidechainConjugation.txt ../TMR_SidechainConjugation.txt') else: init(extra_options = '-mute all -extra_patch_fa ../OGQ_SidechainConjugation.txt ../TMR_SidechainConjugation.txt') #init() from pyrosetta.rosetta.core.scoring import * from pyrosetta.rosetta.core.scoring.methods import * from pyrosetta.rosetta.core.scoring.methods import EnergyMethodOptions from pyrosetta.rosetta.protocols.grafting import * from pyrosetta.rosetta.protocols.simple_moves import * from pyrosetta.rosetta.protocols.moves import * from pyrosetta.rosetta.core.fragment import * from pyrosetta.rosetta.protocols.minimization_packing import * from math import exp, log, pi, sqrt import random from random import randint from random import random as rnd import sys import os import numpy as np from numpy.random import choice import re import scipy.optimize as op from scipy.interpolate import interp1d import fcntl ## Imports from Parser and Defaults if args.Cluster_ID: ftnstruct = 1 elif args.Number_of_FloppyTail_Structures: ftnstruct = int(args.Number_of_FloppyTail_Structures) else: ftnstruct = 400 if args.FloppyTail_Cycles: cycles = int(args.FloppyTail_Cycles) else: cycles = 1 if args.Number_of_Relax_Structures: relnstruct = int(args.Number_of_Relax_Structures) else: relnstruct = 0 if args.Refine_Subset: refine_number = int((args.Refine_Subset/100)*int(ftnstruct)) else: refine_number = 0 # Importing sequence from FASTA if args.Input_FASTA_File: fasta_file = open(args.Input_FASTA_File, 'r') fasta_lines = fasta_file.readlines() fasta_counter = 0 fasta_sequence = ' ' for fasta_line in fasta_lines: if '>' not in fasta_line: if fasta_counter == 0: if '\n' in fasta_line: fasta_sequence = fasta_line.split('\n')[0] else: fasta_sequence = fasta_line fasta_counter = 1 else: if '\n' in fasta_line: fasta_sequence = fasta_sequence + fasta_line.split('\n')[0] else: fasta_sequence = fasta_sequence + fasta_line # Determining the Per Residue Disorder Probability and Number of Segments if args.Disorder_Probability_Prediction_File: disorder_dtypes = [('res_num', np.float_), ('AA', np.unicode_, 2), ('ast', np.unicode_, 1), ('disprob', np.float_)] disorder_dat = np.genfromtxt(args.Disorder_Probability_Prediction_File, dtype=disorder_dtypes, delimiter=' ', skip_header=3) diso_dat = np.empty((len(disorder_dat),1)) diso_segments = [] diso_cutoff = 0.5 seg_cutoff = 2 seg_res_counter = 0 per_residue_disorder = 0 diso_current = disorder_dat[0][3] segment_break_list = [] segment_switch = False for seg_residue in range(len(disorder_dat)): if disorder_dat[seg_residue][3] >= diso_cutoff and diso_current >= diso_cutoff: seg_res_counter = 0 elif disorder_dat[seg_residue][3] < diso_cutoff and diso_current < diso_cutoff: seg_res_counter = 0 elif disorder_dat[seg_residue][3] < diso_cutoff and diso_current >= diso_cutoff: seg_res_counter = seg_res_counter + 1 elif disorder_dat[seg_residue][3] >= diso_cutoff and diso_current < diso_cutoff: seg_res_counter = seg_res_counter + 1 if seg_res_counter > 1: seg_res_counter = 0 diso_current = disorder_dat[seg_residue][3] if diso_current >= diso_cutoff: transition_id = 'order' # meaning all residues prior to end are ordered else: transition_id = 'disorder' # meaning all residues prior to end are disordered segment_break_list.append((seg_residue-9, transition_id)) per_residue_disorder = per_residue_disorder + (disorder_dat[seg_residue][3]/float(len(disorder_dat))) if disorder_dat[len(disorder_dat)-1][3] >= diso_cutoff: segment_break_list.append((len(disorder_dat), 'disorder')) else: segment_break_list.append((len(disorder_dat), 'order')) # Adding PTMs def MakePTMMutations(pose, fasta_sequence): ptm_mutation_list = re.findall("\[(.*?)\]", fasta_sequence) residue_mutation_list = [] pose_sequence = pose.sequence() residue_number = 0 ptm_number = 0 count_residue = True for res in fasta_sequence: if res == '[': count_residue = False residue_mutation_list.append(residue_number) if count_residue == True: residue_number +=1 if res == ']': count_residue = True print(ptm_mutation_list) print(residue_mutation_list) for res_ptm_idx, res_ptm in enumerate(ptm_mutation_list): mutate = MutateResidue(residue_mutation_list[res_ptm_idx], res_ptm) mutate.apply(pose) return pose # The Poses p=Pose() if args.Input_FASTA_File: p = pose_from_sequence(fasta_sequence, "centroid") if args.Input_PDB_File: p = pose_from_pdb(str(args.Input_PDB_File)) if args.Input_FASTA_File: if p.sequence() != fasta_sequence: if '_p:' in fasta_sequence: p = MakePTMMutations(p, fasta_sequence) ## Write the FoldTree so to account for the multimerization domain if args.Multimer_Anchor: ft = FoldTree() if args.Multimer_Anchor == 'C': c_term_residue = p.total_residue() / args.Multimer_Number c_term_residue_list = [] for multimer_number in range(args.Multimer_Number): c_term_res = multimer_number (multimer_number * p.total_residue() / args.Multimer_Number) c_term_residue_list.append(c_term_res) if multimer_number == 0: ft.add_edge(c_term_res, 1, -1) else: ft.add_edge(c_term_res, c_term_residue_list[multimer_number - 1] + 1, -1) ft.add_edge(c_term_residue_list[multimer_number - 1], c_term_res, 1) # Setup the MoveMaps cenmap = MoveMap() cenmap.set_bb(True) fullmap = MoveMap() fullmap.set_bb(True) fullmap.set_chi(True) cenmap_min = MoveMap() cenmap_min.set_bb(True) fullmap_min = MoveMap() fullmap_min.set_bb(True) fullmap_min.set_chi(True) relaxmap = MoveMap() relaxmap.set_bb(True) relaxmap.set_chi(True) sampled_residues = [] if args.Disorder_Probability_Prediction_File: cenmap.set_bb(False) fullmap.set_bb(False) fullmap.set_chi(False) for res_seg_idx, res_seg_item in enumerate(segment_break_list): start_res = 1 if res_seg_idx > 0: start_res = segment_break_list[res_seg_idx-1][0] + 1 end_res = res_seg_item[0] + 1 if res_seg_item[1] == 'disorder': for res_idx in range(start_res,end_res+1): cenmap.set_bb(res_idx, True) fullmap.set_bb(res_idx, True) fullmap.set_chi(res_idx, True) sampled_residues.append(res_idx) else: continue # Only sample residues within the linker if args.Linker_File: cenmap.set_bb(False) fullmap.set_bb(False) fullmap.set_chi(False) linker_file = open(args.Linker_File, 'r') linker_lines = linker_file.readlines() linker_residues = [] for linker_line in linker_lines: start_res = int(linker_line.split(' ')[0]) end_res = int(linker_line.split(' ')[1]) ### Need to add the length of a monomer to each of the linker residue numbers to get it to sample all linkers for multimer_number in range(args.Multimer_Number): start_res_m = int(start_res + (multimer_number * p.total_residue() / args.Multimer_Number)) end_res_m = int(end_res + (multimer_number * p.total_residue() / args.Multimer_Number)) for residue in range(start_res, end_res + 1): cenmap.set_bb(residue, True) fullmap.set_bb(residue, True) fullmap.set_chi(residue, True) sampled_residues.append(residue) linker_residues.append(residue) # Only sample residues within the loops loop_residues = [] if args.Loops_File: loops_file = open(args.Loops_File, 'r') loops_lines = loops_file.readlines() for loops_line in loops_lines: start_res = int(loops_line.split(' ')[0]) end_res = int(loops_line.split(' ')[1]) ### Need to add the length of a monomer to each of the linker residue numbers to get it to sample all linkers for multimer_number in range(args.Multimer_Number): start_res_m = int(start_res + (multimer_number * p.total_residue() / args.Multimer_Number)) end_res_m = int(end_res + (multimer_number * p.total_residue() / args.Multimer_Number)) for residue in range(start_res, end_res + 1): loop_residues.append(residue) sampled_residues.append(residue) ''' if args.Start_Residue and args.End_Residue: cenmap.set_bb(False) fullmap.set_bb(False) fullmap.set_chi(False) ### Need to add the length of a monomer to each of the linker residue numbers to get it to sample all linkers for multimer_number in range(args.Multimer_Number): start_res = int(args.Start_Residue + (multimer_number * p.total_residue() / args.Multimer_Number)) end_res = int(args.End_Residue + (multimer_number * p.total_residue() / args.Multimer_Number)) for residue in range(start_res, end_res + 1): cenmap.set_bb(residue, True) fullmap.set_bb(residue, True) fullmap.set_chi(residue, True) ''' ## If protein is multimeric and is using a symmetry operation if args.Symmetry_File: symmetrize = pyrosetta.rosetta.protocols.symmetry.SetupForSymmetryMover(args.Symmetry_File) symmetrize.apply(p) ## Make Remaining Necessary Poses starting_p = Pose() starting_p.assign(p) restore_chi_p = Pose() restore_chi_p.assign(p) pvdw = Pose() pvdwc = Pose() pcen = Pose() # The Score Functions # Adding an RG Constraint if args.RG: #### Computing the Mean Hydrophobicity per Residue scaling_info_holder = np.zeros([len(segment_break_list),5]) # Need spots for segH,segQ,frac_pos,frac_neg,seq_len -> scalvQ,scalvH,scalv,rad_gyr,seq_len => scalvQ -> sf_rg_term_potential ##### Kyte, J. Doolitte, R.F. J. Mol. Biol. (1982) 157, 105-132 ##### Normalization proposed in Uversky, V.N. et. al. Proteins: Struc. Func. Gen. (2000), 41, 415-427 aa_hydro_idx = {'I':4.5, 'V':4.2, 'L':3.8, 'F':2.8, 'C':2.5, 'M':1.9, 'A':1.8, 'G':-0.4, 'T':-0.7, 'W':-0.9, 'S':-0.8, 'Y':-1.3, 'P':-1.6, 'H':-3.2, 'E':-3.5, 'Q':-3.5, 'D':-3.5, 'N':-3.5, 'K':-3.9, 'R':-4.5} for diso_segment_idx, diso_segment_item in enumerate(segment_break_list): start_res = 1 if diso_segment_idx > 0: start_res = segment_break_list[diso_segment_idx-1][0] + 1 end_res = diso_segment_item[0] + 1 seq_len = end_res-start_res pro_len = p.total_residue() scaling_info_holder[diso_segment_idx][4] = seq_len sequence_avg_H = 0 for res_idx in range(start_res-1, end_res-1, 1): window_start = 0 window_end = 0 window_H_avg = 0 if res_idx < 5: window_start = 0 window_end = res_idx + 1 elif pro_len - res_idx < 5: window_start = res_idx window_end = pro_len else: window_start = res_idx - 2 window_end = res_idx + 3 window = p.sequence()[window_start:window_end] for window_res_idx, window_res_item in enumerate(window): window_H_avg = window_H_avg + ((aa_hydro_idx[str(window_res_item)]+4.5)/9.0) window_H_avg = window_H_avg/(len(window)) sequence_avg_H = sequence_avg_H + window_H_avg/scaling_info_holder[diso_segment_idx][4] scaling_info_holder[diso_segment_idx][0] = sequence_avg_H #### Computing the Mean Net Charge per Residue for diso_segment_idx, diso_segment_item in enumerate(segment_break_list): start_res = 1 if diso_segment_idx > 0: start_res = segment_break_list[diso_segment_idx-1][0] + 1 end_res = diso_segment_item[0] + 1 seq_len = end_res-start_res scaling_info_holder[diso_segment_idx][4] = seq_len sequence_Q = 0 frac_pos = 0 # fraction of positively charged residues Arg Lys frac_neg = 0 # fraction of negatively charged residues Asp Glu for res_idx in range(start_res-1, end_res-1, 1): res_id = p.sequence()[res_idx] if res_id == 'E' or res_id == 'D': frac_neg = frac_neg + 1/seq_len sequence_Q = sequence_Q - 1/seq_len elif res_id == 'K' or res_id == 'R': frac_pos = frac_pos + 1/seq_len sequence_Q = sequence_Q + 1/seq_len else: continue sequence_Q = np.sqrt(sequence_Q**2) scaling_info_holder[diso_segment_idx][1] = sequence_Q scaling_info_holder[diso_segment_idx][2] = frac_pos scaling_info_holder[diso_segment_idx][3] = frac_neg #### Calculations from Hofmann, H. et al. PNAS (2012) 109, 40, 16155-16160 ##### Compute charge and hydrophobicity scaling options ###### Constants scalv = 0 con_a = 0.394 con_z = 0.09 con_x0 = 0.114 con_c = 1.72 con_d = 0.9 ###### Calculations ###### Folded vs Unfolded ###### From Uversky, if sequence_Q < 2.785*sequence_avg_H - 1.151 -> Folded, but fails for synuclein for QH_set_idx in range(len(scaling_info_holder)): if 'order' in segment_break_list[QH_set_idx]: scaling_info_holder[QH_set_idx][0] = 0.33 scaling_info_holder[QH_set_idx][1] = 0.33 else: scalv_Q = (1.0/3.0) + con_a*((1+np.exp(con_x0-scaling_info_holder[QH_set_idx][1]/con_z))**(-1)) scaling_info_holder[QH_set_idx][1] = scalv_Q scalv_H = (1.0/3.0) + con_a*((1+np.exp((con_x0+con_c*scaling_info_holder[QH_set_idx][0]-con_d)/con_z))**(-1)) scaling_info_holder[QH_set_idx][0] = scalv_H ###### Determining the appropriate scaling value ####### Constants elemchar = 1.602*10**(-19) ss_eo = 8.854*10**(-12) ss_er = 78.7 #permittivity of water at 298 K ss_kb = 1.38*10**(-23) # boltzmann constant ss_T = 298 # in Kelvin ss_I = 0.15 # in Molar ss_kappa = 1/(0.304*np.sqrt(ss_I)) # in nanometers ss_lB = (elemchar**2)/(4*np.pi*ss_eo*ss_er*ss_kb*ss_T) ####### Calculation for QH_set_idx in range(len(scaling_info_holder)): scalvstar = ((4*np.pi*ss_lB*((scaling_info_holder[QH_set_idx][2]-scaling_info_holder[QH_set_idx][3])**2))/(ss_kappa**2)) - ((np.pi*ss_lB*((scaling_info_holder[QH_set_idx][2]+scaling_info_holder[QH_set_idx][3])**2))/(ss_kappa)) if scaling_info_holder[QH_set_idx][2] == 0: if scaling_info_holder[QH_set_idx][3] == 0: scalv = scaling_info_holder[QH_set_idx][0] else: scalv = scaling_info_holder[QH_set_idx][1] elif scaling_info_holder[QH_set_idx][3] == 0: if scaling_info_holder[QH_set_idx][2] == 0: scalv = scaling_info_holder[QH_set_idx][0] else: scalv = scaling_info_holder[QH_set_idx][1] elif scalvstar < 0: scalv = scaling_info_holder[QH_set_idx][1] else: scalv = scaling_info_holder[QH_set_idx][0] scaling_info_holder[QH_set_idx][2] = scalv ##### Computing the Expected Radius of Gyration rg_b = 0.38 # in nanometers rg_lp = 0.53 # in nanometers for QH_set_idx in range(len(scaling_info_holder)): rad_gyr = (np.sqrt((2*rg_lp*rg_b)/((2*scaling_info_holder[QH_set_idx][2]+1)*(2*scaling_info_holder[QH_set_idx][2]+2)))*(scaling_info_holder[QH_set_idx][4]**scaling_info_holder[QH_set_idx][2]))*10 # in Angstroms scaling_info_holder[QH_set_idx][3] = rad_gyr ## Making the Actual Potential Energy Function ## Constants for Probability gamma = 1.1615 ## Concocting the Potential def pr_saw(var_a, input_vars): r, rg, g, delta = input_vars return (var_a[0]*4*np.pi/rg)*((r/rg)**(2+g))*np.exp(-var_a[1]*((r/rg)**delta)) def solve_pr_saw(var_a, *input_vars): r, rg, g, delta = input_vars return (np.sum((var_a[0]*4*np.pi/rg)*((r/rg)**(2+g))*np.exp(-var_a[1]*((r/rg)**delta)))-1, np.sum((var_a[0]*4*np.pi/rg)*((r/rg)**(2+g))*np.exp(-var_a[1]*((r/rg)**delta))*(r**2))-rg**2) rg_sf_term_potential_list = [] for QH_set_idx in range(len(scaling_info_holder)): var_g = (gamma-1)/scaling_info_holder[QH_set_idx][2] var_delta = 1/(1-scaling_info_holder[QH_set_idx][2]) r_set=np.arange(0.0,7*scaling_info_holder[QH_set_idx][3],0.01) saw_inputs = (r_set, scaling_info_holder[QH_set_idx][3], var_g, var_delta) a0 = [1.0, 1.0] a1 = op.fsolve(solve_pr_saw,a0,args=saw_inputs) rg_sf_term_potential = interp1d(r_set, (1-(pr_saw(a1, saw_inputs)/np.max(pr_saw(a1, saw_inputs))))) rg_sf_term_potential_list.append(rg_sf_term_potential) ## Making the Actual Score Term from pyrosetta.rosetta.core.scoring.methods import ContextIndependentOneBodyEnergy ## newer versions make this pyrosetta.rosetta @pyrosetta.EnergyMethod() ## for newer versions make pyrosetta.EnergyMethod() class SeqCorrRgMethod(WholeStructureEnergy): """A scoring method that using a predicted radius of gyration from the primary sequence to construct a polymer-scaled potential """ def __init__(self): """Construct LengthScoreMethod.""" WholeStructureEnergy.__init__(self, self.creator()) def finalize_total_energy(self, pose, sfxn, emap): """Calculate energy of res of pose and emap""" pose = pose ## for newer versions remove line e_val = 0 for segment_idx in range(len(segment_break_list)): if segment_break_list[segment_idx][1] == 'order': continue else: r_xyz = np.zeros([int(scaling_info_holder[segment_idx][4]), 3]) rg_sq = np.zeros([int(scaling_info_holder[segment_idx][4]), 1]) start_res = 1 if segment_idx > 0: start_res = segment_break_list[segment_idx-1][0] + 1 end_res = segment_break_list[segment_idx][0] + 1 for res_num in range(start_res, end_res, 1): for res_xyz in range(len(r_xyz[0])): r_xyz[res_num-start_res][res_xyz] = pose.residue(res_num).nbr_atom_xyz()[res_xyz] r_cen_mass = np.average(r_xyz, axis=0) for res_num in range(len(r_xyz)): rg_sq[res_num] = (np.linalg.norm(r_xyz[res_num] - r_cen_mass))**2 rg_val = np.sqrt(np.average(rg_sq)) if rg_val < 7*scaling_info_holder[segment_idx][3]: rg_sf_term_potential = rg_sf_term_potential_list[segment_idx] e_val = e_val + float(rg_sf_term_potential(rg_val)) else: rg_sf_term_potential = rg_sf_term_potential_list[segment_idx] e_val = e_val + float(rg_sf_term_potential(6.9*scaling_info_holder[segment_idx][3])) emap.set(self.scoreType, e_val) ## for newer versions remove .get() new_rg_score = SeqCorrRgMethod.scoreType ## VDW Repulsive Score Function sf_stage_0 = create_score_function('score0') if args.RG: sf_stage_0.set_weight(new_rg_score, (400/24)*5) ## Centroid Score Functions sf_stage_1 = create_score_function('cen_std') sf_stage_1.set_weight(rama, 1.0) sf_stage_1.set_weight(cenpack, 1.0) sf_stage_1.set_weight(hbond_lr_bb, 1.0) sf_stage_1.set_weight(hbond_sr_bb, 1.0) if args.RG: sf_stage_1.set_weight(new_rg_score, (400/24)*5) ## Full Atom Score Functions sf_stage_2 = create_score_function('ref2015') if args.RG: sf_stage_2.set_weight(new_rg_score, (400/24)*500) sf_relax = create_score_function('ref2015_cart') ## Radius of Gyration Reference Score Function sfrg = ScoreFunction() sfrg.set_weight(rg, 1.0) # The Movers ## Fragment Movers # Importing the Fragment Files and Constructing the Fragment Mover if args. Three_Mer_Frag_Library: fragset3 = ConstantLengthFragSet(3) fragset3.read_fragment_file(args.Three_Mer_Frag_Library) fragmover3 = ClassicFragmentMover(fragset3, cenmap) ## Minimization Movers vdwmin = MinMover() vdwmin.movemap(cenmap) vdwmin.score_function(sf_stage_0) vdwmin.min_type('linmin') cenmin = MinMover() cenmin.movemap(cenmap) cenmin.score_function(sf_stage_1) cenmin.min_type('linmin') fullmin = MinMover() fullmin.movemap(fullmap) fullmin.score_function(sf_stage_2) fullmin.min_type('linmin') ## SIDECHAIN MANAGER FOR SIDECHAIN CONJUGATION class SidechainManager(pyrosetta.rosetta.protocols.moves.Mover): """Helps for switching between FA and CEN""" def __init__(self, pose): """Actions to perform for initialization""" self.modified_residues = [] self.chi_angles = [] for res_i in range(p.total_residue()): res_name = p.residue(res_i + 1).name() if res_name == 'ASP:TMR' or res_name == 'CYS:OGQ': self.modified_residues.append([res_name, res_i + 1, p.residue(res_i + 1).chi()]) def apply(self, pose): if pose.residue(self.modified_residues[0][1]).name() == self.modified_residues[0][0]: for modified_residue in self.modified_residues: mutate_to = modified_residue[0].split(':')[0] make_mutation = pyrosetta.rosetta.protocols.simple_moves.MutateResidue(modified_residue[1], mutate_to) make_mutation.apply(pose) else: for modified_residue in self.modified_residues: mutate_to = modified_residue[0].split(':')[0] + '_p:' + modified_residue[0].split(':')[1] make_mutation = pyrosetta.rosetta.protocols.simple_moves.MutateResidue(modified_residue[1], mutate_to) make_mutation.apply(pose) pose.residue(modified_residue[1]).set_all_chi(modified_residue[2]) ## SIDECHAIN MANAGER FOR SIDECHAIN CONJUGATION class SidechainRestorer(pyrosetta.rosetta.protocols.moves.Mover): """Helps for switching between FA and CEN""" def __init__(self, restore_pose, vect_bool): """Actions to perform for initialization""" self.restore_pose = restore_pose self.vect_bool = vect_bool self.chi_mover = pyrosetta.rosetta.protocols.simple_moves.sidechain_moves.SetChiMover() def apply(self, pose): if pose.total_residue() == self.restore_pose.total_residue(): for res_i in range(1, pose.total_residue()+1): if self.vect_bool[res_i] == False: #pose.residue(res_i).set_all_chi(self.restore_pose.residue(res_i).chi()) chi_set = self.restore_pose.residue(res_i).chi() for chi_idx,chi_angle in enumerate(chi_set): self.chi_mover.angle(chi_angle) self.chi_mover.chinum(chi_idx+1) self.chi_mover.resnum(res_i) self.chi_mover.apply(pose) else: continue ## BACKRUB PROTOCOL FOR LOOPS class BackrubLoop(pyrosetta.rosetta.protocols.moves.Mover): """A customized version of the Backrub protocol that operates in PyRosetta""" def __init__(self, pose, residues, is_fullatom = True): """Actions to perform for initialization""" rosetta.protocols.moves.Mover.__init__(self) ## PROTOCOL OPTIONS self.pose = pose self.is_fullatom = is_fullatom self.residues = residues if self.is_fullatom == False: self.prob_rotamer_only = 0 self.prob_rotamer_post_backrub = 0 self.prob_rotamer_pbr = 0 else: self.prob_rotamer_only = 0.05 #0.25 self.prob_rotamer_post_backrub = 0.25 self.prob_rotamer_pbr = 0.1 # 0.25 self.num_moves = 10 ## SCORE FUNCTION if self.is_fullatom == False: self.score_function = create_score_function('cen_std') self.score_function.set_weight(rama, 1.0) self.score_function.set_weight(cenpack, 1.0) self.score_function.set_weight(hbond_lr_bb, 1.0) self.score_function.set_weight(hbond_sr_bb, 1.0) else: self.score_function = create_score_function('ref2015') ## MONTE CARLO OBJECTS self.mc1 = MonteCarlo(pose, self.score_function, 5.0) self.mc2 = MonteCarlo(pose, self.score_function, 10.0) ## SEETING UP THE BASE BACKRUB MOVER starting_pose = Pose() starting_pose.assign(pose) self.backrub_mover = pyrosetta.rosetta.protocols.backrub.BackrubMover() self.backrub_mover.set_max_angle_disp_4(3.0) self.backrub_mover.set_max_angle_disp_7(1.5) self.backrub_mover.set_max_angle_disp_slope(-0.03) self.backrub_mover.set_max_atoms(80) self.backrub_mover.test_move(pose) pose.assign(starting_pose) self.residue_1 = [] self.residue_2 = [] self.backrub_seg_ids = [] ### IDENTIFYING MOVABLE SEGMENTS for res1 in self.residues: for res2 in self.residues: self.residue_1.append(res1) self.residue_2.append(res2) self.res1ca = AtomID(2,res1) self.res2ca = AtomID(2,res2) self.backrub_seg_ids.append(self.backrub_mover.segment_id(self.res1ca,self.res2ca)) self.active_backrub_seg_ids = [i for i, seg_id in enumerate(self.backrub_seg_ids) if seg_id != 0] ### SETTING WEIGHTING TO BIAS SAMPLING OF LARGE MOTIONS self.sample_weights = [] for res_i in self.active_backrub_seg_ids: self.sample_weights.append(abs(self.residue_1[res_i] - self.residue_2[res_i])) total_weights = sum(self.sample_weights) for weight_idx,weight_i in enumerate(self.sample_weights): self.sample_weights[weight_idx] = weight_i / total_weights ### PERFORM INTIAL SAMPLING TO FINALIZE INITILAIZATION self.backrub_mover.set_next_segment_id(self.backrub_seg_ids[choice(self.active_backrub_seg_ids, p=self.sample_weights)]) self.backrub_mover.apply(pose) ### SETTING THE TASK if self.is_fullatom == True: self.task = standard_packer_task(pose) self.task.restrict_to_repacking() if args.Disorder_Probability_Prediction_File or args.Linker_File or args.Loops_File: task_residues = pyrosetta.rosetta.utility.vector1_bool() for resi in range(1, p.total_residue() + 1): if resi in sampled_residues: task_residues.append(1) else: task_residues.append(0) self.task.restrict_to_residues(task_residues) ### SETTING UP THE SIDECHAIN MOVER self.sidechainMC = pyrosetta.rosetta.protocols.simple_moves.sidechain_moves.SidechainMover() self.sidechainMC.set_task(self.task) self.sidechainMC.set_change_chi_without_replacing_residue(True) #sidechainMC.set_prob_random_pert_current(0.0) # 0.8 #sidechainMC.set_prob_uniform(0.55) # 0.2 #sidechainMC.set_prob_withinrot(0.0) # 0.0 self.packed_residue_list = self.sidechainMC.packed_residues() ## CONFIGURING THE ASSOCIATED SIDECHAIN MOVER def backrub_sidechain_mover(self, pose): last_backrub_seg_id = self.backrub_seg_ids.index(self.backrub_mover.last_segment_id()) side_move_start_res = self.residue_1[last_backrub_seg_id] side_move_end_res = self.residue_2[last_backrub_seg_id] is_pack_res = False res_attempt = 0 sc_res_num = 0 while is_pack_res == False: if res_attempt < 10: res_attempt = res_attempt + 1 #print(res_attempt) sc_res_num = randint(side_move_start_res,side_move_end_res) if sc_res_num in self.packed_residue_list: is_pack_res = True else: sc_res_num_rand = randint(1,len(self.packed_residue_list)) sc_res_num = self.packed_residue_list[sc_res_num_rand] is_pack_res = True self.sidechainMC.next_resnum(sc_res_num) self.sidechainMC.apply(pose) def n_moves(self, moves): self.num_moves = moves def score_fxn(self, score_fxn): self.score_function = score_fxn self.mc1 = MonteCarlo(self.pose, self.score_function, 5.0) self.mc2 = MonteCarlo(self.pose, self.score_function, 10.0) def apply(self, pose): self.mc2.reset(pose) for i in range(self.num_moves): if rnd() < self.prob_rotamer_only: self.sidechainMC.apply(pose) else: self.mc1.reset(pose) self.backrub_mover.set_next_segment_id(self.backrub_seg_ids[choice(self.active_backrub_seg_ids, p=self.sample_weights)]) self.backrub_mover.apply(pose) self.mc1.boltzmann(pose) if rnd() < self.prob_rotamer_post_backrub: self.backrub_sidechain_mover(pose) if rnd() < self.prob_rotamer_pbr: self.backrub_sidechain_mover(pose) self.mc2.boltzmann(pose) def inter_fluor_distance(pose): modified_residues = [] for res_i in range(pose.total_residue()): res_name = pose.residue(res_i + 1).name() if res_name == 'ASP:TMR': atom1 = pose.residue(res_i + 1).atom(pose.residue(res_i + 1).atom_index('O4')).xyz() atom1_xyz = np.array((atom1[0], atom1[1], atom1[2])) atom2 = pose.residue(res_i + 1).atom(pose.residue(res_i + 1).atom_index('C6')).xyz() atom2_xyz = np.array((atom2[0], atom2[1], atom2[2])) mp_atom = (atom1_xyz + atom2_xyz) / 2 modified_residues.append([res_name, res_i + 1, mp_atom]) elif res_name == 'CYS:OGQ': atom1 = pose.residue(res_i + 1).atom(pose.residue(res_i + 1).atom_index('O2')).xyz() atom1_xyz = np.array((atom1[0], atom1[1], atom1[2])) atom2 = pose.residue(res_i + 1).atom(pose.residue(res_i + 1).atom_index('C6')).xyz() atom2_xyz = np.array((atom2[0], atom2[1], atom2[2])) mp_atom = (atom1_xyz + atom2_xyz) / 2 modified_residues.append([res_name, res_i + 1, mp_atom]) else: continue if len(modified_residues) == 2: distance = np.linalg.norm(modified_residues[0][2] - modified_residues[1][2]) return distance else: return 0.0 ## Phi-Psi Movers vdw_small_mover = SmallMover(cenmap, 1.0, 1) vdw_shear_mover = ShearMover(cenmap, 1.0, 1) vdw_small_mover.angle_max(180) vdw_small_mover.angle_max("H", 180) vdw_small_mover.angle_max("E", 180) vdw_small_mover.angle_max("L", 180) vdw_shear_mover.angle_max(180) vdw_shear_mover.angle_max("H", 180) vdw_shear_mover.angle_max("E", 180) vdw_shear_mover.angle_max("L", 180) random_stage_0 = RandomMover() random_stage_0.add_mover(vdw_small_mover, 4) random_stage_0.add_mover(vdw_shear_mover, 4) vdwrepeat = RepeatMover(random_stage_0, 5) cen_small_mover = SmallMover(cenmap, 0.8, 1) cen_shear_mover = ShearMover(cenmap, 0.8, 1) cen_small_mover.angle_max(180) cen_small_mover.angle_max("H", 180) cen_small_mover.angle_max("E", 180) cen_small_mover.angle_max("L", 180) cen_shear_mover.angle_max(180) cen_shear_mover.angle_max("H", 180) cen_shear_mover.angle_max("E", 180) cen_shear_mover.angle_max("L", 180) random_stage_1 = RandomMover() random_stage_1.add_mover(cen_small_mover, 4) random_stage_1.add_mover(cen_shear_mover, 4) if args. Three_Mer_Frag_Library: random_stage_1.add_mover(fragmover3, 2) full_small_mover = SmallMover(fullmap, 0.8, 1) full_shear_mover = ShearMover(fullmap, 0.8, 1) full_small_mover.angle_max(4) full_small_mover.angle_max("H", 4) full_small_mover.angle_max("E", 4) full_small_mover.angle_max("L", 4) full_shear_mover.angle_max(4) full_shear_mover.angle_max("H", 4) full_shear_mover.angle_max("E", 4) full_shear_mover.angle_max("L", 4) full_random = RandomMover() full_random.add_mover(full_small_mover, 4) full_random.add_mover(full_shear_mover, 4) ## Packing/Rotamer Movers ## Converting the Pose switch = SwitchResidueTypeSetMover('fa_standard') switch_cen = SwitchResidueTypeSetMover('centroid') ### The Task Operations if p.is_centroid() == True: switch.apply(p) else: sidechain_switch = SidechainManager(p) sidechain_switch.apply(starting_p) switch_cen.apply(starting_p) fulltask = standard_packer_task(p) fulltask.restrict_to_repacking() if args.Disorder_Probability_Prediction_File or args.Linker_File or args.Loops_File: task_residues = pyrosetta.rosetta.utility.vector1_bool() for resi in range(1, p.total_residue() + 1): if resi in sampled_residues: task_residues.append(1) else: task_residues.append(0) fulltask.restrict_to_residues(task_residues) sidechain_restore = SidechainRestorer(restore_chi_p, task_residues) if args.Loops_File: loop_backrub = BackrubLoop(p, loop_residues) full_random.add_mover(loop_backrub, 2) sidechain_switch.apply(p) switch_cen.apply(p) if args.Loops_File: loop_backrub_vdw = BackrubLoop(p, loop_residues, False) loop_backrub_vdw.score_fxn(sf_stage_0) random_stage_0.add_mover(loop_backrub_vdw, 1) vdwrepeat = RepeatMover(random_stage_0, 7) loop_backrub_centroid = BackrubLoop(p, loop_residues, False) random_stage_1.add_mover(loop_backrub_centroid, 2) ### The Rotamer Movers fullpack = PackRotamersMover(sf_stage_2, fulltask) fullrottrial = RotamerTrialsMover(sf_stage_2, fulltask) ## Sequence Movers sequence_stage_0 = SequenceMover() sequence_stage_0.add_mover(vdwrepeat) sequence_stage_0.add_mover(vdwmin) ## Setting up the Monte-Carlo mc_stage_0 = MonteCarlo(p, sf_stage_0, 10.0) mc_stage_1 = MonteCarlo(p, sf_stage_1, 1.0) switch.apply(p) sidechain_switch.apply(p) sidechain_restore.apply(p) mc_stage_2 = MonteCarlo(p, sf_stage_2, 1.0) sidechain_switch.apply(p) switch_cen.apply(p) ## Setting up Trial Movers trial_stage_0 = TrialMover(sequence_stage_0, mc_stage_0) trial_stage_1a = TrialMover(random_stage_1, mc_stage_1) trial_stage_1b = TrialMover(cenmin, mc_stage_1) trial_stage_2a = TrialMover(full_random, mc_stage_2) trial_stage_2b = TrialMover(fullmin, mc_stage_2) trial_stage_2c = TrialMover(fullrottrial, mc_stage_2) trial_stage_0.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats) trial_stage_1a.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats) trial_stage_1b.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats) trial_stage_2a.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats) trial_stage_2b.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats) trial_stage_2c.keep_stats_type(pyrosetta.rosetta.protocols.moves.StatsType.no_stats) ## Setting up Repeat Movers stage_0 = RepeatMover(trial_stage_0, 100) stage_1a = RepeatMover(trial_stage_1a, 19) stage_1b = RepeatMover(trial_stage_1b, 1) stage_2a = RepeatMover(trial_stage_2a, 14) stage_2b = RepeatMover(trial_stage_2b, 1) stage_2c = RepeatMover(trial_stage_2c, 1) # Setting up FastRelax relax = rosetta.protocols.relax.FastRelax() relax.min_type('lbfgs_armijo_nonmonotone') relax.dualspace(True) relax.set_scorefxn(sf_relax) relax.max_iter(200) relax.set_movemap(fullmap) if args.PYMOL: pmm = PyMOLMover() # The Simulation and Output for i in range(ftnstruct): p = Pose() p.assign(starting_p) for resi in linker_residues: p.set_phi(resi, 180) p.set_psi(resi, 180) if args.PYMOL: pmm.apply(p) mc_stage_0.reset(p) stage_0.apply(p) if args.PYMOL: pmm.apply(p) mc_stage_1.reset(p) stage_1_last_j_score = 0 stage_1_last_j = 0 for j in range(cycles*250): stage_1a.apply(p) mc_stage_1.recover_low(p) if args.PYMOL: pmm.apply(p) stage_1b.apply(p) mc_stage_1.recover_low(p) if args.PYMOL: pmm.apply(p) if j % 50 == 0: sf_stage_1.show(p) print('Current Rg: ' + str(sfrg(p))) ## Move on to the next sampling step if sampling has converged if j == 0: stage_1_last_j_score = sf_stage_1.score(p) stage_1_last_j = j else: if stage_1_last_j_score != sf_stage_1.score(p): stage_1_last_j_score = sf_stage_1.score(p) stage_1_last_j = j else: if j-stage_1_last_j > 3: break mc_stage_1.recover_low(p) if args.PYMOL: pmm.apply(p) switch.apply(p) sidechain_switch.apply(p) sidechain_restore.apply(p) mc_stage_2.reset(p) stage_1_last_k_score = 0 stage_1_last_k = 0 for k in range(cycles*100): stage_2a.apply(p) mc_stage_2.recover_low(p) if args.PYMOL: pmm.apply(p) stage_2b.apply(p) mc_stage_2.recover_low(p) if args.PYMOL: pmm.apply(p) stage_2a.apply(p) mc_stage_2.recover_low(p) if args.PYMOL: pmm.apply(p) stage_2c.apply(p) mc_stage_2.recover_low(p) if args.PYMOL: pmm.apply(p) stage_2b.apply(p) mc_stage_2.recover_low(p) if args.PYMOL: pmm.apply(p) if k % 25 == 0: sf_stage_2.show(p) print('Current Rg: ' + str(sfrg(p))) ## Move on to the next sampling step if sampling has converged if k == 0: stage_1_last_k_score = sf_stage_2.score(p) stage_1_last_k = k else: if stage_1_last_k_score != sf_stage_2.score(p): stage_1_last_k_score = sf_stage_2.score(p) stage_1_last_k = k else: break mc_stage_2.recover_low(p) if args.PYMOL: pmm.apply(p) if args.DumpAll: if args.Cluster_ID: pdb_out = "FastFloppyLinker_out_" + str(args.Cluster_ID) + ".pdb" outf = open("FastFloppyLinker.sc", 'a') fcntl.flock (outf.fileno(), fcntl.LOCK_EX) outf.seek(0, 2) outf.write("%s\t%.3f\t%.3f\n" % (pdb_out, sf_stage_2(p), round(inter_fluor_distance(p), 2))) outf.close() else: outf = open("FloppyTail.sc", 'a') pdb_out = "FloppyTail_out_%i.pdb" %i outf.write("%s\t%.3f\t%.3f\n" % (pdb_out, sf_stage_2(p), round(inter_fluor_distance(p), 2))) p.dump_pdb(pdb_out) relax.apply(p) # THIS IS NEW AND NOT IN THE ORIGINAL FASTFLOPPYTAIL if args.Cluster_ID: pdb_out = "FastFloppyLinker_Relaxed_out_" + str(args.Cluster_ID) + ".pdb" outf = open("FastFloppyLinker_Relaxed.sc", 'a') fcntl.flock (outf.fileno(), fcntl.LOCK_EX) outf.seek(0, 2) outf.write("%s\t%.3f\t%.3f\n" % (pdb_out, sf_stage_2(p), round(inter_fluor_distance(p), 2))) outf.close() else: outf = open("FloppyTail_Relaxed.sc", 'a') pdb_out = "FloppyTail_Relaxed_out_%i.pdb" %i outf.write("%s\t%.3f\t%.3f\n" % (pdb_out, sf_stage_2(p), round(inter_fluor_distance(p), 2))) p.dump_pdb(pdb_out) outf.close()