https://github.com/smolkelab/Ribozyme_switch_generator
Tip revision: 786f72a93791e6d2e7bd35b2bb1ee66ed21a1680 authored by Calvin M Schmidt on 25 May 2020, 20:45:40 UTC
Initial commit
Initial commit
Tip revision: 786f72a
Util_functions.py
import time
def find_hairpins(structure):
    '''
    Finds the start and end indices of all the hairpins in a given dotbracket structure.

    A hairpin is reported for every '(' whose next bracket character (ignoring '.') is a ')'. The start is the index
    of that '(' and the end is the index of that ')'.

    :param structure: String denoting secondary structure in dotbracket notation.
    :return: List of two parallel lists, [starts, ends]. starts[k] and ends[k] are integers denoting the start and
        end indices of the k-th hairpin, ordered from left to right. Both lists are empty if there are no hairpins.
        An opening bracket that is never followed by a closing bracket does not produce a hairpin.
    '''
    starts = []
    ends = []
    # Index of the most recent '(' that has not yet been followed by any ')'; -1 when there is none.
    last_open = -1
    # A single left-to-right pass: the innermost pair of a hairpin is a '(' followed (after optional dots) by a ')'.
    for index, bond in enumerate(structure):
        if bond == '(':
            last_open = index
        elif bond == ')' and last_open >= 0:
            starts.append(last_open)
            ends.append(index)
            # Further ')' characters close outer stems, not new hairpins, until another '(' is seen.
            last_open = -1
    return [starts, ends]
def get_index_of_bonded(structure, starting_index):
    '''
    Locates the pairing partner of one nucleotide in a dotbracket structure.

    The structure is expected to be valid (every '(' has a matching ')' in the correct order). If no partner can be
    found, or the character at starting_index is not one of '.', '(' or ')', the function returns None.

    :param structure: String denoting the structure being evaluated, in dotbracket notation.
    :param starting_index: Integer denoting nucleotide to start from. Must be less than the length of the structure.
    :return: Integer denoting index of bonded nucleotide. Returns -1 for unbonded nucleotides.
    '''
    origin = structure[starting_index]
    # Unpaired nucleotides have no partner.
    if origin == '.':
        return -1
    if origin == '(':
        # An opening bracket pairs with a closing bracket further along the structure.
        partner, same = ')', '('
        candidates = range(starting_index + 1, len(structure))
    elif origin == ')':
        # A closing bracket pairs with an opening bracket earlier in the structure.
        partner, same = '(', ')'
        candidates = range(starting_index - 1, -1, -1)
    else:
        return None
    # Counts nested pairs opened during the scan that still need to be closed before the partner is reached.
    depth = 0
    for position in candidates:
        symbol = structure[position]
        if symbol == same:
            depth += 1
        elif symbol == partner:
            if depth == 0:
                return position
            depth -= 1
    return None
def _part_structure_matches(structure, part_structure, end_index, scan_from, scan_stop, max_offset):
    '''
    Compares a window of the folded structure against the expected structure of one ribozyme part, walking backward
    from scan_from down to (but not including) scan_stop.

    :param structure: String denoting the structure being evaluated, in dotbracket notation.
    :param part_structure: String denoting the expected dotbracket structure of the part.
    :param end_index: Integer index in structure of the last nucleotide of the part.
    :param scan_from: Integer index at which the backward walk begins.
    :param scan_stop: Integer index at which the backward walk stops (exclusive).
    :param max_offset: Integer. Only positions whose distance from the end of the part (1 for the last nucleotide)
        is greater than 0 and less than this value are compared.
    :return: True if every compared position matches the expected structure, otherwise False.
    '''
    for i in range(scan_from, scan_stop, -1):
        # 1 for the last nucleotide of the part, 2 for the one before it, and so on. Values <= 0 lie past the end of
        # the part (an extended stem) and are skipped without indexing.
        offset = end_index - i + 1
        if 0 < offset < max_offset and structure[i] != part_structure[-offset]:
            return False
    return True


def get_ribozyme_stem_length(sequence, structure, ribozyme_parts):
    '''
    Given a formed ribozyme, gets the length of stem 1 and stem 2.

    :param sequence: String denoting the sequence being evaluated.
    :param structure: String denoting the structure being evaluated, in dotbracket notation.
    :param ribozyme_parts: List of lists containing information on the different parts of the ribozyme. Each list has a
        sequence and structure as a string of the part. Must have 3 parts: A left side that includes the stem of the
        first loop, a top side that includes the stems of both loops and the catalytic core, and a right side that
        includes the stem of the second loop.
    :return: List of lists. First list is a list of integers denoting the base lengths of the ribozyme stems. The
        second list contains integers denoting the tested structures' deviation from the base lengths (negative for
        a reduced stem, positive for an extended stem). Returns [[0, 0], [0, 0]] if any part's sequence is not found
        in the sequence, or if the structure does not match the expected ribozyme structure.
    '''
    # Gets the base length of stem 1 and stem 2 by pairing the end of one part's structure with the start of the
    # next part's structure and counting matched '(' / ')' pairs until the first mismatch.
    stem_lengths = []
    for i in range(2):
        length = 0
        for five_side, three_side in zip(reversed(ribozyme_parts[i][1]), ribozyme_parts[i + 1][1]):
            if five_side != '(' or three_side != ')':
                break
            length += 1
        stem_lengths.append(length)

    # Locates each part once. str.find returns -1 for a missing part; without this guard the index arithmetic below
    # would silently wrap around to the end of the structure.
    part_starts = [sequence.find(ribozyme_parts[i][0]) for i in range(3)]
    if min(part_starts) < 0:
        return [[0, 0], [0, 0]]
    part_ends = [part_starts[i] + len(ribozyme_parts[i][0]) - 1 for i in range(3)]

    modifications = []
    # Checks each stem for an extended or reduced stem.
    for i in range(2):
        # Starts with a change of zero.
        modification = 0
        # Last nucleotide of the 5' side of the stem and first nucleotide of the 3' side of the stem.
        loop_start = part_ends[i]
        loop_end = part_starts[i + 1]

        # Checks to see if the stem is reduced.
        if structure[loop_start] == '.':
            # Runs down the stem, checking each nucleotide for a bond.
            for j in range(1, stem_lengths[i]):
                if structure[loop_start - j] == '(' and structure[loop_end + j] == ')':
                    # Makes sure that the bond is to the correct nucleotide.
                    if get_index_of_bonded(structure, loop_start - j) == loop_end + j:
                        modification = -j
                        break
                    return [[0, 0], [0, 0]]
                elif not (structure[loop_start - j] == '.' and structure[loop_end + j] == '.'):
                    # Anything other than a matched pair or two unpaired nucleotides is not a valid reduced stem.
                    return [[0, 0], [0, 0]]
        # Checks to see if the stem is extended.
        else:
            five_index = loop_start + 1
            three_index = loop_end - 1
            # Runs up the stem into the loop. Each candidate pair must be '(' and ')' AND be bonded to each other;
            # the bond check is made on the same pair whose brackets were just inspected. The bound
            # five_index < three_index keeps both indices inside the loop region.
            while (five_index < three_index
                   and structure[five_index] == '('
                   and structure[three_index] == ')'
                   and get_index_of_bonded(structure, five_index) == three_index):
                modification += 1
                five_index += 1
                three_index -= 1
        modifications.append(modification)

    # Checks that the whole first part of the ribozyme is correct, except for any reduced stem. The range never goes
    # below the start of the part, so every remaining position of the part is compared.
    if not _part_structure_matches(structure, ribozyme_parts[0][1], part_ends[0],
                                   part_ends[0] + modifications[0], part_starts[0] - 1,
                                   len(ribozyme_parts[0][0]) + 1):
        return [[0, 0], [0, 0]]

    # Checks that the whole second part of the ribozyme is correct, except for any reduced stem on either side.
    # NOTE(review): the strict upper bound means the first nucleotide of this part is never compared; kept as is,
    # confirm whether an inclusive bound was intended.
    if not _part_structure_matches(structure, ribozyme_parts[1][1], part_ends[1],
                                   part_ends[1] + modifications[1], part_starts[1] - 1 - modifications[0],
                                   len(ribozyme_parts[1][0])):
        return [[0, 0], [0, 0]]

    # Checks that the whole third part of the ribozyme is correct, except for any reduced stem.
    # NOTE(review): as above, the first nucleotide of this part is never compared.
    if not _part_structure_matches(structure, ribozyme_parts[2][1], part_ends[2],
                                   part_ends[2], part_starts[2] - 1 - modifications[1],
                                   len(ribozyme_parts[2][0])):
        return [[0, 0], [0, 0]]

    return [stem_lengths, modifications]
class ProgressBar:
    '''
    Class that can be used to keep track of how far along a process has gotten, and can give a text progress bar and
    estimate the amount of time left of the process.
    '''

    def __init__(self, full_count):
        '''
        Initializes with a count of 0 and an idea of how many times the process will loop before completion. Also
        records the time the process began.

        :param full_count: Integer denoting how many times the process will loop before completion.
        :return: None.
        '''
        self.full_count = full_count
        self.count = 0
        self.start_time = time.time()

    def update(self):
        '''
        Updates the count to reflect more progress.

        :return: None.
        '''
        self.count += 1

    def get_bar(self):
        '''
        Calculates how far the process has gotten to completion and gives a text bar for display.

        The bar is always exactly 50 characters wide between the brackets. A process with nothing to do
        (full_count of 0 or less) is shown as complete, and a count beyond full_count is shown as a full bar.

        :return: String of a text progress bar, followed by "count/full_count".
        '''
        width = 50
        # Guards the division: a process with no iterations has nothing left to do.
        if self.full_count > 0:
            fraction = self.count / float(self.full_count)
        else:
            fraction = 1.0
        # Calculates how many progress bars to use, clamped so the bar keeps a fixed width.
        progress = min(width, max(0, int(width * fraction)))
        progress_bar = (progress * "=") + ((width - progress) * "_")
        return "[" + progress_bar + "] " + str(self.count) + "/" + str(self.full_count)

    def get_time_remaining(self):
        '''
        Calculates how much time the process has left by calculating the average time per increment and multiplying
        that by the number of increments left.

        :return: Float value denoting how many seconds remain.
        '''
        time_spent = time.time() - self.start_time
        # Avoids a divide by zero error if called before updating: the elapsed time so far is then treated as the
        # time of a single increment.
        completed = self.count if self.count > 0 else 1
        return time_spent / completed * (self.full_count - self.count)