# Source: https://github.com/smolkelab/Ribozyme_switch_generator
# File: Util_functions.py
# Tip revision: 786f72a93791e6d2e7bd35b2bb1ee66ed21a1680 authored by Calvin M Schmidt on 25 May 2020, 20:45:40 UTC
# Commit message: Initial commit
import time

def find_hairpins(structure):
    '''
    Finds the start and end indices of all the hairpins in a given dotbracket structure.
    A hairpin is an opening bond '(' whose next bracket character is a closing bond ')', i.e. only unbonded
    positions lie between the two. An opening bond that is never followed by a closing bond is not a hairpin and is
    not reported.
    :param structure: String denoting secondary structure in dotbracket notation.
    :return: List of two lists. The first contains integers denoting the start index of each hairpin, the second
        contains integers denoting the matching end indices, both in order of appearance.
    '''

    starts = []
    ends = []

    # Index of the most recent '(' that has not yet been followed by any ')'. None when there is no such bond.
    last_open = None

    # Single pass over the structure instead of repeated forward searches from every opening bond.
    for index, bond in enumerate(structure):

        if bond == '(':
            # A newer opening bond always supersedes the previous candidate.
            last_open = index

        elif bond == ')' and last_open is not None:
            # Nothing but unbonded positions separates the candidate from this closing bond, so it is a hairpin.
            starts.append(last_open)
            ends.append(index)

            # Further closing bonds belong to enclosing stems, not to a hairpin.
            last_open = None

    return [starts, ends]

def get_index_of_bonded(structure, starting_index):
    '''
    Given a dotbracket structure and the index of a nucleotide, finds the index of the nucleotide it is bonded to.
    The structure must be valid (equal numbers of ( and ), in correct order).
    :param structure: String denoting the structure being evaluated, in dotbracket notation.
    :param starting_index: Integer denoting nucleotide to start from. Must be less than the length of the structure.
    :return: Integer denoting index of bonded nucleotide. Returns -1 for unbonded nucleotides. Returns None if the
        symbol at the starting index is not one of '.', '(' or ')', or if no mate is found for it.
    '''

    symbol = structure[starting_index]

    # Unbonded nucleotides have no mate.
    if symbol == '.':
        return -1

    # Picks the scan direction: opening bonds look forward for their mate, closing bonds look backward.
    if symbol == '(':
        mate, step, stop = ')', 1, len(structure)
    elif symbol == ')':
        mate, step, stop = '(', -1, -1
    else:
        return None

    # Counts bonds of the same kind met along the way; each of them needs its own mate before ours can be found.
    nested = 0
    for position in range(starting_index + step, stop, step):
        current = structure[position]
        if current == symbol:
            nested += 1
        elif current == mate:
            if nested == 0:
                return position
            nested -= 1

    # Ran off the end of the structure without finding a mate.
    return None

def get_ribozyme_stem_length(sequence, structure, ribozyme_parts):
    '''
    Given a formed ribozyme, gets the length of stem 1 and stem 2 and how far the given structure deviates from them.
    Each part is located with sequence.find, so the first occurrence of each part's sequence is the one used.
    :param sequence: String denoting the sequence being evaluated.
    :param structure: String denoting the structure being evaluated, in dotbracket notation. It is indexed with
        positions found in the sequence.
    :param ribozyme_parts: List of lists containing information on the different parts of the ribozyme. Each list has a
        sequence and structure as a string of the part. Must have 3 parts: A left side that includes the stem of the
        first loop, a top side that includes the stems of both loops and the catalytic core, and a right side that
        includes the stem of the second loop.
    :return: List of lists. First list is a list of integers denoting the base lengths of the ribozyme stems. The second
        list contains integers denoting the tested structure's deviation from the base lengths (negative for a
        reduced stem, positive for an extended stem). Returns [[0, 0], [0, 0]] if any part is not found in the
        sequence or if the structure does not match the ribozyme parts.
    '''

    # Gets the base length of stem 1 and stem 2.
    stem1_length = 0
    # Goes to the start of the loop and works backward until there is no more bonding.
    for five_stem1, three_stem1 in zip(reversed(ribozyme_parts[0][1]), ribozyme_parts[1][1]):
        if five_stem1 != '(' or three_stem1 != ')':
            break
        stem1_length += 1

    stem2_length = 0
    for five_stem2, three_stem2 in zip(reversed(ribozyme_parts[1][1]), ribozyme_parts[2][1]):
        if five_stem2 != '(' or three_stem2 != ')':
            break
        stem2_length += 1

    stem_lengths = [stem1_length, stem2_length]
    modifications = []

    # Locates each part once. A part missing from the sequence (find gives -1) means there is no ribozyme to
    # measure, so the mismatch result is returned instead of doing index arithmetic on -1.
    part_starts = [sequence.find(ribozyme_parts[part][0]) for part in range(3)]
    if min(part_starts) < 0:
        return [[0, 0], [0, 0]]

    # Checks each stem for an extended or reduced stem.
    for i in range(2):

        # Starts with a change of zero.
        modification = 0

        # Last nucleotide of the part before the loop and first nucleotide of the part after it.
        loop_start = part_starts[i] + len(ribozyme_parts[i][0]) - 1
        loop_end = part_starts[i + 1]

        # Checks to see if the stem is reduced.
        if structure[loop_start] == '.':

            # Runs down the stem, checking each nucleotide for a bond.
            for j in range(1, stem_lengths[i]):
                if structure[loop_start - j] == '(' and structure[loop_end + j] == ')':

                    # Makes sure that the bond is to the correct nucleotide.
                    if get_index_of_bonded(structure, loop_start - j) == loop_end + j:
                        modification = -j
                        break
                    else:
                        return [[0, 0], [0, 0]]

                # Anything other than both positions being unbonded means the stem is malformed.
                elif not (structure[loop_start - j] == '.' and structure[loop_end + j] == '.'):
                    return [[0, 0], [0, 0]]

        # Checks to see if the stem is extended.
        else:
            added_length = 0

            # Runs up the stem, checking each nucleotide for a bond.
            while True:
                if structure[loop_start + added_length + 1] == '(' and structure[loop_end - added_length - 1] == ')':

                    # Makes sure that the bond is to the correct nucleotide.
                    # NOTE(review): the bracket test above looks at offset added_length + 1, while this pairing test
                    # looks at offset added_length (the previous pair). Left unchanged; confirm the lag is intended.
                    if get_index_of_bonded(structure, loop_start + added_length) == loop_end - added_length:
                        added_length += 1
                    else:
                        modification = added_length
                        break
                else:
                    modification = added_length
                    break

        modifications.append(modification)

    # Checks that the whole first part of the ribozyme is correct, except for any reduced stem.
    end_index = part_starts[0] + len(ribozyme_parts[0][0]) - 1
    for i in range(end_index + modifications[0],
                   part_starts[0] - 1, -1):

        # Distance from the end of the part, counted from 1. Positions past the end (extended stem) are skipped.
        offset = end_index - i + 1
        if offset > 0:
            if structure[i] != ribozyme_parts[0][1][-offset]:
                return [[0, 0], [0, 0]]

    # Checks that the whole second part of the ribozyme is correct, except for any reduced stem.
    end_index = part_starts[1] + len(ribozyme_parts[1][0]) - 1
    start_index = part_starts[1] - 1
    for i in range(end_index + modifications[1],
                   start_index - modifications[0], -1):

        # NOTE(review): the strict '<' means the first nucleotide of the part (offset equal to the part length) is
        # never compared. Left unchanged; confirm whether '<=' was intended.
        offset = end_index - i + 1
        if 0 < offset < len(ribozyme_parts[1][0]):
            if structure[i] != ribozyme_parts[1][1][-offset]:
                return [[0, 0], [0, 0]]

    # Checks that the whole third part of the ribozyme is correct, except for any reduced stem.
    end_index = part_starts[2] + len(ribozyme_parts[2][0]) - 1
    start_index = part_starts[2] - 1
    for i in range(end_index,
                   start_index - modifications[1], -1):

        # NOTE(review): same strict '<' as for the second part; the first nucleotide of the part is never compared.
        offset = end_index - i + 1
        if 0 < offset < len(ribozyme_parts[2][0]):
            if structure[i] != ribozyme_parts[2][1][-offset]:
                return [[0, 0], [0, 0]]

    return [stem_lengths, modifications]

class ProgressBar:
    '''
    Class that can be used to keep track of how far along a process has gotten, and can give a text progress bar and
    estimate the amount of time left of the process.
    '''

    def __init__(self, full_count):
        '''
        Initializes with a count of 0 and an idea of how many times the process will loop before completion. Also
        records the time the process began.
        :param full_count: Integer denoting how many times the process will loop before completion.
        :return: None.
        '''

        self.full_count = full_count
        self.count = 0
        self.start_time = time.time()

    def update(self):
        '''
        Updates the count to reflect more progress.
        :return: None.
        '''

        self.count += 1

    def get_bar(self):
        '''
        Calculates how far the process has gotten to completion and gives a text bar for display. The bar is always
        50 characters wide between the brackets. A process with nothing to do (full_count of zero or less) is shown
        as complete.
        :return: String of a text progress bar.
        '''

        bar_width = 50

        # Avoids a divide by zero error when there is nothing to count.
        if self.full_count > 0:
            fraction = self.count / float(self.full_count)
        else:
            fraction = 1.0

        # Calculates how many progress bars to use, keeping the bar at a fixed width even if the count overshoots.
        progress = min(bar_width, max(0, int(bar_width * fraction)))
        progress_bar = (progress * "=") + ((bar_width - progress) * "_")
        return "[" + progress_bar + "] " + str(self.count) + "/" + str(self.full_count)

    def get_time_remaining(self):
        '''
        Calculates how much time the process has left by calculating the average time per increment and multiplying that
        by the number of increments left. Never negative: once the count reaches or passes full_count, 0.0 is returned.
        :return: Float value denoting how many seconds remain.
        '''

        time_spent = time.time() - self.start_time

        # Avoids a divide by zero error if called before updating, by treating the time spent so far as one increment.
        increments_done = self.count if self.count > 0 else 1
        remaining = time_spent / increments_done * (self.full_count - self.count)

        # An overshot count would otherwise give a negative estimate.
        return max(0.0, remaining)
# (web page navigation residue removed: "back to top")