Source code for src.read.strand_motifs_trajectory

import numpy as np
from typing import Union
from .config import symbol_config as read_symbol_config
from ..obj.motif_trajectory import MotifTrajectory
from ..obj.motif_vector import MotifVector, are_compatible_motif_vectors
from ..obj.times_vector import TimesVector
from ..obj.units import Unit
from warnings import warn

def strand_motifs_trajectory(filepaths : list,
        alphabet : list,
        motiflength : int = 4,
        times_unit : Unit = None,
        skiprows : int = 2
        ) -> MotifTrajectory:
    """
    reads from the complexes.txt of the RNAReactor simulation output and
    returns corresponding concentration vectors in motif space.

    PARAMETERS:
    -----------
    filepaths : string or list of strings
        Output file(s) of the RNAReactor simulation. A single string is
        wrapped into a one-element list.
    alphabet : list
        letters that may occur in the strand sequences
    motiflength : int, optional
        length of the motifs; default : 4
    times_unit : Unit, optional
        unit passed to the TimesVector; if None it is read from the symbol
        config via read_symbol_config('time', unitformat=True)
    skiprows : int, optional
        Skip the first `skiprows` lines, including comments, when reading
        the file; default : 2

    RETURN:
    -------
    strand_motifs_trajectory : MotifTrajectory

    RAISES:
    -------
    ValueError
        if filepaths is neither a string nor a list, or if the motif vectors
        read from a later file are not compatible with the first one.
    """
    # `filepaths is str` compared against the type object itself and was
    # never true for an actual string, so single paths were rejected.
    if isinstance(filepaths, str):
        filepaths = [filepaths]
    if not isinstance(filepaths, list):
        raise ValueError("filepaths needs to be list.")

    if times_unit is None:
        times_unit = read_symbol_config('time', unitformat=True)

    motif_vectors = []
    times = []
    for filepath in filepaths:
        _, current_times, sequence_trajectory = \
            steps_and_times_and_sequence_trajectory_from_complexes_txt(
                filepath, skiprows=skiprows)
        # files without any data rows contribute nothing
        if len(current_times) == 0:
            continue
        motif_vectors += _transform_sequence_trajectory_into_motif_vector_list(
            sequence_trajectory, alphabet, motiflength)
        # Checks against previously read data apply as soon as there is at
        # least one earlier time point (the former `> 1` skipped them when
        # the earlier files held exactly one time point).
        if times:
            if not are_compatible_motif_vectors(motif_vectors[0], motif_vectors[-1]):
                raise ValueError("Non compatible motif vectors.")
            if times[-1] > current_times[0]:
                warn("Times of initialized motif trajectory will not be chronologically.")
        times += list(current_times)

    times = TimesVector(times, times_unit)
    return MotifTrajectory(motif_vectors, times)
def steps_and_times_and_complexes_from_complexes_txt(filepath : str,
        skiprows : int = 2,
        ) -> Union[np.array, np.array, list]:
    """
    Read steps, times and complexes from a complexes.txt file.

    Parameters:
    -----------
    filepath : str,
        path of the complexes.txt file
    skiprows : int = 2,
        skip the first <skiprows> lines of the complex.txt file

    Returns:
    --------
    steps : nd-array with dtype int
    total_physical_time : nd-array with dtype np.float64
    complexes : list
        with every element of list is the list of complexes at that time
        the list of complexes at given time is again a list of the format
        complexes[time_index][complex_index]=[number_of_complex : int, structure_of_complex : str]

    For an empty file, or a file without any data rows after the skipped
    lines, all three return values are empty.
    """
    from json import loads

    # Reading one character is enough to detect an empty file; the context
    # manager closes the handle again.
    with open(filepath) as complexes_file:
        file_is_empty = complexes_file.read(1) == ''

    if file_is_empty:
        warn("complexes.txt file is empty")
        nl = np.empty((0,3), dtype=str)
    else:
        nl = np.loadtxt(filepath, skiprows=skiprows, dtype=str, ndmin = 2)
        if nl.size == 0:
            # only skipped/header lines present: loadtxt returns an array
            # with fewer than three columns, so normalize its shape
            nl = np.empty((0,3), dtype=str)

    steps = np.array(nl[:,0], dtype = int)
    total_physical_time = np.array(nl[:,1], dtype = np.float64)
    # The third column is parsed as JSON; the bare token None is not valid
    # JSON and is therefore replaced by the string "none" first.
    number_and_structure_of_complexes = [
        loads(entry.replace('None','"none"')) for entry in nl[:,2]
    ]
    return steps, total_physical_time, number_and_structure_of_complexes
def steps_and_times_and_sequence_trajectory_from_complexes_txt(filepath : str,
        skiprows : int = 2,
        ) -> Union[np.array, np.array, list]:
    """
    Read a complexes.txt file and count, for every time point, how often
    each strand sequence occurs.

    Parameters:
    -----------
    filepath : str,
        path of the complexes.txt file
    skiprows : int = 2,
        skip the first <skiprows> lines of the complex.txt file

    Returns:
    --------
    steps : nd-array with dtype int
    total_physical_time : nd-array with dtype np.float64
    sequence_number_trajectory : list of dict
        one dictionary per time point, of the format
        sequence_number_trajectory[time_index][strand_sequence : str] = number
        where number is the sum of number_of_complex over every occurrence
        of the strand (upper or lower) in the complexes at that time
    """
    steps, total_physical_time, number_and_structure_of_complexes = \
        steps_and_times_and_complexes_from_complexes_txt(filepath, skiprows=skiprows)

    sequence_number_trajectory = []
    for complexes_at_time in number_and_structure_of_complexes:
        sequence_numbers = {}
        for complex_entry in complexes_at_time:
            number_of_complex = complex_entry[0]
            structure_of_complex = complex_entry[1]
            upper_strands_as_continuous_string, lower_strands_as_continuous_string = \
                _extract_upper_and_lower_strands_as_continuous_strings_from_structure_of_complex(
                    structure_of_complex)
            # upper strands are counted first, then lower strands; each
            # strand of the complex contributes number_of_complex copies
            for strands_as_continuous_string in (upper_strands_as_continuous_string,
                                                 lower_strands_as_continuous_string):
                for strand_sequence in _cut_strands_as_continuous_string_into_separate_strand_sequences(
                        strands_as_continuous_string):
                    sequence_numbers[strand_sequence] = \
                        sequence_numbers.get(strand_sequence, 0) + number_of_complex
        sequence_number_trajectory.append(sequence_numbers)

    return steps, total_physical_time, sequence_number_trajectory
def _reverse_segments(list_of_strand_segments):
    """
    reverses a strand: the order of the segments is reversed and every
    segment itself is reversed
    """
    return [segment[::-1] for segment in list_of_strand_segments[::-1]]


def _extract_upper_and_lower_segments(structure_of_complex : list) -> Union[np.array,np.array]:
    """
    splits structure_of_complex into its first column (upper segments) and
    its second column (lower segments)
    """
    segment_pairs = np.array(structure_of_complex)
    return segment_pairs[:,0], segment_pairs[:,1]


# Default replacements applied in order by _transform_list_of_segments_to_string.
# Both entries of a pair are passed through str() before use.
_DEFAULT_STRINGS_THAT_ARE_REPLACED = (
    ('|',0), ('-',''), ('X',0), ('x',0), ('none',''), ('enon',''), ('00','0'),
)


def _transform_list_of_segments_to_string(list_of_segments : list,
        list_of_strings_that_are_replaced : list = None
        ) -> str:
    """
    transforms a list_of_segments into strands as continuous string

    Parameters:
    -----------
    list_of_segments : list or numpy.array of str
        is not modified
    list_of_strings_that_are_replaced : list of tuples, optional
        (old, new) pairs that are applied one after another to the joined
        string; default:
        [('|',0),('-',''),('X',0),('x',0),('none',''),('enon',''),('00','0')]

    Returns:
    --------
    strands_as_continuous_string : str
        if non-empty, it starts and ends with '0'
    """
    if list_of_strings_that_are_replaced is None:
        list_of_strings_that_are_replaced = _DEFAULT_STRINGS_THAT_ARE_REPLACED

    # work on a cleaned copy instead of overwriting the caller's segments
    cleaned_segments = [
        segment.replace(' ','').lstrip('5').rstrip('3')
        for segment in list_of_segments
    ]
    strands_as_continuous_string = ''.join(cleaned_segments)

    # NOTE(review): replace() is a single pass, so with the default list a
    # run of three or more '0' is not collapsed to a single '0' - confirm
    # whether such runs can occur in the simulation output.
    for old, new in list_of_strings_that_are_replaced:
        strands_as_continuous_string = strands_as_continuous_string.replace(str(old), str(new))

    if len(strands_as_continuous_string) > 0:
        if strands_as_continuous_string[0] != '0':
            strands_as_continuous_string = '0' + strands_as_continuous_string
        if strands_as_continuous_string[-1] != '0':
            strands_as_continuous_string = strands_as_continuous_string + '0'
    return strands_as_continuous_string


def _extract_upper_and_lower_strands_as_continuous_strings_from_structure_of_complex(structure_of_complex : list) -> Union[str,str]:
    """
    takes structure_of_complex from the RNA Strand Reactor output and
    returns the upper strands and the lower strands as continuous strings.

    Parameters:
    ----------
    structure_of_complex : list or numpy.array

    Returns:
    --------
    upper_strands_as_continuous_string : str
    lower_strands_as_continuous_string : str
        built from the reversed lower segments
    """
    # segments are continuous parts of a complex without the end or beginning of a strand
    list_of_upper_segments, list_of_lower_segments = _extract_upper_and_lower_segments(structure_of_complex)
    list_of_reversed_lower_segments = _reverse_segments(list_of_lower_segments)
    upper_strands_as_continuous_string = _transform_list_of_segments_to_string(list_of_upper_segments)
    lower_strands_as_continuous_string = _transform_list_of_segments_to_string(list_of_reversed_lower_segments)
    return upper_strands_as_continuous_string, lower_strands_as_continuous_string


def _cut_strands_as_continuous_string_into_separate_strand_sequences(strands_as_continuous_string : str) -> list:
    """
    cut the strands of a complex, given as continuous string, into its
    single continuous strands

    Only the parts enclosed between two '0' are returned; anything before
    the first or after the last '0' is dropped.
    """
    # split('0') yields the text before the first '0', between consecutive
    # '0's and after the last '0'; dropping the first and last piece keeps
    # exactly the enclosed parts
    return strands_as_continuous_string.split('0')[1:-1]


def _transform_sequence_trajectory_into_motif_vector_list(
        sequence_trajectory : list,
        alphabet : list,
        motiflength : int) -> list:
    """
    Returns:
    --------
    motif_vectors : list
        list of MotifVectors, one per entry of sequence_trajectory
    """
    return [
        _transform_sequence_vector_into_motif_vector(sequence_vector, alphabet, motiflength)
        for sequence_vector in sequence_trajectory
    ]


def _translate_letters_to_numbers(alphabet:list, zero_is_a_letter : bool = False):
    """
    dictionary to translate letters into numbers

    Letters are numbered by their position in alphabet. If zero_is_a_letter
    is set, '0' is mapped to 0 and the positions are shifted by one.
    """
    dct = {'0' : 0} if zero_is_a_letter else {}
    for ii, letter in enumerate(alphabet):
        dct[letter] = ii + zero_is_a_letter
    return dct


def _transform_motif_string_to_index_tuple(motif_string : str, alphabet : list) -> tuple:
    """
    transforms a motif, e.g. 'ATC', to the tuple of the positions of its
    letters in alphabet

    PARAMETERS:
    -----------
    motif_string : str
    alphabet : list

    RETURNS:
    --------
    indices : tuple of int

    RAISES:
    -------
    KeyError
        if motif_string contains a letter that is not in alphabet
    """
    transdict = _translate_letters_to_numbers(alphabet)
    return tuple(transdict[letter] for letter in motif_string)


def _transform_sequence_vector_into_motif_vector(sequence_vector : dict, alphabet : list, motiflength : int) -> "MotifVector":
    """
    transforms the strand counts of one time point into a MotifVector

    PARAMETERS:
    -----------
    sequence_vector : dict
        maps strand sequence (str) to its occupation number
    alphabet : list
    motiflength : int

    RETURNS:
    --------
    motif_vector : MotifVector

    Strands shorter than motiflength-1 are counted as a whole in the
    category motif_categories[0].format(strandlength). All other strands
    contribute their first motiflength-1 letters to motif_categories[-3]
    (beginning), their last motiflength-1 letters to motif_categories[-1]
    (ending) and every window of motiflength letters to
    motif_categories[-2] (continuation).
    """
    from ..obj.motif_vector import _create_empty_motif_vector_dct
    from ..domains.motif_space import _motif_categories

    motif_vector_dct = _create_empty_motif_vector_dct(motiflength,alphabet=alphabet)
    motif_categories = _motif_categories()

    for sequence, occupation_number in sequence_vector.items():
        strandlength = len(sequence)
        if strandlength < (motiflength-1):
            motif_category = motif_categories[0].format(strandlength)
            motif_index_tuple = _transform_motif_string_to_index_tuple(sequence, alphabet)
            motif_vector_dct[motif_category][motif_index_tuple] += occupation_number
            continue

        # add beginning
        motif_index_tuple = _transform_motif_string_to_index_tuple(
            sequence[:(motiflength-1)], alphabet)
        motif_vector_dct[motif_categories[-3]][motif_index_tuple] += occupation_number

        # add ending
        motif_index_tuple = _transform_motif_string_to_index_tuple(
            sequence[-(motiflength-1):], alphabet)
        motif_vector_dct[motif_categories[-1]][motif_index_tuple] += occupation_number

        # add continuations
        motif_category = motif_categories[-2]
        for motif_index in range(strandlength-motiflength+1):
            motif_index_tuple = _transform_motif_string_to_index_tuple(
                sequence[motif_index:motif_index+motiflength], alphabet)
            motif_vector_dct[motif_category][motif_index_tuple] += occupation_number

    # NOTE(review): the third argument '1' is presumably the unit of the
    # motif vector - confirm against MotifVector.
    motif_vector = MotifVector(motiflength,alphabet,'1')
    return motif_vector(motif_vector_dct)