Coverage for model_workflow/analyses/markov

1# Markov

3# Set the data needed to represent a Markov State Model graph in the client

4# This finds the most populated frames and the transition probability matrix between these frames

6# DANI: This analysis, although it already works, is not used

7# DANI: The MSM examples we have include transition probabilities riddled with 0s

8# DANI: Because of this, when taking a subset with only the most populated frames we barely capture any transition

9# DANI: The data could be preprocessed to build clusters or something similar

11import mdtraj as mdt

13from model_workflow.tools.get_screenshot import get_screenshot

14from model_workflow.utils.auxiliar import save_json

15from model_workflow.utils.type_hints import *

def markov (
    input_topology_filename : str,
    input_trajectory_filename : str,
    output_analysis_filename : str,
    structure : 'Structure',
    populations : List[float],
    transitions : List[List[float]],
    nodes_number : int = 20,
):
    """Set the data needed to represent a Markov State Model graph.

    Select the most populated frames, read them from the trajectory, take a
    screenshot of each one and save the selected frame numbers, their
    populations and the transition sub-matrix between them to a JSON file.

    Args:
        input_topology_filename: Topology file handed to mdtraj as ``top``.
        input_trajectory_filename: Trajectory file, read one frame at a time.
        output_analysis_filename: Path of the JSON file to be written.
        structure: Structure object. It is copied, and the copy receives the
            coordinates of every selected frame before its screenshot.
        populations: Population of every frame, indexed by frame number.
        transitions: Matrix indexed as ``transitions[frame][other_frame]``.
        nodes_number: Maximum number of frames (graph nodes) to keep.

    Returns:
        None. The function returns early, writing nothing, when there are no
        populations, no transitions or no frames to select.
    """
    print('-> Running Markov analysis')

    # If there are no populations then we stop here
    # Explicit length checks are kept (instead of truthiness) so array-like
    # inputs with ambiguous truth values are still accepted
    if populations is None or len(populations) == 0:
        print(' There are no populations')
        return

    # If there are no transitions then we stop here
    if transitions is None or len(transitions) == 0:
        print(' There are no transitions')
        return

    # Rank frames by population, highest first, and keep the top ones
    # Sorting (population, frame) tuples breaks ties by the higher frame number
    ranked_frames = sorted(
        ((population, frame) for frame, population in enumerate(populations)),
        reverse=True,
    )[0:nodes_number]
    highest_populations = [population for population, _ in ranked_frames]
    highest_population_frames = [frame for _, frame in ranked_frames]

    # Without any selected frame there is nothing to read or to plot
    # This used to fail with a bare StopIteration (e.g. nodes_number = 0)
    if not highest_population_frames:
        print(' There are no frames to select')
        return

    print(' Reading most equilibrium populated frames in trajectory')
    # Read the trajectory frame by frame looking for the specified frames
    trajectory = mdt.iterload(input_trajectory_filename, top=input_topology_filename, chunk=1)
    # Set a generator for the frames to be selected once sorted
    # Frames must be visited in ascending order since the trajectory is read only once
    selected_frames = iter(sorted(highest_population_frames))
    next_frame = next(selected_frames)
    # Conserve only the desired frames
    frame_coordinates = {}
    for frame_number, frame in enumerate(trajectory):
        # Update the current frame log
        print(f' Frame {frame_number}', end='\r')
        # Skip the current frame if we do not need it
        if frame_number != next_frame:
            continue
        # Save it otherwise
        frame_coordinates[frame_number] = frame
        # Update the next frame and stop reading once all frames are found
        next_frame = next(selected_frames, None)
        if next_frame is None:
            break

    print(' Building transition probability matrix')
    # Get a subset of transitions only for the selected frames
    # Rows and columns follow the population order of the selected frames
    transitions_matrix = [
        [ transitions[frame][other_frame] for other_frame in highest_population_frames ]
        for frame in highest_population_frames
    ]

    # Make a copy of the structure to avoid mutating the original structure
    reference_structure = structure.copy()

    print(' Taking screenshots of selected frames')
    frame_count = len(frame_coordinates)
    # For each frame coordinates, update the structure and take a screenshot
    # NOTE(review): screenshots are numbered in ascending frame order, while the
    # 'frames' list in the output data is ordered by population -- confirm the
    # client expects this mapping
    for i, frame in enumerate(frame_coordinates.values(), 1):
        # Update the current frame log
        print(f' Screenshot {i}/{frame_count}', end='\r')
        # Get the actual coordinates
        # We multiply by 10 to restore Ångstroms (mdtraj works in nanometers)
        coordinates = frame.xyz[0] * 10
        # Update the reference structure coordinates
        reference_structure.set_new_coordinates(coordinates)
        # Set the screenshot filename
        screenshot_filename = f'markov_screenshot_{str(i).zfill(2)}.jpg'
        # Generate the screenshot
        get_screenshot(reference_structure, screenshot_filename)

    # Export the analysis data to a json file
    data = {
        'frames': highest_population_frames,
        'populations': highest_populations,
        'transitions': transitions_matrix
    }
    save_json(data, output_analysis_filename)

Coverage for model_workflow/analyses/markov_2.py: 0%

50 statements