Coverage for model_workflow/analyses/markov_2.py: 0%

50 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-23 10:54 +0000

1# Markov 

2 

3# Set the data needed to represent a Markov State Model graph in the client 

4# This is finding the most populated frames and the transition probabilities matrix between these frames 

5 

6# DANI: This analysis already works, but it is not currently used 

7# DANI: The MSM examples we have include transition probabilities riddled with zeros 

8# DANI: As a result, a subset with only the most populated frames barely captures any transition 

9# DANI: The data could be preprocessed to build clusters or something similar 

10 

11import mdtraj as mdt 

12 

13from model_workflow.tools.get_screenshot import get_screenshot 

14from model_workflow.utils.auxiliar import save_json 

15from model_workflow.utils.type_hints import * 

16 

def markov (
    input_topology_filename : str,
    input_trajectory_filename : str,
    output_analysis_filename : str,
    structure : 'Structure',
    populations : List[float],
    transitions : List[List[float]],
    nodes_number : int = 20,
) -> None:
    """Export the data needed to draw a Markov State Model graph.

    The 'nodes_number' frames with the highest population are selected. For these
    frames the function stores the matching subset of the transition probabilities
    matrix and takes one screenshot per frame found in the trajectory.

    Args:
        input_topology_filename: Topology file passed to mdtraj to read the trajectory.
        input_trajectory_filename: Trajectory file which is read frame by frame.
        output_analysis_filename: Path of the json file where results are saved.
        structure: Reference structure. It is copied, so the original is not mutated.
        populations: Population of every frame, indexed by frame number.
        transitions: Transition probabilities, indexed as transitions[frame][other_frame].
        nodes_number: Maximum number of frames (graph nodes) to be selected.

    Returns:
        None. Results are written to 'output_analysis_filename' with the keys
        'frames', 'populations' and 'transitions'. Screenshots are written to
        'markov_screenshot_XX.jpg' files.
    """

    print('-> Running Markov analysis')

    # If there is no populations then we stop here
    # Note that the length is checked explicitly so array-like inputs are supported as well
    if populations is None or len(populations) == 0:
        print(' There are no populations')
        return

    # If there is no transitions then we stop here
    if transitions is None or len(transitions) == 0:
        print(' There are no transitions')
        return

    # Get the numbers of frames with highest populations
    # Tuples are sorted by population first, thus ties are solved by the highest frame number
    ranked_frames = sorted(
        ((population, frame) for frame, population in enumerate(populations)),
        reverse=True,
    )[0:nodes_number]
    highest_populations = [ population for population, _ in ranked_frames ]
    highest_population_frames = [ frame for _, frame in ranked_frames ]

    # If no frame was selected (e.g. nodes_number is 0) then we stop here
    # Otherwise the first 'next' call below would raise a StopIteration
    if not highest_population_frames:
        print(' There are no frames to select')
        return

    print(' Reading most equilibrium populated frames in trajectory')
    # Read the trajectory frame by frame looking for the specified frames
    trajectory = mdt.iterload(input_trajectory_filename, top=input_topology_filename, chunk=1)
    # Set a generator for the frames to be selected once sorted
    # Frames must be in ascending order since the trajectory is read only once
    selected_frames = iter(sorted(highest_population_frames))
    next_frame = next(selected_frames)
    # Conserve only the desired frames
    frame_coordinates = {}
    for frame_number, frame in enumerate(trajectory):
        # Update the current frame log
        print(f' Frame {frame_number}', end='\r')
        # Skip the current frame if we do not need it
        if frame_number != next_frame:
            continue
        # Save it otherwise
        frame_coordinates[frame_number] = frame
        # Update the next frame and stop reading as soon as there are no more frames to find
        next_frame = next(selected_frames, None)
        if next_frame is None:
            break

    print(' Building transition probability matrix')
    # Get a subset of transitions only for the selected frames
    # Both rows and columns follow the order of 'highest_population_frames'
    transitions_matrix = [
        [ transitions[frame][other_frame] for other_frame in highest_population_frames ]
        for frame in highest_population_frames
    ]

    # Make a copy of the structure to avoid mutating the original structure
    reference_structure = structure.copy()
    print(' Taking screenshots of selected frames')
    frame_count = len(frame_coordinates)
    # For each frame coordinates, update the reference structure and take a screenshot
    # NOTE(review): screenshots are numbered in ascending frame number order, while the
    # NOTE(review): exported 'frames' are in descending population order -- confirm that
    # NOTE(review): whoever consumes these files does not expect both orders to match
    for i, frame in enumerate(frame_coordinates.values(), 1):
        # Update the current frame log
        print(f' Screenshot {i}/{frame_count}', end='\r')
        # Get the actual coordinates
        # mdtraj coordinates are in nanometers, so we multiply by 10 to restore Ångstroms
        coordinates = frame.xyz[0] * 10
        # Update the reference structure coordinates
        reference_structure.set_new_coordinates(coordinates)
        # Set the screenshot filename
        screenshot_filename = f'markov_screenshot_{str(i).zfill(2)}.jpg'
        # Generate the screenshot
        get_screenshot(reference_structure, screenshot_filename)

    # Export the analysis data to a json file
    data = {
        'frames': highest_population_frames,
        'populations': highest_populations,
        'transitions': transitions_matrix
    }
    save_json(data, output_analysis_filename)