# %%
from __future__ import annotations
import os
import platform
import warnings
from typing import TYPE_CHECKING
import numpy as np
try:
import matplotlib.pyplot as plt
except ImportError:
plt = None
try:
import pymol
from pymol import cmd
except ImportError:
pymol = None
cmd = None
try:
import nglview as ngl
except ImportError:
ngl = None
if TYPE_CHECKING:
try:
from nglview import NGLWidget
except ImportError:
NGLWidget = None
def _check_mpl_installation():
"""Check if matplotlib is installed."""
if plt is None:
msg = (
"install matplotlib using conda install -c conda-forge"
"matplotlib or pip install matplotlib"
)
raise Exception(msg)
return plt
def _append_new_result(
water_type: str,
waterO: list,
waterH1: list | None,
waterH2: list | None,
fname: str,
):
if isinstance(waterO, np.ndarray):
waterO = waterO.tolist()
with open(fname, "a") as f:
if waterH1 is not None and waterH2 is not None:
if isinstance(waterH1, np.ndarray):
waterH1 = waterH1.tolist()
if isinstance(waterH2, np.ndarray):
waterH2 = waterH2.tolist()
print(water_type, *waterO, *waterH1, *waterH2, file=f)
else:
print(water_type, *waterO, file=f)
[docs]
def read_results(
fname: str,
) -> tuple[list[str], list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
"""Read results from files.
Read results from files and return them in order for further
processing.
Args:
fname (str): Name of the file containing results.
Returns:
tuple[ list[str], list[np.ndarray], list[np.ndarray], list[np.ndarray] ]:
returns list of strings which represents water types, and arrays
of locations of oxygen and two hydrogens. If only oxygens were
saved returned hydrogen coordinates are empty arrays
Examples::
water_types, coord_O, coord_H1, coord_H2 = read_results(
fname = "Clust_res.dat",
)
"""
with open(fname) as f:
# rewind to line 27
for _ in range(27):
next(f)
water_type = []
waterO = []
waterH1 = []
waterH2 = []
for line_read in f:
line = line_read.split()
water_type.append(line[0])
waterO.append(np.asarray([float(line[1]), float(line[2]), float(line[3])]))
if len(line) == 10:
waterH1.append(
np.asarray([float(line[4]), float(line[5]), float(line[6])])
)
waterH2.append(
np.asarray([float(line[7]), float(line[8]), float(line[9])])
)
else:
waterH1.append([])
waterH2.append([])
return (
water_type,
waterO,
waterH1,
waterH2,
)
[docs]
def get_orientations_from_positions(
coordsO: np.ndarray, coordsH: np.ndarray
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
"""Returns orientations from coordinates.
Calculates relative orientations of hydrogen atoms from their
positions. The output of this function can be used as input for
water clustering.
Args:
coordsO (np.ndarray): Oxygen coordinates - shape
(N_waters, 3)
coordsH (np.ndarray): Hydrogen coordinates - two
hydrogens bound to the same oxygen have to be placed
one after another in the array. Shape: (2*N_waters, 3).
Returns:
tuple[np.ndarray, np.ndarray, np.ndarray]:
returns oxygen coordinates array and two hydrogen orientation
arrays.
"""
Odata: np.ndarray = np.asarray(coordsO)
if len(coordsH) > 1:
H1: np.ndarray = coordsH[::2, :]
H2: np.ndarray = coordsH[1:, :]
H2 = H2[::2, :]
v1: list[np.ndarray] = []
v2: list[np.ndarray] = []
for o, h1, h2 in zip(Odata, H1, H2):
a: np.ndarray = h1 - o
b: np.ndarray = h2 - o
if (np.linalg.norm(h1 - o) > 1.2) or (np.linalg.norm(h2 - o) > 1.2):
msg = "bad input HO bonds in water longer than 1.2A; value:"
raise ValueError(
msg,
np.linalg.norm(h1 - o),
np.linalg.norm(h2 - o),
)
v1.append(a)
v2.append(b)
H1orientdata: np.ndarray = np.asarray(v1)
H2orientdata: np.ndarray = np.asarray(v2)
return Odata, H1orientdata, H2orientdata
msg = "Hydrogen array of wrong length"
raise Exception(msg)
def _make_protein_surface_with_ligand():
protein = cmd.get_unused_name("only_protein_")
cmd.select(protein, "polymer", state=1)
povrsina = cmd.get_unused_name("protein_surface_")
cmd.create(povrsina, protein, 1, 1)
cmd.show("surface", povrsina)
cmd.color("gray70", povrsina)
cmd.set("transparency", 0.5, povrsina)
# ligand representation
ligand = cmd.get_unused_name("ligand_")
cmd.select(ligand, "organic", state=1)
cmd.show("licorice", ligand)
return povrsina, protein, ligand
def _add_polar_contacts(waters: str, aminokis_u_am: str | None = None):
if aminokis_u_am is not None:
sele = aminokis_u_am + " or " + waters + " or organic"
else:
sele = waters + " or organic"
cmd.distance(
"polar_contacts",
sele,
"sol",
mode=2,
)
cmd.hide("labels")
def _fix_pymol_camera(active_site_center: str | None = None, waters: str | None = None):
# reset camera
cmd.reset()
if active_site_center is not None:
cmd.center(active_site_center)
elif waters is not None:
cmd.center(waters)
def _determine_active_site_ids(active_site_ids: list[int]):
selection = ""
for i in active_site_ids:
selection += str(i) + "+"
selection = selection[: len(selection) - 1]
selection = "resi " + selection
aminokis_u_am: str = cmd.get_unused_name("active_site_aa")
cmd.select(
aminokis_u_am,
" " + selection + " and polymer ",
)
cmd.show("licorice", aminokis_u_am)
cmd.color("magenta", aminokis_u_am)
# pseudoatom in active site center
active_site_center = cmd.get_unused_name("activesite_center_")
cmd.pseudoatom(active_site_center, aminokis_u_am)
cmd.hide(representation="everything", selection=active_site_center)
return active_site_center, aminokis_u_am
def _add_density_map(density_map: str):
cmd.load(density_map)
cmd.volume("water_density", density_map.split(".", maxsplit=1)[0])
def _add_crystal_waters(
crystal_waters, protein, ligand_resname, dist, active_site_ids, active_site_center
):
cmd.fetch(crystal_waters)
cmd.hide("everything", crystal_waters)
cmd.align(
"polymer and " + crystal_waters,
"polymer and " + protein,
)
if ligand_resname is not None:
cmd.select(
"crystal_waters",
"("
+ crystal_waters
+ " and SOL) within "
+ str(dist)
+ " of resname "
+ ligand_resname,
)
elif active_site_ids is not None:
cmd.select(
"crystal_waters",
"("
+ crystal_waters
+ " and SOL) within 10 of resname "
+ active_site_center,
)
else:
cmd.select("crystal_waters", crystal_waters + " and SOL")
cmd.show("spheres", "crystal_waters")
cmd.set("sphere_scale", "0.4", "crystal_waters")
if os.path.exists(crystal_waters + ".cif"):
os.remove(crystal_waters + ".cif")
def _add_hydrogen_and_bond(wname, Hpos, Hname, resn, resi):
cmd.pseudoatom(
wname,
pos=[Hpos[0], Hpos[1], Hpos[2]],
name=Hname,
resn=resn,
resi=resi,
elem="H",
chain="W",
)
cmd.bond(f"{wname} and name O", f"{wname} and name {Hname}")
def _make_water_objects(water_type, waterO, waterH1, waterH2, output_file):
cntr = {
name: (
len(cmd.get_names("objects", False, f"model {name}*"))
if cmd.get_names("objects", False, f"model {name}*")
else 0
)
for name in ["FCW", "WCW", "HCW", "O_clust"]
}
ind = 0 # initialize index
while ind < len(water_type):
tip, Opos = (water_type[ind], waterO[ind])
if water_type[ind] != "O_clust":
H1pos, H2pos = (
waterH1[ind],
waterH2[ind],
)
cntr[tip] += 1
wname = tip + str(cntr[tip])
resis = cmd.identify("all", mode=0)
if len(resis) == 0:
highest_resi = 0
else:
highest_resi = np.max(resis)
if output_file is None or output_file.endswith(".pse"):
resn = "SOL"
elif output_file.endswith(".pdb"):
resn = f"{tip}"
else:
resn = "SOL"
cmd.create(wname, "none", source_state=0, target_state=0)
cmd.pseudoatom(
wname,
pos=[Opos[0], Opos[1], Opos[2]],
name="O",
resn=resn,
resi=highest_resi + 1,
elem="O",
chain="W",
state=1,
)
if tip == "O_clust":
cmd.show("spheres", wname)
cmd.set("sphere_scale", 0.1, wname)
ind += 1
else:
_add_hydrogen_and_bond(wname, H1pos, "H1", resn, highest_resi + 1)
_add_hydrogen_and_bond(wname, H2pos, "H2", resn, highest_resi + 1)
# check future water type
add_ind = 0
ghind = 0
while ind + add_ind + 1 < len(water_type) and tip != "FCW":
if (
tip == "HCW"
and water_type[ind + add_ind + 1] == "HCW"
and np.array_equal(Opos, waterO[ind + add_ind + 1])
):
_add_hydrogen_and_bond(
wname,
waterH2[ind + add_ind + 1],
f"H{add_ind + 3}",
resn,
highest_resi + 1,
)
add_ind += 1
elif (
tip == "WCW"
and water_type[ind + add_ind + 1] == "WCW"
and np.array_equal(Opos, waterO[ind + add_ind + 1])
):
hind = 0
# check for all previos Hs
H1eq = [
True
if np.array_equal(waterH1[ind], waterH1[ind + dind + 1])
else False
for dind in range(add_ind)
] + [
True
if np.array_equal(waterH2[ind], waterH1[ind + dind + 1])
else False
for dind in range(add_ind)
]
if H1eq.count(True) == 0:
_add_hydrogen_and_bond(
wname,
waterH1[ind + add_ind + 1],
f"H{add_ind + ghind + 3}",
resn,
highest_resi + 1,
)
hind += 1
H2eq = [
True
if np.array_equal(waterH1[ind], waterH2[ind + dind + 1])
else False
for dind in range(add_ind)
] + [
True
if np.array_equal(waterH2[ind], waterH2[ind + dind + 1])
else False
for dind in range(add_ind)
]
if H2eq.count(True) == 0:
_add_hydrogen_and_bond(
wname,
waterH2[ind + add_ind + 1],
f"H{add_ind + hind + ghind + 3}",
resn,
highest_resi + 1,
)
hind += 1
if hind > 0:
add_ind += 1
ghind += hind
else:
break
ind += 1 + add_ind
if output_file is None or output_file.endswith(".pse"):
cmd.show("lines", wname)
if tip == "FCW":
cmd.color("firebrick", wname)
elif tip == "HCW":
cmd.color("skyblue", wname)
if add_ind > 0:
cmd.color("red", "name H1 and " + wname)
elif tip == "WCW":
cmd.color("limegreen", wname)
cmd.show("sticks", wname)
[docs]
def visualise_pymol(
water_type: list[str],
waterO: list[list[float]],
waterH1: list[list[float]],
waterH2: list[list[float]],
aligned_protein: str | None = None,
output_file: str | None = None,
active_site_ids: list[int] | None = None,
crystal_waters: str | None = None,
ligand_resname: str | None = None,
dist: float = 10.0,
density_map: str | None = None,
polar_contacts: bool = False,
lunch_pymol: bool = True,
reinitialize: bool = True,
) -> None:
"""Visualises results via `pymol <https://pymol.org/>`__.
Visualises results using pymol in a pymol session or saves to a file.
On mac OS the interactive pymol session will fail to lunch. If `output_file`
is `None`, a visualisation state will be saved to
`pymol_water_visualization.pse` in this case on mac OS.
Args:
water_type (list): List containing water type results from
water clustering.
waterO (list): Coordinates of Oxygen atom in water molecules.
waterH1 (list): Coordinates of Hydrogen1 atom in water molecules.
waterH2 (list): Coordinates of Hydrogen2 atom in water molecules.
aligned_protein (str | None, optional): file name containing protein
configuration trajectory was aligned to. If `None` no
protein will be shown. Defaults to ``None``.
output_file (str | None, optional): File to save the
visualisation state. If ``None``, a pymol session is started
(this probably doesn't work on Mac OSX). Defaults to None.
active_site_ids (list[int] | None, optional): Residue ids -
numbers of aminoacids in active site. These are visualised
as licorice. Defaults to None.
crystal_waters (str | None, optional): PDBid from which crystal
waters will attempted to be extracted. Defaults to None.
ligand_resname (str | None, optional): Residue name of the
ligand around which crystal waters (oxygens) shall be
selected. Defaults to None.
dist (float): distance from the centre of ligand around which
crystal waters shall be selected. Defaults to 10.0.
density_map (str | None, optional): Water density map to add to
visualisation session (usually .dx file). Defaults to None.
polar_contacts (bool, optional): If `True` polar contacts
between waters and protein will be visualised. Defaults to
False.
lunch_pymol (bool, optional): If `True` pymol will be lunched
in interactive mode. If `False` pymol will be imported
without lunching. Defaults to True.
reinitialize (bool, optional): If `True` pymol will be
reinitialized (defaults restored and objects cleaned).
Defaults to True.
Example::
# Read the results from files
water_types, coord_O, coord_H1, coord_H2 = read_results(
fname = "Clust_res.dat",
)
visualise_pymol(
water_type = water_types,
waterO = coord_O,
waterH1 = coord_H1,
waterH2 = coord_H2,
aligned_protein = "aligned.pdb",
output_file = "results.pse",
active_site_ids = [1,5,77,98],
crystal_waters = "3t73",
ligand_resname = "UBX",
dist = 8.0,
)
"""
if platform.system() == "Darwin":
lunch_pymol = False
_initialize_pymol(reinitialize, lunch_pymol)
if platform.system() == "Darwin" and output_file is None:
warnings.warn(
(
"mac OS detected interactive pymol session cannot be lunched."
" Visualisation state will be saved to "
"pymol_water_visualization.pse"
),
RuntimeWarning,
stacklevel=2,
)
output_file = "pymol_water_visualization.pse"
cmd.hide("everything")
active_site_center = None
aminokis_u_am = None
if aligned_protein is not None:
cmd.load(aligned_protein)
cmd.hide("everything")
# polymer for surface def
tmpObj = cmd.get_unused_name("_tmp")
cmd.create(tmpObj, "( all ) and polymer", zoom=0)
# aminoacids in active site
if active_site_ids is not None:
aminokis_u_am, active_site_center = _determine_active_site_ids(
active_site_ids
)
# protein surface
_make_protein_surface_with_ligand()
_make_water_objects(water_type, waterO, waterH1, waterH2, output_file)
waters: str = cmd.get_unused_name("waters")
cmd.select(waters, "SOL in (FCW* or WCW* or HCW*)")
if polar_contacts:
_add_polar_contacts(waters, aminokis_u_am)
# Add crystal waters
if crystal_waters and aligned_protein is not None:
_add_crystal_waters(
crystal_waters,
aligned_protein,
ligand_resname,
dist,
active_site_ids,
active_site_center,
)
# add volume density visualisation
if density_map is not None:
_add_density_map(density_map)
_fix_pymol_camera(active_site_center, waters)
# save
if output_file is not None:
cmd.save(output_file)
[docs]
def visualise_pymol_from_pdb(
pdbfile: str,
active_site_ids: list[int] | None = None,
crystal_waters: str | None = None,
ligand_resname: str | None = None,
dist: float = 10.0,
density_map: str | None = None,
polar_contacts: bool = False,
lunch_pymol: bool = True,
reinitialize: bool = True,
) -> None:
"""Make a `pymol <https://pymol.org/>`__ session from a pdb file.
Visualises a pdb made by :py:meth:`make_results_pdb_MDA` file with water
clustering results in pymol.
Args:
pdbfile (str): Name of the pdb file to read, should end in .pdb
active_site_ids (list[int] | None, optional): Residue ids -
numbers of aminoacids in active site. These are visualised
as licorice. Defaults to None.
crystal_waters (str | None, optional): PDBid from which crystal
waters will attempted to be extracted. Defaults to None.
ligand_resname (str | None, optional): Residue name of the
ligand around which crystal waters (oxygens) shall be
selected. Defaults to None.
dist (float): distance from the centre of ligand around which
crystal waters shall be selected. Defaults to 10.0.
density_map (str | None, optional): Water density map to add to
visualisation session (usually .dx file). Defaults to None.
polar_contacts (bool, optional): If `True` polar contacts
between waters and protein will be visualised. Defaults to
False.
lunch_pymol (bool, optional): If `True` pymol will be lunched
in interactive mode. If `False` pymol will be imported
without lunching. Defaults to True.
reinitialize (bool, optional): If `True` pymol will be
reinitialized (defaults restored and objects cleaned).
Defaults to True.
Example::
visualise_pymol_from_pdb(
pdbfile = "results.pdb",
active_site_ids = [1,5,77,98],
crystal_waters = "3t73",
ligand_resname = "UBX",
dist = 8.0,
density_map = "waters.dx"
)
"""
if platform.system() == "Darwin":
lunch_pymol = False
_initialize_pymol(reinitialize, lunch_pymol)
cmd.load(pdbfile)
cmd.hide("everything")
# polymer for surface def
tmpObj = cmd.get_unused_name("_tmp")
cmd.create(tmpObj, "( all ) and polymer", zoom=0)
# aminoacids in active site
active_site_center = None
aminokis_u_am = None
if active_site_ids is not None:
aminokis_u_am, active_site_center = _determine_active_site_ids(active_site_ids)
else:
active_site_center = None
aminokis_u_am = None
# protein surface
_make_protein_surface_with_ligand()
# add water representations
# conserved waters
conserved = cmd.get_unused_name("FCW_")
cmd.select(conserved, "resname FCW")
cmd.color("firebrick", conserved)
# half conserved waters
half_conserved = cmd.get_unused_name("HCW_")
cmd.select(half_conserved, "resname HCW")
cmd.color("skyblue", half_conserved)
# semi conserved waters
semi_conserved = cmd.get_unused_name("WCW_")
cmd.select(semi_conserved, "resname WCW")
cmd.color("limegreen", semi_conserved)
# all waters
waters = cmd.get_unused_name("waters_")
cmd.select(
waters,
semi_conserved + " or " + half_conserved + " or " + conserved,
)
cmd.show("sticks", waters)
# Add crystal waters
if crystal_waters is not None:
_add_crystal_waters(
crystal_waters,
pdbfile.split(".", maxsplit=1)[0],
ligand_resname,
dist,
active_site_ids,
active_site_center,
)
if polar_contacts:
_add_polar_contacts(waters, aminokis_u_am)
# add volume density visualisation
if density_map is not None:
_add_density_map(density_map)
_fix_pymol_camera(active_site_center, waters)
def _initialize_pymol(reinitialize: bool, finish: bool):
"""Initializes pymol.
Initializes pymol for visualisation. If `finish` is `True` pymol
will be lunched in interactive mode. If `False` pymol will be
imported without lunching.
Args:
reinitialize (bool): If `True` pymol will be
reinitialized (defaults restored and objects cleaned).
Defaults to False.
finish (bool): If `True` pymol will be lunched
in interactive mode. If `False` pymol will be
imported without lunching. Defaults to True.
"""
if pymol is None or cmd is None:
msg = "pymol not installed. Either install pymol or use nglview"
raise Exception(msg)
if finish:
pymol.finish_launching(["pymol", "-q"])
if reinitialize:
cmd.reinitialize()
[docs]
def visualise_nglview(
water_type: list[str],
waterO: list[list[float]],
waterH1: list[list[float]],
waterH2: list[list[float]],
aligned_protein: str | None = None,
active_site_ids: list[int] | None = None,
crystal_waters: str | None = None,
density_map_file: str | None = None,
) -> NGLWidget:
"""Creates a nglview visualisation widget for results.
Starts a `nglview <https://github.com/nglviewer/nglview>`__ visualisation
instance from clustering results.
Args:
water_type (list): List containing water type results from
water clustering.
waterO (list): Coordiantes of Oxygen atom in water molecules.
waterH1 (list): Coordinates of Hydrogen1 atom in water molecules.
waterH2 (list): Coordinates of Hydrogen2 atom in water molecules.
aligned_protein (str, optional): file name containing protein
configuration trajectory was aligned to. Defaults to ``None``.
active_site_ids (list[int] | None, optional): Residue ids -
numbers of aminoacids in active site. These are visualised
as licorice. Defaults to None.
crystal_waters (str | None, optional): PDBid from which crystal
waters will attempted to be extracted. Defaults to None.
density_map_file (str | None, optional): Water density map to add to
visualisation session (usually .dx file). Defaults to None.
Returns:
NGLWidget: Returns nglview Widget for visualisation of the
results.
Example::
# read results and visualise them using nglview
water_types, coord_O, coord_H1, coord_H2 = read_results(
fname = "Clust_res.dat",
)
view = visualise_nglview(
water_type = water_types,
waterO = coord_O,
waterH1 = coord_H1,
waterH2 = coord_H2,
aligned_protein = "aligned.pdb",
active_site_ids = [1,5,77,98],
crystal_waters = "3t73",
)
# initialise widget
view
"""
if ngl is None:
msg = "nglview not installed. Either install pymol or nglview"
raise Exception(msg)
if aligned_protein is not None:
view: NGLWidget = ngl.show_file(aligned_protein, default_representation=False)
view.clear_representations()
view.add_representation("surface", selection="protein", opacity=0.5)
view.add_representation("ball+stick", selection="water", color="red")
selection = ""
if active_site_ids is not None:
for i in active_site_ids:
selection = selection[: len(selection) - 3]
selection += str(i) + " or "
view.add_representation(
"ball+stick",
selection=selection,
color="pink",
)
else:
view = ngl.NGLWidget()
col = {"FCW": "red", "WCW": "blue", "HCW": "green"}
for tip, Opos, H1pos, H2pos in zip(water_type, waterO, waterH1, waterH2):
view.add_pdbid("hoh")
view[-1].add_representation("licorice", color=col[tip])
view[-1].set_coordinates(np.asarray([H1pos, Opos, H2pos]))
if crystal_waters is not None:
view.add_pdbid(crystal_waters)
view[-1].add_representation("spacefill", selection="water", color="red")
if density_map_file is not None:
view.add_component(density_map_file)
return view