Source code for jscatter.bio.utilities

# -*- coding: utf-8 -*-
# written by Ralf Biehl at the Forschungszentrum Jülich ,
# Jülich Center for Neutron Science 1 and Institute of Complex Systems 1
#    Jscatter is a program to read, analyse and plot data
#    Copyright (C) 2015-2024  Ralf Biehl
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

"""
This module contains the scattering functions.

"""

import os
import re
import sys
import subprocess
import shutil
import tempfile
import numbers

import numpy as np
from scipy import constants as co
from scipy.spatial.transform import Rotation as Rot

import MDAnalysis as mda
from MDAnalysis.topology.guessers import guess_masses
import MDAnalysis.transformations as trans

from .. import formel
from .. import data
from .mda import scatteringUniverse


try:
    import pymol2
except ImportError:
    pymol2 = False


__all__ = ['runHydropro', 'readHydroproResult', 'createWaterBox']


def is_float(s):
    try:
        float(s)
        return True
    except ValueError:
        return False


def is_dataline(words):
    """
    Test if line words starts with float.
    wf : list of strings

    """
    try:
        return is_float(words[1]) and is_float(words[0])
    except IndexError:
        return False


[docs] def runHydropro(atomgroup, outfile=None, type=1, T=20, visc=0, AER=None, NSIG=-1, SIGMAX=4.0, SIGMIN=3.0, MW=None, soldensity=1.11, solvent='d2o', showoutput=0, hydropro=None): """ Diffusion tensor, Dtrans, Drot, S and more from PDB structure using HYDROPRO. HYDROPRO computes the hydrodynamic properties of rigid macromolecules from PDB structures. This wrapper writes the input file for HYDROPRO [1]_ hydropro.dat, runs HYDROPRO and reads the result file. The hydropro executable named 'hydropro10-lnx.exe' needs to be in the PATH. Download from [2]_. Use :py:func:`~.readHydroproResult` to read an existing result file. Parameters ---------- atomgroup : atomgroup, string A MDAnalysis atomgroup (e.g. uni.select_atoms('protein') or the filename of a PDB structure. If the filename contains a prepended path this is used as working directory wor the output. outfile : string, default None Output filename to use, by default a name is generated from residue names and Rg. type : 1,2,4 - 1 -- Atomic-level primary model, shell calculation, default and only for all atom model - 2 -- Residue-level primary model, shell calculation, default residue model (Ca only) - 4 -- Residue-level primary model, bead calculation, (Ca only) T : float default 20 Temperature in °C. visc : float , default 0 Viscosity in poise=10*Pa*s. H2O@20C is 0.01 Poise. 0 means calc from temperature T for solvent. AER : float, default depends on type The value of the hydrodynamic radius, in Å. Default values a described in [1]_ and should not be changed. - type 1 default to 2.9 A - type 2 default to 4.8 A - type 4 default to 6.1 A NSIG : int, default -1 NSIG (INTEGER) Number of values of the radius of the mini bead. -1 autodetermine SIGMIN SIGMAX SIGMAX : float, default 4.0 Lowest value of sigma, the mini bead radius SIGMIN : float, default 3.0 Highest value of sigma, the mini bead radius MW : float, default None Molecular weight; if None calculated from universe soldensity : float,default 1.1 Solvent density 1.1 is d2o solvent : 'd2o' or 'h2o' Solvent showoutput : 0 Show output of hydropro 0 ->minimal; None ->No output; other ->full output hydropro : string, default 'hydropro10-lnx.exe' or 'hydropro10-msd.exe' Filename of the hydropro executable in PATH. For Windows or Linux the default is tested. Change this if you use a different name. Returns ------- dict with results of 6x6 mobility matrix with 4 3x3 matrices - 'DTT' : translational 3x3 matrix, units nm^2/ps - 'DRR' : rotational 3x3 matrix, units 1/ps - 'DTR' ='DRT'^T : translational rotational coupling, units nm/ps - other keys with units as given in values Examples -------- :: import jscatter as js uni = js.bio.scatteringUniverse('3rn3',addHydrogen=False) H = js.bio.runHydropro(uni.atoms) uni.qlist = np.r_[0.01:2:0.1] D = js.bio.diffusionTRUnivTensor(uni,DTT=H['DTT'],DRR=H['DRR'],DRT=H['DRT']) References ---------- .. [1] Prediction of hydrodynamic and other solution properties of rigid proteins from atomic- and residue-level models A. Ortega, D. Amorós, J. Garc1a de la Torre, Biophys. J. 101, 892-898 (2011) .. [2] http://leonardo.inf.um.es/macromol/programs/hydropro/hydropro.htm """ # test which hydropro if hydropro is None: if sys.platform.startswith('linux'): hydropro = 'hydropro10-lnx.exe' elif sys.platform.startswith('win'): hydropro = 'hydropro10-msd.exe' else: raise NotImplementedError('hydropro executable only for windows and linux.') hydroproexe = shutil.which(hydropro) if not hydroproexe: raise FileNotFoundError('No hydropro executable found in PATH.') path = os.getcwd() # noinspection PyAugmentAssignment if hasattr(atomgroup, 'write'): if outfile is None: # generate a specific name aminos = [mda.lib.util.convert_aa_code(res) for res in atomgroup.select_atoms('protein').residues[:5].resnames] nucleic = [r[0] for r in atomgroup.select_atoms('nucleic').residues[:5].resnames] pdbfile = ''.join(aminos+nucleic)[:5] if not pdbfile: pdbfile = 'compl' pdbfile += f'{atomgroup.total_mass():.0f}' else: pdbfile=outfile Rg = atomgroup.radius_of_gyration() pdbfile = pdbfile+f'Rg{10*Rg:.0f}T{T:.0f}' pdbfile = pdbfile[:20] atomgroup.write(pdbfile+'.pdb') if np.all(atomgroup.atoms.names == 'CA'): # for Calpha models if MW is None: MW = atomgroup.n_atoms * 109 if type !=4: type = 2 AER = 4.8 # in A for Ca models else: AER = 6.1 # in A for Ca models else: if MW is None: MW = atomgroup.total_mass() type = 1 AER = 2.9 # in A for atomic models elif os.path.isfile(atomgroup+'.pdb') or os.path.isfile(atomgroup): path, file = os.path.split(atomgroup) pdbfile, _ = os.path.splitext(file) if AER is None: if type == 1: AER = 2.9 elif type == 2: AER = 4.8 elif type == 4: AER = 6.1 else: raise ValueError('Input is not atomgroup nor pdb file!') if len(pdbfile) > 38: raise NameError('PDB filename to long. Should be not more than 38 char.') # generate the input file zeilen = [pdbfile, pdbfile, pdbfile + '.pdb', f'{type:.0f}, !Type of calculation ', f'{AER:.1f}, !AER, radius of the atomic elements', f'{NSIG:.1f}, !NSIG ANzahl R zwischen min max'] if NSIG != -1: zeilen.append(f'{SIGMIN:.1f}, !Minimum radius of beads in the shell (SIGMIN)') zeilen.append(f'{SIGMAX:.1f}, !Maximum radius of beads in the shell (SIGMAX)') zeilen.append(f'{T:.1f}, !T (temperature, C)') if visc == 0: visc = formel.viscosity(mat=solvent, T=273.15+T) * 10 zeilen.append(f'{visc:.4f}, !ETA ') zeilen.append(f'{MW:.1f}, !RM (Molecular weigth)') zeilen.append('-1.0, !partial specific volume, cm3/g') zeilen.append(f'{soldensity:.2f}, !Solvent density, g/cm3') zeilen.append('0, !NQ Number of values of Q') zeilen.append('0, !Number of intervals for the distance distribution') zeilen.append('0, !Number of trials for MC calculation of covolume') zeilen.append('1 !IDIF=1 (yes) for full diffusion tensors') zeilen.append('* !End of file') hydroprodatpath = os.path.join(path, 'hydropro.dat') with open(hydroprodatpath, 'w') as f: f.writelines(' \n'.join(zeilen)) hydroproexe = shutil.which(hydropro) if not hydroproexe: raise FileNotFoundError('No hydropro executable found in PATH.') p = subprocess.run(hydroproexe, shell=True, capture_output=True, cwd=path) if p.stderr != '': for line in p.stderr.split(b'\n'): print('hydropro_std_err>', line) if showoutput: for line in p.stdout.split(b'\n'): print('hydropro>', line) elif showoutput is not None: linestoshow = [b'poise', b'Molecular weight', b'Translational'] for line in p.stdout.split(b'\n'): if any(word in line for word in linestoshow): print('hydropro>', line) # result = readHydroproResult(os.path.join(path, pdbfile)+'-res.txt') return result
[docs] def readHydroproResult(file='.res'): """ Reads the result file of HYDROPRO. Parameters ---------- file filename in dir or full path without dir Returns ------- dict with results of 6x6 mobility matrix with 4 3x3 matrizes - 'DTT' : translational 3x3 matrix, units nm^2/ps - 'DRR' : rotational 3x3 matrix, units 1/ps - 'DTR' ='DRT'^T : tranlational rotational coupling, units nm/ps - other keys with units as given in values """ with open(file) as f: zeilen = f.readlines() H = [] result = {} empty = re.compile('^\s*$') for zeile in zeilen: if empty.match(zeile): continue words = zeile.split() if is_dataline(words): H.append([float(w) for w in words]) elif ':' in zeile: try: words = zeile.split(':') key = words[0].strip().replace(' ','_') vals = words[1].split() if is_float(vals[0]): val = float(vals[0]) result[key] = (val,) + tuple(vals[1:]) except: pass aH = np.array(H) result['DTT'] = aH[:3, :3] * 1.e+2 result['DTR'] = aH[:3, 3:] * 1.e-5 result['DRT'] = aH[3:, :3] * 1.e-5 result['DRR'] = aH[3:, 3:] * 1.e-12 return result
def savePymolpng(atomgroup, fname, rotate=[]): """ Save png image of current configuration using Pymol. A simplified method to make png images. For more control use this function as a template and adopt it to your needs. Pymol needs to be installed (version >2.4) that `pymol2` can be imported. `pymol2` provides an additional interface to PyMol. Parameters ---------- atomgroup : atomgroup Atomgroup to show in plot. fname : string Filename rotate : list 3xfloat Angles to rotate around ['x','y','z'] axis. Missing values are zero. """ if isinstance(rotate,numbers.Number): rotate = [rotate] if pymol2: with pymol2.PyMOL() as p1: with tempfile.TemporaryDirectory() as tdir: name = os.path.splitext(atomgroup.universe.filename)[0] + '.pdb' tfile = os.path.join(tdir, name) atomgroup.write(tfile) p1.cmd.load(tfile) for ax, angle in zip(['x','y','z'], rotate): p1.cmd.rotate(ax, angle, 'all') # set colors p1.cmd.color('red', 'ss h') p1.cmd.color('yellow', 'ss s') p1.cmd.color('blue', 'ss l+') p1.cmd.set('cartoon_nucleic_acid_color', 'yellow') p1.cmd.set('cartoon_ladder_color', 'green') p1.cmd.set('cartoon_discrete_colors',1) p1.cmd.set('cartoon_ring_mode', 1) p1.cmd.color('green', '(resn DA+DC+DG+DT)') # make png p1.cmd.png(fname, width=600, height=600, dpi=-1, ray=1) else: print('Pymol needed but Pymol not installed or old version.') h2o = np.array([[ 0, 0, 0 ], # oxygen [ 0.95908, -0.02691, 0.03231], # hydrogen [-0.28004, -0.58767, 0.70556]]) # hydrogen def createWaterBox(size, density=None, temperature=293.15, skip=321): """ Create a MDAnalysis universe filled with water. Water positions are on a pseudorandom grid (ses :py:`~.formel.randomPointsInCube` ). Parameters ---------- size : float Edge length of the universe box in units A. density : float Density in the waterbox in g/ml for pure H2O. If None H2O density at the given temperature is used. D2O density is a bit diffferent than H2O. temperature : Temperature of universe in K. skip : integer Skip this number of points in Halton sequence to get random configurations. For same number we get always the same configuration. Returns ------- MDAnalysis universe quasi random water molecules in random orientation. Solvent ``resname='HOH"`` To create scattering universe :: import jscatter as js u = js.bio.createWaterBox(50) """ if density is None: density = formel.waterdensity('1H2O1', T=temperature) wmass = 2 * data.Elements['h'][1] + data.Elements['o'][1] n_residues = int((size/10)**3 / wmass * co.N_A/1e24*1000 * density) n_atoms = n_residues * 3 resindices = np.repeat(range(n_residues), 3) segindices = [0] * n_residues sol = mda.Universe.empty(n_atoms, n_residues=n_residues, atom_resindex=resindices, residue_segindex=segindices, trajectory=True) sol.add_TopologyAttr('name', ['O', 'H1', 'H2'] * n_residues) sol.add_TopologyAttr('type', ['O', 'H', 'H'] * n_residues) sol.add_TopologyAttr('resname', ['HOH'] * n_residues) sol.add_TopologyAttr('resid', list(range(1, n_residues + 1))) sol.add_TopologyAttr('segid', ['SOL']) sol.add_TopologyAttr('mass', guess_masses(sol.atoms.types)) op = formel.randomPointsInCube(n_residues, skip) * size coord_array = (op[:, None] + h2o).reshape(n_residues * 3, 3) sol.atoms.positions = coord_array sol.dimensions = [size, size, size, 90, 90, 90] bonds = [] for o in range(0, n_atoms, 3): bonds.extend([(o, o+1), (o, o+2)]) sol.add_TopologyAttr('bonds', bonds) for w, a, b in zip(sol.residues, np.random.rand(n_residues)*2*np.pi, np.random.rand(n_residues)*2*np.pi): com = w.atoms[0].position v = com - w.atoms[1].position R = Rot.from_rotvec(v/np.linalg.norm(v) * a) w.atoms[2].position = R.as_matrix() @ (w.atoms[2].position - com) + com v = com - w.atoms[2].position R = Rot.from_rotvec(v/np.linalg.norm(v) * b) w.atoms[1].position = R.as_matrix() @ (w.atoms[1].position - com) + com water = sol.select_atoms('not protein') transforms = [trans.wrap(water, compound='fragments')] sol.trajectory.add_transformations(*transforms) boxvolume = mda.lib.mdamath.box_volume(sol.dimensions) # in A³ # waterdensity = watermass / co.N_A / boxvolume * 1e24 # ~0.997 numdensity = sol.residues.n_residues / boxvolume * 1000 uni = scatteringUniverse(sol, guess_bonds=False) # set numdensity to exact values dependent on total mass uni.setSolvent(solvent='1H2O1', numDensity=numdensity) return uni