Source code for jscatter.bio.mda

# -*- coding: utf-8 -*-
# written by Ralf Biehl at the Forschungszentrum Jülich ,
# Jülich Center for Neutron Science 1 and Institute of Complex Systems 1
#    Jscatter is a program to read, analyse and plot data
#    Copyright (C) 2015-2024  Ralf Biehl
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

"""
mda module contains functions to prepare/use MDAnalysis universes for scattering.

scattering length is in nm units

"""

import os
import sys
import re
import numbers
from collections import defaultdict
import gzip
import io
import urllib
import shutil
import tempfile
import subprocess
import warnings
import multiprocessing as mp
from multiprocessing.pool import ThreadPool
import numpy as np
import numpy.linalg as la
import scipy
from scipy.interpolate import interp1d

import MDAnalysis
from MDAnalysis.core.topologyattrs import AtomAttr, ResidueAttr, AtomStringAttr, Atomtypes, Atomnames
from MDAnalysis.core.groups import Atom, AtomGroup, Residue, ResidueGroup
from MDAnalysis.topology.guessers import guess_types

from pdb2pqr.main import build_main_parser, main_driver, \
                        drop_water as pdb2pqr_drop_water, \
                        setup_molecule as pdb2pqr_setup_molecule,\
                        non_trivial as pdb2pqr_non_trivial, \
                        print_pqr as pdb2pqr_print_pqr, \
                        print_pdb as pdb2pqr_print_pdb
import pdb2pqr.io as pdb2pqr_io
import pdb2pqr.pdb as pdb2pqr_pdb

try:
    import pymol2
except (ModuleNotFoundError, ImportError):
    pymol2 = False

from .. import data
from .. import formel
# noinspection PyUnresolvedReferences
try:
    from ..libs import fscatter
except ImportError:
    ImportWarning('bio module needs a Fortran compiler to work. '
                  'Reinstall Jscatter with Fortran Compiler')
from ..libs import SASAsurface as sasa

Nscatlength = data.Nscatlength.copy()
neutronFFgroup = data.neutronFFgroup.copy()
xrayFFatomic = data.xrayFFatomic.copy()
xrayFFatomicdummy = data.xrayFFatomicdummy.copy()
xrayFFgroupdummy = data.xrayFFgroupdummy.copy()
xrayFFgroup = data.xrayFFgroup.copy()
vdwradii_ = {k.upper(): v for k, v in data.vdwradii.copy().items()}

pi = np.pi
identity3x3 = np.identity(3)
zero3x3 = np.zeros((3, 3))
QLIST = data.QLIST

# add deuterium to MDAnalysis tables
MDAnalysis.topology.tables.masses['D'] = data.Elements['d'][1]
MDAnalysis.topology.tables.vdwradii['D'] = MDAnalysis.topology.tables.vdwradii['H']

__all__ = ['QLIST', 'copyUnivProp', 'getSurfaceVolumePoints', 'getNativeContacts', 'getDistanceMatrix', 'scatteringUniverse',
    'pdb2pqr', 'fastpdb2pqr', 'addH_Pymol', 'fetch_pdb', 'mergePDBModel', 'xrayFFgroup', 'neutronFFgroup',
           'occupiedVolume']


ucopylist = ['d2oFract', 'temperature', 'solvent', 'solventDensity', 'error',
             'amideHexFract', 'histidinExchange', 'bcDensitySol', 'b2_incSol', 'probe_radius', 'shellThickness',
             'iscalphamodel', 'explicitResidueFormFactorAmpl', 'xsldSol', 'edensitySol', 'numDensitySol',
             'SESVolume', 'SASVolume', 'SASArea',
             'hDelocalisation', 'xDelocalisation', 'solventDelocalisation']


[docs] def copyUnivProp(universe, objekt, addlist=[]): """ Copies important universe properties from universe to object if they exist. The default list is in js.bio.ucopylist Parameters ---------- universe : MDAnalysis universe objekt : objekt Objekt to copy to addlist : additional attribute list """ for attr in ucopylist + addlist: # noinspection PyBroadException try: if hasattr(universe, attr): if hasattr(getattr(universe, attr), '__call__'): aa = getattr(universe, attr)() # as test to call setattr(objekt, attr, getattr(universe, attr)()) else: setattr(objekt, attr, getattr(universe, attr)) except: pass
class XRayFormFactor(AtomAttr): """ coherent Xray formfactors amplitude in unit nm """ attrname = 'faxs' singular = 'fax' dtype = np.float32 # object target_classes = [Atom, Residue, AtomGroup, ResidueGroup] QLIST=QLIST transplants = defaultdict(list) @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): # prepare zero shape array for later values return np.zeros((n_atoms, QLIST.shape[0])) def get_atoms(self, atomgroup): # Get values from single atom by interpolation to qlist fxq = interp1d(self.QLIST, self.values[atomgroup.ix], kind='linear') return fxq(atomgroup.universe.qlist) def set_atoms(self, atomgroup, values): # set values for atom # we set the ff for QLIST (first is zero) and interpolate for qlist in get if values == 'types': # use atom types for typ in np.unique(atomgroup.types): try: self.values[atomgroup.select_atoms('type '+typ).ix] = xrayFFatomic[typ.capitalize()].array[1] except KeyError: # this happens e.g. for metals like MG to be recognised as type M -> try the names instead ag = atomgroup.select_atoms('type ' + typ) for a in ag: self.values[a.ix] = xrayFFatomic[a.name.capitalize()].array[1] elif isinstance(values, str): # set all to same type self.values[atomgroup.ix] = xrayFFatomic[values.capitalize()].array[1] elif isinstance(values, np.ndarray) and np.shape(values) == self.QLIST.shape: # explicit given formfactor self.values[atomgroup.ix] = values else: raise AttributeError('values does not fit allowed XRayFormFactor amplitude input.') def get_residues(self, res_group): # coherent residue formfactors need to be calculated or taken from saved dict qlist = res_group.universe.qlist if isinstance(res_group.ix, numbers.Integral): try: if res_group.universe.explicitResidueFormFactorAmpl: # provoke explicit calculation raise KeyError() # get from list, if KeyError it is calculated return xrayFFgroup[res_group.resname.upper()].interp(qlist) except KeyError: # for a single residue atoms = res_group.atoms utypes = list(np.unique(atoms.types)) ufax = np.c_[[xrayFFatomic[a.capitalize()][1] for a in utypes]] ff = np.c_[self.QLIST, ufax.T].T iff = [utypes.index(a)+1 for a in atoms.types] fa = [ff[i, 0] for i in iff] ff[1:, :] = ff[1:, :]/ff[1:, :1] # normalize # Debye with interpolation cog = atoms.posnm.mean(axis=0) res = fscatter.cloud.scattering_debye(qlist, atoms.posnm - cog, # positions fa, # atom fax iff, # formfactor row sequence ff, # normalized formfactors 0) # parallel # fa are always positive => sign(sum(fa)) is always positive val = res[1]**0.5 return val else: # for a residue group list vals = np.empty((res_group.ix.shape[0], len(qlist))) for i, rg in enumerate(res_group): vals[i] = rg.fax return vals def typesScatteringAmplitudeQ0_ag(ag): """xray scattering amplitude atom group """ # need to avoid interp in atom.fax for qlist so reconstruct from xrayFFatomicdummy values = np.zeros_like(ag.ix, dtype=np.float32) for typ in np.unique(ag.types): values[ag.types == typ] = xrayFFatomic[typ.capitalize()].array[1, 0] return values def typesScatteringAmplitudeQ0_r(r): """xray scattering amplitude residue """ try: if r.universe.explicitResidueFormFactorAmpl: raise KeyError() return xrayFFgroup[r.resname.upper()].Y[0] except KeyError: return r.atoms.fax0().sum() def typesScatteringAmplitudeQ0_rg(rg): """xray scattering amplitude residue group """ return np.r_[[r.fax0() for r in rg]] transplants[AtomGroup].append(('fax0', typesScatteringAmplitudeQ0_ag)) transplants[Residue].append(('fax0', typesScatteringAmplitudeQ0_r)) transplants[ResidueGroup].append(('fax0', typesScatteringAmplitudeQ0_rg)) class XRayFormFactorDummy(AtomAttr): """ Coherent Xray formfactor amplitude dummy atoms in unit nm values contains coherent_scattering_amplitude for global QLIST Specific qlist values in get_atoms will be linear interpolated """ attrname = 'faxdumys' singular = 'faxdumy' dtype=np.float32 # object target_classes = [Atom, Residue, AtomGroup, ResidueGroup] QLIST=QLIST transplants = defaultdict(list) @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): # prepare zero shape array for later values return np.zeros((n_atoms, QLIST.shape[0])) def get_atoms(self, atomgroup): # Get values from single atom by interpolation fxq = interp1d(self.QLIST, self.values[atomgroup.ix], kind='linear') return fxq(atomgroup.universe.qlist) def set_atoms(self, atomgroup, values): # set values for atoms for all QLIST if values=='types': # use atom types for typ in np.unique(atomgroup.types): try: self.values[atomgroup.select_atoms('type '+typ).ix] = xrayFFatomicdummy[typ.capitalize()].array[1] except KeyError: # this happens e.g. for metals like MG recognised as type M -> try the names instead which is MG ag = atomgroup.select_atoms('type ' + typ) for a in ag: self.values[a.ix] = xrayFFatomicdummy[a.name.capitalize()].array[1] elif isinstance(values, str): # set all to same type self.values[atomgroup.ix] = xrayFFatomicdummy[values.capitalize()][1] elif isinstance(values, np.ndarray) and np.shape(values) == self.QLIST.shape: # explicit given formfactor self.values[atomgroup.ix] = values else: raise AttributeError('values does not fit allowed XRayFormFactor amplitude input.') def get_residues(self, res_group): # coherent residue formfactors need to be calculated or taken from saved dict qlist = res_group.universe.qlist if isinstance(res_group.ix, numbers.Integral): try: if res_group.universe.explicitResidueFormFactorAmpl: # provoke explicit calculation raise KeyError() # get from list, if KeyError it is calculated return xrayFFgroupdummy[res_group.resname.upper()].interp(qlist) except KeyError: # for a single residue atoms = res_group.atoms utypes = list(np.unique(atoms.types)) ufax = np.c_[[xrayFFatomicdummy[a.capitalize()][1] for a in utypes]] ff = np.c_[self.QLIST, ufax.T].T iff = [utypes.index(a)+1 for a in atoms.types] fa = [ff[i, 0] for i in iff] ff[1:, :] = ff[1:, :]/ff[1:, :1] # normalize cog = atoms.posnm.mean(axis=0) res = fscatter.cloud.scattering_debye(qlist, atoms.posnm - cog, # positions fa, # atom faxdumy iff, # type index in ff ff, # normalized formfactors for each type 0) # parallel val = res[1]**0.5 return val else: # for a residue group list vals = np.empty((res_group.ix.shape[0], len(qlist))) for i, rg in enumerate(res_group): vals[i] = rg.faxdumy return vals def typesScatteringAmplitudeQ0_ag(ag): """xray scattering amplitude dummy atom group """ # need to avoid interp in atom.faxdumy for qlist so reconstruct from xrayFFatomicdummy values = np.zeros_like(ag.ix, dtype=np.float32) for typ in np.unique(ag.types): values[ag.types == typ] = xrayFFatomicdummy[typ.capitalize()].array[1, 0] return values def typesScatteringAmplitudeQ0_r(r): """xray scattering amplitude dummy residue """ try: if r.universe.explicitResidueFormFactorAmpl: raise KeyError() return xrayFFgroupdummy[r.resname.upper()].Y[0] except KeyError: return r.atoms.fax0dumy().sum() def typesScatteringAmplitudeQ0_rg(rg): """xray scattering amplitude dummy residue group """ return np.r_[[r.fax0dumy() for r in rg]] transplants[AtomGroup].append(('fax0dumy', typesScatteringAmplitudeQ0_ag)) transplants[Residue].append(('fax0dumy', typesScatteringAmplitudeQ0_r)) transplants[ResidueGroup].append(('fax0dumy', typesScatteringAmplitudeQ0_rg)) class incXRayFormFactor(AtomAttr): """ incoherent Xray formfactors = formfactor_amplitude**2 in unit nm² incoherent sums as sum(inx**2) different to coherent """ attrname = 'fi2xs' singular = 'fi2x' dtype=np.float32 # object target_classes = [Atom, Residue, AtomGroup, ResidueGroup] QLIST=QLIST @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): # prepare zero shape array for later values return np.zeros((n_atoms, QLIST.shape[0])) def get_atoms(self, atomgroup): # Get values from single atom fxq = interp1d(self.QLIST, self.values[atomgroup.ix], kind='linear') return fxq(atomgroup.universe.qlist) def set_atoms(self, atomgroup, values): # set values for atom if isinstance(values, str): if values=='types': # use atom types for typ in np.unique(atomgroup.types): try: self.values[atomgroup.select_atoms('type '+typ).ix] = xrayFFatomic[typ.capitalize()][2] except KeyError: # this happens e.g. for metals like MG to be recognised as type M -> try the names instead ag = atomgroup.select_atoms('type ' + typ) for a in ag: self.values[a.ix] = xrayFFatomic[a.name.capitalize()].array[1] else: # set all to same type self.values[atomgroup.ix] = xrayFFatomic[values.capitalize()][2] elif isinstance(values, np.ndarray) and np.shape(values) == self.QLIST.shape: # explicit given formfactor self.values[atomgroup.ix] = values else: raise AttributeError('values does not fit allowed XRayFormFactor amplitude input.') def get_residues(self, res_group): # coherent residue formfactors need to be calculated or taken from saved dict qlist = res_group.universe.qlist if isinstance(res_group.ix, numbers.Integral): # for a single residue atoms = res_group.atoms utypes = list(np.unique(atoms.types)) ufax = np.c_[[xrayFFatomic[a.capitalize()][2] for a in utypes]].sum(axis=0) vals = np.interp(qlist, self.QLIST, ufax, ufax[0], ufax[-1]) else: # for a residue group list vals = np.empty((res_group.ix.shape[0], len(qlist))) for i, rg in enumerate(res_group): atoms = rg.atoms utypes = list(np.unique(atoms.types)) ufax = np.c_[[xrayFFatomic[a.capitalize()][2] for a in utypes]].sum(axis=0) vals[i] = np.interp(qlist, self.QLIST, ufax, ufax[0], ufax[-1]) return vals class nFormFactor(AtomAttr): """ Neutron coherent scattering amplitude in unit nm The return value should reflect the deuteration of individual atoms and possible hd exchange """ attrname = 'fans' singular = 'fan' dtype=np.float32 target_classes = [Atom, Residue, AtomGroup, ResidueGroup] QLIST = QLIST hcoh, hinc = Nscatlength['h'] dcoh, dinc = Nscatlength['d'] transplants = defaultdict(list) @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): return np.zeros(n_atoms) def get_atoms(self, atomgroup): # for neutrons the scattering length is a constant independent on q # return array for all q # this is frequently changed during fitting and should be always updated # (maybe in the calling function, then return only scalar) ql = atomgroup.universe.qlist.shape[0] # atomic formfactor for neutrons q independent return np.tile(self.values[atomgroup.ix], (ql, 1)).T def set_atoms(self, atomgroup, values): # here we set the values for atomic neutron scattering amplitudes # including deuteration and hd exchange # solvent H coh scattering respecting the d2o fraction in solvent solventcoh = self.hcoh + atomgroup.universe.d2oFract*(self.dcoh - self.hcoh) if isinstance(values, str): if values == 'types': # set it according to types, for H,D with solvent exchange and deuteration for typ in np.unique(atomgroup.types): agt = atomgroup.select_atoms('type ' + typ) if typ == 'H' or typ=='D': # account for partial deuteration bd = self.hcoh + agt.deuteration * (self.dcoh-self.hcoh) # account for possible HD exchange bdex = bd + agt.hdexchange * (solventcoh - bd) self.values[agt.ix] = bdex else: try: # some scattering length are complex, we use only real part self.values[agt.ix] = np.real(Nscatlength[typ.lower()][0]) except KeyError: # this happens e.g. for metals like MG to be recognised as type M -> try the names instead for a in agt: self.values[a.ix] = Nscatlength[a.name.lower()][0] else: # set all same type according to string name try: self.values[atomgroup.ix] = Nscatlength[values.lower()][0] except KeyError: raise KeyError('Key should be an element name like C,O,N,...') elif isinstance(values, np.ndarray) and (values.shape == atomgroup.ix.shape): # set explicit values self.values[atomgroup.ix] = values else: # isinstance(values, (int,float)): # explicit values self.values[atomgroup.ix] = values def get_residues(self, res_group): # coherent residue formfactors for residue coarse graining # these need to be calculated (if all atoms are present) # or taken from saved dict with averaged values for e.g CA model # the actual needed qlist of the universe qlist = res_group.universe.qlist # a const normalized formfactor for the atoms, QLIST is reference q list ff=np.c_[self.QLIST, np.ones_like(self.QLIST)].T if isinstance(res_group.ix, numbers.Integral): try: if res_group.universe.explicitResidueFormFactorAmpl: # provoke explicit calculation raise KeyError() # get from list, if KeyError it is calculated return neutronFFgroup[res_group.resname.upper()].interp(qlist) except KeyError: # for a single residue # We use the Debye scattering to calc it for a residue, interpolation for correct q is done in fscatter # the sign of fan.sum needs to be recovered as sum(x**2)**0.5 loses contrast information in matching atoms = res_group.atoms cog = atoms.posnm.mean(axis=0) res = fscatter.cloud.scattering_debye(qlist, atoms.posnm - cog, # positions self.values[atoms.ix], # fan for atoms [1]*atoms.ix.shape[0], # all same constant formfactor row 1 ff, # normalized formfactor 0) # ncpu parallel execution # formfactor amplitude is root of formfactor # this is valid only for small Q val = res[1]**0.5 * np.sign(self.values[atoms.ix].sum()) return val else: # for a residue group list vals = np.empty((len(res_group), len(qlist))) for i, rg in enumerate(res_group): vals[i] = rg.fan return vals def set_residues(self, values): raise TypeError('Set atom attributes instead of residueAttr.') def typesScatteringAmplitudeQ0_ag(ag): """neutron scattering amplitude atom group """ # because of constant atomic neutron formfactor we just take first return ag.fans[:, 0] def typesScatteringAmplitudeQ0_r(r): """neutron scattering amplitude residue """ try: if r.universe.explicitResidueFormFactorAmpl: raise KeyError() return neutronFFgroup[r.resname.upper()].Y[0] except KeyError: return r.atoms.fan0().sum() def typesScatteringAmplitudeQ0_rg(rg): """neutron scattering amplitude residue group """ return np.r_[[r.fan0() for r in rg]] transplants[AtomGroup].append(('fan0', typesScatteringAmplitudeQ0_ag)) transplants[Residue].append(('fan0', typesScatteringAmplitudeQ0_r)) transplants[ResidueGroup].append(('fan0', typesScatteringAmplitudeQ0_rg)) class incnFormFactor(AtomAttr): """ Incoherent neutron formfactor amplitude as fomrfactor_amplitude**2 in unit nm² The return value should reflect the deuteration of individual atoms and possible hd exchange """ attrname = 'fi2ns' singular = 'fi2n' dtype = np.float32 target_classes = [Atom, Residue, AtomGroup, ResidueGroup] QLIST = QLIST hcoh, hinc = Nscatlength['h'] dcoh, dinc = Nscatlength['d'] hinc2=hinc**2 dinc2=dinc**2 @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): return np.zeros(n_atoms) def get_atoms(self, atomgroup): # solvent scattering length density with d2o fraction return self.values[atomgroup.ix] def set_atoms(self, atomgroup, values): # here we set the values for incoherent atomic neutron scattering amplitudes if values=='types': # set it according to types # solvent H coh scattering respecting the d2o fraction in solvent solventinc = self.hinc2 + atomgroup.universe.d2oFract*(self.dinc2 - self.hinc2) for typ in np.unique(atomgroup.types): agt = atomgroup.select_atoms('type ' + typ) if typ == 'H' or typ=='D': # account for partial deuteration bi = self.hinc2 + agt.deuteration * (self.dinc2-self.hinc2) # account for possible HD exchange biex = bi + agt.hdexchange * (solventinc - bi) self.values[agt.ix] = biex else: try: self.values[agt.ix] = Nscatlength[typ.lower()][1]**2 except KeyError: # this happens e.g. for metals like MG to be recognised as type M -> try the names instead for a in agt: self.values[a.ix] = Nscatlength[a.name.lower()][1]**2 elif isinstance(values, str): # set all same type self.values[atomgroup.ix] = Nscatlength[values.lower()][1]**2 elif isinstance(values, np.ndarray) and (values.shape == atomgroup.ix.shape): # set explicit values self.values[atomgroup.ix] = values else: # isinstance(values, (int,float)): # explicit values self.values[atomgroup.ix] = values def get_residues(self, res_group): # incoherent of a residue is sum of incfin**2 # vals = np.empty(len(res_group)) if isinstance(res_group.ix, numbers.Integral): return self.values[res_group.atoms.ix].sum() else: return np.array([self.values[rg.atoms.ix].sum() for rg in res_group]) class deuteration(AtomAttr): """ Deuteration level. Only H can be deuterated to D. For intermediate levels we use statistical average. """ attrname = 'deuteration' singular = 'deuteration' dtype=np.float32 # object target_classes = [Atom, Residue, AtomGroup, ResidueGroup] @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): # prepare zero shape array for later values return np.zeros(n_atoms) def get_atoms(self, atomgroup): # Get values from single atom including exchange with the solvent return self.values[atomgroup.ix] def set_atoms(self, atomgroup, values): assert (0 <= values <= 1), 'deuteration values should be in interval [0..1] !' # set values for all in atomgroup self.values[atomgroup.ix] = values # update neutron coh + incoh only for the relevant H,D ag = atomgroup.select_atoms('type H or type D') ag.fans = 'types' ag.fi2ns = 'types' class hdexchangable(AtomAttr): """ Deuteration level. Only H can be deuterated to D. For intermediate levels we use statistical average. set as 3x tuple with exchangeable fraction for ( H bonded to O,S,sidechain N; backbone N-H, histidine -H*) default (1, universe.amideHexFract,u.histidinExchange) = (1, 0.9, 0.5) """ attrname = 'hdexchange' singular = 'hdexchange' dtype=np.float32 # object target_classes = [Atom, Residue, AtomGroup, ResidueGroup] @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): # prepare zero shape array for later values return np.zeros(n_atoms) def get_atoms(self, atomgroup): # Get values from single atom including exchange with the solvent return self.values[atomgroup.ix] def set_atoms(self, atomgroup, values): if values == 'default': # default values for normal conditions u = atomgroup.universe values = (1, max(min(u.amideHexFract, 1), 0), max(min(u.histidinExchange, 1), 0)) if isinstance(values, (tuple, set, list)) and np.shape(values) == (3,): # explicit given fractions u = atomgroup.universe if not u.bonds: # this may need vdwradii for unknown types u.atoms.guess_bonds() try: # backbone H bonded to N exchange to fraction 0.9 backboneH = atomgroup.select_atoms('type H and bonded type N and bonded backbone') self.values[backboneH.ix] = values[1] except AttributeError: # noch backbone present pass try: # histidin hydrogens can exchange to fraction 0.5 histidinH = atomgroup.select_atoms('(resname HSD or resname HIS) and type H*') self.values[histidinH.ix] = values[2] except AttributeError: # noch residues pass # H bonded to 'OSN' show complete exchange exchangeableH = atomgroup.select_atoms('type H and bonded type O') exchangeableH += atomgroup.select_atoms('type H and bonded type S') try: exchangeableH += atomgroup.select_atoms('type H and bonded type N and not bonded backbone') except AttributeError: # there is no backbone so add all exchangeableH += atomgroup.select_atoms('type H and bonded type N') self.values[exchangeableH.ix] = values[0] else: # allow explicit given values for groups self.values[atomgroup.ix] = values atomgroup.fans = 'types' atomgroup.fi2ns = 'types' class positionUnitsnm(AtomAttr): """ Geometric center position in units nm """ attrname = 'posnm' singular = 'posnm' dtype = object target_classes = [Atom, Residue, AtomGroup, ResidueGroup] @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): return np.zeros(n_atoms) @staticmethod def get_atoms(atomgroup): # Get values from single atom including exchange with the solvent if isinstance(atomgroup.ix, numbers.Integral): return atomgroup.position / 10 else: return atomgroup.positions / 10 @staticmethod def set_atoms(atomgroup, values): if isinstance(atomgroup.ix, numbers.Integral): atomgroup.position = values * 10 else: atomgroup.positions = values * 10 @staticmethod def get_residues(res_group): # incoherent of a residue is sum of incfin**2 # vals = np.empty(len(res_group)) if isinstance(res_group.ix, numbers.Integral): return res_group.atoms.posnm.mean(axis=0) else: posnm = np.zeros((res_group.ix.shape[0], 3)) posnm[:, :] = [rg.atoms.posnm.mean(axis=0) for rg in res_group] return posnm @staticmethod def get_segments(seg_group): # incoherent of a residue is sum of incfin**2 # vals = np.empty(len(res_group)) if isinstance(seg_group.ix, numbers.Integral): return seg_group.atoms.posnm.mean(axis=0) else: posnm = np.zeros((seg_group.ix.shape[0], 3)) posnm[:, :] = [sg.atoms.posnm.mean(axis=0) for sg in seg_group] return posnm class surfacename(AtomStringAttr): """ Is surface atoms indicator """ attrname = 'surface' singular = 'surface' dtype = object target_classes = [Atom, Residue, AtomGroup, ResidueGroup] @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): # prepare zero shape array for later values return np.array(['' for _ in range(n_atoms)], dtype=object) def get_residues(self, res_group): """ Residue is surface if any atom is in surface """ if isinstance(res_group.ix, numbers.Integral): val = self.values[res_group.atoms.ix] return np.unique(val[val != ''])[0] if np.any(val != '') else '' else: return [rg.surface for rg in res_group] class surfaceData(AtomAttr): """ Surface atoms properties [1x surface area, 1x SAS surface volume, 1x hydration shell volume, 3x area mean position] """ attrname = 'surfdata' singular = 'surfdata' dtype = np.float32 target_classes = [Atom, Residue, AtomGroup, ResidueGroup] @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): # prepare zero shape array for later values return np.zeros((n_atoms, 6)) def get_atoms(self, ag): return self.values[ag.ix, :] def set_atoms(self, ag, values): if isinstance(values, str) and values == 'default': self.values[ag.ix, :] = 0 else: self.values[ag.ix, :] = values def get_residues(self, res_group): # equivalent if isinstance(res_group.ix, numbers.Integral): val = self.values[res_group.atoms.ix] sd = np.zeros(6) sd[:3] = val[:, :3].sum(axis=0) sd[3:] = val[:, 3:].mean(axis=0) return sd else: return np.array([rg.surfdata for rg in res_group], dtype=np.float32) class surfaceLayerDensity(AtomAttr): """ Surface layer density per surface atom relative to bulk solvent, default 1 [1x surface area, 1x surface volume, 3x area mean position] """ attrname = 'surfdensity' singular = 'surfdensity' dtype = np.float32 target_classes = [Atom, Residue, AtomGroup, ResidueGroup] @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): # prepare zero shape array for later values return np.ones(n_atoms) def set_atoms(self, ag, values): if isinstance(values, str) and values == 'default': self.values[ag.ix] = 1. else: self.values[ag.ix] = values def get_residues(self, res_group): if isinstance(res_group.ix, numbers.Integral): return self.values[res_group.atoms.ix].mean() else: return np.array([rg.surfdensity for rg in res_group], dtype=np.float32) class occupiedVolume(AtomAttr): """ Occupied volume of a atom/residue as non overlapping Voronoi volume. """ attrname = 'oVolume' singular = 'oVolume' dtype = np.float32 target_classes = [Atom, Residue, AtomGroup, ResidueGroup] @staticmethod def _gen_initial_values(n_atoms, n_residues, n_segments): # prepare zero shape array for later values return np.zeros(n_atoms) def set_atoms(self, ag, values): if isinstance(values, str) and values == 'default': self.values[ag.ix] = 0. else: self.values[ag.ix] = values def set_residues(self, res_group, values): if isinstance(res_group.ix, numbers.Integral): self.values[res_group.atoms.ix] = values / res_group.atoms.n_atoms else: for rg, v in zip(res_group, values): rg.atoms.oVolume = v / rg.atoms.n_atoms def get_residues(self, res_group): if isinstance(res_group.ix, numbers.Integral): return self.values[res_group.atoms.ix].sum() else: return np.array([rg.oVolume for rg in res_group], dtype=np.float32) class vanderWaalsRadii(AtomAttr): """ van der Waals radii in units nm. """ attrname = 'vdWradiinm' singular = 'vdWradiinm' dtype = np.float32 target_classes = [Atom, Residue, AtomGroup, ResidueGroup] def get_residues(self, res_group): # equivalent radius for sum of vdW Volumes if isinstance(res_group.ix, numbers.Integral): return (self.values[res_group.atoms.ix]**3).sum()**(1/3) else: return np.array([rg.vdWradiinm for rg in res_group], dtype=np.float32)
[docs] def getSurfaceVolumePoints(objekt, point_density=5, probe_radius=0.13, surfacename='1', vdwradii=None, shellThickness=0.3): r""" Calculates the solvent excluded volume (SES and SAS) for a dense object like protein or DNA. It is **NOT** necessary to call this function as it is called automatically from all scattering functions and determines the surface atoms and the objekt volume. It is less accurate for solvent boxes as these may have space in between molecules and need adjusted probe_radius. Parameters ---------- objekt : atom group Atom group. point_density : integer, default 5 Point density on surface of each atom for rolling ball algorithm as n_points = 2**(2*point_density)+2 On error it is automatically incremented probe_radius : float, default 0.13 nm for water Probe radius for SAS/SES calculation. shellThickness : float Shell thickness of the surface layer. The default is 0.3. surfacename : str Name for surface selection vdwradii : dict, default None Dictionary of van der Waals radii for rolling ball algorithm in units nm. - If None the vdW radii used in the object universe are used. See notes below. - I a defaultdict only these values are used. This is used e.g. for Calpha or coarse grain models to use only the given defaults like `vdwradii = defaultdict(lambda :0.38)` . Returns ------- None Universe topology attributes *surface* and *surfdata* are populated - per surface atom with .surfdata = [surface area, SASvolume, shellVolume, surfmean position] - adds to objekt.universe .SASVolume volume SAS - adds to objekt.universe .SESVolume volume SES - adds to objekt.universe .SASArea - surface area can be calculated universe.select_atoms('surface 1').surfdata[:,0].sum() - SASVolume is universe.select_atoms('surface 1').surfdata[:,1].sum() - dummy bead positions in surface layer universe.select_atoms('surface 1').surfdata[:,3:] Notes ----- **SES/SAS** see `Accessible surface area <https://en.wikipedia.org/wiki/Accessible_surface_area>`_ **SAS** area/volume (solvent-accessible surface) is the surface area of a molecule that is accessible to a solvent center. SAS area is calculated by sasa.aVSPointsGradients. SAS area is here calculated using the *rolling ball* algorithm developed by Shrake & Rupley [1]_. It describes the surface of the center of a probe molecule (ball) with probe_radius rolling over the van der Waals surface. We use a probe_radius of 0.13 nm that represents the radius of a water molecule. **SES** area/volume (solvent excluded surface) is defined by the envelope excluding the volume occupied by the rolling ball probe. Also called Connolly surface [5]_ . **Method** assuming a object like a protein (not solvent like water) - Shrake/Rupley rolling ball algorithm: For each atom a regular angular grid of :math:`N_{SAS}` points builds a surface S in a distance :math:`d=R_{vdW} +R_{probe}` of the atomic vdWaals surface. :math:`R_{probe}` represents the solvent size. The points in S are tested for overlap with the same surface of other neighboring atoms. The set of non overlapping points build the SAS surface enclosing the SAS volume of the atom group. The SAS points have a distance :math:`R_{probe}` from the van der Waals surface of the atoms. - The SESVolume as a kind of dry volume is :math:`V_{SES} = V_{SAS} - V_{surface layer}`. The surface layer volume :math:`V_{surface layer}` for each atom `i` may be estimated as the volume between previous determined SAS points `p` and the van der Waals surface with volume per SAS point :math:`V_{p,i} = 4/3\pi ((R_{vdW,i}+R_{probe})^3 - R_{vdW,i}^3) / N_{SAS}`. In each corner (touching atoms) the probe surface sphere describes a smooth surface not reaching into the corner. This missing volume can be approximated using the rolling ball algorithm with same number of points :math:`N_{SAS}` but reduced probe radius :math:`R_{probe}/2`. Each additional appearing point not in the original SAS surface contributes a volume :math:`V_{p,c} = 4/3\pi (R_{probe})^3 / N_{SAS}` as sector from the probe center located in the SAS corner. Also buried atoms (between non-touching atoms which are not in the SAS surface) contribute with the same amount. It should be mentioned that this is an approximation for the corner probe sphere volume as the points distributed around the original atoms show a small deviation compared to atoms distributed around probe spheres located in the corner. On the other hand most algorithms describe only an approximation. .. figure:: ../../examples/images/rollingball.png :align: center :width: 50 % :alt: rollingball red: atoms, green: probe, solid line: SAS, dashed: SES, dots: probe R/2 points, blue: additional layer volume corner with additional points and probe section per point A comparison to experimental determined protein densities is shown in :ref:`Protein density determination`. For a probe radius of 1.3 A, we find excellent agreement. 1.3 A corresponds to the reduced oxygen vdW radius for proper protein density determination. As vdWaals radii we use for the atoms H, C, N, O the observed radii according to Fraser at al [2]_. For H this corresponds to 1.07 A which is a bit smaller comapred to the revised value 1.1 of Rowland et al. [3]_. For other values we use vdWaals radii of Bondi et al. [4]_ which are rare for proteins. For the determination of the **hydration layer** contributing to the neutron/X-ray scattering usually a 0.3 nm hydration layer is assumed as e.g. in CRYSON/CRYSOL. To determine the respective hydration layer volume we use the same approach as above with the shellthickness parameter. By default, we use a thickness of 0.3 nm for the hydration layer if no other value is given. It should be noted that compared to CRYSOL the scattering pattern is a bit different at larger Q. CRYSOL fits the parameter r0 as an adjustment to the listed vdW radius mainly changing the excluded volume scattering. The values of CYSOL can be repdoduced by seting the H vdW radius to 1.2 A ``universe.select_atoms('name H').vdWradiinm =.12`` which result in a different protein density. References ---------- .. [1] Environment and exposure to solvent of protein atoms. Lysozyme and insulin Shrake, A; Rupley, JA. (1973). J Mol Biol. 79 (2): 351–71. doi:10.1016/0022-2836(73)90011-9. .. [2] An improved method for calculating the contribution of solvent to the X-ray diffraction pattern of biological molecules. Fraser, R. D. B., MacRae, T. P., & Suzuki, E. (1978). Journal of Applied Crystallography, 11(6), 693–694. https://doi.org/10.1107/S0021889878014296 .. [3] Intermolecular Nonbonded Contact Distances in Organic Crystal Structures: Comparison with Distances Expected from van der Waals Radii. R. Scott Rowland, Robin Taylor The Journal of Physical Chemistry. Bd. 100, Nr. 18, 1996, S. 7384–7391, doi:10.1021/jp953141 .. [4] van der Waals Volumes and Radii. A. Bondi The Journal of Physical Chemistry. Bd. 68, Nr. 3, 1964, S. 441–451, doi:10.1021/j100785a001 .. [5] Analytical molecular surface calculation Connolly, M. L. J. Appl. Crystallogr. 16, 548–558 (1983). doi:10.1107/S0021889883010985. """ if len(objekt.atoms) == 0: raise ValueError('no atoms in objekt found') objekt.universe.surfdata = 'default' objekt.universe.surface = '' if vdwradii is None: vdwradii = objekt.universe.vdWradiinm while True: if point_density>20: break try: # aVSPointsGradients returns (area, volume, sPAG), sPAG as dictionary with # surface atom: [atom SAS exposed surface area, # list of points in the atom exposed surface, # gradient vector pointing outward from the atom] area, volume, sPAGpr = sasa.aVSPointsGradients(objekt, probe_radius, point_density=point_density, vdwradii=vdwradii) # same but with smaller probe to get the corner additional points _, _, sPAG0 = sasa.aVSPointsGradients(objekt, probe_radius / 2., point_density=point_density, vdwradii=vdwradii) # same but with larger probe to get the corner seen from the shell _, _, sPAG2 = sasa.aVSPointsGradients(objekt, shellThickness, point_density=point_density, vdwradii=vdwradii) except ZeroDivisionError: # increment number of points as sometimes there is an error with no points on surface for an atom point_density+=1 print('incremented point_density to %i' %(2**(2*point_density) + 2)) else: break # now add surface points of smaller surface to surface atoms # sPAG will be list of surface atoms with # [points in surface , exposed surface area, gradient vector pointing outward, # number of points in smaller and larger surface if present] sPAG = {satom: ((sPAGpr[satom][1], sPAGpr[satom][0], sPAGpr[satom][2]) if satom in sPAGpr else (None, 0, 0)) + (len(sPAG0[satom][1]), len(sPAG2[satom][1]) if satom in sPAG2 else None) for satom in sPAG0} allSASlayerVolume = 0 pointsonsphere = 2 ** (2 * point_density) + 2 for atom, surf in sPAG.items(): if surf[0] is None: # buried atoms in innermost surface contributing to r/2 but not to r; needed for SESVolume allSASlayerVolume += 4/3.*pi * probe_radius**3 * surf[3] / pointsonsphere continue # mean vector of surface points with length atom.vdW_radius+probe_radius is in the surface # center in surface v = (surf[0] - atom.posnm).mean(axis=0) # R is vdWradius + probe_radius and sync to vdW used in aVSPointsGradients R = la.norm(surf[0][0]-atom.posnm) normv = la.norm(v) if normv > 0: surfmean = atom.posnm + R * v/normv else: # it happens that 2 points on opposite site are found and norm(v) is nan # then we take just the first v = surf[0][0]-atom.posnm R = la.norm(surf[0][0]-atom.posnm) surfmean = atom.posnm + R * v/la.norm(v) # respective layer volume per point pointShellVolume = 4/3*pi* ((R - probe_radius + shellThickness)**3 - (R - probe_radius)**3) / pointsonsphere pointSASVolume = 4/3* pi * (R**3 - (R - probe_radius)**3) / pointsonsphere # difference in sphere volumes x fraction of points # SAS layer add overlap SASlayervolume = pointSASVolume * len(surf[0]) # full points # in corners the point difference approximates a probe sphere reaching into the corner SASlayervolume += 4/3.*pi * probe_radius**3 * (surf[3]-len(surf[0])) / pointsonsphere # same for hydration shell with shellThickness # the result is not much dependent on accuracy of these calculations # the more important part is SES calculation # a second option would be to add SASlayervolume and use pointShellVolume only outside SAS # The corner volume needs to be weighted half in this case if surf[4] is not None: shellVolume = pointShellVolume * surf[4] # full points shellVolume += 4/3.*pi * shellThickness**3 * (len(surf[0])-surf[4]) / pointsonsphere else: # no full points shellVolume = 4/3.*pi * shellThickness**3 * len(surf[0]) / pointsonsphere # surface data = [surf area, SASlayervolume, shellVolume, surfmean] atom.surfdata = np.r_[surf[1], SASlayervolume, shellVolume, surfmean] atom.surface = surfacename allSASlayerVolume += SASlayervolume objekt.universe.SASVolume = volume objekt.universe.SESVolume = volume - allSASlayerVolume objekt.universe.SASArea = area return
[docs] def getNativeContacts(objekt, overlap=0.01, vdwradii=None): """ Creates a native contact residue dict for a given object for Go-like protein modeling. A residue j is added as contact to residue i if any atom van der Waals radii overlap :math:`R_i^{vdW} + R_j^{wdW}-overlap < distance(i,j)` objekt : group of atoms overlap : Overlap of van der Waals radii to be counted as in contact in units nm. vdwradii : dictionary Van der Waals radii to use in units nm. Returns ------- dict with {residue : list of residue in contact } or list of index pairs for bonds Examples -------- :: import jscatter as js from collections import defaultdict # all atom universe with hydrogens uni=js.bio.scatteringUniverse('3pgk') u = uni.select_atoms("protein") NN1 = js.bio.getNativeContacts(u) # CA atom group u = uni.select_atoms("protein and name CA") # use a larger van der Waals radius for CA only with 3.8A as distance between CA along backbone NN2 = js.bio.getNativeContacts(u, vdwradii=defaultdict(lambda: 3.8, {'C':3.8})) References ---------- .. [1] Prediction of hydrodynamic and other solution properties of partially disordered proteins with a simple, coarse-grained model. Amorós, D., Ortega, A., & De La Torre, J. G. (2013). Journal of Chemical Theory and Computation, 9(3), 1678–1685. https://doi.org/10.1021/ct300948u .. [2] Selection of Optimal Variants of Gō-Like Models of Proteins through Studies of Stretching Joanna I. Sułkowska Marek Cieplak Biophysical Journal 95,3174-3191 (2008) https://doi.org/10.1529/biophysj.107.127233 """ if vdwradii is None: # a default for proteins in nm vdwradii = vdwradii_ atoms = objekt.atoms # create atom data with position atom_data = [None] * len(atoms) for i, a in enumerate(atoms): pos1 = a.posnm atom_data[i] = (pos1[0], pos1[1], pos1[2], vdwradii[a.type.capitalize()]) # index and d2 of neighbours nborslist = sasa.NeighborList(atoms, np.max(list(vdwradii.values())), atom_data) # a dictionary with items: atom : [neighbor atoms] nativecontacts ={} for i, atom in enumerate(atoms): ipos = atom_data[i] iresidue=atom.residue iwdWrad = ipos[3] nbors=nborslist[i] for j, d2 in nbors: atomj=atoms[j] jpos=atom_data[j] jwdWrad = jpos[3] if (iwdWrad+jwdWrad-overlap)**2 > d2: jresidue=atomj.residue if iresidue == jresidue: continue if not iresidue in nativecontacts: nativecontacts[iresidue] = [] if jresidue not in nativecontacts[iresidue]: nativecontacts[iresidue].append(jresidue) # print(iwdWrad, jwdWrad, d2**0.5, iresidue, jresidue) return nativecontacts
[docs] def getDistanceMatrix(objekt, cutoff=None): """ Distance matrix for atoms in objekt. objekt migth be a selection to get e.g. distances between CA or H atoms Parameters ---------- objekt : AtomGroup or residueGroup Objekt to get distance matrix. cutoff : float Cutoff radius. For larger distances 0 is returned. Returns ------- Distance matrix NxN (upper triagonal) in units nm. Notes ----- For larger matrices a faster neighbor search is used. Examples -------- :: import jscatter as js uni=js.bio.scatteringUniverse('3pgk') u = uni.select_atoms("protein and type H") dd = js.bio.getDistanceMatrix(u) u = uni.select_atoms("protein and name CA") dd = js.bio.getDistanceMatrix(u,cutoff=2) """ atoms = objekt.atoms dv = scipy.spatial.distance.pdist(atoms.posnm) distances = scipy.spatial.distance.squareform(dv) if cutoff is not None: distances[distances > cutoff] = 0 return distances
def convexHullVolume(points): return scipy.spatial.ConvexHull(points, incremental=False).volume
[docs] def occupiedVolume(atoms, dl=0.12): r""" Calculates Voronoi cell volume for atom positions in a filled universe box. Only for filled boxes meaningful. A universe box gets 26 copies of itself around the central original box (periodic boundary conditions in MD). From these the layer with thickness dl around the center universe box is used to get neigbors for the close to box boundary positions. This guarranties proper neigbors, a filled box and :math:`\sum V_{atoms} = box volume`. Parameters ---------- atoms : universe atoms or residues Atom positions are used for Voronoi cell volume. dl : float Border size to include in convexHull as fraction from box size. Returns ------- array : floats Voronoi cell volumes in units A³ for the atoms. Notes ----- scipy.spatial.Voronoi and scipy.spatial.convexHullVolume are used to calc the volume per atom. """ # get box dimension vectors = MDAnalysis.lib.mdamath.triclinic_vectors(atoms.universe.dimensions) size = len(atoms) # points for 3x3x3 boxes around central box points = np.zeros([27 * size, 3]) # copy along X points[0 * size:1 * size, :] = atoms.posnm * 10 # in units A # just in case someone moved the atoms out of the box we use here the center of geometry points[0 * size:1 * size, :] -= points[0 * size:1 * size, :].mean(axis=0) points[1 * size:2 * size, :] = points[:1 * size, :] + vectors[0] points[2 * size:3 * size, :] = points[:1 * size, :] - vectors[0] # copy along Y points[3 * size:6 * size, :] = points[:3 * size, :] + vectors[1] points[6 * size:9 * size, :] = points[:3 * size, :] - vectors[1] # copy along Z points[9 * size:18 * size, :] = points[:9 * size, :] + vectors[2] points[18 * size:27 * size, :] = points[:9 * size, :] - vectors[2] # projections along box vectors nv0 = la.norm(vectors[0]) nv1 = la.norm(vectors[1]) nv2 = la.norm(vectors[2]) pro0 = points @ vectors[0] / nv0 pro1 = points @ vectors[1] / nv1 pro2 = points @ vectors[2] / nv2 # select points in central box with some border layer in neighbor boxes # we need the neighbor points that the Voronoi can calc all volumes using neighbor points over box planes val = (pro0 > -(0.5 + dl) * nv1) & (pro0 < (0.5 + dl) * nv0) & \ (pro1 > -(0.5 + dl) * nv1) & (pro1 < (0.5 + dl) * nv1) & \ (pro2 > -(0.5 + dl) * nv2) & (pro2 < (0.5 + dl) * nv2) if mp.current_process().name != 'MainProcess': v = scipy.spatial.Voronoi(points[val], incremental=False, qhull_options="QJ Pp") vol = np.zeros(v.npoints) for i, reg_num in enumerate(v.point_region): indices = v.regions[reg_num] if -1 in indices: # some regions can be open vol[i] = np.inf else: vol[i] = convexHullVolume(v.vertices[indices]) vv = vol else: # do it in a pool # here ThreadPool is ok as ConvexHull releases Gil in cython v = scipy.spatial.Voronoi(points[val], incremental=False, qhull_options="QJ Pp") with ThreadPool() as pool: iter = [v.vertices[v.regions[reg_num]] for i, reg_num in enumerate(v.point_region) if -1 not in v.regions[reg_num]] chunksize, extra =divmod(len(iter), len(pool._pool) * 4) chunksize = chunksize + (1 if extra else 0) vol = pool.map(convexHullVolume, iter, chunksize=chunksize) vv = np.array(vol) # return only points in original box return vv[:size]
def addNXAttributes(universe, vdwradiiA={}): """ Add topology attributes as needed for Xray/neutron scattering to existing universe. Uses default values for a protein in D2O. See :py:class:`scatteringUniverse` for details. Parameters ---------- universe : MDAnalysis universe MDAnalysis universe. vdwradiiA : dict, Dictionary of van der Waals radii in units A. Here an explicit list has to be given without default. Returns ------- None """ # get universe u = universe u._qlist = np.r_[0.01, 0.1, 1.0, 2.] # units 1/nm , force ndarray u.tlist = 10**np.r_[0:6] # units ps u.error = 200 u.amideHexFract = 0.9 u.histidinExchange = 0.5 # delocalisations # Hydrogen atoms in proteins: Positions and dynamics # Engler, Ostermann, Niimura, Parak PNAS 100, 10243 (2003) https://doi.org/10.1073/pnas.1834279100 # -> 0.21**0.5 / 10 = 0.045 nm approximate 290K # How large B-factors can be in protein crystal structures. # Carugo, BMC Bioinformatics 19, 61 (2018). https://doi.org/10.1186/s12859-018-2083-8 # B=8π^2 u^2 <25 A² => u =0.056A at maximum , we use 0.045 with B=0.16 A² u.hDelocalisation = 0.045 # nm u.xDelocalisation = 0.045 # nm # according to Sorenson 0.22/A -> 0.045nm u.solventDelocalisation = 0.045 # nm # water radius u.probe_radius = 0.13 # unit nm u.shellThickness = 0.3 # nm u.calphaCoarseGrainRadius = 0.342 # unit nm u.iscalphamodel = False # register scattering length and other new properties needed for Xray and neutron scattering u.add_TopologyAttr("fax") u.add_TopologyAttr("faxdumys") u.add_TopologyAttr("fan") u.add_TopologyAttr("fi2ns") u.add_TopologyAttr("fi2xs") u.add_TopologyAttr("deuteration") u.add_TopologyAttr("hdexchange") u.add_TopologyAttr("posnm") u.add_TopologyAttr("surface") u.add_TopologyAttr("surfdata") u.add_TopologyAttr("surfdensity") u.add_TopologyAttr("oVolume") u.vdWradiiA = vdwradiiA.copy() u.vdWradiinm = {k: v/10 for k, v in vdwradiiA.items()} vdwr = np.zeros_like(u.atoms) for t in list(set(u.atoms.types)): ag = u.select_atoms('type ' + t) vdwr[ag.ix] = u.vdWradiinm[t.upper()] u.add_TopologyAttr('vdWradiinm', vdwr) # set default values for scattering length densities # u.atoms.deuteration = 0 # this is already the initial value u.atoms.hdexchange = 'default' # in above hdexchange the following two lines are already called, we don't do it twice # these incorporate hdexchange and deuteration # u.atoms.fans = 'types' # u.atoms.fi2ns = 'types' u.atoms.faxs = 'types' u.atoms.fi2xs = 'types' u.atoms.faxdumys = 'types' u.atoms.surfdensity = 'default' u.atoms.oVolume = 'default' # force explicit residue ff calculation u.explicitResidueFormFactorAmpl = True
[docs] class scatteringUniverse(MDAnalysis.Universe): def __init__(self, *args, **kwargs): r""" MDAnalysis universe with atoms from PDB or simulation for neutron/Xray scattering. `scatteringUniverse` returns a `MDAnalysis <https://docs.mdanalysis.org/stable/index.html>`_ universe which contains proteins/DNA and solvent. It has methods e.g. for selection of parts or position analysis from MDAnalysis complemented by scattering specific methods/attributes as atomic formfactors or volume determination. Parameters describe additional scattering parameters like embedding solvent. Others are passed to MDAnalysis.universe. Parameters ---------- args : pdb structure file, universe or trajectories PDB files, PDB ID or path to trajectories to build the universe as described for MDAnalysis.universe. See `MDAnalysis <https://userguide.mdanalysis.org/stable/universe.html>`__ for different formats. Already existing local PDB files are prefered. biounit : bool, default False If biounit is True or above args is a PDB biounit/assembly1 with ending *.pdb[biounit]* as e.g. ``.pdb1`` the biounit is downloaded, merged and used for the universe. See :py:func:`~mergePDBModel`. addHydrogen : bool, default True Add hydrogen to atomic coordinates (only) for PDB files. - False: do not add hydrogens. If the pdb file contains hydrogens or contains only Cα atoms set to False. - True: - By default :py:func:`~pdb2pqr` is used to add hydrogen in a fast mode without debumping and position optimization, see :py:func:`~fastpdb2pqr`. The resulting ``_h.pdb`` file is used that contains also ligands. The '_h.pqr' file also contains charge information but no ligands by default. This file can be used to populate the charge attribute in a second universe and adding the ligands from the first universe using MERGE (but no ligand charge). For more complicated cases see pdb2pqr documentation. - For other formats as e.g. trajectories this option is ignored as these should contain hydrogens. - If PyMol is installed, it can be used to add hydrogens, addHydrogen='pymol'. Pymol is maybe bit faster and add hydrogens also to ligands. No charges are added. vdwradii : dict Atomic van der Waals radii (units A) passed to universe are used to calculate SES and SAS volume. The default is for protein atoms ``{'H': 1.09, 'C': 1.58, 'N': 0.84, 'O': 1.3, 'S': 1.68, 'P': 1.11}`` These values are smaller than the vdW radii from Bondi but result in a correct SESVolume and scattering intensity. See :py:func:`~.bio.mda.getSurfaceVolumePoints` . guess_bonds : MDAnalysis selection string, default='all' Guess bonds for atoms selected by selection string. Bonds are needed for correct hydrogen exchange (e.g. H bonded to 'O,N,S' exchange with D2O to D) in neutron scattering. See `hdexchange`. For X-ray scattering or if all bonds are present in the topology set to *False*. For trajectories with a lot of solvent, it takes time to determine solvent bonds. Selecting only parts like ``guess_bonds='protein'`` or ``guess_bonds='not segid seg_1_SOL'`` shortcuts this. The solvent *H* needs to be explicitly set ``.hdexchange`` or the atomtype to *D*. assignTypes : dict Assign unknown atom type values from known atom names if the unknowns are non-standard. Some coordinate formats use non-chemical symbols (e.g. lammps uses integers), which need to be assigned some element atom types. We do this here to real atom types. Dictionary of ``{'unknownname': 'existingname'}`` like ``{'mw': 'C'}`` or for lamps ``{'1':'C','2':'H'}``. The key 'from_names' prepends ``guess_types(self.atoms.names)`` before above assignment ( {'from_names':1} ). For custom setting (not existing element) assign corresponding data for an element to ``(js.bio.mda).xrayFFatomic, .xrayFFatomicdummy, .Nscatlength, .vdwradii``. Notes ----- The *scatteringUniverse* contains proteins/DNA with/without explicit solvent possibly in a box and has attributes as X-ray/neutron scattering amplitudes for all atoms in the universe. Parameters referring to solvent describe the embedding continuous solvent outside. Without explicit solvent a virtual surface layer and volume determination is used e.g. for PDB structures from the PDB databank or implicit solvent simulations. Explicit solvent is in most cases simulated as protonated while neutron scattering experiments are done with D2O/H2O mixtures. This needs to be accounted by adjusting ``.hdexchange`` for the solvent or changing *H* to *D* for the solvent. See specific examples in :ref:`Biomacromolecules (bio)` Use a **biological unit** for calculation using PDB structures from the PDB databank. The PDB crystal structure is not always the biological unit in special for multimeric proteins. The biological assembly can be retrieved from PDB servers as e.g.https://www.ebi.ac.uk/pdbe/ . Look for biological unit or assembly. Check the structure in a PDB viewer as Pymol or VMD. If existent the biological unit can be downloaded with *.pdb1* or *.pdb2* file ending, see :py:func:`~.bio.mda.fetch_pdb`. The biological unit can then be merged into one model using see :py:func:`~.bio.mda.mergePDBModel`. The *scatteringUniverse* has **additional attributes for scattering** of atoms/residues. All atoms need a defined type to determine the scattering amplitude. These attributes like fax/fan atomic formfactors are used during scattering calculations as described in :py:func:`~.bio.scatter.scatIntUniv` . **mutable Universe attributes** : - qlist : array scattering vectors in unit 1/nm. - tlist : array, optional Times in units *ps* for dynamics. - solvent : Continuous solvent embedding the atoms/universe. Saved in attribute uni.solvent. Use :py:func:`setSolvent` for details and how to change. - temperature : float Temperature in K. Use :py:func:`setSolvent`. - solventDensity : See and use :py:func:`setSolvent`. - probe_radius : float 0.13 radius of the probe for surface determination in the rolling ball algorithm, See :py:func:`~.bio.mda.getSurfaceVolumePoints` . The default is 0.13 [nm] for water. This reduced value (often 0.14 nm) corresponds to the reduced vdwradii of oxygen that reproduces SESVolume and results in the best protein density determination. - shellThickness : float Hydration shell thickness as a surface layer thickness. Default is 0.3 nm. - point_density : int, default 5 Point density on surface (per atom) for SES and SAS calculation. See :py:func:`~.bio.mda.getSurfaceVolumePoints` . :math:`n_{points} = 2^{2 point_density}+2` . - error : float Determines how to calculate spherical averages (number of Fibonacci points on sphere). - amideHexFract : float 0..1 Exchangeable fraction of backbone amide -NH hydrogen - 0.9 for folded proteins - For intrinsically unfolded proteins this might be higher due to improved access to the backbone. (see hdexchange to change) - histidinExchange : float 0..1 Exchangeable fraction histidine hydrogens (0.5) (see hdexchange to change) - solventDelocalisation : float, default 0.045 nm. Debye-Waller like solvent delocalisation. See Sorenson et al [6]_. - xDelocalisation : float, default 0.045 nm. Debye-Waller like delocalisation of heavy atoms. Crystallographic B factors are limited to :math:`B=8\pi^2 \langle u^2\rangle < 25 A²` [7]_ leading to :math:`u_{max} = 0.056A` at maximum. We use 0.045 nm with B=0.16 A². - hDelocalisation : float, default 0.045 nm. Debye-Waller like hydrogen delocalisation of non-solvent atoms. :math:`\langle u^2\rangle \approx 0.21 A² \rightarrow u\approx 0.045 nm` at 290K [8]_. - calphaCoarseGrainRadius : float 0.342 [nm] vdwradii for Cα only structures for coarse grained calculations. The value gives an average good approximation for the protein densities. - iscalphamodel : bool - explicitResidueFormFactorAmpl : bool - ``True`` (default) : The residue formfactor amplitude (fan/fax) is calculated explicit for each residue instead of using precalculated average values. hdexchange is included like deuteration - False : Precalculated residue formfactors are used from ``js.mda.xrayFFgroup`` and ``js.bio.neutronFFgroup``. - Additional residue or monomer types can be added to the dict for coarse grained calculations e.g. of polymer bead models. In this case set ``explicitResidueFormFactorAmpl=False`` **Atom attributes** can be changed for each atom. Predefined values are OK for most folded proteins without explicit solvent. These attributes can be set for individual atoms or for groups like :: urn.select_atoms('not protein and type H').deuteration =1 urn.select_atoms('not segid seg_1_SOL').hdexchange =1 - deuteration : atomic deuteration, effective only for H atoms. Fractional values are interpreted as statistical deuteration e.g. of half the atoms in an ensemble. For residue calculation use with ``uni.explicitResidueFormFactorAmpl = True`` to get updated residue formfactors accordingly. - hdexchange: Fraction of hydrogens that exchange with solvent hydrogens for a group of atoms or individual atoms. The default is for folded proteins, Intrinsically folded may be different e.g. for the backbone N-H. To get number of exchanged H atoms ``uni.atoms.hdexchange.sum()`` Set for atomgroup as ``ag.hdexchange = (a,b,c)`` or ``ag.hdexchange = 'default'`` with - a : H bonded to O,S,sidechain N ; default 1, all exchange - b : backbone N-H : default uni.amideHexFract = 0.9 - c : histidine -H : default u.histidinExchange = 0.5 A single float sets a=b=c and keyword 'default' sets to universe defaults. Usefull for e.g a disordered part that has different backbone exchange. - surfdensity : scattering length density relative to bulk for surface atoms. - Typically between 1.00 to 1.18 for proteins. - If equal 1 no surface is assumed. Example how to set for individual atoms/residues E.g. set according to residue type or residue numbers :: import jscatter as js uni = js.bio.scatteringUniverse('3CLN') uni.select_atoms('resname ARG HIS LYS ASP GLU').atoms.surfdensity = 1.1 protein = uni.select_atoms('protein') # scatIntUniv uses prepScatGroups which uses getSurfaceVolumePoints to determine the surface Sx = js.bio.scatIntUniv(protein ,mode='xray') print('Mean universe atoms ', uni.atoms.surfdensity.mean()) print('Mean in surface atoms', protein.select_atoms('surface 1').surfdensity.mean()) # hydrophobicity scale [-4.5..4.5] dependent on residue type md = 0.09 # maximum density in layer 9% different from bulk for k, v in js.data.aaHydrophobicity.items(): uni.select_atoms('resname '+k.upper()).atoms.surfdensity = 1 + md * v[0]/4.5 print('Mean universe atoms ', uni.atoms.surfdensity.mean()) print('Mean in surface atoms', protein.select_atoms('surface 1').surfdensity.mean()) # set higher exchange for backbone NH in a segment (migth be unfolded at the surface) protein.select_atoms('resnum 33:44').atoms.hdexchange = (1, 0.7, 0.5) # use a partial deuteration of a small domain or even individual H atoms. # maybe partial deuteration is possible in the future protein.select_atoms('resnum 33:44').atoms.deuteration = 1 # deuteration of 2 amino acids types protein.select_atoms('resname ARG LYS').atoms.deuteration = 1 **Fixed attributes** Set according to other attributes or atom type for all atoms (residue values as appropriate averages) (singular/plural): - fax/faxs : atomic/residue xray formfactor amplitude, unit nm - atomic xray formfactor amplitudes :math:`f_j(Q)` according to REZ et al. [2]_ . See :py:func:`~.bio.scatter.scatIntUniv` - Delocalisation is taken into account exchanging :math:`f_i(Q)` -> :math:`f^{\prime}_i(Q)` with (:math:`\delta_X=universe.xDelocalisation` and :math:`\delta_H=universe.hDelocalisation`) .. math:: f^{\prime}_i(Q) = exp(-Q^2\delta_{X,H}^2/2) f_i(Q) - residue formfactor amplitudes for coarse grained calculation are calculated using the Debye scattering equation with atomic positions :math:`r_j` according to Yang et al. [4]_ if atoms are present: .. math:: F_a^{residue}(Q) = \big\langle \sum_{j.k} f^{\prime}_j(Q)f^{\prime}_k(Q) \frac{sin(Q(r_j-r_k))}{Q(r_j-r_k)} \big\rangle^\frac{1}{2} This is obviously only valid for small scattering vectors where we can neglect the atomic details and interferences in a residue, thus the real :math:`F_a^{residue}(Q) >0`. - Precalculated amino acid residue formfactors are stored in the dict ``js.mda.xrayFFgroup`` and ``js.bio.neutronFFgroup``. Adding new ones for e.g. polymer monomers :: monomerfa = js.ff.sphere(q=js.bio.QLIST, radius=1.2) monomerfa.Y = (monomer.Y/monomer.I0)**0.5 * V**2 * x_contrast**2 # valid only for small q js.bio.mda.xrayFFgroup['ETH'] = monomerfa # naming the specific residues to use the new defined formfactor # leads to automatic usage of the new formfactor amplitude. # use select mechanism of MDAnalysis to be specific uni.residues.resnames = 'ETH' # set corresponding incoherent if needed (just as example for C2H4 monomer) # here we assume one Cα atom per residue/monomer in coarse graining. uni.residues.atoms.fi2xs = js.data.xrayFFatomic['C'][2]*2 + js.data.xrayFFatomic['H'][2] * 4 - fan/fans : atomic/residue neutron formfactor amplitude unit nm - atomic neutron formfactor amplitudes which are Q independent with a scattering length according to [1]_ taken from https://www.ncnr.nist.gov/resources/n-lengths/list.html - Delocalisation is taken into account exchanging :math:`f_i(Q)` -> :math:`f^{\prime}_i(Q)` with (:math:`\delta_X=universe.xDelocalisation` and :math:`\delta_H=universe.hDelocalisation`) .. math:: f^{\prime}_i(Q) = exp(-Q^2\delta_{X,H}^2/2)]f_i(Q) - residue scattering is calculated as for fax based on atomic scattering amplitudes.. - fi2n/fi2ns : neutron incoherent scattering amplitude squared, unit nm². Same source as fan. - fi2x/fi2xs : xray incoherent scattering amplitude squared according to [3]_ (compton scattering), unit nm². - posnm : atom/residue center of geometry positions in unit nm. MDAnalysis uses Å in .position. posnm is for convenience to get 1/nm wavevectors. - surface : name of surface, to test if atoms belong to surface atoms. (Maybe later different surfaces) - surfdata : surface atom data as [surface area, surface volume, shell volume, 3x surface area mean position] - vdWradiinm : vdWradii in nm as defined in js.data.vdwradii Set **default values** dependent on *.solvent* using See :py:func:`setSolvent` . - xsld : solvent xray scattering length density, unit 1/nm²=nm/nm³ - edensity : solvent electron density, unit e/nm³ - bcDensitySol : solvent neutron scattering length density, unit 1/nm²=nm/nm³ - b2_incSol : solvent neutron incoherent scattering length density squared, unit 1/nm=nm²/nm³ - d2oFract : solvent d/h mol fraction - d2omassFract : solvent d/h mass fraction Examples -------- Load pdb protein structure from the PDB data bank by ID to *scatteringUniverse*. The pdb file is corrected and hydrogen is added automatically. The protein structure including H is saved to 3rn3_h.pdb. .. literalinclude:: ../../examples/example_bio_loadPDB.py :language: python Load existing PDB file (the one with H from above) :: # reload the generated structurestructure later with hydrogens using the local saved structure. uni2 = js.bio.scatteringUniverse('3rn3_h.pdb',addHydrogen=False) uni2.view() A *scatteringUniverse* with a complete trajctory from MD simulation is created. The PSF needs atomic types to be guessed from names to identify atoms in the used format. You may need to install MDAnalysisTests to get the files.( `python -m pip install MDAnalysisTests`) It migh be nesserary to transform the box that the protein is not crossing boundaries of the universe box. .. literalinclude:: ../../examples/example_bio_loadTrajectory.py :language: python .. image:: ../../examples/images/uniformfactorstraj.jpg :align: center :width: 50 % :alt: uniformfactors `scatteringUniverse` does the same as this in MDAnalysis. :: u = mda.Universe(PSF, DCD) # determine types from names u.atoms.types = guess_types(u.atoms.names) u.atoms.guess_bonds(vdwradii={'H': 1.09, 'C': 1.58, 'N': 0.84, 'O': 1.3, 'S': 1.68, 'P': 1.11}) # add scattering attributes upsf2 = js.bio.scatteringUniverse(u, addHydrogen=False, guess_bonds=False) **Examples for PDB structures** without explicit solvent for small angle scattering The example shows the validity of residue coarse graining up to around 3/nm. With coarse graining in cubes (cubesize) the approximation seems best. This might be useful to speed up computations that take long (e.g. ISF at low Q) There is basically no difference between precalculated and averaged residue formfactors and explicit calculated residue formfactors for each residue (uni.explicitResidueFormFactorAmpl = True) The explicit ones include better deuteration of specific atoms. Cα models loose some precision in volume respectivly in forward scattering. C-alpha models need a .calphaCoarseGrainRadius = 0.342 nm because of the missing atoms. In addition, the mean residue position is not the C-alpha position. We use 0.342 nm as a good average to get same forward scattering over a bunch of proteins (see example_bio_proteinCoarseGrainRadius.py). .. literalinclude:: ../../examples/example_bio_comparecoarsegraining.py :language: python .. image:: ../../examples/images/uniformfactors.jpg :align: center :width: 50 % :alt: uniformfactors References ---------- .. [1] Neutron scattering lengths and cross-sections V. F. Sears Neutron News, 3, 26-37 (1992) https://doi.org/10.1080/10448639208218 .. [2] REZ et al.Acta Cryst. A50, 481-497 (1994) .. [3] A new analytic approximation to atomic incoherent X-ray scattering intensities. J. THAKKAR and DOUGLAS C. CHAPMAN Acta Cryst. (1975). A31, 391 .. [4] A Rapid Coarse Residue-Based Computational Method for X-Ray Solution Scattering Characterization of Protein Folds and Multiple Conformational States of Large Protein Complexes S. Yang, S. Park, L. Makowski, B. Roux Biophysical Journal 96, 4449–4463 (2009) doi: 10.1016/j.bpj.2009.03.036 .. [5] An improved method for calculating the contribution of solvent to the X-ray diffraction pattern of biological molecules R. D. B. Fraser, T. P. MacRae and E. Suzuki J. Appl. Cryst. (1978). 11, 693-694 https://doi.org/10.1107/S0021889878014296 .. [6] What can x-ray scattering tell us about the radial distribution functions of water? Jon M. Sorenson, Greg Hura, Robert M. Glaeser, et al. J. Chem. Phys. 113, 9149 (2000); https://doi.org/10.1063/1.1319615 .. [7] How large B-factors can be in protein crystal structures. Carugo et al BMC Bioinformatics 19, 61 (2018). https://doi.org/10.1186/s12859-018-2083-8 .. [8] Hydrogen atoms in proteins: Positions and dynamics Engler, Ostermann, Niimura, Parak PNAS 100, 10243 (2003) https://doi.org/10.1073/pnas.1834279100 """ addHydrogen = kwargs.pop('addHydrogen', True) guess_bonds = kwargs.pop('guess_bonds', 'all') ph = kwargs.pop('ph', None) # add all vdW radii (in A) as defaults vdwradiiA = {k: v * 10 for k, v in vdwradii_.items()} vdwradiiA.update( {k: v for k, v in kwargs.pop('vdwradii', {}).items()}) # no uppercase translation biounit = kwargs.pop('biounit', False) assig_Types = kwargs.pop('assignTypes', {}) solvent = kwargs.pop('guess_bonds', '1h2o1') temperature = kwargs.pop('temperature', 293.15) # create universe, postpone bonds to end to have more control kwargs.update({'guess_bonds': False, 'vdwradii': vdwradiiA}) if isinstance(args[0], str): # creating scattering universe from a file or PDB ID that will be downloaded and hydrogens added if os.path.exists(args[0]): # prefer existing files pass elif os.path.splitext(args[0])[1] in ['', '.pdb1', '.pdb2'] and \ re.match('^[0-9][a-z0-9]{3}$', os.path.splitext(args[0])[0].lower()): # download pdb id and requested biounit; pdb1,pdb2 is biounit, '' only PDB id if os.path.splitext(args[0])[1] in ['.pdb1', '.pdb2'] or biounit: arg0 = fetch_pdb(id=args[0], biounit=True) arg0 = mergePDBModel(arg0) else: arg0 = fetch_pdb(id=args[0]) args = (arg0,) + args[1:] if addHydrogen and (os.path.splitext(args[0])[1] == '.pdb' and isinstance(args[0], (str, os.PathLike))): if pymol2 and addHydrogen == 'pymol': pdb_h = addH_Pymol(args[0]) print('Used PyMol to add hydrogens.') args = (pdb_h,) + args[1:] else: # add hydrogens using pdb2pqr and missing atoms debump = kwargs.pop('debump', False) opt = kwargs.pop('opt', False) pqr_h, pdb_h = fastpdb2pqr(args[0], debump=debump, opt=opt, ph=ph) print('Used pdb2pqr to add hydrogens.') args = (pdb_h,) + args[1:] # except ValueError: # Warning('No hydrogens added, maybe not an atomic structure but coarse grained as CA model?') # init universe with updated args super(scatteringUniverse, self).__init__(*args, **kwargs) elif isinstance(args[0], MDAnalysis.Universe): # create universe with topology from input nargs = (args[0]._topology.copy(),)+args[1:] kwargs.update({'addHydrogen': False}) super(scatteringUniverse, self).__init__(*nargs, **kwargs) self.trajectory = args[0].trajectory.copy() self.dimensions = args[0].dimensions elif isinstance(args[0], MDAnalysis.core.topology.Topology): # create from topology e.g. from a copy nargs = (args[0].copy(),)+args[1:] kwargs.update({'addHydrogen': False}) super(scatteringUniverse, self).__init__(*nargs, **kwargs) else: raise TypeError('Cannot create universe from input arguments.') # try to add types (to get elements for scattering amplitudes) # e.g. psf files might contain 'numbers' as types, but we need element types # later we might switch to elements when these are better defined if assig_Types.pop('from_names', {}): self.atoms.types = guess_types(self.atoms.names) if assig_Types: for k, v in assig_Types.items(): ak = self.select_atoms('type ' + k) ak.types = np.array([v] * ak.n_atoms) if not set(self.atoms.types).issubset(vdwradiiA.keys()) and hasattr(self.atoms, 'names'): # in case try to determine from names raise TypeError('Mismatch in types and vdwradii.', set(self.atoms.types).difference(vdwradiiA.keys()), 'Translate to capital letters like vdwradii={"Na": js.bio.vdwradii["NA"]} ') if guess_bonds: self.guess_bonds(guess_bonds=guess_bonds, vdwradii=vdwradiiA) # add default solvent H2O and temperature self.setSolvent(solvent, temperature=temperature) # now add all Attributes for N+X scattering # prepare all needed scattering attributes addNXAttributes(self, vdwradiiA)
[docs] def setSolvent(self, solvent=None, density=None, temperature=None, numDensity=None, units='mol'): """ Set solvent emdedding the universe to calculate scattering lengths and contrast. Usage only for changing solvent. Other parameters changed automatically. Parameters ---------- solvent : list of string Describes solvent composition with molar fractions. If None universe.solvent is used. A chemical formula with fraction+[lettercode+number]+..... - e.g.'D2O1' or 'H2O1' for water and heavy water - ['0.6D2O1','0.4H2O1'] for a mixture of 0.6 hwater and 0.4 water by mol fraction - ['6D2O1','4H2O1'] for a mixture of 6 hwater and 4 water as mixed mol - default is pure h2o density : optional, default=None, float Density of the solvent in units g/cm³. Explicit given values force the density to be different from tabulated values! By default (None) tabulated values according to formel.waterdensity as given in solvent are use. See :py:func:'jscatter.formel.physics.waterdensity' for specific references. numDensity: optional, default=None, float Number density of solvent molecules in units 1/nm³. Forces scaling of related values to different number density e.g. in a simulation box. Overrides density above and is intended only for simulation boxes. - 0 : resets to above density values. - None : water with components as given in universe.solvent is used. This is the default for protein scattering from PDB structures (like implicit solvent). See :py:func:'jscatter.formel.waterdensity' for specific references. - float: For explicit solvent universes from MD simulation the values gives the total number density of all solvent molecules used in the MD simulation. temperature : float Temperature of the universe in units K. units : 'mol' Units used in solvent as mol or mass. Anything except 'mol' (default) is mass fraction . e.g. 1l Water with 123mmol NaCl ['55.5H2O1','0.123Na1Cl1'] See :py:func:'jscatter.formel.scatteringLengthDensityCalc'. Returns ------- Sets universe attributes solventDensity, numDensitySol, bcDensitySol, edensitySol, b2_incSol,d2oFract, xsldSol Notes ----- - :py:func:`~jscatter.formel.scatteringLengthDensityCalc` is used to calc solvent scattering length (units=mol). - For a simulation box with water it is important to have the correct water numDensity some distance away from the protein surface (outside of the hydration layer) to get the right contrast to the embedding solvent. Simulation boxes have not always the correct waterdensity. This is automatically taken into account. - For PDB structures or simulation with implicit solvent the density is waterdensity. - For simulation boxes the numberdensity of the initial box is usually fixed and not changed (constant volume simulation over constant pressure). If the initial equal distributed water forms a hydration layer with higher density around the protein, consequently the density outside the hydration layer must decrease. To correctly determine the contrast we need the numberdensity of the solvent outside of the hydration layer. We can extract this by selecting water some distance away from the protein surfece. In the same way we can approximate the numberdensity in the hydration layer. """ u = self.universe u.temperature = temperature if temperature is not None else u.temperature u.solvent = solvent if solvent is not None else u.solvent if numDensity == 0: # to reset to water values u.solventDensity = None numDensity = None # density = None forces to use tabulated values for water or buffers u.solventDensity = density if density is not None else None # determine from water if no explicit values are given # water density is calculated in scatteringLengthDensityCalc from formel.waterdensity xsld, edensity, nsld, incsld, mass, massfullprotonated, massfulldeuterated, dhfraction, numdensity, density = \ formel.scatteringLengthDensityCalc(u.solvent, u.solventDensity, T=u.temperature, mode='all2', units=units) if numDensity is not None: # scaling factor according to atom numberDensity f = numDensity / sum(numdensity) else: f = 1 self.solventDensity = f * density self._numDensitySol = f * sum(numdensity) self._bcDensitySol = f * nsld self._edensitySol = f * edensity self._b2_incSol = f * incsld self._d2oFract = dhfraction self._xsldSol = f * xsld
[docs] def guess_bonds(self, guess_bonds='all', vdwradii=None): r""" Guess bonds according to distance between atoms. Bonds between atoms are created, if the two atoms are within :math:`d < f \cdot (R_1 + R_2)` of each other, where :math:`R_{1,2}` are the VdW radii of the atoms and :math:`f=0.55` is an ad-hoc *fudge_factor*. This is the same algorithm that VMD uses but with f=0.6. Parameters ---------- vdwradii : dict Dictionary with {name:value[A]} pairs for vdWaals radii. guess_bonds : MDAnalysis selection string, default='all' Guess bonds for atoms selected by selection string. Bonds are needed for correct hydrogen exchange (e.g. H bonded to 'O,N,S' exchange with D2O to D) in neutron scattering. See hdexchange. For X-ray scattering or if all bonds are present in the topology set to *False*. For trajectories with a lot of solvent, it takes time to determine solvent bonds. Selecting only parts like ``guess_bonds='protein'`` or ``guess_bonds='not segid seg_1_SOL'`` shortcuts this. The solvent *H* needs to be explicitly set ``.hdexchange`` or the atomtype to *D*. Returns ------- None, bonds are created as atomAttribute ``bonds``. Notes ----- Atomic van der Waals radii (units A) passed to universe are used to identify bonds between atoms using the universe.guess_bonds method of MDAnalysis. The default radii are according to `<https://en.wikipedia.org/wiki/Atomic_radii_of_the_elements_(data_page)#Van_der_Waals_radius>`_ (mainly Bondi reference). These values are larger than the radii for SESvolume calculation (:py:func:`~.bio.mda.getSurfaceVolumePoints`) to identify all bonds. """ try: self.select_atoms(guess_bonds).guess_bonds(vdwradii) except Exception as ex: # e.g. LAMPSdata BONDS throw an error message = "An exception of type {0} occurred. Arguments:\n{1!r}".format(type(ex).__name__, ex.args) print('Bonds cannot be guessed because of following error:') print(message)
@property def bcDensitySol(self): """ Coherent neutron scattering length density of solvent. """ self.setSolvent() return self._bcDensitySol @bcDensitySol.setter def bcDensitySol(self, v): # only to get a proper Error raise AttributeError('Change this property using universe.setSolvent!') @property def b2_incSol(self): """ Incoherent neutron scattering length density solvent. """ self.setSolvent() return self._b2_incSol @b2_incSol.setter def b2_incSol(self, v): # only to get a proper Error raise AttributeError('Change this property using universe.setSolvent!') @property def d2oFract(self): """ D2O number fraction in solvent. """ self.setSolvent() return self._d2oFract @d2oFract.setter def d2oFract(self, v): # only to get a proper Error raise AttributeError('Change this property using universe.setSolvent!') @property def xsldSol(self): """X-ray scattering length of solvent.""" self.setSolvent() return self._xsldSol @xsldSol.setter def xsldSol(self, v): # only to get a proper Error raise AttributeError('Change this property using universe.setSolvent!') @property def edensitySol(self): """Electron density solvent in e/nm³""" self.setSolvent() return self._edensitySol @edensitySol.setter def edensitySol(self, v): # only to get a proper Error raise AttributeError('Change this property using universe.setSolvent!') @property def numDensitySol(self): """Number density solvent in 1/nm³""" self.setSolvent() return self._numDensitySol @numDensitySol.setter def numDensitySol(self, v): # only to get a proper Error raise AttributeError('Change this property using universe.setSolvent!')
[docs] def copy(self): """Return an independent copy of this Universe""" # copy is made in __class__ # TODO check which additional attributes are needed to be copied new = self.__class__(self._topology, addHydrogen=False, guess_bonds=False, vdwradii=self.vdWradiiA) new.trajectory = self.trajectory.copy() new.dimensions = self.dimensions.copy() return new
[docs] def view(self, select='all', frames=None, viewer=''): """ View the actual configuration in a selected viewer. Parameters ---------- select : string, default = 'all' Selection string as in select_atoms. frames : array-like or slice or FrameIteratorBase or str, optional An ensemble of frames to write. The ensemble can be a list or array of frame indices, a mask of booleans, an instance of :class:`slice`, or the value returned when a trajectory is indexed. By default, `frames` is set to ``None`` and only the current frame is written. If `frames` is set to "all", then all the frame from trajectory are written. viewer : 'pymol', 'vmd', 'chimera', or full path Viewer to show pdb structure. - If the programm is in the PATH the short name is enough. - The full path can be specified. - default is to use the first existing. Examples -------- Normal modes are just used to generate a trajectory. :: import jscatter as js # view single structure uni = js.bio.scatteringUniverse('3RN3.pdb') uni.view(select='protein') # normal mode anmation saved as trajectory uni = js.bio.scatteringUniverse(js.examples.datapath+'/arg61.pdb',addHydrogen=False) u = uni.select_atoms("protein and name CA") nm = js.bio.NM(u, cutoff=10) moving = nm.animate([6,7,8,9], scale=30) # as trajectory # view trajectory moving.view(viewer="pymol", frames="all") # animate and show nm.animate([6, 7, 8, 9], scale=30).view(viewer="pymol", frames="all") """ if viewer: viewerpath = shutil.which(viewer) if not viewerpath: raise FileNotFoundError(f'The executable {viewer} is not found in PATH.') else: if sys.platform.startswith('linux'): # viewerpath = 'xdg-open' # this closes before the file was read by the opend pdb viewr for viewer in ['pymol', 'vmd', 'chimera']: viewerpath = shutil.which(viewer) if viewerpath: break elif sys.platform.startswith('darwin'): viewerpath = 'open' elif sys.platform == 'win32': os.startfile(tfile) atoms = self.select_atoms(select) with tempfile.NamedTemporaryFile(suffix='.pdb') as tfile: with MDAnalysis.Writer(tfile.name, atoms.n_atoms, multiframe=True) as w: with warnings.catch_warnings(): warnings.simplefilter('ignore') if frames is None: # current frame w.write(atoms) elif isinstance(frames, slice): for ts in self.trajectory[frames]: w.write(atoms) elif frames == 'all': for ts in self.trajectory: w.write(atoms) else: # whatever else was given like list of indices for ts in self.trajectory[frames]: w.write(atoms) print('saved to temporary file ', tfile.name) view_process = subprocess.run([viewerpath, tfile.name])
@property def boxVolume(self): """ Volume of universe box in A³ """ try: return MDAnalysis.lib.mdamath.box_volume(self.dimensions) except: raise ValueError('No dimesnion in universe. ') @property def qlist(self): """ Scattering vectors """ return self._qlist @qlist.setter def qlist(self, ql): # force ndarray self._qlist = np.atleast_1d(ql)
[docs] def fetch_pdb(id, path='./', biounit=False): """ Fetch id from pdb databank at http://www.rcsb.org/ id : string PDB id 4 letter code of protein structure path : string path where to store the file biounit : bool, int Download biounit/assembly1 with ending *.pdb[biounit]* as e.g. *.pdb1*. Returns ------- Saves gunziped file and returns corresponding path. Notes ----- Biounit/assembly can be downloaded using file ending *.pdb1* or *.pdb2* """ ids = id.split('.') end = '' assembly = '' if len(ids)>1 and ids[1][-1] in ['1', '2', '3']: end = ids[1][-1] assembly = '_' + end elif isinstance(biounit, bool): if biounit: end = '1' assembly = '_' + end elif isinstance(biounit, int): end = str(biounit) assembly = '_' + end outfile = path + f'{ids[0]}{assembly}.pdb' url = f'http://www.rcsb.org/pdb/files/{ids[0]}.pdb{end}.gz' print('Try to download > ', url) print('save as ', outfile) try: with urllib.request.urlopen(url) as zfile: with gzip.GzipFile(fileobj=io.BytesIO(zfile.read())) as uncompressed: content = uncompressed.read() with open(outfile, 'wb') as f: f.write(content) except Exception as e: print(e) return 0 return outfile
[docs] def mergePDBModel(pdb): """ Merge models in PDB structure from biological assembly to single model. Biological units are saved as multi model PDB files. mergePDBmodel merges these multiple models into one model that can be read by MDAnalysis or other programs as multimeric protein. Parameters ---------- pdb : string,filename PDB id or filename with models to merge. Returns ------- merged filename Examples -------- Fetch ferritin (24-mer) 1lb3 biological assembly and merge to one frame. The example needs some time :: import jscatter as js fer = js.bio.fetch_pdb('1lb3',biounit=True) merged_filename = js.bio.mergePDBModel(fer) # to show it uni = js.bio.scatteringUniverse(merged_filename, addHydrogen=False) uni.view(viewer='pymol') """ u = MDAnalysis.Universe(pdb) segid = u.segments.segids models = [] if u.trajectory.n_frames <=1: raise TypeError('Only a single model is in pdb.') for ts in range(u.trajectory.n_frames): u.trajectory[ts] models.append(u.copy()) segid = [chr(ord(id) + len(np.unique(segid)) ) for id in segid] models[-1].segments.segids = np.array(segid, dtype=object) mergeduniverse = MDAnalysis.core.universe.Merge(*[m.atoms for m in models]) pdb, end = os.path.splitext(pdb) new = pdb + '_merged' + end mergeduniverse.atoms.write(new) return new
def _getargs(input_pdb, output, *args, **kwargs): # transform args,kwargs tor arg for pdb2pqr as list of strings argdefault = ['nodebump', 'noopt'] for default in argdefault: if default not in args: args = args + (default,) arg = [input_pdb, output] + ['--' + a.replace('_', '-') for a in args] if 'log-level' not in kwargs: kwargs.update({'log-level':'ERROR'}) for k,v in kwargs.items(): arg = arg + ['--' + k.replace('_', '-'), v] return arg def get_old_header(pdblist, header='short'): """ Get old header from list of :mod:`pdb` objects. header as 'short', 'all' """ header_types = (pdb2pqr_pdb.CRYST1, # added compared to pdb2pqr #pdb2pqr_pdb.CONECT, # added compared to pdb2pqr pdb2pqr_pdb.HEADER, pdb2pqr_pdb.TITLE ) if header == 'all': header_types +=( pdb2pqr_pdb.COMPND, pdb2pqr_pdb.SOURCE, pdb2pqr_pdb.KEYWDS, pdb2pqr_pdb.EXPDTA, pdb2pqr_pdb.AUTHOR, pdb2pqr_pdb.REVDAT, pdb2pqr_pdb.JRNL, pdb2pqr_pdb.REMARK, pdb2pqr_pdb.SPRSDE, pdb2pqr_pdb.NUMMDL, ) old_header = io.StringIO() for pdb_obj in pdblist: # there is a bug in the original function with break if isinstance(pdb_obj, header_types): old_header.write(str(pdb_obj)) old_header.write("\n") return old_header.getvalue() def print_pdb(args, pdb_lines, header_lines, missing_lines, is_cif): """Print PDB-format output to specified file In PDB2PQR this function ignores hader lines which i want here args : argparse.Namespace command-line arguments pdb_lines : output lines (records) header_lines : header lines missing_lines : lines describing missing atoms is_cif : flag indicating CIF format """ with open(args.pdb_output, "wt") as outfile: # Adding whitespaces if --whitespace is in the options if missing_lines: pass for line in header_lines: outfile.write(line) for line in pdb_lines: if line[0:3] != "TER" or not is_cif: outfile.write(line)
[docs] def pdb2pqr(input_pdb, output, *args, **kwargs): """ Adds hydrogens to pdb structure, optional determines charges and repairs missing atoms. Interface to pdb2pqr in interactive shell. Original source is at [1]_, [2]_, Please cite [3]_,[4]_. From original documentation at [2]_ : Adding a limited number of missing heavy (non-hydrogen) atoms to biomolecular structures. Estimating titration states and protonating biomolecules in a manner consistent with favorable hydrogen bonding. Assigning charge and radius parameters from a variety of force fields. Generating “PQR” output compatible with several popular computational modeling and analysis packages. Parameters ---------- input_pdb : string Path to input pdb file. If only the pdb ID the corresponding file is downloaded. output : string Path of output file. args : strings Positional arguments are prepended by --xxx to represent options without input parameter If '-' is in key exchange it by underscore '_'. kwargs : pairs key=value Keyword arguments for options with input value. passed as '--key value ' If '-' is in key exchange it by underscore '_'. Notes ----- The PDB2PQR tool prepares structures for further calculations (by APBS) by reconstructing missing atoms, **adding hydrogens**, assigning atomic charges and radii from specified force fields, and generating PQR files. `APBS <http://www.poissonboltzmann.org/>`_ solves the equations of continuum electrostatics for large biomolecular assemblies. Several programs use a modified PDB format called PQR, in which atomic partial charge (Q) and radius (R) fields follow the X,Y,Z coordinate fields in ATOM and HETATM records. See `PDB2PQR Server <https://server.poissonboltzmann.org/pdb2pqr>`_ or `PDB2PQR Homepage <https://server.poissonboltzmann.org/pdb2pqr>`_ There are other programs that allow addition of hydrogens: - `CHARMM GUI <http://www.charmm-gui.org/?doc=input/pdbreader>`_ provides a web-based graphical user interface to generate various molecular simulation systems and input files. The output of *Input Generator/PDB Reader* in the file *step1_pdbreader.pdb* contains hydrogen atoms. - PSFGEN is a tool (included in **VMD** or `NAMD <https://www.ks.uiuc.edu/Research/namd/>`_ ) to generate a protein structure file (PSF) for MD Simulations from a PDB structure. It can be used from `VMD <https://www.ks.uiuc.edu/Research/vmd/>`_ using the *Automatic PSF Builder* or on the `command line <https://www.ks.uiuc.edu/Research/vmd/plugins/psfgen/>`_. - HAAD https://zhanglab.ccmb.med.umich.edu/HAAD/ https://doi.org/10.1371/journal.pone.0006701 **Original help from pdb2pqr30** :: PDB2PQR v3.1.0+15.g41d841a.dirty: biomolecular structure conversion software. positional arguments: input_path Input PDB path or ID (to be retrieved from RCSB database output_pqr Output PQR path optional arguments: -h, --help show this help message and exit --log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL} Logging level (default: INFO) Mandatory options: One of the following options must be used --ff {AMBER,CHARMM,PARSE,TYL06,PEOEPB,SWANSON} The forcefield to use. (default: PARSE) --userff USERFF The user-created forcefield file to use. Requires --usernames and overrides --ff (default: None) --clean Do no optimization, atom addition, or parameter assignment, just return the original PDB file in aligned format. Overrides --ff and --userff (default: False) General options: --nodebump Do not perform the debumping operation (default: True) --noopt Do not perform hydrogen optimization (default: True) --keep-chain Keep the chain ID in the output PQR file (default: False) --assign-only Only assign charges and radii - do not add atoms, debump, or optimize. (default: False) --ffout {AMBER,CHARMM,PARSE,TYL06,PEOEPB,SWANSON} Instead of using the standard canonical naming scheme for residue and atom names, use the names from the given forcefield (default: None) --usernames USERNAMES The user-created names file to use. Required if using --userff (default: None) --apbs-input APBS_INPUT Create a template APBS input file based on the generated PQR file at the specified location. (default: None) --pdb-output PDB_OUTPUT Create a PDB file based on input. This will be missing charges and radii (default: None) --ligand LIGAND Calculate the parameters for the specified MOL2-format ligand at the path specified by this option. (default: None) --whitespace Insert whitespaces between atom name and residue name, between x and y, and between y and z. (default: False) --neutraln Make the N-terminus of a protein neutral (default is charged). Requires PARSE force field. (default: False) --neutralc Make the C-terminus of a protein neutral (default is charged). Requires PARSE force field. (default: False) --drop-water Drop waters before processing biomolecule. (default: False) --include-header Include pdb header in pqr file. WARNING: The resulting PQR file will not work with APBS versions prior to 1.5 (default: False) pKa options: Options for titration calculations --titration-state-method {propka} Method used to calculate titration states. If a titration state method is selected, titratable residue charge states will be set by the pH value supplied by --with_ph (default: None) --with-ph PH pH values to use when applying the results of the selected pH calculation method. (default: 7.0) PROPKA invocation options: -f FILENAMES, --file FILENAMES read data from <filename>, i.e. <filename> is added to arguments (default: []) -r REFERENCE, --reference REFERENCE setting which reference to use for stability calculations [neutral/low-pH] (default: neutral) -c CHAINS, --chain CHAINS creating the protein with only a specified chain. Specify " " for chains without ID [all] (default: None) -i TITRATE_ONLY, --titrate_only TITRATE_ONLY Treat only the specified residues as titratable. Value should be a comma-separated list of "chain:resnum" values; for example: -i "A:10,A:11" (default: None) -t THERMOPHILES, --thermophile THERMOPHILES defining a thermophile filename; usually used in 'alignment-mutations' (default: None) -a ALIGNMENT, --alignment ALIGNMENT alignment file connecting <filename> and <thermophile> [<thermophile>.pir] (default: None) -m MUTATIONS, --mutation MUTATIONS specifying mutation labels which is used to modify <filename> according to, e.g. N25R/N181D (default: None) --version show program's version number and exit -p PARAMETERS, --parameters PARAMETERS set the parameter file [{default:s}] (default: /home/biehl/local/lib/python3.9/site-packages/propka/propka.cfg) -o PH, --pH PH setting pH-value used in e.g. stability calculations [7.0] (default: 7.0) -w WINDOW WINDOW WINDOW, --window WINDOW WINDOW WINDOW setting the pH-window to show e.g. stability profiles [0.0, 14.0, 1.0] (default: (0.0, 14.0, 1.0)) -g GRID GRID GRID, --grid GRID GRID GRID setting the pH-grid to calculate e.g. stability related properties [0.0, 14.0, 0.1] (default: (0.0, 14.0, 0.1)) --mutator MUTATOR setting approach for mutating <filename> [alignment/scwrl/jackal] (default: None) --mutator-option MUTATOR_OPTIONS setting property for mutator [e.g. type="side-chain"] (default: None) -d, --display-coupled-residues Displays alternative pKa values due to coupling of titratable groups (default: False) -l, --reuse-ligand-mol2-files Reuses the ligand mol2 files allowing the user to alter ligand bond orders (default: False) -k, --keep-protons Keep protons in input file (default: False) -q, --quiet suppress non-warning messages (default: None) --protonate-all Protonate all atoms (will not influence pKa calculation) (default: False) References ---------- .. [1] https://pdb2pqr.readthedocs.io/en/latest/ .. [2] https://github.com/Electrostatics/electrostatics.github.io .. [3] Improvements to the APBS biomolecular solvation software suite. Jurrus E, et al. Protein Sci 27 112-128 (2018). .. [4] PDB2PQR: expanding and upgrading automated preparation of biomolecular structures for molecular simulations. Dolinsky TJ, et al. Nucleic Acids Res 35 W522-W525 (2007). """ arglist = _getargs(input_pdb, output, *args, **kwargs) parser = build_main_parser() p2q_args = parser.parse_args(arglist) main_driver(p2q_args)
[docs] def fastpdb2pqr(input_pdb, debump=False, opt=False, drop_water=True, ph=None, whitespace=True): """ A fast version of :py:func:`~.bio.mda.pdb2pqr`. Speedup is achieved by omitting optimization, debumping, minimized logging and reducing options. For full options use pdb2pqr. Parameters ---------- input_pdb : str Input pdb file. debump : bool, default False Debump added atoms, ensure that new atoms are not rebuilt too close to existing atoms. opt : bool, default False Perform hydrogen optimization, default is not to do it, Adjusts hydrogen positions and flips certain side chains (His, Asn, Glu) as needed to optimize hydrogen bonds. drop_water : bool Drop water atoms. ph : float, default None pH value to use for assignment of charges. If None pH 7 is assumed but PROPKA is not used. Cite [5]_ [6]_ if using charge assignments by PROPKA. whitespace : bool Insert whitespaces between atom name and residue name, between x and y, and between y and z. This improves readability but breaks PDB file definition. Returns ------- input_pdb.pqr, input_pdb_h.pdb (without previous suffix) Notes ----- Uses default options of pdb2pqr except of these. - debump = False - opt = False - drop_water = True ; this reduces just te number of atoms not to get errors in mda - whitespace; mda has problems as somewhere split() is used instead of char numbers as defined for pqr files - pdb_output = input_pdb with prefix appended '_h' Examples -------- Use fastpdb2pqr and combine the '_h.pdb' file including ligands but without charges with the '_h.pqr file' that cotains charges but no ligands. To get charge states of ligands please use the web services or programs mentioned in :py:func:`pdb2pqr` . Charges can be added manually for the ligands. :: import jscatter as js import MDAnalysis as mda # this adds hydrogens to uni with ligands and adds charges in the corresponding '.pqr' file uligand = js.bio.scatteringUniverse('3pgk') uligand.add_TopologyAttr('charges') # all charges are zero ucharge = js.bio.scatteringUniverse('3pgk_h.pqr') protein = ucharge.select_atoms('protein').atoms for l,c in zip(uligand.select_atoms('protein').residues, ucharge.select_atoms('protein').residues): try: # this throws an error if len(charges) is different; in that way same residues get correct charge l.atoms.charges =c.atoms.charges except: print('---',l.resnum,c.resnum) uligand.atoms.charges.sum() # = -1 References ---------- .. [1] https://pdb2pqr.readthedocs.io/en/latest/ .. [2] https://github.com/Electrostatics/electrostatics.github.io .. [3] Improvements to the APBS biomolecular solvation software suite. Jurrus E, et al. Protein Sci 27 112-128 (2018). .. [4] PDB2PQR: expanding and upgrading automated preparation of biomolecular structures for molecular simulations. Dolinsky TJ, et al. Nucleic Acids Res 35 W522-W525 (2007). .. [5] Improved Treatment of Ligands and Coupling Effects in Empirical Calculation and Rationalization of pKa Values Sondergaard, Chresten R., Mats HM Olsson, Michal Rostkowski, and Jan H. Jensen. Journal of Chemical Theory and Computation 7, (2011): 2284-2295. .. [6] PROPKA3: consistent treatment of internal and surface residues in empirical pKa predictions. Olsson, Mats HM, Chresten R. Sondergaard, Michal Rostkowski, and Jan H. Jensen. Journal of Chemical Theory and Computation 7, no. 2 (2011): 525-537. """ # define fast arguments for adding H output_pqr = os.path.splitext(input_pdb)[0] + '_h.pqr' output_pdb = os.path.splitext(input_pdb)[0] + '_h.pdb' if ph: arglist = _getargs(input_pdb, output_pqr, with_ph=f'{ph:.1f}', titration_state_method='propka') else: arglist = _getargs(input_pdb, output_pqr) parser = build_main_parser() args = parser.parse_args(arglist) # set some args to get fast result args.debump = debump args.opt = opt args.assign_only = False args.clean = False args.userff = None args.ffout = None args.neutraln = False args.neutralc = False args.ligand = None args.keep_chain = True args.drop_water = drop_water args.whitespace = whitespace args.pdb_output = output_pdb definition = pdb2pqr_io.get_definitions() pdblist, is_cif = pdb2pqr_io.get_molecule(input_pdb) if args.drop_water: pdblist = pdb2pqr_drop_water(pdblist) biomolecule, definition, ligand = pdb2pqr_setup_molecule(pdblist, definition, args.ligand) biomolecule.set_termini(args.neutraln, args.neutralc) biomolecule.update_bonds() # do the magic results = pdb2pqr_non_trivial(args=args, biomolecule=biomolecule, ligand=ligand, definition=definition, is_cif=is_cif) # write output print('write ', output_pqr) pdb2pqr_print_pqr(args=args, pqr_lines=results["lines"], header_lines=results["header"], missing_lines=results["missed_residues"], is_cif=is_cif) print('write ', output_pdb) pdb_lines = pdb2pqr_io.print_biomolecule_atoms(biomolecule.atoms, chainflag=args.keep_chain, pdbfile=True) header_lines = get_old_header(pdblist) print_pdb(args=args, pdb_lines=pdb_lines, header_lines=header_lines, missing_lines=results["missed_residues"], is_cif=is_cif) return output_pqr, output_pdb
[docs] def addH_Pymol(pdbid): """ Add hydrogens to pdb file using PyMol if present. This works only for PyMol installations if `pymol2` can be imported. `pymol2` provides an additional interface to PyMol. Alternativly this can be done by saving to PDB file ; using PymMol and save it again. Parameters ---------- pdbid : string PDB id or filename of PDB file Returns ------- Filename of saved file. Notes ----- From https://pymolwiki.org/index.php/H_Add :: PyMOL fills missing valences but does no optimization of hydroxyl rotamers. Also, many crystal structures have bogus or arbitrary ASN/GLN/HIS orientations. Getting the proper amide rotamers and imidazole tautomers & protonation states assigned is a nontrivial computational chemistry project involving electrostatic potential calculations and a combinatorial search. There's also the issue of solvent & counter-ions present in systems like aspartyl proteases with adjacent carboxylates . For higher accuracy (optimization) use :py:func:`~jscatter.bio.mda.pdb2pqr` . """ # TODO simplify this ? if pymol2: p1 = pymol2.PyMOL() p1.start() if os.path.isfile(pdbid): pdb = pdbid p1.cmd.load(pdb) pdb, end = os.path.splitext(pdb) else: pdb = os.path.splitext(pdbid)[0].lower() p1.cmd.fetch(pdb) end = '.pdb' # remove alternate conformations # TODO select alternate location p1.cmd.remove('not alt ""+A') p1.cmd.alter('all', 'alt=""') p1.cmd.h_add() # save it p1.cmd.save(pdb+'_h'+end) p1.stop() return pdb+'_h'+end else: raise ModuleNotFoundError('pymol not installed. Use pdb2pqr.')