# -*- coding: utf-8 -*-
# written by Ralf Biehl at the Forschungszentrum Jülich ,
# Jülich Center for Neutron Science 1 and Institute of Complex Systems 1
# Jscatter is a program to read, analyse and plot data
# Copyright (C) 2015-2024 Ralf Biehl
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
"""
mda module contains functions to prepare/use MDAnalysis universes for scattering.
scattering length is in nm units
"""
import os
import sys
import re
import numbers
from collections import defaultdict
import gzip
import io
import urllib
import shutil
import tempfile
import subprocess
import warnings
import multiprocessing as mp
from multiprocessing.pool import ThreadPool
import numpy as np
import numpy.linalg as la
import scipy
from scipy.interpolate import interp1d
import MDAnalysis
from MDAnalysis.core.topologyattrs import AtomAttr, ResidueAttr, AtomStringAttr, Atomtypes, Atomnames
from MDAnalysis.core.groups import Atom, AtomGroup, Residue, ResidueGroup
from MDAnalysis.topology.guessers import guess_types
from pdb2pqr.main import build_main_parser, main_driver, \
drop_water as pdb2pqr_drop_water, \
setup_molecule as pdb2pqr_setup_molecule,\
non_trivial as pdb2pqr_non_trivial, \
print_pqr as pdb2pqr_print_pqr, \
print_pdb as pdb2pqr_print_pdb
import pdb2pqr.io as pdb2pqr_io
import pdb2pqr.pdb as pdb2pqr_pdb
try:
import pymol2
except (ModuleNotFoundError, ImportError):
pymol2 = False
from .. import data
from .. import formel
# noinspection PyUnresolvedReferences
try:
from ..libs import fscatter
except ImportError:
ImportWarning('bio module needs a Fortran compiler to work. '
'Reinstall Jscatter with Fortran Compiler')
from ..libs import SASAsurface as sasa
Nscatlength = data.Nscatlength.copy()
neutronFFgroup = data.neutronFFgroup.copy()
xrayFFatomic = data.xrayFFatomic.copy()
xrayFFatomicdummy = data.xrayFFatomicdummy.copy()
xrayFFgroupdummy = data.xrayFFgroupdummy.copy()
xrayFFgroup = data.xrayFFgroup.copy()
vdwradii_ = {k.upper(): v for k, v in data.vdwradii.copy().items()}
pi = np.pi
identity3x3 = np.identity(3)
zero3x3 = np.zeros((3, 3))
QLIST = data.QLIST
# add deuterium to MDAnalysis tables
MDAnalysis.topology.tables.masses['D'] = data.Elements['d'][1]
MDAnalysis.topology.tables.vdwradii['D'] = MDAnalysis.topology.tables.vdwradii['H']
__all__ = ['QLIST', 'copyUnivProp', 'getSurfaceVolumePoints', 'getNativeContacts', 'getDistanceMatrix', 'scatteringUniverse',
'pdb2pqr', 'fastpdb2pqr', 'addH_Pymol', 'fetch_pdb', 'mergePDBModel', 'xrayFFgroup', 'neutronFFgroup',
'occupiedVolume']
ucopylist = ['d2oFract', 'temperature', 'solvent', 'solventDensity', 'error',
'amideHexFract', 'histidinExchange', 'bcDensitySol', 'b2_incSol', 'probe_radius', 'shellThickness',
'iscalphamodel', 'explicitResidueFormFactorAmpl', 'xsldSol', 'edensitySol', 'numDensitySol',
'SESVolume', 'SASVolume', 'SASArea',
'hDelocalisation', 'xDelocalisation', 'solventDelocalisation']
[docs]
def copyUnivProp(universe, objekt, addlist=[]):
"""
Copies important universe properties from universe to object if they exist.
The default list is in js.bio.ucopylist
Parameters
----------
universe : MDAnalysis universe
objekt : objekt
Objekt to copy to
addlist : additional attribute list
"""
for attr in ucopylist + addlist:
# noinspection PyBroadException
try:
if hasattr(universe, attr):
if hasattr(getattr(universe, attr), '__call__'):
aa = getattr(universe, attr)() # as test to call
setattr(objekt, attr, getattr(universe, attr)())
else:
setattr(objekt, attr, getattr(universe, attr))
except:
pass
class XRayFormFactor(AtomAttr):
"""
coherent Xray formfactors amplitude in unit nm
"""
attrname = 'faxs'
singular = 'fax'
dtype = np.float32 # object
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
QLIST=QLIST
transplants = defaultdict(list)
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
# prepare zero shape array for later values
return np.zeros((n_atoms, QLIST.shape[0]))
def get_atoms(self, atomgroup):
# Get values from single atom by interpolation to qlist
fxq = interp1d(self.QLIST, self.values[atomgroup.ix], kind='linear')
return fxq(atomgroup.universe.qlist)
def set_atoms(self, atomgroup, values):
# set values for atom
# we set the ff for QLIST (first is zero) and interpolate for qlist in get
if values == 'types':
# use atom types
for typ in np.unique(atomgroup.types):
try:
self.values[atomgroup.select_atoms('type '+typ).ix] = xrayFFatomic[typ.capitalize()].array[1]
except KeyError:
# this happens e.g. for metals like MG to be recognised as type M -> try the names instead
ag = atomgroup.select_atoms('type ' + typ)
for a in ag:
self.values[a.ix] = xrayFFatomic[a.name.capitalize()].array[1]
elif isinstance(values, str):
# set all to same type
self.values[atomgroup.ix] = xrayFFatomic[values.capitalize()].array[1]
elif isinstance(values, np.ndarray) and np.shape(values) == self.QLIST.shape:
# explicit given formfactor
self.values[atomgroup.ix] = values
else:
raise AttributeError('values does not fit allowed XRayFormFactor amplitude input.')
def get_residues(self, res_group):
# coherent residue formfactors need to be calculated or taken from saved dict
qlist = res_group.universe.qlist
if isinstance(res_group.ix, numbers.Integral):
try:
if res_group.universe.explicitResidueFormFactorAmpl:
# provoke explicit calculation
raise KeyError()
# get from list, if KeyError it is calculated
return xrayFFgroup[res_group.resname.upper()].interp(qlist)
except KeyError:
# for a single residue
atoms = res_group.atoms
utypes = list(np.unique(atoms.types))
ufax = np.c_[[xrayFFatomic[a.capitalize()][1] for a in utypes]]
ff = np.c_[self.QLIST, ufax.T].T
iff = [utypes.index(a)+1 for a in atoms.types]
fa = [ff[i, 0] for i in iff]
ff[1:, :] = ff[1:, :]/ff[1:, :1] # normalize
# Debye with interpolation
cog = atoms.posnm.mean(axis=0)
res = fscatter.cloud.scattering_debye(qlist,
atoms.posnm - cog, # positions
fa, # atom fax
iff, # formfactor row sequence
ff, # normalized formfactors
0) # parallel
# fa are always positive => sign(sum(fa)) is always positive
val = res[1]**0.5
return val
else:
# for a residue group list
vals = np.empty((res_group.ix.shape[0], len(qlist)))
for i, rg in enumerate(res_group):
vals[i] = rg.fax
return vals
def typesScatteringAmplitudeQ0_ag(ag):
"""xray scattering amplitude atom group
"""
# need to avoid interp in atom.fax for qlist so reconstruct from xrayFFatomicdummy
values = np.zeros_like(ag.ix, dtype=np.float32)
for typ in np.unique(ag.types):
values[ag.types == typ] = xrayFFatomic[typ.capitalize()].array[1, 0]
return values
def typesScatteringAmplitudeQ0_r(r):
"""xray scattering amplitude residue
"""
try:
if r.universe.explicitResidueFormFactorAmpl:
raise KeyError()
return xrayFFgroup[r.resname.upper()].Y[0]
except KeyError:
return r.atoms.fax0().sum()
def typesScatteringAmplitudeQ0_rg(rg):
"""xray scattering amplitude residue group
"""
return np.r_[[r.fax0() for r in rg]]
transplants[AtomGroup].append(('fax0', typesScatteringAmplitudeQ0_ag))
transplants[Residue].append(('fax0', typesScatteringAmplitudeQ0_r))
transplants[ResidueGroup].append(('fax0', typesScatteringAmplitudeQ0_rg))
class XRayFormFactorDummy(AtomAttr):
"""
Coherent Xray formfactor amplitude dummy atoms in unit nm
values contains coherent_scattering_amplitude for global QLIST
Specific qlist values in get_atoms will be linear interpolated
"""
attrname = 'faxdumys'
singular = 'faxdumy'
dtype=np.float32 # object
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
QLIST=QLIST
transplants = defaultdict(list)
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
# prepare zero shape array for later values
return np.zeros((n_atoms, QLIST.shape[0]))
def get_atoms(self, atomgroup):
# Get values from single atom by interpolation
fxq = interp1d(self.QLIST, self.values[atomgroup.ix], kind='linear')
return fxq(atomgroup.universe.qlist)
def set_atoms(self, atomgroup, values):
# set values for atoms for all QLIST
if values=='types':
# use atom types
for typ in np.unique(atomgroup.types):
try:
self.values[atomgroup.select_atoms('type '+typ).ix] = xrayFFatomicdummy[typ.capitalize()].array[1]
except KeyError:
# this happens e.g. for metals like MG recognised as type M -> try the names instead which is MG
ag = atomgroup.select_atoms('type ' + typ)
for a in ag:
self.values[a.ix] = xrayFFatomicdummy[a.name.capitalize()].array[1]
elif isinstance(values, str):
# set all to same type
self.values[atomgroup.ix] = xrayFFatomicdummy[values.capitalize()][1]
elif isinstance(values, np.ndarray) and np.shape(values) == self.QLIST.shape:
# explicit given formfactor
self.values[atomgroup.ix] = values
else:
raise AttributeError('values does not fit allowed XRayFormFactor amplitude input.')
def get_residues(self, res_group):
# coherent residue formfactors need to be calculated or taken from saved dict
qlist = res_group.universe.qlist
if isinstance(res_group.ix, numbers.Integral):
try:
if res_group.universe.explicitResidueFormFactorAmpl:
# provoke explicit calculation
raise KeyError()
# get from list, if KeyError it is calculated
return xrayFFgroupdummy[res_group.resname.upper()].interp(qlist)
except KeyError:
# for a single residue
atoms = res_group.atoms
utypes = list(np.unique(atoms.types))
ufax = np.c_[[xrayFFatomicdummy[a.capitalize()][1] for a in utypes]]
ff = np.c_[self.QLIST, ufax.T].T
iff = [utypes.index(a)+1 for a in atoms.types]
fa = [ff[i, 0] for i in iff]
ff[1:, :] = ff[1:, :]/ff[1:, :1] # normalize
cog = atoms.posnm.mean(axis=0)
res = fscatter.cloud.scattering_debye(qlist,
atoms.posnm - cog, # positions
fa, # atom faxdumy
iff, # type index in ff
ff, # normalized formfactors for each type
0) # parallel
val = res[1]**0.5
return val
else:
# for a residue group list
vals = np.empty((res_group.ix.shape[0], len(qlist)))
for i, rg in enumerate(res_group):
vals[i] = rg.faxdumy
return vals
def typesScatteringAmplitudeQ0_ag(ag):
"""xray scattering amplitude dummy atom group
"""
# need to avoid interp in atom.faxdumy for qlist so reconstruct from xrayFFatomicdummy
values = np.zeros_like(ag.ix, dtype=np.float32)
for typ in np.unique(ag.types):
values[ag.types == typ] = xrayFFatomicdummy[typ.capitalize()].array[1, 0]
return values
def typesScatteringAmplitudeQ0_r(r):
"""xray scattering amplitude dummy residue
"""
try:
if r.universe.explicitResidueFormFactorAmpl:
raise KeyError()
return xrayFFgroupdummy[r.resname.upper()].Y[0]
except KeyError:
return r.atoms.fax0dumy().sum()
def typesScatteringAmplitudeQ0_rg(rg):
"""xray scattering amplitude dummy residue group
"""
return np.r_[[r.fax0dumy() for r in rg]]
transplants[AtomGroup].append(('fax0dumy', typesScatteringAmplitudeQ0_ag))
transplants[Residue].append(('fax0dumy', typesScatteringAmplitudeQ0_r))
transplants[ResidueGroup].append(('fax0dumy', typesScatteringAmplitudeQ0_rg))
class incXRayFormFactor(AtomAttr):
"""
incoherent Xray formfactors = formfactor_amplitude**2 in unit nm²
incoherent sums as sum(inx**2) different to coherent
"""
attrname = 'fi2xs'
singular = 'fi2x'
dtype=np.float32 # object
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
QLIST=QLIST
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
# prepare zero shape array for later values
return np.zeros((n_atoms, QLIST.shape[0]))
def get_atoms(self, atomgroup):
# Get values from single atom
fxq = interp1d(self.QLIST, self.values[atomgroup.ix], kind='linear')
return fxq(atomgroup.universe.qlist)
def set_atoms(self, atomgroup, values):
# set values for atom
if isinstance(values, str):
if values=='types':
# use atom types
for typ in np.unique(atomgroup.types):
try:
self.values[atomgroup.select_atoms('type '+typ).ix] = xrayFFatomic[typ.capitalize()][2]
except KeyError:
# this happens e.g. for metals like MG to be recognised as type M -> try the names instead
ag = atomgroup.select_atoms('type ' + typ)
for a in ag:
self.values[a.ix] = xrayFFatomic[a.name.capitalize()].array[1]
else:
# set all to same type
self.values[atomgroup.ix] = xrayFFatomic[values.capitalize()][2]
elif isinstance(values, np.ndarray) and np.shape(values) == self.QLIST.shape:
# explicit given formfactor
self.values[atomgroup.ix] = values
else:
raise AttributeError('values does not fit allowed XRayFormFactor amplitude input.')
def get_residues(self, res_group):
# coherent residue formfactors need to be calculated or taken from saved dict
qlist = res_group.universe.qlist
if isinstance(res_group.ix, numbers.Integral):
# for a single residue
atoms = res_group.atoms
utypes = list(np.unique(atoms.types))
ufax = np.c_[[xrayFFatomic[a.capitalize()][2] for a in utypes]].sum(axis=0)
vals = np.interp(qlist, self.QLIST, ufax, ufax[0], ufax[-1])
else:
# for a residue group list
vals = np.empty((res_group.ix.shape[0], len(qlist)))
for i, rg in enumerate(res_group):
atoms = rg.atoms
utypes = list(np.unique(atoms.types))
ufax = np.c_[[xrayFFatomic[a.capitalize()][2] for a in utypes]].sum(axis=0)
vals[i] = np.interp(qlist, self.QLIST, ufax, ufax[0], ufax[-1])
return vals
class nFormFactor(AtomAttr):
"""
Neutron coherent scattering amplitude in unit nm
The return value should reflect the deuteration of individual atoms and possible hd exchange
"""
attrname = 'fans'
singular = 'fan'
dtype=np.float32
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
QLIST = QLIST
hcoh, hinc = Nscatlength['h']
dcoh, dinc = Nscatlength['d']
transplants = defaultdict(list)
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
return np.zeros(n_atoms)
def get_atoms(self, atomgroup):
# for neutrons the scattering length is a constant independent on q
# return array for all q
# this is frequently changed during fitting and should be always updated
# (maybe in the calling function, then return only scalar)
ql = atomgroup.universe.qlist.shape[0]
# atomic formfactor for neutrons q independent
return np.tile(self.values[atomgroup.ix], (ql, 1)).T
def set_atoms(self, atomgroup, values):
# here we set the values for atomic neutron scattering amplitudes
# including deuteration and hd exchange
# solvent H coh scattering respecting the d2o fraction in solvent
solventcoh = self.hcoh + atomgroup.universe.d2oFract*(self.dcoh - self.hcoh)
if isinstance(values, str):
if values == 'types':
# set it according to types, for H,D with solvent exchange and deuteration
for typ in np.unique(atomgroup.types):
agt = atomgroup.select_atoms('type ' + typ)
if typ == 'H' or typ=='D':
# account for partial deuteration
bd = self.hcoh + agt.deuteration * (self.dcoh-self.hcoh)
# account for possible HD exchange
bdex = bd + agt.hdexchange * (solventcoh - bd)
self.values[agt.ix] = bdex
else:
try:
# some scattering length are complex, we use only real part
self.values[agt.ix] = np.real(Nscatlength[typ.lower()][0])
except KeyError:
# this happens e.g. for metals like MG to be recognised as type M -> try the names instead
for a in agt:
self.values[a.ix] = Nscatlength[a.name.lower()][0]
else:
# set all same type according to string name
try:
self.values[atomgroup.ix] = Nscatlength[values.lower()][0]
except KeyError:
raise KeyError('Key should be an element name like C,O,N,...')
elif isinstance(values, np.ndarray) and (values.shape == atomgroup.ix.shape):
# set explicit values
self.values[atomgroup.ix] = values
else: # isinstance(values, (int,float)):
# explicit values
self.values[atomgroup.ix] = values
def get_residues(self, res_group):
# coherent residue formfactors for residue coarse graining
# these need to be calculated (if all atoms are present)
# or taken from saved dict with averaged values for e.g CA model
# the actual needed qlist of the universe
qlist = res_group.universe.qlist
# a const normalized formfactor for the atoms, QLIST is reference q list
ff=np.c_[self.QLIST, np.ones_like(self.QLIST)].T
if isinstance(res_group.ix, numbers.Integral):
try:
if res_group.universe.explicitResidueFormFactorAmpl:
# provoke explicit calculation
raise KeyError()
# get from list, if KeyError it is calculated
return neutronFFgroup[res_group.resname.upper()].interp(qlist)
except KeyError:
# for a single residue
# We use the Debye scattering to calc it for a residue, interpolation for correct q is done in fscatter
# the sign of fan.sum needs to be recovered as sum(x**2)**0.5 loses contrast information in matching
atoms = res_group.atoms
cog = atoms.posnm.mean(axis=0)
res = fscatter.cloud.scattering_debye(qlist,
atoms.posnm - cog, # positions
self.values[atoms.ix], # fan for atoms
[1]*atoms.ix.shape[0], # all same constant formfactor row 1
ff, # normalized formfactor
0) # ncpu parallel execution
# formfactor amplitude is root of formfactor
# this is valid only for small Q
val = res[1]**0.5 * np.sign(self.values[atoms.ix].sum())
return val
else:
# for a residue group list
vals = np.empty((len(res_group), len(qlist)))
for i, rg in enumerate(res_group):
vals[i] = rg.fan
return vals
def set_residues(self, values):
raise TypeError('Set atom attributes instead of residueAttr.')
def typesScatteringAmplitudeQ0_ag(ag):
"""neutron scattering amplitude atom group
"""
# because of constant atomic neutron formfactor we just take first
return ag.fans[:, 0]
def typesScatteringAmplitudeQ0_r(r):
"""neutron scattering amplitude residue
"""
try:
if r.universe.explicitResidueFormFactorAmpl:
raise KeyError()
return neutronFFgroup[r.resname.upper()].Y[0]
except KeyError:
return r.atoms.fan0().sum()
def typesScatteringAmplitudeQ0_rg(rg):
"""neutron scattering amplitude residue group
"""
return np.r_[[r.fan0() for r in rg]]
transplants[AtomGroup].append(('fan0', typesScatteringAmplitudeQ0_ag))
transplants[Residue].append(('fan0', typesScatteringAmplitudeQ0_r))
transplants[ResidueGroup].append(('fan0', typesScatteringAmplitudeQ0_rg))
class incnFormFactor(AtomAttr):
"""
Incoherent neutron formfactor amplitude as fomrfactor_amplitude**2 in unit nm²
The return value should reflect the deuteration of individual atoms and possible hd exchange
"""
attrname = 'fi2ns'
singular = 'fi2n'
dtype = np.float32
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
QLIST = QLIST
hcoh, hinc = Nscatlength['h']
dcoh, dinc = Nscatlength['d']
hinc2=hinc**2
dinc2=dinc**2
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
return np.zeros(n_atoms)
def get_atoms(self, atomgroup):
# solvent scattering length density with d2o fraction
return self.values[atomgroup.ix]
def set_atoms(self, atomgroup, values):
# here we set the values for incoherent atomic neutron scattering amplitudes
if values=='types':
# set it according to types
# solvent H coh scattering respecting the d2o fraction in solvent
solventinc = self.hinc2 + atomgroup.universe.d2oFract*(self.dinc2 - self.hinc2)
for typ in np.unique(atomgroup.types):
agt = atomgroup.select_atoms('type ' + typ)
if typ == 'H' or typ=='D':
# account for partial deuteration
bi = self.hinc2 + agt.deuteration * (self.dinc2-self.hinc2)
# account for possible HD exchange
biex = bi + agt.hdexchange * (solventinc - bi)
self.values[agt.ix] = biex
else:
try:
self.values[agt.ix] = Nscatlength[typ.lower()][1]**2
except KeyError:
# this happens e.g. for metals like MG to be recognised as type M -> try the names instead
for a in agt:
self.values[a.ix] = Nscatlength[a.name.lower()][1]**2
elif isinstance(values, str):
# set all same type
self.values[atomgroup.ix] = Nscatlength[values.lower()][1]**2
elif isinstance(values, np.ndarray) and (values.shape == atomgroup.ix.shape):
# set explicit values
self.values[atomgroup.ix] = values
else: # isinstance(values, (int,float)):
# explicit values
self.values[atomgroup.ix] = values
def get_residues(self, res_group):
# incoherent of a residue is sum of incfin**2
# vals = np.empty(len(res_group))
if isinstance(res_group.ix, numbers.Integral):
return self.values[res_group.atoms.ix].sum()
else:
return np.array([self.values[rg.atoms.ix].sum() for rg in res_group])
class deuteration(AtomAttr):
"""
Deuteration level. Only H can be deuterated to D.
For intermediate levels we use statistical average.
"""
attrname = 'deuteration'
singular = 'deuteration'
dtype=np.float32 # object
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
# prepare zero shape array for later values
return np.zeros(n_atoms)
def get_atoms(self, atomgroup):
# Get values from single atom including exchange with the solvent
return self.values[atomgroup.ix]
def set_atoms(self, atomgroup, values):
assert (0 <= values <= 1), 'deuteration values should be in interval [0..1] !'
# set values for all in atomgroup
self.values[atomgroup.ix] = values
# update neutron coh + incoh only for the relevant H,D
ag = atomgroup.select_atoms('type H or type D')
ag.fans = 'types'
ag.fi2ns = 'types'
class hdexchangable(AtomAttr):
"""
Deuteration level. Only H can be deuterated to D.
For intermediate levels we use statistical average.
set as 3x tuple with exchangeable fraction for ( H bonded to O,S,sidechain N; backbone N-H, histidine -H*)
default (1, universe.amideHexFract,u.histidinExchange) = (1, 0.9, 0.5)
"""
attrname = 'hdexchange'
singular = 'hdexchange'
dtype=np.float32 # object
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
# prepare zero shape array for later values
return np.zeros(n_atoms)
def get_atoms(self, atomgroup):
# Get values from single atom including exchange with the solvent
return self.values[atomgroup.ix]
def set_atoms(self, atomgroup, values):
if values == 'default':
# default values for normal conditions
u = atomgroup.universe
values = (1, max(min(u.amideHexFract, 1), 0), max(min(u.histidinExchange, 1), 0))
if isinstance(values, (tuple, set, list)) and np.shape(values) == (3,):
# explicit given fractions
u = atomgroup.universe
if not u.bonds:
# this may need vdwradii for unknown types
u.atoms.guess_bonds()
try:
# backbone H bonded to N exchange to fraction 0.9
backboneH = atomgroup.select_atoms('type H and bonded type N and bonded backbone')
self.values[backboneH.ix] = values[1]
except AttributeError:
# noch backbone present
pass
try:
# histidin hydrogens can exchange to fraction 0.5
histidinH = atomgroup.select_atoms('(resname HSD or resname HIS) and type H*')
self.values[histidinH.ix] = values[2]
except AttributeError:
# noch residues
pass
# H bonded to 'OSN' show complete exchange
exchangeableH = atomgroup.select_atoms('type H and bonded type O')
exchangeableH += atomgroup.select_atoms('type H and bonded type S')
try:
exchangeableH += atomgroup.select_atoms('type H and bonded type N and not bonded backbone')
except AttributeError:
# there is no backbone so add all
exchangeableH += atomgroup.select_atoms('type H and bonded type N')
self.values[exchangeableH.ix] = values[0]
else:
# allow explicit given values for groups
self.values[atomgroup.ix] = values
atomgroup.fans = 'types'
atomgroup.fi2ns = 'types'
class positionUnitsnm(AtomAttr):
"""
Geometric center position in units nm
"""
attrname = 'posnm'
singular = 'posnm'
dtype = object
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
return np.zeros(n_atoms)
@staticmethod
def get_atoms(atomgroup):
# Get values from single atom including exchange with the solvent
if isinstance(atomgroup.ix, numbers.Integral):
return atomgroup.position / 10
else:
return atomgroup.positions / 10
@staticmethod
def set_atoms(atomgroup, values):
if isinstance(atomgroup.ix, numbers.Integral):
atomgroup.position = values * 10
else:
atomgroup.positions = values * 10
@staticmethod
def get_residues(res_group):
# incoherent of a residue is sum of incfin**2
# vals = np.empty(len(res_group))
if isinstance(res_group.ix, numbers.Integral):
return res_group.atoms.posnm.mean(axis=0)
else:
posnm = np.zeros((res_group.ix.shape[0], 3))
posnm[:, :] = [rg.atoms.posnm.mean(axis=0) for rg in res_group]
return posnm
@staticmethod
def get_segments(seg_group):
# incoherent of a residue is sum of incfin**2
# vals = np.empty(len(res_group))
if isinstance(seg_group.ix, numbers.Integral):
return seg_group.atoms.posnm.mean(axis=0)
else:
posnm = np.zeros((seg_group.ix.shape[0], 3))
posnm[:, :] = [sg.atoms.posnm.mean(axis=0) for sg in seg_group]
return posnm
class surfacename(AtomStringAttr):
"""
Is surface atoms indicator
"""
attrname = 'surface'
singular = 'surface'
dtype = object
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
# prepare zero shape array for later values
return np.array(['' for _ in range(n_atoms)], dtype=object)
def get_residues(self, res_group):
"""
Residue is surface if any atom is in surface
"""
if isinstance(res_group.ix, numbers.Integral):
val = self.values[res_group.atoms.ix]
return np.unique(val[val != ''])[0] if np.any(val != '') else ''
else:
return [rg.surface for rg in res_group]
class surfaceData(AtomAttr):
"""
Surface atoms properties
[1x surface area, 1x SAS surface volume, 1x hydration shell volume, 3x area mean position]
"""
attrname = 'surfdata'
singular = 'surfdata'
dtype = np.float32
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
# prepare zero shape array for later values
return np.zeros((n_atoms, 6))
def get_atoms(self, ag):
return self.values[ag.ix, :]
def set_atoms(self, ag, values):
if isinstance(values, str) and values == 'default':
self.values[ag.ix, :] = 0
else:
self.values[ag.ix, :] = values
def get_residues(self, res_group):
# equivalent
if isinstance(res_group.ix, numbers.Integral):
val = self.values[res_group.atoms.ix]
sd = np.zeros(6)
sd[:3] = val[:, :3].sum(axis=0)
sd[3:] = val[:, 3:].mean(axis=0)
return sd
else:
return np.array([rg.surfdata for rg in res_group], dtype=np.float32)
class surfaceLayerDensity(AtomAttr):
"""
Surface layer density per surface atom relative to bulk solvent, default 1
[1x surface area, 1x surface volume, 3x area mean position]
"""
attrname = 'surfdensity'
singular = 'surfdensity'
dtype = np.float32
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
# prepare zero shape array for later values
return np.ones(n_atoms)
def set_atoms(self, ag, values):
if isinstance(values, str) and values == 'default':
self.values[ag.ix] = 1.
else:
self.values[ag.ix] = values
def get_residues(self, res_group):
if isinstance(res_group.ix, numbers.Integral):
return self.values[res_group.atoms.ix].mean()
else:
return np.array([rg.surfdensity for rg in res_group], dtype=np.float32)
class occupiedVolume(AtomAttr):
"""
Occupied volume of a atom/residue as non overlapping Voronoi volume.
"""
attrname = 'oVolume'
singular = 'oVolume'
dtype = np.float32
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
@staticmethod
def _gen_initial_values(n_atoms, n_residues, n_segments):
# prepare zero shape array for later values
return np.zeros(n_atoms)
def set_atoms(self, ag, values):
if isinstance(values, str) and values == 'default':
self.values[ag.ix] = 0.
else:
self.values[ag.ix] = values
def set_residues(self, res_group, values):
if isinstance(res_group.ix, numbers.Integral):
self.values[res_group.atoms.ix] = values / res_group.atoms.n_atoms
else:
for rg, v in zip(res_group, values):
rg.atoms.oVolume = v / rg.atoms.n_atoms
def get_residues(self, res_group):
if isinstance(res_group.ix, numbers.Integral):
return self.values[res_group.atoms.ix].sum()
else:
return np.array([rg.oVolume for rg in res_group], dtype=np.float32)
class vanderWaalsRadii(AtomAttr):
"""
van der Waals radii in units nm.
"""
attrname = 'vdWradiinm'
singular = 'vdWradiinm'
dtype = np.float32
target_classes = [Atom, Residue, AtomGroup, ResidueGroup]
def get_residues(self, res_group):
# equivalent radius for sum of vdW Volumes
if isinstance(res_group.ix, numbers.Integral):
return (self.values[res_group.atoms.ix]**3).sum()**(1/3)
else:
return np.array([rg.vdWradiinm for rg in res_group], dtype=np.float32)
[docs]
def getSurfaceVolumePoints(objekt, point_density=5, probe_radius=0.13,
surfacename='1', vdwradii=None, shellThickness=0.3):
r"""
Calculates the solvent excluded volume (SES and SAS) for a dense object like protein or DNA.
It is **NOT** necessary to call this function as it is called automatically from all scattering functions and
determines the surface atoms and the objekt volume.
It is less accurate for solvent boxes as these may have space in between molecules and need adjusted probe_radius.
Parameters
----------
objekt : atom group
Atom group.
point_density : integer, default 5
Point density on surface of each atom for rolling ball algorithm as n_points = 2**(2*point_density)+2
On error it is automatically incremented
probe_radius : float, default 0.13 nm for water
Probe radius for SAS/SES calculation.
shellThickness : float
Shell thickness of the surface layer. The default is 0.3.
surfacename : str
Name for surface selection
vdwradii : dict, default None
Dictionary of van der Waals radii for rolling ball algorithm in units nm.
- If None the vdW radii used in the object universe are used. See notes below.
- I a defaultdict only these values are used. This is used e.g. for Calpha or coarse grain models
to use only the given defaults like `vdwradii = defaultdict(lambda :0.38)` .
Returns
-------
None
Universe topology attributes *surface* and *surfdata* are populated
- per surface atom with .surfdata = [surface area, SASvolume, shellVolume, surfmean position]
- adds to objekt.universe .SASVolume volume SAS
- adds to objekt.universe .SESVolume volume SES
- adds to objekt.universe .SASArea
- surface area can be calculated universe.select_atoms('surface 1').surfdata[:,0].sum()
- SASVolume is universe.select_atoms('surface 1').surfdata[:,1].sum()
- dummy bead positions in surface layer universe.select_atoms('surface 1').surfdata[:,3:]
Notes
-----
**SES/SAS** see `Accessible surface area <https://en.wikipedia.org/wiki/Accessible_surface_area>`_
**SAS** area/volume (solvent-accessible surface) is the surface area of a molecule that is accessible to a solvent
center. SAS area is calculated by sasa.aVSPointsGradients.
SAS area is here calculated using the *rolling ball* algorithm developed by Shrake & Rupley [1]_.
It describes the surface of the center of a probe molecule (ball) with probe_radius
rolling over the van der Waals surface.
We use a probe_radius of 0.13 nm that represents the radius of a water molecule.
**SES** area/volume (solvent excluded surface) is defined by the envelope excluding the volume occupied
by the rolling ball probe. Also called Connolly surface [5]_ .
**Method** assuming a object like a protein (not solvent like water)
- Shrake/Rupley rolling ball algorithm:
For each atom a regular angular grid of :math:`N_{SAS}` points builds a surface S in a distance
:math:`d=R_{vdW} +R_{probe}` of the atomic vdWaals surface. :math:`R_{probe}` represents the solvent size.
The points in S are tested for overlap with the same surface of other neighboring atoms.
The set of non overlapping points build the SAS surface enclosing the SAS volume of the atom group.
The SAS points have a distance :math:`R_{probe}` from the van der Waals surface of the atoms.
- The SESVolume as a kind of dry volume is :math:`V_{SES} = V_{SAS} - V_{surface layer}`.
The surface layer volume :math:`V_{surface layer}` for each atom `i` may be estimated as the
volume between previous determined SAS points `p` and the van der Waals surface with volume per SAS point
:math:`V_{p,i} = 4/3\pi ((R_{vdW,i}+R_{probe})^3 - R_{vdW,i}^3) / N_{SAS}`.
In each corner (touching atoms) the probe surface sphere describes a smooth surface not reaching into the corner.
This missing volume can be approximated using the rolling ball algorithm with same
number of points :math:`N_{SAS}` but reduced probe radius :math:`R_{probe}/2`.
Each additional appearing point not in the original SAS surface contributes a volume
:math:`V_{p,c} = 4/3\pi (R_{probe})^3 / N_{SAS}` as sector from the probe center located in the SAS corner.
Also buried atoms (between non-touching atoms which are not in the SAS surface) contribute with the same amount.
It should be mentioned that this is an approximation for the corner probe sphere volume
as the points distributed around the original atoms show a small deviation compared to atoms distributed around
probe spheres located in the corner. On the other hand most algorithms describe only an approximation.
.. figure:: ../../examples/images/rollingball.png
:align: center
:width: 50 %
:alt: rollingball
red: atoms, green: probe, solid line: SAS, dashed: SES, dots: probe R/2 points,
blue: additional layer volume corner with additional points and probe section per point
A comparison to experimental determined protein densities is shown in :ref:`Protein density determination`.
For a probe radius of 1.3 A, we find excellent agreement. 1.3 A corresponds to the reduced oxygen vdW radius
for proper protein density determination.
As vdWaals radii we use for the atoms H, C, N, O the observed radii according to Fraser at al [2]_.
For H this corresponds to 1.07 A which is a bit smaller comapred to the revised value 1.1 of Rowland et al. [3]_.
For other values we use vdWaals radii of Bondi et al. [4]_ which are rare for proteins.
For the determination of the **hydration layer** contributing to the neutron/X-ray scattering
usually a 0.3 nm hydration layer is assumed as e.g. in CRYSON/CRYSOL.
To determine the respective hydration layer volume we use the same approach as above with
the shellthickness parameter. By default, we use a thickness of 0.3 nm for the hydration layer
if no other value is given.
It should be noted that compared to CRYSOL the scattering pattern is a bit different at larger Q.
CRYSOL fits the parameter r0 as an adjustment to the listed vdW radius mainly changing the
excluded volume scattering. The values of CYSOL can be repdoduced by seting the H vdW radius to 1.2 A
``universe.select_atoms('name H').vdWradiinm =.12`` which result in a different protein density.
References
----------
.. [1] Environment and exposure to solvent of protein atoms. Lysozyme and insulin
Shrake, A; Rupley, JA. (1973). J Mol Biol. 79 (2): 351–71. doi:10.1016/0022-2836(73)90011-9.
.. [2] An improved method for calculating the contribution of solvent to the X-ray diffraction pattern
of biological molecules.
Fraser, R. D. B., MacRae, T. P., & Suzuki, E. (1978).
Journal of Applied Crystallography, 11(6), 693–694. https://doi.org/10.1107/S0021889878014296
.. [3] Intermolecular Nonbonded Contact Distances in Organic Crystal Structures:
Comparison with Distances Expected from van der Waals Radii.
R. Scott Rowland, Robin Taylor
The Journal of Physical Chemistry. Bd. 100, Nr. 18, 1996, S. 7384–7391, doi:10.1021/jp953141
.. [4] van der Waals Volumes and Radii.
A. Bondi
The Journal of Physical Chemistry. Bd. 68, Nr. 3, 1964, S. 441–451, doi:10.1021/j100785a001
.. [5] Analytical molecular surface calculation
Connolly, M. L.
J. Appl. Crystallogr. 16, 548–558 (1983). doi:10.1107/S0021889883010985.
"""
if len(objekt.atoms) == 0:
raise ValueError('no atoms in objekt found')
objekt.universe.surfdata = 'default'
objekt.universe.surface = ''
if vdwradii is None:
vdwradii = objekt.universe.vdWradiinm
while True:
if point_density>20:
break
try:
# aVSPointsGradients returns (area, volume, sPAG), sPAG as dictionary with
# surface atom: [atom SAS exposed surface area,
# list of points in the atom exposed surface,
# gradient vector pointing outward from the atom]
area, volume, sPAGpr = sasa.aVSPointsGradients(objekt, probe_radius,
point_density=point_density, vdwradii=vdwradii)
# same but with smaller probe to get the corner additional points
_, _, sPAG0 = sasa.aVSPointsGradients(objekt, probe_radius / 2.,
point_density=point_density, vdwradii=vdwradii)
# same but with larger probe to get the corner seen from the shell
_, _, sPAG2 = sasa.aVSPointsGradients(objekt, shellThickness,
point_density=point_density, vdwradii=vdwradii)
except ZeroDivisionError:
# increment number of points as sometimes there is an error with no points on surface for an atom
point_density+=1
print('incremented point_density to %i' %(2**(2*point_density) + 2))
else:
break
# now add surface points of smaller surface to surface atoms
# sPAG will be list of surface atoms with
# [points in surface , exposed surface area, gradient vector pointing outward,
# number of points in smaller and larger surface if present]
sPAG = {satom: ((sPAGpr[satom][1], sPAGpr[satom][0], sPAGpr[satom][2]) if satom in sPAGpr else (None, 0, 0)) +
(len(sPAG0[satom][1]), len(sPAG2[satom][1]) if satom in sPAG2 else None) for satom in sPAG0}
allSASlayerVolume = 0
pointsonsphere = 2 ** (2 * point_density) + 2
for atom, surf in sPAG.items():
if surf[0] is None:
# buried atoms in innermost surface contributing to r/2 but not to r; needed for SESVolume
allSASlayerVolume += 4/3.*pi * probe_radius**3 * surf[3] / pointsonsphere
continue
# mean vector of surface points with length atom.vdW_radius+probe_radius is in the surface
# center in surface
v = (surf[0] - atom.posnm).mean(axis=0)
# R is vdWradius + probe_radius and sync to vdW used in aVSPointsGradients
R = la.norm(surf[0][0]-atom.posnm)
normv = la.norm(v)
if normv > 0:
surfmean = atom.posnm + R * v/normv
else:
# it happens that 2 points on opposite site are found and norm(v) is nan
# then we take just the first
v = surf[0][0]-atom.posnm
R = la.norm(surf[0][0]-atom.posnm)
surfmean = atom.posnm + R * v/la.norm(v)
# respective layer volume per point
pointShellVolume = 4/3*pi* ((R - probe_radius + shellThickness)**3 - (R - probe_radius)**3) / pointsonsphere
pointSASVolume = 4/3* pi * (R**3 - (R - probe_radius)**3) / pointsonsphere
# difference in sphere volumes x fraction of points
# SAS layer add overlap
SASlayervolume = pointSASVolume * len(surf[0]) # full points
# in corners the point difference approximates a probe sphere reaching into the corner
SASlayervolume += 4/3.*pi * probe_radius**3 * (surf[3]-len(surf[0])) / pointsonsphere
# same for hydration shell with shellThickness
# the result is not much dependent on accuracy of these calculations
# the more important part is SES calculation
# a second option would be to add SASlayervolume and use pointShellVolume only outside SAS
# The corner volume needs to be weighted half in this case
if surf[4] is not None:
shellVolume = pointShellVolume * surf[4] # full points
shellVolume += 4/3.*pi * shellThickness**3 * (len(surf[0])-surf[4]) / pointsonsphere
else:
# no full points
shellVolume = 4/3.*pi * shellThickness**3 * len(surf[0]) / pointsonsphere
# surface data = [surf area, SASlayervolume, shellVolume, surfmean]
atom.surfdata = np.r_[surf[1], SASlayervolume, shellVolume, surfmean]
atom.surface = surfacename
allSASlayerVolume += SASlayervolume
objekt.universe.SASVolume = volume
objekt.universe.SESVolume = volume - allSASlayerVolume
objekt.universe.SASArea = area
return
[docs]
def getDistanceMatrix(objekt, cutoff=None):
"""
Distance matrix for atoms in objekt.
objekt migth be a selection to get e.g. distances between CA or H atoms
Parameters
----------
objekt : AtomGroup or residueGroup
Objekt to get distance matrix.
cutoff : float
Cutoff radius. For larger distances 0 is returned.
Returns
-------
Distance matrix NxN (upper triagonal) in units nm.
Notes
-----
For larger matrices a faster neighbor search is used.
Examples
--------
::
import jscatter as js
uni=js.bio.scatteringUniverse('3pgk')
u = uni.select_atoms("protein and type H")
dd = js.bio.getDistanceMatrix(u)
u = uni.select_atoms("protein and name CA")
dd = js.bio.getDistanceMatrix(u,cutoff=2)
"""
atoms = objekt.atoms
dv = scipy.spatial.distance.pdist(atoms.posnm)
distances = scipy.spatial.distance.squareform(dv)
if cutoff is not None:
distances[distances > cutoff] = 0
return distances
def convexHullVolume(points):
return scipy.spatial.ConvexHull(points, incremental=False).volume
[docs]
def occupiedVolume(atoms, dl=0.12):
r"""
Calculates Voronoi cell volume for atom positions in a filled universe box. Only for filled boxes meaningful.
A universe box gets 26 copies of itself around the central original box (periodic boundary conditions in MD).
From these the layer with thickness dl around the center universe box
is used to get neigbors for the close to box boundary positions.
This guarranties proper neigbors, a filled box and :math:`\sum V_{atoms} = box volume`.
Parameters
----------
atoms : universe atoms or residues
Atom positions are used for Voronoi cell volume.
dl : float
Border size to include in convexHull as fraction from box size.
Returns
-------
array : floats
Voronoi cell volumes in units A³ for the atoms.
Notes
-----
scipy.spatial.Voronoi and scipy.spatial.convexHullVolume are used to calc the volume per atom.
"""
# get box dimension
vectors = MDAnalysis.lib.mdamath.triclinic_vectors(atoms.universe.dimensions)
size = len(atoms)
# points for 3x3x3 boxes around central box
points = np.zeros([27 * size, 3])
# copy along X
points[0 * size:1 * size, :] = atoms.posnm * 10 # in units A
# just in case someone moved the atoms out of the box we use here the center of geometry
points[0 * size:1 * size, :] -= points[0 * size:1 * size, :].mean(axis=0)
points[1 * size:2 * size, :] = points[:1 * size, :] + vectors[0]
points[2 * size:3 * size, :] = points[:1 * size, :] - vectors[0]
# copy along Y
points[3 * size:6 * size, :] = points[:3 * size, :] + vectors[1]
points[6 * size:9 * size, :] = points[:3 * size, :] - vectors[1]
# copy along Z
points[9 * size:18 * size, :] = points[:9 * size, :] + vectors[2]
points[18 * size:27 * size, :] = points[:9 * size, :] - vectors[2]
# projections along box vectors
nv0 = la.norm(vectors[0])
nv1 = la.norm(vectors[1])
nv2 = la.norm(vectors[2])
pro0 = points @ vectors[0] / nv0
pro1 = points @ vectors[1] / nv1
pro2 = points @ vectors[2] / nv2
# select points in central box with some border layer in neighbor boxes
# we need the neighbor points that the Voronoi can calc all volumes using neighbor points over box planes
val = (pro0 > -(0.5 + dl) * nv1) & (pro0 < (0.5 + dl) * nv0) & \
(pro1 > -(0.5 + dl) * nv1) & (pro1 < (0.5 + dl) * nv1) & \
(pro2 > -(0.5 + dl) * nv2) & (pro2 < (0.5 + dl) * nv2)
if mp.current_process().name != 'MainProcess':
v = scipy.spatial.Voronoi(points[val], incremental=False, qhull_options="QJ Pp")
vol = np.zeros(v.npoints)
for i, reg_num in enumerate(v.point_region):
indices = v.regions[reg_num]
if -1 in indices: # some regions can be open
vol[i] = np.inf
else:
vol[i] = convexHullVolume(v.vertices[indices])
vv = vol
else:
# do it in a pool
# here ThreadPool is ok as ConvexHull releases Gil in cython
v = scipy.spatial.Voronoi(points[val], incremental=False, qhull_options="QJ Pp")
with ThreadPool() as pool:
iter = [v.vertices[v.regions[reg_num]] for i, reg_num in enumerate(v.point_region)
if -1 not in v.regions[reg_num]]
chunksize, extra =divmod(len(iter), len(pool._pool) * 4)
chunksize = chunksize + (1 if extra else 0)
vol = pool.map(convexHullVolume, iter, chunksize=chunksize)
vv = np.array(vol)
# return only points in original box
return vv[:size]
def addNXAttributes(universe, vdwradiiA={}):
"""
Add topology attributes as needed for Xray/neutron scattering to existing universe.
Uses default values for a protein in D2O. See :py:class:`scatteringUniverse` for details.
Parameters
----------
universe : MDAnalysis universe
MDAnalysis universe.
vdwradiiA : dict,
Dictionary of van der Waals radii in units A.
Here an explicit list has to be given without default.
Returns
-------
None
"""
# get universe
u = universe
u._qlist = np.r_[0.01, 0.1, 1.0, 2.] # units 1/nm , force ndarray
u.tlist = 10**np.r_[0:6] # units ps
u.error = 200
u.amideHexFract = 0.9
u.histidinExchange = 0.5
# delocalisations
# Hydrogen atoms in proteins: Positions and dynamics
# Engler, Ostermann, Niimura, Parak PNAS 100, 10243 (2003) https://doi.org/10.1073/pnas.1834279100
# -> 0.21**0.5 / 10 = 0.045 nm approximate 290K
# How large B-factors can be in protein crystal structures.
# Carugo, BMC Bioinformatics 19, 61 (2018). https://doi.org/10.1186/s12859-018-2083-8
# B=8π^2 u^2 <25 A² => u =0.056A at maximum , we use 0.045 with B=0.16 A²
u.hDelocalisation = 0.045 # nm
u.xDelocalisation = 0.045 # nm
# according to Sorenson 0.22/A -> 0.045nm
u.solventDelocalisation = 0.045 # nm
# water radius
u.probe_radius = 0.13 # unit nm
u.shellThickness = 0.3 # nm
u.calphaCoarseGrainRadius = 0.342 # unit nm
u.iscalphamodel = False
# register scattering length and other new properties needed for Xray and neutron scattering
u.add_TopologyAttr("fax")
u.add_TopologyAttr("faxdumys")
u.add_TopologyAttr("fan")
u.add_TopologyAttr("fi2ns")
u.add_TopologyAttr("fi2xs")
u.add_TopologyAttr("deuteration")
u.add_TopologyAttr("hdexchange")
u.add_TopologyAttr("posnm")
u.add_TopologyAttr("surface")
u.add_TopologyAttr("surfdata")
u.add_TopologyAttr("surfdensity")
u.add_TopologyAttr("oVolume")
u.vdWradiiA = vdwradiiA.copy()
u.vdWradiinm = {k: v/10 for k, v in vdwradiiA.items()}
vdwr = np.zeros_like(u.atoms)
for t in list(set(u.atoms.types)):
ag = u.select_atoms('type ' + t)
vdwr[ag.ix] = u.vdWradiinm[t.upper()]
u.add_TopologyAttr('vdWradiinm', vdwr)
# set default values for scattering length densities
# u.atoms.deuteration = 0 # this is already the initial value
u.atoms.hdexchange = 'default'
# in above hdexchange the following two lines are already called, we don't do it twice
# these incorporate hdexchange and deuteration
# u.atoms.fans = 'types'
# u.atoms.fi2ns = 'types'
u.atoms.faxs = 'types'
u.atoms.fi2xs = 'types'
u.atoms.faxdumys = 'types'
u.atoms.surfdensity = 'default'
u.atoms.oVolume = 'default'
# force explicit residue ff calculation
u.explicitResidueFormFactorAmpl = True
[docs]
class scatteringUniverse(MDAnalysis.Universe):
def __init__(self, *args, **kwargs):
r"""
MDAnalysis universe with atoms from PDB or simulation for neutron/Xray scattering.
`scatteringUniverse` returns a `MDAnalysis <https://docs.mdanalysis.org/stable/index.html>`_ universe
which contains proteins/DNA and solvent.
It has methods e.g. for selection of parts or position analysis from MDAnalysis
complemented by scattering specific methods/attributes as atomic formfactors or volume determination.
Parameters describe additional scattering parameters like embedding solvent.
Others are passed to MDAnalysis.universe.
Parameters
----------
args : pdb structure file, universe or trajectories
PDB files, PDB ID or path to trajectories to build the universe as described for MDAnalysis.universe.
See `MDAnalysis <https://userguide.mdanalysis.org/stable/universe.html>`__ for different formats.
Already existing local PDB files are prefered.
biounit : bool, default False
If biounit is True or above args is a PDB biounit/assembly1 with ending *.pdb[biounit]*
as e.g. ``.pdb1`` the biounit is downloaded, merged and used for the universe.
See :py:func:`~mergePDBModel`.
addHydrogen : bool, default True
Add hydrogen to atomic coordinates (only) for PDB files.
- False: do not add hydrogens.
If the pdb file contains hydrogens or contains only Cα atoms set to False.
- True:
- By default :py:func:`~pdb2pqr` is used to add hydrogen in a fast mode
without debumping and position optimization, see :py:func:`~fastpdb2pqr`.
The resulting ``_h.pdb`` file is used that contains also ligands.
The '_h.pqr' file also contains charge information but no ligands by default.
This file can be used to populate the charge attribute in a second universe and
adding the ligands from the first universe using MERGE (but no ligand charge).
For more complicated cases see pdb2pqr documentation.
- For other formats as e.g. trajectories this option is ignored as these should contain hydrogens.
- If PyMol is installed, it can be used to add hydrogens, addHydrogen='pymol'.
Pymol is maybe bit faster and add hydrogens also to ligands.
No charges are added.
vdwradii : dict
Atomic van der Waals radii (units A) passed to universe are used to calculate SES and SAS volume.
The default is for protein atoms ``{'H': 1.09, 'C': 1.58, 'N': 0.84, 'O': 1.3, 'S': 1.68, 'P': 1.11}``
These values are smaller than the vdW radii from Bondi but result in a correct SESVolume and scattering
intensity. See :py:func:`~.bio.mda.getSurfaceVolumePoints` .
guess_bonds : MDAnalysis selection string, default='all'
Guess bonds for atoms selected by selection string.
Bonds are needed for correct hydrogen exchange (e.g. H bonded to 'O,N,S' exchange with D2O to D)
in neutron scattering. See `hdexchange`. For X-ray scattering or if all bonds are present in the topology
set to *False*.
For trajectories with a lot of solvent, it takes time to determine solvent bonds.
Selecting only parts like ``guess_bonds='protein'`` or ``guess_bonds='not segid seg_1_SOL'`` shortcuts this.
The solvent *H* needs to be explicitly set ``.hdexchange`` or the atomtype to *D*.
assignTypes : dict
Assign unknown atom type values from known atom names if the unknowns are non-standard.
Some coordinate formats use non-chemical symbols (e.g. lammps uses integers),
which need to be assigned some element atom types. We do this here to real atom types.
Dictionary of ``{'unknownname': 'existingname'}`` like ``{'mw': 'C'}`` or for lamps ``{'1':'C','2':'H'}``.
The key 'from_names' prepends ``guess_types(self.atoms.names)``
before above assignment ( {'from_names':1} ).
For custom setting (not existing element) assign corresponding data for an element to
``(js.bio.mda).xrayFFatomic, .xrayFFatomicdummy, .Nscatlength, .vdwradii``.
Notes
-----
The *scatteringUniverse* contains proteins/DNA with/without explicit solvent possibly in a box
and has attributes as X-ray/neutron scattering amplitudes for all atoms in the universe.
Parameters referring to solvent describe the embedding continuous solvent outside.
Without explicit solvent a virtual surface layer and volume determination is used
e.g. for PDB structures from the PDB databank or implicit solvent simulations.
Explicit solvent is in most cases simulated as protonated while neutron scattering experiments are
done with D2O/H2O mixtures. This needs to be accounted by adjusting ``.hdexchange`` for the solvent
or changing *H* to *D* for the solvent. See specific examples in :ref:`Biomacromolecules (bio)`
Use a **biological unit** for calculation using PDB structures from the PDB databank.
The PDB crystal structure is not always the biological unit in special for multimeric proteins.
The biological assembly can be retrieved from PDB servers as e.g.https://www.ebi.ac.uk/pdbe/ .
Look for biological unit or assembly. Check the structure in a PDB viewer as Pymol or VMD.
If existent the biological unit can be downloaded with *.pdb1* or *.pdb2* file ending,
see :py:func:`~.bio.mda.fetch_pdb`. The biological unit can then be merged into one model using
see :py:func:`~.bio.mda.mergePDBModel`.
The *scatteringUniverse* has **additional attributes for scattering** of atoms/residues.
All atoms need a defined type to determine the scattering amplitude.
These attributes like fax/fan atomic formfactors are used during scattering calculations as described in
:py:func:`~.bio.scatter.scatIntUniv` .
**mutable Universe attributes** :
- qlist : array
scattering vectors in unit 1/nm.
- tlist : array, optional
Times in units *ps* for dynamics.
- solvent :
Continuous solvent embedding the atoms/universe. Saved in attribute uni.solvent.
Use :py:func:`setSolvent` for details and how to change.
- temperature : float
Temperature in K. Use :py:func:`setSolvent`.
- solventDensity :
See and use :py:func:`setSolvent`.
- probe_radius : float 0.13
radius of the probe for surface determination in the rolling ball algorithm,
See :py:func:`~.bio.mda.getSurfaceVolumePoints` .
The default is 0.13 [nm] for water. This reduced value (often 0.14 nm) corresponds to the reduced
vdwradii of oxygen that reproduces SESVolume and results in the best protein density determination.
- shellThickness : float
Hydration shell thickness as a surface layer thickness. Default is 0.3 nm.
- point_density : int, default 5
Point density on surface (per atom) for SES and SAS calculation.
See :py:func:`~.bio.mda.getSurfaceVolumePoints` .
:math:`n_{points} = 2^{2 point_density}+2` .
- error : float
Determines how to calculate spherical averages (number of Fibonacci points on sphere).
- amideHexFract : float 0..1
Exchangeable fraction of backbone amide -NH hydrogen
- 0.9 for folded proteins
- For intrinsically unfolded proteins this might be higher due to improved access to the backbone.
(see hdexchange to change)
- histidinExchange : float 0..1
Exchangeable fraction histidine hydrogens (0.5) (see hdexchange to change)
- solventDelocalisation : float, default 0.045 nm.
Debye-Waller like solvent delocalisation.
See Sorenson et al [6]_.
- xDelocalisation : float, default 0.045 nm.
Debye-Waller like delocalisation of heavy atoms.
Crystallographic B factors are limited to :math:`B=8\pi^2 \langle u^2\rangle < 25 A²` [7]_
leading to :math:`u_{max} = 0.056A` at maximum. We use 0.045 nm with B=0.16 A².
- hDelocalisation : float, default 0.045 nm.
Debye-Waller like hydrogen delocalisation of non-solvent atoms.
:math:`\langle u^2\rangle \approx 0.21 A² \rightarrow u\approx 0.045 nm` at 290K [8]_.
- calphaCoarseGrainRadius : float 0.342 [nm]
vdwradii for Cα only structures for coarse grained calculations.
The value gives an average good approximation for the protein densities.
- iscalphamodel : bool
- explicitResidueFormFactorAmpl : bool
- ``True`` (default) : The residue formfactor amplitude (fan/fax) is calculated
explicit for each residue instead of using precalculated average values.
hdexchange is included like deuteration
- False : Precalculated residue formfactors are used from ``js.mda.xrayFFgroup`` and
``js.bio.neutronFFgroup``.
- Additional residue or monomer types can be added to the dict
for coarse grained calculations e.g. of polymer bead models. In this case set
``explicitResidueFormFactorAmpl=False``
**Atom attributes** can be changed for each atom. Predefined values are OK for most folded proteins
without explicit solvent. These attributes can be set for individual atoms or for groups like
::
urn.select_atoms('not protein and type H').deuteration =1
urn.select_atoms('not segid seg_1_SOL').hdexchange =1
- deuteration : atomic deuteration, effective only for H atoms.
Fractional values are interpreted as statistical deuteration e.g. of half the atoms in an ensemble.
For residue calculation use with ``uni.explicitResidueFormFactorAmpl = True`` to get updated
residue formfactors accordingly.
- hdexchange:
Fraction of hydrogens that exchange with solvent hydrogens for a group of atoms or individual atoms.
The default is for folded proteins, Intrinsically folded may be different e.g. for the backbone N-H.
To get number of exchanged H atoms ``uni.atoms.hdexchange.sum()``
Set for atomgroup as ``ag.hdexchange = (a,b,c)`` or ``ag.hdexchange = 'default'`` with
- a : H bonded to O,S,sidechain N ; default 1, all exchange
- b : backbone N-H : default uni.amideHexFract = 0.9
- c : histidine -H : default u.histidinExchange = 0.5
A single float sets a=b=c and keyword 'default' sets to universe defaults.
Usefull for e.g a disordered part that has different backbone exchange.
- surfdensity : scattering length density relative to bulk for surface atoms.
- Typically between 1.00 to 1.18 for proteins.
- If equal 1 no surface is assumed.
Example how to set for individual atoms/residues
E.g. set according to residue type or residue numbers ::
import jscatter as js
uni = js.bio.scatteringUniverse('3CLN')
uni.select_atoms('resname ARG HIS LYS ASP GLU').atoms.surfdensity = 1.1
protein = uni.select_atoms('protein')
# scatIntUniv uses prepScatGroups which uses getSurfaceVolumePoints to determine the surface
Sx = js.bio.scatIntUniv(protein ,mode='xray')
print('Mean universe atoms ', uni.atoms.surfdensity.mean())
print('Mean in surface atoms', protein.select_atoms('surface 1').surfdensity.mean())
# hydrophobicity scale [-4.5..4.5] dependent on residue type
md = 0.09 # maximum density in layer 9% different from bulk
for k, v in js.data.aaHydrophobicity.items():
uni.select_atoms('resname '+k.upper()).atoms.surfdensity = 1 + md * v[0]/4.5
print('Mean universe atoms ', uni.atoms.surfdensity.mean())
print('Mean in surface atoms', protein.select_atoms('surface 1').surfdensity.mean())
# set higher exchange for backbone NH in a segment (migth be unfolded at the surface)
protein.select_atoms('resnum 33:44').atoms.hdexchange = (1, 0.7, 0.5)
# use a partial deuteration of a small domain or even individual H atoms.
# maybe partial deuteration is possible in the future
protein.select_atoms('resnum 33:44').atoms.deuteration = 1
# deuteration of 2 amino acids types
protein.select_atoms('resname ARG LYS').atoms.deuteration = 1
**Fixed attributes**
Set according to other attributes or atom type for all atoms (residue values as appropriate averages)
(singular/plural):
- fax/faxs : atomic/residue xray formfactor amplitude, unit nm
- atomic xray formfactor amplitudes :math:`f_j(Q)` according to REZ et al. [2]_ .
See :py:func:`~.bio.scatter.scatIntUniv`
- Delocalisation is taken into account exchanging :math:`f_i(Q)` -> :math:`f^{\prime}_i(Q)`
with (:math:`\delta_X=universe.xDelocalisation` and :math:`\delta_H=universe.hDelocalisation`)
.. math:: f^{\prime}_i(Q) = exp(-Q^2\delta_{X,H}^2/2) f_i(Q)
- residue formfactor amplitudes for coarse grained calculation
are calculated using the Debye scattering equation with atomic positions :math:`r_j`
according to Yang et al. [4]_ if atoms are present:
.. math:: F_a^{residue}(Q) = \big\langle \sum_{j.k} f^{\prime}_j(Q)f^{\prime}_k(Q)
\frac{sin(Q(r_j-r_k))}{Q(r_j-r_k)} \big\rangle^\frac{1}{2}
This is obviously only valid for small scattering vectors where we can
neglect the atomic details and interferences in a residue, thus the real
:math:`F_a^{residue}(Q) >0`.
- Precalculated amino acid residue formfactors are stored in the dict ``js.mda.xrayFFgroup`` and
``js.bio.neutronFFgroup``.
Adding new ones for e.g. polymer monomers ::
monomerfa = js.ff.sphere(q=js.bio.QLIST, radius=1.2)
monomerfa.Y = (monomer.Y/monomer.I0)**0.5 * V**2 * x_contrast**2 # valid only for small q
js.bio.mda.xrayFFgroup['ETH'] = monomerfa
# naming the specific residues to use the new defined formfactor
# leads to automatic usage of the new formfactor amplitude.
# use select mechanism of MDAnalysis to be specific
uni.residues.resnames = 'ETH'
# set corresponding incoherent if needed (just as example for C2H4 monomer)
# here we assume one Cα atom per residue/monomer in coarse graining.
uni.residues.atoms.fi2xs = js.data.xrayFFatomic['C'][2]*2 + js.data.xrayFFatomic['H'][2] * 4
- fan/fans : atomic/residue neutron formfactor amplitude unit nm
- atomic neutron formfactor amplitudes which are Q independent with a scattering length according to [1]_
taken from https://www.ncnr.nist.gov/resources/n-lengths/list.html
- Delocalisation is taken into account exchanging :math:`f_i(Q)` -> :math:`f^{\prime}_i(Q)`
with (:math:`\delta_X=universe.xDelocalisation` and :math:`\delta_H=universe.hDelocalisation`)
.. math:: f^{\prime}_i(Q) = exp(-Q^2\delta_{X,H}^2/2)]f_i(Q)
- residue scattering is calculated as for fax based on atomic scattering amplitudes..
- fi2n/fi2ns : neutron incoherent scattering amplitude squared, unit nm². Same source as fan.
- fi2x/fi2xs : xray incoherent scattering amplitude squared according to [3]_ (compton scattering), unit nm².
- posnm : atom/residue center of geometry positions in unit nm. MDAnalysis uses Å in .position.
posnm is for convenience to get 1/nm wavevectors.
- surface : name of surface, to test if atoms belong to surface atoms. (Maybe later different surfaces)
- surfdata : surface atom data as [surface area, surface volume, shell volume, 3x surface area mean position]
- vdWradiinm : vdWradii in nm as defined in js.data.vdwradii
Set **default values** dependent on *.solvent* using
See :py:func:`setSolvent` .
- xsld : solvent xray scattering length density, unit 1/nm²=nm/nm³
- edensity : solvent electron density, unit e/nm³
- bcDensitySol : solvent neutron scattering length density, unit 1/nm²=nm/nm³
- b2_incSol : solvent neutron incoherent scattering length density squared, unit 1/nm=nm²/nm³
- d2oFract : solvent d/h mol fraction
- d2omassFract : solvent d/h mass fraction
Examples
--------
Load pdb protein structure from the PDB data bank by ID to *scatteringUniverse*.
The pdb file is corrected and hydrogen is added automatically.
The protein structure including H is saved to 3rn3_h.pdb.
.. literalinclude:: ../../examples/example_bio_loadPDB.py
:language: python
Load existing PDB file (the one with H from above)
::
# reload the generated structurestructure later with hydrogens using the local saved structure.
uni2 = js.bio.scatteringUniverse('3rn3_h.pdb',addHydrogen=False)
uni2.view()
A *scatteringUniverse* with a complete trajctory from MD simulation is created.
The PSF needs atomic types to be guessed from names to identify atoms in the used format.
You may need to install MDAnalysisTests to get the files.( `python -m pip install MDAnalysisTests`)
It migh be nesserary to transform the box that the protein is not crossing boundaries of the universe box.
.. literalinclude:: ../../examples/example_bio_loadTrajectory.py
:language: python
.. image:: ../../examples/images/uniformfactorstraj.jpg
:align: center
:width: 50 %
:alt: uniformfactors
`scatteringUniverse` does the same as this in MDAnalysis.
::
u = mda.Universe(PSF, DCD)
# determine types from names
u.atoms.types = guess_types(u.atoms.names)
u.atoms.guess_bonds(vdwradii={'H': 1.09, 'C': 1.58, 'N': 0.84, 'O': 1.3, 'S': 1.68, 'P': 1.11})
# add scattering attributes
upsf2 = js.bio.scatteringUniverse(u, addHydrogen=False, guess_bonds=False)
**Examples for PDB structures** without explicit solvent for small angle scattering
The example shows the validity of residue coarse graining up to around 3/nm.
With coarse graining in cubes (cubesize) the approximation seems best.
This might be useful to speed up computations that take long (e.g. ISF at low Q)
There is basically no difference between precalculated and averaged residue formfactors and explicit calculated
residue formfactors for each residue (uni.explicitResidueFormFactorAmpl = True)
The explicit ones include better deuteration of specific atoms.
Cα models loose some precision in volume respectivly in forward scattering.
C-alpha models need a .calphaCoarseGrainRadius = 0.342 nm because of the missing atoms.
In addition, the mean residue position is not the C-alpha position.
We use 0.342 nm as a good average to get same forward scattering over a bunch of proteins
(see example_bio_proteinCoarseGrainRadius.py).
.. literalinclude:: ../../examples/example_bio_comparecoarsegraining.py
:language: python
.. image:: ../../examples/images/uniformfactors.jpg
:align: center
:width: 50 %
:alt: uniformfactors
References
----------
.. [1] Neutron scattering lengths and cross-sections
V. F. Sears
Neutron News, 3, 26-37 (1992) https://doi.org/10.1080/10448639208218
.. [2] REZ et al.Acta Cryst. A50, 481-497 (1994)
.. [3] A new analytic approximation to atomic incoherent X-ray scattering intensities.
J. THAKKAR and DOUGLAS C. CHAPMAN
Acta Cryst. (1975). A31, 391
.. [4] A Rapid Coarse Residue-Based Computational Method for X-Ray Solution Scattering
Characterization of Protein Folds and Multiple Conformational States of Large Protein Complexes
S. Yang, S. Park, L. Makowski, B. Roux
Biophysical Journal 96, 4449–4463 (2009) doi: 10.1016/j.bpj.2009.03.036
.. [5] An improved method for calculating the contribution of solvent
to the X-ray diffraction pattern of biological molecules
R. D. B. Fraser, T. P. MacRae and E. Suzuki
J. Appl. Cryst. (1978). 11, 693-694 https://doi.org/10.1107/S0021889878014296
.. [6] What can x-ray scattering tell us about the radial distribution functions of water?
Jon M. Sorenson, Greg Hura, Robert M. Glaeser, et al.
J. Chem. Phys. 113, 9149 (2000); https://doi.org/10.1063/1.1319615
.. [7] How large B-factors can be in protein crystal structures.
Carugo et al
BMC Bioinformatics 19, 61 (2018). https://doi.org/10.1186/s12859-018-2083-8
.. [8] Hydrogen atoms in proteins: Positions and dynamics
Engler, Ostermann, Niimura, Parak
PNAS 100, 10243 (2003) https://doi.org/10.1073/pnas.1834279100
"""
addHydrogen = kwargs.pop('addHydrogen', True)
guess_bonds = kwargs.pop('guess_bonds', 'all')
ph = kwargs.pop('ph', None)
# add all vdW radii (in A) as defaults
vdwradiiA = {k: v * 10 for k, v in vdwradii_.items()}
vdwradiiA.update( {k: v for k, v in kwargs.pop('vdwradii', {}).items()}) # no uppercase translation
biounit = kwargs.pop('biounit', False)
assig_Types = kwargs.pop('assignTypes', {})
solvent = kwargs.pop('guess_bonds', '1h2o1')
temperature = kwargs.pop('temperature', 293.15)
# create universe, postpone bonds to end to have more control
kwargs.update({'guess_bonds': False, 'vdwradii': vdwradiiA})
if isinstance(args[0], str):
# creating scattering universe from a file or PDB ID that will be downloaded and hydrogens added
if os.path.exists(args[0]):
# prefer existing files
pass
elif os.path.splitext(args[0])[1] in ['', '.pdb1', '.pdb2'] and \
re.match('^[0-9][a-z0-9]{3}$', os.path.splitext(args[0])[0].lower()):
# download pdb id and requested biounit; pdb1,pdb2 is biounit, '' only PDB id
if os.path.splitext(args[0])[1] in ['.pdb1', '.pdb2'] or biounit:
arg0 = fetch_pdb(id=args[0], biounit=True)
arg0 = mergePDBModel(arg0)
else:
arg0 = fetch_pdb(id=args[0])
args = (arg0,) + args[1:]
if addHydrogen and (os.path.splitext(args[0])[1] == '.pdb' and isinstance(args[0], (str, os.PathLike))):
if pymol2 and addHydrogen == 'pymol':
pdb_h = addH_Pymol(args[0])
print('Used PyMol to add hydrogens.')
args = (pdb_h,) + args[1:]
else:
# add hydrogens using pdb2pqr and missing atoms
debump = kwargs.pop('debump', False)
opt = kwargs.pop('opt', False)
pqr_h, pdb_h = fastpdb2pqr(args[0], debump=debump, opt=opt, ph=ph)
print('Used pdb2pqr to add hydrogens.')
args = (pdb_h,) + args[1:]
# except ValueError:
# Warning('No hydrogens added, maybe not an atomic structure but coarse grained as CA model?')
# init universe with updated args
super(scatteringUniverse, self).__init__(*args, **kwargs)
elif isinstance(args[0], MDAnalysis.Universe):
# create universe with topology from input
nargs = (args[0]._topology.copy(),)+args[1:]
kwargs.update({'addHydrogen': False})
super(scatteringUniverse, self).__init__(*nargs, **kwargs)
self.trajectory = args[0].trajectory.copy()
self.dimensions = args[0].dimensions
elif isinstance(args[0], MDAnalysis.core.topology.Topology):
# create from topology e.g. from a copy
nargs = (args[0].copy(),)+args[1:]
kwargs.update({'addHydrogen': False})
super(scatteringUniverse, self).__init__(*nargs, **kwargs)
else:
raise TypeError('Cannot create universe from input arguments.')
# try to add types (to get elements for scattering amplitudes)
# e.g. psf files might contain 'numbers' as types, but we need element types
# later we might switch to elements when these are better defined
if assig_Types.pop('from_names', {}):
self.atoms.types = guess_types(self.atoms.names)
if assig_Types:
for k, v in assig_Types.items():
ak = self.select_atoms('type ' + k)
ak.types = np.array([v] * ak.n_atoms)
if not set(self.atoms.types).issubset(vdwradiiA.keys()) and hasattr(self.atoms, 'names'):
# in case try to determine from names
raise TypeError('Mismatch in types and vdwradii.',
set(self.atoms.types).difference(vdwradiiA.keys()),
'Translate to capital letters like vdwradii={"Na": js.bio.vdwradii["NA"]} ')
if guess_bonds:
self.guess_bonds(guess_bonds=guess_bonds, vdwradii=vdwradiiA)
# add default solvent H2O and temperature
self.setSolvent(solvent, temperature=temperature)
# now add all Attributes for N+X scattering
# prepare all needed scattering attributes
addNXAttributes(self, vdwradiiA)
[docs]
def setSolvent(self, solvent=None, density=None, temperature=None, numDensity=None, units='mol'):
"""
Set solvent emdedding the universe to calculate scattering lengths and contrast.
Usage only for changing solvent. Other parameters changed automatically.
Parameters
----------
solvent : list of string
Describes solvent composition with molar fractions.
If None universe.solvent is used.
A chemical formula with fraction+[lettercode+number]+.....
- e.g.'D2O1' or 'H2O1' for water and heavy water
- ['0.6D2O1','0.4H2O1'] for a mixture of 0.6 hwater and 0.4 water by mol fraction
- ['6D2O1','4H2O1'] for a mixture of 6 hwater and 4 water as mixed mol
- default is pure h2o
density : optional, default=None, float
Density of the solvent in units g/cm³.
Explicit given values force the density to be different from tabulated values!
By default (None) tabulated values according to formel.waterdensity as given in solvent are use.
See :py:func:'jscatter.formel.physics.waterdensity' for specific references.
numDensity: optional, default=None, float
Number density of solvent molecules in units 1/nm³.
Forces scaling of related values to different number density e.g. in a simulation box.
Overrides density above and is intended only for simulation boxes.
- 0 : resets to above density values.
- None : water with components as given in universe.solvent is used.
This is the default for protein scattering from PDB structures (like implicit solvent).
See :py:func:'jscatter.formel.waterdensity' for specific references.
- float: For explicit solvent universes from MD simulation the values gives
the total number density of all solvent molecules used in the MD simulation.
temperature : float
Temperature of the universe in units K.
units : 'mol'
Units used in solvent as mol or mass.
Anything except 'mol' (default) is mass fraction .
e.g. 1l Water with 123mmol NaCl ['55.5H2O1','0.123Na1Cl1']
See :py:func:'jscatter.formel.scatteringLengthDensityCalc'.
Returns
-------
Sets universe attributes
solventDensity, numDensitySol, bcDensitySol, edensitySol, b2_incSol,d2oFract, xsldSol
Notes
-----
- :py:func:`~jscatter.formel.scatteringLengthDensityCalc` is used
to calc solvent scattering length (units=mol).
- For a simulation box with water it is important to have the correct water numDensity some distance away from
the protein surface (outside of the hydration layer) to get the right contrast to the embedding solvent.
Simulation boxes have not always the correct waterdensity. This is automatically taken into account.
- For PDB structures or simulation with implicit solvent the density is waterdensity.
- For simulation boxes the numberdensity of the initial box is usually fixed and
not changed (constant volume simulation over constant pressure).
If the initial equal distributed water forms a hydration layer with higher density around the protein,
consequently the density outside the hydration layer must decrease.
To correctly determine the contrast we need the numberdensity of the solvent outside of the hydration layer.
We can extract this by selecting water some distance away from the protein surfece.
In the same way we can approximate the numberdensity in the hydration layer.
"""
u = self.universe
u.temperature = temperature if temperature is not None else u.temperature
u.solvent = solvent if solvent is not None else u.solvent
if numDensity == 0:
# to reset to water values
u.solventDensity = None
numDensity = None
# density = None forces to use tabulated values for water or buffers
u.solventDensity = density if density is not None else None
# determine from water if no explicit values are given
# water density is calculated in scatteringLengthDensityCalc from formel.waterdensity
xsld, edensity, nsld, incsld, mass, massfullprotonated, massfulldeuterated, dhfraction, numdensity, density = \
formel.scatteringLengthDensityCalc(u.solvent, u.solventDensity, T=u.temperature, mode='all2', units=units)
if numDensity is not None:
# scaling factor according to atom numberDensity
f = numDensity / sum(numdensity)
else:
f = 1
self.solventDensity = f * density
self._numDensitySol = f * sum(numdensity)
self._bcDensitySol = f * nsld
self._edensitySol = f * edensity
self._b2_incSol = f * incsld
self._d2oFract = dhfraction
self._xsldSol = f * xsld
[docs]
def guess_bonds(self, guess_bonds='all', vdwradii=None):
r"""
Guess bonds according to distance between atoms.
Bonds between atoms are created, if the two atoms are within
:math:`d < f \cdot (R_1 + R_2)` of each other, where :math:`R_{1,2}` are the VdW radii
of the atoms and :math:`f=0.55` is an ad-hoc *fudge_factor*.
This is the same algorithm that VMD uses but with f=0.6.
Parameters
----------
vdwradii : dict
Dictionary with {name:value[A]} pairs for vdWaals radii.
guess_bonds : MDAnalysis selection string, default='all'
Guess bonds for atoms selected by selection string.
Bonds are needed for correct hydrogen exchange (e.g. H bonded to 'O,N,S' exchange with D2O to D)
in neutron scattering. See hdexchange. For X-ray scattering or if all bonds are present in the topology
set to *False*.
For trajectories with a lot of solvent, it takes time to determine solvent bonds.
Selecting only parts like ``guess_bonds='protein'`` or ``guess_bonds='not segid seg_1_SOL'`` shortcuts this.
The solvent *H* needs to be explicitly set ``.hdexchange`` or the atomtype to *D*.
Returns
-------
None, bonds are created as atomAttribute ``bonds``.
Notes
-----
Atomic van der Waals radii (units A) passed to universe are used to identify bonds between atoms using the
universe.guess_bonds method of MDAnalysis.
The default radii are according to
`<https://en.wikipedia.org/wiki/Atomic_radii_of_the_elements_(data_page)#Van_der_Waals_radius>`_
(mainly Bondi reference).
These values are larger than the radii for SESvolume calculation (:py:func:`~.bio.mda.getSurfaceVolumePoints`)
to identify all bonds.
"""
try:
self.select_atoms(guess_bonds).guess_bonds(vdwradii)
except Exception as ex:
# e.g. LAMPSdata BONDS throw an error
message = "An exception of type {0} occurred. Arguments:\n{1!r}".format(type(ex).__name__, ex.args)
print('Bonds cannot be guessed because of following error:')
print(message)
@property
def bcDensitySol(self):
"""
Coherent neutron scattering length density of solvent.
"""
self.setSolvent()
return self._bcDensitySol
@bcDensitySol.setter
def bcDensitySol(self, v):
# only to get a proper Error
raise AttributeError('Change this property using universe.setSolvent!')
@property
def b2_incSol(self):
"""
Incoherent neutron scattering length density solvent.
"""
self.setSolvent()
return self._b2_incSol
@b2_incSol.setter
def b2_incSol(self, v):
# only to get a proper Error
raise AttributeError('Change this property using universe.setSolvent!')
@property
def d2oFract(self):
"""
D2O number fraction in solvent.
"""
self.setSolvent()
return self._d2oFract
@d2oFract.setter
def d2oFract(self, v):
# only to get a proper Error
raise AttributeError('Change this property using universe.setSolvent!')
@property
def xsldSol(self):
"""X-ray scattering length of solvent."""
self.setSolvent()
return self._xsldSol
@xsldSol.setter
def xsldSol(self, v):
# only to get a proper Error
raise AttributeError('Change this property using universe.setSolvent!')
@property
def edensitySol(self):
"""Electron density solvent in e/nm³"""
self.setSolvent()
return self._edensitySol
@edensitySol.setter
def edensitySol(self, v):
# only to get a proper Error
raise AttributeError('Change this property using universe.setSolvent!')
@property
def numDensitySol(self):
"""Number density solvent in 1/nm³"""
self.setSolvent()
return self._numDensitySol
@numDensitySol.setter
def numDensitySol(self, v):
# only to get a proper Error
raise AttributeError('Change this property using universe.setSolvent!')
[docs]
def copy(self):
"""Return an independent copy of this Universe"""
# copy is made in __class__
# TODO check which additional attributes are needed to be copied
new = self.__class__(self._topology, addHydrogen=False, guess_bonds=False, vdwradii=self.vdWradiiA)
new.trajectory = self.trajectory.copy()
new.dimensions = self.dimensions.copy()
return new
[docs]
def view(self, select='all', frames=None, viewer=''):
"""
View the actual configuration in a selected viewer.
Parameters
----------
select : string, default = 'all'
Selection string as in select_atoms.
frames : array-like or slice or FrameIteratorBase or str, optional
An ensemble of frames to write. The ensemble can be a list or
array of frame indices, a mask of booleans, an instance of
:class:`slice`, or the value returned when a trajectory is indexed.
By default, `frames` is set to ``None`` and only the current frame
is written. If `frames` is set to "all", then all the frame from
trajectory are written.
viewer : 'pymol', 'vmd', 'chimera', or full path
Viewer to show pdb structure.
- If the programm is in the PATH the short name is enough.
- The full path can be specified.
- default is to use the first existing.
Examples
--------
Normal modes are just used to generate a trajectory.
::
import jscatter as js
# view single structure
uni = js.bio.scatteringUniverse('3RN3.pdb')
uni.view(select='protein')
# normal mode anmation saved as trajectory
uni = js.bio.scatteringUniverse(js.examples.datapath+'/arg61.pdb',addHydrogen=False)
u = uni.select_atoms("protein and name CA")
nm = js.bio.NM(u, cutoff=10)
moving = nm.animate([6,7,8,9], scale=30) # as trajectory
# view trajectory
moving.view(viewer="pymol", frames="all")
# animate and show
nm.animate([6, 7, 8, 9], scale=30).view(viewer="pymol", frames="all")
"""
if viewer:
viewerpath = shutil.which(viewer)
if not viewerpath:
raise FileNotFoundError(f'The executable {viewer} is not found in PATH.')
else:
if sys.platform.startswith('linux'):
# viewerpath = 'xdg-open' # this closes before the file was read by the opend pdb viewr
for viewer in ['pymol', 'vmd', 'chimera']:
viewerpath = shutil.which(viewer)
if viewerpath:
break
elif sys.platform.startswith('darwin'):
viewerpath = 'open'
elif sys.platform == 'win32':
os.startfile(tfile)
atoms = self.select_atoms(select)
with tempfile.NamedTemporaryFile(suffix='.pdb') as tfile:
with MDAnalysis.Writer(tfile.name, atoms.n_atoms, multiframe=True) as w:
with warnings.catch_warnings():
warnings.simplefilter('ignore')
if frames is None:
# current frame
w.write(atoms)
elif isinstance(frames, slice):
for ts in self.trajectory[frames]:
w.write(atoms)
elif frames == 'all':
for ts in self.trajectory:
w.write(atoms)
else:
# whatever else was given like list of indices
for ts in self.trajectory[frames]:
w.write(atoms)
print('saved to temporary file ', tfile.name)
view_process = subprocess.run([viewerpath, tfile.name])
@property
def boxVolume(self):
""" Volume of universe box in A³ """
try:
return MDAnalysis.lib.mdamath.box_volume(self.dimensions)
except:
raise ValueError('No dimesnion in universe. ')
@property
def qlist(self):
"""
Scattering vectors
"""
return self._qlist
@qlist.setter
def qlist(self, ql):
# force ndarray
self._qlist = np.atleast_1d(ql)
[docs]
def fetch_pdb(id, path='./', biounit=False):
"""
Fetch id from pdb databank at http://www.rcsb.org/
id : string
PDB id
4 letter code of protein structure
path : string
path where to store the file
biounit : bool, int
Download biounit/assembly1 with ending *.pdb[biounit]* as e.g. *.pdb1*.
Returns
-------
Saves gunziped file and returns corresponding path.
Notes
-----
Biounit/assembly can be downloaded using file ending *.pdb1* or *.pdb2*
"""
ids = id.split('.')
end = ''
assembly = ''
if len(ids)>1 and ids[1][-1] in ['1', '2', '3']:
end = ids[1][-1]
assembly = '_' + end
elif isinstance(biounit, bool):
if biounit:
end = '1'
assembly = '_' + end
elif isinstance(biounit, int):
end = str(biounit)
assembly = '_' + end
outfile = path + f'{ids[0]}{assembly}.pdb'
url = f'http://www.rcsb.org/pdb/files/{ids[0]}.pdb{end}.gz'
print('Try to download > ', url)
print('save as ', outfile)
try:
with urllib.request.urlopen(url) as zfile:
with gzip.GzipFile(fileobj=io.BytesIO(zfile.read())) as uncompressed:
content = uncompressed.read()
with open(outfile, 'wb') as f:
f.write(content)
except Exception as e:
print(e)
return 0
return outfile
[docs]
def mergePDBModel(pdb):
"""
Merge models in PDB structure from biological assembly to single model.
Biological units are saved as multi model PDB files. mergePDBmodel merges these multiple models
into one model that can be read by MDAnalysis or other programs as multimeric protein.
Parameters
----------
pdb : string,filename
PDB id or filename with models to merge.
Returns
-------
merged filename
Examples
--------
Fetch ferritin (24-mer) 1lb3 biological assembly and merge to one frame.
The example needs some time
::
import jscatter as js
fer = js.bio.fetch_pdb('1lb3',biounit=True)
merged_filename = js.bio.mergePDBModel(fer)
# to show it
uni = js.bio.scatteringUniverse(merged_filename, addHydrogen=False)
uni.view(viewer='pymol')
"""
u = MDAnalysis.Universe(pdb)
segid = u.segments.segids
models = []
if u.trajectory.n_frames <=1:
raise TypeError('Only a single model is in pdb.')
for ts in range(u.trajectory.n_frames):
u.trajectory[ts]
models.append(u.copy())
segid = [chr(ord(id) + len(np.unique(segid)) ) for id in segid]
models[-1].segments.segids = np.array(segid, dtype=object)
mergeduniverse = MDAnalysis.core.universe.Merge(*[m.atoms for m in models])
pdb, end = os.path.splitext(pdb)
new = pdb + '_merged' + end
mergeduniverse.atoms.write(new)
return new
def _getargs(input_pdb, output, *args, **kwargs):
# transform args,kwargs tor arg for pdb2pqr as list of strings
argdefault = ['nodebump', 'noopt']
for default in argdefault:
if default not in args:
args = args + (default,)
arg = [input_pdb, output] + ['--' + a.replace('_', '-') for a in args]
if 'log-level' not in kwargs:
kwargs.update({'log-level':'ERROR'})
for k,v in kwargs.items():
arg = arg + ['--' + k.replace('_', '-'), v]
return arg
def get_old_header(pdblist, header='short'):
"""
Get old header from list of :mod:`pdb` objects.
header as 'short', 'all'
"""
header_types = (pdb2pqr_pdb.CRYST1, # added compared to pdb2pqr
#pdb2pqr_pdb.CONECT, # added compared to pdb2pqr
pdb2pqr_pdb.HEADER,
pdb2pqr_pdb.TITLE
)
if header == 'all':
header_types +=(
pdb2pqr_pdb.COMPND,
pdb2pqr_pdb.SOURCE,
pdb2pqr_pdb.KEYWDS,
pdb2pqr_pdb.EXPDTA,
pdb2pqr_pdb.AUTHOR,
pdb2pqr_pdb.REVDAT,
pdb2pqr_pdb.JRNL,
pdb2pqr_pdb.REMARK,
pdb2pqr_pdb.SPRSDE,
pdb2pqr_pdb.NUMMDL,
)
old_header = io.StringIO()
for pdb_obj in pdblist:
# there is a bug in the original function with break
if isinstance(pdb_obj, header_types):
old_header.write(str(pdb_obj))
old_header.write("\n")
return old_header.getvalue()
def print_pdb(args, pdb_lines, header_lines, missing_lines, is_cif):
"""Print PDB-format output to specified file
In PDB2PQR this function ignores hader lines which i want here
args : argparse.Namespace
command-line arguments
pdb_lines :
output lines (records)
header_lines :
header lines
missing_lines :
lines describing missing atoms
is_cif :
flag indicating CIF format
"""
with open(args.pdb_output, "wt") as outfile:
# Adding whitespaces if --whitespace is in the options
if missing_lines:
pass
for line in header_lines:
outfile.write(line)
for line in pdb_lines:
if line[0:3] != "TER" or not is_cif:
outfile.write(line)
[docs]
def pdb2pqr(input_pdb, output, *args, **kwargs):
"""
Adds hydrogens to pdb structure, optional determines charges and repairs missing atoms.
Interface to pdb2pqr in interactive shell. Original source is at [1]_, [2]_, Please cite [3]_,[4]_.
From original documentation at [2]_ :
Adding a limited number of missing heavy (non-hydrogen) atoms to biomolecular structures.
Estimating titration states and protonating biomolecules in a manner consistent with favorable
hydrogen bonding. Assigning charge and radius parameters from a variety of force fields.
Generating “PQR” output compatible with several popular computational modeling and analysis packages.
Parameters
----------
input_pdb : string
Path to input pdb file. If only the pdb ID the corresponding file is downloaded.
output : string
Path of output file.
args : strings
Positional arguments are prepended by --xxx to represent options without input parameter
If '-' is in key exchange it by underscore '_'.
kwargs : pairs key=value
Keyword arguments for options with input value.
passed as '--key value '
If '-' is in key exchange it by underscore '_'.
Notes
-----
The PDB2PQR tool prepares structures for further calculations (by APBS) by reconstructing
missing atoms, **adding hydrogens**, assigning atomic charges and radii from specified
force fields, and generating PQR files.
`APBS <http://www.poissonboltzmann.org/>`_ solves the equations of continuum electrostatics for
large biomolecular assemblies.
Several programs use a modified PDB format called PQR, in which atomic partial charge (Q)
and radius (R) fields follow the X,Y,Z coordinate fields in ATOM and HETATM records.
See `PDB2PQR Server <https://server.poissonboltzmann.org/pdb2pqr>`_
or `PDB2PQR Homepage <https://server.poissonboltzmann.org/pdb2pqr>`_
There are other programs that allow addition of hydrogens:
- `CHARMM GUI <http://www.charmm-gui.org/?doc=input/pdbreader>`_ provides a web-based graphical
user interface to generate various molecular simulation systems and input files.
The output of *Input Generator/PDB Reader* in the file *step1_pdbreader.pdb* contains hydrogen atoms.
- PSFGEN is a tool (included in **VMD** or `NAMD <https://www.ks.uiuc.edu/Research/namd/>`_ )
to generate a protein structure file (PSF) for MD Simulations from a PDB structure. It can be used from
`VMD <https://www.ks.uiuc.edu/Research/vmd/>`_ using the *Automatic PSF Builder*
or on the `command line <https://www.ks.uiuc.edu/Research/vmd/plugins/psfgen/>`_.
- HAAD
https://zhanglab.ccmb.med.umich.edu/HAAD/
https://doi.org/10.1371/journal.pone.0006701
**Original help from pdb2pqr30** ::
PDB2PQR v3.1.0+15.g41d841a.dirty: biomolecular structure conversion software.
positional arguments:
input_path Input PDB path or ID (to be retrieved from RCSB database
output_pqr Output PQR path
optional arguments:
-h, --help show this help message and exit
--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
Logging level (default: INFO)
Mandatory options:
One of the following options must be used
--ff {AMBER,CHARMM,PARSE,TYL06,PEOEPB,SWANSON}
The forcefield to use. (default: PARSE)
--userff USERFF The user-created forcefield file to use. Requires --usernames and overrides --ff (default: None)
--clean Do no optimization, atom addition, or parameter assignment, just return the original PDB file in aligned format. Overrides
--ff and --userff (default: False)
General options:
--nodebump Do not perform the debumping operation (default: True)
--noopt Do not perform hydrogen optimization (default: True)
--keep-chain Keep the chain ID in the output PQR file (default: False)
--assign-only Only assign charges and radii - do not add atoms, debump, or optimize. (default: False)
--ffout {AMBER,CHARMM,PARSE,TYL06,PEOEPB,SWANSON}
Instead of using the standard canonical naming scheme for residue and atom names, use the names from the given forcefield
(default: None)
--usernames USERNAMES
The user-created names file to use. Required if using --userff (default: None)
--apbs-input APBS_INPUT
Create a template APBS input file based on the generated PQR file at the specified location. (default: None)
--pdb-output PDB_OUTPUT
Create a PDB file based on input. This will be missing charges and radii (default: None)
--ligand LIGAND Calculate the parameters for the specified MOL2-format ligand at the path specified by this option. (default: None)
--whitespace Insert whitespaces between atom name and residue name, between x and y, and between y and z. (default: False)
--neutraln Make the N-terminus of a protein neutral (default is charged). Requires PARSE force field. (default: False)
--neutralc Make the C-terminus of a protein neutral (default is charged). Requires PARSE force field. (default: False)
--drop-water Drop waters before processing biomolecule. (default: False)
--include-header Include pdb header in pqr file. WARNING: The resulting PQR file will not work with APBS versions prior to 1.5 (default: False)
pKa options:
Options for titration calculations
--titration-state-method {propka}
Method used to calculate titration states. If a titration state method is selected, titratable residue charge states will be
set by the pH value supplied by --with_ph (default: None)
--with-ph PH pH values to use when applying the results of the selected pH calculation method. (default: 7.0)
PROPKA invocation options:
-f FILENAMES, --file FILENAMES
read data from <filename>, i.e. <filename> is added to arguments (default: [])
-r REFERENCE, --reference REFERENCE
setting which reference to use for stability calculations [neutral/low-pH] (default: neutral)
-c CHAINS, --chain CHAINS
creating the protein with only a specified chain. Specify " " for chains without ID [all] (default: None)
-i TITRATE_ONLY, --titrate_only TITRATE_ONLY
Treat only the specified residues as titratable. Value should be a comma-separated list of "chain:resnum" values; for example:
-i "A:10,A:11" (default: None)
-t THERMOPHILES, --thermophile THERMOPHILES
defining a thermophile filename; usually used in 'alignment-mutations' (default: None)
-a ALIGNMENT, --alignment ALIGNMENT
alignment file connecting <filename> and <thermophile> [<thermophile>.pir] (default: None)
-m MUTATIONS, --mutation MUTATIONS
specifying mutation labels which is used to modify <filename> according to, e.g. N25R/N181D (default: None)
--version show program's version number and exit
-p PARAMETERS, --parameters PARAMETERS
set the parameter file [{default:s}] (default: /home/biehl/local/lib/python3.9/site-packages/propka/propka.cfg)
-o PH, --pH PH setting pH-value used in e.g. stability calculations [7.0] (default: 7.0)
-w WINDOW WINDOW WINDOW, --window WINDOW WINDOW WINDOW
setting the pH-window to show e.g. stability profiles [0.0, 14.0, 1.0] (default: (0.0, 14.0, 1.0))
-g GRID GRID GRID, --grid GRID GRID GRID
setting the pH-grid to calculate e.g. stability related properties [0.0, 14.0, 0.1] (default: (0.0, 14.0, 0.1))
--mutator MUTATOR setting approach for mutating <filename> [alignment/scwrl/jackal] (default: None)
--mutator-option MUTATOR_OPTIONS
setting property for mutator [e.g. type="side-chain"] (default: None)
-d, --display-coupled-residues
Displays alternative pKa values due to coupling of titratable groups (default: False)
-l, --reuse-ligand-mol2-files
Reuses the ligand mol2 files allowing the user to alter ligand bond orders (default: False)
-k, --keep-protons Keep protons in input file (default: False)
-q, --quiet suppress non-warning messages (default: None)
--protonate-all Protonate all atoms (will not influence pKa calculation) (default: False)
References
----------
.. [1] https://pdb2pqr.readthedocs.io/en/latest/
.. [2] https://github.com/Electrostatics/electrostatics.github.io
.. [3] Improvements to the APBS biomolecular solvation software suite.
Jurrus E, et al.
Protein Sci 27 112-128 (2018).
.. [4] PDB2PQR: expanding and upgrading automated preparation of biomolecular structures for molecular simulations.
Dolinsky TJ, et al.
Nucleic Acids Res 35 W522-W525 (2007).
"""
arglist = _getargs(input_pdb, output, *args, **kwargs)
parser = build_main_parser()
p2q_args = parser.parse_args(arglist)
main_driver(p2q_args)
[docs]
def fastpdb2pqr(input_pdb, debump=False, opt=False, drop_water=True, ph=None, whitespace=True):
"""
A fast version of :py:func:`~.bio.mda.pdb2pqr`.
Speedup is achieved by omitting optimization, debumping, minimized logging and reducing options.
For full options use pdb2pqr.
Parameters
----------
input_pdb : str
Input pdb file.
debump : bool, default False
Debump added atoms, ensure that new atoms are not rebuilt too close to existing atoms.
opt : bool, default False
Perform hydrogen optimization, default is not to do it,
Adjusts hydrogen positions and flips certain side chains (His, Asn, Glu)
as needed to optimize hydrogen bonds.
drop_water : bool
Drop water atoms.
ph : float, default None
pH value to use for assignment of charges.
If None pH 7 is assumed but PROPKA is not used.
Cite [5]_ [6]_ if using charge assignments by PROPKA.
whitespace : bool
Insert whitespaces between atom name and residue name, between x and y, and between y and z.
This improves readability but breaks PDB file definition.
Returns
-------
input_pdb.pqr, input_pdb_h.pdb (without previous suffix)
Notes
-----
Uses default options of pdb2pqr except of these.
- debump = False
- opt = False
- drop_water = True ; this reduces just te number of atoms not to get errors in mda
- whitespace; mda has problems as somewhere split() is used instead of char numbers as defined for pqr files
- pdb_output = input_pdb with prefix appended '_h'
Examples
--------
Use fastpdb2pqr and combine the '_h.pdb' file including ligands but without charges with the '_h.pqr file'
that cotains charges but no ligands.
To get charge states of ligands please use the web services or programs mentioned in :py:func:`pdb2pqr` .
Charges can be added manually for the ligands.
::
import jscatter as js
import MDAnalysis as mda
# this adds hydrogens to uni with ligands and adds charges in the corresponding '.pqr' file
uligand = js.bio.scatteringUniverse('3pgk')
uligand.add_TopologyAttr('charges') # all charges are zero
ucharge = js.bio.scatteringUniverse('3pgk_h.pqr')
protein = ucharge.select_atoms('protein').atoms
for l,c in zip(uligand.select_atoms('protein').residues, ucharge.select_atoms('protein').residues):
try:
# this throws an error if len(charges) is different; in that way same residues get correct charge
l.atoms.charges =c.atoms.charges
except:
print('---',l.resnum,c.resnum)
uligand.atoms.charges.sum() # = -1
References
----------
.. [1] https://pdb2pqr.readthedocs.io/en/latest/
.. [2] https://github.com/Electrostatics/electrostatics.github.io
.. [3] Improvements to the APBS biomolecular solvation software suite.
Jurrus E, et al.
Protein Sci 27 112-128 (2018).
.. [4] PDB2PQR: expanding and upgrading automated preparation of biomolecular structures for molecular simulations.
Dolinsky TJ, et al.
Nucleic Acids Res 35 W522-W525 (2007).
.. [5] Improved Treatment of Ligands and Coupling Effects in Empirical Calculation and Rationalization of pKa Values
Sondergaard, Chresten R., Mats HM Olsson, Michal Rostkowski, and Jan H. Jensen.
Journal of Chemical Theory and Computation 7, (2011): 2284-2295.
.. [6] PROPKA3: consistent treatment of internal and surface residues in empirical pKa predictions.
Olsson, Mats HM, Chresten R. Sondergaard, Michal Rostkowski, and Jan H. Jensen.
Journal of Chemical Theory and Computation 7, no. 2 (2011): 525-537.
"""
# define fast arguments for adding H
output_pqr = os.path.splitext(input_pdb)[0] + '_h.pqr'
output_pdb = os.path.splitext(input_pdb)[0] + '_h.pdb'
if ph:
arglist = _getargs(input_pdb, output_pqr, with_ph=f'{ph:.1f}', titration_state_method='propka')
else:
arglist = _getargs(input_pdb, output_pqr)
parser = build_main_parser()
args = parser.parse_args(arglist)
# set some args to get fast result
args.debump = debump
args.opt = opt
args.assign_only = False
args.clean = False
args.userff = None
args.ffout = None
args.neutraln = False
args.neutralc = False
args.ligand = None
args.keep_chain = True
args.drop_water = drop_water
args.whitespace = whitespace
args.pdb_output = output_pdb
definition = pdb2pqr_io.get_definitions()
pdblist, is_cif = pdb2pqr_io.get_molecule(input_pdb)
if args.drop_water:
pdblist = pdb2pqr_drop_water(pdblist)
biomolecule, definition, ligand = pdb2pqr_setup_molecule(pdblist, definition, args.ligand)
biomolecule.set_termini(args.neutraln, args.neutralc)
biomolecule.update_bonds()
# do the magic
results = pdb2pqr_non_trivial(args=args,
biomolecule=biomolecule,
ligand=ligand,
definition=definition,
is_cif=is_cif)
# write output
print('write ', output_pqr)
pdb2pqr_print_pqr(args=args,
pqr_lines=results["lines"],
header_lines=results["header"],
missing_lines=results["missed_residues"],
is_cif=is_cif)
print('write ', output_pdb)
pdb_lines = pdb2pqr_io.print_biomolecule_atoms(biomolecule.atoms, chainflag=args.keep_chain, pdbfile=True)
header_lines = get_old_header(pdblist)
print_pdb(args=args,
pdb_lines=pdb_lines,
header_lines=header_lines,
missing_lines=results["missed_residues"],
is_cif=is_cif)
return output_pqr, output_pdb
[docs]
def addH_Pymol(pdbid):
"""
Add hydrogens to pdb file using PyMol if present.
This works only for PyMol installations if `pymol2` can be imported.
`pymol2` provides an additional interface to PyMol.
Alternativly this can be done by saving to PDB file ; using PymMol and save it again.
Parameters
----------
pdbid : string
PDB id or filename of PDB file
Returns
-------
Filename of saved file.
Notes
-----
From https://pymolwiki.org/index.php/H_Add ::
PyMOL fills missing valences but does no optimization of hydroxyl rotamers.
Also, many crystal structures have bogus or arbitrary ASN/GLN/HIS orientations.
Getting the proper amide rotamers and imidazole tautomers & protonation states
assigned is a nontrivial computational chemistry project involving
electrostatic potential calculations and a combinatorial search.
There's also the issue of solvent & counter-ions present in systems like
aspartyl proteases with adjacent carboxylates .
For higher accuracy (optimization) use :py:func:`~jscatter.bio.mda.pdb2pqr` .
"""
# TODO simplify this ?
if pymol2:
p1 = pymol2.PyMOL()
p1.start()
if os.path.isfile(pdbid):
pdb = pdbid
p1.cmd.load(pdb)
pdb, end = os.path.splitext(pdb)
else:
pdb = os.path.splitext(pdbid)[0].lower()
p1.cmd.fetch(pdb)
end = '.pdb'
# remove alternate conformations
# TODO select alternate location
p1.cmd.remove('not alt ""+A')
p1.cmd.alter('all', 'alt=""')
p1.cmd.h_add()
# save it
p1.cmd.save(pdb+'_h'+end)
p1.stop()
return pdb+'_h'+end
else:
raise ModuleNotFoundError('pymol not installed. Use pdb2pqr.')