# -*- coding: utf-8 -*-
# written by Ralf Biehl at the Forschungszentrum Jülich ,
# Jülich Center for Neutron Science 1 and Institute of Complex Systems 1
# Jscatter is a program to read, analyse and plot data
# Copyright (C) 2015-2024 Ralf Biehl
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
"""
Functions to read DLS data, apply the CONTIN analysis and plot the CONTIN results.
"""
import io
import codecs
import collections
import locale
import numbers
import os
import sys
import time
import shutil
import subprocess
from math import pi
import numpy as np
from . import formel
from . import GracePlot
from .dataarray import dataArray as dA
from .dataarray import dataList as dL
# As CONTIN uses cgs units we use these here too
kb = 1.3806505e-16 # Boltzmann constant in cgs units: cm2 g s-2 K-1 = erg/K (1 erg = 1e-7 J, i.e. kb = 1.3806505e-23 J/K)
path = os.path.realpath(os.path.dirname(__file__))  # absolute directory of this module
# locate the CONTIN executable on PATH; None if not found (checked inside contin())
continexe = shutil.which('contin')
def _getfirst(a):
""" this allows to have float or list of float (take first)"""
try:
return a[0]
except:
return a
def _w2f(word):
"""converts if possible to float"""
try:
return float(word)
except ValueError:
return word
def _visc(T, **kw):
# T in degrees
# a viscosity calculation based on temperature
# calc viscosity in cPoise=1e3*[Pa*s]
if 'v' in kw and kw['v'][0] == 'd':
v = 1e3 * formel.viscosity(mat='d2o', T=T) # heavy water
elif 'v' in kw and kw['v'][0] == 'h':
v = 1e3 * formel.viscosity(mat='h2o', T=T)
elif 'v' in kw:
v = kw['v']
else:
v = 1e3 * formel.viscosity(mat='h2o', T=T) # default to normal water
return v # in cPoise
def _massweight(ci, qq, Ri):
"""
Transform intensity weight to mass/volume weight
"""
mw = ci * Ri ** 3 * qq ** 6 / (np.sin(qq * Ri) - qq * Ri * np.cos(qq * Ri)) ** 2
# norm = np.trapz(mw, Ri)
norm = mw.sum()
return mw/norm
def _numberweight(ci, qq, Ri):
# transform intensity weight to number weight
nw = ci * qq ** 6 / (np.sin(qq * Ri) - qq * Ri * np.cos(qq * Ri)) ** 2
# norm = np.trapz(nw, Ri)
norm = nw.sum()
return nw/norm
# NOTE: stray "[docs]" sphinx cross-link artifact neutralized (was invalid at module level)
def contin(datalist, Ngrid=256, tmin=-2, tmax=-2, bgr=0, distribution='dls', RDG=-1, timescale=1e-6, **kw):
r"""
Inverse Laplace transform known as CONTIN analysis developed by Steven Provencher [1,2,4]_ .
This is a wrapper for the original FORTRAN program from Steven Provencher.
The CONTIN analysis is developed for heterodisperse, polydisperse, and multimodal systems that cannot
be resolved with the cumulant method. The resolution for separating two different particle
populations is approximately a factor of five or higher and the difference in relative
intensities between two different populations should be less than 1:10−5. (from Wikipedia and own experience).
Iuser[i] and Rxx are defined in the original Fortran code.
Parameters
----------
datalist : dataList or single dataArray
Correlation data as measured e.g. by DLS.
Check type parameter dependent on input data g1 or g2!
Each dataArray is processed.
timescale : float, default 1e-6
Timescale factor. CONTIN uses internal units of seconds.
As typical instruments use microseconds the time axis needs to be scaled by this factor.
tmin,tmax : float,int
First/last time value to use in fit.
        If tmin/tmax are negative that number of points is dropped at the begin/end of the input data,
        e.g. tmin=-5 drops the 5 shortest time points; for tmax the 5 last points.
Ngrid : int
Number of grid points between gmin and gmax.
gmin/gmax : float
First/last time value in distribution X scale.
If not given calculated from tmin/tmax according to used kernel.
qgmin/qgmax : float
Only for 'relax'. Same as gmin,gmax except that gmin=qgmin/q^2.
qtmin/qtmax : float
q^2*t scaled times.
bgr : float; default 0
If >0: assume background and fit it.
typ : int; default 0
- 0: input is g1 (field correlation)
- 1: input is intensity correlation g2 as measured by DLS; g1= sign(g2/R21-1)*abs(g2/R21-1)^0.5
- -1: input is g2-1, takes only the square root; sign preserved as for 1.
ALV and Malvern Zetasizer save g2-1
distribution : string, default='x' for DLS,
Selects distribution type (Iuser(10)) as {'m':1,'D':2,'L':2,'r':3,'u':4,'x':4,'d':4,'T':4}
using RUSER(15,16,17,19).
The kernel :math:`g_1=l^{R23}*exp(-R21*t*l^{R22})` describes always the field correlation.
- 'x' intensity weight relaxation times distribution l=T -> exp(-t/T)
R21=1, R22=-1, R23=0
- 'molweight'='mass' weight fraction molecular weight (mw) distribution l=mw -> mw*exp(-q**2*R18**t*mw**R22)
- R18 to be set in input: relates diffusion coefficient to mol weight
- R22 to be set in input: exponent e.g.for polymers IN THETA solvent Diffusion=R18*l^0.5
- R21 = R20**2*R18=q**2*R18
- R23=1
- l=mw and field(0)~ mw for weight fraction (intensity ~ mw**2)
- 'Diff' intensity weight diffusion coefficient distribution with l=Diff -> exp(-q**2*t*Diff);
- R23 =0
- R22 =1
- R21 =R20^2=q^2
- l is therefore Diffusion coefficient
- 'Laplace' Laplace transform with exp(-t*Gamma)
- R23=0
- R22=1
- sets R16=0 (wavelength) => R20=undefined and R21=1
- l is therefore relaxation rate with transform exp(-t*l)
- 'radius' weight fraction radius distribution of spheres satisfying stokes law with
- l=rh -> rh**3*exp(-q**2* [k_B*T*/(0.06*pi*visc*rh)] * t)
- R23=3
- R22=-1
- R21=k_B*R18*R20^2/(0.06pi*R19)
- and l=rh is a the hydrodynamic radius
- weight fraction as field(0) ~ V ~ rh**3 for spheres
- 'T1' intensity weight relaxation times distribution l=T1 -> T1*exp(-t/T1)
- for nmr rotational diffusion T1 with R23=-1 R21=1, R22=-1 neglecting T2
- intensity ~ number of contributing atoms ~M ~ V ~ Rh**3
- rot correlation time tc=(4pi*visc/kT)*Rh**3 -> intensity(0) ~ tc -> R23=1
- 'dls' intensity weight radius distribution of spheres satisfying stokes law
- with l=rh -> exp(-q**2* [k_B*T*/(0.06*pi*visc*rh)] * t)
- R21 = k_B*T*/(0.06*pi*visc)
- R22=-1, R23=0
- 'user' general case RUSER(15,16,17,19,21,22,23) all user specified
e.g.R16=0,R23=0,R22=-1,R21=0 is laplace with Gamma^-1
RDG : 1,-1, default = -1
If 1 use Rayleigh Debye Gans formfactor with wall thickness WALL [cm] of hollow sphere (default WALL =0)
distribution needs to be the 'radius' distribution.
l : float , R16 overrides value in dataArray if given
Overrides wavelength in data and set it to 632 nm
(except for Laplace and relaxation then l=0 overrides this because no wavelength is used).
n : float, R15, overrides value in dataArray if given
Refractive index of solvent, default 1.333.
a : float, R17, overrides value in dataArray if given
Scattering angle in degrees , default 90°.
T : float, R18, overrides value in dataArray if given
Absolute Temperature in K or factor ; default is 293K
WALL : float, R24, overrides value in dataArray if given
Wall thickness in cm for simulation of hollow spheres.
- =0 normal sphere as default
- If WALL<0 -> WALL=0
v : float, R19, overrides value in dataArray if given
Viscosity in centipoise; 1cPoise=1e-3 Pa*s
- If v='d2o' or 'h2o' the viscosity of d2o or h2o at the given Temperature T is used
- default is h2o at T
write : any
        If write is given as keyword argument (write=1) the contin output is written to a file with '.con' appended.
Returns
-------
dataList : input with best solution added as attributes prepended by 'contin_'
- .contin_bestFit : **best fit result**
- .contin_result_fit : fitted correlation function
- .contin_fits : sequence of "not best fits" in CONTIN with same structure as best Fit
- .contin_alpha : ALPHA of best solution
- .contin_alphalist : list of ALPHA in fits
bestFit dataArray with best solution distribution content
- .contin_bestFit contains
[*relaxationtimes, intensityweightci, errors, hydrodynamicradii, massweightci, numberweightci*]
        The columns can be addressed with underscore as e.g. *.contin_bestFit._hydrodynamicradii*.
See second example.
Weighted ci are normalized that the *sum* over all points is equal 1.
The corresponding probability is the probability of the interval around the corresponding
relaxation time or Rh. Remember that because of the log scale the interval change width.
- .contin_bestFit.attr() shows all available parameters from CONTIN fit and other parameters
- .contin_bestFit.fitquality measure for the fit quality ALPHA and ...
Peak results from CONTIN :
- .contin_bestFit.peaks **intensity weight relaxation time distribution** as dict, primary result
- .contin_bestFit.ipeaks **intensity weight relaxation time distribution** for all peaks with :
- [weight : peak weight (CONTIN output)
- mean : peak mean (CONTIN output)
- std : peak standard deviation (CONTIN output)
- mean_err : error of mean (CONTIN output)
- imean : index of mean
- 1/(q**2*mean) : diffusion constant in cm**2/s determined from mean
- Rh : hydrodynamic radius in cm determined from mean
- q : wavevector in 1/cm
- 1/(q**2*mean) : diffusion constant in nm**2/ns
- Rh : hydrodynamic radius in nm determined from mean
- q] : wavevector in 1/nm
- .contin_bestFit.mpeaks : **peaks mass weight** same content as ipeaks for 2 strongest
- .contin_bestFit.npeaks : **peaks number weight** same content as ipeaks for 2 strongest
- .contin_bestFit.ipeaks_name : content of ipeaks
- .contin_bestFit.info :
- .contin_bestFit.baseline : baseline + error
- .contin_bestFit.momentEntireSolution : all peaks together
- .contin_bestFit.maximum : maximum value
- .contin_bestFit.filename :
- .contin_bestFit.imaximum : index maximum value
Notes
-----
CONTIN source and executable can be downloaded from http://s-provencher.com/pages/contin.shtml.
For using CONTIN and the wrapper you need a working CONTIN executable in your path.
::
# Download the FORTRAN source code from his web page in a browser or
wget http://s-provencher.com/pub/contin/contin.for.gz
gunzip contin.for.gz
# compile with gfortran
gfortran contin.for -o contin -fallow-argument-mismatch -std=legacy
# move to a place in your PATH e.g. $HOME/local/bin
mv contin $HOME/local/bin/contin
# check if it is executable, open a new shell
which contin
# if not found check if the path is in your PATH variable and set it in .bashrc or .profile
# if still not found may be its not executable; so make it
chmod u+x $HOME/local/bin/contin
If Peaks seem small please integrate them as the peak area determines the contributed intensity (see last example).
Peak area is strongly dependent on the peak point separation dxi = (x[i]-x[i-1]) in particular on a log scale.
This weight is in some plots added to the probability as Pi*dxi resulting in equal height.
See [5]_ for details.
::
ff=result.contin_bestFit
p1=(10<ff.X) &(ff.X<100) # peak borders 10 and 100
fp1=np.trapz(ff.Y[p1],ff.X[p1])/np.trapz(ff.Y,ff.X) # fraction of full signal
**An example dataset with noise**
In this synthetic example the noise causes a peak width of about 20% (std/mean) as it is
typically found for monodisperse samples measured short times (e.g. proteins on a Zetasizer in automatic mode).
This contribution is not caused by polydispersity and always present dependent on noise level.
Noise level can be reduced using longer measurement times, thus reducing the width.
In real measurements one needs to test with different measurement times if polydispersity is present
(thus independent on noise level).
- D = 0.05 nm²/ns corresponds to a hydrodynamic radius of 4.4 nm and results in a
relaxation time :math:`\Gamma= 1/(q^2D) \approx 58 \mu s`.
- The below plots shows the CONTIN result as correlation time distribution
with mean 58 µs ±12 µs and peak weight 95% in good approximation.
- The second plot shows the radius distribution for different weights.
Probabilities are for an interval around a point x value that the sum of all points is normalized.
(not the integral on the log scale).
- For real measurements one may cut noisy short times or dust contaminated long times.
::
import jscatter as js
import numpy as np
t=js.loglist(1,10000,1000) #times in microseconds
q=4*np.pi*1.333/632*np.sin(np.deg2rad(90)/2) # 90 degrees for 632 nm , unit is 1/nm**2
D=0.05*1000 # nm**2/ns * 1000 = units nm**2/microseconds
noise=0.0001 # typical < 1e-3
data=js.dA(np.c_[t,0.95*np.exp(-q**2*D*t)+noise*np.random.randn(len(t))].T)
# add attributes to overwrite defaults
data.Angle =90 # scattering angle in degrees
data.Temperature=293 # Temperature of measurement in K
data.Viscosity =1 # viscosity cPoise
data.Refractive =1.333 # refractive index
data.Wavelength =632 # wavelength
# do CONTIN
dr=js.dls.contin(data,distribution='x')
js.dls.contin_display(dr) # display overview
# same to demonstrate access to distributions
bf =dr[0].contin_bestFit
p=js.grace(1.,1.)
p.multi(2,1,vgap=0.25)
# access correlation function and relaxation time distribution
p[0].plot(data,sy=[1,1,1],legend='simulated data')
p[0].plot(dr[0],li=[1,3,3],sy=0,le='contin result')
p[0].plot(bf.X,bf.Y*10,sy=[2,0.5,2],le='relaxation time distribution')
p[0].xaxis(scale='log',label=r'\xG\f{} / µs',min=1,max=1e5)
p[0].yaxis(label=r'G\s1\N / P(\xG\f{})',min=0,max=1.1)
p[0].legend(x=300,y=0.7)
# Hydrodynamic radius distribution in different weights
p[1].plot(bf[3],bf[1],sy=[2,0.5,2],li=[1,1,''],le='intensity weight')
p[1].plot(bf[3],bf[4],sy=[2,0.5,3],li=[1,1,''],le='mass weight')
p[1].plot(bf[3],bf[5],sy=[2,0.5,4],li=[1,1,''],le='number weight')
p[1].xaxis(scale='log',label=r'R\sh\N / cm',min=1e-7,max=1e-5)
p[1].yaxis(label=r'P(R\sh\N)',min=0,max=0.1)
p[1].legend(x=1.5e-6,y=0.1)
# Rh of first peak
p[1].text(f'Rh from contin {bf.ipeaks[0,6]:.2g} cm',x=1.3e-6,y=0.04)
# p.save(js.examples.imagepath+'/contin.jpg')
# access peak values by
dr[0].contin_bestFit.ipeaks_name
dr[0].contin_bestFit.ipeaks
# calc std/mean of first peak
dr[0].contin_bestFit.ipeaks[0,2]/dr[0].contin_bestFit.ipeaks[0,1]
.. image:: ../../examples/images/contin.jpg
:align: center
:width: 50 %
:alt: sphere
**Bimodal distribution** and the suppression of larger aggregates in mass and number weight.
The two particles sizes with a factor of 20 in radius shows same contribution to the scattering.
For mass or number weight the dependence of the scattering intensity on :math:`R^6`
compensates the contribution of larger aggregates.
The area under both intensity weighted peaks is equal as can be shown by integration.
::
import jscatter as js
import numpy as np
t=js.loglist(0.125,10000,1000) #times in microseconds
q=4*np.pi*1.333/632*np.sin(np.deg2rad(90)/2) # 90 degrees for 632 nm , unit is 1/nm**2
D=0.05*1000 # nm**2/ns * 1000 = units nm**2/microseconds
noise=0.0001 # typical < 1e-3
data=js.dA(np.c_[t,0.45*np.exp(-q**2*D*t)+0.45*np.exp(-q**2*D*20*t) +noise*np.random.randn(len(t))].T)
# add attributes to overwrite defaults
data.Angle =90 # scattering angle in degrees
data.Temperature=293 # Temperature of measurement in K
data.Viscosity =1 # viscosity cPoise
data.Refractive =1.333 # refractive index
data.Wavelength =632 # wavelength
# do CONTIN
dr=js.dls.contin(data,distribution='x')
# same to demonstrate access to distributions
bf =dr[0].contin_bestFit
p=js.grace(1.,1.)
p.multi(2,1,vgap=0.25)
# access correlation function and relaxation time distribution
p[0].plot(data,sy=[1,1,1],legend='simulated data')
p[0].xaxis(scale='log',label=r'\xG\f{} / µs',min=1,max=1e5)
p[0].yaxis(label=r'G\s1\N / P(\xG\f{})')
p[0].plot(dr[0],li=[1,3,3],sy=0,le='contin result')
p[0].plot(bf.X,bf.Y*10,sy=[2,0.5,2],le='relaxation time distribution')
p[0].legend(x=20,y=0.8)
# Hydrodynamic radius distribution in different weights
p[1].plot(bf._hydrodynamicradii,bf._intensityweightci, sy=[2,0.5,2],li=[1,1,''],le='intensity weight')
p[1].plot(bf._hydrodynamicradii,bf._massweightci, sy=[2,0.5,3],li=[1,1,''],le='mass weight')
p[1].plot(bf._hydrodynamicradii,bf._numberweightci, sy=[2,0.5,4],li=[1,1,''],le='number weight')
p[1].xaxis(scale='log',label=r'R\sh\N / cm',min=1e-9,max=1e-5)
p[1].yaxis(label=r'P(R\sh\N)',min=0,max=0.12)
p[1].legend(x=1.5e-7,y=0.1)
# Rh of first two peaks
p[1].text(f'Rh_1 ={bf.ipeaks[0,6]:.3g} cm \\nRh_2 ={bf.ipeaks[1,6]:.3g} cm',x=1.3e-7,y=0.04)
# p.save(js.examples.imagepath+'/contin_bimodal.jpg')
# Peak integration resulting in about 0.5 for both
p1=(1<bf.X) &(bf.X<20) # peak borders 10 and 100
fp1=np.trapz(bf.Y[p1],bf.X[p1])/np.trapz(bf.Y,bf.X) #
p2=(40<bf.X) &(bf.X<400) # peak borders 10 and 100
fp2=np.trapz(bf.Y[p2],bf.X[p2])/np.trapz(bf.Y,bf.X) #
.. image:: ../../examples/images/contin_bimodal.jpg
:align: center
:width: 50 %
:alt: sphere
R20 (scattering vector) is calculated as R20= 4e-7*pi*R15/R16*sin(R17/2), if R16!=0. else R20=0
References
----------
.. [1] CONTIN: A general purpose constrained regularization program for inverting
noisy linear algebraic and integral equations
Provencher, S Computer Physics Communications 27: 229.(1982)
doi:10.1016/0010-4655(82)90174-6.
.. [2] http://s-provencher.com/pub/contin/cpc2.pdf.
.. [3] A constrained regularization method for inverting data represented by linear algebraic or integral equations
Provencher, S. W. Comp. Phys. Commu 27: 213–227. (1982)
doi:10.1016/0010-4655(82)90173-4
.. [4] Inverse problems in polymer characterization: Direct analysis of polydispersity with photon correlation
spectroscopy.
S.W. Provencher, Makromol. Chem. 180, 201 (1979).
.. [5] Transformation Properties of Probability Density Functions
Stanislav Sýkora
Permalink via DOI: 10.3247/SL1Math04.001
Original code description in CONTIN
::
C THE FOLLOWING ARE THE NECESSARY INPUT - 0460
C 0461
C DOUSIN = T (DOUSIN MUST ALWAYS BE .TRUE..) 0462
C 0463
C LUSER(3) = T, TO HAVE FORMF2, THE SQUARED FORM FACTORS, COMPUTED IN 0464
C USERK. 0465
C = F, TO SET ALL THE FORMF2 TO 1. (AS WOULD BE APPROPRIATE 0466
C WITH LAPLACE TRANSFORMS). 0467
C RUSER(24) MAY BE NECESSARY INPUT TO SPECIFY THE FORM FACTOR (E.G., 0468
C THE WALL THICKNESS OF A HOLLOW SPHERE) IF LUSER(3)=T. SEE 0469
C COMMENTS IN USERK. 0470
C IUSER(18) MAY BE NECESSARY INPUT IF LUSER(3)=T (E.G., TO SPECIFY THE 0471
C NUMBER OF POINTS OVER WHICH THE SQUARED FORM FACTORS WILL 0472
C BE AVERAGED). SEE COMMENTS IN USERK. 0473
C 0474
C RUSER(16) = WAVELENGTH OF INCIDENT LIGHT (IN NANOMETERS), 0475
C = 0, IF RUSER(20), THE MAGNITUDE OF THE SCATTERING VECTOR 0476
C (IN CM**-1), IS NOT TO BE COMPUTED. WHEN 0477
C RUSER(16)=0, RUSER(15) AND RUSER(17) NEED NOT BE 0478
C INPUT, AND CONTIN WILL SET RUSER(21)=1 0479
C (AS APPROPRIATE WITH LAPLACE TRANSFORMS). 0480
C 0481
C RUSER(15) = REFRACTIVE INDEX. 0482
C RUSER(17) = SCATTERING ANGLE (IN DEGREES). 0483
C 0484
C 0485
C IUSER(10) SELECTS SPECIAL CASES OF USERK FOR MORE CONVENIENT USE. 0486
C 0487
C IUSER(10) = 1, FOR MOLECULAR WEIGHT DISTRIBUTIONS FROM PCS 0488
C (WHERE THE SOLUTION, S(G), IS SUCH THAT S(G)DG IS 0489
C THE WEIGHT FRACTION WITH MOLECULAR WEIGHT BETWEEN 0490
C G AND G+DG). 0491
C CONTIN SETS - 0492
C RUSER(23) = 1., 0493
C RUSER(21) = RUSER(18)*RUSER(20)**2. 0494
C (SEE ABOVE DISCUSSION OF RUSER(16).) 0495
C YOU MUST INPUT - 0496
C RUSER(18) TO SATISFY THE EQUATION (IN CGS UNITS) - 0497
C (DIFFUSION COEFF.)=RUSER(18)*(MOL. WT.)**RUSER(22). 0498
C RUSER(22) (MUST ALSO BE INPUT, TYPICALLY ABOUT -.5). 0499
C 0500
C IUSER(10) = 2, FOR DIFFUSION-COEFFICIENT DISTRIBUTONS OR LAPLACE 0501
C TRANSFORMS (WHERE G IS DIFF. COEFF. IN CM**2/SEC 0502
C OR, E.G., TIME CONSTANT). 0503
C CONTIN SETS - 0504
C RUSER(23) = 0., 0505
C RUSER(22) = 1., 0506
C RUSER(21) = RUSER(20)**2 (SEE ABOVE DISCUSSION 0507
C OF RUSER(16).). 0508
C 0509
C IUSER(10) = 3, FOR SPHERICAL-RADIUS DISTRIBUTIONS, ASSUMING THE 0510
C EINSTEIN-STOKES RELATION (WHERE THE SOLUTION, S(G), 0511
C IS SUCH THAT S(G)DG IS THE WEIGHT FRACTION OF 0512
C PARTICLES WITH RADIUS (IN CM) BETWEEN G AND G+DG. 0513
C WEIGHT-FRACTION DISTRIBUTIONS YIELD BETTER SCALED 0514
C PROBLEMS THAN NUMBER-FRACTION DISTRIBUTIONS, WHICH 0515
C WOULD REQUIRE RUSER(23)=6.) 0516
C CONTIN SETS - 0517
C RUSER(23) = 3., 0518
C RUSER(22) = -1., 0519
C RUSER(21) = RUSER(20)**2*(BOLTZMANN CONST.)* 0520
C RUSER(18)/(.06*PI*RUSER(19)). 0521
C (SEE ABOVE DISCUSSION OF RUSER(16).) 0522
C YOU MUST HAVE INPUT - 0523
C RUSER(18) = TEMPERATURE (IN DEGREES KELVIN), 0524
C RUSER(19) = VISCOSITY (IN CENTIPOISE). 0525
C 0526
C IUSER(10) = 4, FOR GENERAL CASE, WHERE YOU MUST HAVE INPUT - 0527
C RUSER(J), J = 21, 22, 23. 0528
C 0529
C 0530
"""
# contin is called in a shell like: contin <input.txt >output.txt
# we mimic this by subprocess.run
if continexe is None:
raise Exception('There is no contin executable found. ' +
'Please compile and place executable at callable path. ' +
'See documentation of DLS module.')
if hasattr(datalist, '_isdataArray'):
datalist = dL(datalist)
elif isinstance(datalist, str):
datalist = dL(datalist)
if len(datalist) == 0:
raise NameError('There are no data in datalist')
# some consistency tests
if 'typ' in kw:
typ = kw['typ']
else:
typ = 0
if 'R22' in kw:
if kw['R22'] == 0:
raise ValueError('R22 is not allowed to be equal zero if given')
# some defaults and functions #######################################################
if distribution[0] == 'L':
# set R16 =0 to get a real laplace l is otherwise the wavelength
l = 0
elif 'l' in kw:
l = kw['l'] # which is set with a default here
else:
l = 632. # in nm
if 'T' in kw:
T = kw['T']
del kw['T']
if T < 273:
print('Warning: temperature below ZERO!! ')
elif 'R18' in kw:
T = kw['R18'] # here we misuse the T as the proportionality constant for kernel molweight
else:
T = 293. # in K
if 'n' in kw:
n = kw['n'] # default refractive index
else:
n = 1.333
if 'a' in kw:
a = kw['a'] # default angle
else:
a = 90. # in degrees
if bgr != 0:
bgr = 1 # no background
# wavevector in 1/cm
qq = lambda n, ll, theta: 4. * pi * n / (ll * 1e-7) * np.sin(np.deg2rad(theta) / 2.)
# hydrodynamic radius Rh(relaxationtime) in cm, gamma is relaxation time visc in cPoise
Rh = lambda gamma, q, T, visc: kb * T / (pi * 0.06 * visc) * q ** 2 * gamma
##########################################################
# DEFINES THE kind of the distribution kernel
distr = {'m': 1,
'D': 2, 'L': 2,
'r': 3,
'u': 4, 'x': 4, 'd': 4, 'T': 4}
if distribution[0] == 'm' and 'R18' not in kw.keys() and 'R21' not in kw.keys():
print('please provide R18 and R22 in input to function')
print("to describe D=R18*molWeight^R22 e.g.polymer R22=0.5")
return
if 'R22' in kw:
R22 = float(kw['R22'])
else:
R22 = 0
if 'R21' in kw:
R21 = float(kw['R21'])
else:
R21 = 0
if 'R23' in kw:
R23 = float(kw['R23'])
else:
R23 = 0
edist = ('x', 'T', 'd') # here we set R21-R23 explicitly in the later header part
if distribution[0] == 'x': # l^R23*exp(-R21*t*l^R22) with R21=1,R22=-1,R23=0 ==> l=1/T -> exp(-t/T)
R21 = 1
R22 = -1
R23 = 0
if distribution[0] == 'T': # l^R23*exp(-R21*t*l^R22) with R21=1,R22=-1,R23=1 ==> l=T -> T*exp(-t/T)
R21 = 1
R22 = -1
R23 = 1
if distribution[0] == 'd': # l^R23*exp(-R21*t*l^R22) with R22=-1,R23=0 ==> l=Rh -> 1*exp(-R21*t/Rh)
# I(t)=exp(- [ q**2*k_B*T*/(0.06*pi*visc)] * t/Rh )
# l-> rh ; t=t ; R21-> q**2* [k_B*T*/(0.06*pi*visc)]
R21 = kb * T / (0.06 * pi * _visc(T, **kw)) * qq(n, l, a) ** 2 # D*Rh * Q**2
R22 = -1
R23 = 0
# write a common header for CONTIN as ASCII inputfile
last = 1 # for multi dataset evaluation this is -1 (false) except for the last one
# we process here ONLY single files
elements = 40 # just to have an array for it; last 2 lines are "end" and NY
header = np.array([''] * elements, dtype='|S70') # a single fortran line
header[0] = 'filename'.ljust(70) # the loaded file
header[1] = 'LAST'.ljust(6) + str().rjust(5) + ('%15.4E' % last) # see above
# header[2]='GMNMX'.ljust(6)+str(1).rjust(5)+('%15.4E' % gmin) # first point of the distribution to fit
# header[3]='GMNMX'.ljust(6)+str(2).rjust(5)+('%15.4E' % gmax) # last point of the distribution to fit
header[4] = 'IWT'.ljust(6) + str().rjust(5) + (
'%15.4E' % 5) # fit strategy how to determine errors -> 5 from a prefit, results in 2 fits but good errors
# unweighted fit IWT=1 ->errors equal; IWT=4 direct input of errors not implemented
header[5] = 'NERFIT'.ljust(6) + str().rjust(5) + (
'%15.4E' % 0) # number of points around a point to determine error; safety margin default 10; we use 0
header[6] = 'NINTT'.ljust(6) + str().rjust(5) + (
'%15.4E' % -1) # number of equally spaced sets in tk; <0 means direct input as used here
header[7] = 'IFORMT'.ljust(6) + str().rjust(20) # format of time variable for direct input
header[8] = '(1E12.5)'.ljust(26) # 1 in a row
header[9] = 'IFORMY'.ljust(6) + str().rjust(20) # format of y variable for direct input correlation
header[10] = '(1E12.5)'.ljust(26) # 1 in a row
if 'IGRID' in kw.keys():
# Grid=2 is log grid ; 1 is equally spaced grid; default 2= log grid
header[11] = 'IGRID'.ljust(6) + str().rjust(5) + ('%15.4E' % float(kw['IGRID']))
header[12] = 'NLINF'.ljust(6) + str().rjust(5) + ('%15.4E' % bgr) # allows a single const background , 0 no bgr
header[13] = 'NG'.ljust(6) + str().rjust(5) + ('%15.4E' % Ngrid) # Ngrid points between gmin,gmax
header[14] = 'DOUSIN'.ljust(6) + str().rjust(5) + (
'%15.4E' % 1) # Do User INPUT ; to use the below given values anyway this is the default
header[15] = 'IUSER'.ljust(6) + str(10).rjust(5) + (
'%15.4E' % distr[distribution[0]]) # selects the kernel see help above
header[16] = 'RUSER'.ljust(6) + str(15).rjust(5) + ('%15.4E' % n) # refractive index
header[17] = 'RUSER'.ljust(6) + str(16).rjust(5) + ('%15.4E' % l) # wavelength in nm
header[18] = 'RUSER'.ljust(6) + str(17).rjust(5) + ('%15.4E' % a) # scattering angle in degrees
header[19] = 'RUSER'.ljust(6) + str(18).rjust(5) + (
'%15.4E' % T) # absolute Temperature in K or proportionality constant
header[20] = 'RUSER'.ljust(6) + str(19).rjust(5) + ('%15.4E' % _visc(T, **kw)) # viscosity in centipoise
header[25] = 'RUSER'.ljust(6) + str(10).rjust(5) + ('%15.4E' % typ) # (0) means don't change; input is g1;
# (1) input is intensity correlation g2; => calculate (g2/R21-1)^0.5
# (-1) input is g2-1, takes only the square root
# ALV and Zetasizer Data are g2-1 -> -1
header[26] = 'LUSER'.ljust(6) + str(3).rjust(5) + (
'%15.4E' % RDG) # use rayleighDebyeGans formfactor (set to true => 1) or const 1 (set to false => -1 )
if 'R16' in kw.keys(): header[17] = 'RUSER'.ljust(6) + str(16).rjust(5) + ('%15.4E' % float(kw['R16']))
if 'WALL' in kw.keys(): header[24] = 'RUSER'.ljust(6) + str(24).rjust(5) + ('%15.4E' % float(
kw['WALL'])) # wall thickness in cm in RDG for simulating hollow spheres =0 normal sphere
if 'ALPS1' in kw.keys(): header[27] = 'ALPST'.ljust(6) + str(1).rjust(5) + (
'%15.4E' % float(kw['ALPS1'])) # take this alpha in preliminary error analysis
if 'ALPS2' in kw.keys(): header[28] = 'ALPST'.ljust(6) + str(2).rjust(5) + (
'%15.4E' % float(kw['ALPS2'])) # take this alpha in final analysis and as choosen solution
if 'I18' in kw.keys(): header[29] = 'IUSER'.ljust(6) + str(18).rjust(5) + (
'%15.4E' % float(kw['I18'])) # formfactor average over 2 I18+1 points
if distribution[0] in edist or 'R21' in kw.keys(): header[22] = 'RUSER'.ljust(6) + str(21).rjust(5) + (
'%15.4E' % R21)
if distribution[0] in edist or 'R23' in kw.keys(): header[23] = 'RUSER'.ljust(6) + str(23).rjust(5) + (
'%15.4E' % R23)
if distribution[0] in edist or 'R22' in kw.keys(): header[21] = 'RUSER'.ljust(6) + str(22).rjust(5) + (
'%15.4E' % R22)
if 'IQUAD' in kw.keys(): header[30] = 'IQUAD'.ljust(6) + str().rjust(5) + (
'%15.4E' % float(kw['IQUAD'])) # quadrature default=3 Simpson 1 is direct; 2 is trapezoidal
if 'NORDER' in kw.keys(): header[30] = 'NORDER'.ljust(6) + str().rjust(5) + (
'%15.4E' % float(kw['NORDER'])) # order regularization; default 2
if 'PLEVEL' in kw.keys():
word = ('%5.2f' % float(kw['PLEVEL']))
header[31] = 'PLEVEL'.ljust(6)
header[32] = (word * 4) # 0.1<PLEVEL<0.9 best is not to use it, default =0.5
if 'NONNEG' in kw.keys():
header[33] = 'NONNEG'.ljust(6) + str().rjust(5) + (
'%15.4E' % float(kw['NONNEG'])) # no negative values in the solution default is 1;
else:
# default is 1 as a distribution has no negative values
header[33] = 'NONNEG'.ljust(6) + str().rjust(5) + ('%15.4E' % 1)
header[-2] = 'END'.ljust(26)
header[-1] = 'NY'.ljust(6) + str(0).rjust(5) # Number of datapoints is set per file later
# ende header für CONTIN als ASCII inputfile ##################################
# a default grid min max is needed if it is not given explicitly
# to transform tmin and tmax according to this function from kernel exp 1=t*R21*l**R22 -> l=(t*R21)**-1/R22
if distribution[0] == 'L':
transk = lambda t, n, l, a, T, v, R22, R21: 1 / t
elif distribution[0] == 'm':
transk = lambda t, n, l, a, T, v, R22, R21: (t * T * qq(n, l, a) ** 2) ** (-1 / R22)
elif distribution[0] == 'D':
transk = lambda t, n, l, a, T, v, R22, R21: (t * qq(n, l, a) ** 2) ** (-1)
elif distribution[0] == 'r':
transk = lambda t, n, l, a, T, v, R22, R21: (t * kb * T * qq(n, l, a) ** 2 / (0.06 * pi * v))
elif distribution[0] == 'd':
transk = lambda t, n, l, a, T, v, R22, R21: (t * kb * T * qq(n, l, a) ** 2 / (0.06 * pi * v))
elif distribution[0] == 'u' or distribution[0] == 'x':
transk = lambda t, n, l, a, T, v, R22, R21: (t * R21) ** (-1 / R22)
elif distribution[0] == 'T':
transk = lambda t, n, l, a, T, v, R22, R21: (t * R21) ** (-1 / R22)
###################################
# now look at the data and modify also the header if necessary
idata = 0
print('processing %i datasets ' % len(datalist))
for data in datalist:
idata += 1
print('evaluate Nr.:', idata)
try:
file = getattr(data, '@name')
except AttributeError:
file = time.strftime("%y%m%d%H%M%S", time.localtime())
header[0] = file
data.extract_comm(deletechars=':[]()"') # find extra parameters in comments
# extract datetime from comments from ALV instrument
try:
timestr = list(filter(lambda ll: ll.startswith('Time') or ll.startswith('Date'), data.comment))[0]
timestr = timestr.translate(None, '"').split()[2]
data.datetime = time.mktime(time.strptime(timestr, "%d.%m.%Y%H:%M:%S"))
except:
data.datetime=0
# take values from data
if 'n' not in kw:
try:
n = _getfirst(data.Refractive)
except AttributeError:
pass
if 'l' not in kw and distribution[0] != 'L' and 'R16' not in kw:
# noinspection PyBroadException
try:
l = _getfirst(data.Wavelength)
except AttributeError:
pass
if 'a' not in kw:
try:
a = _getfirst(data.Angle)
except AttributeError:
pass
if 'T' not in kw:
try:
T = _getfirst(data.Temperature)
except AttributeError:
pass
if 'v' not in kw:
try:
v = _getfirst(data.Viscosity)
except AttributeError:
v = _visc(T, **kw)
else:
v = _visc(T, **kw)
# or override it
if l != 0:
contin_wavevector = qq(n, l, a) # in 1/cm
else:
contin_wavevector = 0
# create header for contin with parameters from datafile
try:
if 'qtmin' in kw:
tmin = float(kw['qtmin']) / contin_wavevector ** 2
if 'qtmax' in kw:
tmax = float(kw['qtmax']) / contin_wavevector ** 2
except AttributeError:
raise Exception('dont use qtmin / qtmax with Laplace option. wavevector is zero')
itmin, itmax = data.X.searchsorted((tmin, tmax)) # searches where tmin,tmax fit into list and outputs the place
if tmin < 0:
itmin = -tmin # if negative purge first points
if tmax < 0:
itmax = tmax # same but if negative count from behind
try:
if 'gmin' not in kw and 'qgmin' not in kw:
gmin = transk(data.X[itmin], n, l, a, T, _visc(T, **kw), R22,
R21) # calculate min max of the grid if not given
elif 'qgmin' in kw:
gmin = float(kw['qgmin']) / contin_wavevector ** 2
else:
gmin = float(kw['gmin'])
if 'gmax' not in kw and 'qgmax' not in kw:
gmax = transk(data.X[itmax], n, l, a, T, _visc(T, **kw), R22, R21)
elif 'qgmax' in kw:
gmax = float(kw['qgmax']) / contin_wavevector ** 2
else:
gmax = float(kw['gmax'])
except ZeroDivisionError:
print('wavevector is zero; use qgmax/qgmin only with non laplace option')
header[2] = 'GMNMX'.ljust(6) + str(1).rjust(5) + ('%15.4E' % min(gmin, gmax)) # fit interval min
header[3] = 'GMNMX'.ljust(6) + str(2).rjust(5) + ('%15.4E' % max(gmin, gmax)) # fit interval max
if 'T' not in kw and 'R18' not in kw:
try:
T = _getfirst(data.Temperature)
except:
print('No temperature given, set to default 293.15K')
T = 273.15 + 20
header[19] = 'RUSER'.ljust(6) + str(18).rjust(5) + ('%15.4E' % T)
header[16] = 'RUSER'.ljust(6) + str(15).rjust(5) + ('%15.4E' % n)
header[17] = 'RUSER'.ljust(6) + str(16).rjust(5) + ('%15.4E' % l)
header[18] = 'RUSER'.ljust(6) + str(17).rjust(5) + ('%15.4E' % a)
header[20] = 'RUSER'.ljust(6) + str(19).rjust(5) + ('%15.4E' % v)
# set the number of values
lenX = len(data.X[itmin:itmax])
header[-1] = 'NY'.ljust(6) + str(lenX).rjust(5)
# now write the header and the data to a input buffer
input = io.BytesIO()
input.writelines([b' ' + line + b'\n' for line in header if line != b''])
input.writelines([b' ' + (b'%8.5E' % line) + b'\n' for line in data.X[itmin:itmax]])
if 'test' in kw: # just as a test with simple exponential relax=valuefloat(kw['test'])
input.writelines(
[' ' + ('%8.5E' % line) + '\n' for line in 1e-3 + np.exp(-data.X[itmin:itmax] / float(kw['test']) * 2)])
else:
input.writelines([b' ' + (b'%8.5E' % line) + b'\n' for line in data.Y[itmin:itmax]])
# to check input
if 'write' in kw or 'w' in kw:
with open('./' + 'input.con', 'w') as f:
f.writelines(input.getvalue().decode('utf-8'))
# now run contin in a shell like environment with pipes
p = subprocess.run(continexe, input=input.getvalue(), capture_output=True)
input.close()
output = p.stdout.decode('utf-8')
error = p.stderr.decode('utf-8')
if p.returncode >0 or len(output) == 0:
if error != '':
for line in error.split('\n'):
print('contin_std_err>', line)
if len(output) == 0:
print('there was nothing in output yet')
return
# to check output
if 'write' in kw or 'w' in kw:
with open('./' + file + '.con', 'w') as f:
f.writelines(output)
# CONTIN finished, we look at the output ------------------------------------
# sort the output to the data
outblocks = output.split('UNREGULARIZED VARIABLES')[-1].split(file)
if len(outblocks) < 3:
print('last lines of CONTIN output')
print(outblocks[-1])
raise Exception('CONTIN ended with no result; use w=1 to get output for analysis')
# blocks of different alpha in result (without prefit results);
# first repeat input ; then preliminary res; last two choosen solution
# second last is fit ; last is result distribution
# take the fit block after Abscissa;splitlines and take only lenX;split line to words and convert to float
temp = np.r_[[[float(vv) for vv in line[:22].split()] for line in
outblocks[-2].split('ABSCISSA\n')[-1].splitlines()[:lenX]]].T
data.contin_result_fit = temp[[1, 0]] # resort to have xtime in 0 and fit_y in 1
data.contin_fits = []
for k in np.arange(len(outblocks))[1:-2]: # all solutions with different alpha
chosen = outblocks[k].splitlines()
# take the chosen block line 6 to 6+Ngrid;split into words and convert to float;
# sometimes D is used instead of E for float 1E5
# order of temp -> [y, error_y, t]
temp = np.r_[[[float(vv) for vv in line[:31].replace('D', 'E').split()] for line in chosen[6:Ngrid + 6]]].T
# fit quality in 3rd line of chosen last chosen block
try:
# hydrodynamic radius temp[2] is correlation time]
RR = Rh(temp[2] * timescale, contin_wavevector, T, v)
# intensity weight
ci_intw = temp[0]/temp[0].sum()
ci_intwerr = temp[1]/temp[0].sum()
# mass weighted contribution ci temp[0] is fraction
ci_massw = _massweight(temp[0], contin_wavevector, RR)
# number weighted contribution
ci_numw = _numberweight(temp[0], contin_wavevector, RR)
# the result output as [t, y, error_y, hydrodyn Radius, mass weight, number weight]
data.contin_fits.append(dA(np.c_[temp[2],ci_intw, ci_intwerr , RR, ci_massw, ci_numw].T))
except:
data.contin_fits.append(dA(np.c_[temp[[2, 0, 1]].T].T))
# fitquality from contin output
data.contin_fits[-1].fitquality = {}
name = [aa.lstrip() for aa in chosen[2].lstrip().split(' ')]
line = chosen[3].split()
if line[0] == '*':
value = [float(li) for li in line[1:]]
else:
value = [float(li) for li in line[:]]
for i in range(len(name)):
data.contin_fits[-1].fitquality[name[i]] = value[i]
# look at the peaks found at the end of solution block
lines = chosen[Ngrid + 6:]
linesl = []
data.contin_fits[-1].peaks = []
entireSol = -1
for i in range(len(lines)):
if lines[i][:5] == '0PEAK':
linesl.append(i) # first peak line number
if lines[i].strip()[:7] == 'MOMENTS':
entireSol = i # entire solution line number
# append if entire solution exists (does not for single peaks)
if entireSol != -1:
data.contin_fits[-1].momentEntireSolution = {}
for j in 1, 2, 3, 4, 5:
val = lines[entireSol + j][44:].replace('X (10**', '').replace(')', '').split()
data.contin_fits[-1].momentEntireSolution['mom_' + val[0]] = \
[_w2f(vv) for vv in [val[0], val[1] + 'E' + val[2]] + val[3:-1]]
data.contin_fits[-1].momentEntireSolution['std/mean'] = _w2f(lines[entireSol + 4][:45].split()[-1])
data.contin_fits[-1].baseline = [_w2f(vv) for vv in np.r_[lines[0][22:].replace('D', 'E').split()][[0, 2]]]
for i in linesl:
data.contin_fits[-1].peaks.append({}) # append a new peak
words = lines[i].split()
data.contin_fits[-1].peaks[-1]['nr'] = _w2f(words[1])
data.contin_fits[-1].peaks[-1]['minmax'] = [_w2f(words[4]), _w2f(words[6])]
data.contin_fits[-1].peaks[-1]['iminmax'] = [
abs(data.contin_fits[-1][0] - data.contin_fits[-1].peaks[-1]['minmax'][0]).argmin(),
abs(data.contin_fits[-1][0] - data.contin_fits[-1].peaks[-1]['minmax'][1]).argmin()]
for j in 1, 2, 3, 4, 5:
val = lines[i + j][44:].replace('X (10**', '').replace(')', '').split()
data.contin_fits[-1].peaks[-1]['mom_' + val[0]] = [_w2f(vv) for vv in
[val[0], val[1] + 'E' + val[2]] + val[3:-1]]
data.contin_fits[-1].peaks[-1]['std/mean'] = _w2f(lines[i + 4][:45].split()[-1])
data.contin_fits[-1].peaks[-1]['weight'] = data.contin_fits[-1].peaks[-1]['mom_0'][1]
data.contin_fits[-1].peaks[-1]['mean'] = data.contin_fits[-1].peaks[-1]['mom_1'][3]
data.contin_fits[-1].peaks[-1]['imean'] = data.contin_fits[-1][0].searchsorted(
data.contin_fits[-1].peaks[-1]['mean'])
data.contin_fits[-1].peaks[-1]['mean_errproz'] = data.contin_fits[-1].peaks[-1]['mom_1'][4]
data.contin_fits[-1].peaks[-1]['mean_err'] = data.contin_fits[-1].peaks[-1]['mom_1'][4] / 100. * \
data.contin_fits[-1].peaks[-1]['mom_1'][3]
# standard deviation is std=sqrt(mom2/mom1- mean**2)
data.contin_fits[-1].peaks[-1]['std'] = abs(
data.contin_fits[-1].peaks[-1]['mom_2'][1] / data.contin_fits[-1].peaks[-1]['mom_0'][1] -
data.contin_fits[-1].peaks[-1]['mom_1'][3] ** 2) ** 0.5
data.contin_fits[-1].peaks[-1]['mean_num'] = data.contin_fits[-1].peaks[-1]['mom_0'][3]
if distribution[0] == 'm': # mass fraction mol weight see provencher
data.contin_fits[-1].peaks[-1]['mean_mass'] = data.contin_fits[-1].peaks[-1]['mom_1'][3]
data.contin_fits[-1].peaks[-1]['mean_z'] = data.contin_fits[-1].peaks[-1]['mom_1'][3]
data.contin_fits[-1].peaks[-1]['mean_z+1'] = data.contin_fits[-1].peaks[-1]['mom_1'][3]
# now the chosen solution from CONTIN, which is the last given (repeated)
chosen = outblocks[-1].splitlines()
data.contin_alpha = float(chosen[3].split()[0]) # this is the choosen solution alpha
data.contin_alphalist = [f.fitquality['ALPHA'] for f in data.contin_fits] #
data.contin_bestFit = data.contin_fits[
data.contin_alphalist.index(data.contin_alpha)] # this is the choosen solution
data.contin_bestFit.imaximum = data.contin_bestFit[0].searchsorted(data.contin_bestFit[1].max())
data.contin_bestFit.columnname = \
'relaxationtimes; intensityweightci; errors; hydrodynamicradii; massweightci; numberweightci'
# a short info about the peaks
ipeaks = np.c_[[[p['weight'],
p['mean'],
p['std'],
p['mean_err'],
p['imean'],
(1 / (contin_wavevector ** 2 * p['mean'] * timescale) if contin_wavevector != 0 else 0),
Rh(p['mean'] * timescale, contin_wavevector, T, v),
contin_wavevector,
(1e5 / (contin_wavevector ** 2 * p['mean'] * timescale) if contin_wavevector != 0 else 0),
Rh(p['mean'] * timescale, contin_wavevector, T, v) * 1e7,
contin_wavevector * 1e-7]
for p in data.contin_bestFit.peaks]]
# sort for largest weight but take only npeak strongest
sequence = ipeaks[:, 0].argsort()[::-1] # [:3]
ipeaks = ipeaks[sequence, :]
# and sort the remaining for peak position in wavevector contin_wavevector
sequence = ipeaks[:, 5].argsort()
data.contin_bestFit.ipeaks = ipeaks[sequence, :]
data.contin_bestFit.ipeaks_name = ['weight',
'mean',
'std',
'mean_err',
'imean',
'mean_Deff',
'Rh',
'wavevector',
'mean_Deff_nmns',
'Rh_nm',
'wavevector_nm',
]
# ipeaks = np.c_[[[p['weight'],
# p['mean'],
# p['std'],
# p['mean_err'],
# p['imean']]
# for p in data.contin_bestFit.peaks]]
# # sort for largest weight but take only npeak strongest
# sequence = ipeaks[:, 0].argsort()[::-1][:3]
# ipeaks = ipeaks[sequence, :]
# data.contin_bestFit.ipeaks = ipeaks
# data.contin_bestFit.ipeaks_name = ['weight',
# 'mean',
# 'std',
# 'mean_err',
# 'imean']
try:
# calculate the mass weight properties
temp = []
for p in data.contin_bestFit.peaks:
imin = max(0, p['iminmax'][0] - 1)
imax = min(len(data.contin_bestFit[0]), p['iminmax'][1] + 1)
weight = np.trapz(data.contin_bestFit[4, imin:imax], data.contin_bestFit[0, imin:imax])
mmean = np.trapz(data.contin_bestFit[4, imin:imax] * data.contin_bestFit[0, imin:imax],
data.contin_bestFit[0, imin:imax]) / weight
std = np.sqrt(np.trapz((data.contin_bestFit[4, imin:imax] * data.contin_bestFit[0, imin:imax] ** 2),
data.contin_bestFit[0, imin:imax]) / weight - mmean ** 2)
temp.append([weight,
mmean,
std,
p['mean_err'],
min(data.contin_bestFit[0].searchsorted(mmean), len(data.contin_bestFit[0])),
(1 / (contin_wavevector ** 2 * mmean * timescale) if contin_wavevector != 0 else 0),
Rh(mmean * timescale, contin_wavevector, T, v),
contin_wavevector,
(1e5 / (contin_wavevector ** 2 * mmean * timescale) if contin_wavevector != 0 else 0),
Rh(mmean * timescale, contin_wavevector, T, v) * 1e7,
contin_wavevector * 1e-7])
mpeaks = np.c_[temp]
mpeaks[:, 0] = mpeaks[:, 0] / mpeaks[:, 0].sum()
mpeaks = mpeaks[sequence, :]
data.contin_bestFit.mpeaks = mpeaks
# calculate the number weight properties
temp = []
for p in data.contin_bestFit.peaks:
imin = max(0, p['iminmax'][0] - 1)
imax = min(len(data.contin_bestFit[0]), p['iminmax'][1] + 1)
weight = np.trapz(data.contin_bestFit[5, imin:imax], data.contin_bestFit[0, imin:imax])
nmean = np.trapz(data.contin_bestFit[5, imin:imax] * data.contin_bestFit[0, imin:imax],
data.contin_bestFit[0, imin:imax]) / weight
std = np.sqrt(np.trapz((data.contin_bestFit[5, imin:imax] * data.contin_bestFit[0, imin:imax] ** 2),
data.contin_bestFit[0, imin:imax]) / weight - nmean ** 2)
temp.append([weight,
nmean,
std,
p['mean_err'],
min(data.contin_bestFit[0].searchsorted(nmean), len(data.contin_bestFit[0])),
(1 / (contin_wavevector ** 2 * nmean * timescale) if contin_wavevector != 0 else 0),
Rh(nmean * timescale, contin_wavevector, T, v),
contin_wavevector,
(1e5 / (contin_wavevector ** 2 * nmean * timescale) if contin_wavevector != 0 else 0),
Rh(nmean * timescale, contin_wavevector, T, v) * 1e7,
contin_wavevector * 1e-7])
npeaks = np.c_[temp]
npeaks[:, 0] = npeaks[:, 0] / npeaks[:, 0].sum()
npeaks = npeaks[sequence, :]
data.contin_bestFit.npeaks = npeaks
except:
pass
return datalist
# noinspection PyIncorrectDocstring
# [docs]  (Sphinx HTML "[docs]" link artifact, not code)
def contin_display(result_list, select=None, npeak=2, *args, **kw):
    """
    Plot CONTIN results to get an overview over CONTIN output.

    Creates a plot with three graphs: measured correlation with the CONTIN fit,
    the chosen relaxation time distribution and the peak means.

    Parameters
    ----------
    result_list : dataList
        Output of dls.contin.
    select : int or list of int, optional
        Sequence of integers in result_list to select for output.
        Default (None or empty) selects all.
    npeak : int, default 2
        Number of strongest peaks shown in the relaxation time graph.
    'mass', 'number' : optional flags
        Given as positional string argument or as keyword: show the mass or
        number weighted distribution instead of the default intensity weighting.
    'dlogy' : optional flag
        Given as positional string argument or as keyword:
        shows distribution in y logscale.

    Notes
    -----
    | access diffusion of all first peaks by output[:,1,6]
    | mean and std as
    | output[:,1,6].mean()
    | output[:,1,6].std()

    """
    if select is None:
        select = []
    # NOTE(review): head imports `from . import GracePlot`; if that binds the
    # submodule (not a re-exported class) this call needs GracePlot.GracePlot()
    # — confirm against the package __init__.
    p = GracePlot()
    p.multi(1, 3)
    p1 = p.g[0]
    p1.subtitle('correlation')
    p2 = p.g[1]
    # Accept weighting flags from positional args OR keywords. The original
    # chose the subtitle from kw but the plotted data from args, so title and
    # data could disagree; evaluate once and use consistently for both.
    wantmass = 'mass' in args or 'mass' in kw
    wantnumber = 'number' in args or 'number' in kw
    wantdlogy = 'dlogy' in args or 'dlogy' in kw
    if wantmass:
        p2title = 'mass weighted \\ndistribution'
    elif wantnumber:
        p2title = 'number weighted \\ndistribution'
    else:  # intensity
        p2title = 'intensity weighted \\ndistribution'
    p2.subtitle(p2title)
    p3 = p.g[2]
    p3.subtitle('relaxation times')
    if isinstance(select, numbers.Integral):
        select = [select]
    if not select:
        select = range(len(result_list))
    for i in select:
        data = result_list[i]
        # measured correlation and the CONTIN fit
        p1.plot(data[0], data[1], line=0, symbol=-1)
        p1.plot(data.contin_result_fit[0], data.contin_result_fit[1], line=-1, symbol=0)
        p1.yaxis(min=0.0001, scale='n')
        p1.xaxis(min=0.0001, scale='l')
        p1.ylabel('g1(t)')
        p1.xlabel(r't')
        # pick the requested weighting; normalise the distribution to its maximum
        if wantmass:
            YY = data.contin_bestFit[4] / data.contin_bestFit[4].max()
            peaks = data.contin_bestFit.mpeaks
        elif wantnumber:
            YY = data.contin_bestFit[5] / data.contin_bestFit[5].max()
            peaks = data.contin_bestFit.npeaks
        else:  # intensity
            YY = data.contin_bestFit[1] / data.contin_bestFit[1].max()
            peaks = data.contin_bestFit.ipeaks
        legend = '%.3g a=%.3g PROB1 %.3g' % (data.contin_alpha,
                                             data.contin_bestFit.fitquality['ALPHA'],
                                             data.contin_bestFit.fitquality['PROB1 TO REJECT'])
        p2.plot(data.contin_bestFit[0], YY, legend=legend, symbol=-1)
        p2.xaxis(min=0.0001, scale='l')
        if wantdlogy:
            # log y scale on request
            p2.yaxis(min=0.0001, scale='l')
        p2.ylabel('distr(t)')
        p2.xlabel(r't')
        p3.ylabel(r'peak\smean\N ')
        p3.xlabel(r'q / nm\S-1')
        # only the npeak strongest peaks (peaks are pre-sorted by weight/position)
        for pp in peaks[:npeak]:
            p3.plot([pp[10]], [pp[1]], [pp[3] / pp[1]], symbol=-1, line=0)
    return
# file open mode used by readZetasizerNano below ('r' = text read mode)
_mode = 'r'
# noinspection PyBroadException
# [docs]  (Sphinx HTML "[docs]" link artifact, not code)
def readZetasizerNano(filename, NANOcoding="cp1257", NANOlocale=None):
    """
    Read ascii file data exported from Malvern Zetasizer Nano.

    Format of Zetasizer is one measurement per line with content defined in the export macro.
    The first line gives names of columns as header line, so a header line in the NANO export
    is necessary. Lists as Sizes, Volumes... need to be separated by "sample name"
    because of different length.

    Parameters
    ----------
    filename : string
        Zetasizer filename.
    NANOcoding : string
        UTF coding from Windows.
    NANOlocale : list of string
        Encoding of weekday names in NANO textfile.
         - ['de_DE','UTF-8'] NANO set to German
         - ['en_US','UTF-8'] NANO set to US (default if None)
         - ...

    Returns
    -------
    dataList with dataArray for each measurement
        Each dataArray contains:
         - correlation data [CorrelationDelayTime; g2minus1; g1]
           (if present, otherwise empty dataArray)
         - .distributions : distribution fits with columns
           [RelaxationTimes, Sizes, Intensities, Volumes, Diffusions] (if present)
         - fit results of the correlation function in attributes
           (some channels are discarded -> see Zetasizer settings)
            - .distributionFitDelayTimes : timechannels
            - .distributionFitData : measured points
            - .distributionFit : fit result
         - Check .attr for more attributes like
           SampleName, MeanCountRate, SizePeak1mean, SizePeak1width, Temperature

        If no correlation or distributions are present the data are zero and only
        parameters are stored. No unit conversion is done.
        CorrelationData.Y contains 1-g2=g1**2 with 0 for negative 1-g2.

    Notes
    -----
    The template for export in the Zetasizer software has some requirements:
     - The header line should be included.
     - Separator is the tab.
     - First entries should be "Serial Number" "Type" "Sample Name" "Measurement Date and Time".
     - Sequence data as "Sizes", "Intensities" should be separated by "Sample Name" as separator.
     - Appending to the same file is allowed if a new header line is included.

    Examples
    --------
    ::

        import jscatter as js
        import numpy as np
        # read example data
        alldls = js.dls.readZetasizerNano(js.examples.datapath + '/dlsPolymerSolution.txt', NANOlocale=['de_DE', 'UTF-8'])
        one=alldls[5]
        p=js.grace()
        p.multi(2,1,vgap=0.2)
        p[0].plot(one,legend='correlation data (all)')
        p[0].plot(one.distributions.X, one.distributions.Y / 10 , li=1, legend='intensity weighted distribution (x0.1)')
        p[0].plot(one.DistributionFitDelayTimes,one.DistributionFitData,sy=[1,0.2,4],le='fitted data')
        p[0].plot(one.DistributionFitDelayTimes,one.DistributionFit,sy=0,li=[1,2,5])
        p[1].plot(one.distributions[1], one.distributions[2] , li=1, legend='radius distribution intensity weighted')
        p[1].plot(one.distributions._Sizes, one.distributions._Volumes, li=1, legend='radius distribution volume weighted')
        p[0].xaxis(scale='l', label=r'correlation time / µs ', tick=[10, 9])
        p[1].xaxis(scale='l', label=r'radius / nm ')
        p[0].yaxis(label=r'g1 / probability ')
        p[1].yaxis(label=r'probability ')
        p[0].legend(x=1000,y=1.9)
        p[1].legend(x=50,y=30)
        p[0].title('Zetasizer data with distribution fit')
        p[0].subtitle('Results from instrument software')
        # p.save(js.examples.imagepath+'/Zetasizerexample.jpg',format='jpeg',size=[600/300,600/300]) #as jpg file

    .. image:: ../../examples/images/Zetasizerexample.jpg
     :width: 50 %
     :align: center
     :alt: Graceexample

    """
    if NANOlocale is None:
        NANOlocale = ['en_US', 'UTF-8']
    # column names that hold a sequence of values (variable length per record)
    seqcolumnNames = ['CorrelationDelayTimes', 'CorrelationData',
                      'RelaxationTimes', 'Sizes', 'Intensities', 'Volumes', 'Diffusions',
                      'DistributionFitDelayTimes', 'DistributionFitData', 'DistributionFit']
    # DLS data are written on a Windows computer with a specific encoding; read it.
    # locale.resetlocale()
    # remember the current locale so it can be restored at the end
    old_loc = locale.getlocale(locale.LC_TIME)
    # set it to German or English US depending on the DLS computer language
    # (needed so strptime understands the localized day/month names)
    locale.setlocale(locale.LC_TIME, NANOlocale)
    #
    with codecs.open(filename, _mode, NANOcoding) as f:
        zeilen = f.readlines()
    # create a lexicon of localized names to guess the datetime format string
    daynames = [locale.nl_langinfo(dd) for dd in [locale.DAY_1, locale.DAY_2, locale.DAY_3, locale.DAY_4, locale.DAY_5,
                                                  locale.DAY_6, locale.DAY_7]]
    monthnames = [locale.nl_langinfo(dd) for dd in [locale.MON_1, locale.MON_2, locale.MON_3, locale.MON_4, locale.MON_5,
                                                    locale.MON_6, locale.MON_7, locale.MON_8, locale.MON_9, locale.MON_10,
                                                    locale.MON_11, locale.MON_12]]
    # maps a strptime directive to a predicate that recognizes a matching token
    selectfmt = {'%A': lambda a: a in daynames,
                 '%B': lambda a: a in monthnames,
                 '%d': lambda a: a in [str(i) for i in range(1, 32)],
                 '%Y': lambda a: a.isdigit() and int(a) > 1900,
                 '%H:%M:%S': lambda a: ':' in a}
    output = dL()
    # scan each line; each data line is one measurement record
    for izeile, zeile in enumerate(zeilen):
        # normalize decimal comma and split the tab separated fields
        worte = zeile.rstrip().replace(',', '.').split('\t')
        para = {}  # single parameters
        data = {}  # sequences of data
        next = 0  # running index into worte (shadows builtin; kept as in original)
        # detect a header line (defines the column layout for following records)
        if any(x in worte[:4] for x in ['Serial Number', 'Record', 'Type', 'Sample Name']):
            headerline = worte
            header = collections.OrderedDict()  # the key order is preserved in OrderedDict
            for i, field in enumerate(headerline):
                # if multiple entries as sequence it can be identified by '['
                key = field.split('[')[0].split('(')[0]
                # NOTE(review): the membership test uses the raw `key` while the
                # stored key is whitespace-stripped; for keys containing spaces the
                # test never matches and the list is overwritten — confirm intent.
                if key not in header.keys():
                    header[''.join(key.split())] = [(i, field, key)]
                else:
                    header[''.join(key.split())] += [(i, field, key)]
            continue
        if 'Trend' in worte[:4]:
            continue  # skip Trend lines
        if 'Aggregation Point' in worte[:4]:
            continue  # skip line
        # walk through the header keys and consume the fields of this record
        for key in header.keys():
            if '[' in header[key][0][1] \
                    or key in seqcolumnNames:  # belongs to a sequence of data as distribution fit
                start = next
                try:
                    # try to find index of next "SampleName" as separator
                    next = worte.index(para['SampleName'][0], start)
                except (ValueError, KeyError):
                    print('in line ', izeile, '', key,
                          ' is missing a "sample Name" after the sequence. Check your export template')
                    continue
                data[''.join(key.split())] = worte[start:next]
                next += 1
            else:
                try:
                    # not a sequence; is only a parameter
                    if ''.join(key.split()) in para.keys():  # already there
                        # NOTE(review): list += str extends the list character-wise;
                        # probably intended to append the whole word — confirm.
                        para[''.join(key.split())] += worte[next]
                    else:  # new one
                        para[''.join(key.split())] = [worte[next]]
                except:
                    print('in line ', izeile, '', key,
                          ' is missing parameter value. Check your export template')
                    continue
                next += 1
        if data == {}:
            print('in line ', izeile, ' is missing data at all. Check your export template')
            continue
        # append to output as new dataArray
        names = [r'RelaxationTimes', r'Sizes', r'Intensities', r'Numbers', r'Volumes', r'Diffusions']
        # intersect with found data keys, then restore the order given by `names`
        columnnames = list(set(names).intersection(set(data.keys())))
        columnnames = [name for name in names if name in columnnames]
        if 'CorrelationDelayTimes' in data.keys() and len(data['CorrelationData']) > 0:
            # we save here g1 instead of 1-g2 => take root later to prevent negative roots
            cor = np.array(data['CorrelationData'], dtype=float)
            output.append(dA(np.c_[np.array(data['CorrelationDelayTimes'], dtype=float),
                                   np.sign(cor) * np.sqrt(np.abs(cor)), cor].T,
                             XYeYeX=[0, 1]))
            del data['CorrelationDelayTimes']
            del data['CorrelationData']
            output[-1].columnname = 'CorrelationDelayTime; g2minus1; g1'
        elif len(columnnames) > 0:
            # no correlation data: store the distributions as the main array
            output.append(dA(np.c_[[np.array(data[name], dtype=float) for name in columnnames]], XYeYeX=[0, 1]))
            output[-1].columnname = ''.join(['%s; ' % name for name in columnnames])
            # Y column is the first weight column present (Intensities, Numbers, ...)
            for coln in names[2:]:
                if coln in columnnames:
                    output[-1].setColumnIndex(ix=0, iy=columnnames.index(coln), iey=None)
                    break
            for name in columnnames:
                del data[name]
        else:  # only parameters
            output.append(dA(np.zeros(3), XYeYeX=[0, 1]))
            output[-1].comment += ['only parameters in file']
        # append parameters to result
        for key, item in para.items():
            if key == 'MeasurementDateandTime':
                # make a computational time
                timestring = ' '.join(item[0].replace(',', '').replace('.', '').split('\t'))
                # guess format string, something like '%A %d %B %Y %H:%M:%S'
                fmt = ''
                for dkeys in timestring.split(' '):
                    for fk, fv in selectfmt.items():
                        if fv(dkeys):
                            fmt += fk + ' '
                            continue
                fmt = fmt.rstrip()
                tt = time.mktime(time.strptime(timestring, fmt))
                setattr(output[-1], key.split('(')[0], time.ctime(tt))
                setattr(output[-1], 'MeasurementTimeSeconds', tt)
            elif key.split('(')[0] == u'T':
                setattr(output[-1], 'Temperature', np.array(item[0], dtype=float))
            else:
                try:
                    # prefer a numeric attribute; fall back to the raw string
                    setattr(output[-1], key.split('(')[0], np.array(item[0], dtype=float))
                except:
                    setattr(output[-1], key.split('(')[0], item[0])
        try:
            # append distribution if not already saved above
            distributions = dA(np.c_[[np.array(data[name], dtype=float) for name in columnnames]], XYeYeX=[0, 1])
            distributions.columnname = ''.join(['%s; ' % name for name in columnnames])
            for coln in names[2:]:
                if coln in columnnames:
                    distributions.setColumnIndex(ix=0, iy=columnnames.index(coln), iey=None)
                    break
            for name in columnnames:
                del data[name]
            output[-1].distributions = distributions
        except:
            pass
        try:
            # collect the distribution fit attributes into one dataArray
            # NOTE(review): the DistributionFit* attributes are only set in the
            # "remaining data" loop below, so this getattr may raise here and the
            # whole block silently does nothing — verify the intended order.
            distributions = []
            columnnames = ''
            for dist in ['DistributionFitDelayTimes', 'DistributionFitData', 'DistributionFit']:
                try:
                    distributions.append(getattr(output[-1], dist))
                    columnnames += dist + ';'
                    delattr(output[-1], dist)
                except:
                    pass
            output[-1].distributionFit = dA(np.r_[distributions].squeeze())
            output[-1].distributionFit.columnames = columnnames
            output[-1].distributionFit.setColumnIndex(iey=None)
            output[-1].distributionFit.comment = ['this is g1']
        except:
            pass
        # append all remaining data to parameters ------------------------------------------------------
        for key in data.keys():
            try:
                setattr(output[-1], key, np.array(data[key], dtype=float))
            except:
                pass
        output[-1].filename = filename
    # reset locale to what it was before this call
    locale.setlocale(locale.LC_TIME, old_loc)
    return output