Source code for jscatter.dls

# -*- coding: utf-8 -*-
# written by Ralf Biehl at the Forschungszentrum Jülich,
# Jülich Center for Neutron Science 1 and Institute of Complex Systems 1
#    Jscatter is a program to read, analyse and plot data
#    Copyright (C) 2015-2024  Ralf Biehl
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

"""
Functions to read DLS data, apply the CONTIN analysis and plot the CONTIN results.




"""

import io
import codecs
import collections
import locale
import numbers
import os
import sys
import time

import shutil
import subprocess
from math import pi

import numpy as np

from . import formel
from . import GracePlot
from .dataarray import dataArray as dA
from .dataarray import dataList as dL

# As CONTIN uses cgs units, we use them here too

kb = 1.3806505e-16  # Boltzmann constant in cm^2 g s^-2 K^-1 = erg/K; 1 erg/K = 1e-7 J/K

path = os.path.realpath(os.path.dirname(__file__))

# find continexe
continexe = shutil.which('contin')

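# Illustrative note (not in the original source): shutil.which returns None if no executable
# named 'contin' is found on PATH. One can then point continexe to an explicit (hypothetical)
# location before calling contin(), e.g.
#     import jscatter.dls
#     jscatter.dls.continexe = '/home/user/local/bin/contin'
# contin() raises an Exception if continexe is None.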

def _getfirst(a):
    """Return the first element if a is a sequence, otherwise a itself (float or list of float)."""
    try:
        return a[0]
    except (TypeError, IndexError):
        return a

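# e.g. _getfirst([1.2, 1.3]) -> 1.2 and _getfirst(1.2) -> 1.2  (illustrative values)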

def _w2f(word):
    """converts if possible to float"""
    try:
        return float(word)
    except ValueError:
        return word

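# e.g. _w2f('1.5E-3') -> 0.0015 and _w2f('ALPHA') -> 'ALPHA'  (illustrative values)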

def _visc(T, **kw):
    # T is the absolute temperature in K
    # calc viscosity in cPoise; formel.viscosity returns Pa*s and 1 cPoise = 1e-3 Pa*s
    if 'v' in kw and isinstance(kw['v'], str) and kw['v'][0] == 'd':
        v = 1e3 * formel.viscosity(mat='d2o', T=T)  # heavy water
    elif 'v' in kw and isinstance(kw['v'], str) and kw['v'][0] == 'h':
        v = 1e3 * formel.viscosity(mat='h2o', T=T)
    elif 'v' in kw:
        v = kw['v']  # a numeric viscosity is used as given
    else:
        v = 1e3 * formel.viscosity(mat='h2o', T=T)  # default to normal water
    return v  # in cPoise

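# Usage sketch with illustrative values (water at 293 K has a viscosity of about 1 cPoise):
#     _visc(293)           # h2o at 293 K -> ~1.0 cPoise
#     _visc(293, v='d2o')  # heavy water at 293 K
#     _visc(293, v=1.2)    # a numeric value is returned unchanged (already in cPoise)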

def _massweight(ci, qq, Ri):
    """
    Transform intensity weights ci to mass/volume weights for spheres of radius Ri
    at scattering vector qq.
    """
    mw = ci * Ri ** 3 * qq ** 6 / (np.sin(qq * Ri) - qq * Ri * np.cos(qq * Ri)) ** 2
    # norm = np.trapz(mw, Ri)
    norm = mw.sum()
    return mw/norm

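# Reasoning sketch for the weighting in _massweight (constant factors cancel in the normalization):
# the sphere form factor amplitude is F(qR) = 3*(sin(qR)-qR*cos(qR))/(qR)**3 and the scattered
# intensity of N spheres of radius R is ~ N*R**6*F**2, so N ~ ci/(R**6*F**2) and the
# mass weight is N*mass ~ ci*R**3*q**6/(sin(qR)-qR*cos(qR))**2 as coded above.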

def _numberweight(ci, qq, Ri):
    # transform intensity weights ci to number weights for spheres of radius Ri at scattering vector qq
    nw = ci * qq ** 6 / (np.sin(qq * Ri) - qq * Ri * np.cos(qq * Ri)) ** 2
    # norm = np.trapz(nw, Ri)
    norm = nw.sum()
    return nw/norm

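# Same reasoning as for _massweight but without the extra factor mass ~ R**3:
# the number weight is N ~ ci/(R**6*F**2) ~ ci*q**6/(sin(qR)-qR*cos(qR))**2 as coded above.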

def contin(datalist, Ngrid=256, tmin=-2, tmax=-2, bgr=0, distribution='dls', RDG=-1, timescale=1e-6, **kw):
    r"""
    Inverse Laplace transform known as CONTIN analysis developed by Steven Provencher [1]_ [2]_ [4]_.

    This is a wrapper for the original FORTRAN program from Steven Provencher.
    The CONTIN analysis is developed for heterodisperse, polydisperse, and multimodal systems
    that cannot be resolved with the cumulant method. The resolution for separating two different
    particle populations is approximately a factor of five or higher, and the difference in relative
    intensities between two different populations should be less than 1:10^-5
    (from Wikipedia and own experience).
    IUSER(i) and Rxx (short for RUSER(xx)) are defined in the original FORTRAN code.

    Parameters
    ----------
    datalist : dataList or single dataArray
        Correlation data as measured e.g. by DLS.
        Check the *typ* parameter dependent on input data g1 or g2!
        Each dataArray is processed.
    timescale : float, default 1e-6
        Timescale factor. CONTIN uses internal units of seconds.
        As typical instruments use microseconds, the time axis needs to be scaled by this factor.
    tmin, tmax : float, int
        First/last time value to use in the fit.
        If tmin/tmax are negative, that number of points is dropped at the begin/end of the input data,
        e.g. tmin=-5 drops the 5 shortest time points; analogous, a negative tmax drops the last points.
    Ngrid : int
        Number of grid points between gmin and gmax.
    gmin, gmax : float
        First/last value of the distribution X scale.
        If not given, calculated from tmin/tmax according to the used kernel.
    qgmin, qgmax : float
        Only for 'relax'. Same as gmin, gmax except that gmin=qgmin/q^2.
    qtmin, qtmax : float
        q^2*t scaled times.
    bgr : float, default 0
        If >0: assume a background and fit it.
    typ : int, default 0
        - 0 : input is g1 (field correlation).
        - 1 : input is intensity correlation g2 as measured by DLS; g1 = sign(g2/R21-1)*abs(g2/R21-1)^0.5.
        - -1 : input is g2-1; takes only the square root, sign preserved as for 1.
          ALV and Malvern Zetasizer save g2-1.
    distribution : string, default 'dls'
        Selects the distribution type (IUSER(10)) as {'m':1, 'D':2, 'L':2, 'r':3, 'u':4, 'x':4, 'd':4, 'T':4}
        using RUSER(15,16,17,19).
        The kernel :math:`g_1=l^{R23}*exp(-R21*t*l^{R22})` always describes the field correlation.
        - 'x' : intensity weighted relaxation time distribution, l=T -> exp(-t/T)
          with R21=1, R22=-1, R23=0.
        - 'molweight' = 'mass' : weight fraction molecular weight (mw) distribution,
          l=mw -> mw*exp(-q**2*R18*t*mw**R22)

          - R18 to be set in input: relates the diffusion coefficient to the mol weight.
          - R22 to be set in input: exponent, e.g. for polymers in THETA solvent Diffusion=R18*l^0.5.
          - R21 = R20**2*R18 = q**2*R18
          - R23 = 1
          - l=mw and field(0) ~ mw for weight fraction (intensity ~ mw**2).
        - 'Diff' : intensity weighted diffusion coefficient distribution with l=Diff -> exp(-q**2*t*Diff)

          - R23 = 0
          - R22 = 1
          - R21 = R20^2 = q^2
          - l is therefore the diffusion coefficient.
        - 'Laplace' : Laplace transform with exp(-t*Gamma)

          - R23 = 0
          - R22 = 1
          - sets R16=0 (wavelength) => R20 undefined and R21=1.
          - l is therefore a relaxation rate with transform exp(-t*l).
        - 'radius' : weight fraction radius distribution of spheres satisfying Stokes law with
          l=rh -> rh**3*exp(-q**2*[k_B*T/(0.06*pi*visc*rh)]*t)

          - R23 = 3
          - R22 = -1
          - R21 = k_B*R18*R20^2/(0.06*pi*R19)
          - l=rh is the hydrodynamic radius.
          - weight fraction as field(0) ~ V ~ rh**3 for spheres.
        - 'T1' : intensity weighted relaxation time distribution, l=T1 -> T1*exp(-t/T1)

          - for NMR rotational diffusion T1 with R21=1, R22=-1, neglecting T2.
          - intensity ~ number of contributing atoms ~ M ~ V ~ Rh**3
          - rotational correlation time tc=(4pi*visc/kT)*Rh**3 -> intensity(0) ~ tc -> R23=1.
        - 'dls' : intensity weighted radius distribution of spheres satisfying Stokes law
          with l=rh -> exp(-q**2*[k_B*T/(0.06*pi*visc*rh)]*t)

          - R21 = k_B*T/(0.06*pi*visc)
          - R22 = -1, R23 = 0
        - 'user' : general case, RUSER(15,16,17,19,21,22,23) all user specified,
          e.g. R16=0, R23=0, R22=-1, R21=0 is Laplace with Gamma^-1.
    RDG : 1, -1, default -1
        If 1, use the Rayleigh-Debye-Gans formfactor with wall thickness WALL [cm] of a hollow sphere
        (default WALL=0). distribution needs to be the 'radius' distribution.
    l : float, R16, overrides value in dataArray if given
        Overrides the wavelength in the data; default 632 nm
        (except for Laplace and relaxation, where l=0 overrides this because no wavelength is used).
    n : float, R15, overrides value in dataArray if given
        Refractive index of solvent, default 1.333.
    a : float, R17, overrides value in dataArray if given
        Scattering angle in degrees, default 90°.
    T : float, R18, overrides value in dataArray if given
        Absolute temperature in K or factor; default is 293 K.
    WALL : float, R24, overrides value in dataArray if given
        Wall thickness in cm for simulation of hollow spheres.

        - =0 normal sphere as default.
        - If WALL<0 -> WALL=0.
    v : float, R19, overrides value in dataArray if given
        Viscosity in centipoise; 1 cPoise = 1e-3 Pa*s.

        - If v='d2o' or 'h2o' the viscosity of d2o or h2o at the given temperature T is used.
        - Default is h2o at T.
    write : any
        If write is given as keyword argument (write=1), the CONTIN output is written to a file
        with '.con' appended.

    Returns
    -------
    dataList : input with best solution added as attributes prepended by 'contin_'
        - .contin_bestFit : **best fit result**
        - .contin_result_fit : fitted correlation function
        - .contin_fits : sequence of "not best fits" in CONTIN with same structure as bestFit
        - .contin_alpha : ALPHA of best solution
        - .contin_alphalist : list of ALPHA in fits

        bestFit dataArray with best solution distribution content:

        - .contin_bestFit contains
          [*relaxationtimes, intensityweightci, errors, hydrodynamicradii, massweightci, numberweightci*].
          The columns can be addressed with underscore as e.g. *.contin_bestFit._hydrodynamicradii*.
          See the second example.
          Weighted ci are normalized such that the *sum* over all points equals 1.
          The corresponding probability is the probability of the interval around the corresponding
          relaxation time or Rh. Remember that because of the log scale the intervals change width.
        - .contin_bestFit.attr() shows all available parameters from the CONTIN fit and other parameters.
        - .contin_bestFit.fitquality : measure for the fit quality, ALPHA and ...

        Peak results from CONTIN:

        - .contin_bestFit.peaks : **intensity weighted relaxation time distribution** as dict, primary result
        - .contin_bestFit.ipeaks : **intensity weighted relaxation time distribution** for all peaks with

          - weight : peak weight (CONTIN output)
          - mean : peak mean (CONTIN output)
          - std : peak standard deviation (CONTIN output)
          - mean_err : error of mean (CONTIN output)
          - imean : index of mean
          - 1/(q**2*mean) : diffusion constant in cm**2/s determined from mean
          - Rh : hydrodynamic radius in cm determined from mean
          - q : wavevector in 1/cm
          - 1/(q**2*mean) : diffusion constant in nm**2/ns
          - Rh : hydrodynamic radius in nm determined from mean
          - q : wavevector in 1/nm
        - .contin_bestFit.mpeaks : **peaks mass weighted**, same content as ipeaks for the 2 strongest
        - .contin_bestFit.npeaks : **peaks number weighted**, same content as ipeaks for the 2 strongest
        - .contin_bestFit.ipeaks_name : content of ipeaks
        - .contin_bestFit.info :
        - .contin_bestFit.baseline : baseline + error
        - .contin_bestFit.momentEntireSolution : all peaks together
        - .contin_bestFit.maximum : maximum value
        - .contin_bestFit.filename :
        - .contin_bestFit.imaximum : index of maximum value

    Notes
    -----
    CONTIN source and executable can be downloaded from http://s-provencher.com/pages/contin.shtml.
    For using CONTIN and this wrapper you need a working CONTIN executable in your path. ::

        # Download the FORTRAN source code from the web page in a browser or
        wget http://s-provencher.com/pub/contin/contin.for.gz
        gunzip contin.for.gz
        # compile with gfortran
        gfortran contin.for -o contin -fallow-argument-mismatch -std=legacy
        # move to a place in your PATH e.g. $HOME/local/bin
        mv contin $HOME/local/bin/contin
        # check if it is executable, open a new shell
        which contin
        # if not found, check if the path is in your PATH variable and set it in .bashrc or .profile
        # if still not found, maybe it is not executable; so make it executable
        chmod u+x $HOME/local/bin/contin

    If peaks seem small, please integrate them, as the peak area determines the contributed intensity
    (see last example). The peak area is strongly dependent on the peak point separation
    dxi = (x[i]-x[i-1]), in particular on a log scale. This weight is in some plots added to the
    probability as Pi*dxi, resulting in equal height. See [5]_ for details. ::

        ff = result.contin_bestFit
        p1 = (10<ff.X) & (ff.X<100)                            # peak borders 10 and 100
        fp1 = np.trapz(ff.Y[p1],ff.X[p1])/np.trapz(ff.Y,ff.X)  # fraction of full signal

    **An example dataset with noise**

    In this synthetic example the noise causes a peak width of about 20% (std/mean) as it is typically
    found for monodisperse samples measured for short times (e.g. proteins on a Zetasizer in automatic
    mode). This contribution is not caused by polydispersity and is always present, dependent on the
    noise level. The noise level can be reduced using longer measurement times, thus reducing the width.
    In real measurements one needs to test with different measurement times whether polydispersity is
    present (thus independent of the noise level).

    - D = 0.05 nm²/ns corresponds to a hydrodynamic radius of 4.4 nm and results in a relaxation time
      :math:`\Gamma= 1/(q^2D) \approx 58 \mu s`.
    - The plots below show the CONTIN result as correlation time distribution with mean 58 µs ± 12 µs
      and peak weight 95% in good approximation.
    - The second plot shows the radius distribution for different weights. Probabilities are for an
      interval around a point x value such that the sum of all points is normalized
      (not the integral on the log scale).
    - For real measurements one may cut noisy short times or dust contaminated long times.

    ::

        import jscatter as js
        import numpy as np
        t=js.loglist(1,10000,1000)                     # times in microseconds
        q=4*np.pi*1.333/632*np.sin(np.deg2rad(90)/2)   # 90 degrees for 632 nm, unit is 1/nm
        D=0.05*1000                                    # nm**2/ns * 1000 = units nm**2/microseconds
        noise=0.0001                                   # typical < 1e-3
        data=js.dA(np.c_[t,0.95*np.exp(-q**2*D*t)+noise*np.random.randn(len(t))].T)
        # add attributes to overwrite defaults
        data.Angle=90          # scattering angle in degrees
        data.Temperature=293   # temperature of measurement in K
        data.Viscosity=1       # viscosity in cPoise
        data.Refractive=1.333  # refractive index
        data.Wavelength=632    # wavelength
        # do CONTIN
        dr=js.dls.contin(data,distribution='x')
        js.dls.contin_display(dr)  # display overview

        # same to demonstrate access to distributions
        bf=dr[0].contin_bestFit
        p=js.grace(1.,1.)
        p.multi(2,1,vgap=0.25)
        # access correlation function and relaxation time distribution
        p[0].plot(data,sy=[1,1,1],legend='simulated data')
        p[0].plot(dr[0],li=[1,3,3],sy=0,le='contin result')
        p[0].plot(bf.X,bf.Y*10,sy=[2,0.5,2],le='relaxation time distribution')
        p[0].xaxis(scale='log',label=r'\xG\f{} / µs',min=1,max=1e5)
        p[0].yaxis(label=r'G\s1\N / P(\xG\f{})',min=0,max=1.1)
        p[0].legend(x=300,y=0.7)
        # hydrodynamic radius distribution in different weights
        p[1].plot(bf[3],bf[1],sy=[2,0.5,2],li=[1,1,''],le='intensity weight')
        p[1].plot(bf[3],bf[4],sy=[2,0.5,3],li=[1,1,''],le='mass weight')
        p[1].plot(bf[3],bf[5],sy=[2,0.5,4],li=[1,1,''],le='number weight')
        p[1].xaxis(scale='log',label=r'R\sh\N / cm',min=1e-7,max=1e-5)
        p[1].yaxis(label=r'P(R\sh\N)',min=0,max=0.1)
        p[1].legend(x=1.5e-6,y=0.1)
        # Rh of first peak
        p[1].text(f'Rh from contin {bf.ipeaks[0,6]:.2g} cm',x=1.3e-6,y=0.04)
        # p.save(js.examples.imagepath+'/contin.jpg')

        # access peak values by dr[0].contin_bestFit.ipeaks_name
        dr[0].contin_bestFit.ipeaks
        # calc std/mean of first peak
        dr[0].contin_bestFit.ipeaks[0,2]/dr[0].contin_bestFit.ipeaks[0,1]

    .. image:: ../../examples/images/contin.jpg
        :align: center
        :width: 50 %
        :alt: sphere

    **Bimodal distribution** and the suppression of larger aggregates in mass and number weight.
    The two particle sizes with a factor of 20 in radius show the same contribution to the scattering.
    For mass or number weight the dependence of the scattering intensity on :math:`R^6` compensates
    the contribution of the larger aggregates.
    The area under both intensity weighted peaks is equal, as can be shown by integration.
    ::

        import jscatter as js
        import numpy as np
        t=js.loglist(0.125,10000,1000)                 # times in microseconds
        q=4*np.pi*1.333/632*np.sin(np.deg2rad(90)/2)   # 90 degrees for 632 nm, unit is 1/nm
        D=0.05*1000                                    # nm**2/ns * 1000 = units nm**2/microseconds
        noise=0.0001                                   # typical < 1e-3
        data=js.dA(np.c_[t,0.45*np.exp(-q**2*D*t)+0.45*np.exp(-q**2*D*20*t)
                           +noise*np.random.randn(len(t))].T)
        # add attributes to overwrite defaults
        data.Angle=90          # scattering angle in degrees
        data.Temperature=293   # temperature of measurement in K
        data.Viscosity=1       # viscosity in cPoise
        data.Refractive=1.333  # refractive index
        data.Wavelength=632    # wavelength
        # do CONTIN
        dr=js.dls.contin(data,distribution='x')

        # same to demonstrate access to distributions
        bf=dr[0].contin_bestFit
        p=js.grace(1.,1.)
        p.multi(2,1,vgap=0.25)
        # access correlation function and relaxation time distribution
        p[0].plot(data,sy=[1,1,1],legend='simulated data')
        p[0].xaxis(scale='log',label=r'\xG\f{} / µs',min=1,max=1e5)
        p[0].yaxis(label=r'G\s1\N / P(\xG\f{})')
        p[0].plot(dr[0],li=[1,3,3],sy=0,le='contin result')
        p[0].plot(bf.X,bf.Y*10,sy=[2,0.5,2],le='relaxation time distribution')
        p[0].legend(x=20,y=0.8)
        # hydrodynamic radius distribution in different weights
        p[1].plot(bf._hydrodynamicradii,bf._intensityweightci,sy=[2,0.5,2],li=[1,1,''],le='intensity weight')
        p[1].plot(bf._hydrodynamicradii,bf._massweightci,sy=[2,0.5,3],li=[1,1,''],le='mass weight')
        p[1].plot(bf._hydrodynamicradii,bf._numberweightci,sy=[2,0.5,4],li=[1,1,''],le='number weight')
        p[1].xaxis(scale='log',label=r'R\sh\N / cm',min=1e-9,max=1e-5)
        p[1].yaxis(label=r'P(R\sh\N)',min=0,max=0.12)
        p[1].legend(x=1.5e-7,y=0.1)
        # Rh of first two peaks
        p[1].text(f'Rh_1 ={bf.ipeaks[0,6]:.3g} cm \\nRh_2 ={bf.ipeaks[1,6]:.3g} cm',x=1.3e-7,y=0.04)
        # p.save(js.examples.imagepath+'/contin_bimodal.jpg')

        # peak integration resulting in about 0.5 for both
        p1=(1<bf.X) & (bf.X<20)      # peak borders 1 and 20
        fp1=np.trapz(bf.Y[p1],bf.X[p1])/np.trapz(bf.Y,bf.X)
        p2=(40<bf.X) & (bf.X<400)    # peak borders 40 and 400
        fp2=np.trapz(bf.Y[p2],bf.X[p2])/np.trapz(bf.Y,bf.X)

    .. image:: ../../examples/images/contin_bimodal.jpg
        :align: center
        :width: 50 %
        :alt: sphere

    R20 (scattering vector) is calculated as R20 = 4e-7*pi*R15/R16*sin(R17/2) if R16!=0, else R20=0.

    References
    ----------
    .. [1] CONTIN: A general purpose constrained regularization program for inverting noisy linear
           algebraic and integral equations.
           Provencher, S. Computer Physics Communications 27, 229 (1982). doi:10.1016/0010-4655(82)90174-6
    .. [2] http://s-provencher.com/pub/contin/cpc2.pdf
    .. [3] A constrained regularization method for inverting data represented by linear algebraic or
           integral equations.
           Provencher, S. W. Computer Physics Communications 27, 213-227 (1982). doi:10.1016/0010-4655(82)90173-4
    .. [4] Inverse problems in polymer characterization: Direct analysis of polydispersity with photon
           correlation spectroscopy.
           Provencher, S. W. Makromol. Chem. 180, 201 (1979).
    .. [5] Transformation Properties of Probability Density Functions.
           Stanislav Sýkora. doi:10.3247/SL1Math04.001

    Original code description in CONTIN ::

        C THE FOLLOWING ARE THE NECESSARY INPUT -
        C
        C DOUSIN = T (DOUSIN MUST ALWAYS BE .TRUE..)
        C
        C LUSER(3) = T, TO HAVE FORMF2, THE SQUARED FORM FACTORS, COMPUTED IN
        C              USERK.
        C            = F, TO SET ALL THE FORMF2 TO 1. (AS WOULD BE APPROPRIATE
        C              WITH LAPLACE TRANSFORMS).
        C RUSER(24) MAY BE NECESSARY INPUT TO SPECIFY THE FORM FACTOR (E.G.,
        C           THE WALL THICKNESS OF A HOLLOW SPHERE) IF LUSER(3)=T.
        C           SEE COMMENTS IN USERK.
        C IUSER(18) MAY BE NECESSARY INPUT IF LUSER(3)=T (E.G., TO SPECIFY THE
        C           NUMBER OF POINTS OVER WHICH THE SQUARED FORM FACTORS WILL
        C           BE AVERAGED). SEE COMMENTS IN USERK.
        C
        C RUSER(16) = WAVELENGTH OF INCIDENT LIGHT (IN NANOMETERS),
        C           = 0, IF RUSER(20), THE MAGNITUDE OF THE SCATTERING VECTOR
        C             (IN CM**-1), IS NOT TO BE COMPUTED. WHEN
        C             RUSER(16)=0, RUSER(15) AND RUSER(17) NEED NOT BE
        C             INPUT, AND CONTIN WILL SET RUSER(21)=1
        C             (AS APPROPRIATE WITH LAPLACE TRANSFORMS).
        C
        C RUSER(15) = REFRACTIVE INDEX.
        C RUSER(17) = SCATTERING ANGLE (IN DEGREES).
        C
        C
        C IUSER(10) SELECTS SPECIAL CASES OF USERK FOR MORE CONVENIENT USE.
        C
        C IUSER(10) = 1, FOR MOLECULAR WEIGHT DISTRIBUTIONS FROM PCS
        C              (WHERE THE SOLUTION, S(G), IS SUCH THAT S(G)DG IS
        C              THE WEIGHT FRACTION WITH MOLECULAR WEIGHT BETWEEN
        C              G AND G+DG).
        C              CONTIN SETS -
        C                RUSER(23) = 1.,
        C                RUSER(21) = RUSER(18)*RUSER(20)**2.
        C                (SEE ABOVE DISCUSSION OF RUSER(16).)
        C              YOU MUST INPUT -
        C                RUSER(18) TO SATISFY THE EQUATION (IN CGS UNITS) -
        C                  (DIFFUSION COEFF.)=RUSER(18)*(MOL. WT.)**RUSER(22).
        C                RUSER(22) (MUST ALSO BE INPUT, TYPICALLY ABOUT -.5).
        C
        C IUSER(10) = 2, FOR DIFFUSION-COEFFICIENT DISTRIBUTONS OR LAPLACE
        C              TRANSFORMS (WHERE G IS DIFF. COEFF. IN CM**2/SEC
        C              OR, E.G., TIME CONSTANT).
        C              CONTIN SETS -
        C                RUSER(23) = 0.,
        C                RUSER(22) = 1.,
        C                RUSER(21) = RUSER(20)**2 (SEE ABOVE DISCUSSION
        C                  OF RUSER(16).).
        C
        C IUSER(10) = 3, FOR SPHERICAL-RADIUS DISTRIBUTIONS, ASSUMING THE
        C              EINSTEIN-STOKES RELATION (WHERE THE SOLUTION, S(G),
        C              IS SUCH THAT S(G)DG IS THE WEIGHT FRACTION OF
        C              PARTICLES WITH RADIUS (IN CM) BETWEEN G AND G+DG.
        C              WEIGHT-FRACTION DISTRIBUTIONS YIELD BETTER SCALED
        C              PROBLEMS THAN NUMBER-FRACTION DISTRIBUTIONS, WHICH
        C              WOULD REQUIRE RUSER(23)=6.)
        C              CONTIN SETS -
        C                RUSER(23) = 3.,
        C                RUSER(22) = -1.,
        C                RUSER(21) = RUSER(20)**2*(BOLTZMANN CONST.)*
        C                  RUSER(18)/(.06*PI*RUSER(19)).
        C                (SEE ABOVE DISCUSSION OF RUSER(16).)
        C              YOU MUST HAVE INPUT -
        C                RUSER(18) = TEMPERATURE (IN DEGREES KELVIN),
        C                RUSER(19) = VISCOSITY (IN CENTIPOISE).
        C
        C IUSER(10) = 4, FOR GENERAL CASE, WHERE YOU MUST HAVE INPUT -
        C              RUSER(J), J = 21, 22, 23.
        C
        C
    """
    # contin is called in a shell like:  contin <input.txt >output.txt
    # we mimic this by subprocess.run
    if continexe is None:
        raise Exception('There is no contin executable found. ' +
                        'Please compile and place executable at callable path. ' +
                        'See documentation of DLS module.')
    if hasattr(datalist, '_isdataArray'):
        datalist = dL(datalist)
    elif isinstance(datalist, str):
        datalist = dL(datalist)
    if len(datalist) == 0:
        raise NameError('There are no data in datalist')
    # some consistency tests
    if 'typ' in kw:
        typ = kw['typ']
    else:
        typ = 0
    if 'R22' in kw:
        if kw['R22'] == 0:
            raise ValueError('R22 is not allowed to be equal zero if given')

    # some defaults and functions #######################################################
    if distribution[0] == 'L':
        # set R16=0 to get a real Laplace transform; l is otherwise the wavelength
        l = 0
    elif 'l' in kw:
        l = kw['l']  # which is set with a default here
    else:
        l = 632.  # in nm
    if 'T' in kw:
        T = kw['T']
        del kw['T']
        if T < 273:
            print('Warning: temperature below ZERO!! ')
    elif 'R18' in kw:
        T = kw['R18']  # here we misuse T as the proportionality constant for the molweight kernel
    else:
        T = 293.  # in K
    if 'n' in kw:
        n = kw['n']
    else:
        n = 1.333  # default refractive index
    if 'a' in kw:
        a = kw['a']
    else:
        a = 90.  # default angle in degrees
    if bgr != 0:
        bgr = 1  # fit a single constant background (NLINF); 0 means no background
    # wavevector in 1/cm
    qq = lambda n, ll, theta: 4. * pi * n / (ll * 1e-7) * np.sin(np.deg2rad(theta) / 2.)
    # hydrodynamic radius Rh(relaxation time) in cm; gamma is the relaxation time, visc in cPoise
    Rh = lambda gamma, q, T, visc: kb * T / (pi * 0.06 * visc) * q ** 2 * gamma

    ##########################################################
    # defines the kind of the distribution kernel
    distr = {'m': 1, 'D': 2, 'L': 2, 'r': 3, 'u': 4, 'x': 4, 'd': 4, 'T': 4}
    if distribution[0] == 'm' and 'R18' not in kw.keys() and 'R21' not in kw.keys():
        print('please provide R18 and R22 in input to function')
        print("to describe D=R18*molWeight^R22 e.g. polymer R22=0.5")
        return
    if 'R22' in kw:
        R22 = float(kw['R22'])
    else:
        R22 = 0
    if 'R21' in kw:
        R21 = float(kw['R21'])
    else:
        R21 = 0
    if 'R23' in kw:
        R23 = float(kw['R23'])
    else:
        R23 = 0
    edist = ('x', 'T', 'd')  # here we set R21-R23 explicitly in the later header part
    if distribution[0] == 'x':
        # l^R23*exp(-R21*t*l^R22) with R21=1, R22=-1, R23=0  ==>  l=T -> exp(-t/T)
        R21 = 1
        R22 = -1
        R23 = 0
    if distribution[0] == 'T':
        # l^R23*exp(-R21*t*l^R22) with R21=1, R22=-1, R23=1  ==>  l=T -> T*exp(-t/T)
        R21 = 1
        R22 = -1
        R23 = 1
    if distribution[0] == 'd':
        # l^R23*exp(-R21*t*l^R22) with R22=-1, R23=0  ==>  l=Rh -> 1*exp(-R21*t/Rh)
        # I(t)=exp(-[q**2*k_B*T/(0.06*pi*visc)] * t/Rh)
        # l -> rh ; t=t ; R21 -> q**2*[k_B*T/(0.06*pi*visc)]
        R21 = kb * T / (0.06 * pi * _visc(T, **kw)) * qq(n, l, a) ** 2  # D*Rh*q**2
        R22 = -1
        R23 = 0

    # write a common header for CONTIN as ASCII inputfile
    last = 1  # for multi dataset evaluation this is -1 (false) except for the last one
    # we process here ONLY single files
    elements = 40  # just to have an array for it; the last 2 lines are "END" and NY
    header = np.array([''] * elements, dtype='|S70')  # a single fortran line
    header[0] = 'filename'.ljust(70)  # the loaded file
    header[1] = 'LAST'.ljust(6) + str().rjust(5) + ('%15.4E' % last)  # see above
    # header[2] = 'GMNMX'.ljust(6)+str(1).rjust(5)+('%15.4E' % gmin)  # first point of the distribution to fit
    # header[3] = 'GMNMX'.ljust(6)+str(2).rjust(5)+('%15.4E' % gmax)  # last point of the distribution to fit
    # fit strategy how to determine errors -> 5 from a prefit, results in 2 fits but good errors;
    # unweighted fit IWT=1 -> errors equal; IWT=4 direct input of errors not implemented
    header[4] = 'IWT'.ljust(6) + str().rjust(5) + ('%15.4E' % 5)
    # number of points around a point to determine the error; safety margin default 10; we use 0
    header[5] = 'NERFIT'.ljust(6) + str().rjust(5) + ('%15.4E' % 0)
    # number of equally spaced sets in tk; <0 means direct input as used here
    header[6] = 'NINTT'.ljust(6) + str().rjust(5) + ('%15.4E' % -1)
    header[7] = 'IFORMT'.ljust(6) + str().rjust(20)  # format of the time variable for direct input
    header[8] = '(1E12.5)'.ljust(26)  # 1 in a row
    header[9] = 'IFORMY'.ljust(6) + str().rjust(20)  # format of the y variable (correlation) for direct input
    header[10] = '(1E12.5)'.ljust(26)  # 1 in a row
    if 'IGRID' in kw.keys():
        # IGRID=2 is a log grid; 1 is an equally spaced grid; default 2 = log grid
        header[11] = 'IGRID'.ljust(6) + str().rjust(5) + ('%15.4E' % float(kw['IGRID']))
    header[12] = 'NLINF'.ljust(6) + str().rjust(5) + ('%15.4E' % bgr)  # allows a single const background; 0 no bgr
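    # Illustrative note (not in the original source): each header entry above is a fixed-width CONTIN
    # input line, e.g. 'RUSER'.ljust(6) + str(16).rjust(5) + ('%15.4E' % 632.) gives
    # 'RUSER    16     6.3200E+02', which sets RUSER(16), the wavelength, to 632 nm.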
    header[13] = 'NG'.ljust(6) + str().rjust(5) + ('%15.4E' % Ngrid)  # Ngrid points between gmin, gmax
    # Do USer INput; to use the below given values anyway this is the default
    header[14] = 'DOUSIN'.ljust(6) + str().rjust(5) + ('%15.4E' % 1)
    # selects the kernel, see help above
    header[15] = 'IUSER'.ljust(6) + str(10).rjust(5) + ('%15.4E' % distr[distribution[0]])
    header[16] = 'RUSER'.ljust(6) + str(15).rjust(5) + ('%15.4E' % n)  # refractive index
    header[17] = 'RUSER'.ljust(6) + str(16).rjust(5) + ('%15.4E' % l)  # wavelength in nm
    header[18] = 'RUSER'.ljust(6) + str(17).rjust(5) + ('%15.4E' % a)  # scattering angle in degrees
    # absolute temperature in K or proportionality constant
    header[19] = 'RUSER'.ljust(6) + str(18).rjust(5) + ('%15.4E' % T)
    header[20] = 'RUSER'.ljust(6) + str(19).rjust(5) + ('%15.4E' % _visc(T, **kw))  # viscosity in centipoise
    # (0) means don't change, input is g1;
    # (1) input is intensity correlation g2 => calculate (g2/R21-1)^0.5;
    # (-1) input is g2-1, takes only the square root;
    # ALV and Zetasizer data are g2-1 -> -1
    header[25] = 'RUSER'.ljust(6) + str(10).rjust(5) + ('%15.4E' % typ)
    # use RayleighDebyeGans formfactor (set to true => 1) or const 1 (set to false => -1)
    header[26] = 'LUSER'.ljust(6) + str(3).rjust(5) + ('%15.4E' % RDG)
    if 'R16' in kw.keys():
        header[17] = 'RUSER'.ljust(6) + str(16).rjust(5) + ('%15.4E' % float(kw['R16']))
    if 'WALL' in kw.keys():
        # wall thickness in cm in RDG for simulating hollow spheres; =0 normal sphere
        header[24] = 'RUSER'.ljust(6) + str(24).rjust(5) + ('%15.4E' % float(kw['WALL']))
    if 'ALPS1' in kw.keys():
        # take this alpha in the preliminary error analysis
        header[27] = 'ALPST'.ljust(6) + str(1).rjust(5) + ('%15.4E' % float(kw['ALPS1']))
    if 'ALPS2' in kw.keys():
        # take this alpha in the final analysis and as the chosen solution
        header[28] = 'ALPST'.ljust(6) + str(2).rjust(5) + ('%15.4E' % float(kw['ALPS2']))
    if 'I18' in kw.keys():
        # formfactor average over 2*I18+1 points
        header[29] = 'IUSER'.ljust(6) + str(18).rjust(5) + ('%15.4E' % float(kw['I18']))
    if distribution[0] in edist or 'R21' in kw.keys():
        header[22] = 'RUSER'.ljust(6) + str(21).rjust(5) + ('%15.4E' % R21)
    if distribution[0] in edist or 'R23' in kw.keys():
        header[23] = 'RUSER'.ljust(6) + str(23).rjust(5) + ('%15.4E' % R23)
    if distribution[0] in edist or 'R22' in kw.keys():
        header[21] = 'RUSER'.ljust(6) + str(22).rjust(5) + ('%15.4E' % R22)
    if 'IQUAD' in kw.keys():
        # quadrature: default=3 Simpson; 1 is direct; 2 is trapezoidal
        header[30] = 'IQUAD'.ljust(6) + str().rjust(5) + ('%15.4E' % float(kw['IQUAD']))
    if 'NORDER' in kw.keys():
        # order of the regularization; default 2
        header[30] = 'NORDER'.ljust(6) + str().rjust(5) + ('%15.4E' % float(kw['NORDER']))
    if 'PLEVEL' in kw.keys():
        word = ('%5.2f' % float(kw['PLEVEL']))
        header[31] = 'PLEVEL'.ljust(6)
        header[32] = (word * 4)  # 0.1<PLEVEL<0.9; best is not to use it, default=0.5
    if 'NONNEG' in kw.keys():
        # no negative values in the solution; default is 1
        header[33] = 'NONNEG'.ljust(6) + str().rjust(5) + ('%15.4E' % float(kw['NONNEG']))
    else:
        # default is 1 as a distribution has no negative values
        header[33] = 'NONNEG'.ljust(6) + str().rjust(5) + ('%15.4E' % 1)
    header[-2] = 'END'.ljust(26)
    header[-1] = 'NY'.ljust(6) + str(0).rjust(5)  # the number of datapoints is set per file later
    # end of the header for CONTIN as ASCII inputfile ##################################

    # a default grid min/max is needed if it is not given explicitly;
    # transform tmin and tmax according to this function from the kernel: 1=t*R21*l**R22 -> l=(t*R21)**(-1/R22)
    if distribution[0] == 'L':
        transk = lambda t, n, l, a, T, v, R22, R21: 1 / t
    elif distribution[0] == 'm':
        transk = lambda t, n, l, a, T, v, R22, R21: (t * T * qq(n, l, a) ** 2) ** (-1 / R22)
    elif distribution[0] == 'D':
        transk = lambda t, n, l, a, T, v, R22, R21: (t * qq(n, l, a) ** 2) ** (-1)
    elif distribution[0] == 'r':
        transk = lambda t, n, l, a, T, v, R22, R21: (t * kb * T * qq(n, l, a) ** 2 / (0.06 * pi * v))
    elif distribution[0] == 'd':
        transk = lambda t, n, l, a, T, v, R22, R21: (t * kb * T * qq(n, l, a) ** 2 / (0.06 * pi * v))
    elif distribution[0] == 'u' or distribution[0] == 'x':
        transk = lambda t, n, l, a, T, v, R22, R21: (t * R21) ** (-1 / R22)
    elif distribution[0] == 'T':
        transk = lambda t, n, l, a, T, v, R22, R21: (t * R21) ** (-1 / R22)

    ###################################
    # now look at the data and modify also the header if necessary
    idata = 0
    print('processing %i datasets ' % len(datalist))
    for data in datalist:
        idata += 1
        print('evaluate Nr.:', idata)
        try:
            file = getattr(data, '@name')
        except AttributeError:
            file = time.strftime("%y%m%d%H%M%S", time.localtime())
        header[0] = file
        data.extract_comm(deletechars=':[]()"')  # find extra parameters in comments
        # extract datetime from comments from ALV instrument
        try:
            timestr = list(filter(lambda ll: ll.startswith('Time') or ll.startswith('Date'), data.comment))[0]
            timestr = timestr.translate(None, '"').split()[2]
            data.datetime = time.mktime(time.strptime(timestr, "%d.%m.%Y%H:%M:%S"))
        except:
            data.datetime = 0
        # take values from data
        if 'n' not in kw:
            try:
                n = _getfirst(data.Refractive)
            except AttributeError:
                pass
        if 'l' not in kw and distribution[0] != 'L' and 'R16' not in kw:
            # noinspection PyBroadException
            try:
                l = _getfirst(data.Wavelength)
            except AttributeError:
                pass
        if 'a' not in kw:
            try:
                a = _getfirst(data.Angle)
            except AttributeError:
                pass
        if 'T' not in kw:
            try:
                T = _getfirst(data.Temperature)
            except AttributeError:
                pass
        if 'v' not in kw:
            try:
                v = _getfirst(data.Viscosity)
            except AttributeError:
                v = _visc(T, **kw)
        else:
            v = _visc(T, **kw)  # or override it
        if l != 0:
            contin_wavevector = qq(n, l, a)  # in 1/cm
        else:
            contin_wavevector = 0
        # create the header for contin with parameters from the datafile
        try:
            if 'qtmin' in kw:
                tmin = float(kw['qtmin']) / contin_wavevector ** 2
            if 'qtmax' in kw:
                tmax = float(kw['qtmax']) / contin_wavevector ** 2
        except AttributeError:
            raise Exception('dont use qtmin / qtmax with Laplace option; wavevector is zero')
        itmin, itmax = data.X.searchsorted((tmin, tmax))  # searches where tmin, tmax fit into the list
        if tmin < 0:
            itmin = -tmin  # if negative, purge the first points
        if tmax < 0:
            itmax = tmax  # same, but if negative count from behind
        try:
            if 'gmin' not in kw and 'qgmin' not in kw:
                # calculate min/max of the grid if not given
                gmin = transk(data.X[itmin], n, l, a, T, _visc(T, **kw), R22, R21)
            elif 'qgmin' in kw:
                gmin = float(kw['qgmin']) / contin_wavevector ** 2
            else:
                gmin = float(kw['gmin'])
            if 'gmax' not in kw and 'qgmax' not in kw:
                gmax = transk(data.X[itmax], n, l, a, T, _visc(T, **kw), R22, R21)
            elif 'qgmax' in kw:
                gmax = float(kw['qgmax']) / contin_wavevector ** 2
            else:
                gmax = float(kw['gmax'])
        except ZeroDivisionError:
            print('wavevector is zero; use qgmax/qgmin only with non laplace option')
        header[2] = 'GMNMX'.ljust(6) + str(1).rjust(5) + ('%15.4E' % min(gmin, gmax))  # fit interval min
        header[3] = 'GMNMX'.ljust(6) + str(2).rjust(5) + ('%15.4E' % max(gmin, gmax))  # fit interval max
        if 'T' not in kw and 'R18' not in kw:
            try:
                T = _getfirst(data.Temperature)
            except:
                print('No temperature given, set to default 293.15K')
                T = 273.15 + 20
        header[19] = 'RUSER'.ljust(6) + str(18).rjust(5) + ('%15.4E' % T)
        header[16] = 'RUSER'.ljust(6) + str(15).rjust(5) + ('%15.4E' % n)
        header[17] = 'RUSER'.ljust(6) + str(16).rjust(5) + ('%15.4E' % l)
        header[18] = 'RUSER'.ljust(6) + str(17).rjust(5) + ('%15.4E' % a)
        header[20] = 'RUSER'.ljust(6) + str(19).rjust(5) + ('%15.4E' % v)
        # set the number of values
        lenX = len(data.X[itmin:itmax])
        header[-1] = 'NY'.ljust(6) + str(lenX).rjust(5)
        # now write the header and the data to an input buffer
        input = io.BytesIO()
        input.writelines([b' ' + line + b'\n' for line in header if line != b''])
        input.writelines([b' ' + (b'%8.5E' % line) + b'\n' for line in data.X[itmin:itmax]])
        if 'test' in kw:
            # just as a test with a simple exponential; relax = float(kw['test'])
            input.writelines([b' ' + (b'%8.5E' % line) + b'\n'
                              for line in 1e-3 + np.exp(-data.X[itmin:itmax] / float(kw['test']) * 2)])
        else:
            input.writelines([b' ' + (b'%8.5E' % line) + b'\n' for line in data.Y[itmin:itmax]])
        # to check the input
        if 'write' in kw or 'w' in kw:
            with open('./' + 'input.con', 'w') as f:
                f.writelines(input.getvalue().decode('utf-8'))
        # now run contin in a shell like environment with pipes
        p = subprocess.run(continexe, input=input.getvalue(), capture_output=True)
        input.close()
        output = p.stdout.decode('utf-8')
        error = p.stderr.decode('utf-8')
        if p.returncode > 0 or len(output) == 0:
            if error != '':
                for line in error.split('\n'):
                    print('contin_std_err>', line)
            if len(output) == 0:
                print('there was nothing in output yet')
                return
        # to check the output
        if 'write' in kw or 'w' in kw:
            with open('./' + file + '.con', 'w') as f:
                f.writelines(output)

        # CONTIN finished, we look at the output ------------------------------------
        # sort the output to the data
        outblocks = output.split('UNREGULARIZED VARIABLES')[-1].split(file)
        if len(outblocks) < 3:
            print('last lines of CONTIN output')
            print(outblocks[-1])
            raise Exception('CONTIN ended with no result; use w=1 to get output for analysis')
        # blocks of different alpha in the result (without prefit results);
        # the first repeats the input, then preliminary results; the last two are the chosen solution:
        # second last is the fit, last is the result distribution.
        # take the fit block after ABSCISSA; splitlines and take only lenX; split each line into words and convert to float
        temp = np.r_[[[float(vv) for vv in line[:22].split()]
                      for line in outblocks[-2].split('ABSCISSA\n')[-1].splitlines()[:lenX]]].T
        data.contin_result_fit = temp[[1, 0]]  # resort to have x (time) in 0 and fit_y in 1
        data.contin_fits = []
        for k in np.arange(len(outblocks))[1:-2]:
            # all solutions with different alpha
            chosen = outblocks[k].splitlines()
            # take the chosen block lines 6 to 6+Ngrid; split into words and convert to float;
            # sometimes D is used instead of E for floats like 1E5
            # order of temp -> [y, error_y, t]
            temp = np.r_[[[float(vv) for vv in line[:31].replace('D', 'E').split()]
                          for line in chosen[6:Ngrid + 6]]].T
            # the fit quality is in the 3rd line of the chosen block
            try:
                # hydrodynamic radius; temp[2] is the correlation time
                RR = Rh(temp[2] * timescale, contin_wavevector, T, v)
                # intensity weight
                ci_intw = temp[0] / temp[0].sum()
                ci_intwerr = temp[1] / temp[0].sum()
                # mass weighted contribution ci; temp[0] is the fraction
                ci_massw = _massweight(temp[0], contin_wavevector, RR)
                # number weighted contribution
                ci_numw = _numberweight(temp[0], contin_wavevector, RR)
                # the result output as [t, y, error_y, hydrodyn. radius, mass weight, number weight]
                data.contin_fits.append(dA(np.c_[temp[2], ci_intw, ci_intwerr, RR, ci_massw, ci_numw].T))
            except:
                data.contin_fits.append(dA(np.c_[temp[[2, 0, 1]].T].T))
            # fitquality from the contin output
            data.contin_fits[-1].fitquality = {}
            name = [aa.lstrip() for aa in chosen[2].lstrip().split('  ')]
            line = chosen[3].split()
            if line[0] == '*':
                value = [float(li) for li in line[1:]]
            else:
                value = [float(li) for li in line[:]]
            for i in range(len(name)):
                data.contin_fits[-1].fitquality[name[i]] = value[i]
            # look at the peaks found at the end of the solution block
            lines = chosen[Ngrid + 6:]
            linesl = []
            data.contin_fits[-1].peaks = []
            entireSol = -1
            for i in range(len(lines)):
                if lines[i][:5] == '0PEAK':
                    linesl.append(i)  # first peak line number
                if lines[i].strip()[:7] == 'MOMENTS':
                    entireSol = i  # entire solution line number
            # append if an entire solution exists (it does not for single peaks)
            if entireSol != -1:
                data.contin_fits[-1].momentEntireSolution = {}
                for j in 1, 2, 3, 4, 5:
                    val = lines[entireSol + j][44:].replace('X (10**', '').replace(')', '').split()
                    data.contin_fits[-1].momentEntireSolution['mom_' + val[0]] = \
                        [_w2f(vv) for vv in [val[0], val[1] + 'E' + val[2]] + val[3:-1]]
                data.contin_fits[-1].momentEntireSolution['std/mean'] = _w2f(lines[entireSol + 4][:45].split()[-1])
            data.contin_fits[-1].baseline = [_w2f(vv) for vv in
                                             np.r_[lines[0][22:].replace('D', 'E').split()][[0, 2]]]
            for i in linesl:
                data.contin_fits[-1].peaks.append({})  # append a new peak
                words = lines[i].split()
                data.contin_fits[-1].peaks[-1]['nr'] = _w2f(words[1])
                data.contin_fits[-1].peaks[-1]['minmax'] = [_w2f(words[4]), _w2f(words[6])]
                data.contin_fits[-1].peaks[-1]['iminmax'] = [
                    abs(data.contin_fits[-1][0] - data.contin_fits[-1].peaks[-1]['minmax'][0]).argmin(),
                    abs(data.contin_fits[-1][0] - data.contin_fits[-1].peaks[-1]['minmax'][1]).argmin()]
                for j in 1, 2, 3, 4, 5:
                    val = lines[i + j][44:].replace('X (10**', '').replace(')', '').split()
                    data.contin_fits[-1].peaks[-1]['mom_' + val[0]] = \
                        [_w2f(vv) for vv in [val[0], val[1] + 'E' + val[2]] + val[3:-1]]
                data.contin_fits[-1].peaks[-1]['std/mean'] = _w2f(lines[i + 4][:45].split()[-1])
                data.contin_fits[-1].peaks[-1]['weight'] = data.contin_fits[-1].peaks[-1]['mom_0'][1]
                data.contin_fits[-1].peaks[-1]['mean'] = data.contin_fits[-1].peaks[-1]['mom_1'][3]
                data.contin_fits[-1].peaks[-1]['imean'] = data.contin_fits[-1][0].searchsorted(
                    data.contin_fits[-1].peaks[-1]['mean'])
                data.contin_fits[-1].peaks[-1]['mean_errproz'] = \
                    data.contin_fits[-1].peaks[-1]['mom_1'][4]
                data.contin_fits[-1].peaks[-1]['mean_err'] = data.contin_fits[-1].peaks[-1]['mom_1'][4] / 100. * \
                                                             data.contin_fits[-1].peaks[-1]['mom_1'][3]
                # standard deviation is std = sqrt(mom_2/mom_0 - mean**2)
                data.contin_fits[-1].peaks[-1]['std'] = abs(
                    data.contin_fits[-1].peaks[-1]['mom_2'][1] / data.contin_fits[-1].peaks[-1]['mom_0'][1] -
                    data.contin_fits[-1].peaks[-1]['mom_1'][3] ** 2) ** 0.5
                data.contin_fits[-1].peaks[-1]['mean_num'] = data.contin_fits[-1].peaks[-1]['mom_0'][3]
                if distribution[0] == 'm':
                    # mass fraction mol weight, see Provencher
                    data.contin_fits[-1].peaks[-1]['mean_mass'] = data.contin_fits[-1].peaks[-1]['mom_1'][3]
                    data.contin_fits[-1].peaks[-1]['mean_z'] = data.contin_fits[-1].peaks[-1]['mom_1'][3]
                    data.contin_fits[-1].peaks[-1]['mean_z+1'] = data.contin_fits[-1].peaks[-1]['mom_1'][3]
        # now the chosen solution from CONTIN, which is the last given (repeated)
        chosen = outblocks[-1].splitlines()
        data.contin_alpha = float(chosen[3].split()[0])  # this is the alpha of the chosen solution
        data.contin_alphalist = [f.fitquality['ALPHA'] for f in data.contin_fits]
        # this is the chosen solution
        data.contin_bestFit = data.contin_fits[data.contin_alphalist.index(data.contin_alpha)]
        data.contin_bestFit.imaximum = data.contin_bestFit[0].searchsorted(data.contin_bestFit[1].max())
        data.contin_bestFit.columnname = \
            'relaxationtimes; intensityweightci; errors; hydrodynamicradii; massweightci; numberweightci'
        # a short info about the peaks
        ipeaks = np.c_[[[p['weight'],
                         p['mean'],
                         p['std'],
                         p['mean_err'],
                         p['imean'],
                         (1 / (contin_wavevector ** 2 * p['mean'] * timescale) if contin_wavevector != 0 else 0),
                         Rh(p['mean'] * timescale, contin_wavevector, T, v),
                         contin_wavevector,
                         (1e5 / (contin_wavevector ** 2 * p['mean'] * timescale) if contin_wavevector != 0 else 0),
                         Rh(p['mean'] * timescale, contin_wavevector, T, v) * 1e7,
                         contin_wavevector * 1e-7] for p in data.contin_bestFit.peaks]]
        # sort for the largest weight but take only the npeak strongest
        sequence = ipeaks[:, 0].argsort()[::-1]  # [:3]
        ipeaks = ipeaks[sequence, :]
        # and sort the remaining by peak position (column 5)
        sequence = ipeaks[:, 5].argsort()
        data.contin_bestFit.ipeaks = ipeaks[sequence, :]
        data.contin_bestFit.ipeaks_name = ['weight', 'mean', 'std', 'mean_err', 'imean',
                                           'mean_Deff', 'Rh', 'wavevector',
                                           'mean_Deff_nmns', 'Rh_nm', 'wavevector_nm']
        # ipeaks = np.c_[[[p['weight'], p['mean'], p['std'], p['mean_err'], p['imean']]
        #                 for p in data.contin_bestFit.peaks]]
        # # sort for largest weight but take only npeak strongest
        # sequence = ipeaks[:, 0].argsort()[::-1][:3]
        # ipeaks = ipeaks[sequence, :]
        # data.contin_bestFit.ipeaks = ipeaks
        # data.contin_bestFit.ipeaks_name = ['weight', 'mean', 'std', 'mean_err', 'imean']
        try:
            # calculate the mass weight properties
            temp = []
            for p in data.contin_bestFit.peaks:
                imin = max(0, p['iminmax'][0] - 1)
                imax = min(len(data.contin_bestFit[0]), p['iminmax'][1] + 1)
                weight = np.trapz(data.contin_bestFit[4, imin:imax], data.contin_bestFit[0, imin:imax])
                mmean = np.trapz(data.contin_bestFit[4, imin:imax] * data.contin_bestFit[0, imin:imax],
                                 data.contin_bestFit[0, imin:imax]) / weight
                std = np.sqrt(np.trapz((data.contin_bestFit[4, imin:imax] * data.contin_bestFit[0, imin:imax] ** 2),
                                       data.contin_bestFit[0, imin:imax]) / weight - mmean ** 2)
                temp.append([weight, mmean, std, p['mean_err'],
                             min(data.contin_bestFit[0].searchsorted(mmean), len(data.contin_bestFit[0])),
                             (1 / (contin_wavevector ** 2 * mmean * timescale) if contin_wavevector != 0 else 0),
                             Rh(mmean * timescale, contin_wavevector, T, v),
                             contin_wavevector,
                             (1e5 / (contin_wavevector ** 2 * mmean * timescale) if contin_wavevector != 0 else 0),
                             Rh(mmean * timescale, contin_wavevector, T, v) * 1e7,
                             contin_wavevector * 1e-7])
            mpeaks = np.c_[temp]
            mpeaks[:, 0] = mpeaks[:, 0] / mpeaks[:, 0].sum()
            mpeaks = mpeaks[sequence, :]
            data.contin_bestFit.mpeaks = mpeaks
            # calculate the number weight properties
            temp = []
            for p in data.contin_bestFit.peaks:
                imin = max(0, p['iminmax'][0] - 1)
                imax = min(len(data.contin_bestFit[0]), p['iminmax'][1] + 1)
                weight = np.trapz(data.contin_bestFit[5, imin:imax], data.contin_bestFit[0, imin:imax])
                nmean = np.trapz(data.contin_bestFit[5, imin:imax] * data.contin_bestFit[0, imin:imax],
                                 data.contin_bestFit[0, imin:imax]) / weight
                std = np.sqrt(np.trapz((data.contin_bestFit[5, imin:imax] * data.contin_bestFit[0, imin:imax] ** 2),
                                       data.contin_bestFit[0, imin:imax]) / weight - nmean ** 2)
                temp.append([weight, nmean, std, p['mean_err'],
                             min(data.contin_bestFit[0].searchsorted(nmean), len(data.contin_bestFit[0])),
                             (1 / (contin_wavevector ** 2 * nmean * timescale) if contin_wavevector != 0 else 0),
                             Rh(nmean * timescale, contin_wavevector, T, v),
                             contin_wavevector,
                             (1e5 / (contin_wavevector ** 2 * nmean * timescale) if contin_wavevector != 0 else 0),
                             Rh(nmean * timescale, contin_wavevector, T, v) * 1e7,
                             contin_wavevector * 1e-7])
            npeaks = np.c_[temp]
            npeaks[:, 0] = npeaks[:, 0] / npeaks[:, 0].sum()
            npeaks = npeaks[sequence, :]
            data.contin_bestFit.npeaks = npeaks
        except:
            pass
    return datalist
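
# Minimal usage sketch (illustrative, not part of the original module; assumes a compiled 'contin'
# executable on PATH and correlation data with times in microseconds; 'mydls.txt' is a hypothetical file):
#
#     import jscatter as js
#     data = js.dls.readZetasizerNano('mydls.txt')
#     dr = js.dls.contin(data, distribution='x')
#     dr[0].contin_bestFit.ipeaks   # intensity weighted peak summary
#     js.dls.contin_display(dr)     # graphical overview
#
# Cross-check of the docstring example via Stokes-Einstein in cgs units, Rh = kb*T/(6*pi*visc*D):
# with D = 0.05 nm**2/ns = 5e-7 cm**2/s, visc = 0.01 Poise and T = 293 K this gives
# 1.3806505e-16*293 / (6*pi*0.01*5e-7) ~ 4.3e-7 cm, i.e. ~4.3 nm, matching the quoted ~4.4 nm.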
# noinspection PyIncorrectDocstring
def contin_display(result_list, select=None, npeak=2, *args, **kw):
    """
    A routine to plot CONTIN results to get an overview over the CONTIN output.

    Parameters
    ----------
    result_list : dataList
        Output of dls.contin.
    select : list of int
        Sequence of integers in result_list to select for output.
    npeak : int
        Number of peaks in output, default 2.
    dlogy :
        Shows the distribution in y logscale.

    Notes
    -----
    | access diffusion of all first peaks by output[:,1,6]
    | mean and std as
    | output[:,1,6].mean()
    | output[:,1,6].std()

    """
    if select is None:
        select = []
    p = GracePlot()
    p.multi(1, 3)
    p1 = p.g[0]
    p1.subtitle('correlation')
    p2 = p.g[1]
    if 'mass' in kw.keys():
        p2title = 'mass weighted \\ndistribution'
    elif 'number' in kw.keys():
        p2title = 'number weighted \\ndistribution'
    else:
        # intensity
        p2title = 'intensity weighted \\ndistribution'
    p2.subtitle(p2title)
    p3 = p.g[2]
    p3.subtitle('relaxation times')
    if isinstance(select, numbers.Integral):
        select = [select]
    if not select:
        select = range(len(result_list))
    for i in select:
        data = result_list[i]
        p1.plot(data[0], data[1], line=0, symbol=-1)
        p1.plot(data.contin_result_fit[0], data.contin_result_fit[1], line=-1, symbol=0)
        p1.yaxis(min=0.0001, scale='n')
        p1.xaxis(min=0.0001, scale='l')
        p1.ylabel('g1(t)')
        p1.xlabel(r't')
        if 'mass' in args:
            YY = data.contin_bestFit[4] / data.contin_bestFit[4].max()
            peaks = data.contin_bestFit.mpeaks
        elif 'number' in args:
            YY = data.contin_bestFit[5] / data.contin_bestFit[5].max()
            peaks = data.contin_bestFit.npeaks
        else:
            # intensity
            YY = data.contin_bestFit[1] / data.contin_bestFit[1].max()
            peaks = data.contin_bestFit.ipeaks
        if 'dlogy' in args:
            p2.plot(data.contin_bestFit[0], YY,
                    legend='%.3g a=%.3g PROB1 %.3g' % (data.contin_alpha,
                                                       data.contin_bestFit.fitquality['ALPHA'],
                                                       data.contin_bestFit.fitquality['PROB1 TO REJECT']),
                    symbol=-1)
            p2.xaxis(min=0.0001, scale='l')
            p2.yaxis(min=0.0001, scale='l')
        else:
            # try:
            p2.plot(data.contin_bestFit[0], YY,
                    legend='%.3g a=%.3g PROB1 %.3g' % (data.contin_alpha,
                                                       data.contin_bestFit.fitquality['ALPHA'],
                                                       data.contin_bestFit.fitquality['PROB1 TO REJECT']),
                    symbol=-1)
            p2.xaxis(min=0.0001, scale='l')
            # except: pass
        p2.ylabel('distr(t)')
        p2.xlabel(r't')
        p3.ylabel(r'peak\smean\N ')
        p3.xlabel(r'q / nm\S-1')
        for pp in peaks[:npeak]:
            p3.plot([pp[10]], [pp[1]], [pp[3] / pp[1]], symbol=-1, line=0)
    return
# readmode
_mode = 'r'


# noinspection PyBroadException
def readZetasizerNano(filename, NANOcoding="cp1257", NANOlocale=None):
    """
    Read ascii file data exported from a Malvern Zetasizer Nano.

    The Zetasizer format is one measurement per line with the content defined in the export macro.
    The first line gives the names of the columns, so a header line in the NANO export is necessary.
    Lists as Sizes, Volumes, ... need to be separated by "Sample Name" because of their different
    lengths.

    Parameters
    ----------
    filename : string
        Zetasizer filename.
    NANOcoding : string
        UTF coding from Windows.
    NANOlocale : list of string
        | Encoding of weekday names in the NANO textfile.
        | ['de_DE','UTF-8'] NANO set to German
        | ['en_US','UTF-8'] NANO set to US
        | ...

    Returns
    -------
    dataList with dataArray for each measurement
        dataArray contains:

        - correlation data [CorrelationDelayTime; g2minus1; g1] (if present, otherwise empty dataArray)
        - .distributions : distribution fits with columns
          [RelaxationTimes, Sizes, Intensities, Volumes, Diffusions] (if present)
        - fit results of the correlation function in attributes
          (some channels are discarded -> see Zetasizer settings)

          - .distributionFitDelayTimes : time channels
          - .distributionFitData : measured points
          - .distributionFit : fit result
        - Check .attr for more attributes like SampleName, MeanCountRate, SizePeak1mean,
          SizePeak1width, Temperature.

        If no correlation or distributions are present, the data are zero and only parameters
        are stored. No unit conversion is done.
        CorrelationData.Y contains 1-g2=g1**2 with 0 for negative 1-g2.

    Notes
    -----
    The template for export in the Zetasizer software has some requirements:

    - The header line should be included.
    - The separator is the tab.
    - The first entries should be "Serial Number", "Type", "Sample Name", "Measurement Date and Time".
    - Sequence data as "Sizes", "Intensities" should be separated by "Sample Name" as separator.
    - Appending to the same file is allowed if a new header line is included.

    Examples
    --------
    ::

        import jscatter as js
        import numpy as np
        # read example data
        alldls = js.dls.readZetasizerNano(js.examples.datapath + '/dlsPolymerSolution.txt',
                                          NANOlocale=['de_DE', 'UTF-8'])
        one=alldls[5]
        p=js.grace()
        p.multi(2,1,vgap=0.2)
        p[0].plot(one,legend='correlation data (all)')
        p[0].plot(one.distributions.X, one.distributions.Y / 10, li=1,
                  legend='intensity weighted distribution (x0.1)')
        p[0].plot(one.DistributionFitDelayTimes,one.DistributionFitData,sy=[1,0.2,4],le='fitted data')
        p[0].plot(one.DistributionFitDelayTimes,one.DistributionFit,sy=0,li=[1,2,5])
        p[1].plot(one.distributions[1], one.distributions[2], li=1,
                  legend='radius distribution intensity weighted')
        p[1].plot(one.distributions._Sizes, one.distributions._Volumes, li=1,
                  legend='radius distribution volume weighted')
        p[0].xaxis(scale='l', label=r'correlation time / µs ', tick=[10, 9])
        p[1].xaxis(scale='l', label=r'radius / nm ')
        p[0].yaxis(label=r'g1 / probability ')
        p[1].yaxis(label=r'probability ')
        p[0].legend(x=1000,y=1.9)
        p[1].legend(x=50,y=30)
        p[0].title('Zetasizer data with distribution fit')
        p[0].subtitle('Results from instrument software')
        # p.save(js.examples.imagepath+'/Zetasizerexample.jpg',format='jpeg',size=[600/300,600/300])  # as jpg file
    .. image:: ../../examples/images/Zetasizerexample.jpg
        :width: 50 %
        :align: center
        :alt: Graceexample

    """
    if NANOlocale is None:
        NANOlocale = ['en_US', 'UTF-8']
    seqcolumnNames = ['CorrelationDelayTimes', 'CorrelationData', 'RelaxationTimes', 'Sizes', 'Intensities',
                      'Volumes', 'Diffusions', 'DistributionFitDelayTimes', 'DistributionFitData',
                      'DistributionFit']
    # DLS data are written on a Windows computer with a specific encoding, so read it accordingly
    # locale.resetlocale()
    # get locale
    old_loc = locale.getlocale(locale.LC_TIME)
    # set it to german or english (us) dependent on the DLS computer language
    locale.setlocale(locale.LC_TIME, NANOlocale)
    with codecs.open(filename, _mode, NANOcoding) as f:
        zeilen = f.readlines()
    # create a lexicon for the date formats
    daynames = [locale.nl_langinfo(dd) for dd in [locale.DAY_1, locale.DAY_2, locale.DAY_3, locale.DAY_4,
                                                  locale.DAY_5, locale.DAY_6, locale.DAY_7]]
    monthnames = [locale.nl_langinfo(dd) for dd in [locale.MON_1, locale.MON_2, locale.MON_3, locale.MON_4,
                                                    locale.MON_5, locale.MON_6, locale.MON_7, locale.MON_8,
                                                    locale.MON_9, locale.MON_10, locale.MON_11, locale.MON_12]]
    selectfmt = {'%A': lambda a: a in daynames,
                 '%B': lambda a: a in monthnames,
                 '%d': lambda a: a in [str(i) for i in range(1, 32)],
                 '%Y': lambda a: a.isdigit() and int(a) > 1900,
                 '%H:%M:%S': lambda a: ':' in a}
    output = dL()
    # scan each line
    for izeile, zeile in enumerate(zeilen):
        # encode in utf8 for correct use in datetime and split the line
        worte = zeile.rstrip().replace(',', '.').split('\t')
        para = {}  # single parameters
        data = {}  # sequences of data
        next = 0
        # detect a header line
        if any(x in worte[:4] for x in ['Serial Number', 'Record', 'Type', 'Sample Name']):
            headerline = worte
            header = collections.OrderedDict()  # the key order is preserved in OrderedDict
            for i, field in enumerate(headerline):
                # if there are multiple entries as a sequence it can be identified by '['
                key = field.split('[')[0].split('(')[0]
                if ''.join(key.split()) not in header.keys():
                    header[''.join(key.split())] = [(i, field, key)]
                else:
                    header[''.join(key.split())] += [(i, field, key)]
            continue
        if 'Trend' in worte[:4]:
            continue  # skip Trend lines
        if 'Aggregation Point' in worte[:4]:
            continue  # skip line
        for key in header.keys():
            if '[' in header[key][0][1] or key in seqcolumnNames:
                # belongs to a sequence of data as a distribution fit
                start = next
                try:
                    # try to find the index of the next "SampleName" as separator
                    next = worte.index(para['SampleName'][0], start)
                except (ValueError, KeyError):
                    print('in line ', izeile, '', key,
                          ' is missing a "Sample Name" after the sequence. Check your export template')
                    continue
                data[''.join(key.split())] = worte[start:next]
                next += 1
            else:
                try:
                    # not a sequence; is only a parameter
                    if ''.join(key.split()) in para.keys():
                        # already there
                        para[''.join(key.split())] += [worte[next]]
                    else:
                        # a new one
                        para[''.join(key.split())] = [worte[next]]
                except:
                    print('in line ', izeile, '', key,
                          ' is missing the parameter value. Check your export template')
                    continue
                next += 1
        if data == {}:
            print('in line ', izeile, ' data is missing at all. Check your export template')
            continue
        # append to output as a new dataArray
        names = [r'RelaxationTimes', r'Sizes', r'Intensities', r'Numbers', r'Volumes', r'Diffusions']
        columnnames = list(set(names).intersection(set(data.keys())))
        columnnames = [name for name in names if name in columnnames]
        if 'CorrelationDelayTimes' in data.keys() and len(data['CorrelationData']) > 0:
            # we save here g1 instead of 1-g2 => take the root with preserved sign to prevent negative roots
            cor = np.array(data['CorrelationData'], dtype=float)
            output.append(dA(np.c_[np.array(data['CorrelationDelayTimes'], dtype=float),
                                   np.sign(cor) * np.sqrt(np.abs(cor)),
                                   cor].T, XYeYeX=[0, 1]))
            del data['CorrelationDelayTimes']
            del data['CorrelationData']
            output[-1].columnname = 'CorrelationDelayTime; g2minus1; g1'
        elif len(columnnames) > 0:
            output.append(dA(np.c_[[np.array(data[name], dtype=float) for name in columnnames]], XYeYeX=[0, 1]))
            output[-1].columnname = ''.join(['%s; ' % name for name in columnnames])
            for coln in names[2:]:
                if coln in columnnames:
                    output[-1].setColumnIndex(ix=0, iy=columnnames.index(coln), iey=None)
                    break
            for name in columnnames:
                del data[name]
        else:
            # only parameters
            output.append(dA(np.zeros(3), XYeYeX=[0, 1]))
            output[-1].comment += ['only parameters in file']
        # append parameters to the result
        for key, item in para.items():
            if key == 'MeasurementDateandTime':
                # make a computational time
                timestring = ' '.join(item[0].replace(',', '').replace('.', '').split('\t'))
                # guess the format string, something like '%A %d %B %Y %H:%M:%S'
                fmt = ''
                for dkeys in timestring.split(' '):
                    for fk, fv in selectfmt.items():
                        if fv(dkeys):
                            fmt += fk + ' '
                            continue
                fmt = fmt.rstrip()
                tt = time.mktime(time.strptime(timestring, fmt))
                setattr(output[-1], key.split('(')[0], time.ctime(tt))
                setattr(output[-1], 'MeasurementTimeSeconds', tt)
            elif key.split('(')[0] == u'T':
                setattr(output[-1], 'Temperature', np.array(item[0], dtype=float))
            else:
                try:
                    setattr(output[-1], key.split('(')[0], np.array(item[0], dtype=float))
                except:
                    setattr(output[-1], key.split('(')[0], item[0])
        try:
            # append the distribution if not already saved above
            distributions = dA(np.c_[[np.array(data[name], dtype=float) for name in columnnames]], XYeYeX=[0, 1])
            distributions.columnname = ''.join(['%s; ' % name for name in columnnames])
            for coln in names[2:]:
                if coln in columnnames:
                    distributions.setColumnIndex(ix=0, iy=columnnames.index(coln), iey=None)
                    break
            for name in columnnames:
                del data[name]
            output[-1].distributions = distributions
        except:
            pass
        try:
            distributions = []
            columnnames = ''
            for dist in ['DistributionFitDelayTimes', 'DistributionFitData', 'DistributionFit']:
                try:
                    distributions.append(getattr(output[-1], dist))
                    columnnames += dist + ';'
                    delattr(output[-1], dist)
                except:
                    pass
            output[-1].distributionFit = dA(np.r_[distributions].squeeze())
            output[-1].distributionFit.columnames = columnnames
            output[-1].distributionFit.setColumnIndex(iey=None)
            output[-1].distributionFit.comment = ['this is g1']
        except:
            pass
        # append all remaining data to parameters ------------------------------------------------------
        for key in data.keys():
            try:
                setattr(output[-1], key, np.array(data[key], dtype=float))
            except:
                pass
        output[-1].filename = filename
    # reset locale
    locale.setlocale(locale.LC_TIME, old_loc)
    return output