Source code for CONAN.funcs

import numpy as np
from scipy.stats import binned_statistic
import scipy 
from scipy import stats
import scipy.interpolate as si
import concurrent.futures

#TODO corfac can take very long. results sometimes are difficult to understand. It should be revised.



[docs]
prior_distributions = dict( N  = stats.norm,
                            U  = stats.uniform,
                            LU = stats.loguniform,
                            LN = stats.lognorm,
                            T  = stats.truncnorm,
                            )


def corfac(rarr, tarr, earr, indlist, nphot, njumpphot):
    """
    Compute the red noise factor, white noise factor, combined factor, and
    the white noise factor for chi2red==1.

    Parameters
    ----------
    rarr: 1D float ndarray
        The residuals.
    tarr: 1D float ndarray
        The time array.
    earr: 1D float ndarray
        The error array.
    indlist: 2D int ndarray
        The indices of the residuals.
    nphot: Int
        The number of photometric light curves.
    njumpphot: Int
        The number of jump parameters in the photometric light curves.

    Returns
    -------
    bw: 1D float ndarray
        The white noise factor.
    br: 1D float ndarray    
        The red noise factor.
    brt: 1D float ndarray       
        The red noise timescale.
    cf: 1D float ndarray
        The combined factor.

    """
    bw = np.ones(nphot)
    br = np.ones(nphot)
    cf = np.ones(nphot)
    brt = np.zeros(nphot)
    tscales = np.arange(16) + 5  # time bin sizes between 5 and 20 minutes

    def compute_br(j):
        res = np.copy(rarr[indlist[j][0]])
        tt = np.copy(tarr[indlist[j][0]])
        err = np.copy(earr[indlist[j][0]])

        # calculate the red noise factor
        brs = np.zeros(len(tscales))
        for l in range(len(tscales)):
            tscale = tscales[l] / 60. / 24.  # timescales
            min1 = 1. / 60. / 24.  # 1 minute in days

            # calculate the shifting bins: shift by 1 minute each
            nshifts = np.copy(tscales[l])  # number of minutes of bin == number of shifts
            brs2 = np.zeros(nshifts)  # the br amplitudes for all shifts
            for ll in range(nshifts):
                # calculate the bin edges
                TT0 = tt[0] + ll * min1  # starting point is start + ll * 1 minute
                nbin = int((np.max(tt) - TT0) / tscale)
                binlims = np.zeros(nbin + 1)
                bincens = np.zeros(nbin)
                binnps = np.zeros(nbin)  # number of points per bin
                binlims[0] = np.copy(TT0)
                binind = []
                for k in range(1, nbin + 1):
                    binlims[k] = binlims[k - 1] + tscale
                for k in range(nbin):
                    bincens[k] = binlims[k] + 0.5 * tscale
                    binnps[k] = len(tt[(tt > binlims[k]) & (tt < binlims[k + 1])])

                mbin = np.nanmean(binnps)  # mean number of points per bin
                resbin, dump, dump2 = binned_statistic(tt, res, statistic='mean', bins=binlims)
                sigNred = np.nanstd(res) / np.sqrt(mbin) * np.sqrt(nbin / (nbin - 1))
                sigNreal = np.nanstd(resbin)
                brs2[ll] = sigNreal / sigNred

            brs[l] = np.nanmedian(brs2)

        br[j] = np.max(brs)

        if br[j] < 1.:
            br[j] = 1.

        brt[j] = tscales[brs.argmax()]

        # calculate the white noise factor
        #   - apply b_red to the data errors
        #   - calculate the b_white values for the adapted errors

        err2 = err * br[j]
        tscale = 15. / 60. / 24.  # timescale is 20 minutes
        min1 = 1. / 60. / 24.  # 1 minute in days

        # calculate the shifting bins: shift by 1 minute each
        nshifts = np.copy(tscales[l])  # number of minutes of bin == number of shifts
        bws = np.zeros(nshifts)  # the bw amplitudes for all shifts
        for ll in range(nshifts):
            # calculate the bin edges
            TT0 = tt[0] + ll * min1  # starting point is start + ll * 1 minute
            nbin = int((np.max(tt) - TT0) / tscale)
            binlims = np.zeros(nbin + 1)
            bincens = np.zeros(nbin)
            binnps = np.zeros(nbin)  # number of points per bin
            binlims[0] = np.copy(TT0)
            binind = []
            bws2 = np.zeros(nbin)
            for k in range(1, nbin + 1):
                binlims[k] = binlims[k - 1] + tscale
            for k in range(nbin):
                bsig = np.nanstd(res[(tt > binlims[k]) & (tt < binlims[k + 1])])
                beme = np.nanmean(err[(tt > binlims[k]) & (tt < binlims[k + 1])])  # NOTE: either this or err2, i.e. the red-noise adapted errors
                bws2[k] = bsig / beme

            bws[ll] = np.nanmean(bws2)

        bw[j] = np.nanmean(bws)

        cf[j] = br[j] * bw[j]

        return bw[j], br[j], brt[j], cf[j]

    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(compute_br, range(nphot)))

    for j, result in enumerate(results):
        bw[j], br[j], brt[j], cf[j] = result

    # now compute CF for chi2red==1
    cfn = np.array([])

    for j in range(nphot):
        res = rarr[indlist[j][0]]
        err = earr[indlist[j][0]]
        nfree = len(res) - njumpphot
        pjit = 0.0002
        pjit, dump = scipy.optimize.leastsq(redchisqmin, pjit, args=(nfree, res, err))
        pjit = np.abs(pjit)
        cfn = np.append(cfn, pjit)

    return bw, br, brt, cf, cfn

def redchisqmin(jit, nfree, res, err):
    redchisq = np.sum(res**2 / (err**2 + jit**2)) / nfree
    return 1 - redchisq

def jitter(rarr, earr, indlist, nphot, nRV, njumpRV):
    
    jitters = np.array([])
    
    for j in range(nRV):
        res=rarr[indlist[nphot+j][0]]
        err=earr[indlist[nphot+j][0]]
        nfree = len(res)-njumpRV[j]
        jit=1
        jit, dump = scipy.optimize.leastsq(redchisqmin, jit, args=(nfree, res, err))
        jit = np.abs(jit)
        jitters = np.append(jitters,jit)
            
    return jitters


def credregionML(posterior=None, percentile=0.6827, pdf=None, xpdf=None):
    """
    Compute a smoothed posterior density distribution and the minimum
    density for a given percentile of the highest posterior density.
    These outputs can be used to easily compute the HPD credible regions.

    Parameters
    ----------
    posterior: 1D float ndarray
        A posterior distribution.
    percentile: Float
        The percentile (actually the fraction) of the credible region.
        A value in the range: (0, 1).
    pdf: 1D float ndarray
        A smoothed-interpolated PDF of the posterior distribution.
    xpdf: 1D float ndarray
        The X location of the pdf values.
    Returns
    -------
    pdf: 1D float ndarray
        A smoothed-interpolated PDF of the posterior distribution.
    xpdf: 1D float ndarray
        The X location of the pdf values.
    HPDmin: Float
        The minimum density in the percentile-HPD region.

    Example
    -------
    >>> import numpy as np
    >>> npoints = 100000
    >>> posterior = np.random.normal(0, 1.0, npoints)
    >>> pdf, xpdf, HPDmin = credregion(posterior)
    >>> # 68% HPD credible-region boundaries (somewhere close to +/-1.0):
    >>> print(np.amin(xpdf[pdf>HPDmin]), np.amax(xpdf[pdf>HPDmin]))
    >>> # Re-compute HPD for the 95% (withour recomputing the PDF):
    >>> pdf, xpdf, HPDmin = credregion(pdf=pdf, xpdf=xpdf, percentile=0.9545)
    >>> print(np.amin(xpdf[pdf>HPDmin]), np.amax(xpdf[pdf>HPDmin]))
    """

    if pdf is None and xpdf is None:
        # Thin if posterior has too many samples (> 120k):
        thinning = np.amax([1, int(np.size(posterior)/120000)])
        # Compute the posterior's PDF:
        kernel = stats.gaussian_kde(posterior[::thinning])
        # Remove outliers:
        mean = np.mean(posterior)
        std  = np.std(posterior)
        k = 6
        lo = np.amax([mean-k*std, np.amin(posterior)])
        hi = np.amin([mean+k*std, np.amax(posterior)])
        # Use a Gaussian kernel density estimate to trace the PDF:
        x  = np.linspace(lo, hi, 100)
        # Interpolate-resample over finer grid (because kernel.evaluate
        #  is expensive):
        f    = si.interp1d(x, kernel.evaluate(x))
        xpdf = np.linspace(lo, hi, 3000)
        pdf  = f(xpdf)

    # Sort the PDF in descending order:
    ip = np.argsort(pdf)[::-1]
    # Sorted CDF:
    cdf = np.cumsum(pdf[ip])
    # Indices of the highest posterior density:
    iHPD = np.where(cdf >= percentile*cdf[-1])[0][0]
    mHPD = np.argmax(pdf)
    # Minimum density in the HPD region:
    HPDmin = np.amin(pdf[ip][0:iHPD])
    return pdf, xpdf, HPDmin, mHPD


def grweights(earr,indlist,grnames,groups,ngroup,nphot):
    # calculate the total of the error in each groups
    
    #ewarr = np.zeros(len(indlist[nphot-1][0]))
    #for j in range(len(ewarr)):
    #    ewarr[j]=earr[j]
        
    ewarr=np.copy(earr)  # error weight array
    
    for j in range(ngroup):
        jj=j+1
        # select LCs belonging to this group
        ind = np.where(np.array(groups) == jj)[0]
        nlc=len(ind)            # number of lcs in this group
        nplc=len(indlist[ind[0]][0])      # number of lc points
        errsum=np.zeros(nplc)   # this will contain the error sum for the group
        # sum the errors up at each time step
        for k in range(nlc):
            if (len(indlist[ind[k]][0]) != nplc):
                print('group LCs dont have the same number of points')
            errsum=errsum+np.divide(1,np.power(earr[indlist[ind[k]][0]],2))

    # calculate the weights for each lightcurve

        for k in range(nlc):
            ewarr[indlist[ind[k]][0]]=np.power(ewarr[indlist[ind[k]][0]],2)*errsum
     
 #   print np.divide(1.,ewarr[indlist[0]]), np.divide(1.,ewarr[indlist[1]]), np.divide(1.,ewarr[indlist[2]])
 #   print nothing
    return(ewarr)

def grtest_emcee(chains_out):
    # note: chains_out has the dimensions (nchains,nsteps,ndim)
    nc,ns,nd = np.shape(chains_out)
    mcs = np.mean(chains_out, axis=1)      # mcs should be an (nchains, ndim) array
    mc = np.mean(mcs, axis=0)              # mc should be an (ndim) array
    BV = ns/(nc-1.) * np.sum((mcs-mc)**2, axis=0)  # BV should be an (ndim) array
    mvs = np.var(chains_out, axis=1)       # mvs should be an (nchains, ndim) array
    WV = np.mean(mvs, axis=0)              # WV should be an (ndim) array
    VV = (ns-1.)/ns * WV + (nc + 1.)/(ns*nc) * BV # VV should be an (ndim) array
    GR = VV/WV                             # GR should be an (ndim) array
    
    return GR