import numpy as np
from scipy.stats import binned_statistic
import scipy
from scipy import stats
import scipy.interpolate as si
import concurrent.futures
#TODO corfac can take very long. results sometimes are difficult to understand. It should be revised.
[docs]
prior_distributions = dict( N = stats.norm,
U = stats.uniform,
LU = stats.loguniform,
LN = stats.lognorm,
T = stats.truncnorm,
)
def corfac(rarr, tarr, earr, indlist, nphot, njumpphot):
"""
Compute the red noise factor, white noise factor, combined factor, and
the white noise factor for chi2red==1.
Parameters
----------
rarr: 1D float ndarray
The residuals.
tarr: 1D float ndarray
The time array.
earr: 1D float ndarray
The error array.
indlist: 2D int ndarray
The indices of the residuals.
nphot: Int
The number of photometric light curves.
njumpphot: Int
The number of jump parameters in the photometric light curves.
Returns
-------
bw: 1D float ndarray
The white noise factor.
br: 1D float ndarray
The red noise factor.
brt: 1D float ndarray
The red noise timescale.
cf: 1D float ndarray
The combined factor.
"""
bw = np.ones(nphot)
br = np.ones(nphot)
cf = np.ones(nphot)
brt = np.zeros(nphot)
tscales = np.arange(16) + 5 # time bin sizes between 5 and 20 minutes
def compute_br(j):
res = np.copy(rarr[indlist[j][0]])
tt = np.copy(tarr[indlist[j][0]])
err = np.copy(earr[indlist[j][0]])
# calculate the red noise factor
brs = np.zeros(len(tscales))
for l in range(len(tscales)):
tscale = tscales[l] / 60. / 24. # timescales
min1 = 1. / 60. / 24. # 1 minute in days
# calculate the shifting bins: shift by 1 minute each
nshifts = np.copy(tscales[l]) # number of minutes of bin == number of shifts
brs2 = np.zeros(nshifts) # the br amplitudes for all shifts
for ll in range(nshifts):
# calculate the bin edges
TT0 = tt[0] + ll * min1 # starting point is start + ll * 1 minute
nbin = int((np.max(tt) - TT0) / tscale)
binlims = np.zeros(nbin + 1)
bincens = np.zeros(nbin)
binnps = np.zeros(nbin) # number of points per bin
binlims[0] = np.copy(TT0)
binind = []
for k in range(1, nbin + 1):
binlims[k] = binlims[k - 1] + tscale
for k in range(nbin):
bincens[k] = binlims[k] + 0.5 * tscale
binnps[k] = len(tt[(tt > binlims[k]) & (tt < binlims[k + 1])])
mbin = np.nanmean(binnps) # mean number of points per bin
resbin, dump, dump2 = binned_statistic(tt, res, statistic='mean', bins=binlims)
sigNred = np.nanstd(res) / np.sqrt(mbin) * np.sqrt(nbin / (nbin - 1))
sigNreal = np.nanstd(resbin)
brs2[ll] = sigNreal / sigNred
brs[l] = np.nanmedian(brs2)
br[j] = np.max(brs)
if br[j] < 1.:
br[j] = 1.
brt[j] = tscales[brs.argmax()]
# calculate the white noise factor
# - apply b_red to the data errors
# - calculate the b_white values for the adapted errors
err2 = err * br[j]
tscale = 15. / 60. / 24. # timescale is 20 minutes
min1 = 1. / 60. / 24. # 1 minute in days
# calculate the shifting bins: shift by 1 minute each
nshifts = np.copy(tscales[l]) # number of minutes of bin == number of shifts
bws = np.zeros(nshifts) # the bw amplitudes for all shifts
for ll in range(nshifts):
# calculate the bin edges
TT0 = tt[0] + ll * min1 # starting point is start + ll * 1 minute
nbin = int((np.max(tt) - TT0) / tscale)
binlims = np.zeros(nbin + 1)
bincens = np.zeros(nbin)
binnps = np.zeros(nbin) # number of points per bin
binlims[0] = np.copy(TT0)
binind = []
bws2 = np.zeros(nbin)
for k in range(1, nbin + 1):
binlims[k] = binlims[k - 1] + tscale
for k in range(nbin):
bsig = np.nanstd(res[(tt > binlims[k]) & (tt < binlims[k + 1])])
beme = np.nanmean(err[(tt > binlims[k]) & (tt < binlims[k + 1])]) # NOTE: either this or err2, i.e. the red-noise adapted errors
bws2[k] = bsig / beme
bws[ll] = np.nanmean(bws2)
bw[j] = np.nanmean(bws)
cf[j] = br[j] * bw[j]
return bw[j], br[j], brt[j], cf[j]
with concurrent.futures.ThreadPoolExecutor() as executor:
results = list(executor.map(compute_br, range(nphot)))
for j, result in enumerate(results):
bw[j], br[j], brt[j], cf[j] = result
# now compute CF for chi2red==1
cfn = np.array([])
for j in range(nphot):
res = rarr[indlist[j][0]]
err = earr[indlist[j][0]]
nfree = len(res) - njumpphot
pjit = 0.0002
pjit, dump = scipy.optimize.leastsq(redchisqmin, pjit, args=(nfree, res, err))
pjit = np.abs(pjit)
cfn = np.append(cfn, pjit)
return bw, br, brt, cf, cfn
def redchisqmin(jit, nfree, res, err):
redchisq = np.sum(res**2 / (err**2 + jit**2)) / nfree
return 1 - redchisq
def jitter(rarr, earr, indlist, nphot, nRV, njumpRV):
jitters = np.array([])
for j in range(nRV):
res=rarr[indlist[nphot+j][0]]
err=earr[indlist[nphot+j][0]]
nfree = len(res)-njumpRV[j]
jit=1
jit, dump = scipy.optimize.leastsq(redchisqmin, jit, args=(nfree, res, err))
jit = np.abs(jit)
jitters = np.append(jitters,jit)
return jitters
def credregionML(posterior=None, percentile=0.6827, pdf=None, xpdf=None):
"""
Compute a smoothed posterior density distribution and the minimum
density for a given percentile of the highest posterior density.
These outputs can be used to easily compute the HPD credible regions.
Parameters
----------
posterior: 1D float ndarray
A posterior distribution.
percentile: Float
The percentile (actually the fraction) of the credible region.
A value in the range: (0, 1).
pdf: 1D float ndarray
A smoothed-interpolated PDF of the posterior distribution.
xpdf: 1D float ndarray
The X location of the pdf values.
Returns
-------
pdf: 1D float ndarray
A smoothed-interpolated PDF of the posterior distribution.
xpdf: 1D float ndarray
The X location of the pdf values.
HPDmin: Float
The minimum density in the percentile-HPD region.
Example
-------
>>> import numpy as np
>>> npoints = 100000
>>> posterior = np.random.normal(0, 1.0, npoints)
>>> pdf, xpdf, HPDmin = credregion(posterior)
>>> # 68% HPD credible-region boundaries (somewhere close to +/-1.0):
>>> print(np.amin(xpdf[pdf>HPDmin]), np.amax(xpdf[pdf>HPDmin]))
>>> # Re-compute HPD for the 95% (withour recomputing the PDF):
>>> pdf, xpdf, HPDmin = credregion(pdf=pdf, xpdf=xpdf, percentile=0.9545)
>>> print(np.amin(xpdf[pdf>HPDmin]), np.amax(xpdf[pdf>HPDmin]))
"""
if pdf is None and xpdf is None:
# Thin if posterior has too many samples (> 120k):
thinning = np.amax([1, int(np.size(posterior)/120000)])
# Compute the posterior's PDF:
kernel = stats.gaussian_kde(posterior[::thinning])
# Remove outliers:
mean = np.mean(posterior)
std = np.std(posterior)
k = 6
lo = np.amax([mean-k*std, np.amin(posterior)])
hi = np.amin([mean+k*std, np.amax(posterior)])
# Use a Gaussian kernel density estimate to trace the PDF:
x = np.linspace(lo, hi, 100)
# Interpolate-resample over finer grid (because kernel.evaluate
# is expensive):
f = si.interp1d(x, kernel.evaluate(x))
xpdf = np.linspace(lo, hi, 3000)
pdf = f(xpdf)
# Sort the PDF in descending order:
ip = np.argsort(pdf)[::-1]
# Sorted CDF:
cdf = np.cumsum(pdf[ip])
# Indices of the highest posterior density:
iHPD = np.where(cdf >= percentile*cdf[-1])[0][0]
mHPD = np.argmax(pdf)
# Minimum density in the HPD region:
HPDmin = np.amin(pdf[ip][0:iHPD])
return pdf, xpdf, HPDmin, mHPD
def grweights(earr,indlist,grnames,groups,ngroup,nphot):
# calculate the total of the error in each groups
#ewarr = np.zeros(len(indlist[nphot-1][0]))
#for j in range(len(ewarr)):
# ewarr[j]=earr[j]
ewarr=np.copy(earr) # error weight array
for j in range(ngroup):
jj=j+1
# select LCs belonging to this group
ind = np.where(np.array(groups) == jj)[0]
nlc=len(ind) # number of lcs in this group
nplc=len(indlist[ind[0]][0]) # number of lc points
errsum=np.zeros(nplc) # this will contain the error sum for the group
# sum the errors up at each time step
for k in range(nlc):
if (len(indlist[ind[k]][0]) != nplc):
print('group LCs dont have the same number of points')
errsum=errsum+np.divide(1,np.power(earr[indlist[ind[k]][0]],2))
# calculate the weights for each lightcurve
for k in range(nlc):
ewarr[indlist[ind[k]][0]]=np.power(ewarr[indlist[ind[k]][0]],2)*errsum
# print np.divide(1.,ewarr[indlist[0]]), np.divide(1.,ewarr[indlist[1]]), np.divide(1.,ewarr[indlist[2]])
# print nothing
return(ewarr)
def grtest_emcee(chains_out):
# note: chains_out has the dimensions (nchains,nsteps,ndim)
nc,ns,nd = np.shape(chains_out)
mcs = np.mean(chains_out, axis=1) # mcs should be an (nchains, ndim) array
mc = np.mean(mcs, axis=0) # mc should be an (ndim) array
BV = ns/(nc-1.) * np.sum((mcs-mc)**2, axis=0) # BV should be an (ndim) array
mvs = np.var(chains_out, axis=1) # mvs should be an (nchains, ndim) array
WV = np.mean(mvs, axis=0) # WV should be an (ndim) array
VV = (ns-1.)/ns * WV + (nc + 1.)/(ns*nc) * BV # VV should be an (ndim) array
GR = VV/WV # GR should be an (ndim) array
return GR