"""
This defines a class that can be used to estimate the PDF of physical
quantities from a set of input photometry in various bands, together
with a training data set.
"""
import numpy as np
import scipy.interpolate as interp
import os
import os.path as osp
import ctypes
from ctypes import POINTER
from ctypes import c_void_p
from ctypes import c_int
from ctypes import c_uint
from ctypes import c_ulong
from ctypes import c_double
from ctypes import c_bool
import numpy.ctypeslib as npct
import random
from copy import deepcopy
from warnings import warn
try:
import emcee
mc_avail = True
except:
mc_avail = False
pass
##################################################################
# Define some types for use later #
##################################################################
array_1d_double = npct.ndpointer(dtype=c_double, ndim=1,
flags="CONTIGUOUS")
array_1d_ulong = npct.ndpointer(dtype=c_ulong, ndim=1,
flags="CONTIGUOUS")
array_1d_int = npct.ndpointer(dtype=c_int, ndim=1,
flags="CONTIGUOUS")
##################################################################
# Define the cluster_slug class #
##################################################################
[docs]class bp(object):
"""
A class that can be used to estimate the PDF of the physical
properties of stellar population from a training set plus a set of
measured photometric values.
Properties
priors : array, shape (N) | callable | None
prior probability on each data point; interpretation
depends on the type passed; array, shape (N): values are
interpreted as the prior probability of each data point;
callable: the callable must take as an argument an array
of shape (N, nphys), and return an array of shape (N)
giving the prior probability at each data point; None:
no reweighting is performed, so all data points in the library
have equal prior probability, i.e. the prior is the same as
the sampling of points in the library
pobs : array, shape (N) | callable | None
the probability that a particular object would be observed,
which is used, like prior, to weight the library;
interpretation depends on type. None means all objects are
equally likely to be observed, array is an array giving the
observation probability of each object in the library, and
callable means must be a function that takes an array
containing the photometry, of shape (N, nhpot), as an
argument, and returns an array of shape (N) giving the
probability of observation for that object
bandwidth : 'auto' | float | array, shape (M)
bandwidth for kernel density estimation; if set to
'auto', the bandwidth will be estimated automatically; if
set to a scalar quantity, the same bandwidth is used for all
dimensions
nphys : int
number of physical properties in the library
nphot : int
number of photometric properties in the library
ndim : int
nphys + nphot
"""
##################################################################
# Initializer method
##################################################################
[docs] def __init__(self, dataset, nphys, filters=None, bandwidth='auto',
ktype='gaussian', priors=None, pobs=None,
sample_density=None, reltol=1.0e-2, abstol=1.0e-10,
leafsize=16, nosort=None, thread_safe=True,
caching='none'):
"""
Initialize a bp object.
Parameters
dataset : array, shape (N, M)
training data set; this is a set of N sample stellar
populations, having M properties each; the first nphys
represent physical properties (e.g., log mass, log age),
while the next M - nphys represent photometric
properties
nphys : int
number of physical properties in dataset
filters : listlike of strings
names of photometric filters; not used, but can be
stored for convenience
bandwidth : 'auto' | float | array, shape (M)
bandwidth for kernel density estimation; if set to
'auto', the bandwidth will be estimated automatically; if
set to a scalar quantity, the same bandwidth is used for all
dimensions
ktype : string
type of kernel to be used in densty estimation; allowed
values are 'gaussian' (default), 'epanechnikov', and
'tophat'; only Gaussian can be used with error bars
priors : array, shape (N) | callable | None
prior probability on each data point; interpretation
depends on the type passed; array, shape (N): values are
interpreted as the prior probability of each data point;
callable: the callable must take as an argument an array
of shape (N, nphys), and return an array of shape (N)
giving the prior probability at each data point; None:
all data points have equal prior probability
pobs : array, shape (N) | callable | None
the probability that a particular object would be observed,
which is used, like prior, to weight the library;
interpretation depends on type. None means all objects are
equally likely to be observed, array is an array giving the
observation probability of each object in the library, and
callable means must be a function that takes an array
containing the photometry, of shape (N, nhpot), as an
argument, and returns an array of shape (N) giving the
probability of observation for that object
sample_density : array, shape (N) | callable | 'auto' | None
the density of the data samples at each data point; this
need not match the prior density; interpretation depends
on the type passed; array, shape (N): values are
interpreted as the density of data sampling at each
sample point; callable: the callable must take as an
argument an array of shape (N, nphys), and return an
array of shape (N) giving the sampling density at each
point; 'auto': the sample density will be computed
directly from the data set; note that this can be quite
slow for large data sets, so it is preferable to specify
this analytically if it is known; None: data are assumed
to be uniformly sampled
reltol : float
relative error tolerance; errors on all returned
probabilities p will satisfy either
abs(p_est - p_true) <= reltol * p_est OR
abs(p_est - p_true) <= abstol,
where p_est is the returned estimate and p_true is the
true value
abstol : float
absolute error tolerance; see above
leafsize : int
number of data points in each leaf of the KD tree
nosort : arraylike of bool, shape (N) | None
if specified, this keyword causes the KD tree not to be
sorted along the dimensions for which nosort is True
thread_safe : bool
if True, bayesphot will make extra copies of internals
as needed to ensure thread safety when the computation
routines (logL, mpdf, mcmc, bestmatch, make_approx_phot,
make_approx_phys, mpdf_approx) are used with
multiprocessing; this incurs a minor performance
penalty, and can be disabled by setting to False if the
code will not be run with the multiprocessing module
caching : 'aggressive' | 'lazy' | 'none'
strategy for caching subsets of the data with some
dimensions marginalised out; behavior is as follows:
'agressive'
on construction, store sorted data for fast
calculation of 1D PDFs of variables by themselves,
and 1D PDFs of all physical variables marginalised
over all other physical variables; this
significantly increases the memory footprint and
construction time, but greatly speeds up
subsequent evaluation of any of these quantities,
and is generally the best choice for prudction
work in parallel
'lazy'
sorted data sets for fast computation are created
and cached as needed; this will make the first
computation of any marginal PDF slower, but speed
up all subsequent ones, without imposing any
extra time at initial construction; this mode is
generally best for interactive work, but is not
thread_safe = True; memory cost depends on how
many different marginal PDF combinations are
calculated, but is always less than aggressive
'none'
no caching is performed automatically; the user
may still manually cache data by calling
the make_cache method
Returns
Nothing
Raises
IOError, if the bayesphot c library cannot be found
Notes
Because the data sets passed in may be large, this class
does not make copies of any of its arguments, and instead
modifies them in place. However, this means it is the
responsibility of the user not to alter the any of the
arguments once they are passed to this class; for example,
dataset must not be modified after it is passed. Altering the
arguments, except through this class's methods, may cause
incorrect results to be generated.
Caching with 'aggressive' or 'lazy' does create significant
memory overhead.
"""
# Load the c library
self.__clib = npct.load_library("bayesphot",
osp.realpath(__file__))
# Check for diagnostic mode
self.__clib.diagnostic_mode.restype = c_bool
self.__clib.diagnostic_mode.argtypes = None
self.__diag_mode = bool(self.__clib.diagnostic_mode())
# Define interfaces to all the c library functions
self.__clib.build_kd.restype = c_void_p
self.__clib.build_kd.argtypes \
= [ array_1d_double, # x
c_ulong, # ndim
c_ulong, # npt
ctypes.
POINTER(c_double), # wgt
c_ulong, # leafsize
array_1d_double, # bandwidth
c_int, # ktype
c_ulong, # minsplit
POINTER(c_ulong) ] # sortmap
self.__clib.build_kd_sortdims.restype = c_void_p
self.__clib.build_kd_sortdims.argtypes \
= [ array_1d_double, # x
c_ulong, # ndim
c_ulong, # npt
ctypes.
POINTER(c_double), # wgt
c_ulong, # leafsize
array_1d_double, # bandwidth
c_int, # ktype
array_1d_int, # nosort
POINTER(c_ulong) ] # sortmap
self.__clib.copy_kd.restype = c_void_p
self.__clib.copy_kd.argtypes = [ c_void_p ] # kd
self.__clib.free_kd.restype = None
self.__clib.free_kd.argtypes = [ c_void_p ]
self.__clib.free_kd_copy.restype = None
self.__clib.free_kd_copy.argtypes = [ c_void_p ]
self.__clib.kd_change_wgt.restype = None
self.__clib.kd_change_wgt.argtypes \
= [ POINTER(c_double), # wgt
c_void_p ] # kd
self.__clib.kd_change_bandwidth.restype = None
self.__clib.kd_change_bandwidth.argtypes \
= [ array_1d_double, # bandwidth
c_void_p ] # kd
self.__clib.kd_neighbors.restype = None
self.__clib.kd_neighbors.argtypes \
= [ c_void_p, # kd
array_1d_double, # xpt
POINTER(c_ulong), # dims
c_ulong, # ndim
c_ulong, # nneighbor
c_bool, # bandwidth_units
array_1d_double, # pos
POINTER(c_double), # dptr
array_1d_double ] # d2
self.__clib.kd_neighbors_vec.restype = None
self.__clib.kd_neighbors_vec.argtypes \
= [ c_void_p, # kd
array_1d_double, # xpt
POINTER(c_ulong), # dims
c_ulong, # ndim
c_ulong, # npt
c_ulong, # nneighbor
c_bool, # bandwidth_units
array_1d_double, # pos
POINTER(c_double), # dptr
array_1d_double ] # d2
self.__clib.kd_neighbors_point.restype = None
self.__clib.kd_neighbors_point.argtypes \
= [ c_void_p, # kd
c_ulong, # idxpt
c_ulong, # nneighbor
c_bool, # bandwidth_units
array_1d_ulong, # idx
array_1d_double ] # d2
self.__clib.kd_neighbors_point_vec.restype = None
self.__clib.kd_neighbors_point_vec.argtypes \
= [ c_void_p, # kd
array_1d_ulong, # idxpt
c_ulong, # npt
c_ulong, # nneighbor
c_bool, # bandwidth_units
array_1d_ulong, # idx
array_1d_double ] # d2
self.__clib.kd_neighbors_all.restype = None
self.__clib.kd_neighbors_all.argtypes \
= [ c_void_p, # kd
c_ulong, # nneighbor
c_bool, # bandwidth_units
array_1d_ulong, # idx
array_1d_double ] # d2
self.__clib.kd_pdf.restype = c_double
if self.__diag_mode:
self.__clib.kd_pdf.argtypes \
= [ c_void_p, # kd
array_1d_double, # x
c_double, # reltol
c_double, # abstol
POINTER(c_ulong), # nodecheck
POINTER(c_ulong), # leafcheck
POINTER(c_ulong) ] # termcheck
else:
self.__clib.kd_pdf.argtypes \
= [ c_void_p, # kd
array_1d_double, # x
c_double, # reltol
c_double ] # abstol
self.__clib.kd_pdf_grid.restype = None
self.__clib.kd_pdf_grid.argtypes \
= [ c_void_p, # kd
array_1d_double, # xfixed
array_1d_ulong, # dimfixed
c_ulong, # ndimfixed
c_ulong, # nfixed
array_1d_double, # xgrid
array_1d_ulong, # dimgrid
c_ulong, # ndimgrid
c_ulong, # ngrid
c_double, # reltol
c_double, # abstol
array_1d_double ] # pdf
self.__clib.kd_pdf_reggrid.restype = None
self.__clib.kd_pdf_reggrid.argtypes \
= [ c_void_p, # kd
array_1d_double, # xfixed
array_1d_ulong, # dimfixed
c_ulong, # ndimfixed
c_ulong, # nfixed
array_1d_double, # xgridlo
array_1d_double, # xgridhi
array_1d_ulong, # ngrid
array_1d_ulong, # dimgrid
c_ulong, # ndimgrid
c_double, # reltol
c_double, # abstol
array_1d_double ] # pdf
self.__clib.kd_pdf_vec.restype = None
if self.__diag_mode:
self.__clib.kd_pdf_vec.argtypes \
= [ c_void_p, # kd
array_1d_double, # x
POINTER(c_double), # bandwidth
c_ulong, # npt
c_double, # reltol
c_double, # abstol
array_1d_double, # pdf
array_1d_ulong, # nodecheck
array_1d_ulong, # leafcheck
array_1d_ulong ] # termcheck
else:
self.__clib.kd_pdf_vec.argtypes \
= [ c_void_p, # kd
array_1d_double, # x
POINTER(c_double), # bandwidth
c_ulong, # npt
c_double, # reltol
c_double, # abstol
array_1d_double ] # pdf
self.__clib.kd_pdf_int.restype = c_double
if self.__diag_mode:
self.__clib.kd_pdf_int.argtypes \
= [ c_void_p, # kd
array_1d_double, # x
array_1d_ulong, # dims
c_ulong, # ndim
c_double, # reltol
c_double, # abstol
array_1d_ulong, # nodecheck
array_1d_ulong, # leafcheck
array_1d_ulong ] # termcheck
else:
self.__clib.kd_pdf_int.argtypes \
= [ c_void_p, # kd
array_1d_double, # x
array_1d_ulong, # dims
c_ulong, # ndim
c_double, # reltol
c_double ] # abstol
self.__clib.kd_pdf_int_vec.restype = None
if self.__diag_mode:
self.__clib.kd_pdf_int_vec.argtypes \
= [ c_void_p, # kd
array_1d_double, # x
POINTER(c_double), # bandwidth
array_1d_ulong, # dims
c_ulong, # ndim
c_ulong, # npt
c_double, # reltol
c_double, # abstol
array_1d_double, # pdf
array_1d_ulong, # nodecheck
array_1d_ulong, # leafcheck
array_1d_ulong ] # termcheck
else:
self.__clib.kd_pdf_int_vec.argtypes \
= [ c_void_p, # kd
array_1d_double, # x
POINTER(c_double), # bandwidth
array_1d_ulong, # dims
c_ulong, # ndim
c_ulong, # npt
c_double, # reltol
c_double, # abstol
array_1d_double ] # pdf
self.__clib.kd_pdf_int_grid.restype = None
self.__clib.kd_pdf_int_grid.argtypes \
= [ c_void_p, # kd
array_1d_double, # xfixed
array_1d_ulong, # dimfixed
c_ulong, # ndimfixed
c_ulong, # nfixed
array_1d_double, # xgrid
array_1d_ulong, # dimgrid
c_ulong, # ndimgrid
c_ulong, # ngrid
c_double, # reltol
c_double, # abstol
array_1d_double ] # pdf
self.__clib.kd_pdf_int_reggrid.restype = None
self.__clib.kd_pdf_int_reggrid.argtypes \
= [ c_void_p, # kd
array_1d_double, # xfixed
array_1d_ulong, # dimfixed
c_ulong, # ndimfixed
c_ulong, # nfixed
array_1d_double, # xgridlo
array_1d_double, # xgridhi
array_1d_ulong, # ngrid
array_1d_ulong, # dimgrid
c_ulong, # ndimgrid
c_double, # reltol
c_double, # abstol
array_1d_double ] # pdf
self.__clib.kd_rep.restype = c_ulong
self.__clib.kd_rep.argtypes \
= [ c_void_p, # kd
array_1d_double, # x
array_1d_ulong, # dims
c_ulong, # ndim
c_double, # reltol
POINTER(c_ulong), # dim_return
c_ulong, # ndim_return
POINTER(POINTER(
c_double)), # xpt
POINTER(POINTER(
c_double)) ] # wgts
self.__clib.free_kd_rep.restype = None
self.__clib.free_kd_rep.argtypes \
= [ POINTER(POINTER(
c_double)), # xpt
POINTER(POINTER(
c_double)) ] # wgts
self.__clib.squeeze_rep.restype = c_ulong
self.__clib.squeeze_rep.argtypes \
= [ c_ulong, # npts
c_ulong, # ndim
array_1d_double, # h
c_double, # tol
POINTER(POINTER(
c_double)), # xpt
POINTER(POINTER(
c_double)) ] # wgts
self.__clib.kd_pdf_draw.restype = None
self.__clib.kd_pdf_draw.argtypes \
= [ c_void_p, # kd
POINTER(c_double), # x
POINTER(c_ulong), # dims
c_ulong, # ndim
c_ulong, # nsample
c_int, # draw_method
c_void_p, # rng
array_1d_double ] # out
self.__clib.rng_init.restype = c_void_p
self.__clib.rng_init.argtypes \
= [ c_ulong ] # seed
self.__clib.rng_free.restype = None
self.__clib.rng_free.argtypes \
= [ c_void_p ] # r
# Record some of the input parameters
if (ktype == 'gaussian'):
self.__ktype = 2
elif (ktype == 'epanechnikov'):
self.__ktype = 0
elif (ktype == 'tophat'):
self.__ktype = 1
self.leafsize = leafsize
self.__abstol = abstol
self.__reltol = reltol
self.thread_safe = thread_safe
# Store data set
self.__dataset = np.ascontiguousarray(dataset)
# Store list of available filters
self.__filters = deepcopy(filters)
# Initialize internal data
self.__ndata = self.__dataset.shape[0]
self.__nphys = nphys
self.__nphot = self.__dataset.shape[1] - self.__nphys
self.__auto_bw = None
self.__auto_bw_set = False
self.__priors = None
self.__pobs = None
self.__prior_data = None
self.__pobs_data = None
self.__kd_phys = None
self.__cache = []
# Build the initial kernel density estimation object, using a
# dummy bandwidth; record mapping from indices of input data
# to sorted indices, in case we need it to interpret arrays of
# sample density, observation probability, or prior
self.__bandwidth = np.ones(self.__nphys + self.__nphot)
self.__idxmap = np.zeros(self.__ndata, dtype=c_ulong)
if nosort is None:
self.__kd = self.__clib.build_kd(
np.ravel(self.__dataset), self.__dataset.shape[1],
self.__ndata, None, self.leafsize, self.__bandwidth,
self.__ktype, 0,
self.__idxmap.ctypes.data_as(POINTER(c_ulong)))
else:
nosort_c = np.zeros(nosort.shape, dtype=np.intc)
nosort_c[:] = nosort == True
self.__kd = self.__clib.build_kd_sortdims(
np.ravel(self.__dataset), self.__dataset.shape[1],
self.__ndata, None, self.leafsize, self.__bandwidth,
self.__ktype, nosort_c,
self.__idxmap.ctypes.data_as(POINTER(c_ulong)))
self.__idxmap_inv = np.argsort(self.__idxmap)
# Store sample density
if hasattr(sample_density, '__iter__'):
self.__sden = np.array(sample_density)[self.__idxmap]
else:
self.__sden = sample_density
self.__sample_density = None
dummy = self.sample_density
# Initialize the bandwidth
self.bandwidth = bandwidth
# Set priors and observation probability
self.priors = priors
self.pobs = pobs
# Set the caching policy, and, if aggressive, construct the
# data caches
self.caching = caching
if caching == 'lazy' and thread_safe:
warn(
"bp: cannot use lazy caching in thread safe"
" mode; caching policy changed to 'none'")
self.caching == 'none'
if self.caching == 'aggressive':
# Construct cached data sets for calculation of PDFs over
# 1 dimension with all others marginalised out
for d in range(self.ndim):
margindims = list(range(0,d)) + \
list(range(d+1, self.ndim))
self.make_cache(margindims)
# Construct cached data sets for calculation of 1d
# marginal PDFs of physical variables over photometric
# ones
if self.__nphys > 1:
for d in range(self.__nphys):
margindims = list(range(0,d)) + \
list(range(d+1, self.__nphys))
self.make_cache(margindims)
##################################################################
# De-allocation method
##################################################################
def __del__(self):
if hasattr(self, '__kd'):
if self.__kd is not None:
self.__clib.free_kd(self.__kd)
if hasattr(self, '__kd_phys'):
if self.__kd_phys is not None:
self.__clib.free_kd(self.__kd_phys)
if hasattr(self, '__rng'):
self.__clib.rng_free(self.__rng)
##################################################################
# Return a copy of the list of available filters
##################################################################
def filters(self):
return deepcopy(self.__filters)
##################################################################
# Property to get the sampling density
##################################################################
@property
def sample_density(self):
"""
The density with which the library was sampled, evaluated for
each simulation in the library
"""
if self.__sample_density is None:
# Choose computation method
if hasattr(self.__sden, '__call__'):
# Callable, so pass the physical data to the
# callable and store the result
self.__sample_density \
= self.__sden(self.__dataset[:,:self.__nphys])
elif type(self.__sden) is np.ndarray or self.__sden is None:
# Array, so treat treat this as the data
self.__sample_density = self.__sden
elif self.__sden == 'auto':
# We've been asked to calculate the sample density
# ourselves, so do so
# Create unweighted kernel density object for just
# the physical parameters if we have not done so
# already
if self.__kd_phys is None:
self.__dataset_phys \
= np.copy(self.__dataset[:,:self.__nphys])
self.__kd_phys \
= self.__clib.build_kd(
np.ravel(self.__dataset_phys),
self.__nphys, self.__ndata,
None, self.leafsize, self.__bandwidth,
self.__ktype, 0)
# Use the unweighted kernel density object to
# evaluate the raw sample density near each data
# point, or near a sub-sample which we can
# interpolate from
self.__sample_density = np.zeros(self.__ndata)
nsamp = 500
if self.__ndata < nsamp:
# Few data points, just use them all; note
# that we cannot pass self.__dataset_phys,
# because it is not in the same order as
# the full data set anymore
pts = np.ravel(self.__dataset[:,:self.__nphys])
if not self.__diag_mode:
self.__clib.kd_pdf_vec(
self.__kd_phys, pts, None,
self.__ndata, self.reltol, self.abstol,
self.__sample_density)
else:
nodecheck = np.zeros(self.__ndata, dtype=c_ulong)
leafcheck = np.zeros(self.__ndata, dtype=c_ulong)
termcheck = np.zeros(self.__ndata, dtype=c_ulong)
self.__clib.kd_pdf_vec(
self.__kd_phys, pts, None,
self.__ndata, self.reltol, self.abstol,
self.__sample_density_sorted, nodecheck,
leafcheck, termcheck)
else:
# Many data points, so choose a sample at random
idxpt = np.array(
random.sample(np.arange(self.__ndata),
nsamp), dtype=c_ulong)
pos = np.copy(self.__dataset_phys[idxpt,:])
# Add points at the edges of the dataset
# to ensure we enclose all the points
lowlim = np.amin(self.__dataset_phys,
axis=0)
pos = np.append(pos, lowlim)
hilim = np.amax(self.__dataset_phys,
axis=0)
pos = np.append(pos, hilim)
# Compute density at selected points
sample_density = np.zeros(nsamp+2)
if not self.__diag_mode:
self.__clib.kd_pdf_vec(
self.__kd_phys, np.ravel(pos), None,
nsamp+2, self.reltol, self.abstol,
sample_density)
else:
nodecheck = np.zeros(nsamp+2, dtype=c_ulong)
leafcheck = np.zeros(nsamp+2, dtype=c_ulong)
termcheck = np.zeros(nsamp+2, dtype=c_ulong)
self.__clib.kd_pdf_vec(
self.__kd_phys, np.ravel(pos), None,
nsamp+2, self.reltol, self.abstol,
sample_density, nodecheck,
leafcheck, termcheck)
# Now interpolate the sample points to all
# points in the data set
pts = np.ravel(self.__dataset[:,:self.__nphys])
self.__sample_density \
= np.exp(
interp.griddata(pos,
np.log(sample_density),
pts,
method='linear')).flatten()
# Return result, sorted back into the order of the original
# data
if self.__sample_density is not None:
return self.__sample_density[self.__idxmap_inv]
else:
return None
@sample_density.setter
def sample_density(self, sden):
"""
This function sets the sampling density for the library
Parameters
sden : array, shape (N) | callable | None
sample density each data point; interpretation depends
on the type passed:
array, shape (N) :
values are interpreted as the sampling density at
each data point in the dataset used to construct
the bp object
callable :
the callable must take as an argument an array of
shape (N, nphys), and return an array of shape (N)
giving the prior probability at each data point
None :
all data points have equal prior probability
Returns
Nothing
"""
# Erase current sample density information, then force
# recomputation
self.__sample_density = None
if hasattr(sden, '__iter__'):
# If passed an array, assume it is ordered as was the
# original data set, not our re-ordered version, so
# re-order before storing
self.__sden = np.array(sden)[self.__idxmap]
else:
self.__sden = sden
dummy = self.sample_density
# Update any cached objects as well
for c in self.__cache:
c['bp'].sample_density = dummy
##################################################################
# Define the priors and pobs properties
##################################################################
@property
def priors(self):
"""
The current set of prior probabilities for every
simulation in the library; data returned are in the same order
as the data originally used to construct the library
"""
if self.__priors is None:
return None
else:
return self.__prior_data[self.__idxmap_inv]
@priors.setter
def priors(self, prior):
"""
This function sets the prior probabilities to use
Parameters
prior : array, shape (N) | callable | None
prior probability on each data point; interpretation
depends on the type passed:
array, shape (N) :
values are interpreted as the prior probability of
each data point in the dataset used to construct
the bp object
callable :
the callable must take as an argument an array of
shape (N, nphys), and return an array of shape (N)
giving the prior probability at each data point
None :
all data points have equal prior probability
Returns
Nothing
"""
# If prior is an arraylike, sort it using our index map
if hasattr(prior, '__iter__'):
pr = np.array(prior)[self.__idxmap]
else:
pr = prior
# If the prior is unchanged, do nothing
if (type(pr) == np.ndarray) and \
(type(self.__priors) == np.ndarray):
if np.array_equal(pr, self.__priors):
return
elif (type(pr) == type(self.__priors)) and \
(pr == self.__priors):
return
# If priors and pobs are both None, just remove all weighting
if pr is None and self.pobs is None:
self.__clib.kd_change_wgt(None, self.__kd)
self.__priors = None
self.__prior_data = None
for c in self.__cache:
c['bp'].priors = None
return
else:
# If we're here, we have a non-trival prior or pobs;
# record the new prior, and, if it is a callable, call it
self.__priors = pr
if hasattr(self.__priors, '__call__'):
self.__prior_data \
= self.__priors(self.__dataset[:,:self.__nphys]) \
.flatten()
else:
self.__prior_data = self.__priors
# Compute the weights from the ratio of the prior times
# the observation probability to the sample density, then
# adjust the weights in the kd; note that a bit of
# trickery is required, because the sample_density property
# returns the sample density sorted in the original data
# order, not in the sorted order, which is what we need
dummy = self.sample_density # Force re-computation
wgt = np.ones(self.__ndata)
if self.__sample_density is not None:
wgt *= 1.0 / self.__sample_density
if self.__prior_data is not None:
wgt *= self.__prior_data
if self.__pobs_data is not None:
wgt *= self.__pobs_data
self.__wgt = wgt
self.__clib.kd_change_wgt(self.__wgt.ctypes.data_as(
POINTER(c_double)), self.__kd)
# Updated cached bp objects
for c in self.__cache:
c['bp'].priors = self.__prior_data
@property
def pobs(self):
"""
The current set of observation probabilities for every
simulation in the library; data returned are in the same order
as the data originally used to construct the library
"""
if self.__pobs_data is None:
return None
else:
return self.__pobs_data[self.__idxmap_inv]
@pobs.setter
def pobs(self, p_obs):
"""
This function sets the observation probabilities to use
Parameters
p_obs : array, shape (N) | callable | None
probability of observation for each data point;
interpretation depends on the type passed:
array, shape (N) :
values are interpreted as the probability of
observation each data point in the dataset used to
construct the bp object
callable :
the callable must take as an argument an array of
shape (N, nphot), and return an array of shape (N)
giving the probability of observation for each
data point
None :
all data points have equal probability of
observation
Returns
Nothing
"""
# If prior is an arraylike, sort it using our index map
if hasattr(p_obs, '__iter__'):
po = np.array(p_obs)[self.__idxmap]
else:
po = p_obs
# If the observation probability is unchanged, do nothing
if (type(po) == np.ndarray) and \
(type(self.__pobs) == np.ndarray):
if np.array_equal(po, self.__pobs):
return
elif (type(po) == type(self.__pobs)) and \
(po == self.__pobs):
return
# If pobs and prior are both None, just remove all weighting
if po is None and self.priors is None:
self.__clib.kd_change_wgt(None, self.__kd)
self.__pobs = None
self.__pobs_data = None
for c in self.__cache:
c['bp'].pobs = None
return
else:
# If we're here, we have a non-trivial pobs or prior; if
# the new pobs is a callable, call it and save the result
self.__pobs = po
if hasattr(self.__pobs, '__call__'):
self.__pobs_data \
= self.__pobs(self.__dataset[:,self.__nphys:]).\
flatten()
else:
self.__pobs_data = self.__pobs
# Compute the weights on the points, and change the kd
# object appropriately
dummy = self.sample_density # Force re-computation
wgt = np.ones(self.__ndata)
if self.__sample_density is not None:
wgt *= 1.0 / self.__sample_density
if self.__prior_data is not None:
wgt *= self.__prior_data
if self.__pobs_data is not None:
wgt *= self.__pobs_data
self.__wgt = wgt
self.__clib.kd_change_wgt(self.__wgt.ctypes.data_as(
POINTER(c_double)), self.__kd)
# Update cached objects
for c in self.__cache:
c['bp'].pobs = None
##################################################################
# Define the bandwidth property
##################################################################
@property
def bandwidth(self):
"""
The current bandwidth
"""
return deepcopy(self.__bandwidth)
@bandwidth.setter
def bandwidth(self, bw):
if np.array_equal(self.__bandwidth, bw):
# If new bandwidth equals old bandwidth, do nothing
return
elif bw is not 'auto':
# If we've been given a specified bandwidth, set to that
if hasattr(bw, '__iter__'):
self.__bandwidth = np.copy(bw)
else:
self.__bandwidth \
= np.zeros(self.__nphys + self.__nphot) + bw
self.__auto_bw_set = False
else:
# Automatic bandwidth setting
# Are we already set on auto? If so, just return
if self.__auto_bw_set:
return
# Do we have a stored value for the automatic bandwidth?
# If not, we need to compute it.
if self.__auto_bw is None:
# Find 10th nearest neighbors
nneighbor=10
if self.__ndata > 5000:
# For data sets with > 5000 samples, just use a
# sub-sample of 5,000, which is more than enough
# to get a reasonable estimate of the distribution
idxpt = np.array(
random.sample(np.arange(self.__ndata),
5000), dtype=c_ulong)
neighbors = np.zeros(nneighbor*5000,
dtype=c_ulong)
d2 = np.zeros(nneighbor*5000)
self.__clib.kd_neighbors_point_vec(
self.__kd, idxpt, 5000, nneighbor, False,
neighbors, d2)
else:
# For smaller data sets, use it all
neighbors = np.zeros(nneighbor*self.__ndata,
dtype=c_ulong)
d2 = np.zeros(nneighbor*self.__ndata)
idxpt = np.arange(self.__ndata, dtype=c_ulong)
self.__clib.kd_neighbors_all(self.__kd, nneighbor,
False, neighbors, d2)
# Take the bandwidth in each dimension to be the 90th
# percentile of the 10th nearest neighbor distance
offset = np.abs(self.__dataset[idxpt,:] -
self.__dataset[
neighbors[nneighbor-1::nneighbor],:])
self.__auto_bw = np.zeros(self.__nphys+self.__nphot)
for i in range(self.__nphys+self.__nphot):
self.__auto_bw[i] = np.percentile(offset[:,i], 95)
# Set to the auto bandwidth
self.__bandwidth = np.copy(self.__auto_bw)
self.__auto_bw_set = True
# Set the new bandwidth
self.__clib.kd_change_bandwidth(self.__bandwidth, self.__kd)
# If we have stored priors, and we're using automatic sample
# density setting, we need to recompute them for the
# new bandwidth; zero out the stored sample density, and
# adjust the kernel density estimator for the physical
# parameters to the new bandwidth before doing so
if type(self.__sden) is str:
if self.__sden == 'auto':
self.__sample_density = None
if self.__kd_phys is not None:
self.__clib.kd_change_bandwidth(
self.__bandwidth[:self.__nphys], self.__kd_phys)
pr = self.priors
self.priors = None
self.priors = pr
# Update cached objects
for c in self.__cache:
c['bp'] = bw
##################################################################
# Utility methods to change the tolerances. We need properties for
# these because, if we have cached data sets, we need to update
# their tolerances too.
##################################################################
@property
def abstol(self):
return self.__abstol
@abstol.setter
def abstol(self, newtol):
self.__abstol = newtol
for c in self.__cache:
c['bp'].abstol = self.__abstol
@property
def reltol(self):
return self.__reltol
@reltol.setter
def reltol(self, newtol):
self.__reltol = newtol
for c in self.__cache:
c['bp'].reltol = self.__reltol
##################################################################
# Utitlity methods to return the number of physical and
# photometric quantities, and the total number of dimensions,
# without allowing them to be changed
##################################################################
@property
def nphys(self):
"""
This is the number of physical properties for the bayesphot
object.
"""
return self.__nphys
@nphys.setter
def nphys(self, val):
raise ValueError("can't set bayesphot.nphys!")
@property
def nphot(self):
"""
This is the number of photometric properties for the bayesphot
object.
"""
return self.__nphot
@nphot.setter
def nphot(self, val):
raise ValueError("can't set bayesphot.nphot!")
@property
def ndim(self):
"""
This is the number of physical plus photometric properties for
the bayesphot object.
"""
return (self.__nphys + self.__nphot)
@ndim.setter
def ndim(self, val):
raise ValueError("can't set bayesphot.ndim!")
##################################################################
# Utility methods to set a new bandwidth to account for
# photometric errors, to reset to the original bandwidth, and to
# free a temporary kd_tree holder, all in a thread-safe way. These
# are intended for internal use.
##################################################################
# Broaden the bandwidth by the photometric error, and return a KD
# object with the new bandwidth; if we are thread-safe, this is a
# newly-created KD object, which will need to be freed later; if a
# value for kd_cur is passed in, the bandwidth will be changed for
# it, and no allocation will be done regardless of thread safety
def __change_bw_err(self, photerr, kd_cur=None, err_only=False):
if err_only:
bandwidth = np.copy(self.__bandwidth)
bandwidth[self.__nphys:] = photerr
else:
err = np.zeros(self.__bandwidth.size)
err[self.__nphys:] = photerr
bandwidth = np.sqrt(self.__bandwidth**2+err**2)
if kd_cur is not None:
kd_tmp = kd_cur
elif not self.thread_safe:
kd_tmp = self.__kd
else:
kd_tmp = self.__clib.copy_kd(self.__kd)
self.__clib.kd_change_bandwidth(bandwidth, kd_tmp)
return kd_tmp
# Free a temporary KD object
def __free_bw_err(self, kd_tmp):
if self.thread_safe:
self.__clib.free_kd_copy(kd_tmp)
# Restore bandwidth to the default, de-allocating the temporary KD
# object if needed
def __restore_bw_err(self, kd_tmp):
if not self.thread_safe:
self.__clib.kd_change_bandwidth(
self.__bandwidth, self.__kd)
else:
self.__clib.free_kd_copy(kd_tmp)
##################################################################
# Methods to create and destroy caches
##################################################################
[docs] def make_cache(self, margindims):
"""
This method builds a cache to do faster calculation of PDFs
where certain dimensions are marginalised out. If such caches
exist, they are used automatically by all the computation
methods.
Parameters
margindims : listlike of integers
list of dimensions to be marginalised out; physical
dimensions go from 0 - nphys-1, photometric dimensions
from nphys to nphys + nphot - 1
Returns
Nothing
"""
# Safety check on inputs
mdims = np.asarray(margindims, dtype='int')
if np.amin(mdims) < 0 or np.amin(mdims) > self.ndim-1 or \
len(mdims) != len(np.unique(mdims)):
raise ValueError("bp.make_cache: margindims must be "
" unique integers in the range "
" 0, ndim")
# If we already have a cache for this set of marginalised
# dimensions, do nothing
for c in self.__cache:
if c['margindims'] == list(margindims):
return
# Get list of dimensions that remain
keepdims = np.array(list(set(range(self.ndim)) - set(mdims)),
dtype=int)
nphys = np.sum(keepdims < self.nphys)
# Extract the dimensions we're keeping from the data set
data_cache = np.copy(self.__dataset[:,keepdims])
bw = self.bandwidth[keepdims]
if self.__prior_data is not None:
prior_cache = np.copy(self.__prior_data)
else:
prior_cache = None
if self.__pobs_data is not None:
pobs_cache = np.copy(self.__pobs_data)
else:
pobs_cache = None
# Build a new bp object from the dimensionally-reduced data
if self.__ktype == 2:
ktype = 'gaussian'
elif self.__ktype == 1:
ktype = 'tophat'
elif self.__ktype == 0:
ktype = 'epanechnikov'
bp_cache = bp(data_cache, nphys, bandwidth=bw, ktype=ktype,
priors=prior_cache, pobs=pobs_cache,
sample_density=self.__sample_density,
reltol=self.reltol,
abstol=self.abstol, leafsize=self.leafsize,
thread_safe=self.thread_safe,
caching='none')
# Store the new object
self.__cache.append(
{ 'margindims' : list(margindims),
'keepdims' : list(keepdims),
'data' : data_cache,
'prior' : prior_cache,
'pobs' : pobs_cache,
'bp' : bp_cache } )
[docs] def clear_cache(self, margindims=None):
"""
This method deletes from the cache
Parameters
margindims : listlike of integers
list of marginalised dimensions that should be removed
from the cache, in the same format as make_cache; if
left as None, the cache is completely emptied
Returns
Nothing
"""
if margindims is None:
self.__cache = []
else:
for i in range(len(cache)):
if cache[i]['margindims'] == list(margindims):
del cache[i]
return
raise ValueError(
"bp.clear_cache: no cached data found for "+
repr(margindims))
##################################################################
# Method to compute the log likelihood function for a particular
# set of physical properties given a particular set of photometric
# properties; some dimensions can be marginalised over if desired
##################################################################
[docs] def logL(self, physprop, photprop, photerr=None,
margindim=None):
"""
This function returns the natural log of the likelihood
function evaluated at a particular log mass, log age,
extinction, and set of log luminosities
Parameters
physprop : arraylike, shape (nphys) or (..., nphys)
array giving values of the physical properties; for a
multidimensional array, the operation is vectorized over
the leading dimensions
photprop : arraylike, shape (nfilter) or (..., nfilter)
array giving the photometric values; for a
multidimensional array, the operation is vectorized over
the leading dimensions
photerr : arraylike, shape (nfilter) or (..., nfilter)
array giving photometric errors; for a multidimensional
array, the operation is vectorized over the leading
dimensions
margindim : int | arraylike of ints | None
The index or indices of the physical or photometric
properties to be maginalized over, numbered from 0 -
nphys-1 for physical properties and from nphys - nfilter +
nfilter - 1 for photometric properties. If this keyword is
set, then physprop and/or photprop should have fewer
than nphys or nphot elements due to the omission of
marginalised dimensions. If all physical or photometric
dimensions are marginalised out, that corresponding
argument for physprop or photprop should be set to None
Returns
logL : float or arraylike
natural log of the likelihood function
"""
# If we're marginalising, see if we should use a cached data
# set
if margindim is not None:
# If we're in lazy mode, add a cache for this set of
# marginalised dimensions if it doesn't exist
if self.caching == 'lazy':
self.make_cache(margindim)
# Look for this set of dimensions in our existing cache;
# if we find a match, use it
for c in self.__cache:
if list(margindim) == c['margindims']:
return c['bp'].logL(physprop, photprop,
photerr=photerr)
# Safety check: make sure all the dimensions of the input
# arrays match what we expect. If we are marginalizing over
# some dimensions, extract the dimensions we want to keep
if margindim is not None:
nphys_margin = np.sum(np.asarray(margindim) <
self.__nphys)
nphot_margin = len(margindim) - nphys_margin
nphys = self.__nphys - nphys_margin
nphot = self.__nphot - nphot_margin
keepdims = np.array(
list(set(range(self.ndim)) - set(margindim)),
dtype=c_ulong)
bw = self.__bandwidth[keepdims]
else:
nphys = self.__nphys
nphot = self.__nphot
bw = self.__bandwidth
if nphys > 1:
if (np.asarray(physprop).shape[-1] != nphys):
raise ValueError("need " + str(nphys) +
" physical properties!")
if nphys == 0 and physprop is not None:
raise ValueError(
"input includes physical properties, but "
"all physical properties are marginalised "
"out!")
if nphot > 1:
if (np.asarray(photprop).shape[-1] != nphot):
raise ValueError("need " + str(nphot) +
" photometric properties!")
if photerr is not None:
if (np.asarray(photerr).shape[-1] != nphot):
raise ValueError("need " + str(nphot) +
" photometric errors!")
if nphot == 0 and photprop is not None:
raise ValueError(
"input includes photometric properties, but "
"all photometric properties are marginalised "
"out!")
if nphot == 0 and photerr is not None:
raise ValueError(
"input includes photometric errors, but "
"all photometric properties are marginalised "
"out!")
# Handle special case of a single photometric or physical
# quantity; in this case make sure we have a trailing
# dimension of 1 so that all the fancy array manipulation
# below works correctly
if nphys == 1 and (np.asarray(physprop).shape[-1] != 1
or np.asarray(physprop).ndim == 1):
physprop_ = np.array(physprop).\
reshape(np.asarray(physprop).shape+(1,))
else:
physprop_ = physprop
if nphot == 1 and (np.asarray(photprop).shape[-1] != 1
or np.asarray(photprop).ndim == 1):
photprop_ = np.array(photprop).\
reshape(np.asarray(photprop).shape+(1,))
else:
photprop_ = photprop
if nphot == 1 and photerr is not None:
if np.asarray(photerr).shape[-1] != 1 \
or np.asarray(photerr).ndim == 1:
photerr_ = np.array(photerr).\
reshape(np.asarray(photerr).shape+(1,))
else:
photerr_ = photerr
# Figure out number of distinct input sets of physical and
# photometric properties in the input data
if nphys > 0:
nphys_in = np.asarray(physprop_).size // nphys
else:
nphys_in = 0
if nphot > 0:
nphot_in = np.asarray(photprop_).size // nphot
else:
nphot_in = 0
# Allocate an array to hold the results. The shape is a bit
# tricky to figure out, since one or more of the inputs
# (physprop, photprop, and photerr) can be None. We therefore
# need to consider several possible cases.
if physprop_ is not None and photprop_ is not None and \
photerr_ is not None:
pdf = np.zeros(
np.broadcast(np.atleast_2d(physprop_)[..., 0],
np.atleast_2d(photprop_)[..., 0],
np.atleast_2d(photerr_)[..., 0]).shape,
dtype=c_double)
elif physprop_ is not None and photprop_ is not None:
pdf = np.zeros(
np.broadcast(np.atleast_2d(physprop_)[..., 0],
np.atleast_2d(photprop_)[..., 0]).shape,
dtype=c_double)
elif physprop_ is not None:
pdf = np.zeros(np.atleast_2d(physprop_)[..., 0].shape,
dtype=c_double)
elif photprop_ is not None and photerr_ is not None:
pdf = np.zeros(
np.broadcast(np.atleast_2d(photprop_)[..., 0],
np.atleast_2d(photerr_)[..., 0]).shape,
dtype=c_double)
elif photprop_ is not None:
pdf = np.zeros(np.atleast_2d(photprop_)[..., 0].shape,
dtype=c_double)
if self.__diag_mode:
nodecheck = np.zeros(pdf.shape, dtype=c_ulong)
leafcheck = np.zeros(pdf.shape, dtype=c_ulong)
termcheck = np.zeros(pdf.shape, dtype=c_ulong)
# Make an array suitable for passing data to c routines
cdata = np.zeros(pdf.shape + (nphys+nphot,),
dtype=c_double)
if nphys != 0:
cdata[..., :nphys] \
= np.vstack((physprop_,) * (pdf.size//nphys_in)). \
reshape(cdata[..., :nphys].shape)
if nphot != 0:
cdata[..., nphys:] \
= np.vstack((photprop_,) * (pdf.size//nphot_in)). \
reshape(cdata[..., nphys:].shape)
if photerr_ is not None:
cbandwidth = np.zeros(pdf.shape + (nphot,),
dtype=c_double)
cbandwidth[...] = np.sqrt(bw**2 + photerr_**2)
cbw_ptr = cbandwidth.ctypes.data_as(POINTER(c_double))
else:
cbw_ptr = None
# Call the PDF computation routine
if margindim is None:
# No marginalization, so call kd_pdf
if not self.__diag_mode:
self.__clib.kd_pdf_vec(
self.__kd, np.ravel(cdata), cbw_ptr, pdf.size,
self.reltol, self.abstol, np.ravel(pdf))
else:
self.__clib.kd_pdf_vec(
self.__kd, np.ravel(cdata), cbw_ptr, pdf.size,
self.reltol, self.abstol, np.ravel(pdf),
np.ravel(nodecheck), np.ravel(leafcheck),
np.ravel(termcheck))
else:
# We are marginalizing, so call kd_pdf_int
if not self.__diag_mode:
self.__clib.kd_pdf_int_vec(
self.__kd, np.ravel(cdata), cbw_ptr, keepdims,
keepdims.size, pdf.size, self.reltol,
self.abstol, np.ravel(pdf))
else:
self.__clib.kd_pdf_int_vec(
self.__kd, np.ravel(cdata), cbw_ptr, keepdims,
keepdims.size, pdf.size, self.reltol,
self.abstol, np.ravel(pdf),
np.ravel(nodecheck), np.ravel(leafcheck),
np.ravel(termcheck))
# Return
if not self.__diag_mode:
return np.log(pdf)
else:
return np.log(pdf), nodecheck, leafcheck, termcheck
##################################################################
# Function to return the marginal distribution of one or more of
# the physical properties for a specified set of photometric
# properties
##################################################################
[docs] def mpdf(self, idx, photprop, photerr=None, ngrid=128,
qmin=None, qmax=None, grid=None, norm=True):
"""
Returns the marginal probability for one or mode physical
quantities for one or more input sets of photometric
properties. Output quantities are computed on a grid of
values, in the same style as meshgrid.
Parameters:
idx : int or listlike containing ints
index of the physical quantity whose PDF is to be
computed; if this is an iterable, the joint distribution of
the indicated quantities is returned
photprop : arraylike, shape (nfilter) or (..., nfilter)
array giving the photometric values; for a
multidimensional array, the operation is vectorized over
the leading dimensions
photerr : arraylike, shape (nfilter) or (..., nfilter)
array giving photometric errors; for a multidimensional
array, the operation is vectorized over the leading
dimensions
ngrid : int or listlike containing ints
number of points in each dimension of the output grid;
if this is an iterable, it must have the same number of
elements as idx
qmin : float or listlike
minimum value in the output grid in each quantity; if
left as None, defaults to the minimum value in the
library; if this is an iterable, it must contain the
same number of elements as idx
qmax : float or listlike
maximum value in the output grid in each quantity; if
left as None, defaults to the maximum value in the
library; if this is an iterable, it must contain the
same number of elements as idx
grid : listlike of arrays
set of values defining the grid on which the PDF is to
be evaluated, in the same format used by meshgrid
norm : bool
if True, returned pdf's will be normalized to integrate
to 1
Returns:
grid_out : array
array of values at which the PDF is evaluated; contents
are the same as returned by meshgrid
pdf : array
array of marginal posterior probabilities at each point
of the output grid, for each input set of photometry; the leading
dimensions match the leading dimensions produced by
broadcasting the leading dimensions of photprop and
photerr together, while the trailing dimensions match
the dimensions of the output grid
"""
# Check if we're marginalising over any dimensions, and if so
# whether we have or should create a cache for those
# dimensions
if len(np.atleast_1d(idx)) < self.nphys:
# Figure out which dimensions we're marginalising over
keepdim = list(np.atleast_1d(idx))
margindim = list(set(range(self.nphys)) - set(keepdim))
# If we're in lazy mode, add a cache for this set of
# marginalised dimensions
if self.caching == 'lazy':
self.make_cache(margindim)
# Look for this set of dimensions in our existing cache
for c in self.__cache:
if list(margindim) == c['margindims']:
# Found a match; call the bp object that matches
# and return its output
return c['bp'].mpdf(keepdim, photprop,
photerr=photerr,
ngrid=ngrid,
qmin=qmin, qmax=qmax,
grid=grid, norm=norm)
# Safety check
if (np.array(photprop).shape[-1] != self.__nphot) and \
(self.__nphot > 1):
raise ValueError("need " + str(self.__nphot) +
" photometric properties!")
if photerr is not None:
if (np.array(photerr).shape[-1] != self.__nphot) and \
(self.__nphot > 1):
raise ValueError("need " + str(self.__nphot) +
" photometric errors!")
if (np.amax(idx) > self.nphys) or (np.amin(idx) < 0) or \
(not np.array_equal(np.squeeze(np.unique(np.array(idx))),
np.squeeze(np.array([idx])))):
raise ValueError("need non-repeating indices in " +
"the range 0 - {:d}!".
format(nphys-1))
# Reshape arrays if necessary
if (np.array(photprop).shape[-1] != self.__nphot) and \
(self.__nphot == 1):
photprop = photprop.reshape(photprop.shape+(1,))
if photerr is not None:
if (np.array(photerr).shape[-1] != self.__nphot) and \
(self.__nphot == 1):
photerr = photerr.reshape(photerr.shape+(1,))
# Set up the grid of outputs, and the versions of it that we
# will be passing to the c code
if grid is not None:
grid_out = np.array(grid)
qmin_tmp \
= np.array(
np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)]),
dtype=np.double)
qmax_tmp \
= np.array(
np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)]),
dtype=np.double)
ngrid_tmp = np.array(grid_out.shape[1:], dtype=c_ulong)
else:
if qmin is None:
qmin = np.amin(self.__dataset[:,idx], axis=0)
if qmax is None:
qmax = np.amax(self.__dataset[:,idx], axis=0)
griddims = []
if hasattr(idx, '__len__'):
nidx = len(idx)
else:
nidx = 1
if nidx > 1:
# Case for multiple indices
griddims = []
if hasattr(ngrid, '__len__'):
ngrid_tmp = np.array(ngrid, dtype=c_ulong)
else:
ngrid_tmp = np.array([ngrid]*len(idx), dtype=c_ulong)
for i in range(len(idx)):
griddims.append(qmin[i] + np.arange(ngrid_tmp[i]) *
float(qmax[i]-qmin[i])/(ngrid_tmp[i]-1))
grid_out = np.squeeze(np.array(np.meshgrid(*griddims,
indexing='ij')))
out_shape = grid_out[0, ...].shape
qmin_tmp \
= np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)])
qmax_tmp \
= np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)])
else:
# Case for a single index
ngrid_tmp = np.array([ngrid], dtype=c_ulong)
qmin_tmp = np.array([qmin], dtype=np.double).reshape(1)
qmax_tmp = np.array([qmax], dtype=np.double).reshape(1)
grid_out = qmin + \
np.arange(ngrid) * \
float(qmax-qmin)/(ngrid-1)
out_shape = grid_out.shape
# Figure out how many distinct photometric values we've been
# given, and how many sets of photometric errors
nphot = np.array(photprop).size//self.__nphot
nphot_err = np.array(photerr).size//self.__nphot
# Set up a grid to hold the outputs
if photerr is not None:
pdf = np.zeros(
np.broadcast(np.array(photprop)[..., 0],
np.array(photerr)[..., 0]).shape +
out_shape)
else:
pdf = np.zeros(np.array(photprop)[..., 0].shape +
out_shape)
# Prepare data for c library
if hasattr(idx, '__len__'):
nidx = len(idx)
else:
nidx = 1
dims = np.zeros(nidx+self.__nphot, dtype=c_ulong)
dims[:nidx] = idx
dims[nidx:] = self.__nphys+np.arange(self.__nphot, dtype=c_ulong)
ndim = c_ulong(nidx + self.__nphot)
phottmp = np.array(photprop, dtype=c_double)
# Separate cases with single / no photometric errors from
# cases with multiple sets of photometric errors
if nphot_err <= 1:
# Case with at most one set of photometric errors
# Set the bandwidth based on the photometric errors if we
# were given some
if photerr is None:
kd_tmp = self.__kd
else:
kd_tmp = self.__change_bw_err(photerr)
# Call the PDF computation routine; note that we call
# kd_pdf_int_vec if we're actually marginalizing over any
# physical properties (which is the case if len(idx) <
# nphys), but we invoke kd_pdf_vec if we're not actually
# marginalizing (len(idx)==nphys) because then we don't
# need to do any integration
if nidx < self.__nphys:
self.__clib.kd_pdf_int_reggrid(
kd_tmp, np.ravel(phottmp),
dims[nidx:], self.__nphot, nphot,
qmin_tmp, qmax_tmp, ngrid_tmp, dims[:nidx], nidx,
self.reltol, self.abstol, np.ravel(pdf))
else:
self.__clib.kd_pdf_reggrid(
kd_tmp, np.ravel(phottmp),
dims[nidx:], self.__nphot, nphot,
qmin_tmp, qmax_tmp, ngrid_tmp, dims[:nidx], nidx,
self.reltol, self.abstol, np.ravel(pdf))
else:
# Case with multiple sets of photometric errors
# Loop over photometric errors
kd_tmp = None
for i in np.ndindex(*photerr.shape[:-1]):
# Set bandwidth based on photometric error for this
# iteration
kd_tmp = self.__change_bw_err(photerr[i], kd_cur=kd_tmp)
# Grab the corresponding portions of the arrays going
# to and from the c code
if photprop.size < photerr.size:
phot_sub = np.ravel(phottmp)
else:
phot_sub = np.ravel(phottmp[i])
pdf_sub = np.zeros(np.array(pdf[i]).shape)
# Call kernel density estimate with this bandwidth
if nidx < self.__nphys:
self.__clib.kd_pdf_int_reggrid(
kd_tmp, phot_sub,
dims[nidx:], self.__nphot,
phot_sub.size//self.__nphot,
qmin_tmp, qmax_tmp, ngrid_tmp,
dims[:nidx], nidx,
self.reltol, self.abstol, np.ravel(pdf_sub))
else:
self.__clib.kd_pdf_reggrid(
kd_tmp, phot_sub,
dims[nidx:], self.__nphot,
phot_sub.size//self.__nphot,
qmin_tmp, qmax_tmp, ngrid_tmp, dims[:nidx], nidx,
self.reltol, self.abstol, np.ravel(pdf_sub))
pdf[i] = pdf_sub
# Set the bandwidth back to its default if necessary
if photerr is not None:
self.__restore_bw_err(kd_tmp)
# Normalize if requested
if norm:
# Compute the sizes of the output cells
if nidx == 1:
cellsize = np.zeros(grid_out.size)
cellsize[1:-1] = 0.5*(grid_out[2:]-grid_out[:-2])
cellsize[0] = grid_out[1] - grid_out[0]
cellsize[-1] = grid_out[-1] - grid_out[-2]
else:
# Get the cell sizes in each dimension
csize = []
for i in range(nidx):
vec = grid_out[(i,)+i*(0,)+(slice(None),) +
(grid_out.shape[0]-i-1)*(0,)]
csize.append(np.zeros(vec.size))
csize[i][1:-1] = 0.5*(vec[2:]-vec[:-2])
csize[i][0] = vec[1] - vec[0]
csize[i][-1] = vec[-1] - vec[-2]
# Take outer product to get grid of sizes
cellsize = np.multiply.outer(csize[0], csize[1])
for i in range(2, nidx):
cellsize = np.multiply.outer(cellsize, csize[i])
# Compute integral
normfac = np.sum(pdf*cellsize, axis =
tuple(range(np.array(photprop).ndim-1, pdf.ndim)))
# Normalize
pdf = np.transpose(np.transpose(pdf)/normfac)
# Return
return grid_out, pdf
##################################################################
# Function to return the marginal distribution of one or more of
# the photometric properties for a specified set of physical
# properties; this is just like mpdf, but in reverse
##################################################################
[docs] def mpdf_phot(self, idx, physprop, ngrid=128,
qmin=None, qmax=None, grid=None, norm=True):
"""
Returns the marginal probability for one or mode photometric
quantities corresponding to an input set of physical
properties. Output quantities are computed on a grid of
values, in the same style as meshgrid.
Parameters:
idx : int or listlike containing ints
index of the photometric quantity whose PDF is to be
computed, starting at 0; if this is an iterable, the
joint distribution of the indicated quantities is returned
physprop : arraylike, shape (nphys) or (..., nphys)
physical properties to be used; if this is an array of
nphys elements, these give the physical properties; if
it is a multidimensional array, the operation is
vectorized over the leading dimensions
physical properties -- the function must take an array
of (nphys) elements as an input, and return a floating
point value representing the PDF evaluated at that set
of physical properties as an output
ngrid : int or listlike containing ints
number of points in each dimension of the output grid;
if this is an iterable, it must have the same number of
elements as idx
qmin : float or listlike
minimum value in the output grid in each quantity; if
left as None, defaults to the minimum value in the
library; if this is an iterable, it must contain the
same number of elements as idx
qmax : float or listlike
maximum value in the output grid in each quantity; if
left as None, defaults to the maximum value in the
library; if this is an iterable, it must contain the
same number of elements as idx
grid : listlike of arrays
set of values defining the grid on which the PDF is to
be evaluated, in the same format used by meshgrid
norm : bool
if True, returned pdf's will be normalized to integrate
to 1
Returns:
grid_out : array
array of values at which the PDF is evaluated; contents
are the same as returned by meshgrid
pdf : array
array of marginal posterior probabilities at each point
of the output grid, for each input set of properties; the leading
dimensions match the leading dimensions produced by
broadcasting the leading dimensions of photprop and
photerr together, while the trailing dimensions match
the dimensions of the output grid
"""
# Check if we're marginalising over any dimensions, and if so
# whether we have or should create a cache for those
# dimensions
if len(np.atleast_1d(idx)) < self.nphot:
# Figure out which dimensions we're marginalising over
keepdim = list(np.atleast_1d(idx)+self.nphys)
margindim = list(set(range(self.nphys,self.ndim))
- set(keepdim))
# If we're in lazy mode, add a cache for this set of
# marginalised dimensions
if self.caching == 'lazy':
self.make_cache(margindim)
# Look for this set of dimensions in our existing cache
for c in self.__cache:
if list(margindim) == c['margindims']:
# Found a match; call the bp object that matches
# and return its output
return c['bp'].mpdf_phot(
np.array(keepdim)-self.nphys,
physprop,
ngrid=ngrid,
qmin=qmin, qmax=qmax,
grid=grid, norm=norm)
# Safety check
if (np.array(physprop).shape[-1] != self.__nphys) and \
(self.__nphot > 1):
raise ValueError("need " + str(self.__nphys) +
" physical properties!")
if (np.amax(np.asarray(idx)) > self.__nphot) or \
(np.amin(np.asarray(idx)) < 0) or \
(not np.array_equal(np.squeeze(np.unique(np.asarray(idx))),
np.squeeze(np.array([idx])))):
raise ValueError("need non-repeating indices in " +
"the range 0 - {:d}!".
format(self.__nphot-1))
# Reshape arrays if necessary
if (np.array(physprop).shape[-1] != self.__nphys) and \
(self.__nphys == 1):
physprop = physprop.reshape(physprop.shape+(1,))
# Set up the grid of outputs, and the versions of it that we
# will be passing to the c code
if grid is not None:
grid_out = np.array(grid)
qmin_tmp \
= np.array(
np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)]),
dtype=np.double)
qmax_tmp \
= np.array(
np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)]),
dtype=np.double)
ngrid_tmp = np.array(grid_out.shape[1:], dtype=c_ulong)
else:
if qmin is None:
qmin = np.amin(self.__dataset[:,idx+self.__nphys], axis=0)
if qmax is None:
qmax = np.amax(self.__dataset[:,idx+self.__nphys], axis=0)
griddims = []
if hasattr(idx, '__len__'):
nidx = len(idx)
else:
nidx = 1
if nidx > 1:
# Case for multiple indices
griddims = []
if hasattr(ngrid, '__len__'):
ngrid_tmp = np.array(ngrid, dtype=c_ulong)
else:
ngrid_tmp = np.array([ngrid]*len(idx), dtype=c_ulong)
for i in range(len(idx)):
griddims.append(qmin[i] + np.arange(ngrid_tmp[i]) *
float(qmax[i]-qmin[i])/(ngrid_tmp[i]-1))
grid_out = np.squeeze(np.array(np.meshgrid(*griddims,
indexing='ij')))
out_shape = grid_out[0, ...].shape
qmin_tmp \
= np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)])
qmax_tmp \
= np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)])
else:
# Case for a single index
ngrid_tmp = np.array([ngrid], dtype=c_ulong)
qmin_tmp = np.array([qmin], dtype=np.double).reshape(1)
qmax_tmp = np.array([qmax], dtype=np.double).reshape(1)
grid_out = qmin + \
np.arange(ngrid) * \
float(qmax-qmin)/(ngrid-1)
out_shape = grid_out.shape
# Prepare things we'll be passing to the c library
# Prepare data for c library
if hasattr(idx, '__len__'):
nidx = len(idx)
else:
nidx = 1
dims = np.zeros(nidx+self.__nphys, dtype=c_ulong)
dims[:nidx] = self.__nphys + idx
dims[nidx:] = np.arange(self.__nphys, dtype=c_ulong)
ndim = c_ulong(nidx + self.__nphys)
# Prepare output holder
pdf = np.zeros(np.array(physprop)[..., 0].shape +
out_shape)
# Prepare inputs for passing to c
nphys = np.array(physprop).size//self.__nphys
phystmp = np.array(physprop, dtype=c_double)
# Call c library; note that we call kd_pdf_int_reggrid if
# we're actually marginalizing over any photometric
# properties (which is the case if len(idx) <
# self.__nphot), but we invoke kd_pdf_reggrid if we're not
# actually marginalizing (len(idx)==self.__nphot) because
# then we don't need to do any integration
if nidx < self.__nphot:
self.__clib.kd_pdf_int_reggrid(
self.__kd, np.ravel(phystmp), dims[nidx:],
self.__nphys, nphys, qmin_tmp, qmax_tmp,
ngrid_tmp, dims[:nidx], nidx, self.reltol,
self.abstol, np.ravel(pdf))
else:
self.__clib.kd_pdf_reggrid(
self.__kd, np.ravel(phystmp), dims[nidx:],
self.__nphys, nphys, qmin_tmp, qmax_tmp,
ngrid_tmp, dims[:nidx], nidx,
self.reltol, self.abstol, np.ravel(pdf))
# Normalize if requested
if norm:
# Compute the sizes of the output cells
if nidx == 1:
cellsize = np.zeros(grid_out.size)
cellsize[1:-1] = 0.5*(grid_out[2:]-grid_out[:-2])
cellsize[0] = grid_out[1] - grid_out[0]
cellsize[-1] = grid_out[-1] - grid_out[-2]
else:
# Get the cell sizes in each dimension
csize = []
for i in range(nidx):
vec = grid_out[(i,)+i*(0,)+(slice(None),) +
(grid_out.shape[0]-i-1)*(0,)]
csize.append(np.zeros(vec.size))
csize[i][1:-1] = 0.5*(vec[2:]-vec[:-2])
csize[i][0] = vec[1] - vec[0]
csize[i][-1] = vec[-1] - vec[-2]
# Take outer product to get grid of sizes
cellsize = np.multiply.outer(csize[0], csize[1])
for i in range(2, nidx):
cellsize = np.multiply.outer(cellsize, csize[i])
# Compute integral
normfac = np.sum(pdf*cellsize, axis =
tuple(range(np.array(physprop).ndim-1, pdf.ndim)))
# Normalize
pdf = np.transpose(np.transpose(pdf)/normfac)
# Return
return grid_out, pdf
##################################################################
# This is the most general verison of mpdf. It allows the user to
# fix or marginalize any number of physical or photometric
# properties, returning the distribution in the remaining
# properties
##################################################################
[docs] def mpdf_gen(self, fixeddim, fixedprop, margindim, ngrid=128,
qmin=None, qmax=None, grid=None, norm=True):
"""
Returns the marginal probability for one or more physical or
photometric properties, keeping other properties fixed and
marginalizing over other quantities. This is the most general
marginal PDF routine provided.
Parameters:
fixeddim : int | arraylike of ints | None
The index or indices of the physical or photometric
properties to be held fixed; physical properties are
numbered 0 ... nphys-1, and phtometric ones are numbered
nphys ... nphys + nphot - 1. This can also be set to
None, in which case no properties are held fixed.
fixedprop : array | None
The values of the properties being held fixed; the size
of the final dimension must be equal to the number of
elements in fixeddim, and if fixeddim is None, this must
be too
margindim : int | arraylike of ints | None
The index or indices of the physical or photometric
properties to be maginalized over, numbered in the same
way as with fixeddim; if set to None, no marginalization
is performed
ngrid : int or listlike containing ints
number of points in each dimension of the output grid;
if this is an iterable, it must have nphys + nphot -
len(fixeddim) - len(margindim) elements
qmin : float | arraylike
minimum value in the output grid in each quantity; if
left as None, defaults to the minimum value in the
library; if this is an iterable, it must contain a
number of elements equal to nphys + nphot -
len(fixeddim) - len(margindim)
qmax : float | arraylike
maximum value in the output grid in each quantity; if
left as None, defaults to the maximum value in the
library; if this is an iterable, it must have the same
number of elements as qmin
grid : listlike of arrays
set of values defining the grid on which the PDF is to
be evaluated, in the same format used by meshgrid
norm : bool
if True, returned pdf's will be normalized to integrate
to 1
Returns:
grid_out : array
array of values at which the PDF is evaluated; contents
are the same as returned by meshgrid
pdf : array
array of marginal posterior probabilities at each point
of the output grid, for each input set of properties; the leading
dimensions match the leading dimensions produced by
broadcasting the leading dimensions of photprop and
photerr together, while the trailing dimensions match
the dimensions of the output grid
"""
# If we're marginalising out some dimensions, and we have
# cached data for that setup or are going to make it, call our
# cached bp object to do this work for us
if margindim is not None:
# If we're in lazy mode, add a cache for this set of
# marginalised dimensions
if self.caching == 'lazy':
self.make_cache(margindim)
# Look for this set of dimensions in our existing cache
for c in self.__cache:
if list(margindim) == c['margindims']:
# Found a match; update the dimensional indexing
# on fixeddim, then call
if fixeddim is None:
fixeddim_ = None
else:
fixeddim_ = np.array(deepcopy(fixeddim))
diff = np.zeros(fixeddim_.size, dtype=np.int)
for d in margindim:
diff[fixeddim_ > d] += 1
fixeddim_ -= diff
return c['bp'].mpdf_gen(fixeddim_, fixedprop,
None, ngrid=ngrid,
qmin=qmin, qmax=qmax,
grid=grid, norm=norm)
# Safety check: make sure the dimensions all match up
if fixeddim is None:
nfixed = 0
else:
nfixed = np.atleast_1d(np.array(fixeddim)).shape[0]
if len(set(np.atleast_1d(fixeddim))) != nfixed:
raise ValueError("fixeddim must not have any "
"repeated elements!")
if np.array(fixedprop).shape[-1] != nfixed:
raise ValueError("final dimension of fixedprop must" +
" have {:d} dimensions".format(nfixed) +
" to match fixeddim!")
if margindim is None:
nmargin = 0
else:
nmargin = np.atleast_1d(np.array(margindim)).shape[0]
if len(set(np.atleast_1d(margindim))) != nmargin:
raise ValueError("margindim must not have any "
"repeated elements!")
if qmin is not None:
nq = np.atleast_1d(np.array(qmin)).shape[0]
if nq + nmargin + nfixed != \
self.__nphys + self.__nphot:
raise ValueError("qmin must have {:d}".
format(self.__nphys + self.__nphot -
nfixed - nmargin) +
" dimensions!")
else:
nq = self.__nphys + self.__nphot - nfixed - nmargin
if qmax is not None:
if nq != np.atleast_1d(np.array(qmax)).shape[0]:
raise ValueError("qmin and qmax must have matching" +
" number of dimensions")
if grid is not None:
nq = len(grid)
if nq + nmargin + nfixed != \
self.__nphys + self.__nphot:
raise ValueError("grid must have {:d}".
format(self.__nphys + self.__nphot -
nfixed - nmargin) +
" elements!")
if fixeddim is not None and margindim is not None:
if len(set(margindim) & set(fixeddim)) > 0:
raise ValueError("margindim and fixeddim must not "
"have any elements in common!")
# Make versions of all inputs arrays suitable for passing to c
if margindim is not None:
margindim_tmp = np.atleast_1d(
np.array(margindim, dtype=c_ulong))
if fixeddim is not None:
fixeddim_tmp = np.atleast_1d(
np.array(fixeddim, dtype=c_ulong))
xfixed = np.atleast_1d(np.array(fixedprop, dtype=c_double))
# Figure out the indices we're returning
idx = set(range(self.__nphys+self.__nphot))
if margindim is not None:
idx = idx - set(margindim)
if fixeddim is not None:
idx = idx - set(fixeddim)
idx = np.array(list(idx), dtype=c_ulong)
# Set up output grid; make a version to pass to the c code
# that has the axes tranposed, since the c code needs a
# different data ordering
nidx, qmin_tmp, qmax_tmp, grid_out, out_shape \
= self.__make_outgrid(idx, ngrid, qmin, qmax, grid)
grid_out_c = np.transpose(grid_out)
# Set up array to hold the final pdf
if fixedprop is not None:
pdf = np.zeros(np.array(fixedprop)[..., 0] + out_shape,
dtype=c_double)
else:
pdf = np.zeros(out_shape, dtype=c_double)
# Call the correct c library routine, depending on whether we
# have fixed points, marginalized dimensions, both, or neither
if fixeddim is None:
if margindim is None:
# No fixed points, no marginalized dimensions, so just
# call kd_pdf_vec
self.__clib.kd_pdf_vec(
self.__kd, np.ravel(grid_out_c), None,
grid_out_c.size//self.ndim,
self.reltol, self.abstol,
np.ravel(pdf))
else:
# Marginalizing over some dimensions, but no fixed
# points, so call kd_pdf_int_vec
self.__clib.kd_pdf_int_vec(
self.__kd, np.ravel(grid_out_c), None,
idx, len(idx), grid_out_c.size//len(idx),
self.reltol, self.abstol, np.ravel(pdf))
else:
if margindim is None:
# We have fixed points, but no marginalized
# dimensions, so call kd_pdf_reggrid
self.__clib.kd_pdf_reggrid(
self.__kd, np.ravel(xfixed), fixeddim_tmp,
nfixed, xfixed.size//nfixed,
qmin_tmp, qmax_tmp, ngrid_tmp, idx,
idx.size, self.reltol, self.abstol,
np.ravel(pdf))
else:
# We have both fixed points and marginalized
# dimensions, so call kd_pdf_int_reggrid
self.__clib.kd_pdf_int_reggrid(
self.__kd, np.ravel(xfixed), fixeddim_tmp,
nfixed, xfixed.size//nfixed, qmin_tmp,
qmax_tmp, idx, idx.size, self.reltol,
self.abstol, np.ravel(pdf))
# Normalize if requested
if norm:
pdf = self.__mpdf_normalize(nidx, grid_out, pdf)
# Return
return grid_out, pdf
##################################################################
# Utility method to set up grids for outputs of marginal PDF
# routines that are appropriate for passing to the c library. Used
# by all the mpdf routines.
##################################################################
def __make_outgrid(self, idx, ngrid, qmin, qmax, grid):
# Were we given a grid?
if grid is not None:
# Yes, so just make it an array, and extract the limits
# from it
grid_out = np.array(grid, dtype=c_double)
out_shape = grid_out.shape
qmin_tmp \
= np.array(
np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)]),
dtype=np.double)
qmax_tmp \
= np.array(
np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)]),
dtype=np.double)
ngrid_tmp = np.array(grid_out.shape[1:], dtype=c_ulong)
else:
# No input grid was given
# Were we given upper or lower limits? If not, read them
# from the library. For photometric dimensions, exclude
# values of 99, which indicate bad data.
if qmin is None:
qmin = np.amin(self.__dataset[:,idx], axis=0)
if qmax is None:
qmax = np.zeros(len(idx))
for i in range(len(idx)):
if i < self.__nphys:
qmax[i] = np.amax(self.__dataset[:,i])
else:
qmax[i] = np.amax(self.__dataset[:,i][
self.__dataset[:,i] < 99.0])
griddims = []
# Figure out how many dimensions the output has
if hasattr(idx, '__len__'):
nidx = len(idx)
else:
nidx = 1
if nidx > 1:
# Case for multiple indices
griddims = []
if hasattr(ngrid, '__len__'):
ngrid_tmp = np.array(ngrid, dtype=c_ulong)
else:
ngrid_tmp = np.array([ngrid]*len(idx), dtype=c_ulong)
for i in range(len(idx)):
griddims.append(qmin[i] + np.arange(ngrid_tmp[i]) *
float(qmax[i]-qmin[i])/(ngrid_tmp[i]-1))
grid_out = np.squeeze(
np.array(np.meshgrid(*griddims,
indexing='ij'), dtype=c_double))
out_shape = grid_out[0, ...].shape
qmin_tmp \
= np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)])
qmax_tmp \
= np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)])
else:
# Case for a single index
ngrid_tmp = np.array([ngrid], dtype=c_ulong)
qmin_tmp = np.array([qmin], dtype=np.double).reshape(1)
qmax_tmp = np.array([qmax], dtype=np.double).reshape(1)
grid_out = np.array(
qmin + np.arange(ngrid) *
float(qmax-qmin)/(ngrid-1),
dtype=c_double)
out_shape = grid_out.shape
# Return what we've built
return nidx, qmin_tmp, qmax_tmp, grid_out, out_shape
##################################################################
# Utility method to normalize marginal PDFs
##################################################################
def __mpdf_normalize(self, nidx, grid_out, pdf):
# Compute the sizes of the output cells
if nidx == 1:
cellsize = np.zeros(grid_out.size)
cellsize[1:-1] = 0.5*(grid_out[2:]-grid_out[:-2])
cellsize[0] = grid_out[1] - grid_out[0]
cellsize[-1] = grid_out[-1] - grid_out[-2]
else:
# Get the cell sizes in each dimension
csize = []
for i in range(nidx):
vec = grid_out[(i,)+i*(0,)+(slice(None),) +
(grid_out.shape[0]-i-1)*(0,)]
csize.append(np.zeros(vec.size))
csize[i][1:-1] = 0.5*(vec[2:]-vec[:-2])
csize[i][0] = vec[1] - vec[0]
csize[i][-1] = vec[-1] - vec[-2]
# Take outer product to get grid of sizes
cellsize = np.multiply.outer(csize[0], csize[1])
for i in range(2, nidx):
cellsize = np.multiply.outer(cellsize, csize[i])
# Compute integral
normfac = np.sum(pdf*np.abs(cellsize), axis =
tuple(range(pdf.ndim-nidx, pdf.ndim)))
# Normalize
pdf = np.transpose(np.transpose(pdf)/normfac)
# Return
return pdf
##################################################################
# Method to return log likelihood function at a specified set of
# physical properties for a particular set of photometric
# variables; this is set up for use by emcee, and is not intended
# for use by humans
##################################################################
def __logL(self, *args):
x = np.zeros(self.__nphys+self.__nphot)
x[:self.__nphys] = args[0]
x[self.__nphys:] = args[1:-1]
kd_tmp = args[-1]
if not self.__diag_mode:
return np.log(self.__clib.kd_pdf(kd_tmp, x, self.reltol,
self.abstol))
else:
nodecheck = c_ulong(0)
leafcheck = c_ulong(0)
termcheck = c_ulong(0)
return np.log(self.__clib.kd_pdf(kd_tmp, x, self.reltol,
self.abstol, nodecheck,
leafcheck, termcheck))
##################################################################
# Function to compute an MCMC sampler for a particular set of
# photometric values
##################################################################
[docs] def mcmc(self, photprop, photerr=None, mc_walkers=100,
mc_steps=500, mc_burn_in=50):
"""
This function returns a sample of MCMC walkers sampling the
physical parameters at a specified set of photometric values.
Parameters:
photprop : arraylike, shape (nfilter) or (..., nfilter)
array giving the photometric values; for a
multidimensional array, the operation is vectorized over
the leading dimensions
photerr : arraylike, shape (nfilter) or (..., nfilter)
array giving photometric errors; for a multidimensional
array, the operation is vectorized over the leading
dimensions
mc_walkers : int
number of walkers to use in the MCMC
mc_steps : int
number of steps in the MCMC
mc_burn_in : int
number of steps to consider "burn-in" and discard
Returns
samples : array
array of sample points returned by the MCMC
"""
# See if we have emcee
if not mc_avail:
raise ImportError("unable to import emcee")
# Safety check
if (np.array(photprop).shape[-1] != self.__nphot) and \
(self.__nphot > 1):
raise ValueError("need " + str(self.__nphot) +
" photometric properties!")
if photerr is not None:
if (np.array(photerr).shape[-1] != self.__nphot) and \
(self.__nphot > 1):
raise ValueError("need " + str(self.__nphot) +
" photometric errors!")
# Reshape arrays if necessary
if (np.array(photprop).shape[-1] != self.__nphot) and \
(self.__nphot == 1):
photprop = photprop.reshape(photprop.shape+(1,))
if photerr is not None:
if (np.array(photerr).shape[-1] != self.__nphot) and \
(self.__nphot == 1):
photerr = photerr.reshape(photerr.shape+(1,))
# Prepare storage for output
samples = []
# Make dummy photometric errors if necessary
if photerr is None:
photerr = np.zeros(self.__nphot)
# Loop over photometric errors
kd_tmp = None
for i in np.ndindex(*np.array(photerr).shape[:-1]):
# Set bandwidth based on photometric error for this
# iteration
kd_tmp = self.__change_bw_err(photerr[i], kd_cur=kd_tmp)
# Grab the clusters that go with this photometric error
if photprop.ndim < photerr.ndim:
ph = photprop
else:
ph = photprop[i]
# Loop over clusters
for j in np.ndindex(*np.array(ph).shape[:-1]):
# Grab photometric values for this cluster
ph_tmp = ph[j]
# Search the data set for the sample cluster closest to
# the observed luminosities; this will be the starting
# point for the MCMC
dims = np.arange(self.__nphys,
self.__nphys+self.__nphot,
dtype=np.uint32)
nearpt = np.zeros(self.__nphys+self.__nphot)
wgt = np.zeros(1)
dist2 = np.zeros(1)
self.__clib. \
kd_neighbors(kd_tmp, ph_tmp,
dims.ctypes.data_as(POINTER(c_ulong)),
self.__nphot, 1, True, nearpt,
wgt.ctypes.data_as(POINTER(c_double)),
dist2)
# Generate a set of starting points by scattering walkers
# around the starting position
pos = [nearpt[:self.__nphys] +
self.bandwidth[:self.__nphys] *
np.random.randn(self.__nphys)
for i in range(mc_walkers)]
# Run the MCMC
sampler=emcee.EnsembleSampler(mc_walkers, self.__nphys,
self.__logL,
args=[ph_tmp, kd_tmp])
sampler.run_mcmc(pos, mc_steps)
# Store the result
samples.append(sampler.chain[:,mc_burn_in:,:].
reshape((-1,self.__nphys)))
# Set bandwidth back to default if necessary
if photerr is not None:
self.__restore_bw_err(kd_tmp)
# Reshape the samples
samples = np.squeeze(
np.array(samples).reshape(
np.broadcast(np.array(photprop)[..., 0],
np.array(photerr)[..., 0]).shape +
samples[0].shape))
# Return
return samples
##################################################################
# Function to return the N best matches in the library to an input
# set of photometric properties
##################################################################
[docs] def bestmatch(self, phot, photerr=None, nmatch=1,
bandwidth_units=False):
"""
Searches through the simulation library and returns the closest
matches to an input set of photometry.
Parameters:
phot : arraylike, shape (nfilter) or (..., nfilter)
array giving the photometric values; for a
multidimensional array, the operation is vectorized over
the leading dimensions
photerr : arraylike, shape (nfilter) or (..., nfilter)
array giving photometric errors, which must have the
same shape as phot; if this is not None,
then distances will be measured in units of the
photometric error if bandwidth_units is False, or in
units of the bandwidth added in quadrature with the
errors if it is True
nmatch : int
number of matches to return; returned matches will be
ordered by distance from the input
bandwidth_units : bool
if False, distances are computed based on the
logarithmic difference in luminosity; if True, they are
measured in units of the bandwidth
Returns:
matches : array, shape (..., nmatch, nphys + nfilter)
best matches to the input photometry; shape in the
leading dimensions will be the same as for phot, and if
nmatch == 1 then that dimension will be omitted
dist : array, shape (..., nmatch)
distances between the matches and the input photometry
"""
# Safety check
if (np.array(phot).shape[-1] != self.__nphot) and \
(self.__nphot > 1):
raise ValueError("need " + str(self.__nphot) +
" photometric properties!")
if photerr is not None:
if (np.array(photerr).shape[-1] != self.__nphot) and \
(self.__nphot > 1):
raise ValueError("need " + str(self.__nphot) +
" photometric errors!")
if np.array(photerr).shape != np.array(phot).shape:
raise ValueError("phot and photerr must have the same shape!")
# Reshape arrays if necessary
if (np.array(phot).shape[-1] != self.__nphot) and \
(self.__nphot == 1):
phot = phot.reshape(phot.shape+(1,))
if photerr is not None:
if (np.array(photerr).shape[-1] != self.__nphot) and \
(self.__nphot == 1):
photerr = photerr.reshape(photerr.shape+(1,))
# Figure out how many distinct photometric values we've been
# given, and how many sets of photometric errors
nphot = np.array(phot).size//self.__nphot
nphot_err = np.array(photerr).size//self.__nphot
# Figure out what shape the output should have
if nphot == 1 and nphot_err == 1:
outshape = [self.__nphys + self.__nphot]
elif photerr is None:
outshape = list(phot.shape[:-1]) + \
[self.__nphys + self.__nphot]
else:
outshape = list(
np.broadcast(np.array(phot)[..., 0],
np.array(photerr)[..., 0]).shape) + \
[self.__nphys + self.__nphot]
if nmatch > 1:
outshape.insert(-1, nmatch)
# Create array to hold output
matches = np.zeros(outshape)
if len(outshape) > 1:
wgts = np.zeros(outshape[:-1])
d2 = np.zeros(outshape[:-1])
else:
wgts = np.zeros([1])
d2 = np.zeros([1])
# Do we have errors?
if photerr is None:
# No, so just call the c neighbor-finding routine
dims = np.arange(self.__nphys, self.__nphot+self.__nphys,
dtype=c_ulong)
self.__clib.kd_neighbors_vec(
self.__kd, np.ravel(phot),
dims.ctypes.data_as(POINTER(c_ulong)),
self.__nphot, np.array(phot).size // self.__nphot,
nmatch, bandwidth_units, np.ravel(matches),
wgts.ctypes.data_as(POINTER(c_double)),
np.ravel(d2))
elif nphot_err == 1:
# Yes, we have errors, but only one set, so no need to
# loop
# Change the bandwidth to match the input errors
kd_tmp = self.__change_bw_err(
photerr, err_only = not bandwidth_units)
# Now call c neighbor-finding routine
dims = np.arange(self.__nphys, self.__nphot+self.__nphys,
dtype=c_ulong)
self.__clib.kd_neighbors_vec(
kd_tmp, np.ravel(phot),
dims.ctypes.data_as(POINTER(c_ulong)),
self.__nphot, np.array(phot).size//self.__nphot,
nmatch, True, np.ravel(matches),
wgts.ctypes.data_as(POINTER(c_double)),
np.ravel(d2))
# Restore bandwidth to previous value
self.__restore_bw_err(kd_tmp)
else:
# We have multiple sets of errors, so loop
ptr = 0
kd_tmp = None
for i in np.ndindex(*photerr.shape[:-1]):
# Set bandwidth based on photometric error for this
# iteration
kd_tmp = self.__change_bw_err(
photerr[i], kd_cur = kd_tmp,
err_only = not bandwidth_units)
# Call c neighbor-finding routine
offset1 = ptr*nmatch
offset2 = offset1*(self.__nphot+self.__nphys)
dims = np.arange(self.__nphys, self.__nphot+self.__nphys,
dtype=c_ulong)
self.__clib.kd_neighbors_vec(
kd_tmp, np.ravel(phot[i]),
dims.ctypes.data_as(POINTER(c_ulong)),
self.__nphot, np.array(phot[i]).size//self.__nphot,
nmatch, True, np.ravel(matches)[offset2:],
wgts.ctypes.data_as(POINTER(c_double)),
np.ravel(d2)[offset1:])
ptr = ptr+1
# Restore bandwidth to previous value
self.__restore_bw_err(kd_tmp)
# Return
return matches, np.sqrt(d2)
##################################################################
# Function to return the N best matches in the library to an input
# set of physical properties
##################################################################
[docs] def bestmatch_phys(self, phys, nmatch=1, bandwidth_units=False):
"""
Searches through the simulation library and returns the closest
matches to an input set of photometry.
Parameters:
phot : arraylike, shape (nphys) or (..., nphys)
array giving the physical values; for a
multidimensional array, the operation is vectorized over
the leading dimensions
nmatch : int
number of matches to return; returned matches will be
ordered by distance from the input
bandwidth_units : bool
if False, distances are computed based on the
logarithmic difference in physical properties; if True,
they are measured in units of the bandwidth
Returns:
matches : array, shape (..., nmatch, nphys + nfilter)
best matches to the input properties; shape in the
leading dimensions will be the same as for phot, and if
nmatch == 1 then that dimension will be omitted
dist : array, shape (..., nmatch)
distances between the matches and the input physical
properties
"""
# Safety check
if (np.array(phys).shape[-1] != self.__nphys) and \
(self.__nphot > 1):
raise ValueError("need " + str(self.__nphys) +
" physical properties!")
# Figure out how many points we were given
phys_tmp = np.array(phys)
npt = phys_tmp.size // self.__nphys
# Prepare the output arrays
outshape = list(phys_tmp.shape[:-1]) + \
[self.__nphys + self.__nphot]
if nmatch > 1:
outshape.insert(-1, nmatch)
# Create array to hold output
matches = np.zeros(outshape)
if len(outshape) > 1:
wgts = np.zeros(outshape[:-1])
d2 = np.zeros(outshape[:-1])
else:
wgts = np.zeros([1])
d2 = np.zeros([1])
# Call c routine
dims = np.arange(self.__nphys, dtype=c_ulong)
self.__clib.kd_neighbors_vec(
self.__kd, np.ravel(phys_tmp),
dims.ctypes.data_as(POINTER(c_ulong)),
self.__nphys, npt, nmatch, bandwidth_units,
np.ravel(matches),
wgts.ctypes.data_as(POINTER(c_double)),
d2)
# Return
return matches, np.sqrt(d2)
##################################################################
# Functions to return an approximate kernel density representation
# of a given point; that is, for an input physical point or
# photometric point (there are two versions below), this routine
# returns a set of points and weights that can be used to compute
# an approximation to the PDF for it anywhere in space, much
# faster than could be done using the full data set
##################################################################
[docs] def make_approx_phot(self, phys, squeeze=True, filter_ignore=None):
"""
Returns an object that can be used for a fast approximation of
the PDF of photometric properties that corresponds to a set of
physical properties. The PDF produced by summing over the
points returned is guaranteed to account for at least 1-reltol
of the marginal photometric probability, and to represent the
shape of the PDF in photometric space within a local accuracy
of reltol as well.
Parameters:
phys : arraylike, shape (nphys) or (N, nphys)
the set or sets of physical properties for which the
approximation is to be generated
squeeze : bool
if True, the representation returned will be squeezed to
minimize the number of points included, using reltol as
the error tolerance
filter_ignore : None or listlike of bool
if None, the kernel density representation returned
covers all filters; otherwise this must be a listlike of
bool, one entry per filter, with a value of False
indicating that filter should be excluded from the
values returned; suppressing filters can allow for more
efficient representations
Returns:
x : array, shape (M, nphot), or a list of such arrays
an array containing the list of points to be used for
the approximation, where nphot is the number of
photometric filters being returned
wgts : array, shape (M), or a list of such arrays
an array containing the weights of the points
Notes:
if the requested relative tolerance cannot be reached for
numerical reasons (usually because the input point is too
far from the library to allow accurate computation), x and
wgts will be return as None, and a warning will be issued
"""
# If given only one set of physical variables, make a
# temporary list we can loop over
if hasattr(phys[0], '__iter__'):
phystmp = phys
else:
phystmp = [phys]
# Specify dimensions to return, and set bandwidth for them
if filter_ignore is None:
dim_return_ptr = None
ndim_return = self.__nphot
bw = self.__bandwidth[self.__nphys:]
else:
dim_return = np.where(np.logical_not(
np.array(filter_ignore)))[0] + self.__nphys
dim_return_ptr = dim_return.ctypes.data_as(POINTER(c_ulong))
ndim_return = len(dim_return)
bw = self.__bandwidth[dim_return]
# Loop over input physical variables
x = []
wgts = []
for ph in phystmp:
# Safety check
if len(ph) != self.__nphys:
raise ValueError("need " + str(self.__nphys) +
" physical properties!")
# Call c library routine to make the representation
xout = POINTER(c_double)()
wgtsout = POINTER(c_double)()
npts = self.__clib.kd_rep(self.__kd, np.array(ph),
np.arange(self.__nphys, dtype=c_ulong),
self.__nphys, self.reltol,
dim_return_ptr, ndim_return,
ctypes.byref(xout),
ctypes.byref(wgtsout))
# Check if the c routine encountered an error due to
# insufficient precision; if so, issue warning and return
# None in place of xpt and wgts
if npts == 0:
warn("bp.make_approx_phot: requested precision cannot be reached")
return None
# Call c library routine to squeeze the representation
if squeeze:
npts = self.__clib.squeeze_rep(
npts, ndim_return, bw, self.reltol,
ctypes.byref(xout), ctypes.byref(wgtsout))
# Convert the returned values into numpy arrays; copy the
# data so that we can free the c buffers
xsave = np.copy(npct.as_array(xout, shape=(npts, ndim_return)))
wgtssave = np.copy(npct.as_array(wgtsout, shape=(npts,)))
# Free the c buffers
self.__clib.free_kd_rep(ctypes.byref(xout),
ctypes.byref(wgtsout))
# Append to the lists we'll be returning
x.append(xsave)
wgts.append(wgtssave)
# If we were given a single object, return something in the
# same shape
if not hasattr(phys[0], '__iter__'):
x = x[0]
wgts = wgts[0]
# Return
return x, wgts
[docs] def make_approx_phys(self, phot, photerr=None, squeeze=True,
phys_ignore=None, tol=None):
"""
Returns an object that can be used for a fast approximation of
the PDF of physical properties that corresponds to a set of
photometric properties. The PDF produced by summing over the
points returned is guaranteed to account for at least 1-reltol
of the marginal photometric probability, and to represent the
shape of the PDF in photometric space within a local accuracy
of reltol as well.
Parameters:
phot : arraylike, shape (nfilter) or (N, nfilter)
the set or sets of photometric properties for which the
approximation is to be generated
photerr : arraylike, shape (nfilter) or (N, nfilter)
array giving photometric errors; the number of elements
in the output lists will be the size that results from
broadcasting together the leading dimensions of phot and
photerr
squeeze : bool
if True, the representation returned will be squeezed to
minimize the number of points included, using reltol as
the error tolerance
phys_ignore : None or listlike of bool
if None, the kernel density representation returned
covers all physical properties; otherwise this must be a
listlike of bool, one entry per physical dimension, with
a value of False indicating that dimension should be
excluded from the values returned; suppressing
dimensions can allow for more efficient representations
tol : float
if set, this tolerance overrides the value of reltol
Returns:
x : array, shape (M, nphys), or a list of such arrays
an array containing the list of points to be used for
the approximation, where nphys is the number of
physical dimensions being returned
wgts : array, shape (M), or a list of such arrays
an array containing the weights of the points
Notes:
if the requested relative tolerance cannot be reached for
numerical reasons (usually because the input point is too
far from the library to allow accurate computation), x and
wgts will be return as None, and a warning will be issued
"""
# If given only one set of photometric variables, make a
# temporary list we can loop over; same for photometric errors
if hasattr(phot[0], '__iter__'):
phottmp = phot
else:
phottmp = [phot]
if photerr is not None:
if hasattr(photerr[0], '__iter__'):
photerrtmp = photerr
else:
photerrtmp = [photerr]
else:
photerrtmp = [None]
# Specify dimensions to return, and set bandwidth for them
if phys_ignore is None:
dim_return_ptr = None
ndim_return = self.__nphys
bw = self.__bandwidth[:self.__nphys]
else:
dim_return = np.where(np.logical_not(
np.array(phys_ignore)))[0]
dim_return_ptr = dim_return.ctypes.data_as(POINTER(c_ulong))
ndim_return = len(dim_return)
bw = self.__bandwidth[dim_return]
# Set tolerance
if tol is None:
tol = self.reltol
# Loop over input photometric variables
x = []
wgts = []
for ph in phottmp:
# Safety check
if len(ph) != self.__nphot:
raise ValueError("need " + str(self.__nphot) +
" photometric properties!")
# Loop over photometric errors
kd_tmp = None
for pherr in photerrtmp:
# Change the bandwidth if necessary
if pherr is not None:
if len(pherr) != self.__nphot:
raise ValueError("need " + str(self.__nphot) +
" photometric errors!")
kd_tmp = self.__change_bw_err(pherr, kd_cur=kd_tmp)
# Call c library routine to make the representation
xout = POINTER(c_double)()
wgtsout = POINTER(c_double)()
npts = self.__clib.kd_rep(
kd_tmp, np.array(ph),
np.arange(self.__nphot, dtype=c_ulong)+self.__nphys,
self.__nphot, tol,
dim_return_ptr, ndim_return,
ctypes.byref(xout),
ctypes.byref(wgtsout))
# Check if the c routine encountered an error due to
# insufficient precision; if so, issue warning and
# return None in place of xpt and wgts
if npts == 0:
warn("bp.make_approx_phys: requested precision cannot be reached")
if kd_tmp is not None:
self.__restore_bw_err(kd_tmp)
return None
# Call c library routine to squeeze the representation
if squeeze:
npts = self.__clib.squeeze_rep(
npts, ndim_return, bw, self.reltol,
ctypes.byref(xout), ctypes.byref(wgtsout))
# Convert the returned values into numpy arrays; copy the
# data so that we can free the c buffers
xsave = np.copy(npct.as_array(xout,
shape=(npts, ndim_return)))
wgtssave = np.copy(npct.as_array(wgtsout, shape=(npts,)))
# Free the c buffers
self.__clib.free_kd_rep(ctypes.byref(xout),
ctypes.byref(wgtsout))
# Append to the lists we'll be returning
x.append(xsave)
wgts.append(wgtssave)
# Reset the bandwidth
if photerr is not None:
self.__restore_bw_err(kd_tmp)
# If we were given a single object, return something in the
# same shape
if not hasattr(phot[0], '__iter__'):
if photerr is None:
x = x[0]
wgts = wgts[0]
elif not hasattr(photerr[0], '__iter__'):
x = x[0]
wgts = wgts[0]
# Return
return x, wgts
##################################################################
# Method to squeeze a representation that has already been created
##################################################################
[docs] def squeeze_rep(self, x, wgts, dims=None):
"""
Takes an input array of positions and weights that form a
kernel density representation and approximates them using
fewer points, using an error tolerance of reltol
Parameters:
x : array, shape (N, ndim)
an array of points forming a kernel density
representation; on exit, x will be resized to (M, ndim)
with M <= N
wgts : array, shape (N)
an array of weights for the kernel density
representation; on exit, wgts will be resized to (M),
with M <= N
dims : array, shape (ndim)
array specifying which dimensions in the kernel density
representation the coordinates in x correspond to; if
left as None, they are assumed to correspond to the
first ndim dimensions in the data set
Returns:
Nothing
"""
# Get the bandwidth
if dims is None:
bw = self.__bandwidth[:x.shape[1]]
else:
bw = self.__bandwidth[dims]
# Prepare data to pass to the c routine
npt = x.shape[0]
ndim = x.shape[1]
xtmp = POINTER(c_double)(x.ctypes.data_as(POINTER(c_double)))
wgtstmp = POINTER(wgts.ctypes.data_as(POINTER(c_double)))
# Call the c squeeze routine
npt_out = self.__clib.squeeze_rep(
npt, ndim, bw, self.reltol,
ctypes.byref(xtmp), ctypes.byref(wgtstmp))
# Discard the old metadata for x and wgts, and rebuild them
# using the new metadata
x = npct.as_array(xtmp, shape=(npt_out, ndim))
wgts = npct.as_array(wgtstmp, shape=(ndim,))
##################################################################
# Routines to use the approximate representations returned by
# make_approx_phys and make_approx_phot to compute marginal
# posterior probabilities
##################################################################
[docs] def mpdf_approx(self, x, wgts, dims='phys', dims_return=None,
ngrid=64, qmin='all', qmax='all', grid=None,
norm=True):
"""
Returns the marginal posterior PDF computed from a kernel
density approximation returned by make_approx_phys or
make_approx_phot. Outputs are computed on a grid of values, in
the same style as meshgrid.
Parameters:
x : array, shape (M, ndim), or a list of such arrays
array of points retured by make_approx_phot or
make_approx_phys
wgts : array, shape (M) or a list of such arrays
array of weights returned by make_approx_phot or
make_approx_phys
dims : 'phys' | 'phot' | arraylike of ints
dimensions covered by x and wgts; the strings 'phys' or
'phot' indicate that they cover all physical or
photometric dimensions, and correspond to the defaults
returned by make_approx_phys and make_approx_phot,
respectively; if dims is an array of ints, these specify
the dimensions covered by x and wgts, where the
physical dimensions are numbered 0, 1, ... nphys-1, and
the photometric ones are nphys, nphys+1,
... nphys+nphot-1
dims_return : None or arraylike of ints
if None, the output PDF has the same dimensions as
specified in dms; if not, then dimreturn must be a
subset of dim, and a marginal PDF in certain dimensions
will be generated
ngrid : int or listlike containing ints
number of points in each dimension of the output grid;
if this is an iterable, it must have the same number of
elements as idx
qmin : float | listlike | 'zoom' | 'all'
minimum value in the output grid in each quantity; if
this a float, it is applied to each dimension; if it is
an iterable, it must contain the same number of elements
as the number of dimensions being returned, as gives the
minimum in each dimension; if it is 'zoom' or 'all', the
minimum is chosen automatically, with 'zoom' focusing on
a region encompassing the probability maximum, and 'all'
encompassing all the points in the representation
qmax : float | listlike | 'zoom' | 'all'
same as qmin, but for the maximum of the output grid
grid : listlike of arrays
set of values defining the grid on which the PDF is to
be evaluated, in the same format used by meshgrid
norm : bool
if True, returned pdf's will be normalized to integrate
to 1
Returns:
grid_out : array
array of values at which the PDF is evaluated; contents
are the same as returned by meshgrid
pdf : array
array of marginal posterior probabilities at each point
of the output grid, for each input cluster; the leading
dimensions match the leading dimensions produced by
broadcasting the leading dimensions of photprop and
photerr together, while the trailing dimensions match
the dimensions of the output grid
"""
# Set up input and output dimensions
if dims == 'phys':
dim_in = np.arange(self.__nphys)
elif dims == 'phot':
dim_in = np.arange(self.__nphot) + self.__nphys
else:
dim_in = dims
if dims_return is None:
dim_out = dim_in
elif not hasattr(dims_return, '__iter__'):
dim_out = [dims_return]
if not dim_out in dim_in:
raise ValueError("dims_return must be a subset of dims!")
elif set(dims_return) <= set(dim_in):
dim_out = dims_return
else:
raise ValueError("dims_return must be a subset of dims!")
nidx = len(dim_out)
# Set up the output grid
if grid is not None:
grid_out = np.meshgrid(grid)
qmin \
= np.array(
np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)]),
dtype=np.double)
qmax \
= np.array(
np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)]),
dtype=np.double)
ngrid_tmp = np.squeeze(grid_out).shape
for i in range(len(dim_out)):
griddims.append(np.linspace(qmin[i], qmax[i],
ngrid_tmp[i]))
else:
if qmin == 'zoom' or qmin == 'all' or \
qmax == 'zoom' or qmax == 'all':
if qmin is 'zoom' or qmin == 'all':
get_qmin = True
qmin = []
else:
get_qmin = False
if qmax == 'zoom' or qmax == 'all':
qmax = []
get_qmax = True
else:
get_qmax = False
for d in dim_out:
xtmp = np.copy(x[:,d])
wgttmp = np.copy(wgts)
idx = np.argsort(xtmp)
xtmp = xtmp[idx]
wgttmp = wgttmp[idx]
wgtsum = np.cumsum(wgttmp)
if get_qmin:
if qmin == 'all':
qmin.append(xtmp[0])
else:
qmin.append(xtmp[np.argmax(wgtsum > 0.001)])
if get_qmax:
if wgtsum[-1] > 0.999 and qmax is not 'all':
qmax.append(xtmp[np.argmax(wgtsum >
0.999)])
else:
qmax.append(xtmp[-1])
if get_qmin:
qmin = np.array(qmin)
if get_qmax:
qmax = np.array(qmax)
griddims = []
if nidx > 1:
# Case for multiple indices
griddims = []
if hasattr(ngrid, '__len__'):
ngrid_tmp = np.array(ngrid)
else:
ngrid_tmp = np.array([ngrid]*len(dim_out))
for i in range(len(dim_out)):
griddims.append(np.linspace(qmin[i], qmax[i],
ngrid_tmp[i]))
grid_out = np.squeeze(np.array(np.meshgrid(*griddims,
indexing='ij')))
else:
# Case for a single index
grid_out = qmin + \
np.arange(ngrid) * \
float(qmax-qmin)/(ngrid-1)
griddims = [grid_out]
# Compute result; in the multi-dimensional case this involves
# some tricky array indexing
if nidx == 1:
pdf = np.einsum('i,ij', wgts,
np.exp(-np.subtract.outer(
x[:,dim_out[0]], grid_out)**2 /
(2*self.__bandwidth[dim_out[0]]**2)))
else:
compsum = np.exp(
-np.subtract.outer(x[:,dim_out[0]], griddims[0])**2 /
(2.0*self.__bandwidth[dim_out[0]]**2))
for d, grd in zip(dim_out[1:], griddims[1:]):
comp = np.exp(-np.subtract.outer(x[:,d], grd)**2 / \
(2.0*self.__bandwidth[d]**2))
compsum = np.einsum('...j,...k->...jk', compsum, comp)
pdf = np.einsum('i,i...', wgts, compsum)
# Normalize if requested
if norm:
# Compute the sizes of the output cells
if nidx == 1:
cellsize = np.zeros(grid_out.size)
cellsize[1:-1] = 0.5*(grid_out[2:]-grid_out[:-2])
cellsize[0] = grid_out[1] - grid_out[0]
cellsize[-1] = grid_out[-1] - grid_out[-2]
else:
# Get the cell sizes in each dimension
csize = []
for i in range(nidx):
vec = grid_out[(i,)+i*(0,)+(slice(None),) +
(grid_out.shape[0]-i-1)*(0,)]
csize.append(np.zeros(vec.size))
csize[i][1:-1] = 0.5*(vec[2:]-vec[:-2])
csize[i][0] = vec[1] - vec[0]
csize[i][-1] = vec[-1] - vec[-2]
# Take outer product to get grid of sizes
cellsize = np.multiply.outer(csize[0], csize[1])
for i in range(2, nidx):
cellsize = np.multiply.outer(cellsize, csize[i])
# Compute integral
normfac = np.sum(pdf*cellsize)
# Normalize
pdf = pdf/normfac
# Return
return grid_out, pdf
##################################################################
# Routines to draw from the kernel density PDF, constraining
# dimensions or not
##################################################################
[docs] def draw_sample(self, photerr=None, nsample=1):
"""
Returns a randomly-drawn sample of physical and photometric
properties from the kernel density function
Parameters:
nsample : int
number of random samples to draw for each set of
physical properties; must be positive
photerr : arraylike, shape (nphot)
photometric errors to apply to the output photometry;
these are added in quadrature with the kernel density
estimation bandwidth
Returns:
samples : array, shape (nsample, nphys+nphot)
random sample drawn from the kernel density object; for
the final dimension in the output, the first nphys
elements are the physical quantities, the next nphot are
the photometric quantities
"""
# Safety check
if (nsample < 1):
raise ValueError("need to draw at least 1 sample")
# Allocate a random number generator from gsl if we do not
# already have one
if not hasattr(self, '__rng'):
self.__rng = self.__clib.rng_init(0)
# Allocate array to hold outputs
samples = np.zeros((nsample, self.__nphys+self.__nphot),
dtype=c_double)
# Apply photometric errors if requested
if photerr is None:
kd_tmp = self.__kd
else:
kd_tmp = self.__change_bw_err(photerr)
# Call library function
self.__clib.kd_pdf_draw(kd_tmp, None, None, 0,
nsample, 1, self.__rng,
np.ravel(samples))
# Restore kernel density bandwidth
if photerr is not None:
self.__restore_bw_err(kd_tmp)
# Return samples
return samples
[docs] def draw_phot(self, physprop, physidx=None, photerr=None,
nsample=1):
"""
Returns a randomly-drawn sample of photometric properties for
one or more input sets of physical properties.
Parameters:
physprop : arraylike
physical properties to be used; the final dimension of
the input must have len(physidx) indices, or nphys
indicates if physidx is None; if the input is a
multidimensional array, the operation is vectorized over
the leading dimensions physical properties
physidx : arraylike
indices of the physical quantities being constrained; if
left as None, all physical properties are set, and
physprop must have a trailing dimension of size equal to
nphys; otherwise this must be an arraylike of <= nphys
positive integers, each unique and in the range [0,
nphys), specying which physical dimensions are
constrained
photerr : arraylike, shape (nphot)
photometric errors to apply to the output photometry;
these are added in quadrature with the kernel density
estimation bandwidth
nsample : int
number of random samples to draw for each set of
physical properties; must be positive
Returns:
samples : arraylike
sample of photometric properties; the shape of the
output is (..., nsample, nphot), where nphot is the
number of photometric properties, and the leading
dimension(s) match the leading dimension(s) of physprop
"""
# Safety check and sanitization on inputs
if (nsample < 1):
raise ValueError("need to draw at least 1 sample")
if physidx is None:
physidx_ = np.arange(self.__nphys, dtype=c_ulong)
else:
physidx_ = np.flatten(np.array(physidx_))
if (len(physidx_) > self.__nphys) or \
(len(physidx_) != len(np.unique(physidx))) or \
(np.amin(physidx_) < 0) or \
(np.amax(physidx_) >= self.__nphys):
raise ValueError("physidx must be an array of "
"unique indices from 0 to {:d}".
format(self.__nphys-1))
physprop_ = np.array(physprop)
if physprop_.shape[-1] != physidx_.size:
raise ValueError("physidx must have as many elements as "
"trailing dimension of physprop")
# Set up array to hold outputs
if physprop_.ndim == 1:
physprop_ = physprop_.reshape((1,)+physprop_.shape)
sample_dim = physprop_.shape[:-1]
samples = np.zeros(sample_dim + (nsample, self.__nphot))
# Apply photometric errors if requested
if photerr is None:
kd_tmp = self.__kd
else:
kd_tmp = self.__change_bw_err(photerr)
# Allocate a random number generator from gsl if we do not
# already have one
if not hasattr(self, '__rng'):
self.__rng = self.__clib.rng_init(0)
# Loop over leading dimensions of input physical properties
for i in np.ndindex(*physprop_.shape[:-1]):
# Call library function
self.__clib.kd_pdf_draw(
kd_tmp,
physprop_[i].ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
physidx_.ctypes.data_as(ctypes.POINTER(c_ulong)),
physidx_.size,
nsample, 0,
self.__rng,
np.ravel(samples[i]))
# Restore kernel density bandwidth
if photerr is not None:
self.__restore_bw_err(kd_tmp)
# Reshape output if needed
if physprop_.ndim == 1:
samples = samples[0]
# Return samples
return samples