Source code for slugpy.bayesphot.bp

"""
This defines a class that can be used to estimate the PDF of physical
quantities from a set of input photometry in various bands, together
with a training data set.
"""

import numpy as np
import scipy.interpolate as interp
import os
import os.path as osp
import ctypes
from ctypes import POINTER
from ctypes import c_void_p
from ctypes import c_int
from ctypes import c_uint
from ctypes import c_ulong
from ctypes import c_double
from ctypes import c_bool
import numpy.ctypeslib as npct
from copy import deepcopy
from warnings import warn
try:
    import emcee
    mc_avail = True
except:
    mc_avail = False
    pass

##################################################################
# Define some types for use later                                #
##################################################################
array_1d_double = npct.ndpointer(dtype=c_double, ndim=1,
                                 flags="CONTIGUOUS")
array_1d_ulong = npct.ndpointer(dtype=c_ulong, ndim=1,
                               flags="CONTIGUOUS")
array_1d_int = npct.ndpointer(dtype=c_int, ndim=1,
                              flags="CONTIGUOUS")

##################################################################
# Define the cluster_slug class                                  #
##################################################################


[docs]
class bp(object):
    """
    A class that can be used to estimate the PDF of the physical
    properties of stellar population from a training set plus a set of
    measured photometric values.

    Properties
       priors : array, shape (N) | callable | None
          prior probability on each data point; interpretation
          depends on the type passed; array, shape (N): values are
          interpreted as the prior probability of each data point;
          callable: the callable must take as an argument an array
          of shape (N, nphys), and return an array of shape (N)
          giving the prior probability at each data point; None:
          no reweighting is performed, so all data points in the library
          have equal prior probability, i.e. the prior is the same as
          the sampling of points in the library
       pobs : array, shape (N) | callable | None
          the probability that a particular object would be observed,
          which is used, like prior, to weight the library;
          interpretation depends on type. None means all objects are
          equally likely to be observed, array is an array giving the
          observation probability of each object in the library, and
          callable means must be a function that takes an array
          containing the photometry, of shape (N, nhpot), as an
          argument, and returns an array of shape (N) giving the
          probability of observation for that object
       bandwidth : 'auto' | float | array, shape (M)
          bandwidth for kernel density estimation; if set to
          'auto', the bandwidth will be estimated automatically; if
          set to a scalar quantity, the same bandwidth is used for all
          dimensions
       nphys : int
          number of physical properties in the library
       nphot : int
          number of photometric properties in the library
       ndim : int
          nphys + nphot
    """

    ##################################################################
    # Initializer method
    ##################################################################

[docs]
    def __init__(self, dataset, nphys, filters=None, bandwidth='auto',
                 ktype='gaussian', priors=None, pobs=None,
                 sample_density=None, reltol=1.0e-2, abstol=1.0e-10,
                 leafsize=16, nosort=None, thread_safe=True,
                 caching='none'):
        """
        Initialize a bp object.

        Parameters
           dataset : array, shape (N, M)
              training data set; this is a set of N sample stellar
              populations, having M properties each; the first nphys
              represent physical properties (e.g., log mass, log age),
              while the next M - nphys represent photometric
              properties
           nphys : int
              number of physical properties in dataset
           filters : listlike of strings
              names of photometric filters; not used, but can be
              stored for convenience
           bandwidth : 'auto' | float | array, shape (M)
              bandwidth for kernel density estimation; if set to
              'auto', the bandwidth will be estimated automatically; if
              set to a scalar quantity, the same bandwidth is used for all
              dimensions
           ktype : string
              type of kernel to be used in densty estimation; allowed
              values are 'gaussian' (default), 'epanechnikov', and
              'tophat'; only Gaussian can be used with error bars
           priors : array, shape (N) | callable | None
              prior probability on each data point; interpretation
              depends on the type passed; array, shape (N): values are
              interpreted as the prior probability of each data point;
              callable: the callable must take as an argument an array
              of shape (N, nphys), and return an array of shape (N)
              giving the prior probability at each data point; None:
              all data points have equal prior probability
           pobs : array, shape (N) | callable | None
              the probability that a particular object would be observed,
              which is used, like prior, to weight the library;
              interpretation depends on type. None means all objects are
              equally likely to be observed, array is an array giving the
              observation probability of each object in the library, and
              callable means must be a function that takes an array
              containing the photometry, of shape (N, nhpot), as an
              argument, and returns an array of shape (N) giving the
              probability of observation for that object
           sample_density : array, shape (N) | callable | 'auto' | None
              the density of the data samples at each data point; this
              need not match the prior density; interpretation depends
              on the type passed; array, shape (N): values are
              interpreted as the density of data sampling at each
              sample point; callable: the callable must take as an
              argument an array of shape (N, nphys), and return an
              array of shape (N) giving the sampling density at each
              point; 'auto': the sample density will be computed
              directly from the data set; note that this can be quite
              slow for large data sets, so it is preferable to specify
              this analytically if it is known; None: data are assumed
              to be uniformly sampled
           reltol : float
              relative error tolerance; errors on all returned
              probabilities p will satisfy either
              abs(p_est - p_true) <= reltol * p_est   OR
              abs(p_est - p_true) <= abstol,
              where p_est is the returned estimate and p_true is the
              true value
           abstol : float
              absolute error tolerance; see above
           leafsize : int
              number of data points in each leaf of the KD tree
           nosort : arraylike of bool, shape (N) | None
              if specified, this keyword causes the KD tree not to be
              sorted along the dimensions for which nosort is True
           thread_safe : bool
              if True, bayesphot will make extra copies of internals
              as needed to ensure thread safety when the computation
              routines (logL, mpdf, mcmc, bestmatch, make_approx_phot,
              make_approx_phys, mpdf_approx) are used with
              multiprocessing; this incurs a minor performance
              penalty, and can be disabled by setting to False if the
              code will not be run with the multiprocessing module
           caching : 'aggressive' | 'lazy' | 'none'
              strategy for caching subsets of the data with some
              dimensions marginalised out; behavior is as follows:

                 'agressive'
                    on construction, store sorted data for fast
                    calculation of 1D PDFs of variables by themselves,
                    and 1D PDFs of all physical variables marginalised
                    over all other physical variables; this
                    significantly increases the memory footprint and
                    construction time, but greatly speeds up
                    subsequent evaluation of any of these quantities,
                    and is generally the best choice for prudction
                    work in parallel
                 'lazy'
                    sorted data sets for fast computation are created
                    and cached as needed; this will make the first
                    computation of any marginal PDF slower, but speed
                    up all subsequent ones, without imposing any
                    extra time at initial construction; this mode is
                    generally best for interactive work, but is not
                    thread_safe = True; memory cost depends on how
                    many different marginal PDF combinations are
                    calculated, but is always less than aggressive
                 'none'
                    no caching is performed automatically; the user
                    may still manually cache data by calling
                    the make_cache method

        Returns
           Nothing

        Raises
           IOError, if the bayesphot c library cannot be found

        Notes
           Because the data sets passed in may be large, this class
           does not make copies of any of its arguments, and instead
           modifies them in place. However, this means it is the
           responsibility of the user not to alter the any of the
           arguments once they are passed to this class; for example,
           dataset must not be modified after it is passed. Altering the
           arguments, except through this class's methods, may cause
           incorrect results to be generated.

           Caching with 'aggressive' or 'lazy' does create significant
           memory overhead.
        """
        
        # Load the c library
        self.__clib = npct.load_library("bayesphot", 
                                        osp.realpath(__file__))

        # Check for diagnostic mode
        self.__clib.diagnostic_mode.restype = c_bool
        self.__clib.diagnostic_mode.argtypes = None
        self.__diag_mode = bool(self.__clib.diagnostic_mode())

        # Define interfaces to all the c library functions
        self.__clib.build_kd.restype = c_void_p
        self.__clib.build_kd.argtypes \
            = [ array_1d_double,   # x
                c_ulong,           # ndim
                c_ulong,           # npt
                ctypes.
                POINTER(c_double), # wgt
                c_ulong,           # leafsize
                array_1d_double,   # bandwidth
                c_int,             # ktype
                c_ulong,           # minsplit
                POINTER(c_ulong) ] # sortmap

        self.__clib.build_kd_sortdims.restype = c_void_p
        self.__clib.build_kd_sortdims.argtypes \
            = [ array_1d_double,   # x
                c_ulong,           # ndim
                c_ulong,           # npt
                ctypes.
                POINTER(c_double), # wgt
                c_ulong,           # leafsize
                array_1d_double,   # bandwidth
                c_int,             # ktype
                array_1d_int,      # nosort
                POINTER(c_ulong) ] # sortmap

        self.__clib.copy_kd.restype = c_void_p
        self.__clib.copy_kd.argtypes = [ c_void_p ] # kd

        self.__clib.free_kd.restype = None
        self.__clib.free_kd.argtypes = [ c_void_p ]

        self.__clib.free_kd_copy.restype = None
        self.__clib.free_kd_copy.argtypes = [ c_void_p ]

        self.__clib.kd_change_wgt.restype = None
        self.__clib.kd_change_wgt.argtypes \
            = [ POINTER(c_double), # wgt
                c_void_p ]         # kd

        self.__clib.kd_change_bandwidth.restype = None
        self.__clib.kd_change_bandwidth.argtypes \
            = [ array_1d_double,   # bandwidth
                c_void_p ]         # kd

        self.__clib.kd_neighbors.restype = None
        self.__clib.kd_neighbors.argtypes \
            = [ c_void_p,          # kd
                array_1d_double,   # xpt
                POINTER(c_ulong),  # dims
                c_ulong,           # ndim
                c_ulong,           # nneighbor
                c_bool,            # bandwidth_units
                array_1d_double,   # pos
                POINTER(c_double), # dptr
                array_1d_double ]  # d2
        self.__clib.kd_neighbors_vec.restype = None
        self.__clib.kd_neighbors_vec.argtypes \
            = [ c_void_p,          # kd
                array_1d_double,   # xpt
                POINTER(c_ulong),  # dims
                c_ulong,           # ndim
                c_ulong,           # npt
                c_ulong,           # nneighbor
                c_bool,            # bandwidth_units
                array_1d_double,   # pos
                POINTER(c_double), # dptr
                array_1d_double ]  # d2

        self.__clib.kd_neighbors_point.restype = None
        self.__clib.kd_neighbors_point.argtypes \
            = [ c_void_p,          # kd
                c_ulong,           # idxpt
                c_ulong,           # nneighbor
                c_bool,            # bandwidth_units
                array_1d_ulong,     # idx
                array_1d_double ]  # d2
        self.__clib.kd_neighbors_point_vec.restype = None
        self.__clib.kd_neighbors_point_vec.argtypes \
            = [ c_void_p,          # kd
                array_1d_ulong,     # idxpt
                c_ulong,           # npt
                c_ulong,           # nneighbor
                c_bool,            # bandwidth_units
                array_1d_ulong,     # idx
                array_1d_double ]  # d2

        self.__clib.kd_neighbors_all.restype = None
        self.__clib.kd_neighbors_all.argtypes \
            = [ c_void_p,          # kd
                c_ulong,           # nneighbor
                c_bool,            # bandwidth_units
                array_1d_ulong,     # idx
                array_1d_double ]  # d2

        self.__clib.kd_pdf.restype = c_double
        if self.__diag_mode:
            self.__clib.kd_pdf.argtypes \
                = [ c_void_p,          # kd
                    array_1d_double,   # x
                    c_double,          # reltol
                    c_double,          # abstol
                    POINTER(c_ulong),  # nodecheck
                    POINTER(c_ulong),  # leafcheck
                    POINTER(c_ulong) ] # termcheck
        else:
            self.__clib.kd_pdf.argtypes \
                = [ c_void_p,          # kd
                    array_1d_double,   # x
                    c_double,          # reltol
                    c_double ]         # abstol

        self.__clib.kd_pdf_grid.restype = None
        self.__clib.kd_pdf_grid.argtypes \
            = [ c_void_p,              # kd
                array_1d_double,       # xfixed
                array_1d_ulong,         # dimfixed
                c_ulong,               # ndimfixed
                c_ulong,               # nfixed
                array_1d_double,       # xgrid
                array_1d_ulong,         # dimgrid
                c_ulong,               # ndimgrid
                c_ulong,               # ngrid
                c_double,              # reltol
                c_double,              # abstol
                array_1d_double ]      # pdf
        self.__clib.kd_pdf_reggrid.restype = None
        self.__clib.kd_pdf_reggrid.argtypes \
            = [ c_void_p,              # kd
                array_1d_double,       # xfixed
                array_1d_ulong,         # dimfixed
                c_ulong,               # ndimfixed
                c_ulong,               # nfixed
                array_1d_double,       # xgridlo
                array_1d_double,       # xgridhi
                array_1d_ulong,         # ngrid
                array_1d_ulong,         # dimgrid
                c_ulong,               # ndimgrid
                c_double,              # reltol
                c_double,              # abstol
                array_1d_double ]      # pdf

        self.__clib.kd_pdf_vec.restype = None
        if self.__diag_mode:
            self.__clib.kd_pdf_vec.argtypes \
                = [ c_void_p,          # kd
                    array_1d_double,   # x
                    POINTER(c_double), # bandwidth
                    c_ulong,           # npt
                    c_double,          # reltol
                    c_double,          # abstol
                    array_1d_double,   # pdf
                    array_1d_ulong,     # nodecheck
                    array_1d_ulong,     # leafcheck
                    array_1d_ulong ]    # termcheck
        else:
            self.__clib.kd_pdf_vec.argtypes \
                = [ c_void_p,          # kd
                    array_1d_double,   # x
                    POINTER(c_double), # bandwidth
                    c_ulong,           # npt
                    c_double,          # reltol
                    c_double,          # abstol
                    array_1d_double ]  # pdf
        self.__clib.kd_pdf_int.restype = c_double
        if self.__diag_mode:
            self.__clib.kd_pdf_int.argtypes \
                = [ c_void_p,          # kd
                    array_1d_double,   # x
                    array_1d_ulong,    # dims
                    c_ulong,           # ndim
                    c_double,          # reltol
                    c_double,          # abstol
                    array_1d_ulong,    # nodecheck
                    array_1d_ulong,    # leafcheck
                    array_1d_ulong ]   # termcheck
        else:
            self.__clib.kd_pdf_int.argtypes \
                = [ c_void_p,          # kd
                    array_1d_double,   # x
                    array_1d_ulong,    # dims
                    c_ulong,           # ndim
                    c_double,          # reltol
                    c_double ]         # abstol
        self.__clib.kd_pdf_int_vec.restype = None
        if self.__diag_mode:
            self.__clib.kd_pdf_int_vec.argtypes \
                = [ c_void_p,          # kd
                    array_1d_double,   # x
                    POINTER(c_double), # bandwidth
                    array_1d_ulong,    # dims
                    c_ulong,           # ndim
                    c_ulong,           # npt
                    c_double,          # reltol
                    c_double,          # abstol
                    array_1d_double,   # pdf
                    array_1d_ulong,    # nodecheck
                    array_1d_ulong,    # leafcheck
                    array_1d_ulong ]   # termcheck
        else:
            self.__clib.kd_pdf_int_vec.argtypes \
                = [ c_void_p,          # kd
                    array_1d_double,   # x
                    POINTER(c_double), # bandwidth
                    array_1d_ulong,    # dims
                    c_ulong,           # ndim
                    c_ulong,           # npt
                    c_double,          # reltol
                    c_double,          # abstol
                    array_1d_double ]  # pdf
        self.__clib.kd_pdf_int_grid.restype = None
        self.__clib.kd_pdf_int_grid.argtypes \
            = [ c_void_p,              # kd
                array_1d_double,       # xfixed
                array_1d_ulong,        # dimfixed
                c_ulong,               # ndimfixed
                c_ulong,               # nfixed
                array_1d_double,       # xgrid
                array_1d_ulong,        # dimgrid
                c_ulong,               # ndimgrid
                c_ulong,               # ngrid
                c_double,              # reltol
                c_double,              # abstol
                array_1d_double ]      # pdf
        self.__clib.kd_pdf_int_reggrid.restype = None
        self.__clib.kd_pdf_int_reggrid.argtypes \
            = [ c_void_p,              # kd
                array_1d_double,       # xfixed
                array_1d_ulong,        # dimfixed
                c_ulong,               # ndimfixed
                c_ulong,               # nfixed
                array_1d_double,       # xgridlo
                array_1d_double,       # xgridhi
                array_1d_ulong,        # ngrid
                array_1d_ulong,        # dimgrid
                c_ulong,               # ndimgrid
                c_double,              # reltol
                c_double,              # abstol
                array_1d_double ]      # pdf                

        self.__clib.kd_rep.restype = c_ulong
        self.__clib.kd_rep.argtypes \
            = [ c_void_p,              # kd
                array_1d_double,       # x
                array_1d_ulong,        # dims
                c_ulong,               # ndim
                c_double,              # reltol
                POINTER(c_ulong),      # dim_return
                c_ulong,               # ndim_return
                POINTER(POINTER(
                    c_double)),        # xpt
                POINTER(POINTER(
                    c_double)) ]       # wgts
        self.__clib.free_kd_rep.restype = None
        self.__clib.free_kd_rep.argtypes \
            = [ POINTER(POINTER(
                c_double)),            # xpt
               POINTER(POINTER(
                   c_double)) ]        # wgts
        self.__clib.squeeze_rep.restype = c_ulong
        self.__clib.squeeze_rep.argtypes \
            = [ c_ulong,               # npts
                c_ulong,               # ndim
                array_1d_double,       # h
                c_double,              # tol
                POINTER(POINTER(
                    c_double)),        # xpt
                POINTER(POINTER(
                    c_double)) ]       # wgts

        self.__clib.kd_pdf_draw.restype = None
        self.__clib.kd_pdf_draw.argtypes \
            = [ c_void_p,              # kd
                POINTER(c_double),     # x
                POINTER(c_ulong),      # dims
                c_ulong,               # ndim
                c_ulong,               # nsample
                c_int,                 # draw_method
                c_void_p,              # rng
                array_1d_double ]      # out
        self.__clib.rng_init.restype = c_void_p
        self.__clib.rng_init.argtypes \
            = [ c_ulong ]              # seed
        self.__clib.rng_free.restype = None
        self.__clib.rng_free.argtypes \
            = [ c_void_p ]             # r

        # Record some of the input parameters
        if (ktype == 'gaussian'):
            self.__ktype = 2
        elif (ktype == 'epanechnikov'):
            self.__ktype = 0
        elif (ktype == 'tophat'):
            self.__ktype = 1
        self.leafsize = leafsize
        self.__abstol = abstol
        self.__reltol = reltol
        self.thread_safe = thread_safe

        # Store data set
        self.__dataset = np.ascontiguousarray(dataset)

        # Store list of available filters
        self.__filters = deepcopy(filters)

        # Initialize internal data
        self.__ndata = self.__dataset.shape[0]
        self.__nphys = nphys
        self.__nphot = self.__dataset.shape[1] - self.__nphys
        self.__auto_bw = None
        self.__auto_bw_set = False
        self.__priors = None
        self.__pobs = None
        self.__prior_data = None
        self.__pobs_data = None
        self.__kd_phys = None
        self.__cache = []

        # Build the initial kernel density estimation object, using a
        # dummy bandwidth; record mapping from indices of input data
        # to sorted indices, in case we need it to interpret arrays of
        # sample density, observation probability, or prior
        self.__bandwidth = np.ones(self.__nphys + self.__nphot)
        self.__idxmap = np.zeros(self.__ndata, dtype=c_ulong)
        if nosort is None:
            self.__kd = self.__clib.build_kd(
                np.ravel(self.__dataset), self.__dataset.shape[1],
                self.__ndata, None, self.leafsize, self.__bandwidth,
                self.__ktype, 0,
                self.__idxmap.ctypes.data_as(POINTER(c_ulong)))
        else:
            nosort_c = np.zeros(nosort.shape, dtype=np.intc)
            nosort_c[:] = nosort == True
            self.__kd = self.__clib.build_kd_sortdims(
                np.ravel(self.__dataset), self.__dataset.shape[1],
                self.__ndata, None, self.leafsize, self.__bandwidth,
                self.__ktype, nosort_c,
                self.__idxmap.ctypes.data_as(POINTER(c_ulong)))
        self.__idxmap_inv = np.argsort(self.__idxmap)

        # Store sample density
        if hasattr(sample_density, '__iter__'):
            self.__sden = np.array(sample_density)[self.__idxmap]
        else:
            self.__sden = sample_density
        self.__sample_density = None
        dummy = self.sample_density

        # Initialize the bandwidth
        self.bandwidth = bandwidth

        # Set priors and observation probability
        self.priors = priors
        self.pobs = pobs

        # Set the caching policy, and, if aggressive, construct the
        # data caches
        self.caching = caching
        if caching == 'lazy' and thread_safe:
            warn(
                "bp: cannot use lazy caching in thread safe"
                " mode; caching policy changed to 'none'")
            self.caching == 'none'
        if self.caching == 'aggressive':
            # Construct cached data sets for calculation of PDFs over
            # 1 dimension with all others marginalised out
            for d in range(self.ndim):
                margindims = list(range(0,d)) + \
                             list(range(d+1, self.ndim))
                self.make_cache(margindims)
            # Construct cached data sets for calculation of 1d
            # marginal PDFs of physical variables over photometric
            # ones
            if self.__nphys > 1:
                for d in range(self.__nphys):
                    margindims = list(range(0,d)) + \
                                 list(range(d+1, self.__nphys))
                    self.make_cache(margindims)



    ##################################################################
    # De-allocation method
    ##################################################################
    def __del__(self):
        if hasattr(self, '__kd'):
            if self.__kd is not None:
                self.__clib.free_kd(self.__kd)
        if hasattr(self, '__kd_phys'):
            if self.__kd_phys is not None:
                self.__clib.free_kd(self.__kd_phys)
        if hasattr(self, '__rng'):
            self.__clib.rng_free(self.__rng)


    ##################################################################
    # Return a copy of the list of available filters
    ##################################################################
    def filters(self):
        return deepcopy(self.__filters)
    
    ##################################################################
    # Property to get the sampling density
    ##################################################################

    @property
    def sample_density(self):
        """
        The density with which the library was sampled, evaluated for
        each simulation in the library
        """
        
        if self.__sample_density is None:

            # Choose computation method
            if hasattr(self.__sden, '__call__'):

                # Callable, so pass the physical data to the
                # callable and store the result
                self.__sample_density \
                    = self.__sden(self.__dataset[:,:self.__nphys])

            elif type(self.__sden) is np.ndarray or self.__sden is None:

                # Array, so treat treat this as the data
                self.__sample_density = self.__sden

            elif self.__sden == 'auto':

                # We've been asked to calculate the sample density
                # ourselves, so do so

                # Create unweighted kernel density object for just
                # the physical parameters if we have not done so
                # already
                if self.__kd_phys is None:
                    self.__dataset_phys \
                        = np.copy(self.__dataset[:,:self.__nphys])
                    self.__kd_phys \
                        = self.__clib.build_kd(
                            np.ravel(self.__dataset_phys), 
                            self.__nphys, self.__ndata,
                            None, self.leafsize, self.__bandwidth,
                            self.__ktype, 0)

                    # Use the unweighted kernel density object to
                    # evaluate the raw sample density near each data
                    # point, or near a sub-sample which we can
                    # interpolate from
                    self.__sample_density = np.zeros(self.__ndata)
                    nsamp = 500
                    if self.__ndata < nsamp:
                        # Few data points, just use them all; note
                        # that we cannot pass self.__dataset_phys,
                        # because it is not in the same order as
                        # the full data set anymore
                        pts = np.ravel(self.__dataset[:,:self.__nphys])
                        if not self.__diag_mode:
                            self.__clib.kd_pdf_vec(
                                self.__kd_phys, pts, None,
                                self.__ndata, self.reltol, self.abstol,
                                self.__sample_density)
                        else:
                            nodecheck = np.zeros(self.__ndata, dtype=c_ulong)
                            leafcheck = np.zeros(self.__ndata, dtype=c_ulong)
                            termcheck = np.zeros(self.__ndata, dtype=c_ulong)
                            self.__clib.kd_pdf_vec(
                                self.__kd_phys, pts, None,
                                self.__ndata, self.reltol, self.abstol,
                                self.__sample_density_sorted, nodecheck, 
                                leafcheck, termcheck)
                    else:
                        # Many data points, so choose a sample at random
                        idxpt = np.array(
                            np.random.sample(np.arange(self.__ndata), 
                                             nsamp), dtype=c_ulong)
                        pos = np.copy(self.__dataset_phys[idxpt,:])
                        # Add points at the edges of the dataset
                        # to ensure we enclose all the points
                        lowlim = np.amin(self.__dataset_phys,
                                         axis=0)
                        pos = np.append(pos, lowlim)
                        hilim = np.amax(self.__dataset_phys,
                                        axis=0)
                        pos = np.append(pos, hilim)
                        # Compute density at selected points
                        sample_density = np.zeros(nsamp+2)
                        if not self.__diag_mode:
                            self.__clib.kd_pdf_vec(
                                self.__kd_phys, np.ravel(pos), None,
                                nsamp+2, self.reltol, self.abstol,
                                sample_density)
                        else:
                            nodecheck = np.zeros(nsamp+2, dtype=c_ulong)
                            leafcheck = np.zeros(nsamp+2, dtype=c_ulong)
                            termcheck = np.zeros(nsamp+2, dtype=c_ulong)
                            self.__clib.kd_pdf_vec(
                                self.__kd_phys, np.ravel(pos), None,
                                nsamp+2, self.reltol, self.abstol,
                                sample_density, nodecheck, 
                                leafcheck, termcheck)
                        # Now interpolate the sample points to all
                        # points in the data set
                        pts = np.ravel(self.__dataset[:,:self.__nphys])
                        self.__sample_density \
                            = np.exp(
                                interp.griddata(pos, 
                                                np.log(sample_density),
                                                pts,
                                                method='linear')).flatten()
                        
        # Return result, sorted back into the order of the original
        # data
        if self.__sample_density is not None:
            return self.__sample_density[self.__idxmap_inv]
        else:
            return None
    

    @sample_density.setter
    def sample_density(self, sden):
        """
        This function sets the sampling density for the library

        Parameters
           sden : array, shape (N) | callable | None
              sample density each data point; interpretation depends
              on the type passed:
                 array, shape (N) : 
                    values are interpreted as the sampling density at
                    each data point in the dataset used to construct
                    the bp object
                 callable : 
                    the callable must take as an argument an array of
                    shape (N, nphys), and return an array of shape (N)
                    giving the prior probability at each data point
                 None :
                    all data points have equal prior probability

        Returns
           Nothing
        """
        # Erase current sample density information, then force
        # recomputation
        self.__sample_density = None
        if hasattr(sden, '__iter__'):
            # If passed an array, assume it is ordered as was the
            # original data set, not our re-ordered version, so
            # re-order before storing
            self.__sden = np.array(sden)[self.__idxmap]
        else:
            self.__sden = sden
        dummy = self.sample_density

        # Update any cached objects as well
        for c in self.__cache:
            c['bp'].sample_density = dummy
            

    ##################################################################
    # Define the priors and pobs properties
    ##################################################################

    @property
    def priors(self):
        """
        The current set of prior probabilities for every
        simulation in the library; data returned are in the same order
        as the data originally used to construct the library
        """
        if self.__priors is None:
            return None
        else:
            return self.__prior_data[self.__idxmap_inv]

    @priors.setter
    def priors(self, prior):
        """
        This function sets the prior probabilities to use

        Parameters
           prior : array, shape (N) | callable | None
              prior probability on each data point; interpretation
              depends on the type passed:
                 array, shape (N) : 
                    values are interpreted as the prior probability of
                    each data point in the dataset used to construct
                    the bp object
                 callable : 
                    the callable must take as an argument an array of
                    shape (N, nphys), and return an array of shape (N)
                    giving the prior probability at each data point
                 None :
                    all data points have equal prior probability

        Returns
           Nothing
        """

        # If prior is an arraylike, sort it using our index map
        if hasattr(prior, '__iter__'):
            pr = np.array(prior)[self.__idxmap]
        else:
            pr = prior

        # If the prior is unchanged, do nothing
        if (type(pr) == np.ndarray) and \
           (type(self.__priors) == np.ndarray):
            if np.array_equal(pr, self.__priors):
                return
        elif (type(pr) == type(self.__priors)) and \
             (pr == self.__priors):
            return

        # If priors and pobs are both None, just remove all weighting
        if pr is None and self.pobs is None:
            self.__clib.kd_change_wgt(None, self.__kd)
            self.__priors = None
            self.__prior_data = None
            for c in self.__cache:
                c['bp'].priors = None
            return

        else:
            # If we're here, we have a non-trival prior or pobs;
            # record the new prior, and, if it is a callable, call it
            self.__priors = pr
            if hasattr(self.__priors, '__call__'):
                self.__prior_data \
                    = self.__priors(self.__dataset[:,:self.__nphys]) \
                          .flatten()
            else:
                self.__prior_data = self.__priors

            # Compute the weights from the ratio of the prior times
            # the observation probability to the sample density, then
            # adjust the weights in the kd; note that a bit of
            # trickery is required, because the sample_density property
            # returns the sample density sorted in the original data
            # order, not in the sorted order, which is what we need
            dummy = self.sample_density    # Force re-computation
            wgt = np.ones(self.__ndata)
            if self.__sample_density is not None:
                wgt *= 1.0 / self.__sample_density
            if self.__prior_data is not None:
                wgt *= self.__prior_data
            if self.__pobs_data is not None:
                wgt *= self.__pobs_data
            self.__wgt = wgt
            self.__clib.kd_change_wgt(self.__wgt.ctypes.data_as(
                POINTER(c_double)), self.__kd)

            # Updated cached bp objects
            for c in self.__cache:
                c['bp'].priors = self.__prior_data


    @property
    def pobs(self):
        """
        The current set of observation probabilities for every
        simulation in the library; data returned are in the same order
        as the data originally used to construct the library
        """
        if self.__pobs_data is None:
            return None
        else:
            return self.__pobs_data[self.__idxmap_inv]

    @pobs.setter
    def pobs(self, p_obs):
        """
        This function sets the observation probabilities to use

        Parameters
           p_obs : array, shape (N) | callable | None
              probability of observation for each data point;
              interpretation depends on the type passed:
                 array, shape (N) : 
                    values are interpreted as the probability of
                    observation each data point in the dataset used to
                    construct the bp object
                 callable : 
                    the callable must take as an argument an array of
                    shape (N, nphot), and return an array of shape (N)
                    giving the probability of observation for each
                    data point
                 None :
                    all data points have equal probability of
                    observation

        Returns
           Nothing
        """

        # If prior is an arraylike, sort it using our index map
        if hasattr(p_obs, '__iter__'):
            po = np.array(p_obs)[self.__idxmap]
        else:
            po = p_obs

        # If the observation probability is unchanged, do nothing
        if (type(po) == np.ndarray) and \
           (type(self.__pobs) == np.ndarray):
            if np.array_equal(po, self.__pobs):
                return
        elif (type(po) == type(self.__pobs)) and \
             (po == self.__pobs):
            return

        # If pobs and prior are both None, just remove all weighting
        if po is None and self.priors is None:
            self.__clib.kd_change_wgt(None, self.__kd)
            self.__pobs = None
            self.__pobs_data = None
            for c in self.__cache:
                c['bp'].pobs = None
            return

        else:

            # If we're here, we have a non-trivial pobs or prior; if
            # the new pobs is a callable, call it and save the result
            self.__pobs = po
            if hasattr(self.__pobs, '__call__'):
                self.__pobs_data \
                    = self.__pobs(self.__dataset[:,self.__nphys:]).\
                    flatten()
            else:
                self.__pobs_data = self.__pobs

            # Compute the weights on the points, and change the kd
            # object appropriately
            dummy = self.sample_density    # Force re-computation
            wgt = np.ones(self.__ndata)
            if self.__sample_density is not None:
                wgt *= 1.0 / self.__sample_density
            if self.__prior_data is not None:
                wgt *= self.__prior_data
            if self.__pobs_data is not None:
                wgt *= self.__pobs_data
            self.__wgt = wgt
            self.__clib.kd_change_wgt(self.__wgt.ctypes.data_as(
                POINTER(c_double)), self.__kd)

            # Update cached objects
            for c in self.__cache:
                c['bp'].pobs = None


    ##################################################################
    # Define the bandwidth property
    ##################################################################
 
    @property
    def bandwidth(self):
        """
        The current bandwidth
        """
        return deepcopy(self.__bandwidth)

    @bandwidth.setter
    def bandwidth(self, bw):

        if np.array_equal(self.__bandwidth, bw):
            # If new bandwidth equals old bandwidth, do nothing
            return

        elif str(bw) != 'auto':
            # If we've been given a specified bandwidth, set to that
            if hasattr(bw, '__iter__'):
                self.__bandwidth = np.copy(bw)
            else:
                self.__bandwidth \
                    = np.zeros(self.__nphys + self.__nphot) + bw
            self.__auto_bw_set = False

        else:
            # Automatic bandwidth setting

            # Are we already set on auto? If so, just return
            if self.__auto_bw_set:
                return

            # Do we have a stored value for the automatic bandwidth?
            # If not, we need to compute it.
            if self.__auto_bw is None:

                # Find 10th nearest neighbors
                nneighbor=10
                if self.__ndata > 5000:
                    # For data sets with > 5000 samples, just use a
                    # sub-sample of 5,000, which is more than enough
                    # to get a reasonable estimate of the distribution
                    idxpt = np.array(
                        np.random.choice(np.arange(self.__ndata), 
                                         5000), dtype=c_ulong)
                    neighbors = np.zeros(nneighbor*5000, 
                                              dtype=c_ulong)
                    d2 = np.zeros(nneighbor*5000)
                    self.__clib.kd_neighbors_point_vec(
                        self.__kd, idxpt, 5000, nneighbor, False,
                        neighbors, d2)

                else:
                    # For smaller data sets, use it all
                    neighbors = np.zeros(nneighbor*self.__ndata, 
                                         dtype=c_ulong)
                    d2 = np.zeros(nneighbor*self.__ndata)
                    idxpt = np.arange(self.__ndata, dtype=c_ulong)
                    self.__clib.kd_neighbors_all(self.__kd, nneighbor, 
                                                 False, neighbors, d2)

                # Take the bandwidth in each dimension to be the 90th
                # percentile of the 10th nearest neighbor distance
                offset = np.abs(self.__dataset[idxpt,:] -
                                self.__dataset[
                                    neighbors[nneighbor-1::nneighbor],:])
                self.__auto_bw = np.zeros(self.__nphys+self.__nphot)
                for i in range(self.__nphys+self.__nphot):
                    self.__auto_bw[i] = np.percentile(offset[:,i], 95)

            # Set to the auto bandwidth
            self.__bandwidth = np.copy(self.__auto_bw)
            self.__auto_bw_set = True

        # Set the new bandwidth
        self.__clib.kd_change_bandwidth(self.__bandwidth, self.__kd)

        # If we have stored priors, and we're using automatic sample
        # density setting, we need to recompute them for the
        # new bandwidth; zero out the stored sample density, and
        # adjust the kernel density estimator for the physical
        # parameters to the new bandwidth before doing so
        if type(self.__sden) is str:
            if self.__sden == 'auto':
                self.__sample_density = None
                if self.__kd_phys is not None:
                    self.__clib.kd_change_bandwidth(
                        self.__bandwidth[:self.__nphys], self.__kd_phys)
                pr = self.priors
                self.priors = None
                self.priors = pr

        # Update cached objects
        for c in self.__cache:
            c['bp'] = bw

    ##################################################################
    # Utility methods to change the tolerances. We need properties for
    # these because, if we have cached data sets, we need to update
    # their tolerances too.
    ##################################################################
    @property
    def abstol(self):
        return self.__abstol

    @abstol.setter
    def abstol(self, newtol):
        self.__abstol = newtol
        for c in self.__cache:
            c['bp'].abstol = self.__abstol

    @property
    def reltol(self):
        return self.__reltol

    @reltol.setter
    def reltol(self, newtol):
        self.__reltol = newtol
        for c in self.__cache:
            c['bp'].reltol = self.__reltol

    ##################################################################
    # Utitlity methods to return the number of physical and
    # photometric quantities, and the total number of dimensions,
    # without allowing them to be changed
    ##################################################################

    @property
    def nphys(self):
        """
        This is the number of physical properties for the bayesphot
        object.
        """
        return self.__nphys

    @nphys.setter
    def nphys(self, val):
        raise ValueError("can't set bayesphot.nphys!")
    
    @property
    def nphot(self):
        """
        This is the number of photometric properties for the bayesphot
        object.
        """
        return self.__nphot

    @nphot.setter
    def nphot(self, val):
        raise ValueError("can't set bayesphot.nphot!")

    @property
    def ndim(self):
        """
        This is the number of physical plus photometric properties for
        the bayesphot object.
        """
        return (self.__nphys + self.__nphot)

    @ndim.setter
    def ndim(self, val):
        raise ValueError("can't set bayesphot.ndim!")

    ##################################################################
    # Utility methods to set a new bandwidth to account for
    # photometric errors, to reset to the original bandwidth, and to
    # free a temporary kd_tree holder, all in a thread-safe way. These
    # are intended for internal use.
    ##################################################################

    # Broaden the bandwidth by the photometric error, and return a KD
    # object with the new bandwidth; if we are thread-safe, this is a
    # newly-created KD object, which will need to be freed later; if a
    # value for kd_cur is passed in, the bandwidth will be changed for
    # it, and no allocation will be done regardless of thread safety
    def __change_bw_err(self, photerr, kd_cur=None, err_only=False):
        if err_only:
            bandwidth = np.copy(self.__bandwidth)
            bandwidth[self.__nphys:] = photerr
        else:
            err = np.zeros(self.__bandwidth.size)
            err[self.__nphys:] = photerr
            bandwidth = np.sqrt(self.__bandwidth**2+err**2)
        if kd_cur is not None:
            kd_tmp = kd_cur
        elif not self.thread_safe: 
            kd_tmp = self.__kd
        else: 
            kd_tmp = self.__clib.copy_kd(self.__kd)
        self.__clib.kd_change_bandwidth(bandwidth, kd_tmp)
        return kd_tmp

    # Free a temporary KD object
    def __free_bw_err(self, kd_tmp):
        if self.thread_safe:
            self.__clib.free_kd_copy(kd_tmp)

    # Restore bandwidth to the default, de-allocating the temporary KD
    # object if needed
    def __restore_bw_err(self, kd_tmp):
        if not self.thread_safe:
            self.__clib.kd_change_bandwidth(
                self.__bandwidth, self.__kd)
        else:
            self.__clib.free_kd_copy(kd_tmp)

    ##################################################################
    # Methods to create and destroy caches
    ##################################################################

[docs]
    def make_cache(self, margindims):
        """
        This method builds a cache to do faster calculation of PDFs
        where certain dimensions are marginalised out. If such caches
        exist, they are used automatically by all the computation
        methods.

        Parameters
           margindims : listlike of integers
              list of dimensions to be marginalised out; physical
              dimensions go from 0 - nphys-1, photometric dimensions
              from nphys to nphys + nphot - 1

        Returns
           Nothing
        """

        # Safety check on inputs
        mdims = np.asarray(margindims, dtype='int')
        if np.amin(mdims) < 0 or np.amin(mdims) > self.ndim-1 or \
           len(mdims) != len(np.unique(mdims)):
            raise ValueError("bp.make_cache: margindims must be "
                             " unique integers in the range "
                             " 0, ndim")

        # If we already have a cache for this set of marginalised
        # dimensions, do nothing
        for c in self.__cache:
            if c['margindims'] == list(margindims):
                return

        # Get list of dimensions that remain
        keepdims = np.array(list(set(range(self.ndim)) - set(mdims)),
                            dtype=int)
        nphys = np.sum(keepdims < self.nphys)

        # Extract the dimensions we're keeping from the data set
        data_cache = np.copy(self.__dataset[:,keepdims])
        bw = self.bandwidth[keepdims]
        if self.__prior_data is not None:
            prior_cache = np.copy(self.__prior_data)
        else:
            prior_cache = None
        if self.__pobs_data is not None:
            pobs_cache = np.copy(self.__pobs_data)
        else:
            pobs_cache = None

        # Build a new bp object from the dimensionally-reduced data
        if self.__ktype == 2:
            ktype = 'gaussian'
        elif self.__ktype == 1:
            ktype = 'tophat'
        elif self.__ktype == 0:
            ktype = 'epanechnikov'
        bp_cache = bp(data_cache, nphys, bandwidth=bw, ktype=ktype,
                      priors=prior_cache, pobs=pobs_cache,
                      sample_density=self.__sample_density,
                      reltol=self.reltol,
                      abstol=self.abstol, leafsize=self.leafsize,
                      thread_safe=self.thread_safe,
                      caching='none')

        # Store the new object
        self.__cache.append(
            { 'margindims' : list(margindims),
              'keepdims'   : list(keepdims),
              'data'       : data_cache,
              'prior'      : prior_cache,
              'pobs'       : pobs_cache,
              'bp'         : bp_cache } )



[docs]
    def clear_cache(self, margindims=None):
        """
        This method deletes from the cache

        Parameters
           margindims : listlike of integers
              list of marginalised dimensions that should be removed
              from the cache, in the same format as make_cache; if
              left as None, the cache is completely emptied

        Returns
           Nothing
        """
        if margindims is None:
            self.__cache = []
        else:
            for i in range(len(cache)):
                if cache[i]['margindims'] == list(margindims):
                    del cache[i]
                    return
            raise ValueError(
                "bp.clear_cache: no cached data found for "+
                repr(margindims))

        

    ##################################################################
    # Method to compute the log likelihood function for a particular
    # set of physical properties given a particular set of photometric
    # properties; some dimensions can be marginalised over if desired
    ##################################################################

[docs]
    def logL(self, physprop, photprop, photerr=None,
             margindim=None):
        """
        This function returns the natural log of the likelihood
        function evaluated at a particular log mass, log age,
        extinction, and set of log luminosities

        Parameters
           physprop : arraylike, shape (nphys) or (..., nphys)
              array giving values of the physical properties; for a
              multidimensional array, the operation is vectorized over
              the leading dimensions
           photprop : arraylike, shape (nfilter) or (..., nfilter)
              array giving the photometric values; for a
              multidimensional array, the operation is vectorized over
              the leading dimensions
           photerr : arraylike, shape (nfilter) or (..., nfilter)
              array giving photometric errors; for a multidimensional
              array, the operation is vectorized over the leading
              dimensions
           margindim : int | arraylike of ints | None
              The index or indices of the physical or photometric
              properties to be maginalized over, numbered from 0 -
              nphys-1 for physical properties and from nphys - nfilter +
              nfilter - 1 for photometric properties. If this keyword is
              set, then physprop and/or photprop should have fewer
              than nphys or nphot elements due to the omission of
              marginalised dimensions. If all physical or photometric
              dimensions are marginalised out, that corresponding
              argument for physprop or photprop should be set to None

        Returns
           logL : float or arraylike
              natural log of the likelihood function
        """

        # If we're marginalising, see if we should use a cached data
        # set
        if margindim is not None:

            # If we're in lazy mode, add a cache for this set of
            # marginalised dimensions if it doesn't exist
            if self.caching == 'lazy':
                self.make_cache(margindim)

            # Look for this set of dimensions in our existing cache;
            # if we find a match, use it
            for c in self.__cache:
                if list(margindim) == c['margindims']:
                    return c['bp'].logL(physprop, photprop,
                                        photerr=photerr)

        # Safety check: make sure all the dimensions of the input
        # arrays match what we expect. If we are marginalizing over
        # some dimensions, extract the dimensions we want to keep
        if margindim is not None:
            nphys_margin = np.sum(np.asarray(margindim) <
                                  self.__nphys)
            nphot_margin = len(margindim) - nphys_margin
            nphys = self.__nphys - nphys_margin
            nphot = self.__nphot - nphot_margin
            keepdims = np.array(
                list(set(range(self.ndim)) - set(margindim)),
                dtype=c_ulong)
            bw = self.__bandwidth[keepdims]
        else:
            nphys = self.__nphys
            nphot = self.__nphot
            bw = self.__bandwidth
        if nphys > 1:
            if (np.asarray(physprop).shape[-1] != nphys):
                raise ValueError("need " + str(nphys) + 
                                 " physical properties!")
        if nphys == 0 and physprop is not None:
            raise ValueError(
                "input includes physical properties, but "
                "all physical properties are marginalised "
                "out!")
                
        if nphot > 1:
            if (np.asarray(photprop).shape[-1] != nphot):
                raise ValueError("need " + str(nphot) +
                                 " photometric properties!")
            if photerr is not None:
                if (np.asarray(photerr).shape[-1] != nphot):
                    raise ValueError("need " + str(nphot) +
                                     " photometric errors!")
        if nphot == 0 and photprop is not None:
            raise ValueError(
                "input includes photometric properties, but "
                "all photometric properties are marginalised "
                "out!")
        if nphot == 0 and photerr is not None:
            raise ValueError(
                "input includes photometric errors, but "
                "all photometric properties are marginalised "
                "out!")

        # Handle special case of a single photometric or physical
        # quantity; in this case make sure we have a trailing
        # dimension of 1 so that all the fancy array manipulation
        # below works correctly
        if nphys == 1 and (np.asarray(physprop).shape[-1] != 1
                           or np.asarray(physprop).ndim == 1):
            physprop_ = np.array(physprop).\
                        reshape(np.asarray(physprop).shape+(1,))
        else:
            physprop_ = physprop
        if nphot == 1 and (np.asarray(photprop).shape[-1] != 1
                           or np.asarray(photprop).ndim == 1):
            photprop_ = np.array(photprop).\
                        reshape(np.asarray(photprop).shape+(1,))
        else:
            photprop_ = photprop
        if nphot == 1 and photerr is not None:
            if np.asarray(photerr).shape[-1] != 1 \
               or np.asarray(photerr).ndim == 1:
                photerr_ = np.array(photerr).\
                        reshape(np.asarray(photerr).shape+(1,))
        else:
            photerr_ = photerr

        # Figure out number of distinct input sets of physical and
        # photometric properties in the input data
        if nphys > 0:
            nphys_in = np.asarray(physprop_).size // nphys
        else:
            nphys_in = 0
        if nphot > 0:
            nphot_in = np.asarray(photprop_).size // nphot
        else:
            nphot_in = 0

        # Allocate an array to hold the results. The shape is a bit
        # tricky to figure out, since one or more of the inputs
        # (physprop, photprop, and photerr) can be None. We therefore
        # need to consider several possible cases.
        if physprop_ is not None and photprop_ is not None and \
           photerr_ is not None:
            pdf = np.zeros(
                np.broadcast(np.atleast_2d(physprop_)[..., 0],
                             np.atleast_2d(photprop_)[..., 0],
                             np.atleast_2d(photerr_)[..., 0]).shape,
                dtype=c_double)
        elif physprop_ is not None and photprop_ is not None:
            pdf = np.zeros(
                np.broadcast(np.atleast_2d(physprop_)[..., 0],
                             np.atleast_2d(photprop_)[..., 0]).shape,
                dtype=c_double)
        elif physprop_ is not None:
            pdf = np.zeros(np.atleast_2d(physprop_)[..., 0].shape,
                           dtype=c_double)
        elif photprop_ is not None and photerr_ is not None:
            pdf = np.zeros(
                np.broadcast(np.atleast_2d(photprop_)[..., 0],
                             np.atleast_2d(photerr_)[..., 0]).shape,
                dtype=c_double)
        elif photprop_ is not None:
            pdf = np.zeros(np.atleast_2d(photprop_)[..., 0].shape,
                           dtype=c_double)            
        if self.__diag_mode:
            nodecheck = np.zeros(pdf.shape, dtype=c_ulong)
            leafcheck = np.zeros(pdf.shape, dtype=c_ulong)
            termcheck = np.zeros(pdf.shape, dtype=c_ulong)

        # Make an array suitable for passing data to c routines
        cdata = np.zeros(pdf.shape + (nphys+nphot,),
                         dtype=c_double)
        if nphys != 0:
            cdata[..., :nphys] \
                = np.vstack((physprop_,) * (pdf.size//nphys_in)). \
                reshape(cdata[..., :nphys].shape)
        if nphot != 0:
            cdata[..., nphys:] \
                = np.vstack((photprop_,) * (pdf.size//nphot_in)). \
                reshape(cdata[..., nphys:].shape)
        if photerr_ is not None:
            cbandwidth = np.zeros(pdf.shape + (nphot,),
                                  dtype=c_double)                
            cbandwidth[...] = np.sqrt(bw**2 + photerr_**2)
            cbw_ptr = cbandwidth.ctypes.data_as(POINTER(c_double))
        else:
            cbw_ptr = None
            
        # Call the PDF computation routine
        if margindim is None:
            # No marginalization, so call kd_pdf

            if not self.__diag_mode:
                self.__clib.kd_pdf_vec(
                    self.__kd, np.ravel(cdata), cbw_ptr, pdf.size, 
                    self.reltol, self.abstol, np.ravel(pdf))
            else:
                self.__clib.kd_pdf_vec(
                    self.__kd, np.ravel(cdata), cbw_ptr, pdf.size, 
                    self.reltol, self.abstol, np.ravel(pdf),
                    np.ravel(nodecheck), np.ravel(leafcheck),
                    np.ravel(termcheck))
        else:
            # We are marginalizing, so call kd_pdf_int
            if not self.__diag_mode:
                self.__clib.kd_pdf_int_vec(
                    self.__kd, np.ravel(cdata), cbw_ptr, keepdims,
                    keepdims.size, pdf.size, self.reltol,
                    self.abstol, np.ravel(pdf))
            else:
                self.__clib.kd_pdf_int_vec(
                    self.__kd, np.ravel(cdata), cbw_ptr, keepdims,
                    keepdims.size, pdf.size, self.reltol,
                    self.abstol, np.ravel(pdf),
                    np.ravel(nodecheck), np.ravel(leafcheck),
                    np.ravel(termcheck))

        # Return
        if not self.__diag_mode:
            return np.log(pdf)
        else:
            return np.log(pdf), nodecheck, leafcheck, termcheck


    ##################################################################
    # Function to return the marginal distribution of one or more of
    # the physical properties for a specified set of photometric
    # properties
    ##################################################################

[docs]
    def mpdf(self, idx, photprop, photerr=None, ngrid=128,
             qmin=None, qmax=None, grid=None, norm=True):
        """
        Returns the marginal probability for one or mode physical
        quantities for one or more input sets of photometric
        properties. Output quantities are computed on a grid of
        values, in the same style as meshgrid.

        Parameters:
           idx : int or listlike containing ints
              index of the physical quantity whose PDF is to be
              computed; if this is an iterable, the joint distribution of
              the indicated quantities is returned
           photprop : arraylike, shape (nfilter) or (..., nfilter)
              array giving the photometric values; for a
              multidimensional array, the operation is vectorized over
              the leading dimensions
           photerr : arraylike, shape (nfilter) or (..., nfilter)
              array giving photometric errors; for a multidimensional
              array, the operation is vectorized over the leading
              dimensions
           ngrid : int or listlike containing ints
              number of points in each dimension of the output grid;
              if this is an iterable, it must have the same number of
              elements as idx
           qmin : float or listlike
              minimum value in the output grid in each quantity; if
              left as None, defaults to the minimum value in the
              library; if this is an iterable, it must contain the
              same number of elements as idx
           qmax : float or listlike
              maximum value in the output grid in each quantity; if
              left as None, defaults to the maximum value in the
              library; if this is an iterable, it must contain the
              same number of elements as idx
           grid : listlike of arrays
              set of values defining the grid on which the PDF is to
              be evaluated, in the same format used by meshgrid
           norm : bool
              if True, returned pdf's will be normalized to integrate
              to 1

        Returns:
           grid_out : array
              array of values at which the PDF is evaluated; contents
              are the same as returned by meshgrid
           pdf : array
              array of marginal posterior probabilities at each point
              of the output grid, for each input set of photometry; the leading
              dimensions match the leading dimensions produced by
              broadcasting the leading dimensions of photprop and
              photerr together, while the trailing dimensions match
              the dimensions of the output grid
        """

        # Check if we're marginalising over any dimensions, and if so
        # whether we have or should create a cache for those
        # dimensions
        if len(np.atleast_1d(idx)) < self.nphys:

            # Figure out which dimensions we're marginalising over
            keepdim = list(np.atleast_1d(idx))
            margindim = list(set(range(self.nphys)) - set(keepdim))

            # If we're in lazy mode, add a cache for this set of
            # marginalised dimensions
            if self.caching == 'lazy':
                self.make_cache(margindim)
            
            # Look for this set of dimensions in our existing cache
            for c in self.__cache:
                if list(margindim) == c['margindims']:

                    # Found a match; call the bp object that matches
                    # and return its output
                    return c['bp'].mpdf(keepdim, photprop,
                                        photerr=photerr,
                                        ngrid=ngrid,
                                        qmin=qmin, qmax=qmax,
                                        grid=grid, norm=norm)
                
        # Safety check
        if (np.array(photprop).shape[-1] != self.__nphot) and \
           (self.__nphot > 1):
            raise ValueError("need " + str(self.__nphot) +
                             " photometric properties!")
        if photerr is not None:
            if (np.array(photerr).shape[-1] != self.__nphot) and \
               (self.__nphot > 1):
                raise ValueError("need " + str(self.__nphot) +
                                 " photometric errors!")
        if (np.amax(idx) > self.nphys) or (np.amin(idx) < 0) or \
           (not np.array_equal(np.squeeze(np.unique(np.array(idx))), 
                               np.squeeze(np.array([idx])))):
            raise ValueError("need non-repeating indices in " +
                             "the range 0 - {:d}!".
                             format(nphys-1))

        # Reshape arrays if necessary
        if (np.array(photprop).shape[-1] != self.__nphot) and \
           (self.__nphot == 1):
            photprop = photprop.reshape(photprop.shape+(1,))
        if photerr is not None:
            if (np.array(photerr).shape[-1] != self.__nphot) and \
               (self.__nphot == 1):
                photerr = photerr.reshape(photerr.shape+(1,))

        # Set up the grid of outputs, and the versions of it that we
        # will be passing to the c code
        if grid is not None:
            grid_out = np.array(grid)
            qmin_tmp \
                = np.array(
                    np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)]),
                    dtype=np.double)
            qmax_tmp \
                = np.array(
                    np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)]),
                    dtype=np.double)
            ngrid_tmp = np.array(grid_out.shape[1:], dtype=c_ulong)
        else:
            if qmin is None:
                qmin = np.amin(self.__dataset[:,idx], axis=0)
            if qmax is None:
                qmax = np.amax(self.__dataset[:,idx], axis=0)
            griddims = []
            if hasattr(idx, '__len__'):
                nidx = len(idx)
            else:
                nidx = 1
            if nidx > 1:
                # Case for multiple indices
                griddims = []
                if hasattr(ngrid, '__len__'):
                    ngrid_tmp = np.array(ngrid, dtype=c_ulong)
                else:
                    ngrid_tmp = np.array([ngrid]*len(idx), dtype=c_ulong)
                for i in range(len(idx)):
                    griddims.append(qmin[i] + np.arange(ngrid_tmp[i]) * 
                                    float(qmax[i]-qmin[i])/(ngrid_tmp[i]-1))
                grid_out = np.squeeze(np.array(np.meshgrid(*griddims,
                                                           indexing='ij')))
                out_shape = grid_out[0, ...].shape
                qmin_tmp \
                    = np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)])
                qmax_tmp \
                    = np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)])
            else:
                # Case for a single index
                ngrid_tmp = np.array([ngrid], dtype=c_ulong)
                qmin_tmp = np.array([qmin], dtype=np.double).reshape(1)
                qmax_tmp = np.array([qmax], dtype=np.double).reshape(1)
                grid_out = qmin + \
                           np.arange(ngrid) * \
                           float(qmax-qmin)/(ngrid-1)
                out_shape = grid_out.shape

        # Figure out how many distinct photometric values we've been
        # given, and how many sets of photometric errors
        nphot = np.array(photprop).size//self.__nphot
        nphot_err = np.array(photerr).size//self.__nphot

        # Set up a grid to hold the outputs
        if photerr is not None:
            pdf = np.zeros(
                np.broadcast(np.array(photprop)[..., 0],
                             np.array(photerr)[..., 0]).shape +
                out_shape)
        else:
            pdf = np.zeros(np.array(photprop)[..., 0].shape +
                           out_shape)

        # Prepare data for c library
        if hasattr(idx, '__len__'):
            nidx = len(idx)
        else:
            nidx = 1
        dims = np.zeros(nidx+self.__nphot, dtype=c_ulong)
        dims[:nidx] = idx
        dims[nidx:] = self.__nphys+np.arange(self.__nphot, dtype=c_ulong)
        ndim = c_ulong(nidx + self.__nphot)
        phottmp = np.array(photprop, dtype=c_double)

        # Separate cases with single / no photometric errors from
        # cases with multiple sets of photometric errors
        if nphot_err <= 1:

            # Case with at most one set of photometric errors

            # Set the bandwidth based on the photometric errors if we
            # were given some
            if photerr is None:
                kd_tmp = self.__kd
            else:
                kd_tmp = self.__change_bw_err(photerr)

            # Call the PDF computation routine; note that we call
            # kd_pdf_int_vec if we're actually marginalizing over any
            # physical properties (which is the case if len(idx) <
            # nphys), but we invoke kd_pdf_vec if we're not actually
            # marginalizing (len(idx)==nphys) because then we don't
            # need to do any integration
            if nidx < self.__nphys:
                self.__clib.kd_pdf_int_reggrid(
                    kd_tmp, np.ravel(phottmp),
                    dims[nidx:], self.__nphot, nphot,
                    qmin_tmp, qmax_tmp, ngrid_tmp, dims[:nidx], nidx,
                    self.reltol, self.abstol, np.ravel(pdf))
            else:
                self.__clib.kd_pdf_reggrid(
                    kd_tmp, np.ravel(phottmp),
                    dims[nidx:], self.__nphot, nphot,
                    qmin_tmp, qmax_tmp, ngrid_tmp, dims[:nidx], nidx,
                    self.reltol, self.abstol, np.ravel(pdf))

        else:

            # Case with multiple sets of photometric errors

            # Loop over photometric errors
            kd_tmp = None
            for i in np.ndindex(*photerr.shape[:-1]):

                # Set bandwidth based on photometric error for this
                # iteration
                kd_tmp = self.__change_bw_err(photerr[i], kd_cur=kd_tmp)

                # Grab the corresponding portions of the arrays going
                # to and from the c code
                if photprop.size < photerr.size:
                    phot_sub = np.ravel(phottmp)
                else:
                    phot_sub = np.ravel(phottmp[i])
                pdf_sub = np.zeros(np.array(pdf[i]).shape)

                # Call kernel density estimate with this bandwidth
                if nidx < self.__nphys:
                    self.__clib.kd_pdf_int_reggrid(
                        kd_tmp, phot_sub,
                        dims[nidx:], self.__nphot, 
                        phot_sub.size//self.__nphot,
                        qmin_tmp, qmax_tmp, ngrid_tmp, 
                        dims[:nidx], nidx,
                        self.reltol, self.abstol, np.ravel(pdf_sub))
                else:
                    self.__clib.kd_pdf_reggrid(
                        kd_tmp, phot_sub,
                        dims[nidx:], self.__nphot, 
                        phot_sub.size//self.__nphot,
                        qmin_tmp, qmax_tmp, ngrid_tmp, dims[:nidx], nidx,
                        self.reltol, self.abstol, np.ravel(pdf_sub))
                pdf[i] = pdf_sub

        # Set the bandwidth back to its default if necessary
        if photerr is not None:
            self.__restore_bw_err(kd_tmp)

        # Normalize if requested
        if norm:

            # Compute the sizes of the output cells
            if nidx == 1:
                cellsize = np.zeros(grid_out.size)
                cellsize[1:-1] = 0.5*(grid_out[2:]-grid_out[:-2])
                cellsize[0] = grid_out[1] - grid_out[0]
                cellsize[-1] = grid_out[-1] - grid_out[-2]
            else:
                # Get the cell sizes in each dimension
                csize = []
                for i in range(nidx):
                    vec = grid_out[(i,)+i*(0,)+(slice(None),) + 
                                   (grid_out.shape[0]-i-1)*(0,)]
                    csize.append(np.zeros(vec.size))
                    csize[i][1:-1] = 0.5*(vec[2:]-vec[:-2])
                    csize[i][0] = vec[1] - vec[0]
                    csize[i][-1] = vec[-1] - vec[-2]
                # Take outer product to get grid of sizes
                cellsize = np.multiply.outer(csize[0], csize[1])
                for i in range(2, nidx):
                    cellsize = np.multiply.outer(cellsize, csize[i])

            # Compute integral
            normfac = np.sum(pdf*cellsize, axis = 
                             tuple(range(np.array(photprop).ndim-1, pdf.ndim)))

            # Normalize
            pdf = np.transpose(np.transpose(pdf)/normfac)

        # Return
        return grid_out, pdf



    ##################################################################
    # Function to return the marginal distribution of one or more of
    # the photometric properties for a specified set of physical
    # properties; this is just like mpdf, but in reverse
    ##################################################################

[docs]
    def mpdf_phot(self, idx, physprop, ngrid=128,
                  qmin=None, qmax=None, grid=None, norm=True):
        """
        Returns the marginal probability for one or mode photometric
        quantities corresponding to an input set of physical
        properties. Output quantities are computed on a grid of
        values, in the same style as meshgrid.

        Parameters:
           idx : int or listlike containing ints
              index of the photometric quantity whose PDF is to be
              computed, starting at 0; if this is an iterable, the
              joint distribution of the indicated quantities is returned
           physprop : arraylike, shape (nphys) or (..., nphys)
              physical properties to be used; if this is an array of
              nphys elements, these give the physical properties; if
              it is a multidimensional array, the operation is
              vectorized over the leading dimensions
              physical properties -- the function must take an array
              of (nphys) elements as an input, and return a floating
              point value representing the PDF evaluated at that set
              of physical properties as an output
           ngrid : int or listlike containing ints
              number of points in each dimension of the output grid;
              if this is an iterable, it must have the same number of
              elements as idx
           qmin : float or listlike
              minimum value in the output grid in each quantity; if
              left as None, defaults to the minimum value in the
              library; if this is an iterable, it must contain the
              same number of elements as idx
           qmax : float or listlike
              maximum value in the output grid in each quantity; if
              left as None, defaults to the maximum value in the
              library; if this is an iterable, it must contain the
              same number of elements as idx
           grid : listlike of arrays
              set of values defining the grid on which the PDF is to
              be evaluated, in the same format used by meshgrid
           norm : bool
              if True, returned pdf's will be normalized to integrate
              to 1

        Returns:
           grid_out : array
              array of values at which the PDF is evaluated; contents
              are the same as returned by meshgrid
           pdf : array
              array of marginal posterior probabilities at each point
              of the output grid, for each input set of properties; the leading
              dimensions match the leading dimensions produced by
              broadcasting the leading dimensions of photprop and
              photerr together, while the trailing dimensions match
              the dimensions of the output grid
        """

        # Check if we're marginalising over any dimensions, and if so
        # whether we have or should create a cache for those
        # dimensions
        if len(np.atleast_1d(idx)) < self.nphot:
            
            # Figure out which dimensions we're marginalising over
            keepdim = list(np.atleast_1d(idx)+self.nphys)
            margindim = list(set(range(self.nphys,self.ndim))
                             - set(keepdim))
            
            # If we're in lazy mode, add a cache for this set of
            # marginalised dimensions
            if self.caching == 'lazy':
                self.make_cache(margindim)
            
            # Look for this set of dimensions in our existing cache
            for c in self.__cache:
                if list(margindim) == c['margindims']:

                    # Found a match; call the bp object that matches
                    # and return its output
                    return c['bp'].mpdf_phot(
                        np.array(keepdim)-self.nphys,
                        physprop,
                        ngrid=ngrid,
                        qmin=qmin, qmax=qmax,
                        grid=grid, norm=norm)
                
        # Safety check
        if (np.array(physprop).shape[-1] != self.__nphys) and \
           (self.__nphot > 1):
            raise ValueError("need " + str(self.__nphys) +
                             " physical properties!")
        if (np.amax(np.asarray(idx)) > self.__nphot) or \
           (np.amin(np.asarray(idx)) < 0) or \
           (not np.array_equal(np.squeeze(np.unique(np.asarray(idx))), 
                               np.squeeze(np.array([idx])))):
            raise ValueError("need non-repeating indices in " +
                             "the range 0 - {:d}!".
                             format(self.__nphot-1))

        # Reshape arrays if necessary
        if (np.array(physprop).shape[-1] != self.__nphys) and \
           (self.__nphys == 1):
            physprop = physprop.reshape(physprop.shape+(1,))

        # Set up the grid of outputs, and the versions of it that we
        # will be passing to the c code
        if grid is not None:
            grid_out = np.array(grid)
            qmin_tmp \
                = np.array(
                    np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)]),
                    dtype=np.double)
            qmax_tmp \
                = np.array(
                    np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)]),
                    dtype=np.double)
            ngrid_tmp = np.array(grid_out.shape[1:], dtype=c_ulong)
        else:
            if qmin is None:
                qmin = np.amin(self.__dataset[:,idx+self.__nphys], axis=0)
            if qmax is None:
                qmax = np.amax(self.__dataset[:,idx+self.__nphys], axis=0)
            griddims = []
            if hasattr(idx, '__len__'):
                nidx = len(idx)
            else:
                nidx = 1
            if nidx > 1:
                # Case for multiple indices
                griddims = []
                if hasattr(ngrid, '__len__'):
                    ngrid_tmp = np.array(ngrid, dtype=c_ulong)
                else:
                    ngrid_tmp = np.array([ngrid]*len(idx), dtype=c_ulong)
                for i in range(len(idx)):
                    griddims.append(qmin[i] + np.arange(ngrid_tmp[i]) * 
                                    float(qmax[i]-qmin[i])/(ngrid_tmp[i]-1))
                grid_out = np.squeeze(np.array(np.meshgrid(*griddims,
                                                           indexing='ij')))
                out_shape = grid_out[0, ...].shape
                qmin_tmp \
                    = np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)])
                qmax_tmp \
                    = np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)])
            else:
                # Case for a single index
                ngrid_tmp = np.array([ngrid], dtype=c_ulong)
                qmin_tmp = np.array([qmin], dtype=np.double).reshape(1)
                qmax_tmp = np.array([qmax], dtype=np.double).reshape(1)
                grid_out = qmin + \
                           np.arange(ngrid) * \
                           float(qmax-qmin)/(ngrid-1)
                out_shape = grid_out.shape
        

        # Prepare things we'll be passing to the c library
        # Prepare data for c library
        if hasattr(idx, '__len__'):
            nidx = len(idx)
        else:
            nidx = 1
        dims = np.zeros(nidx+self.__nphys, dtype=c_ulong)
        dims[:nidx] = self.__nphys + idx
        dims[nidx:] = np.arange(self.__nphys, dtype=c_ulong)
        ndim = c_ulong(nidx + self.__nphys)

        # Prepare output holder
        pdf = np.zeros(np.array(physprop)[..., 0].shape +
                       out_shape)

        # Prepare inputs for passing to c
        nphys = np.array(physprop).size//self.__nphys
        phystmp = np.array(physprop, dtype=c_double)

        # Call c library; note that we call kd_pdf_int_reggrid if
        # we're actually marginalizing over any photometric
        # properties (which is the case if len(idx) <
        # self.__nphot), but we invoke kd_pdf_reggrid if we're not
        # actually marginalizing (len(idx)==self.__nphot) because
        # then we don't need to do any integration
        if nidx < self.__nphot:
            self.__clib.kd_pdf_int_reggrid(
                self.__kd, np.ravel(phystmp), dims[nidx:],
                self.__nphys, nphys, qmin_tmp, qmax_tmp,
                ngrid_tmp, dims[:nidx], nidx, self.reltol,
                self.abstol, np.ravel(pdf))
        else:
            self.__clib.kd_pdf_reggrid(
                self.__kd, np.ravel(phystmp), dims[nidx:],
                self.__nphys, nphys, qmin_tmp, qmax_tmp,
                ngrid_tmp, dims[:nidx], nidx,
                self.reltol, self.abstol, np.ravel(pdf))

        # Normalize if requested
        if norm:

            # Compute the sizes of the output cells
            if nidx == 1:
                cellsize = np.zeros(grid_out.size)
                cellsize[1:-1] = 0.5*(grid_out[2:]-grid_out[:-2])
                cellsize[0] = grid_out[1] - grid_out[0]
                cellsize[-1] = grid_out[-1] - grid_out[-2]
            else:
                # Get the cell sizes in each dimension
                csize = []
                for i in range(nidx):
                    vec = grid_out[(i,)+i*(0,)+(slice(None),) + 
                                   (grid_out.shape[0]-i-1)*(0,)]
                    csize.append(np.zeros(vec.size))
                    csize[i][1:-1] = 0.5*(vec[2:]-vec[:-2])
                    csize[i][0] = vec[1] - vec[0]
                    csize[i][-1] = vec[-1] - vec[-2]
                # Take outer product to get grid of sizes
                cellsize = np.multiply.outer(csize[0], csize[1])
                for i in range(2, nidx):
                    cellsize = np.multiply.outer(cellsize, csize[i])

            # Compute integral
            normfac = np.sum(pdf*cellsize, axis = 
                             tuple(range(np.array(physprop).ndim-1, pdf.ndim)))

            # Normalize
            pdf = np.transpose(np.transpose(pdf)/normfac)

        # Return
        return grid_out, pdf

                

    ##################################################################
    # This is the most general verison of mpdf. It allows the user to
    # fix or marginalize any number of physical or photometric
    # properties, returning the distribution in the remaining
    # properties
    ##################################################################

[docs]
    def mpdf_gen(self, fixeddim, fixedprop, margindim, ngrid=128,
                 qmin=None, qmax=None, grid=None, norm=True):
        """
        Returns the marginal probability for one or more physical or
        photometric properties, keeping other properties fixed and
        marginalizing over other quantities. This is the most general
        marginal PDF routine provided.

        Parameters:
           fixeddim : int | arraylike of ints | None
              The index or indices of the physical or photometric
              properties to be held fixed; physical properties are
              numbered 0 ... nphys-1, and phtometric ones are numbered
              nphys ... nphys + nphot - 1. This can also be set to
              None, in which case no properties are held fixed.
           fixedprop : array | None
              The values of the properties being held fixed; the size
              of the final dimension must be equal to the number of
              elements in fixeddim, and if fixeddim is None, this must
              be too
           margindim : int | arraylike of ints | None
              The index or indices of the physical or photometric
              properties to be maginalized over, numbered in the same
              way as with fixeddim; if set to None, no marginalization
              is performed
           ngrid : int or listlike containing ints
              number of points in each dimension of the output grid;
              if this is an iterable, it must have nphys + nphot -
              len(fixeddim) - len(margindim) elements
           qmin : float | arraylike
              minimum value in the output grid in each quantity; if
              left as None, defaults to the minimum value in the
              library; if this is an iterable, it must contain a
              number of elements equal to nphys + nphot -
              len(fixeddim) - len(margindim)
           qmax : float | arraylike
              maximum value in the output grid in each quantity; if
              left as None, defaults to the maximum value in the
              library; if this is an iterable, it must have the same
              number of elements as qmin
           grid : listlike of arrays
              set of values defining the grid on which the PDF is to
              be evaluated, in the same format used by meshgrid
           norm : bool
              if True, returned pdf's will be normalized to integrate
              to 1

        Returns:
           grid_out : array
              array of values at which the PDF is evaluated; contents
              are the same as returned by meshgrid
           pdf : array
              array of marginal posterior probabilities at each point
              of the output grid, for each input set of properties; the leading
              dimensions match the leading dimensions produced by
              broadcasting the leading dimensions of photprop and
              photerr together, while the trailing dimensions match
              the dimensions of the output grid
        """

        # If we're marginalising out some dimensions, and we have
        # cached data for that setup or are going to make it, call our
        # cached bp object to do this work for us
        if margindim is not None:

            # If we're in lazy mode, add a cache for this set of
            # marginalised dimensions
            if self.caching == 'lazy':
                self.make_cache(margindim)

            # Look for this set of dimensions in our existing cache
            for c in self.__cache:
                if list(margindim) == c['margindims']:

                    # Found a match; update the dimensional indexing
                    # on fixeddim, then call
                    if fixeddim is None:
                        fixeddim_ = None
                    else:
                        fixeddim_ = np.array(deepcopy(fixeddim))
                        diff = np.zeros(fixeddim_.size, dtype=np.int)
                        for d in margindim:
                            diff[fixeddim_ > d] += 1
                        fixeddim_ -= diff
                    return c['bp'].mpdf_gen(fixeddim_, fixedprop,
                                            None, ngrid=ngrid,
                                            qmin=qmin, qmax=qmax,
                                            grid=grid, norm=norm)

        # Safety check: make sure the dimensions all match up
        if fixeddim is None:
            nfixed = 0
        else:
            nfixed = np.atleast_1d(np.array(fixeddim)).shape[0]
            if len(set(np.atleast_1d(fixeddim))) != nfixed:
                raise ValueError("fixeddim must not have any "
                                 "repeated elements!")
            if np.array(fixedprop).shape[-1] != nfixed:
                raise ValueError("final dimension of fixedprop must" +
                                 " have {:d} dimensions".format(nfixed) +
                                 " to match fixeddim!")
        if margindim is None:
            nmargin = 0
        else:
            nmargin = np.atleast_1d(np.array(margindim)).shape[0]
            if len(set(np.atleast_1d(margindim))) != nmargin:
                raise ValueError("margindim must not have any "
                                 "repeated elements!")
        if qmin is not None:
            nq = np.atleast_1d(np.array(qmin)).shape[0]
            if nq + nmargin + nfixed != \
               self.__nphys + self.__nphot:
                raise ValueError("qmin must have {:d}".
                                 format(self.__nphys + self.__nphot -
                                        nfixed - nmargin) +
                                 " dimensions!")        
        else:
            nq = self.__nphys + self.__nphot - nfixed - nmargin
        if qmax is not None:
            if nq != np.atleast_1d(np.array(qmax)).shape[0]:
                raise ValueError("qmin and qmax must have matching" +
                                 " number of dimensions")
        if grid is not None:
            nq = len(grid)
            if nq + nmargin + nfixed != \
               self.__nphys + self.__nphot:
                raise ValueError("grid must have {:d}".
                                 format(self.__nphys + self.__nphot -
                                        nfixed - nmargin) +
                                 " elements!")
        if fixeddim is not None and margindim is not None:
            if len(set(margindim) & set(fixeddim)) > 0:
                raise ValueError("margindim and fixeddim must not "
                                 "have any elements in common!")

        # Make versions of all inputs arrays suitable for passing to c
        if margindim is not None:
            margindim_tmp = np.atleast_1d(
                np.array(margindim, dtype=c_ulong))
        if fixeddim is not None:
            fixeddim_tmp = np.atleast_1d(
                np.array(fixeddim, dtype=c_ulong))
            xfixed = np.atleast_1d(np.array(fixedprop, dtype=c_double))
            
        # Figure out the indices we're returning
        idx = set(range(self.__nphys+self.__nphot))
        if margindim is not None:
            idx = idx - set(margindim)
        if fixeddim is not None:
            idx = idx - set(fixeddim)
        idx = np.array(list(idx), dtype=c_ulong)
            
        # Set up output grid; make a version to pass to the c code
        # that has the axes tranposed, since the c code needs a
        # different data ordering
        nidx, qmin_tmp, qmax_tmp, grid_out, out_shape \
            = self.__make_outgrid(idx, ngrid, qmin, qmax, grid)
        grid_out_c = np.transpose(grid_out)

        # Set up array to hold the final pdf
        if fixedprop is not None:
            pdf = np.zeros(np.array(fixedprop)[..., 0] + out_shape,
                           dtype=c_double)
        else:
            pdf = np.zeros(out_shape, dtype=c_double)

        # Call the correct c library routine, depending on whether we
        # have fixed points, marginalized dimensions, both, or neither
        if fixeddim is None:

            if margindim is None:

                # No fixed points, no marginalized dimensions, so just
                # call kd_pdf_vec
                self.__clib.kd_pdf_vec(
                    self.__kd, np.ravel(grid_out_c), None,
                    grid_out_c.size//self.ndim,
                    self.reltol, self.abstol,
                    np.ravel(pdf))

            else:

                # Marginalizing over some dimensions, but no fixed
                # points, so call kd_pdf_int_vec
                self.__clib.kd_pdf_int_vec(
                    self.__kd, np.ravel(grid_out_c), None,
                    idx, len(idx), grid_out_c.size//len(idx),
                    self.reltol, self.abstol, np.ravel(pdf))

        else:

            if margindim is None:

                # We have fixed points, but no marginalized
                # dimensions, so call kd_pdf_reggrid
                self.__clib.kd_pdf_reggrid(
                    self.__kd, np.ravel(xfixed), fixeddim_tmp,
                    nfixed, xfixed.size//nfixed,
                    qmin_tmp, qmax_tmp, ngrid_tmp, idx,
                    idx.size, self.reltol, self.abstol,
                    np.ravel(pdf))

            else:

                # We have both fixed points and marginalized
                # dimensions, so call kd_pdf_int_reggrid
                self.__clib.kd_pdf_int_reggrid(
                    self.__kd, np.ravel(xfixed), fixeddim_tmp,
                    nfixed, xfixed.size//nfixed, qmin_tmp,
                    qmax_tmp, idx, idx.size, self.reltol,
                    self.abstol, np.ravel(pdf))

        # Normalize if requested
        if norm:
            pdf = self.__mpdf_normalize(nidx, grid_out, pdf)

        # Return
        return grid_out, pdf

    
    
    ##################################################################
    # Utility method to set up grids for outputs of marginal PDF
    # routines that are appropriate for passing to the c library. Used
    # by all the mpdf routines.
    ##################################################################
    def __make_outgrid(self, idx, ngrid, qmin, qmax, grid):

        # Were we given a grid?
        if grid is not None:
            # Yes, so just make it an array, and extract the limits
            # from it
            grid_out = np.array(grid, dtype=c_double)
            out_shape = grid_out.shape
            qmin_tmp \
                = np.array(
                    np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)]),
                    dtype=np.double)
            qmax_tmp \
                = np.array(
                    np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)]),
                    dtype=np.double)
            ngrid_tmp = np.array(grid_out.shape[1:], dtype=c_ulong)
        else:
            # No input grid was given

            # Were we given upper or lower limits? If not, read them
            # from the library. For photometric dimensions, exclude
            # values of 99, which indicate bad data.
            if qmin is None:
                qmin = np.amin(self.__dataset[:,idx], axis=0)
            if qmax is None:
                qmax = np.zeros(len(idx))
                for i in range(len(idx)):
                    if i < self.__nphys:
                        qmax[i] = np.amax(self.__dataset[:,i])
                    else:
                        qmax[i] = np.amax(self.__dataset[:,i][
                            self.__dataset[:,i] < 99.0])
            griddims = []

            # Figure out how many dimensions the output has
            if hasattr(idx, '__len__'):
                nidx = len(idx)
            else:
                nidx = 1
            if nidx > 1:
                # Case for multiple indices
                griddims = []
                if hasattr(ngrid, '__len__'):
                    ngrid_tmp = np.array(ngrid, dtype=c_ulong)
                else:
                    ngrid_tmp = np.array([ngrid]*len(idx), dtype=c_ulong)
                for i in range(len(idx)):
                    griddims.append(qmin[i] + np.arange(ngrid_tmp[i]) * 
                                    float(qmax[i]-qmin[i])/(ngrid_tmp[i]-1))
                grid_out = np.squeeze(
                    np.array(np.meshgrid(*griddims,
                                         indexing='ij'), dtype=c_double))
                out_shape = grid_out[0, ...].shape
                qmin_tmp \
                    = np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)])
                qmax_tmp \
                    = np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)])
            else:
                # Case for a single index
                ngrid_tmp = np.array([ngrid], dtype=c_ulong)
                qmin_tmp = np.array([qmin], dtype=np.double).reshape(1)
                qmax_tmp = np.array([qmax], dtype=np.double).reshape(1)
                grid_out = np.array(
                    qmin + np.arange(ngrid) *
                    float(qmax-qmin)/(ngrid-1),
                    dtype=c_double)
                out_shape = grid_out.shape

        # Return what we've built
        return nidx, qmin_tmp, qmax_tmp, grid_out, out_shape
    

    ##################################################################
    # Utility method to normalize marginal PDFs
    ##################################################################
    def __mpdf_normalize(self, nidx, grid_out, pdf):
        # Compute the sizes of the output cells
        if nidx == 1:
            cellsize = np.zeros(grid_out.size)
            cellsize[1:-1] = 0.5*(grid_out[2:]-grid_out[:-2])
            cellsize[0] = grid_out[1] - grid_out[0]
            cellsize[-1] = grid_out[-1] - grid_out[-2]
        else:
            # Get the cell sizes in each dimension
            csize = []
            for i in range(nidx):
                vec = grid_out[(i,)+i*(0,)+(slice(None),) + 
                               (grid_out.shape[0]-i-1)*(0,)]
                csize.append(np.zeros(vec.size))
                csize[i][1:-1] = 0.5*(vec[2:]-vec[:-2])
                csize[i][0] = vec[1] - vec[0]
                csize[i][-1] = vec[-1] - vec[-2]
            # Take outer product to get grid of sizes
            cellsize = np.multiply.outer(csize[0], csize[1])
            for i in range(2, nidx):
                cellsize = np.multiply.outer(cellsize, csize[i])

        # Compute integral
        normfac = np.sum(pdf*np.abs(cellsize), axis = 
                         tuple(range(pdf.ndim-nidx, pdf.ndim)))

        # Normalize
        pdf = np.transpose(np.transpose(pdf)/normfac)

        # Return
        return pdf

    
    ##################################################################
    # Method to return log likelihood function at a specified set of
    # physical properties for a particular set of photometric
    # variables; this is set up for use by emcee, and is not intended
    # for use by humans
    ##################################################################
    def __logL(self, *args):
        x = np.zeros(self.__nphys+self.__nphot)
        x[:self.__nphys] = args[0]
        x[self.__nphys:] = args[1:-1]
        kd_tmp = args[-1]
        if not self.__diag_mode:
            return np.log(self.__clib.kd_pdf(kd_tmp, x, self.reltol,
                                             self.abstol))
        else:
            nodecheck = c_ulong(0)
            leafcheck = c_ulong(0)
            termcheck = c_ulong(0)
            return np.log(self.__clib.kd_pdf(kd_tmp, x, self.reltol,
                                             self.abstol, nodecheck,
                                             leafcheck, termcheck))


    ##################################################################
    # Function to compute an MCMC sampler for a particular set of
    # photometric values
    ##################################################################

[docs]
    def mcmc(self, photprop, photerr=None, mc_walkers=100,
             mc_steps=500, mc_burn_in=50):
        """
        This function returns a sample of MCMC walkers sampling the
        physical parameters at a specified set of photometric values.

        Parameters:
           photprop : arraylike, shape (nfilter) or (..., nfilter)
              array giving the photometric values; for a
              multidimensional array, the operation is vectorized over
              the leading dimensions
           photerr : arraylike, shape (nfilter) or (..., nfilter)
              array giving photometric errors; for a multidimensional
              array, the operation is vectorized over the leading
              dimensions
           mc_walkers : int
              number of walkers to use in the MCMC
           mc_steps : int
              number of steps in the MCMC
           mc_burn_in : int
              number of steps to consider "burn-in" and discard

        Returns
           samples : array
              array of sample points returned by the MCMC
        """

        # See if we have emcee
        if not mc_avail:
            raise ImportError("unable to import emcee")

        # Safety check
        if (np.array(photprop).shape[-1] != self.__nphot) and \
           (self.__nphot > 1):
            raise ValueError("need " + str(self.__nphot) +
                             " photometric properties!")
        if photerr is not None:
            if (np.array(photerr).shape[-1] != self.__nphot) and \
               (self.__nphot > 1):
                raise ValueError("need " + str(self.__nphot) +
                                 " photometric errors!")
 
        # Reshape arrays if necessary
        if (np.array(photprop).shape[-1] != self.__nphot) and \
           (self.__nphot == 1):
            photprop = photprop.reshape(photprop.shape+(1,))
        if photerr is not None:
            if (np.array(photerr).shape[-1] != self.__nphot) and \
               (self.__nphot == 1):
                photerr = photerr.reshape(photerr.shape+(1,))

        # Prepare storage for output
        samples = []

        # Make dummy photometric errors if necessary
        if photerr is None:
            photerr = np.zeros(self.__nphot)

        # Loop over photometric errors
        kd_tmp = None
        for i in np.ndindex(*np.array(photerr).shape[:-1]):

            # Set bandwidth based on photometric error for this
            # iteration
            kd_tmp = self.__change_bw_err(photerr[i], kd_cur=kd_tmp)
 
            # Grab the clusters that go with this photometric error
            if photprop.ndim < photerr.ndim:
                ph = photprop
            else:
                ph = photprop[i]

            # Loop over clusters
            for j in np.ndindex(*np.array(ph).shape[:-1]):

                # Grab photometric values for this cluster
                ph_tmp = ph[j]

                # Search the data set for the sample cluster closest to
                # the observed luminosities; this will be the starting
                # point for the MCMC
                dims = np.arange(self.__nphys,
                                 self.__nphys+self.__nphot, 
                                 dtype=np.uint32)
                nearpt = np.zeros(self.__nphys+self.__nphot)
                wgt = np.zeros(1)
                dist2 = np.zeros(1)
                self.__clib. \
                    kd_neighbors(kd_tmp, ph_tmp,
                                 dims.ctypes.data_as(POINTER(c_ulong)),
                                 self.__nphot, 1, True, nearpt, 
                                 wgt.ctypes.data_as(POINTER(c_double)),
                                 dist2)

                # Generate a set of starting points by scattering walkers
                # around the starting position
                pos = [nearpt[:self.__nphys] + 
                       self.bandwidth[:self.__nphys] * 
                       np.random.randn(self.__nphys) 
                       for i in range(mc_walkers)]

                # Run the MCMC
                sampler=emcee.EnsembleSampler(mc_walkers, self.__nphys, 
                                              self.__logL, 
                                              args=[ph_tmp, kd_tmp])
                sampler.run_mcmc(pos, mc_steps)

                # Store the result
                samples.append(sampler.chain[:,mc_burn_in:,:].
                               reshape((-1,self.__nphys)))


        # Set bandwidth back to default if necessary
        if photerr is not None:
            self.__restore_bw_err(kd_tmp)

        # Reshape the samples
        samples = np.squeeze(
            np.array(samples).reshape(
                np.broadcast(np.array(photprop)[..., 0],
                             np.array(photerr)[..., 0]).shape +
                samples[0].shape))

        # Return
        return samples



    ##################################################################
    # Function to return the N best matches in the library to an input
    # set of photometric properties
    ##################################################################

[docs]
    def bestmatch(self, phot, photerr=None, nmatch=1, 
                  bandwidth_units=False):
        """
        Searches through the simulation library and returns the closest
        matches to an input set of photometry.

        Parameters:
           phot : arraylike, shape (nfilter) or (..., nfilter)
              array giving the photometric values; for a
              multidimensional array, the operation is vectorized over
              the leading dimensions
           photerr : arraylike, shape (nfilter) or (..., nfilter)
              array giving photometric errors, which must have the
              same shape as phot; if this is not None,
              then distances will be measured in units of the
              photometric error if bandwidth_units is False, or in
              units of the bandwidth added in quadrature with the
              errors if it is True
           nmatch : int
              number of matches to return; returned matches will be
              ordered by distance from the input
           bandwidth_units : bool
              if False, distances are computed based on the
              logarithmic difference in luminosity; if True, they are
              measured in units of the bandwidth

        Returns:
           matches : array, shape (..., nmatch, nphys + nfilter)
              best matches to the input photometry; shape in the
              leading dimensions will be the same as for phot, and if
              nmatch == 1 then that dimension will be omitted
           dist : array, shape (..., nmatch)
              distances between the matches and the input photometry
        """

        # Safety check
        if (np.array(phot).shape[-1] != self.__nphot) and \
           (self.__nphot > 1):
            raise ValueError("need " + str(self.__nphot) +
                             " photometric properties!")
        if photerr is not None:
            if (np.array(photerr).shape[-1] != self.__nphot) and \
               (self.__nphot > 1):
                raise ValueError("need " + str(self.__nphot) +
                                 " photometric errors!")
            if np.array(photerr).shape != np.array(phot).shape:
                raise ValueError("phot and photerr must have the same shape!")

        # Reshape arrays if necessary
        if (np.array(phot).shape[-1] != self.__nphot) and \
           (self.__nphot == 1):
            phot = phot.reshape(phot.shape+(1,))
        if photerr is not None:
            if (np.array(photerr).shape[-1] != self.__nphot) and \
               (self.__nphot == 1):
                photerr = photerr.reshape(photerr.shape+(1,))

        # Figure out how many distinct photometric values we've been
        # given, and how many sets of photometric errors
        nphot = np.array(phot).size//self.__nphot
        nphot_err = np.array(photerr).size//self.__nphot

        # Figure out what shape the output should have
        if nphot == 1 and nphot_err == 1:
            outshape = [self.__nphys + self.__nphot]
        elif photerr is None:
            outshape = list(phot.shape[:-1]) + \
                       [self.__nphys + self.__nphot]
        else:
            outshape = list(
                np.broadcast(np.array(phot)[..., 0],
                             np.array(photerr)[..., 0]).shape) + \
                [self.__nphys + self.__nphot]
        if nmatch > 1:
            outshape.insert(-1, nmatch)

        # Create array to hold output
        matches = np.zeros(outshape)
        if len(outshape) > 1:
            wgts = np.zeros(outshape[:-1])
            d2 = np.zeros(outshape[:-1])
        else:
            wgts = np.zeros([1])
            d2 = np.zeros([1])

        # Do we have errors?
        if photerr is None:

            # No, so just call the c neighbor-finding routine
            dims = np.arange(self.__nphys, self.__nphot+self.__nphys,
                             dtype=c_ulong)
            self.__clib.kd_neighbors_vec(
                self.__kd, np.ravel(phot), 
                dims.ctypes.data_as(POINTER(c_ulong)),
                self.__nphot, np.array(phot).size // self.__nphot,
                nmatch, bandwidth_units, np.ravel(matches),
                wgts.ctypes.data_as(POINTER(c_double)), 
                np.ravel(d2))

        elif nphot_err == 1:

            # Yes, we have errors, but only one set, so no need to
            # loop

            # Change the bandwidth to match the input errors
            kd_tmp = self.__change_bw_err(
                photerr, err_only = not bandwidth_units)

            # Now call c neighbor-finding routine
            dims = np.arange(self.__nphys, self.__nphot+self.__nphys,
                             dtype=c_ulong)
            self.__clib.kd_neighbors_vec(
                kd_tmp, np.ravel(phot), 
                dims.ctypes.data_as(POINTER(c_ulong)),
                self.__nphot, np.array(phot).size//self.__nphot,
                nmatch, True, np.ravel(matches),
                wgts.ctypes.data_as(POINTER(c_double)), 
                np.ravel(d2))

            # Restore bandwidth to previous value
            self.__restore_bw_err(kd_tmp)

        else:

            # We have multiple sets of errors, so loop
            ptr = 0
            kd_tmp = None
            for i in np.ndindex(*photerr.shape[:-1]):

                # Set bandwidth based on photometric error for this
                # iteration
                kd_tmp = self.__change_bw_err(
                    photerr[i], kd_cur = kd_tmp,
                    err_only = not bandwidth_units)

                # Call c neighbor-finding routine
                offset1 = ptr*nmatch
                offset2 = offset1*(self.__nphot+self.__nphys)
                dims = np.arange(self.__nphys, self.__nphot+self.__nphys,
                                 dtype=c_ulong)                
                self.__clib.kd_neighbors_vec(
                    kd_tmp, np.ravel(phot[i]), 
                    dims.ctypes.data_as(POINTER(c_ulong)),
                    self.__nphot, np.array(phot[i]).size//self.__nphot,
                    nmatch, True, np.ravel(matches)[offset2:],
                    wgts.ctypes.data_as(POINTER(c_double)), 
                    np.ravel(d2)[offset1:])
                ptr = ptr+1

            # Restore bandwidth to previous value
            self.__restore_bw_err(kd_tmp)

        # Return
        return matches, np.sqrt(d2)


    ##################################################################
    # Function to return the N best matches in the library to an input
    # set of physical properties
    ##################################################################

[docs]
    def bestmatch_phys(self, phys, nmatch=1, bandwidth_units=False):
        """
        Searches through the simulation library and returns the closest
        matches to an input set of photometry.

        Parameters:
           phot : arraylike, shape (nphys) or (..., nphys)
              array giving the physical values; for a
              multidimensional array, the operation is vectorized over
              the leading dimensions
           nmatch : int
              number of matches to return; returned matches will be
              ordered by distance from the input
           bandwidth_units : bool
              if False, distances are computed based on the
              logarithmic difference in physical properties; if True,
              they are measured in units of the bandwidth

        Returns:
           matches : array, shape (..., nmatch, nphys + nfilter)
              best matches to the input properties; shape in the
              leading dimensions will be the same as for phot, and if
              nmatch == 1 then that dimension will be omitted
           dist : array, shape (..., nmatch)
              distances between the matches and the input physical
              properties
        """

        # Safety check
        if (np.array(phys).shape[-1] != self.__nphys) and \
           (self.__nphot > 1):
            raise ValueError("need " + str(self.__nphys) +
                             " physical properties!")


        # Figure out how many points we were given
        phys_tmp = np.array(phys)
        npt = phys_tmp.size // self.__nphys

        # Prepare the output arrays
        outshape = list(phys_tmp.shape[:-1]) + \
                   [self.__nphys + self.__nphot]
        if nmatch > 1:
            outshape.insert(-1, nmatch)

        # Create array to hold output
        matches = np.zeros(outshape)
        if len(outshape) > 1:
            wgts = np.zeros(outshape[:-1])
            d2 = np.zeros(outshape[:-1])
        else:
            wgts = np.zeros([1])
            d2 = np.zeros([1])

        # Call c routine
        dims = np.arange(self.__nphys, dtype=c_ulong)
        self.__clib.kd_neighbors_vec(
            self.__kd, np.ravel(phys_tmp), 
            dims.ctypes.data_as(POINTER(c_ulong)),
            self.__nphys, npt, nmatch, bandwidth_units, 
            np.ravel(matches),
            wgts.ctypes.data_as(POINTER(c_double)), 
            d2)

        # Return
        return matches, np.sqrt(d2)



    ##################################################################
    # Functions to return an approximate kernel density representation
    # of a given point; that is, for an input physical point or
    # photometric point (there are two versions below), this routine
    # returns a set of points and weights that can be used to compute
    # an approximation to the PDF for it anywhere in space, much
    # faster than could be done using the full data set
    ##################################################################

[docs]
    def make_approx_phot(self, phys, squeeze=True, filter_ignore=None):
        """
        Returns an object that can be used for a fast approximation of
        the PDF of photometric properties that corresponds to a set of
        physical properties. The PDF produced by summing over the
        points returned is guaranteed to account for at least 1-reltol
        of the marginal photometric probability, and to represent the
        shape of the PDF in photometric space within a local accuracy
        of reltol as well.

        Parameters:
           phys : arraylike, shape (nphys) or (N, nphys)
              the set or sets of physical properties for which the
              approximation is to be generated
           squeeze : bool
              if True, the representation returned will be squeezed to
              minimize the number of points included, using reltol as
              the error tolerance
           filter_ignore : None or listlike of bool
              if None, the kernel density representation returned
              covers all filters; otherwise this must be a listlike of
              bool, one entry per filter, with a value of False
              indicating that filter should be excluded from the
              values returned; suppressing filters can allow for more
              efficient representations

        Returns:
           x : array, shape (M, nphot), or a list of such arrays
              an array containing the list of points to be used for
              the approximation, where nphot is the number of
              photometric filters being returned
           wgts : array, shape (M), or a list of such arrays
              an array containing the weights of the points

        Notes:
           if the requested relative tolerance cannot be reached for
           numerical reasons (usually because the input point is too
           far from the library to allow accurate computation), x and
           wgts will be return as None, and a warning will be issued
        """

        # If given only one set of physical variables, make a
        # temporary list we can loop over
        if hasattr(phys[0], '__iter__'):
            phystmp = phys
        else:
            phystmp = [phys]

        # Specify dimensions to return, and set bandwidth for them
        if filter_ignore is None:
            dim_return_ptr = None
            ndim_return = self.__nphot
            bw = self.__bandwidth[self.__nphys:]
        else:
            dim_return = np.where(np.logical_not(
                np.array(filter_ignore)))[0] + self.__nphys
            dim_return_ptr = dim_return.ctypes.data_as(POINTER(c_ulong))
            ndim_return = len(dim_return)
            bw = self.__bandwidth[dim_return]

        # Loop over input physical variables
        x = []
        wgts = []
        for ph in phystmp:

            # Safety check
            if len(ph) != self.__nphys:
                raise ValueError("need " + str(self.__nphys) + 
                                 " physical properties!")

            # Call c library routine to make the representation
            xout = POINTER(c_double)()
            wgtsout = POINTER(c_double)()
            npts = self.__clib.kd_rep(self.__kd, np.array(ph), 
                                      np.arange(self.__nphys, dtype=c_ulong),
                                      self.__nphys, self.reltol, 
                                      dim_return_ptr, ndim_return,
                                      ctypes.byref(xout), 
                                      ctypes.byref(wgtsout))

            # Check if the c routine encountered an error due to
            # insufficient precision; if so, issue warning and return
            # None in place of xpt and wgts
            if npts == 0:
                warn("bp.make_approx_phot: requested precision cannot be reached")
                return None

            # Call c library routine to squeeze the representation
            if squeeze:
                npts = self.__clib.squeeze_rep(
                    npts, ndim_return, bw, self.reltol,
                    ctypes.byref(xout), ctypes.byref(wgtsout))

            # Convert the returned values into numpy arrays; copy the
            # data so that we can free the c buffers
            xsave = np.copy(npct.as_array(xout, shape=(npts, ndim_return)))
            wgtssave = np.copy(npct.as_array(wgtsout, shape=(npts,)))

            # Free the c buffers
            self.__clib.free_kd_rep(ctypes.byref(xout), 
                                    ctypes.byref(wgtsout))

            # Append to the lists we'll be returning
            x.append(xsave)
            wgts.append(wgtssave)

        # If we were given a single object, return something in the
        # same shape
        if not hasattr(phys[0], '__iter__'):
            x = x[0]
            wgts = wgts[0]

        # Return
        return x, wgts



[docs]
    def make_approx_phys(self, phot, photerr=None, squeeze=True,
                         phys_ignore=None, tol=None):
        """
        Returns an object that can be used for a fast approximation of
        the PDF of physical properties that corresponds to a set of
        photometric properties. The PDF produced by summing over the
        points returned is guaranteed to account for at least 1-reltol
        of the marginal photometric probability, and to represent the
        shape of the PDF in photometric space within a local accuracy
        of reltol as well.

        Parameters:
           phot : arraylike, shape (nfilter) or (N, nfilter)
              the set or sets of photometric properties for which the
              approximation is to be generated
           photerr : arraylike, shape (nfilter) or (N, nfilter)
              array giving photometric errors; the number of elements
              in the output lists will be the size that results from
              broadcasting together the leading dimensions of phot and
              photerr
           squeeze : bool
              if True, the representation returned will be squeezed to
              minimize the number of points included, using reltol as
              the error tolerance
           phys_ignore : None or listlike of bool
              if None, the kernel density representation returned
              covers all physical properties; otherwise this must be a
              listlike of bool, one entry per physical dimension, with
              a value of False indicating that dimension should be
              excluded from the values returned; suppressing
              dimensions can allow for more efficient representations
           tol : float
              if set, this tolerance overrides the value of reltol

        Returns:
           x : array, shape (M, nphys), or a list of such arrays
              an array containing the list of points to be used for
              the approximation, where nphys is the number of
              physical dimensions being returned
           wgts : array, shape (M), or a list of such arrays
              an array containing the weights of the points

        Notes:
           if the requested relative tolerance cannot be reached for
           numerical reasons (usually because the input point is too
           far from the library to allow accurate computation), x and
           wgts will be return as None, and a warning will be issued
        """

        # If given only one set of photometric variables, make a
        # temporary list we can loop over; same for photometric errors
        if hasattr(phot[0], '__iter__'):
            phottmp = phot
        else:
            phottmp = [phot]
        if photerr is not None:
            if hasattr(photerr[0], '__iter__'):
                photerrtmp = photerr
            else:
                photerrtmp = [photerr]
        else:
            photerrtmp = [None]

        # Specify dimensions to return, and set bandwidth for them
        if phys_ignore is None:
            dim_return_ptr = None
            ndim_return = self.__nphys
            bw = self.__bandwidth[:self.__nphys]
        else:
            dim_return = np.where(np.logical_not(
                np.array(phys_ignore)))[0]
            dim_return_ptr = dim_return.ctypes.data_as(POINTER(c_ulong))
            ndim_return = len(dim_return)
            bw = self.__bandwidth[dim_return]

        # Set tolerance
        if tol is None:
            tol = self.reltol

        # Loop over input photometric variables
        x = []
        wgts = []
        for ph in phottmp:

            # Safety check
            if len(ph) != self.__nphot:
                raise ValueError("need " + str(self.__nphot) + 
                                 " photometric properties!")

            # Loop over photometric errors
            kd_tmp = None
            for pherr in photerrtmp:

                # Change the bandwidth if necessary
                if pherr is not None:
                    if len(pherr) != self.__nphot:
                        raise ValueError("need " + str(self.__nphot) + 
                                         " photometric errors!")
                    kd_tmp = self.__change_bw_err(pherr, kd_cur=kd_tmp)

                # Call c library routine to make the representation
                xout = POINTER(c_double)()
                wgtsout = POINTER(c_double)()
                npts = self.__clib.kd_rep(
                    kd_tmp, np.array(ph), 
                    np.arange(self.__nphot, dtype=c_ulong)+self.__nphys,
                    self.__nphot, tol, 
                    dim_return_ptr, ndim_return,
                    ctypes.byref(xout), 
                    ctypes.byref(wgtsout))

                # Check if the c routine encountered an error due to
                # insufficient precision; if so, issue warning and
                # return None in place of xpt and wgts
                if npts == 0:
                    warn("bp.make_approx_phys: requested precision cannot be reached")
                    if kd_tmp is not None:
                        self.__restore_bw_err(kd_tmp)
                    return None

                # Call c library routine to squeeze the representation
                if squeeze:
                    npts = self.__clib.squeeze_rep(
                        npts, ndim_return, bw, self.reltol,
                        ctypes.byref(xout), ctypes.byref(wgtsout))

                # Convert the returned values into numpy arrays; copy the
                # data so that we can free the c buffers
                xsave = np.copy(npct.as_array(xout, 
                                              shape=(npts, ndim_return)))
                wgtssave = np.copy(npct.as_array(wgtsout, shape=(npts,)))

                # Free the c buffers
                self.__clib.free_kd_rep(ctypes.byref(xout), 
                                        ctypes.byref(wgtsout))

                # Append to the lists we'll be returning
                x.append(xsave)
                wgts.append(wgtssave)

        # Reset the bandwidth
        if photerr is not None:
            self.__restore_bw_err(kd_tmp)

        # If we were given a single object, return something in the
        # same shape
        if not hasattr(phot[0], '__iter__'):
            if photerr is None:
                x = x[0]
                wgts = wgts[0]
            elif not hasattr(photerr[0], '__iter__'):
                x = x[0]
                wgts = wgts[0]

        # Return
        return x, wgts

            
    ##################################################################
    # Method to squeeze a representation that has already been created
    ##################################################################

[docs]
    def squeeze_rep(self, x, wgts, dims=None):
        """
        Takes an input array of positions and weights that form a
        kernel density representation and approximates them using
        fewer points, using an error tolerance of reltol

        Parameters:
           x : array, shape (N, ndim)
              an array of points forming a kernel density
              representation; on exit, x will be resized to (M, ndim)
              with M <= N
           wgts : array, shape (N)
              an array of weights for the kernel density
              representation; on exit, wgts will be resized to (M),
              with M <= N
           dims : array, shape (ndim)
              array specifying which dimensions in the kernel density
              representation the coordinates in x correspond to; if
              left as None, they are assumed to correspond to the
              first ndim dimensions in the data set

        Returns:
           Nothing
        """

        # Get the bandwidth
        if dims is None:
            bw = self.__bandwidth[:x.shape[1]]
        else:
            bw = self.__bandwidth[dims]

        # Prepare data to pass to the c routine
        npt = x.shape[0]
        ndim = x.shape[1]
        xtmp = POINTER(c_double)(x.ctypes.data_as(POINTER(c_double)))
        wgtstmp = POINTER(wgts.ctypes.data_as(POINTER(c_double)))

        # Call the c squeeze routine
        npt_out = self.__clib.squeeze_rep(
            npt, ndim, bw, self.reltol,
            ctypes.byref(xtmp), ctypes.byref(wgtstmp))

        # Discard the old metadata for x and wgts, and rebuild them
        # using the new metadata
        x = npct.as_array(xtmp, shape=(npt_out, ndim))
        wgts = npct.as_array(wgtstmp, shape=(ndim,))



    ##################################################################
    # Routines to use the approximate representations returned by
    # make_approx_phys and make_approx_phot to compute marginal
    # posterior probabilities
    ##################################################################


[docs]
    def mpdf_approx(self, x, wgts, dims='phys', dims_return=None,
                    ngrid=64, qmin='all', qmax='all', grid=None,
                    norm=True):
        """
        Returns the marginal posterior PDF computed from a kernel
        density approximation returned by make_approx_phys or
        make_approx_phot. Outputs are computed on a grid of values, in
        the same style as meshgrid.

        Parameters:
           x : array, shape (M, ndim), or a list of such arrays
              array of points retured by make_approx_phot or
              make_approx_phys
           wgts : array, shape (M) or a list of such arrays
              array of weights returned by make_approx_phot or
              make_approx_phys
           dims : 'phys' | 'phot' | arraylike of ints
              dimensions covered by x and wgts; the strings 'phys' or
              'phot' indicate that they cover all physical or
              photometric dimensions, and correspond to the defaults
              returned by make_approx_phys and make_approx_phot,
              respectively; if dims is an array of ints, these specify
              the dimensions covered by x and wgts, where the
              physical dimensions are numbered 0, 1, ... nphys-1, and
              the photometric ones are nphys, nphys+1,
              ... nphys+nphot-1
           dims_return : None or arraylike of ints
              if None, the output PDF has the same dimensions as
              specified in dms; if not, then dimreturn must be a
              subset of dim, and a marginal PDF in certain dimensions
              will be generated
           ngrid : int or listlike containing ints
              number of points in each dimension of the output grid;
              if this is an iterable, it must have the same number of
              elements as idx
           qmin : float | listlike | 'zoom' | 'all' 
              minimum value in the output grid in each quantity; if
              this a float, it is applied to each dimension; if it is
              an iterable, it must contain the same number of elements
              as the number of dimensions being returned, as gives the
              minimum in each dimension; if it is 'zoom' or 'all', the
              minimum is chosen automatically, with 'zoom' focusing on
              a region encompassing the probability maximum, and 'all'
              encompassing all the points in the representation
           qmax : float | listlike | 'zoom' | 'all'
              same as qmin, but for the maximum of the output grid
           grid : listlike of arrays
              set of values defining the grid on which the PDF is to
              be evaluated, in the same format used by meshgrid
           norm : bool
              if True, returned pdf's will be normalized to integrate
              to 1

        Returns:
           grid_out : array
              array of values at which the PDF is evaluated; contents
              are the same as returned by meshgrid
           pdf : array
              array of marginal posterior probabilities at each point
              of the output grid, for each input cluster; the leading
              dimensions match the leading dimensions produced by
              broadcasting the leading dimensions of photprop and
              photerr together, while the trailing dimensions match
              the dimensions of the output grid
        """

        # Set up input and output dimensions
        if dims == 'phys':
            dim_in = np.arange(self.__nphys)
        elif dims == 'phot':
            dim_in = np.arange(self.__nphot) + self.__nphys
        else:
            dim_in = dims
        if dims_return is None:
            dim_out = dim_in
        elif not hasattr(dims_return, '__iter__'):
            dim_out = [dims_return]
            if not dim_out in dim_in:
                raise ValueError("dims_return must be a subset of dims!")
        elif set(dims_return) <= set(dim_in):
            dim_out = dims_return
        else:
            raise ValueError("dims_return must be a subset of dims!")
        nidx = len(dim_out)

        # Set up the output grid
        if grid is not None:
            grid_out = np.meshgrid(grid)
            qmin \
                = np.array(
                    np.copy(grid_out[(Ellipsis,)+(0,)*(grid_out.ndim-1)]),
                    dtype=np.double)
            qmax \
                = np.array(
                    np.copy(grid_out[(Ellipsis,)+(-1,)*(grid_out.ndim-1)]),
                    dtype=np.double)
            ngrid_tmp = np.squeeze(grid_out).shape
            for i in range(len(dim_out)):
                griddims.append(np.linspace(qmin[i], qmax[i], 
                                            ngrid_tmp[i]))
        else:
            if qmin == 'zoom' or qmin == 'all' or \
               qmax == 'zoom' or qmax == 'all':
                if qmin == 'zoom' or qmin == 'all':
                    get_qmin = True
                    qmin = []
                else:
                    get_qmin = False
                if qmax == 'zoom' or qmax == 'all':
                    qmax = []
                    get_qmax = True
                else:
                    get_qmax = False
                for d in dim_out:
                    xtmp = np.copy(x[:,d])
                    wgttmp = np.copy(wgts)
                    idx = np.argsort(xtmp)
                    xtmp = xtmp[idx]
                    wgttmp = wgttmp[idx]
                    wgtsum = np.cumsum(wgttmp)
                    if get_qmin:
                        if qmin == 'all':
                            qmin.append(xtmp[0])
                        else:
                            qmin.append(xtmp[np.argmax(wgtsum > 0.001)])
                    if get_qmax:
                        if wgtsum[-1] > 0.999 and qmax != 'all':
                            qmax.append(xtmp[np.argmax(wgtsum >
                                                       0.999)])
                        else:
                            qmax.append(xtmp[-1])
                if get_qmin:
                    qmin = np.array(qmin)
                if get_qmax:
                    qmax = np.array(qmax)
            griddims = []
            if nidx > 1:
                # Case for multiple indices
                griddims = []
                if hasattr(ngrid, '__len__'):
                    ngrid_tmp = np.array(ngrid)
                else:
                    ngrid_tmp = np.array([ngrid]*len(dim_out))
                for i in range(len(dim_out)):
                    griddims.append(np.linspace(qmin[i], qmax[i], 
                                                ngrid_tmp[i]))
                grid_out = np.squeeze(np.array(np.meshgrid(*griddims,
                                                           indexing='ij')))
            else:
                # Case for a single index
                grid_out = qmin + \
                           np.arange(ngrid) * \
                           float(qmax-qmin)/(ngrid-1)
                griddims = [grid_out]

        # Compute result; in the multi-dimensional case this involves
        # some tricky array indexing
        if nidx == 1:
            pdf = np.einsum('i,ij', wgts, 
                           np.exp(-np.subtract.outer(
                               x[:,dim_out[0]], grid_out)**2 / 
                                  (2*self.__bandwidth[dim_out[0]]**2)))
                         
        else:
            compsum = np.exp(
                -np.subtract.outer(x[:,dim_out[0]], griddims[0])**2 /
                (2.0*self.__bandwidth[dim_out[0]]**2))
            for d, grd in zip(dim_out[1:], griddims[1:]):
                comp = np.exp(-np.subtract.outer(x[:,d], grd)**2 / \
                              (2.0*self.__bandwidth[d]**2))
                compsum = np.einsum('...j,...k->...jk', compsum, comp)
            pdf = np.einsum('i,i...', wgts, compsum)

        # Normalize if requested
        if norm:

            # Compute the sizes of the output cells
            if nidx == 1:
                cellsize = np.zeros(grid_out.size)
                cellsize[1:-1] = 0.5*(grid_out[2:]-grid_out[:-2])
                cellsize[0] = grid_out[1] - grid_out[0]
                cellsize[-1] = grid_out[-1] - grid_out[-2]
            else:
                # Get the cell sizes in each dimension
                csize = []
                for i in range(nidx):
                    vec = grid_out[(i,)+i*(0,)+(slice(None),) + 
                                   (grid_out.shape[0]-i-1)*(0,)]
                    csize.append(np.zeros(vec.size))
                    csize[i][1:-1] = 0.5*(vec[2:]-vec[:-2])
                    csize[i][0] = vec[1] - vec[0]
                    csize[i][-1] = vec[-1] - vec[-2]
                # Take outer product to get grid of sizes
                cellsize = np.multiply.outer(csize[0], csize[1])
                for i in range(2, nidx):
                    cellsize = np.multiply.outer(cellsize, csize[i])

            # Compute integral
            normfac = np.sum(pdf*cellsize)

            # Normalize
            pdf = pdf/normfac

        # Return
        return grid_out, pdf



    ##################################################################
    # Routines to draw from the kernel density PDF, constraining 
    # dimensions or not
    ##################################################################

[docs]
    def draw_sample(self, photerr=None, nsample=1):
        """
        Returns a randomly-drawn sample of physical and photometric
        properties from the kernel density function

        Parameters:
           nsample : int
              number of random samples to draw for each set of
              physical properties; must be positive
           photerr : arraylike, shape (nphot)
              photometric errors to apply to the output photometry;
              these are added in quadrature with the kernel density
              estimation bandwidth

        Returns:
           samples : array, shape (nsample, nphys+nphot)
              random sample drawn from the kernel density object; for
              the final dimension in the output, the first nphys
              elements are the physical quantities, the next nphot are
              the photometric quantities
        """

        # Safety check
        if (nsample < 1):
            raise ValueError("need to draw at least 1 sample")
        
        # Allocate a random number generator from gsl if we do not
        # already have one
        if not hasattr(self, '__rng'):
            self.__rng = self.__clib.rng_init(0)

        # Allocate array to hold outputs
        samples = np.zeros((nsample, self.__nphys+self.__nphot),
                           dtype=c_double)

        # Apply photometric errors if requested
        if photerr is None:
            kd_tmp = self.__kd
        else:
            kd_tmp = self.__change_bw_err(photerr)

        # Call library function
        self.__clib.kd_pdf_draw(kd_tmp, None, None, 0,
                                nsample, 1, self.__rng,
                                np.ravel(samples))

        # Restore kernel density bandwidth
        if photerr is not None:
            self.__restore_bw_err(kd_tmp)

        # Return samples
        return samples

            
    

[docs]
    def draw_phot(self, physprop, physidx=None, photerr=None,
                  nsample=1):
        """
        Returns a randomly-drawn sample of photometric properties for
        one or more input sets of physical properties.

        Parameters:
           physprop : arraylike
              physical properties to be used; the final dimension of
              the input must have len(physidx) indices, or nphys
              indicates if physidx is None; if the input is a
              multidimensional array, the operation is vectorized over
              the leading dimensions physical properties
           physidx : arraylike
              indices of the physical quantities being constrained; if
              left as None, all physical properties are set, and
              physprop must have a trailing dimension of size equal to
              nphys; otherwise this must be an arraylike of <= nphys
              positive integers, each unique and in the range [0,
              nphys), specying which physical dimensions are
              constrained
           photerr : arraylike, shape (nphot)
              photometric errors to apply to the output photometry;
              these are added in quadrature with the kernel density
              estimation bandwidth
           nsample : int
              number of random samples to draw for each set of
              physical properties; must be positive

        Returns:
           samples : arraylike
              sample of photometric properties; the shape of the
              output is (..., nsample, nphot), where nphot is the
              number of photometric properties, and the leading
              dimension(s) match the leading dimension(s) of physprop
        """

        # Safety check and sanitization on inputs
        if (nsample < 1):
            raise ValueError("need to draw at least 1 sample")
        if physidx is None:
            physidx_ = np.arange(self.__nphys, dtype=c_ulong)
        else:
            physidx_ = np.flatten(np.array(physidx_))
            if (len(physidx_) > self.__nphys) or \
               (len(physidx_) != len(np.unique(physidx))) or \
               (np.amin(physidx_) < 0) or \
               (np.amax(physidx_) >= self.__nphys):
                raise ValueError("physidx must be an array of "
                                 "unique indices from 0 to {:d}".
                                 format(self.__nphys-1))
            
        physprop_ = np.array(physprop)
        if physprop_.shape[-1] != physidx_.size:
            raise ValueError("physidx must have as many elements as "
                             "trailing dimension of physprop")

        # Set up array to hold outputs
        if physprop_.ndim == 1:
            physprop_ = physprop_.reshape((1,)+physprop_.shape)
        sample_dim = physprop_.shape[:-1]
        samples = np.zeros(sample_dim + (nsample, self.__nphot))

        # Apply photometric errors if requested
        if photerr is None:
            kd_tmp = self.__kd
        else:
            kd_tmp = self.__change_bw_err(photerr)

        # Allocate a random number generator from gsl if we do not
        # already have one
        if not hasattr(self, '__rng'):
            self.__rng = self.__clib.rng_init(0)

        # Loop over leading dimensions of input physical properties
        for i in np.ndindex(*physprop_.shape[:-1]):

            # Call library function
            self.__clib.kd_pdf_draw(
                kd_tmp,
                physprop_[i].ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                physidx_.ctypes.data_as(ctypes.POINTER(c_ulong)),
                physidx_.size,
                nsample, 0,
                self.__rng,
                np.ravel(samples[i]))

        # Restore kernel density bandwidth
        if photerr is not None:
            self.__restore_bw_err(kd_tmp)

        # Reshape output if needed
        if physprop_.ndim == 1:
            samples = samples[0]

        # Return samples
        return samples
Source code for slugpy.bayesphot.bp

slug

Navigation

Related Topics