Source code for slugpy.cluster_slug.cluster_slug

"""
This defines a class that can be used to estimate the PDF of star
cluster properties (mass, age, extinction) from a set of input
photometry in various bands.
"""

import numpy as np
import copy
import os
import os.path as osp
import warnings
from copy import deepcopy
import errno
try:
    # Python 3
    from urllib.request import urlopen
except ImportError:
    # Python 2
    from urllib2 import urlopen
try:
    # Python 2: use raw_input for interactive prompts
    input = raw_input
except NameError:
    # Python 3: input already behaves as we want
    pass

# Import the data reading and Bayesian inference stuff we need
from ..bayesphot import bp
from ..read_cluster_prop import read_cluster_prop
from ..read_cluster_phot import read_cluster_phot

##################################################################
# Sample density of the default library                          #
##################################################################
def _default_sample_density(physprop):
    # The default library is sampled uniformly in log M up to
    # log M = 4 and in log T up to log T = 8; above those values the
    # sampling density falls off as 1/M and 1/T, respectively
    logm = physprop[:,0]
    logt = physprop[:,1]
    sden = np.ones(len(logm))
    sden[logm > 4] *= 1.0/10.**(logm[logm > 4]-4)
    sden[logt > 8] *= 1.0/10.**(logt[logt > 8]-8)
    return sden
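# Example (illustrative sketch): a user-supplied sample_density
# callable must have the same signature as the function above: it
# takes an array of shape (N, nphys) and returns an array of shape
# (N). The cutoff below is an arbitrary value chosen for
# illustration, not a property of any shipped library.
#
#     def my_sample_density(physprop):
#         # Flat in log M below 10^5 Msun, then falling as 1/M
#         logm = physprop[:, 0]
#         sden = np.ones(len(logm))
#         sden[logm > 5] = 10.**(-(logm[logm > 5] - 5))
#         return sden
#
#     cs = cluster_slug(sample_density=my_sample_density)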

##################################################################
# Define the cluster_slug class                                  #
##################################################################

class cluster_slug(object):
    """
    A class that can be used to estimate the PDF of star cluster
    physical properties from a set of input photometry in various
    bands.

    Properties
       priors : array, shape (N) | callable | None
          prior probability on each data point; interpretation
          depends on the type passed:
             array, shape (N):
                values are interpreted as the prior probability of
                each data point
             callable:
                the callable must take as an argument an array of
                shape (N, nphys), and return an array of shape (N)
                giving the prior probability at each data point
             None:
                all data points have equal prior probability
       abstol : float
          absolute error tolerance for kernel density estimation
       reltol : float
          relative error tolerance for kernel density estimation
       thread_safe : bool
          if True, the computation routines will run in thread-safe
          mode, allowing use with multiprocessing; this incurs a
          small performance penalty

    Methods
       physprop() :
          returns the list of physical properties available for
          inference
       filters() :
          returns the list of filters available in the library
       filtersets() :
          returns the list of currently-loaded filter sets
       filter_units() :
          returns the units for the available filters
       add_filters() :
          adds a set of filters for use in parameter estimation
       logL() :
          computes the log likelihood at a particular set of physical
          and photometric parameters
       mpdf() :
          computes the marginal posterior probability distribution
          for one or more physical properties from a set of
          photometric measurements
       mcmc() :
          does MCMC estimation of the posterior PDF from a set of
          photometric measurements
       bestmatch() :
          finds the simulations in the library that are the closest
          matches to the input photometry
       make_approx_phot() :
          given a set of physical properties, returns a set of points
          that can be used for fast approximation of the
          corresponding photometric properties
       make_approx_phys() :
          given a set of photometric properties, returns a set of
          points that can be used for fast approximation of the
          corresponding physical properties
       draw_sample() :
          draws a random sample of cluster physical and photometric
          properties
       draw_phot() :
          given a set of physical properties, returns a
          randomly-selected set of photometric properties
    """

    ##################################################################
    # Initializer method
    ##################################################################
    def __init__(self, libname=None, filters=None, photsystem=None,
                 lib=None, bw_phys=0.1, bw_phot=None, ktype='gaussian',
                 priors=None, sample_density=None, pobs=None,
                 reltol=1.0e-2, abstol=1.0e-8, leafsize=16,
                 use_nebular=True, use_extinction=True,
                 use_phot_as_phys=[], thread_safe=True,
                 pruning=False, caching='none', vp_list=[]):
        """
        Initialize a cluster_slug object.

        Parameters
           libname : string
              name of the SLUG model to load; if left as None, the
              default is $SLUG_DIR/cluster_slug/modp020_chabrier_MW
           lib : object
              a library read by the read_cluster function; if
              specified, this overrides the libname option; the
              library must contain both physical properties and
              photometry, and must include filter data; if this is
              not None, the photsystem keyword is ignored
           filters : iterable of stringlike
              list of filter names to be used for inference
           photsystem : None or string
              if photsystem is None, the library will be left in
              whatever photometric system was used to write it;
              alternately, if it is a string, the data will be
              converted to the specified photometric system;
              allowable values are 'L_nu', 'L_lambda', 'AB', 'STMAG',
              and 'Vega', corresponding to the options defined in the
              SLUG code; once this is set, any subsequent photometric
              data input are assumed to be in the same photometric
              system
           bw_phys : 'auto' | float | array, shape (2) | array, shape (3)
              bandwidth for the physical quantities in the kernel
              density estimation; if set to 'auto', the bandwidth
              will be estimated automatically; if set to a scalar,
              this bandwidth is used for all physical quantities; if
              set to an array, the array must have 2 elements if
              use_extinction is False, or 3 if it is True
           bw_phot : None | 'auto' | float | array
              bandwidth for the photometric quantities; if set to
              None, defaults to 0.25 mag / 0.1 dex; if set to 'auto',
              the bandwidth is estimated automatically; if set to a
              float, this bandwidth is used for all photometric
              dimensions; if set to an array, the array must have the
              same number of elements as len(filters)
           ktype : string
              type of kernel to be used in density estimation;
              allowed values are 'gaussian' (default),
              'epanechnikov', and 'tophat'; only Gaussian can be used
              with error bars
           priors : array, shape (N) | callable | None
              prior probability on each data point; interpretation
              depends on the type passed:
                 array, shape (N):
                    values are interpreted as the prior probability
                    of each data point
                 callable:
                    the callable must take as an argument an array of
                    shape (N, nphys), and return an array of shape
                    (N) giving the prior probability at each data
                    point
                 None:
                    all data points have equal prior probability
           pobs : array, shape (N) | callable | None
              probability that each data point would be observed;
              interpretation depends on the type passed:
                 array, shape (N):
                    values are interpreted as the probability that
                    each data point would be observed
                 callable:
                    the callable must take as an argument an array of
                    shape (N, nphys), and return an array of shape
                    (N) giving the observation probability at each
                    data point
                 None:
                    all data points are equally likely to be observed
           sample_density : array, shape (N) | callable | 'auto' | None
              the density of the data samples at each data point;
              this need not match the prior density; interpretation
              depends on the type passed:
                 array, shape (N):
                    values are interpreted as the density of data
                    sampling at each sample point
                 callable:
                    the callable must take as an argument an array of
                    shape (N, nphys), and return an array of shape
                    (N) giving the sampling density at each point
                 'auto':
                    the sample density will be computed directly from
                    the data set; note that this can be quite slow
                    for large data sets, so it is preferable to
                    specify this analytically if it is known
                 None:
                    data are assumed to be uniformly sampled, or to
                    be sampled as the default library is if libname
                    is also None
           reltol : float
              relative error tolerance; errors on all returned
              probabilities p will satisfy either
              abs(p_est - p_true) <= reltol * p_est OR
              abs(p_est - p_true) <= abstol, where p_est is the
              returned estimate and p_true is the true value
           abstol : float
              absolute error tolerance; see above
           leafsize : int
              number of data points in each leaf of the KD tree
           use_nebular : bool
              if True, photometry including nebular emission will be
              used if available; if not, nebular emission will be
              omitted
           use_extinction : bool
              if True, photometry including extinction will be used;
              if not, it will be omitted, and in this case no results
              making use of the A_V dimension will be available
           use_phot_as_phys : listlike
              a list of photometric filters to be treated as
              "physical properties", so that inference can be carried
              out on them from other photometric bands; for example,
              setting this to ["Lbol", "QH0"] would make it possible
              to derive the marginal posterior probability of
              bolometric luminosity or hydrogen-ionizing luminosity
              from the observed photometry
           thread_safe : bool
              if True, cluster_slug will make extra copies of
              internals as needed to ensure thread safety when the
              computation routines (logL, mpdf, mcmc, bestmatch,
              make_approx_phot, make_approx_phys, mpdf_approx) are
              used with multiprocessing; this incurs a minor
              performance penalty, and can be disabled by setting to
              False if the code will not be run with the
              multiprocessing module
           pruning : bool
              if True, the underlying bayesphot objects will be
              pruned of clusters for which pobs or priors are zero,
              speeding up evaluations; the current implementation is
              limited in that pruning for priors is only done when
              the cluster_slug object is instantiated, and pruning
              for pobs is only done when each filter set is added, so
              the list of pruned clusters cannot be modified later
           caching : 'aggressive' | 'lazy' | 'none'
              strategy for caching subsets of the data with some
              dimensions marginalised out; behavior is as follows:
                 'aggressive':
                    on construction, store sorted data for fast
                    calculation of 1D PDFs of variables by
                    themselves, and 1D PDFs of all physical variables
                    marginalised over all other physical variables;
                    this significantly increases the memory footprint
                    and construction time, but greatly speeds up
                    subsequent evaluation of any of these quantities,
                    and is generally the best choice for production
                    work in parallel
                 'lazy':
                    sorted data sets for fast computation are created
                    and cached as needed; this will make the first
                    computation of any marginal PDF slower, but
                    speeds up all subsequent ones, without imposing
                    any extra time at initial construction; this mode
                    is generally best for interactive work, but
                    cannot be combined with thread_safe = True;
                    memory cost depends on how many different
                    marginal PDF combinations are calculated, but is
                    always less than for 'aggressive'
                 'none':
                    no caching is performed automatically; the user
                    may still manually cache data by calling the
                    make_cache method
           vp_list : list
              a list with one element for each of the variable
              parameters in the data; an element is set to True if we
              wish to use that parameter here, or False if we do not

        Returns
           Nothing

        Raises
           IOError, if the library cannot be found
        """

        # If using the default library, assign the library name
        if libname is None and lib is None:
            self.__libname = osp.join('cluster_slug',
                                      'modp020_chabrier_MW')
            if 'SLUG_DIR' in os.environ:
                self.__libname = osp.join(os.environ['SLUG_DIR'],
                                          self.__libname)
        else:
            self.__libname = libname

        # If using a library passed in, store a pointer to it
        if lib is not None:
            self.__lib = lib
            self.__libname = None
        else:
            self.__lib = None

        # Load the cluster physical properties
        try:
            if self.__lib is None:
                prop = read_cluster_prop(self.__libname)
            else:
                prop = self.__lib
        except IOError:

            # If we're here, we failed to load the library. If we
            # were given a library file name explicitly, just raise
            # an error.
            if libname is not None:
                raise IOError(errno.ENOENT,
                              "unable to open library {}".
                              format(self.__libname))

            # If we've made it to here, we were asked to open the
            # default library but failed to do so. Check if the
            # failure could be because we don't have astropy and thus
            # can't open fits files. If that's the cause, print out a
            # helpful error message.
            try:
                import astropy.io.fits as fits
            except ImportError:
                raise IOError(errno.EIO,
                              "failed to read default cluster_slug " +
                              "library cluster_slug/" +
                              "modp020_chabrier_MW due to missing " +
                              "astropy.io.fits; install astropy " +
                              "or specify a library in a non-FITS " +
                              "format")

            # If we're here, we couldn't open the default library,
            # and it's not because we don't have FITS capability. The
            # file must not exist, or must be damaged. Check if we're
            # in interactive mode. If not, just raise an error and
            # suggest the user go get the library file.
            errstr = "Unable to open default cluster_slug " + \
                     "library file cluster_slug/modp020_chabrier_MW."
            import __main__ as main
            if hasattr(main, '__file__'):
                # We're not interactive; just raise an error
                raise IOError(errno.EIO, errstr + " " +
                              "Try downloading it from " +
                              "https://sites.google.com/site/runslug/data")

            # If we're here, we don't have the library file, but we
            # are in interactive mode. Thus offer the user an option
            # to go download the file now.
            usr_response \
                = input(errstr + " Would you like to download it "
                        "now (warning: 20 GB)? [y/n] ").\
                lower().strip()
            if usr_response not in ['yes', 'y', 'ye']:
                # User didn't say yes, so raise error
                raise IOError(errno.EIO, "unable to proceed")

            # If we're here, download the files
            print("Fetching modp020_chabrier_MW_cluster_prop.fits " +
                  "(this may take a while)...")
            url = urlopen(
                'https://www.dropbox.com/s/tg2ogad713nfux0/'
                'modp020_chabrier_MW_cluster_prop.fits?dl=0')
            rawdata = url.read()
            url.close()
            fp = open(osp.join(osp.dirname(self.__libname),
                               'modp020_chabrier_MW_cluster_prop.fits'),
                      'wb')
            fp.write(rawdata)
            fp.close()
            print("Fetching modp020_chabrier_MW_cluster_phot.fits " +
                  "(this may take a while)...")
            url = urlopen(
                'https://www.dropbox.com/s/inmeeuldkxsazzs/'
                'modp020_chabrier_MW_cluster_phot.fits?dl=0')
            rawdata = url.read()
            url.close()
            fp = open(osp.join(osp.dirname(self.__libname),
                               'modp020_chabrier_MW_cluster_phot.fits'),
                      'wb')
            fp.write(rawdata)
            fp.close()

            # Now try reading the data
            try:
                prop = read_cluster_prop(self.__libname)
            except IOError:
                raise IOError(errno.EIO,
                              "still unable to open default library")

        # Find out how many variable parameters we have and how many
        # we want to use
        nvp_total = np.size(vp_list)
        nvp = np.size(np.where(vp_list))
        if nvp_total > 0:
            print("Total number of variable parameters in data: "
                  + str(nvp_total))
            print("Number of variable parameters to use: " + str(nvp))

        # Record available filters
        if self.__lib is None:
            filter_info = read_cluster_phot(self.__libname,
                                            filters_only=True,
                                            nofilterdata=True)
        else:
            filter_info = self.__lib
        self.__allfilters = filter_info.filter_names
        self.__allunits = filter_info.filter_units

        # Set number and names of physical properties
        self.__physprop = ["log_mass", "log_age"]
        if use_extinction:
            self.__physprop.append("A_V")
        for i in range(nvp):
            self.__physprop.append("VP" + str(i))
        for pp in use_phot_as_phys:
            self.__physprop.append(pp)
        self.__nphys = len(self.__physprop)

        # Store the physical properties
        self.__ds_phys = np.zeros((len(prop.id), self.__nphys))
        propptr = 0
        self.__ds_phys[:, propptr] = np.log10(prop.actual_mass)
        propptr += 1
        self.__ds_phys[:, propptr] = np.log10(prop.time -
                                              prop.form_time)
        propptr += 1
        if use_extinction:
            self.__ds_phys[:, propptr] = prop.A_V
            propptr += 1

        # Grab the variable parameters we want to use
        for vpi in range(nvp_total):
            if vp_list[vpi]:
                self.__ds_phys[:, propptr] \
                    = getattr(prop, "VP" + repr(vpi))
                propptr += 1

        # Record other stuff that we'll use later
        if self.__lib is None:
            self.__photsystem = photsystem
        else:
            self.__photsystem = None
        self.__use_nebular = use_nebular
        self.__use_extinction = use_extinction
        self.__ktype = ktype
        self.__priors = priors
        if (sample_density is not None) or (libname is not None):
            self.__sample_density = sample_density
        else:
            self.__sample_density = _default_sample_density
        self.__reltol = reltol
        self.__abstol = abstol
        self.__bw_phot_default = bw_phot
        self.__thread_safe = thread_safe
        self.__pruning = pruning

        # If we are pruning, use the priors object to figure out
        # which clusters should be pruned
        if self.__pruning:
            if self.__priors is not None:
                if hasattr(self.__priors, '__call__'):
                    pr = self.__priors(self.__ds_phys)
                    self.__prior_nonzero = pr > 0.0
                else:
                    self.__prior_nonzero = priors > 0.0
            else:
                self.__prior_nonzero = np.ones(len(self.__ds_phys),
                                               dtype=bool)

        # Set the physical bandwidth
        self.__bw_phys = copy.deepcopy(bw_phys)

        # Initialize the dicts of photometric data we've read to empty
        self.__photdata = {}
        self.__photbw = {}

        # Initialize an empty list of filter sets
        self.__filtersets = []

        # Save caching mode
        self.__caching = caching

        # If we are using some photometric properties as physical
        # properties to be inferred, read them in now
        for pp in use_phot_as_phys:
            if pp not in self.__photdata.keys():
                self.load_data(pp)
            self.__ds_phys[:, propptr] = self.__photdata[pp]
            propptr += 1

        # If we have been given a filter list, create the data set to
        # go with it
        if filters is not None:
            self.add_filters(filters, bandwidth=bw_phot, pobs=pobs)
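    # Example (illustrative sketch): construct a cluster_slug object
    # that reads the default library and prepares one filter set at
    # construction time. The filter names here are hypothetical
    # placeholders; call filters() on your own object to see what the
    # loaded library actually provides.
    #
    #     from slugpy.cluster_slug import cluster_slug
    #     cs = cluster_slug(photsystem='Vega',
    #                       filters=['WFC3_UVIS_F336W',
    #                                'WFC3_UVIS_F814W'],
    #                       caching='lazy')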
    ##################################################################
    # Method to load the data off disk for a particular filter, or to
    # set up arrays pointing to the data if using a library stored in
    # memory
    ##################################################################
    def load_data(self, filter_name, bandwidth=None,
                  force_reload=False):
        """
        Loads photometric data for the specified filter into memory

        Parameters
           filter_name : string
              name of filter to load
           bandwidth : float
              default bandwidth for this filter
           force_reload : bool
              if True, reinitialize the data even if it has already
              been stored

        Returns
           None

        Raises
           ValueError, if filter_name is not one of the available
           filters
        """

        # Do nothing if data has already been read, unless we've been
        # told to force a re-read
        if filter_name in self.__photdata.keys() and not force_reload:
            return

        # Make sure we have this filter; if not, raise error
        if filter_name not in self.__allfilters:
            raise ValueError("no data available for filter {}".
                             format(filter_name))

        # Suppress obnoxious numpy warning messages here
        errstate = np.geterr()
        np.seterr(divide='ignore', invalid='ignore', over='ignore',
                  under='ignore')

        # Special case: for ionizing fluxes, always load the
        # non-nebular, non-extincted value, and don't do any
        # photometric system conversion
        if filter_name == 'QH0' or filter_name == 'QHe0' or \
           filter_name == 'QHe1':
            if self.__lib is None:
                phot = read_cluster_phot(self.__libname,
                                         read_filters=filter_name,
                                         read_nebular=False,
                                         read_extinct=False,
                                         phot_only=True)
                phdata = np.squeeze(phot.phot)
            else:
                phot = self.__lib
                phdata = np.squeeze(
                    np.copy(self.__lib.phot
                            [:, self.__allfilters.index(filter_name)]))

        else:

            # Load data; first try reading the requested combination
            # of nebular and extincted values
            if self.__lib is None:
                phot = read_cluster_phot(
                    self.__libname,
                    read_filters=filter_name,
                    read_nebular=self.__use_nebular,
                    read_extinct=self.__use_extinction,
                    phot_only=True,
                    photsystem=self.__photsystem)
            else:
                phot = self.__lib

            # Make sure we have the data we want; if not, lots of
            # ugly special cases as fallbacks
            if self.__use_nebular and self.__use_extinction:

                # Wanted nebular + extinction
                if 'phot_neb_ex' in phot._fields:
                    # Got it
                    if self.__lib is None:
                        phdata = np.squeeze(phot.phot_neb_ex)
                    else:
                        phdata = np.copy(
                            phot.phot_neb_ex
                            [:, self.__allfilters.index(filter_name)])
                    warn_nebular = False
                    warn_extinct = False
                    nebular = True
                    extinct = True
                else:
                    # Didn't get it; try without nebular
                    if self.__lib is None:
                        phot = read_cluster_phot(
                            self.__libname,
                            read_filters=filter_name,
                            read_nebular=False,
                            read_extinct=True,
                            phot_only=True,
                            photsystem=self.__photsystem)
                    if 'phot_ex' in phot._fields:
                        if self.__lib is None:
                            phdata = np.squeeze(phot.phot_ex)
                        else:
                            phdata = np.copy(
                                phot.phot_ex
                                [:, self.__allfilters.index(filter_name)])
                        warn_nebular = True
                        warn_extinct = False
                        nebular = False
                        extinct = True
                    else:
                        # Couldn't get extinction only; try nebular
                        # only
                        if self.__lib is None:
                            phot = read_cluster_phot(
                                self.__libname,
                                read_filters=filter_name,
                                read_nebular=True,
                                read_extinct=False,
                                phot_only=True,
                                photsystem=self.__photsystem)
                        if 'phot_neb' in phot._fields:
                            if self.__lib is None:
                                phdata = np.squeeze(phot.phot_neb)
                            else:
                                phdata = np.copy(
                                    phot.phot_neb
                                    [:, self.__allfilters.index(filter_name)])
                            warn_nebular = False
                            warn_extinct = True
                            nebular = True
                            extinct = False
                        else:
                            # Ultimate fallback: no nebular or
                            # extinction
                            if self.__lib is None:
                                phot = read_cluster_phot(
                                    self.__libname,
                                    read_filters=filter_name,
                                    read_nebular=False,
                                    read_extinct=False,
                                    phot_only=True,
                                    photsystem=self.__photsystem)
                                phdata = np.squeeze(phot.phot)
                            else:
                                phdata = np.copy(
                                    phot.phot
                                    [:, self.__allfilters.index(filter_name)])
                            warn_nebular = True
                            warn_extinct = True
                            nebular = False
                            extinct = False

            elif self.__use_nebular:

                # Wanted nebular, no extinction
                if 'phot_neb' in phot._fields:
                    # Got it
                    if self.__lib is None:
                        phdata = np.squeeze(phot.phot_neb)
                    else:
                        phdata = np.copy(
                            phot.phot_neb
                            [:, self.__allfilters.index(filter_name)])
                    warn_nebular = False
                    warn_extinct = False
                    nebular = True
                    extinct = False
                else:
                    # Didn't get it; go to no nebular or extinction
                    if self.__lib is None:
                        phot = read_cluster_phot(
                            self.__libname,
                            read_filters=filter_name,
                            read_nebular=False,
                            read_extinct=False,
                            phot_only=True,
                            photsystem=self.__photsystem)
                        phdata = np.squeeze(phot.phot)
                    else:
                        phdata = np.copy(
                            phot.phot
                            [:, self.__allfilters.index(filter_name)])
                    warn_nebular = True
                    warn_extinct = False
                    nebular = False
                    extinct = False

            elif self.__use_extinction:

                # Wanted extinction, no nebular
                if 'phot_ex' in phot._fields:
                    # Got it
                    if self.__lib is None:
                        phdata = np.squeeze(phot.phot_ex)
                    else:
                        phdata = np.copy(
                            phot.phot_ex
                            [:, self.__allfilters.index(filter_name)])
                    warn_nebular = False
                    warn_extinct = False
                    nebular = False
                    extinct = True
                else:
                    # Didn't get it; go to no nebular or extinction
                    if self.__lib is None:
                        phot = read_cluster_phot(
                            self.__libname,
                            read_filters=filter_name,
                            read_nebular=False,
                            read_extinct=False,
                            phot_only=True,
                            photsystem=self.__photsystem)
                        phdata = np.squeeze(phot.phot)
                    else:
                        phdata = np.copy(
                            phot.phot
                            [:, self.__allfilters.index(filter_name)])
                    warn_nebular = False
                    warn_extinct = True
                    nebular = False
                    extinct = False

            else:

                # Wanted neither nebular nor extinction
                if self.__lib is None:
                    phot = read_cluster_phot(
                        self.__libname,
                        read_filters=filter_name,
                        read_nebular=False,
                        read_extinct=False,
                        phot_only=True,
                        photsystem=self.__photsystem)
                    phdata = np.squeeze(phot.phot)
                else:
                    phdata = np.copy(
                        phot.phot
                        [:, self.__allfilters.index(filter_name)])
                warn_nebular = False
                warn_extinct = False
                nebular = False
                extinct = False

            # Make sure data aren't NaN's. If they are, that means we
            # don't have extinction for this filter, so we need to
            # use non-extincted data.
            if np.isnan(phdata[0]):
                warn_extinct = True
                if nebular:
                    if self.__lib is None:
                        phot = read_cluster_phot(
                            self.__libname,
                            read_filters=filter_name,
                            read_nebular=True,
                            read_extinct=False,
                            phot_only=True,
                            photsystem=self.__photsystem)
                        phdata = np.squeeze(phot.phot_neb)
                    else:
                        phdata = np.copy(
                            phot.phot_neb
                            [:, self.__allfilters.index(filter_name)])
                else:
                    if self.__lib is None:
                        phot = read_cluster_phot(
                            self.__libname,
                            read_filters=filter_name,
                            read_nebular=False,
                            read_extinct=False,
                            phot_only=True,
                            photsystem=self.__photsystem)
                        phdata = np.squeeze(phot.phot)
                    else:
                        phdata = np.copy(
                            phot.phot
                            [:, self.__allfilters.index(filter_name)])
            else:
                warn_extinct = False

            # Issue warnings if necessary
            if warn_nebular:
                warnstr = ("cluster_slug: nebular data requested "
                           "for filter {}, but is not available; "
                           "using non-nebular data instead").\
                    format(filter_name)
                warnings.warn(warnstr)
            if warn_extinct:
                warnstr = ("cluster_slug: extincted data requested "
                           "for filter {}, but is not available; "
                           "using non-extincted data instead").\
                    format(filter_name)
                warnings.warn(warnstr)

        # Determine the units for this filter; note that when reading
        # from disk we have read only this filter, so its units are
        # the first (and only) entry, while an in-memory library
        # carries units for every filter
        if self.__lib is None:
            funit = phot.filter_units[0]
        else:
            funit = phot.filter_units[
                self.__allfilters.index(filter_name)]

        # Take the log of the photometric values if they're recorded
        # in a linear system; also set the bandwidth based on whether
        # we're in a magnitude system or not; we can override this
        # later if we want
        if 'mag' not in funit:
            phdata[phdata <= 0] = 1.0e-99
            phdata = np.log10(phdata)
            if bandwidth is None:
                if self.__bw_phot_default is None:
                    self.__photbw[filter_name] = 0.1
                else:
                    self.__photbw[filter_name] = self.__bw_phot_default
            else:
                self.__photbw[filter_name] = bandwidth
        else:
            if bandwidth is None:
                if self.__bw_phot_default is None:
                    self.__photbw[filter_name] = 0.25
                else:
                    self.__photbw[filter_name] = self.__bw_phot_default
            else:
                self.__photbw[filter_name] = bandwidth

        # Fix any inf's that were generated by photometric system
        # conversions or taking logs
        phdata[np.isinf(phdata)] = 99.0

        # Store the data
        self.__photdata[filter_name] = phdata

        # Restore the numpy error state
        np.seterr(divide=errstate['divide'], over=errstate['over'],
                  under=errstate['under'], invalid=errstate['invalid'])
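    # Example (illustrative sketch): override the default bandwidth
    # for a single filter and force its data to be re-read; the
    # filter name is a hypothetical placeholder.
    #
    #     cs.load_data('WFC3_UVIS_F336W', bandwidth=0.3,
    #                  force_reload=True)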
    ##################################################################
    # Methods to return information on available filters
    ##################################################################
    def physprop(self):
        """
        Returns the list of all physical properties available for
        inference

        Parameters
           None

        Returns
           physprop : list of strings
              list of available physical properties
        """
        return copy.deepcopy(self.__physprop)

    def filters(self):
        """
        Returns the list of all available filters

        Parameters
           None

        Returns
           filters : list of strings
              list of available filter names
        """
        return copy.deepcopy(self.__allfilters)

    def filter_units(self):
        """
        Returns the list of all available filter units

        Parameters
           None

        Returns
           units : list of strings
              list of available filter units
        """
        return copy.deepcopy(self.__allunits)

    def filtersets(self):
        """
        Returns the list of all currently-loaded filter sets

        Parameters
           None

        Returns
           filtersets : list of list of strings
              list of currently-loaded filter sets
        """
        return copy.deepcopy([f['filters'] for f in self.__filtersets])
    ##################################################################
    # Method to prepare to analyze a particular set of filters
    ##################################################################
    def add_filters(self, filters, bandwidth=None, pobs=None):
        """
        Add a set of filters to use for cluster property estimation

        Parameters
           filters : iterable of stringlike
              list of filter names to be used for inference
           bandwidth : None | 'auto' | float | array
              bandwidth for the photometric quantities; if set to
              None, the bandwidth is unchanged for an existing filter
              set, and for a newly-created one the default physical
              and photometric bandwidths are used; if set to 'auto',
              the bandwidth is estimated automatically; if set to a
              float, this bandwidth is used for all physical and
              photometric dimensions; if set to an array, the array
              must have the same number of entries as
              nphys+len(filters)
           pobs : array, shape (N) | callable | 'equal' | None
              the probability that a particular object would be
              observed, which is used, like priors, to weight the
              library; interpretation depends on the type passed:
                 'equal':
                    all objects are equally likely to be observed
                 array, shape (N):
                    gives the observation probability of each object
                    in the library
                 callable:
                    must be a function that takes an array of shape
                    (N, nphot) containing the photometry as an
                    argument, and returns an array of shape (N)
                    giving the probability of observation for each
                    object
                 None:
                    leaves the observation probability unchanged

        Returns
           nothing
        """

        # If we already have this filter set in our dict, just set
        # the bandwidth and pobs and return
        for i, f in enumerate(self.__filtersets):
            if filters == f['filters']:
                if bandwidth is not None:
                    self.__filtersets[i]['bp'].bandwidth = bandwidth
                if pobs is not None:
                    if pobs == 'equal':
                        self.__filtersets[i]['bp'].pobs = None
                    elif not self.__pruning or \
                         hasattr(pobs, '__call__'):
                        self.__filtersets[i]['bp'].pobs = pobs
                    else:
                        pobs_tmp = pobs[f['keep']]
                        self.__filtersets[i]['bp'].pobs = pobs_tmp
                return

        # We're adding a new filter set, so save its name
        newfilter = { 'filters' : copy.deepcopy(filters) }

        # Construct the data set to use with this filter combination,
        # and add the physical property data
        dataset = np.zeros((self.__ds_phys.shape[0],
                            self.__nphys+len(filters)))
        dataset[:, :self.__nphys] = self.__ds_phys

        # Loop over filters
        for i, f in enumerate(filters):

            # Do we have this filter loaded already? If not, read it.
            if f not in self.__photdata.keys():
                if bandwidth is None or type(bandwidth) is str:
                    self.load_data(f)
                else:
                    if hasattr(bandwidth, '__iter__'):
                        self.load_data(f, bandwidth=bandwidth[i])
                    else:
                        self.load_data(f, bandwidth=bandwidth)

            # Add data for this filter
            dataset[:, self.__nphys+i] = self.__photdata[f]

        # Make copies of the priors and sample_density for use by the
        # derived bayesphot class; we make copies because bp needs to
        # be able to sort these
        priors = deepcopy(self.__priors)
        sample_density = deepcopy(self.__sample_density)

        # Are we pruning?
        if self.__pruning:

            # Get pruning based on p_obs
            if hasattr(pobs, '__call__'):
                po = pobs(dataset[:, self.__nphys:])
                pobs_nonzero = po > 0.0
            elif pobs is not None:
                pobs_nonzero = pobs > 0.0
            else:
                pobs_nonzero = np.ones(len(dataset), dtype=bool)

            # Get the list of clusters with p_obs > 0 and prior > 0;
            # store this because we will need it to change p_obs or
            # prior later
            newfilter['keep'] = np.logical_and(self.__prior_nonzero,
                                               pobs_nonzero)

            # Prune the data set
            dataset = dataset[newfilter['keep']]

            # Make pruned versions of sample_density, priors, and
            # pobs if they are not callables; if they are callables,
            # they will be computed on the fly by the bp object, and
            # we don't have to worry about it
            if not hasattr(sample_density, '__call__') and \
               sample_density is not None:
                sample_density = sample_density[newfilter['keep']]
            if not hasattr(priors, '__call__') and priors is not None:
                priors = priors[newfilter['keep']]
            if not hasattr(pobs, '__call__') and pobs is not None:
                pobs_tmp = pobs[newfilter['keep']]
            else:
                pobs_tmp = pobs

        else:

            # Just set pointer to pobs if we're not doing any pruning
            pobs_tmp = pobs

        # Store the new data set
        newfilter['dataset'] = dataset

        # Set the bandwidth
        if self.__bw_phys == 'auto' or bandwidth == 'auto':
            bw = 'auto'
        else:
            bw = np.zeros(self.__nphys+len(filters))
            bw[:self.__nphys] = self.__bw_phys
            if bandwidth is not None:
                bw[self.__nphys:] = bandwidth
            else:
                for i in range(len(filters)):
                    bw[self.__nphys+i] = self.__photbw[filters[i]]

        # Build the bp object
        newfilter['bp'] \
            = bp(newfilter['dataset'], self.__nphys,
                 filters=filters,
                 bandwidth=bw,
                 ktype=self.__ktype,
                 priors=priors,
                 pobs=pobs_tmp,
                 sample_density=sample_density,
                 reltol=self.__reltol,
                 abstol=self.__abstol,
                 thread_safe=self.__thread_safe,
                 caching=self.__caching)

        # Save to the master filter list
        self.__filtersets.append(newfilter)
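    # Example (illustrative sketch): add a filter set after
    # construction, using a single scalar bandwidth for all
    # photometric dimensions; the filter names are hypothetical
    # placeholders.
    #
    #     cs.add_filters(['WFC3_UVIS_F336W', 'WFC3_UVIS_F814W'],
    #                    bandwidth=0.25)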
    ##################################################################
    # Method to delete a set of filters
    ##################################################################
    def del_filters(self, filters):
        """
        Remove a set of filters, freeing the memory associated with
        them. Note that this does not delete the underlying library
        data, just the data for the KD tree used internally.

        Parameters
           filters : iterable of stringlike
              list of filter names

        Returns
           Nothing

        Raises
           KeyError if the input set of filters is not loaded
        """

        # Figure out which filter set to delete
        idel = -1
        for i, f in enumerate(self.__filtersets):
            if filters == f['filters']:
                idel = i
                break
        if idel == -1:
            raise KeyError("filter set not found")

        # Delete the filter set; destroy the bayesphot object
        # explicitly to ensure that it is removed from scope
        # immediately, since it can be rather large
        del self.__filtersets[idel]['bp']
        self.__filtersets.pop(idel)
    ##################################################################
    # Define the priors property. This just wraps around the
    # corresponding property defined for bp objects, but we do have
    # to be careful about pruning if it is turned on
    ##################################################################
    @property
    def priors(self):
        return self.__priors

    @priors.setter
    def priors(self, pr):
        self.__priors = pr
        if not self.__pruning or hasattr(pr, '__call__') or \
           pr is None:
            for f in self.__filtersets:
                f['bp'].priors = deepcopy(self.__priors)
        else:
            # If the data have been pruned before being passed to the
            # bp object, we need to apply the same pruning to the new
            # priors
            for f in self.__filtersets:
                pr_tmp = pr[f['keep']]
                f['bp'].priors = pr_tmp

    ##################################################################
    # Define the pobs property. This wraps around the corresponding
    # property for bp objects, with the complication that if there is
    # more than one filter set it has to accept or return a list, and
    # that we have to handle pruning
    ##################################################################
    @property
    def pobs(self):
        po = []
        for f in self.__filtersets:
            po.append(f['bp'].pobs)
        return po

    @pobs.setter
    def pobs(self, po):
        # If po is an iterable whose length matches the number of
        # filter sets, we assume that it gives an observation
        # probability for each filter set in turn. Otherwise we
        # assign it to every filter set.
        if hasattr(po, '__iter__') and \
           len(po) == len(self.__filtersets):
            for f, p in zip(self.__filtersets, po):
                if not self.__pruning or hasattr(p, '__call__'):
                    f['bp'].pobs = p
                else:
                    p_tmp = p[f['keep']]
                    f['bp'].pobs = p_tmp
            return
        if not self.__pruning or hasattr(po, '__call__') \
           or po is None:
            for f in self.__filtersets:
                f['bp'].pobs = deepcopy(po)
        else:
            for f in self.__filtersets:
                po_tmp = deepcopy(po)[f['keep']]
                f['bp'].pobs = po_tmp

    ##################################################################
    # Define properties that update the current values for the
    # cluster_slug object, and also update all the child bp objects.
    ##################################################################
    @property
    def abstol(self):
        return self.__abstol

    @abstol.setter
    def abstol(self, newtol):
        self.__abstol = newtol
        for f in self.__filtersets:
            f['bp'].abstol = self.__abstol

    @property
    def reltol(self):
        return self.__reltol

    @reltol.setter
    def reltol(self, newtol):
        self.__reltol = newtol
        for f in self.__filtersets:
            f['bp'].reltol = self.__reltol

    @property
    def thread_safe(self):
        return self.__thread_safe

    @thread_safe.setter
    def thread_safe(self, new_thread_safe):
        self.__thread_safe = new_thread_safe
        for f in self.__filtersets:
            f['bp'].thread_safe = self.__thread_safe
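    # Example (illustrative sketch): impose a prior dN/dM ~ M^-2 on
    # cluster mass after construction. Column 0 of the physical
    # property array is log M, and dN/dM ~ M^-2 corresponds to a
    # prior proportional to 1/M in log-mass coordinates.
    #
    #     def mass_prior(physprop):
    #         return 10.**(-physprop[:, 0])
    #
    #     cs.priors = mass_prior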
    ##################################################################
    # Methods to make and destroy caches
    ##################################################################
    def make_cache(self, margindims, filters=None):
        """
        This method builds a cache to do faster calculation of PDFs
        where certain dimensions are marginalised out. If such caches
        exist, they are used automatically by all the computation
        methods.

        Parameters
           margindims : listlike of integers
              list of dimensions to be marginalised out; physical
              dimensions go from 0 to nphys-1, photometric dimensions
              from nphys to nphys + nphot - 1; note that the indexing
              depends on the filter set specified by filters
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns
           Nothing
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    make_cache(margindims)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Make the cache
            bp.make_cache(margindims)
    def clear_cache(self, margindims=None, filters=None):
        """
        This method deletes from the cache

        Parameters
           margindims : listlike of integers
              list of marginalised dimensions that should be removed
              from the cache, in the same format as make_cache; if
              left as None, the cache is completely emptied
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns
           Nothing
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    clear_cache(margindims)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Clear the cache
            bp.clear_cache(margindims)
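    # Example (illustrative sketch): pre-sort the data for fast 1D
    # mass PDFs by caching the data set with the other physical
    # dimensions marginalised out; this assumes nphys = 3, so that
    # dimensions 1 and 2 are log T and A_V, and that a single filter
    # set has already been added.
    #
    #     cs.make_cache([1, 2])
    #     # ... many calls to cs.mpdf(0, ...) here ...
    #     cs.clear_cache([1, 2])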
    ##################################################################
    # The functions below just wrap around the bayesphot functions of
    # the same name. They all have in common that they accept an
    # additional keyword argument, filters, which specifies the
    # filter set to use.
    ##################################################################
    def logL(self, physprop, photprop, photerr=None, filters=None,
             margindim=None):
        """
        This function returns the natural log of the likelihood
        function evaluated at a particular log mass, log age,
        extinction, and set of log luminosities

        Parameters
           physprop : arraylike, shape (nphys) or (..., nphys)
              array giving values of log M, log T, and A_V; for a
              multidimensional array, the operation is vectorized
              over the leading dimensions; if the object was created
              with use_extinction = False, the A_V dimension should
              be omitted; any variable parameters VPx that were
              requested must also be included
           photprop : arraylike, shape (nfilter) or (..., nfilter)
              array giving the photometric values; for a
              multidimensional array, the operation is vectorized
              over the leading dimensions
           photerr : arraylike, shape (nfilter) or (..., nfilter)
              array giving photometric errors; for a multidimensional
              array, the operation is vectorized over the leading
              dimensions
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default
           margindim : int | arraylike of ints | None
              the index or indices of the physical or photometric
              properties to be marginalized over, numbered from 0 to
              nphys-1 for physical properties and from nphys to
              nphys + nfilter - 1 for photometric properties; if this
              keyword is set, then physprop and/or photprop should
              have fewer than nphys or nphot elements due to the
              omission of marginalised dimensions; if all physical or
              photometric dimensions are marginalised out, the
              corresponding argument for physprop or photprop should
              be set to None

        Returns
           logL : float or arraylike
              natural log of the likelihood function
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    logL(physprop, photprop, photerr=photerr,
                         margindim=margindim)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the logL method
            return bp.logL(physprop, photprop, photerr=photerr,
                           margindim=margindim)
    def mpdf(self, idx, photprop, photerr=None, ngrid=128,
             qmin=None, qmax=None, grid=None, norm=True,
             filters=None):
        """
        Returns the marginal probability for one or more physical
        quantities for one or more input sets of photometric
        properties. Output quantities are computed on a grid of
        values, in the same style as meshgrid

        Parameters
           idx : int or listlike containing ints
              index of the physical quantity whose PDF is to be
              computed; 0 = log M, 1 = log T, 2 = A_V,
              (2 or 3)+x = VPx; if this is an iterable, the joint
              distribution of the indicated quantities is returned
           photprop : arraylike, shape (nfilter) or (..., nfilter)
              array giving the photometric values; for a
              multidimensional array, the operation is vectorized
              over the leading dimensions
           photerr : arraylike, shape (nfilter) or (..., nfilter)
              array giving photometric errors; for a multidimensional
              array, the operation is vectorized over the leading
              dimensions
           ngrid : int or listlike containing ints
              number of points in each dimension of the output grid;
              if this is an iterable, it must have the same number of
              elements as idx
           qmin : float or listlike
              minimum value in the output grid in each quantity; if
              left as None, defaults to the minimum value in the
              library; if this is an iterable, it must contain the
              same number of elements as idx
           qmax : float or listlike
              maximum value in the output grid in each quantity; if
              left as None, defaults to the maximum value in the
              library; if this is an iterable, it must contain the
              same number of elements as idx
           grid : listlike of arrays
              set of values defining the grid on which the PDF is to
              be evaluated, in the same format used by meshgrid
           norm : bool
              if True, returned pdf's will be normalized to integrate
              to 1
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns
           grid_out : array
              array of values at which the PDF is evaluated; contents
              are the same as returned by meshgrid
           pdf : array
              array of marginal posterior probabilities at each point
              of the output grid, for each input cluster; the leading
              dimensions match the leading dimensions produced by
              broadcasting the leading dimensions of photprop and
              photerr together, while the trailing dimensions match
              the dimensions of the output grid
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    mpdf(idx, photprop, photerr=photerr, ngrid=ngrid,
                         qmin=qmin, qmax=qmax, grid=grid, norm=norm)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the mpdf method
            return bp.mpdf(idx, photprop, photerr=photerr,
                           ngrid=ngrid, qmin=qmin, qmax=qmax,
                           grid=grid, norm=norm)
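    # Example (illustrative sketch): compute the 1D marginal
    # posterior PDF of log M for a single cluster with two measured
    # magnitudes and 0.1 mag errors; the photometric values are
    # placeholders. The PDF of log T would use idx=1, and the joint
    # (log M, log T) distribution idx=[0, 1].
    #
    #     logm_grid, pdf = cs.mpdf(0, np.array([20.1, 19.3]),
    #                              photerr=np.array([0.1, 0.1]),
    #                              ngrid=128)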
    def mpdf_phot(self, idx, physprop, ngrid=128, qmin=None,
                  qmax=None, grid=None, norm=True, filters=None):
        """
        Returns the marginal probability for one or more photometric
        quantities corresponding to an input set or distribution of
        physical properties. Output quantities are computed on a grid
        of values, in the same style as meshgrid.

        Parameters
           idx : int or listlike containing ints
              index of the photometric quantity whose PDF is to be
              computed, starting at 0; indices correspond to the
              order of elements in the filters argument; if this is
              an iterable, the joint distribution of the indicated
              quantities is returned
           physprop : arraylike, shape (nphys) or (..., nphys), or callable
              physical properties to be used; if this is an array of
              nphys elements, these give the physical properties; if
              it is a multidimensional array, the operation is
              vectorized over the leading dimensions; if it is a
              callable, it is interpreted as a distribution of
              physical properties -- the function must take an array
              of (nphys) elements as an input, and return a floating
              point value representing the PDF evaluated at that set
              of physical properties as an output
           ngrid : int or listlike containing ints
              number of points in each dimension of the output grid;
              if this is an iterable, it must have the same number of
              elements as idx
           qmin : float or listlike
              minimum value in the output grid in each quantity; if
              left as None, defaults to the minimum value in the
              library; if this is an iterable, it must contain the
              same number of elements as idx
           qmax : float or listlike
              maximum value in the output grid in each quantity; if
              left as None, defaults to the maximum value in the
              library; if this is an iterable, it must contain the
              same number of elements as idx
           grid : listlike of arrays
              set of values defining the grid on which the PDF is to
              be evaluated, in the same format used by meshgrid
           norm : bool
              if True, returned pdf's will be normalized to integrate
              to 1
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns
           grid_out : array
              array of values at which the PDF is evaluated; contents
              are the same as returned by meshgrid
           pdf : array
              array of marginal posterior probabilities at each point
              of the output grid, for each input set of properties;
              the leading dimensions match the leading dimensions of
              physprop, while the trailing dimensions match the
              dimensions of the output grid
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    mpdf_phot(idx, physprop, ngrid=ngrid,
                              qmin=qmin, qmax=qmax, grid=grid,
                              norm=norm)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the mpdf_phot method
            return bp.mpdf_phot(idx, physprop, ngrid=ngrid,
                                qmin=qmin, qmax=qmax, grid=grid,
                                norm=norm)
    def mpdf_gen(self, fixeddim, fixedprop, margindim, ngrid=128,
                 qmin=None, qmax=None, grid=None, norm=True,
                 filters=None):
        """
        Returns the marginal probability for one or more physical or
        photometric properties, keeping other properties fixed and
        marginalizing over the remaining quantities. This is the most
        general marginal PDF routine provided.

        Parameters
           fixeddim : int | arraylike of ints | None
              the index or indices of the physical or photometric
              properties to be held fixed; physical properties are
              numbered 0 ... nphys-1, and photometric ones are
              numbered nphys ... nphys + nphot - 1; this can also be
              set to None, in which case no properties are held fixed
           fixedprop : array | None
              the values of the properties being held fixed; the size
              of the final dimension must be equal to the number of
              elements in fixeddim, and if fixeddim is None, this
              must be too
           margindim : int | arraylike of ints | None
              the index or indices of the physical or photometric
              properties to be marginalized over, numbered in the
              same way as with fixeddim; if set to None, no
              marginalization is performed
           ngrid : int or listlike containing ints
              number of points in each dimension of the output grid;
              if this is an iterable, it must have
              nphys + nphot - len(fixeddim) - len(margindim) elements
           qmin : float | arraylike
              minimum value in the output grid in each quantity; if
              left as None, defaults to the minimum value in the
              library; if this is an iterable, it must contain a
              number of elements equal to
              nphys + nphot - len(fixeddim) - len(margindim)
           qmax : float | arraylike
              maximum value in the output grid in each quantity; if
              left as None, defaults to the maximum value in the
              library; if this is an iterable, it must have the same
              number of elements as qmin
           grid : listlike of arrays
              set of values defining the grid on which the PDF is to
              be evaluated, in the same format used by meshgrid
           norm : bool
              if True, returned pdf's will be normalized to integrate
              to 1
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns
           grid_out : array
              array of values at which the PDF is evaluated; contents
              are the same as returned by meshgrid
           pdf : array
              array of marginal posterior probabilities at each point
              of the output grid, for each input set of properties;
              the leading dimensions match the leading dimensions of
              fixedprop, while the trailing dimensions match the
              dimensions of the output grid
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    mpdf_gen(fixeddim, fixedprop, margindim,
                             ngrid=ngrid, qmin=qmin, qmax=qmax,
                             grid=grid, norm=norm)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the mpdf_gen method
            return bp.mpdf_gen(fixeddim, fixedprop, margindim,
                               ngrid=ngrid, qmin=qmin, qmax=qmax,
                               grid=grid, norm=norm)
    def mcmc(self, photprop, photerr=None, mc_walkers=100,
             mc_steps=500, mc_burn_in=50, filters=None):
        """
        This function returns a sample of MCMC walkers for cluster
        physical properties

        Parameters
           photprop : arraylike, shape (nfilter) or (..., nfilter)
              array giving the photometric values; for a
              multidimensional array, the operation is vectorized
              over the leading dimensions
           photerr : arraylike, shape (nfilter) or (..., nfilter)
              array giving photometric errors; for a multidimensional
              array, the operation is vectorized over the leading
              dimensions
           mc_walkers : int
              number of walkers to use in the MCMC
           mc_steps : int
              number of steps in the MCMC
           mc_burn_in : int
              number of steps to consider "burn-in" and discard
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns
           samples : array
              array of sample points returned by the MCMC
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    mcmc(photprop, photerr, mc_walkers, mc_steps,
                         mc_burn_in)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the mcmc method
            return bp.mcmc(photprop, photerr, mc_walkers, mc_steps,
                           mc_burn_in)
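    # Example (illustrative sketch): draw posterior samples for the
    # same photometry via MCMC instead of direct kernel density
    # evaluation; the photometric values are placeholders.
    #
    #     samples = cs.mcmc(np.array([20.1, 19.3]),
    #                       photerr=np.array([0.1, 0.1]),
    #                       mc_walkers=100, mc_steps=500,
    #                       mc_burn_in=50)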
    def bestmatch(self, phot, photerr=None, nmatch=1,
                  bandwidth_units=False, filters=None):
        """
        Searches through the simulation library and returns the
        closest matches to an input set of photometry.

        Parameters
           phot : arraylike, shape (nfilter) or (..., nfilter)
              array giving the photometric values; for a
              multidimensional array, the operation is vectorized
              over the leading dimensions
           photerr : arraylike, shape (nfilter) or (..., nfilter)
              array giving photometric errors, which must have the
              same shape as phot; if this is not None, then distances
              will be measured in units of the photometric error if
              bandwidth_units is False, or in units of the bandwidth
              added in quadrature with the errors if it is True
           nmatch : int
              number of matches to return; returned matches will be
              ordered by distance from the input
           bandwidth_units : bool
              if False, distances are computed based on the
              logarithmic difference in luminosity; if True, they are
              measured in units of the bandwidth
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns
           matches : array, shape (..., nmatch, nphys + nfilter)
              best matches to the input photometry; shape in the
              leading dimensions will be the same as for phot, and if
              nmatch == 1 then that dimension will be omitted; in the
              final dimension, the first 3 elements give log M,
              log T, and A_V, while the last nfilter give the
              photometric values; if the object was created with
              use_extinction = False, the A_V dimension is omitted
           dist : array, shape (..., nmatch)
              distances between the matches and the input photometry
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    bestmatch(phot, photerr, nmatch, bandwidth_units)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the bestmatch method
            return bp.bestmatch(phot, photerr, nmatch,
                                bandwidth_units)
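    # Example (illustrative sketch): find the 5 library simulations
    # closest to an observed set of magnitudes, measuring distance in
    # units of the bandwidth; the photometric values are
    # placeholders.
    #
    #     matches, dist = cs.bestmatch(np.array([20.1, 19.3]),
    #                                  nmatch=5,
    #                                  bandwidth_units=True)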
    def bestmatch_phys(self, phys, nmatch=1, bandwidth_units=False,
                       filters=None):
        """
        Searches through the simulation library and returns the
        closest matches to an input set of physical properties.

        Parameters
           phys : arraylike, shape (nphys) or (..., nphys)
              array giving the physical values; for a
              multidimensional array, the operation is vectorized
              over the leading dimensions
           nmatch : int
              number of matches to return; returned matches will be
              ordered by distance from the input
           bandwidth_units : bool
              if False, distances are computed based on the
              logarithmic difference in physical properties; if True,
              they are measured in units of the bandwidth
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns
           matches : array, shape (..., nmatch, nphys + nfilter)
              best matches to the input properties; shape in the
              leading dimensions will be the same as for phys, and if
              nmatch == 1 then that dimension will be omitted
           dist : array, shape (..., nmatch)
              distances between the matches and the input physical
              properties
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    bestmatch_phys(phys, nmatch, bandwidth_units)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the bestmatch_phys method
            return bp.bestmatch_phys(phys, nmatch, bandwidth_units)
    def make_approx_phot(self, phys, squeeze=True,
                         filter_ignore=None, filters=None):
        """
        Returns an object that can be used for a fast approximation
        of the PDF of photometric properties that corresponds to a
        set of physical properties. The PDF produced by summing over
        the points returned is guaranteed to account for at least
        1-reltol of the marginal photometric probability, and to
        represent the shape of the PDF in photometric space within a
        local accuracy of reltol as well.

        Parameters
           phys : arraylike, shape (nphys) or (N, nphys)
              the set or sets of physical properties for which the
              approximation is to be generated
           squeeze : bool
              if True, the representation returned will be squeezed
              to minimize the number of points included, using reltol
              as the error tolerance
           filter_ignore : None or listlike of bool
              if None, the kernel density representation returned
              covers all filters; otherwise this must be a listlike
              of bool, one entry per filter, with a value of False
              indicating that filter should be excluded from the
              values returned; suppressing filters can allow for more
              efficient representations
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns
           x : array, shape (M, nphot), or a list of such arrays
              an array containing the list of points to be used for
              the approximation
           wgts : array, shape (M), or a list of such arrays
              an array containing the weights of the points

        Notes
           If the requested relative tolerance cannot be reached for
           numerical reasons (usually because the input point is too
           far from the library to allow accurate computation), x and
           wgts will be returned as None, and a warning will be
           issued.
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    make_approx_phot(phys, squeeze=squeeze,
                                     filter_ignore=filter_ignore)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the make_approx_phot method
            return bp.make_approx_phot(phys, squeeze=squeeze,
                                       filter_ignore=filter_ignore)
    def make_approx_phys(self, phot, photerr=None, squeeze=True,
                         phys_ignore=None, filters=None, tol=None):
        """
        Returns an object that can be used for a fast approximation
        of the PDF of physical properties that corresponds to a set
        of photometric properties. The PDF produced by summing over
        the points returned is guaranteed to account for at least
        1-reltol of the marginal physical probability, and to
        represent the shape of the PDF in physical space within a
        local accuracy of reltol as well.

        Parameters
           phot : arraylike, shape (nfilter) or (N, nfilter)
              the set or sets of photometric properties for which the
              approximation is to be generated
           photerr : arraylike, shape (nfilter) or (N, nfilter)
              array giving photometric errors; the number of elements
              in the output lists will be the size that results from
              broadcasting together the leading dimensions of phot
              and photerr
           squeeze : bool
              if True, the representation returned will be squeezed
              to minimize the number of points included, using reltol
              as the error tolerance
           phys_ignore : None or listlike of bool
              if None, the kernel density representation returned
              covers all physical properties; otherwise this must be
              a listlike of bool, one entry per physical dimension,
              with a value of False indicating that dimension should
              be excluded from the values returned; suppressing
              dimensions can allow for more efficient representations
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default
           tol : float
              if set, this tolerance overrides the value of reltol

        Returns
           x : array, shape (M, nphys), or a list of such arrays
              an array containing the list of points to be used for
              the approximation, where nphys is the number of
              physical dimensions being returned
           wgts : array, shape (M), or a list of such arrays
              an array containing the weights of the points

        Notes
           If the requested relative tolerance cannot be reached for
           numerical reasons (usually because the input point is too
           far from the library to allow accurate computation), x and
           wgts will be returned as None, and a warning will be
           issued.
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    make_approx_phys(phot, photerr=photerr,
                                     squeeze=squeeze,
                                     phys_ignore=phys_ignore,
                                     tol=tol)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the make_approx_phys method
            return bp.make_approx_phys(phot, photerr=photerr,
                                       squeeze=squeeze,
                                       phys_ignore=phys_ignore,
                                       tol=tol)
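    # Example (illustrative sketch): build a compact kernel density
    # representation of the physical-property posterior for one
    # cluster, then evaluate its marginal PDF of log M cheaply with
    # mpdf_approx (defined below); the photometric values are
    # placeholders.
    #
    #     x, wgts = cs.make_approx_phys(np.array([20.1, 19.3]),
    #                                   photerr=np.array([0.1, 0.1]))
    #     grid, pdf = cs.mpdf_approx(x, wgts, dims='phys',
    #                                dims_return=[0])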
[docs]    def squeeze_rep(self, x, wgts, dims=None, filters=None):
        """
        Takes an input array of positions and weights that form a
        kernel density representation and approximates them using
        fewer points, using an error tolerance of reltol

        Parameters:
           x : array, shape (N, ndim)
              an array of points forming a kernel density
              representation; on exit, x will be resized to (M, ndim)
              with M <= N
           wgts : array, shape (N)
              an array of weights for the kernel density
              representation; on exit, wgts will be resized to (M),
              with M <= N
           dims : array, shape (ndim)
              array specifying which dimensions in the kernel
              density representation the coordinates in x correspond
              to; if left as None, they are assumed to correspond to
              the first ndim dimensions in the data set
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns:
           Nothing
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it, then return
            if len(self.__filtersets) == 1:
                self.__filtersets[0]['bp']. \
                    squeeze_rep(x, wgts, dims=dims)
                return
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the method and return
            bp.squeeze_rep(x, wgts, dims=dims)
            return
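    # Example (sketch): a representation generated with squeeze=False can
    # be compacted in place afterwards, trading accuracy (at the reltol
    # level) for size:
    #
    #     x, wgts = cs.make_approx_phys(phot, squeeze=False, filters=filts)
    #     cs.squeeze_rep(x, wgts, filters=filts)   # shrinks x and wgts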
[docs]    def mpdf_approx(self, x, wgts, dims='phys', dims_return=None,
                        ngrid=64, qmin='all', qmax='all', grid=None,
                        norm=True, filters=None):
        """
        Returns the marginal posterior PDF computed from a kernel
        density approximation returned by make_approx_phys or
        make_approx_phot. Outputs are computed on a grid of values,
        in the same style as meshgrid.

        Parameters:
           x : array, shape (M, ndim), or a list of such arrays
              array of points returned by make_approx_phot or
              make_approx_phys
           wgts : array, shape (M), or a list of such arrays
              array of weights returned by make_approx_phot or
              make_approx_phys
           dims : 'phys' | 'phot' | arraylike of ints
              dimensions covered by x and wgts; the strings 'phys'
              or 'phot' indicate that they cover all physical or
              photometric dimensions, and correspond to the defaults
              returned by make_approx_phys and make_approx_phot,
              respectively; if dims is an array of ints, these
              specify the dimensions covered by x and wgts, where
              the physical dimensions are numbered 0, 1, ...
              nphys-1, and the photometric ones are nphys, nphys+1,
              ... nphys+nphot-1
           dims_return : None or arraylike of ints
              if None, the output PDF has the same dimensions as
              specified in dims; if not None, dims_return must be a
              subset of dims, and a marginal PDF over only those
              dimensions will be generated
           ngrid : int or listlike containing ints
              number of points in each dimension of the output grid;
              if this is an iterable, it must have one element per
              dimension of the output PDF
           qmin : float | listlike | 'zoom' | 'all'
              minimum value in the output grid in each quantity; if
              this is a float, it is applied to each dimension; if
              it is an iterable, it must contain the same number of
              elements as the number of dimensions being returned,
              and gives the minimum in each dimension; if it is
              'zoom' or 'all', the minimum is chosen automatically,
              with 'zoom' focusing on a region encompassing the
              probability maximum, and 'all' encompassing all the
              points in the representation
           qmax : float | listlike | 'zoom' | 'all'
              same as qmin, but for the maximum of the output grid
           grid : listlike of arrays
              set of values defining the grid on which the PDF is to
              be evaluated, in the same format used by meshgrid
           norm : bool
              if True, returned PDFs will be normalized to integrate
              to 1
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns:
           grid_out : array
              array of values at which the PDF is evaluated;
              contents are the same as returned by meshgrid
           pdf : array
              array of marginal posterior probabilities at each
              point of the output grid, for each input
              representation; the leading dimensions match the
              leading dimensions of the input lists of x and wgts,
              while the trailing dimensions match the dimensions of
              the output grid
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    mpdf_approx(x, wgts, dims=dims,
                                dims_return=dims_return,
                                ngrid=ngrid, qmin=qmin, qmax=qmax,
                                grid=grid, norm=norm)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the method
            return bp.mpdf_approx(x, wgts, dims=dims,
                                  dims_return=dims_return,
                                  ngrid=ngrid, qmin=qmin, qmax=qmax,
                                  grid=grid, norm=norm)
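    # Example (sketch): chaining make_approx_phys with mpdf_approx to get
    # a 1D marginal posterior on log M (physical dimension 0 in the
    # default library) from a single photometric measurement:
    #
    #     x, wgts = cs.make_approx_phys(phot, photerr=photerr, filters=filts)
    #     grid, pdf = cs.mpdf_approx(x, wgts, dims='phys', dims_return=[0],
    #                                ngrid=128, filters=filts)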
    ##################################################################
    # Methods to draw samples
    ##################################################################
[docs]    def draw_sample(self, photerr=None, nsample=1, filters=None):
        """
        Returns a randomly-drawn sample of clusters for a given
        filter set

        Parameters:
           photerr : arraylike, shape (nphot)
              photometric errors to apply to the output photometry;
              these are added in quadrature with the kernel density
              estimation bandwidth
           nsample : int
              number of random samples to draw; must be positive
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns:
           samples : array, shape (nsample, nphys+nphot)
              random sample drawn from the kernel density object;
              for the final dimension in the output, the first nphys
              elements are the physical quantities, the next nphot
              are the photometric quantities
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    draw_sample(photerr=photerr, nsample=nsample)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the method
            return bp.draw_sample(photerr=photerr, nsample=nsample)
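    # Example (sketch; the 0.1 mag error and nphot are hypothetical):
    # draw 1000 mock clusters, convolving the library kernel with a
    # uniform photometric error in every filter:
    #
    #     samp = cs.draw_sample(photerr=0.1*np.ones(nphot), nsample=1000,
    #                           filters=filts)
    #     # samp[:, :nphys] holds physical properties,
    #     # samp[:, nphys:] holds photometry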
[docs]    def draw_phot(self, physprop, physidx=None, photerr=None,
                      nsample=1, filters=None):
        """
        Returns a randomly-drawn sample of photometric properties
        for a given set of physical properties and filter set

        Parameters:
           physprop : arraylike
              physical properties to be used; the final dimension of
              the input must have len(physidx) elements, or nphys
              elements if physidx is None; if the input is a
              multidimensional array, the operation is vectorized
              over the leading dimensions
           physidx : arraylike
              indices of the physical quantities being constrained;
              if left as None, all physical properties are set, and
              physprop must have a trailing dimension of size equal
              to nphys; otherwise this must be an arraylike of <=
              nphys non-negative integers, each unique and in the
              range [0, nphys), specifying which physical dimensions
              are constrained
           photerr : arraylike, shape (nphot)
              photometric errors to apply to the output photometry;
              these are added in quadrature with the kernel density
              estimation bandwidth
           nsample : int
              number of random samples to draw for each set of
              physical properties; must be positive
           filters : listlike of strings
              list of photometric filters to use; if left as None,
              and only 1 set of photometric filters has been defined
              for the cluster_slug object, that set will be used by
              default

        Returns:
           samples : array, shape (nsample, nphys+nphot)
              random sample drawn from the kernel density object;
              for the final dimension in the output, the first nphys
              elements are the physical quantities, the next nphot
              are the photometric quantities
        """

        # Were we given a set of filters?
        if filters is None:

            # No filters given; if we have only a single filter set
            # stored, just use it
            if len(self.__filtersets) == 1:
                return self.__filtersets[0]['bp']. \
                    draw_phot(physprop, physidx=physidx,
                              photerr=photerr, nsample=nsample)
            else:
                raise ValueError("must specify a filter set")

        else:

            # We were given a filter set; add it if it doesn't exist
            self.add_filters(filters)

            # Find the bp object we should use
            for f in self.__filtersets:
                if f['filters'] == filters:
                    bp = f['bp']
                    break

            # Call the method
            return bp.draw_phot(physprop, physidx=physidx,
                                photerr=photerr, nsample=nsample)
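    # Example (sketch): draw 100 photometric realizations of clusters
    # with log M = 4 fixed (physical dimension 0 in the default library),
    # while age and extinction remain unconstrained:
    #
    #     samp = cs.draw_phot(np.array([4.0]), physidx=[0], nsample=100,
    #                         filters=filts)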