"""
This defines a class that can be used to estimate the PDF of true star
formation rate from a set of input point mass estimates of the star
formation rate.
"""
import numpy as np
import copy
import os
import os.path as osp
import warnings
import errno
try:
# Python 3
from urllib.request import urlopen
except ImportError:
# Python 2
from urllib2 import urlopen
# Import the data reading and Bayesian inference stuff we need
from ..bayesphot import bp
from ..read_integrated_prop import read_integrated_prop
from ..read_integrated_phot import read_integrated_phot
# Functions for pre-defined prior probability distributions on the SFR
def dndlogsfr_flat(logsfr):
return 1.0+logsfr*0.0
def dndlogsfr_schechter(logsfr):
sfr = np.exp(logsfr)
return sfr**(-0.51) * np.exp(-sfr/9.2)
# Main sfr_slug class
[docs]class sfr_slug(object):
"""
A class that can be used to estimate the PDF of true star
formation rate from a set of input point mass estimates of the
star formation rate.
Properties
priors : array, shape (N) | callable | 'flat' | 'schechter' | None
prior probability on each data point; interpretation
depends on the type passed; array, shape (N): values are
interpreted as the prior probability of each data point;
callable: the callable must take as an argument an array
of shape (N, nphys), and return an array of shape (N)
giving the prior probability at each data point; None:
all data points have equal prior probability; the values
'flat' and 'schechter' use priors p(log SFR) ~ constant and
p(log SFR) ~ SFR^alpha exp(-SFR/SFR_*), respectively, where
alpha = -0.51 and SFR_* = 9.2 Msun/yr are the values
measured by Bothwell et al. (2011)
bandwidth : 'auto' | array, shape (M)
bandwidth for kernel density estimation; if set to
'auto', the bandwidth will be estimated automatically
"""
##################################################################
# Initializer method
##################################################################
[docs] def __init__(self, libname=None, detname=None, filters=None,
bandwidth=0.1, ktype='gaussian', priors=None,
sample_density='read', reltol=1.0e-3,
abstol=1.0e-10, leafsize=16):
"""
Initialize an sfr_slug object.
Parameters
libname : string
name of the SLUG model to load; if left as None, the default
is $SLUG_DIR/sfr_slug/SFR_SLUG
detname : string
name of a SLUG model run with the same parameters but no
stochasticity; used to establish the non-stochastic
photometry to SFR conversions; if left as None, the default
is libname_DET
filters : iterable of stringlike
list of filter names to be used for inferenence
bandwidth : 'auto' | float | array, shape (M)
bandwidth for kernel density estimation; if set to
'auto', the bandwidth will be estimated automatically;
if set to a float, the same bandwidth is used in all
dimensions
ktype : string
type of kernel to be used in densty estimation; allowed
values are 'gaussian' (default), 'epanechnikov', and
'tophat'; only Gaussian can be used with error bars
priors : array, shape (N) | callable | None
prior probability on each data point; interpretation
depends on the type passed; array, shape (N): values are
interpreted as the prior probability of each data point;
callable: the callable must take as an argument an array
of shape (N, nphys), and return an array of shape (N)
giving the prior probability at each data point; None:
all data points have equal prior probability
sample_density : array, shape (N) | callable | 'auto' | 'read' | None
the density of the data samples at each data point; this
need not match the prior density; interpretation depends
on the type passed; array, shape (N): values are
interpreted as the density of data sampling at each
sample point; callable: the callable must take as an
argument an array of shape (N, nphys), and return an
array of shape (N) giving the sampling density at each
point; 'auto': the sample density will be computed
directly from the data set; note that this can be quite
slow for large data sets, so it is preferable to specify
this analytically if it is known; 'read': the sample
density is to be read from a numpy save file whose name
matches that of the library, with the extension _density.npy
added; None: data are assumed to be uniformly sampled
reltol : float
relative error tolerance; errors on all returned
probabilities p will satisfy either
abs(p_est - p_true) <= reltol * p_est OR
abs(p_est - p_true) <= abstol,
where p_est is the returned estimate and p_true is the
true value
abstol : float
absolute error tolerance; see above
leafsize : int
number of data points in each leaf of the KD tree
Returns
Nothing
Raises
IOError, if the library cannot be found
"""
# If using the default library, assign the library name
if libname is None:
self.__libname = osp.join('sfr_slug', 'SFR_SLUG')
if 'SLUG_DIR' in os.environ:
self.__libname = osp.join(os.environ['SLUG_DIR'],
self.__libname)
else:
self.__libname = libname
# Load the data
try:
prop = read_integrated_prop(self.__libname)
phot = read_integrated_phot(self.__libname)
except IOError:
# If we're here, we failed to load the library. If we were
# given a library file name explicitly, just raise an
# error.
if libname is not None:
raise IOError(errno.ENOENT,
"unable to open library {}".
format(self.__libname))
# If we've made it to here, we were asked to open the
# default library but failed to do so. Check if the
# failure could be because we don't have astropy and thus
# can't open fits files. If that's the cause, print out a
# helpful error message.
try:
import astropy.io.fits as fits
except ImportError:
raise IOError(errno.EIO,
"failed to read default sfr_slug " +
"library sfr_slug/SFR_SLUG due to missing " +
"astropy.io.fits; install astropy " +
"or specify a library in a non-FITS " +
"format")
# If we're here, we couldn't open the default library, and
# it's not because we don't have FITS capability. The file
# must not exist, or must be damaged. Check if we're in
# interactive mode. If not, just raise an error and
# suggest the user to go get the library file.
errstr = "Unable to open default sfr_slug " + \
"library file sfr_slug/SFR_SLUG."
import __main__ as main
if hasattr(main, '__file__'):
# We're not interactive; just raise an error
raise IOError(errno.EIO,
errstr + " " +
"Try downloading it from " +
"https://sites.google.com/site/runslug/data")
# If we're here, we don't have hte library file, but we
# are in interactive mode. Thus offer the user an option
# to go download the file now.
usr_response \
= raw_input(errstr + " Would you like to download it "
"now (total size 160 MB)? [y/n] ").\
lower().strip()
if not usr_response in ['yes', 'y', 'ye']:
# User didn't say yes, so raise error
raise IOError(errno.EIO, "Unable to proceeed")
# If we're here, download the files
print("Fetching SFR_SLUG_integrated_prop.fits " +
"(this may take a while)...")
url = urlopen(
'https://dl.dropboxusercontent.com/s/7la1b5h986rdz29/SFR_SLUG_integrated_prop.fits')
rawdata = url.read()
url.close()
fp = open(osp.join(osp.dirname(self.__libname),
'SFR_SLUG_integrated_prop.fits'), 'wb')
fp.write(rawdata)
fp.close()
print("Fetching SFR_SLUG_integrated_phot.fits " +
"(this make take a while)...")
url = urlopen(
'https://dl.dropboxusercontent.com/s/ra8qf5raqutcf50/SFR_SLUG_integrated_phot.fits')
rawdata = url.read()
url.close()
fp = open(osp.join(osp.dirname(self.__libname),
'SFR_SLUG_integrated_phot.fits'), 'wb')
fp.write(rawdata)
fp.close()
# Now try reading the data
try:
prop = read_integrated_prop(self.__libname)
phot = read_integrated_phot(self.__libname)
except IOError:
raise IOError(errno.EIO,
"still unable to open default library")
# Load the determinstic run
if detname is None:
self.__detname = self.__libname + '_DET'
else:
self.__detname = detname
propdet = read_integrated_prop(self.__detname)
photdet = read_integrated_phot(self.__detname)
# If we have been told to read the sample density, do so
try:
if sample_density == 'read':
self.__sample_density = np.load(self.__libname +
'_density.npy')
except IOError:
warnings.warn("unable to load requested sample "+
"density file " +
self.__libname + "_density.npy; setting " +
"sample_density = 'auto' instead")
sample_density = 'auto'
# Store filters
self.__allfilters = phot.filter_names
# Get conversions from photometry to SFR for the deterministic run
sfrdet = propdet.target_mass[0,0]/propdet.time[0]
self.__conversions = photdet.phot[:,0,0] / sfrdet
# Deduce SFR from target mass and time
logsfr = np.log10(
np.transpose(np.transpose(prop.target_mass)/prop.time[0]))
# Convert the photometry from the stochastic runs to SFRs
# estimated using the point mass approximation
sfrphot = np.transpose(np.transpose(phot.phot) /
self.__conversions)
sfrphotclip = np.clip(sfrphot, 1e-100, np.inf)
logsfrphot = np.log10(sfrphotclip)
# Build array of log SFR, log SFR_phot
self.__ds = np.zeros((logsfrphot.shape[2],
logsfrphot.shape[0]*
logsfrphot.shape[1]+1))
self.__ds[:,0] = logsfr
self.__ds[:,1:] = np.transpose(logsfrphot.reshape(
(logsfrphot.shape[0]*logsfrphot.shape[1],
logsfrphot.shape[2])))
# Record other stuff that we'll use to construct bp objects
# later
if type(bandwidth) is not np.ndarray:
bandwidth = np.array([bandwidth]*(1+len(phot.filter_names)))
self.__bandwidth = bandwidth
self.__ktype = ktype
self.__reltol = reltol
self.__abstol = abstol
if sample_density != 'read':
self.__sample_density = sample_density
# Initialize empty dict containing filter sets
self.__filtersets = []
# Set priors
self.priors = priors
# If we have been given a filter list, create the data set to
# go with it
if filters is not None:
self.add_filters(filters)
##################################################################
# Method to return list of available filters
##################################################################
[docs] def filters(self):
"""
Returns list of all available filters
Parameters:
None
Returns:
filters : list of strings
list of available filter names
"""
return copy.deepcopy(self.__allfilters)
##################################################################
# Method to prepare to analyze a particular set of filters
##################################################################
[docs] def add_filters(self, filters):
"""
Add a set of filters to use for cluster property estimation
Parameters
filters : iterable of stringlike
list of filter names to be used for inferenence
Returns
nothing
"""
# Handle the case where we're given just a string for one filter
if type(filters) is str:
filters = [filters]
# If we already have this filter set in our dict, do nothing
for f in self.__filtersets:
if filters == f['filters']:
return
# We're adding a new filter set, so save its name
newfilter = { 'filters' : filters}
# Construct data set to use with this filter combination, and
# fill it in
newfilter['dataset'] = np.zeros((self.__ds.shape[0],
1+len(filters)))
newfilter['dataset'][:,:1] = self.__ds[:,:1]
newfilter['idx'] = np.zeros(1+len(filters), dtype=int)
newfilter['idx'][0] = 0
for i, f in enumerate(filters):
if f not in self.__allfilters:
raise ValueError("unknown filter "+str(f))
idx = self.__allfilters.index(f)
newfilter['dataset'][:,1+i] = self.__ds[:,1+idx]
newfilter['idx'][i+1] = 1+idx
# Build a bp object to go with this data set
newfilter['bp'] = bp(newfilter['dataset'], 1,
filters=filters,
bandwidth = self.__bandwidth,
ktype = self.__ktype,
priors = self.__priors,
sample_density = self.__sample_density,
reltol = self.__reltol,
abstol = self.__abstol)
# Save to the master filter list
self.__filtersets.append(newfilter)
##################################################################
# Define the priors property. This just wraps around the
# corresponding property defined for bp objects.
##################################################################
@property
def priors(self):
return self.__priors
@priors.setter
def priors(self, pr):
if pr == 'flat':
self.priors = dndlogsfr_flat
elif pr == 'schechter':
self.priors = dndlogsfr_schechter
else:
self.__priors = pr
for f in self.__filtersets:
f['bp'].priors = self.__priors
##################################################################
# Define the bandwidth property. This just wraps around the
# corresponding property defined for bp objects.
##################################################################
@property
def bandwidth(self):
return self.__bandwidth
@bandwidth.setter
def bandwidth(self, bandwidth):
self.__bandwidth = bandwidth
for f in self.__filtersets:
f['bp'].bandwidth = np.array(bandwidth)[f['idx']]
##################################################################
# Wrappers around the bp logL, mpdf, and mcmc functions
##################################################################
[docs] def logL(self, logSFR, logSFRphot, logSFRphoterr=None,
filters=None):
"""
This function returns the natural log of the likelihood
function evaluated at a particular log SFR and set of log
luminosities
Parameters:
logSFR : float or arraylike
float or array giving values of the log SFR; for an
array, the operation is vectorized
logSFRphot : float or arraylike, shape (nfilter) or (..., nfilter)
float or array giving the SFR inferred from photometry using a
deterministic conversion; for an array, the operation is
vectorized over the leading dimensions
logSFRphoterr : float arraylike, shape (nfilter) or (..., nfilter)
float or array giving photometric SFR errors; for a
multidimensional array, the operation is vectorized over
the leading dimensions
filters : listlike of strings
list of photometric filters used for the SFR estimation;
if left as None, and only 1 set of photometric filters
has been defined for the sfr_slug object, that set will
be used by default
Returns:
logL : float or arraylike
natural log of the likelihood function
"""
# If we were given a floats, make them an arrays of shape 1 for
# the purposes of communicating with c
if not type(logSFR) is np.ndarray:
physprop = np.array(logSFR)
if physprop.ndim == 0:
physprop = np.reshape(physprop, (1,))
else:
physprop = logSFR
if not type(logSFRphot) is np.ndarray:
photprop = np.array(logSFRphot)
if photprop.ndim == 0:
photprop = np.reshape(photprop, (1,))
else:
photprop = logSFRphot
if logSFRphoterr is not None and \
type(logSFRphoterr) is not np.ndarray:
photerr = np.array(logSFRphoterr)
if photerr.ndim == 0:
photerr = np.reshape(photerr, (1,))
else:
photerr = logSFRphoterr
# Were we given a set of filters?
if filters is None:
# No filters given; if we have only a single filter set
# stored, just use it
if len(self.__filtersets) == 1:
return self.__filtersets[0]['bp']. \
logL(physprop, photprop, photerr)
else:
raise ValueError("must specify a filter set")
else:
# We were given a filter set; add it if it doesn't exist
self.add_filters(filters)
# Handle the case where we're given just a string for one filter
if type(filters) is str:
filters = [filters]
# Find the bp object we should use
for f in self.__filtersets:
if f['filters'] == filters:
bp = f['bp']
break
# Call the logL method
return bp.logL(physprop, photprop, photerr)
[docs] def mpdf(self, logSFRphot, logSFRphoterr=None, ngrid=128,
qmin=None, qmax=None, grid=None, norm=True,
filters=None):
"""
Returns the marginal probability of log SFR for one or more
input sets of photometric properties. Output quantities are
computed on a grid of values, in the same style as meshgrid
Parameters:
logSFRphot : float or arraylike
float or array giving the log SFR inferred from
photometry using a deterministic conversion; if the
argument is an array, the operation is vectorized over
it
logSFRphoterr : arraylike, shape (nfilter) or (..., nfilter)
array giving photometric errors; for a multidimensional
array, the operation is vectorized over the leading
dimensions
ngrid : int
number of points in the output log SFR grid
qmin : float
minimum value in the output log SFR grid
qmax : float
maximum value in the output log SFR grid
grid : array
set of values defining the grid of SFR values at which
to evaluate; if set, overrides ngrid, qmin, and qmax
norm : bool
if True, returned pdf's will be normalized to integrate
to 1
filters : listlike of strings
list of photometric filters to use; if left as None, and
only 1 set of photometric filters has been defined for
the cluster_slug object, that set will be used by
default
Returns:
grid_out : array
array of log SFR values at which the PDF is evaluated
pdf : array
array of marginal posterior probabilities at each point
of the output grid, for each input photometric value;
the leading dimensions match the leading dimensions
produced by broadcasting the leading dimensions of
photprop and photerr together, while the trailing
dimensions match the dimensions of the output grid
"""
# If we were given a floats, make them an arrays of shape 1 for
# the purposes of communicating with c
if not type(logSFRphot) is np.ndarray:
photprop = np.array(logSFRphot)
if photprop.ndim == 0:
photprop = np.reshape(photprop, (1,))
else:
photprop = logSFRphot
if logSFRphoterr is not None and \
type(logSFRphoterr) is not np.ndarray:
photerr = np.array(logSFRphoterr)
if photerr.ndim == 0:
photerr = np.reshape(photerr, (1,))
else:
photerr = logSFRphoterr
# Were we given a set of filters?
if filters is None:
# No filters given; if we have only a single filter set
# stored, just use it
if len(self.__filtersets) == 1:
return self.__filtersets[0]['bp']. \
mpdf(0, photprop, photerr, ngrid,
qmin, qmax, grid, norm)
else:
raise ValueError("must specify a filter set")
else:
# We were given a filter set; add it if it doesn't exist
self.add_filters(filters)
# Handle the case where we're given just a string for one filter
if type(filters) is str:
filters = [filters]
# Find the bp object we should use
for f in self.__filtersets:
if f['filters'] == filters:
bp = f['bp']
break
# Call the logL method
return bp.mpdf(0, photprop, photerr, ngrid,
qmin, qmax, grid, norm)
[docs] def mcmc(self, photprop, photerr=None, mc_walkers=100,
mc_steps=500, mc_burn_in=50, filters=None):
"""
This function returns a sample of MCMC walkers for log SFR
Parameters:
photprop : arraylike, shape (nfilter) or (..., nfilter)
array giving the photometric values; for a
multidimensional array, the operation is vectorized over
the leading dimensions
photerr : arraylike, shape (nfilter) or (..., nfilter)
array giving photometric errors; for a multidimensional
array, the operation is vectorized over the leading
dimensions
mc_walkers : int
number of walkers to use in the MCMC
mc_steps : int
number of steps in the MCMC
mc_burn_in : int
number of steps to consider "burn-in" and discard
filters : listlike of strings
list of photometric filters to use; if left as None, and
only 1 set of photometric filters has been defined for
the cluster_slug object, that set will be used by
default
Returns
samples : array
array of sample points returned by the MCMC
"""
# If we were given a floats, make them an arrays of shape 1 for
# the purposes of communicating with c
if not type(logSFRphot) is np.ndarray:
photprop = np.array([logSFRphot])
if photprop.ndim == 0:
photprop = np.reshape(photprop, (1,))
else:
photprop = logSFRphot
if logSFRphoterr is not None and \
type(logSFRphoterr) is not np.ndarray:
photerr = np.array([logSFRphoterr])
if photerr.ndim == 0:
photerr = np.reshape(photerr, (1,))
else:
photerr = logSFRphoterr
# Were we given a set of filters?
if filters is None:
# No filters given; if we have only a single filter set
# stored, just use it
if len(self.__filtersets) == 1:
return self.__filtersets[0]['bp']. \
mcmc(photprop, photerr, mc_walkers, mc_steps,
mc_burn_in)
else:
raise ValueError("must specify a filter set")
else:
# We were given a filter set; add it if it doesn't exist
self.add_filters(filters)
# Find the bp object we should use
for f in self.__filtersets:
if f['filters'] == filters:
bp = f['bp']
break
# Handle the case where we're given just a string for one filter
if type(filters) is str:
filters = [filters]
# Call the logL method
return bp.mcmc(photprop, photerr, mc_walkers, mc_steps,
mc_burn_in)