Source code for slugpy.slug_pdf

"""
This defines a class that can be used to parse slug PDF files and draw
random values from them. This code is thread-safe, in the sense that
if multiple slug_pdf instances are instantiated in different threads,
the random streams they generate will not be identical.
"""

# Special check for readthedocs
import os
on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
if not on_rtd:
    from numpy.random import RandomState
else:
    def RandomState():
        return None

import numpy as np
import errno
from .slug_pdf_delta import slug_pdf_delta
from .slug_pdf_exponential import slug_pdf_exponential
from .slug_pdf_powerlaw import slug_pdf_powerlaw
from .slug_pdf_lognormal import slug_pdf_lognormal
from .slug_pdf_powerlaw import slug_pdf_powerlaw
from .slug_pdf_normal import slug_pdf_normal


[docs]
class slug_pdf(object):
    """
    A class that implements the SLUG PDF drawing method. This class
    contains a method to parse slug-formatted PDF files and then draw
    values from the PDFs they specify. This class is thread-safe, in
    the sense that if multiple slug_pdf instances are instantiated in
    different threads, the random streams they generate will not be
    identical.
    """

    ##################################################################
    # Initialization method
    ##################################################################
    def __init__(self, pdffile=None):
        self.segments = []
        self.wgts = np.array(0)
        # Use /dev/urandom to initialize, so should be thread-safe
        self._rnd = RandomState()
        if pdffile is not None:
            self.readfile(pdffile)

    ##################################################################
    # Method to parse PDF files
    ##################################################################
    def readfile(self, pdffile):

        # Open the file
        fp = open(pdffile, 'r')

        # Read until we see breakpoints or advanced
        for line in fp:

            # Strip trailing comments
            while line.rfind('#') != -1:
                line = line[:line.rfind('#')]

            # Strip leading and trailing whitespace
            line = line.strip()

            # Skip blank lines
            if len(line) == 0:
                continue

            # Is this line advanced or breakpoints? If not, raise
            # error.
            if line.split()[0] == 'breakpoints':
                basic = True
            elif line.strip() == 'advanced':
                basic = False
            else:
                raise IOError(errno.EIO, "slug_pdf: expected 'breakpoints' "+
                              "or 'advanced', found '"+line+"'")

            # If we are in basic mode, extract the breakpoints
            if basic:
                if len(line.split()) < 3:
                    raise IOError(errno.EIO, 
                        "slug_pdf: need at least 2 breakpoints")
                self.bkpts = np.array(line.split()[1:], dtype='d')

            # Done reading header
            break

        # Make sure we haven't reached EOF with setting a mode
        if basic is None:
            raise IOError(errno.EIO, "slug_pdf: didn't find 'breakpoints' "+
                          "or 'advanced' in file "+pdffile)
        
        # Now parse the rest of the file
        if basic:
            self.parse_basic(fp)
        else:
            self.parse_advanced(fp)

        # Close file
        fp.close()

    ##################################################################
    # Method to parse basic PDF files
    ##################################################################
    def parse_basic(self, fp):

        # Flag if we're in a segment
        in_segment = False

        # Initialize segment list
        self.segments = []
        
        # Loop through file
        for line in fp:

            # Strip trailing comments
            while line.rfind('#') != -1:
                line = line[:line.rfind('#')]

            # Strip leading and trailing whitespace
            line = line.strip()

            # Skip blank lines
            if len(line) == 0:
                continue

            # Parse command
            if line == 'segment':

                # We're being asked to start a new segment
                if not in_segment:
                    in_segment = True
                else:
                    raise IOError(errno.EIO, "slug_pdf: expected 'type "+
                                  "TYPENAME', found"+
                                  " '"+line+"'")

            elif line.split()[0] == 'type':

                # We're being given a segment type; make sure we're in
                # a segment
                if not in_segment:
                    raise IOError(errno.EIO, "slug_pdf: expected 'segment',"+
                                  " found '"+line+"'")

                # Make sure we have a valid segment type
                if len(line.split()) != 2:
                    raise IOError(errno.EIO, "slug_pdf: expected 'type "+
                                  "TYPENAME', found"+
                                  " '"+line+"'")

                # Read segment type
                segtype = line.split()[1]

                # Grab breakpoints for this segment
                ptr = len(self.segments)
                if ptr > len(self.bkpts)-1:
                    raise IOError(errno.EIO, 
                        "slug_pdf: expected {:d}".len(self.bkpts-1)+
                        " segments, found too many")
                a = self.bkpts[ptr]
                b = self.bkpts[ptr+1]
                
                # Call parsing routine for this segment
                if segtype == 'delta':
                    if a != b:
                        raise IOError(errno.EIO, "slug_pdf: delta segments "+
                                      "must have a == b")
                    self.segments.append(slug_pdf_delta(a))
                elif segtype == 'exponential':
                    self.segments.append(
                        slug_pdf_exponential(a, b, fp=fp, rand=self._rnd))
                elif segtype == 'lognormal':
                    self.segments.append(
                        slug_pdf_lognormal(a, b, fp=fp, rand=self._rnd))
                elif segtype == 'powerlaw':
                    self.segments.append(
                        slug_pdf_powerlaw(a, b, fp=fp, rand=self._rnd))
                elif segtype == 'normal':
                    self.segments.append(
                        slug_pdf_normal(a, b, fp=fp, rand=self._rnd))
                elif segtype == 'schechter':
                    raise NotImplementedError(
                        "slug_pdf: schechter segments not yet "+
                        "implemented")

                # Reset the segment pointer
                in_segment = False

        # Make sure we got enough segments
        if len(self.segments) != len(self.bkpts)-1:
            raise IOError(errno.EIO, 
                "slug_pdf: expected {:d} segments, found {:d}".
                format(len(self.bkpts)-1, len(self.segments)))

        # Convert segments to array
        self.segments = np.array(self.segments)
        
        # Compute the relative weights required to make the segments
        # continuous
        self.wgts = np.zeros(len(self.segments))
        self.wgts[0] = 1.0
        for i in range(1, len(self.segments)):
            self.wgts[i] = self.wgts[i-1] * \
                           self.segments[i-1](self.segments[i-1].b) / \
                           self.segments[i](self.segments[i].a)

        # Set weights to give overall normalization of 1
        self.wgts = self.wgts / np.sum(self.wgts)

        # Store cumulative weights
        self.cumwgts = np.cumsum(self.wgts)

        
    ##################################################################
    # Method to parse advanced PDF files
    ##################################################################
    def parse_advance(self, fp):
        raise NotImplementedError( 
            "slug_pdf: advanced mode files not yet supported")

    ##################################################################
    # Methods to evaluate PDF and to draw from it
    ##################################################################
    def __call__(self, x):
        """
        Return the value of the PDF evaluated at x

        Parameters:
           x : float or array
              The value(s) at which to evaluate the PDF

        Returns:
           pdf : float or array
              The value of the PDF evaluated at x
        """
        if hasattr(x, '__iter__'):
            xarr = np.array(x)
            pdf = np.zeros(xarr.shape)
            for s, w in zip(self.segments, self.wgts):
                pdf += w*s(xarr)
        else:
            pdf = 0.0
            for s, w in zip(self.segments, self.wgts):
                pdf += w*s(x)
        return pdf


[docs]
    def draw(self, *d):
        """
        Draw from the PDF

        Parameters:
           d0, d1, ..., dn : int, optional
              Dimensions of the returned array; if left unspecified, a
              single python float is returned

        Returns:
           x : float or array
              One or more numbers drawn from the PDF
        """

        # Figure out which segments to draw from
        dev = self._rnd.rand(*d)
        seg = np.digitize(dev, self.cumwgts)

        # Draw
        if hasattr(dev, '__iter__'):
            samples = np.zeros(dev.shape)
            for i, s in enumerate(self.segments):
                idx = (i == seg)
                nidx = np.sum(idx)
                if nidx > 0:
                    samples[idx] = s.draw(nidx)
        else:
            samples = self.segments[seg].draw()

        # Return
        return samples
Source code for slugpy.slug_pdf

slug

Navigation

Related Topics