Source code for stingray.powerspectrum

from __future__ import division
import numpy as np
import scipy
import scipy.stats
import scipy.fftpack
import scipy.optimize
import logging

import stingray.lightcurve as lightcurve
import stingray.utils as utils
from stingray.utils import simon
from stingray.crossspectrum import Crossspectrum, AveragedCrossspectrum


__all__ = ["Powerspectrum", "AveragedPowerspectrum"]


def classical_pvalue(power, nspec):
    """
    Compute the probability of detecting the current power under
    the assumption that there is no periodic oscillation in the data.

    This computes the single-trial p-value that the power was
    observed under the null hypothesis that there is no signal in
    the data.

    Important: the underlying assumptions that make this calculation valid
    are:
    (1) the powers in the power spectrum follow a chi-square distribution
    (2) the power spectrum is normalized according to Leahy (1984), such
    that the powers have a mean of 2 and a variance of 4
    (3) there is only white noise in the light curve. That is, there is no
    aperiodic variability that would change the overall shape of the power
    spectrum.

    Also note that the p-value is for a *single trial*, i.e. the power
    currently being tested. If more than one power or more than one power
    spectrum are being tested, the resulting p-value must be corrected for the
    number of trials (Bonferroni correction).

    Mathematical formulation in Groth, 1975.
    Original implementation in IDL by Anna L. Watts.

    Parameters
    ----------
    power :  float
        The squared Fourier amplitude of a spectrum to be evaluated

    nspec : int
        The number of spectra or frequency bins averaged in `power`.
        This matters because averaging spectra or frequency bins increases
        the signal-to-noise ratio, i.e. makes the statistical distributions
        of the noise narrower, such that a smaller power might be very
        significant in averaged spectra even though it would not be in a single
        power spectrum.

    Returns
    -------
    pval : float
        The classical p-value of the observed power being consistent with
        the null hypothesis of white noise

    """

    assert np.isfinite(power), "power must be a finite floating point number!"
    assert power > 0, "power must be a positive real number!"
    assert np.isfinite(nspec), "nspec must be a finite integer number"
    assert nspec >= 1, "nspec must be larger or equal to 1"
    assert np.isclose(nspec % 1, 0), "nspec must be an integer number!"

    # If the power is really big, it's safe to say it's significant,
    # and the p-value will be nearly zero
    if (power*nspec) > 30000:
        simon("Probability of no signal too miniscule to calculate.")
        return 0.0

    else:
        pval = _pavnosigfun(power, nspec)
        return pval


def _pavnosigfun(power, nspec):
    """
    Helper function doing the actual calculation of the p-value.
    """
    sum = 0.0
    m = nspec - 1

    pn = power * nspec

    while m >= 0:

        s = 0.0
        for i in range(int(m)-1):
            s += np.log(float(m-i))

        logterm = m*np.log(pn/2) - pn/2 - s
        term = np.exp(logterm)
        ratio = sum / term

        if ratio > 1.0e15:
            return sum

        sum += term
        m -= 1

    return sum


[docs]class Powerspectrum(Crossspectrum):

    def __init__(self, lc=None, norm='frac'):
        """
        Make a Periodogram (power spectrum) from a (binned) light curve.
        Periodograms can be Leahy normalized or fractional rms normalized.
        You can also make an empty Periodogram object to populate with your
        own fourier-transformed data (this can sometimes be useful when making
        binned periodograms).

        Parameters
        ----------
        lc: lightcurve.Lightcurve object, optional, default None
            The light curve data to be Fourier-transformed.

        norm: {"leahy" | "rms"}, optional, default "rms"
            The normaliation of the periodogram to be used. Options are
            "leahy" or "rms", default is "rms".


        Attributes
        ----------
        norm: {"leahy" | "rms"}
            the normalization of the periodogram

        freq: numpy.ndarray
            The array of mid-bin frequencies that the Fourier transform samples

        power: numpy.ndarray
            The array of normalized squared absolute values of Fourier
            amplitudes

        df: float
            The frequency resolution

        m: int
            The number of averaged powers in each bin

        n: int
            The number of data points in the light curve

        nphots: float
            The total number of photons in the light curve

        """
        Crossspectrum.__init__(self, lc1=lc, lc2=lc, norm=norm)
        self.nphots = self.nphots1

[docs]    def rebin(self, df, method="mean"):
        bin_ps = Crossspectrum.rebin(self, df=df, method=method)
        bin_ps.nphots = bin_ps.nphots1

        return bin_ps

[docs]    def compute_rms(self, min_freq, max_freq):
        """
        Compute the fractional rms amplitude in the periodgram
        between two frequencies.

        Parameters
        ----------
        min_freq: float
            The lower frequency bound for the calculation

        max_freq: float
            The upper frequency bound for the calculation

        Returns
        -------
        rms: float
            The fractional rms amplitude contained between min_freq and
            max_freq

        """
        # assert min_freq >= self.freq[0], "Lower frequency bound must be " \
        #                                 "larger or equal the minimum " \
        #                                 "frequency in the periodogram!"

        # assert max_freq <= self.freq[-1], "Upper frequency bound must be " \
        #                                 "smaller or equal the maximum " \
        #                                 "frequency in the periodogram!"

        minind = self.freq.searchsorted(min_freq)
        maxind = self.freq.searchsorted(max_freq)
        powers = self.power[minind:maxind]
        if self.norm.lower() == 'leahy':
            rms = np.sqrt(np.sum(powers)/self.nphots)

        elif self.norm.lower() == "frac":
            rms = np.sqrt(np.sum(powers*self.df))
        else:
            raise Exception("Normalization not recognized!")

        rms_err = self._rms_error(powers)

        return rms, rms_err

    def _rms_error(self, powers):
        """
        Compute the error on the fractional rms amplitude using error
        propagation.
        Note: this uses the actual measured powers, which is not
        strictly correct. We should be using the underlying power spectrum,
        but in the absence of an estimate of that, this will have to do.

        Parameters
        ----------
        powers: iterable
            The list of powers used to compute the fractional rms amplitude.

        Returns
        -------
        delta_rms: float
            the error on the fractional rms amplitude
        """
        p_err = scipy.stats.chi2(2.0*self.m).var() * powers / self.m
        drms_dp = 1 / (2*np.sqrt(np.sum(powers)*self.df))
        delta_rms = np.sum(p_err*drms_dp*self.df)
        return delta_rms

[docs]    def classical_significances(self, threshold=1, trial_correction=False):
        """
        Compute the classical significances for the powers in the power
        spectrum, assuming an underlying noise distribution that follows a
        chi-square distributions with 2M degrees of freedom, where M is the
        number of powers averaged in each bin.

        Note that this function will *only* produce correct results when the
        following underlying assumptions are fulfilled:
        (1) The power spectrum is Leahy-normalized
        (2) There is no source of variability in the data other than the
        periodic signal to be determined with this method. This is important!
        If there are other sources of (aperiodic) variability in the data, this
        method will *not* produce correct results, but instead produce a large
        number of spurious false positive detections!
        (3) There are no significant instrumental effects changing the
        statistical distribution of the powers (e.g. pile-up or dead time)

        By default, the method produces (index,p-values) for all powers in
        the power spectrum, where index is the numerical index of the power in
        question. If a `threshold` is set, then only powers with p-values
        *below* that threshold with their respective indices. If
        `trial_correction` is set to True, then the threshold will be corrected
        for the number of trials (frequencies) in the power spectrum before
        being used.

        Parameters
        ----------
        threshold : float
            The threshold to be used when reporting p-values of potentially
            significant powers. Must be between 0 and 1.
            Default is 1 (all p-values will be reported).

        trial_correction : bool
            A Boolean flag that sets whether the `threshold` will be correted
            by the number of frequencies before being applied. This decreases
            the threshold (p-values need to be lower to count as significant).
            Default is False (report all powers) though for any application
            where `threshold` is set to something meaningful, this should also
            be applied!

        Returns
        -------
        pvals : iterable
            A list of (index, p-value) tuples for all powers that have p-values
            lower than the threshold specified in `threshold`.

        """
        assert self.norm == "leahy", "This method only works on " \
                                     "Leahy-normalized power spectra!"

        # calculate p-values for all powers
        # leave out zeroth power since it just encodes the number of photons!
        pv = np.array([classical_pvalue(power, self.m)
                      for power in self.power])

        # if trial correction is used, then correct the threshold for
        # the number of powers in the power spectrum
        if trial_correction:
            threshold /= self.power.shape[0]

        # need to add 1 to the indices to make up for the fact that
        # we left out the first power above!
        indices = np.where(pv < threshold)[0]

        pvals = np.vstack([pv[indices], indices])

        return pvals


[docs]class AveragedPowerspectrum(AveragedCrossspectrum, Powerspectrum):

    def __init__(self, lc, segment_size, norm="frac"):
        """
        Make an averaged periodogram from a light curve by segmenting the light
        curve, Fourier-transforming each segment and then averaging the
        resulting periodograms.

        Parameters
        ----------
        lc: lightcurve.Lightcurve object OR
            iterable of lightcurve.Lightcurve objects
            The light curve data to be Fourier-transformed.

        segment_size: float
            The size of each segment to average. Note that if the total
            duration of each Lightcurve object in lc is not an integer multiple
            of the segment_size, then any fraction left-over at the end of the
            time series will be lost.

        norm: {"leahy" | "rms"}, optional, default "rms"
            The normaliation of the periodogram to be used. Options are
            "leahy" or "rms", default is "rms".


        Attributes
        ----------
        norm: {"leahy" | "rms"}
            the normalization of the periodogram

        freq: numpy.ndarray
            The array of mid-bin frequencies that the Fourier transform samples

        power: numpy.ndarray
            The array of normalized squared absolute values of Fourier
            amplitudes

        df: float
            The frequency resolution

        m: int
            The number of averaged periodograms

        n: int
            The number of data points in the light curve

        nphots: float
            The total number of photons in the light curve


        """

        self.type = "powerspectrum"


        assert isinstance(norm, str), "norm is not a string!"

        assert norm.lower() in ["frac", "abs", "leahy", "none"], \
                "norm must be 'frac', 'abs', 'leahy', or 'none'!"

        self.norm = norm.lower()

        assert np.isfinite(segment_size), "segment_size must be finite!"

        self.segment_size = segment_size

        Powerspectrum.__init__(self, lc, norm)

        return

    def _make_segment_spectrum(self, lc, segment_size):

        assert isinstance(lc, lightcurve.Lightcurve)

        # number of bins per segment
        nbins = int(segment_size/lc.dt)

        start_ind = 0
        end_ind = nbins

        power_all = []
        nphots_all = []
        while end_ind <= lc.counts.shape[0]:
            time = lc.time[start_ind:end_ind]
            counts = lc.counts[start_ind:end_ind]
            lc_seg = lightcurve.Lightcurve(time, counts)
            power_seg = Powerspectrum(lc_seg, norm=self.norm)
            power_all.append(power_seg)
            nphots_all.append(np.sum(lc_seg.counts))
            start_ind += nbins
            end_ind += nbins

        return power_all, nphots_all
Navigation

Source code for stingray.powerspectrum