Source code for stingray.powerspectrum

from __future__ import division
import numpy as np
import scipy
import scipy.stats
import scipy.fftpack
import scipy.optimize
import logging

import stingray.lightcurve as lightcurve
import stingray.utils as utils
from stingray.utils import simon
from stingray.crossspectrum import Crossspectrum, AveragedCrossspectrum


__all__ = ["Powerspectrum", "AveragedPowerspectrum"]


def classical_pvalue(power, nspec):
    """
    Compute the probability of detecting the current power under
    the assumption that there is no periodic oscillation in the data.

    This computes the single-trial p-value that the power was
    observed under the null hypothesis that there is no signal in
    the data.

    Important: the underlying assumptions that make this calculation valid
    are:
    (1) the powers in the power spectrum follow a chi-square distribution
    (2) the power spectrum is normalized according to Leahy (1984), such
    that the powers have a mean of 2 and a variance of 4
    (3) there is only white noise in the light curve. That is, there is no
    aperiodic variability that would change the overall shape of the power
    spectrum.

    Also note that the p-value is for a *single trial*, i.e. the power
    currently being tested. If more than one power or more than one power
    spectrum are being tested, the resulting p-value must be corrected for the
    number of trials (Bonferroni correction).

    Mathematical formulation in Groth, 1975.
    Original implementation in IDL by Anna L. Watts.

    Parameters
    ----------
    power :  float
        The squared Fourier amplitude of a spectrum to be evaluated

    nspec : int
        The number of spectra or frequency bins averaged in `power`.
        This matters because averaging spectra or frequency bins increases
        the signal-to-noise ratio, i.e. makes the statistical distributions
        of the noise narrower, such that a smaller power might be very
        significant in averaged spectra even though it would not be in a single
        power spectrum.

    Returns
    -------
    pval : float
        The classical p-value of the observed power being consistent with
        the null hypothesis of white noise

    """

    assert np.isfinite(power), "power must be a finite floating point number!"
    assert power > 0, "power must be a positive real number!"
    assert np.isfinite(nspec), "nspec must be a finite integer number"
    assert nspec >= 1, "nspec must be larger or equal to 1"
    assert np.isclose(nspec % 1, 0), "nspec must be an integer number!"

    # If the power is really big, it's safe to say it's significant,
    # and the p-value will be nearly zero
    if (power*nspec) > 30000:
        simon("Probability of no signal too miniscule to calculate.")
        return 0.0

    else:
        pval = _pavnosigfun(power, nspec)
        return pval


def _pavnosigfun(power, nspec):
    """
    Helper function doing the actual calculation of the p-value.
    """
    sum = 0.0
    m = nspec - 1

    pn = power * nspec

    while m >= 0:

        s = 0.0
        for i in range(int(m)-1):
            s += np.log(float(m-i))

        logterm = m*np.log(pn/2) - pn/2 - s
        term = np.exp(logterm)
        ratio = sum / term

        if ratio > 1.0e15:
            return sum

        sum += term
        m -= 1

    return sum


[docs]class Powerspectrum(Crossspectrum): def __init__(self, lc=None, norm='frac'): """ Make a Periodogram (power spectrum) from a (binned) light curve. Periodograms can be Leahy normalized or fractional rms normalized. You can also make an empty Periodogram object to populate with your own fourier-transformed data (this can sometimes be useful when making binned periodograms). Parameters ---------- lc: lightcurve.Lightcurve object, optional, default None The light curve data to be Fourier-transformed. norm: {"leahy" | "rms"}, optional, default "rms" The normaliation of the periodogram to be used. Options are "leahy" or "rms", default is "rms". Attributes ---------- norm: {"leahy" | "rms"} the normalization of the periodogram freq: numpy.ndarray The array of mid-bin frequencies that the Fourier transform samples power: numpy.ndarray The array of normalized squared absolute values of Fourier amplitudes df: float The frequency resolution m: int The number of averaged powers in each bin n: int The number of data points in the light curve nphots: float The total number of photons in the light curve """ Crossspectrum.__init__(self, lc1=lc, lc2=lc, norm=norm) self.nphots = self.nphots1
[docs] def rebin(self, df, method="mean"): bin_ps = Crossspectrum.rebin(self, df=df, method=method) bin_ps.nphots = bin_ps.nphots1 return bin_ps
[docs] def compute_rms(self, min_freq, max_freq): """ Compute the fractional rms amplitude in the periodgram between two frequencies. Parameters ---------- min_freq: float The lower frequency bound for the calculation max_freq: float The upper frequency bound for the calculation Returns ------- rms: float The fractional rms amplitude contained between min_freq and max_freq """ # assert min_freq >= self.freq[0], "Lower frequency bound must be " \ # "larger or equal the minimum " \ # "frequency in the periodogram!" # assert max_freq <= self.freq[-1], "Upper frequency bound must be " \ # "smaller or equal the maximum " \ # "frequency in the periodogram!" minind = self.freq.searchsorted(min_freq) maxind = self.freq.searchsorted(max_freq) powers = self.power[minind:maxind] if self.norm.lower() == 'leahy': rms = np.sqrt(np.sum(powers)/self.nphots) elif self.norm.lower() == "frac": rms = np.sqrt(np.sum(powers*self.df)) else: raise Exception("Normalization not recognized!") rms_err = self._rms_error(powers) return rms, rms_err
def _rms_error(self, powers): """ Compute the error on the fractional rms amplitude using error propagation. Note: this uses the actual measured powers, which is not strictly correct. We should be using the underlying power spectrum, but in the absence of an estimate of that, this will have to do. Parameters ---------- powers: iterable The list of powers used to compute the fractional rms amplitude. Returns ------- delta_rms: float the error on the fractional rms amplitude """ p_err = scipy.stats.chi2(2.0*self.m).var() * powers / self.m drms_dp = 1 / (2*np.sqrt(np.sum(powers)*self.df)) delta_rms = np.sum(p_err*drms_dp*self.df) return delta_rms
[docs] def classical_significances(self, threshold=1, trial_correction=False): """ Compute the classical significances for the powers in the power spectrum, assuming an underlying noise distribution that follows a chi-square distributions with 2M degrees of freedom, where M is the number of powers averaged in each bin. Note that this function will *only* produce correct results when the following underlying assumptions are fulfilled: (1) The power spectrum is Leahy-normalized (2) There is no source of variability in the data other than the periodic signal to be determined with this method. This is important! If there are other sources of (aperiodic) variability in the data, this method will *not* produce correct results, but instead produce a large number of spurious false positive detections! (3) There are no significant instrumental effects changing the statistical distribution of the powers (e.g. pile-up or dead time) By default, the method produces (index,p-values) for all powers in the power spectrum, where index is the numerical index of the power in question. If a `threshold` is set, then only powers with p-values *below* that threshold with their respective indices. If `trial_correction` is set to True, then the threshold will be corrected for the number of trials (frequencies) in the power spectrum before being used. Parameters ---------- threshold : float The threshold to be used when reporting p-values of potentially significant powers. Must be between 0 and 1. Default is 1 (all p-values will be reported). trial_correction : bool A Boolean flag that sets whether the `threshold` will be correted by the number of frequencies before being applied. This decreases the threshold (p-values need to be lower to count as significant). Default is False (report all powers) though for any application where `threshold` is set to something meaningful, this should also be applied! Returns ------- pvals : iterable A list of (index, p-value) tuples for all powers that have p-values lower than the threshold specified in `threshold`. """ assert self.norm == "leahy", "This method only works on " \ "Leahy-normalized power spectra!" # calculate p-values for all powers # leave out zeroth power since it just encodes the number of photons! pv = np.array([classical_pvalue(power, self.m) for power in self.power]) # if trial correction is used, then correct the threshold for # the number of powers in the power spectrum if trial_correction: threshold /= self.power.shape[0] # need to add 1 to the indices to make up for the fact that # we left out the first power above! indices = np.where(pv < threshold)[0] pvals = np.vstack([pv[indices], indices]) return pvals
[docs]class AveragedPowerspectrum(AveragedCrossspectrum, Powerspectrum): def __init__(self, lc, segment_size, norm="frac"): """ Make an averaged periodogram from a light curve by segmenting the light curve, Fourier-transforming each segment and then averaging the resulting periodograms. Parameters ---------- lc: lightcurve.Lightcurve object OR iterable of lightcurve.Lightcurve objects The light curve data to be Fourier-transformed. segment_size: float The size of each segment to average. Note that if the total duration of each Lightcurve object in lc is not an integer multiple of the segment_size, then any fraction left-over at the end of the time series will be lost. norm: {"leahy" | "rms"}, optional, default "rms" The normaliation of the periodogram to be used. Options are "leahy" or "rms", default is "rms". Attributes ---------- norm: {"leahy" | "rms"} the normalization of the periodogram freq: numpy.ndarray The array of mid-bin frequencies that the Fourier transform samples power: numpy.ndarray The array of normalized squared absolute values of Fourier amplitudes df: float The frequency resolution m: int The number of averaged periodograms n: int The number of data points in the light curve nphots: float The total number of photons in the light curve """ self.type = "powerspectrum" assert isinstance(norm, str), "norm is not a string!" assert norm.lower() in ["frac", "abs", "leahy", "none"], \ "norm must be 'frac', 'abs', 'leahy', or 'none'!" self.norm = norm.lower() assert np.isfinite(segment_size), "segment_size must be finite!" self.segment_size = segment_size Powerspectrum.__init__(self, lc, norm) return def _make_segment_spectrum(self, lc, segment_size): assert isinstance(lc, lightcurve.Lightcurve) # number of bins per segment nbins = int(segment_size/lc.dt) start_ind = 0 end_ind = nbins power_all = [] nphots_all = [] while end_ind <= lc.counts.shape[0]: time = lc.time[start_ind:end_ind] counts = lc.counts[start_ind:end_ind] lc_seg = lightcurve.Lightcurve(time, counts) power_seg = Powerspectrum(lc_seg, norm=self.norm) power_all.append(power_seg) nphots_all.append(np.sum(lc_seg.counts)) start_ind += nbins end_ind += nbins return power_all, nphots_all