Source code for fiducia.stats

 # -*- coding: utf-8 -*-
"""
Created on Tue Jul 21 12:11:12 2020

Common statistic operations

@author: Myles T. Brophy
"""
# python modules
import numpy as np
 
# listing all functions declared in this file so that sphinx-automodapi
# correctly documents them and doesn't document imported functions.
__all__ = ["simpsVariance",
           "trapzVariance",
           "gradientVariance",
           "dotVariance",
           "interpVariance",
           ]


[docs]def simpsVariance(yUnc, x=None, dx=1.0): r""" Propagates variance (:math:`\sigma^2`) through Simpson's rule numerical integration. NOTE: THIS FUNCTION IS INCOMPLETE AND HAS NOT BEEN VERIFIED 2020-08-21 PMK. Parameters ---------- yUnc: list, numpy.ndarray Uncertainties in the vertical axis. x: list, numpy.ndarray, optional Horizontal coordinates corresponding to yUnc. Default is None, which generates a uniformly spaced linear array of horizontal coordinates based on the length of yUnc and the value of dx. dx: float, optional Uniform spacing between horizontal coordinates corresponding to yUnc. Default is 1.0. Returns ------- variance: float Variance (:math:`\sigma^2`) on value of integral from Simpson's rule numerical integration. Notes ----- Based on a modified verison of: https://en.wikipedia.org/wiki/Simpson's_rule#Composite_Simpson's_rule_for_irregularly_spaced_data See also -------- Examples -------- """ N = len(yUnc) - 1 yUnc = np.asarray(yUnc) if x is None: d = np.full(N, dx) else: d = np.diff(np.asarray(x)) variance = 0.0 #TODO validate with hand calculations for i in range(1, N, 2): hph = d[i] + d[i - 1] variance += yUnc[i]**2 * ( ( d[i]**3 + d[i - 1]**3 + 3. * d[i] * d[i - 1] * hph )\ / ( 6 * d[i] * d[i - 1] ) )**2 variance += yUnc[i - 1]**2 * ( ( 2. * d[i - 1]**3 - d[i]**3 + 3. * d[i] * d[i - 1]**2)\ / ( 6 * d[i - 1] * hph) )**2 variance += yUnc[i + 1]**2 * ( ( 2. * d[i]**3 - d[i - 1]**3 + 3. * d[i - 1] * d[i]**2)\ / ( 6 * d[i] * hph ) )**2 if (N + 1) % 2 == 0: variance += yUnc[N]**2 * ( ( 2 * d[N - 1]**2 + 3. * d[N - 2] * d[N - 1])\ / ( 6 * ( d[N - 2] + d[N - 1] )) )**2 variance += yUnc[N - 1]**2 * ( ( d[N - 1]**2 + 3*d[N - 1]* d[N - 2] )\ / ( 6 * d[N - 2] ) )**2 variance -= yUnc[N - 2]**2 * (d[N - 1]**3\ / ( 6 * d[N - 2] * ( d[N - 2] + d[N - 1] )))**2 return variance
[docs]def trapzVariance(yUnc, x=None, dx=1.0): r""" Error propogation for Trapezoidal rule integration using uniform or non-uniform grids. Parameters ---------- yUnc : list, numpy.ndarray The list of uncertainities, referenced as :math:`\sigma_i`. x : list, numpy.ndarray, optional The sampling points for which the uncertainites ''y'' were found. Must be the same length as ''y''. If none are provided, then the step size will be uniform and set with ''dx''. The default is None. dx : int, float, optional Step size. Only applies if sampling points aren't specified with ''x''. The default is 1.0. Returns ------- variance : float The total variance (:math:`\sigma^2`) found by propagating ''y''. Notes ----- Trap rule integration with non uniform spacing takes the form .. math:: \sum_{k=1}^N \frac{\Delta x_i}{2} \left(f(x)_{i-1} + f(y)_i \right) Propogating the uncertainties through this integration results in .. math:: \sigma^2 = \frac{1}{4} \left(\sum_{k=1}^N \Delta x_i \sigma_{i-1}^2 + \sigma_i^2 + 2\sum_{k=1}^{N-1} \Delta x_i \Delta x_i+1 \sigma_i^2 \right) The equation is generalized and applies to uniform and non-uniform step sizes. See also -------- Examples -------- """ N = len(yUnc) yUnc = np.asarray(yUnc) if x is None: d = np.full(N-1, dx) else: d = np.diff(np.asarray(x)) variance = 0.0 #add variance of indiviual trapezoid variances for i in range(1, N): variance += d[i-1] ** 2 * 0.25 * (yUnc[i-1] ** 2 + yUnc[i] ** 2) #add covariant terms because of edge overlap for i in range(1, N-1): variance += 0.5 * d[i-1] * d[i] * yUnc[i] ** 2 return variance
[docs]def gradientVariance(yUnc, x=None, dx=1.0): r""" Propogates uncertainty for the gradient operator of an array of a given step size. Parameters ---------- yUnc : list, numpy.ndarray The list of uncertainities, referenced as :math:'\sigma_i'. x : list, numpy.ndarray, optional The sampling points for which the uncertainites ''y'' were found. Must be the same length as ''y''. If none are provided, then the step size will be uniform and set with ''dx''. The default is None. dx : list, numpy.ndarray, optional Step size. Only applies if sampling points aren't specified with ''x''. The default is 1.0. Returns ------- variance : float The total variance (:math:`\sigma^2`) found by propagating ''y''. Notes ----- .. math:: \operatorname{Var}(\nabla y_i) = \frac{h_{i-1}^2 \sigma_{i+1}^2 + (h_i^2 + h_{i-1}^2)^2 \sigma_i^2 - h_i^4 \sigma_{i-1}^2}{(h_i h_{i-1}(h_i + h_{i-1}))^2} At the boundaries .. math:: \operatorName{Var}(\nabla y_0) = \frac{\sigma_1^2 - \sigma_0^2}{h_0^2}, \operatorName{Var}(\nabla y_{N-1}) = \frac{\sigma_{N-1}^2 - \sigma_{N-2}^2}{h_{N-2}^2} See also -------- Examples -------- """ N = len(yUnc) np.asarray(yUnc) if x is None: h = np.full(N-1, dx) else: h = np.diff(np.asarray(x)) grad = np.zeros(N) #square the uncertainites, cleaner to do it here than in the loop yUnc = yUnc**2 grad[0] == (yUnc[1]-yUnc[0])/(h[0]**2) grad[N-1] == (yUnc[N-1]-yUnc[N-2])/(h[N-2]**2) for i in range(1, N-1): firstTerm = h[i-1]**4*yUnc[i+1] secondTerm = (h[i]**2 - h[i-1]**2)**2 * yUnc[i] thirdTerm = h[i]**4*yUnc[i-1] denom = (h[i]*h[i-1]*(h[i]+h[i-1]))**2 grad[i] = (firstTerm + secondTerm - thirdTerm) / denom return grad
[docs]def dotVariance(a, b, aUncertainty=None, bUncertainty=None): r""" Propogate uncertainty for the dot product of matrix a and 1D vector b. Propogate uncertainty for the dot product of a matrix and a 1D vector. Assumes no covariance between `a` and `b`. Methodology is similar to :func:`numpy.dot` where: - If both `a` and `b` are 1D, the uncertainty of the inner product of vectors is returned. - If 'a' is N dimensional (Where :math: `N>=2`) and `b` is 1D, the uncertainty of the sum product of the last axis of a with b is returned. 0-D (scalar) arrays are not supported. `b` arrays that have more than one axis are not supported. `a` and `b` must have the same shape as `aUncertainty` and `bUncertainty`, respectively. Parameters ---------- a : numpy.ndarray, list Matrix or vector to dot with 'b'. b : numpy.ndarray, list Vector that 'a' will be dotted with. Must be the same size as the last axis of a. aUncertainty : numpy.ndarray, optional Uncertainty of each element in 'a'. The default is None. bUncertainty : numpy.ndarray, optional Uncertainty of each element in 'b'. The default is None. Returns ------- variance : float Notes ----- .. math:: \operatorname{Var}(A \cdot B) = \sum_{i=1}^N \operatorname{Var}(a_i b_i) Assuming covariance between independent variables .. math:: \sum_{i=1}^N (a_i\sigma_{b_i})^2 + (b_i\sigma_{a_i})^2 See also -------- Examples -------- """ a = np.asarray(a) b = np.asarray(b) if aUncertainty is None: aUncertainty = np.zeros(a.shape) else: aUncertainty = np.asarray(aUncertainty) if bUncertainty is None: bUncertainty = np.zeros(b.shape) else: bUncertainty = np.asarray(bUncertainty) #first see if the shapes are compatible #also checks that try: if b.ndim != 1: raise NotImplementedError("Propogating uncertainty not implemented for b arguments that aren't 1D.") if b.shape != bUncertainty.shape or a.shape != aUncertainty.shape: raise ValueError("Array does not have the same shape as it's uncertainty array.") np.dot(a, b) np.dot(a, bUncertainty) np.dot(aUncertainty, b) except ValueError: raise else: #uncertainty propagation for dotting vectors if a.ndim == 1: variance = np.dot(b ** 2, aUncertainty ** 2) + np.dot(a ** 2, bUncertainty ** 2) return variance #uncertainty propagation for dotting matrix with vector elif a.ndim >=2: variance = np.zeros(a.shape[:-1]) for i in range(a.shape[0]): variance[i] = dotVariance(a[i], b, aUncertainty[i], bUncertainty) return variance
[docs]def interpVariance(x, xp, fpUnc, leftVar=None, rightVar=None, period=None): r""" Propagate uncertainty for linear interpolation. Parameters ---------- x : numpy.ndarray, list The x-coordinates at which to evaluate the interpolated values. xp : numpy.ndarray, list The 1D x-coordinates of the data points, must be increasing order fpUnc : numpy.ndarray, list The uncertainty in the y-coordinates of the data points, same length as `xp`. leftVar : float, optional Variance to return for `x < xp[0]`. If not given, the first `yUnc` element will be used. Default is `None` rightVar : float, optional Variance to return for `x > xp[-1]`. If not given, the last `yUnc` element will be used. Default is `None` Returns ------- yVar : numpy.ndarray 1D array containing the variance for each interpolated `x`. Notes ----- Variance of interpolated point, assuming no uncertainty in `x` and `xp`, and no covariance between y-coordinates, is given by .. math:: \operatorname{Var}(y) = \frac{1}{(x_1 - x_0)^2} ( (x_1 - x)^2 \sigma_{y_0}^2 + (x-x_0)^2 \sigma_{y_1}^2 ) See also -------- Examples -------- """ #conver to ndarray x = np.asarray(x) xp = np.asarray(xp) fpUnc = np.asarray(fpUnc) #make space for final answer yVar = np.zeros(x.shape) #find the indexs where x is between xp_i and xp_{i+1} indices = np.searchsorted(xp, x, side='left') for i, index in enumerate(indices): #if index is 0 and is not equal to the first data point if index == 0 and x[i] != xp[0]: #set to default value if leftVar: variance = leftVar else: variance = fpUnc[0]**2 #if the index is after the last datapoint elif index >= len(xp): #set to default value if rightVar: variance = rightVar else: variance = fpUnc[-1]**2 else: rightIndex = index leftIndex = index-1 #if we already have this value if xp[rightIndex] == x[i]: variance = fpUnc[rightIndex] ** 2 elif xp[leftIndex] == x[i]: variance = fpUnc[leftIndex] ** 2 else: #new value, actually need to do interpolation propagation term1 = 1 / (xp[rightIndex] - xp[leftIndex]) ** 2 term2 = (xp[rightIndex] - x[i]) ** 2 * fpUnc[leftIndex] ** 2 + (x[i] - xp[leftIndex]) ** 2 *fpUnc[rightIndex] ** 2 variance = term1 * term2 yVar[i] = variance return yVar