awips2/pythonPackages/scientific/Scientific/Statistics/__init__.py
root 3360eb6c5f Initial revision of AWIPS2 11.9.0-7p5
Former-commit-id: 9f19e3f712 [formerly 64fa9254b946eae7e61bbc3f513b7c3696c4f54f]
Former-commit-id: 06a8b51d6d
2012-01-06 08:55:05 -06:00

184 lines
5.1 KiB
Python
Executable file

# Basic statistics functions.
#
# Written by Konrad Hinsen <hinsen@cnrs-orleans.fr>
# With contributions from Moshe Zadka <mzadka@geocities.com>
# last revision: 2006-5-28
#
"""
Basic monovariate statistics
"""
from Scientific import N
#
# Univariate statistics functions
#
def moment(data, order, about=None, theoretical=1):
    """
    Compute a moment of the distribution sampled by a set of data

    The deviations of the data from the center are raised to the
    requested power, summed, and divided by a count. With an explicit
    center the count is len(data) (unless C{theoretical} is passed as 0);
    when no center is given, the mean of the data is used as the center
    and the count is len(data)-1.

    @param data: a sequence of data points
    @type data: C{Numeric.array} of numbers
    @param order: the order of the moment to be calculated
    @type order: C{int}
    @param about: the center of the distribution. If C{None}, the
                  mean value of the data is used as the center.
    @type about: number
    @param theoretical: for internal use. A value of 1 divides by
                        len(data), a value of 0 by len(data)-1. It is
                        forced to 0 whenever C{about} is C{None}.
    @returns: the moment of the given order about the specified center
    @rtype: number
    """
    # Multiplying by 1. turns integer input into floating point.
    values = 1.*N.array(data)
    if about is None:
        # The center comes from the data itself, so the divisor
        # computed below becomes len(data)-1.
        about = mean(values)
        theoretical = 0
    divisor = len(values) - (1 - theoretical)
    deviations = values - about
    return 1.*N.add.reduce(deviations**order)/divisor
def mean(data):
    """
    Arithmetic mean (average value, first moment about zero)

    @param data: a sequence of numbers
    @type data: C{list} or C{Numeric.array}
    @returns: the mean of the number sequence
    @rtype: number
    """
    # Passing an explicit center of 0 makes moment() divide the sum
    # of the data by len(data).
    return moment(data, order=1, about=0)

average = mean
def weightedMean(data, sigma):
    """
    Weighted mean of a sequence of numbers with given standard deviations

    Each measurement is weighted by the inverse square of its standard
    deviation.

    @param data: a sequence of measurements
    @type data: C{list} or C{Numeric.array}
    @param sigma: a sequence of corresponding standard deviations
    @type sigma: C{list} or C{Numeric.array}
    @returns: the weighted mean and corresponding standard deviation
    @rtype: C{tuple} of two elements
    @raises ValueError: if data and sigma have different lengths
    """
    if len(data) != len(sigma):
        raise ValueError("data and sigma must have equal length")
    # Multiplying by 1. turns integer input into floating point.
    data = 1.*N.array(data)
    sigma = 1.*N.array(sigma)
    # The squared deviations are needed by both sums; compute them once.
    variances = sigma**2
    weight_sum = N.sum(1./variances)
    # Named weighted_mean so the module-level mean() is not shadowed.
    weighted_mean = N.sum(data/variances) / weight_sum
    weighted_sigma = N.sqrt(1./weight_sum)
    return weighted_mean, weighted_sigma
def variance(data):
    """
    Variance (second moment about the mean)

    @param data: a sequence of numbers
    @type data: C{list} or C{Numeric.array}
    @returns: the variance of the number sequence
    @rtype: number
    """
    # No center is given, so moment() uses the mean of the data and
    # divides by len(data)-1.
    return moment(data, order=2)
def standardDeviation(data):
    """
    Standard deviation (square root of the variance)

    @param data: a sequence of numbers
    @type data: C{list} or C{Numeric.array}
    @returns: the standard deviation of the number sequence
    @rtype: number
    """
    spread = variance(data)
    return N.sqrt(spread)
def median(data):
    """
    Median

    The data are sorted. For an odd number of elements the middle
    element is returned (as a float); for an even number the average of
    the two middle elements is returned.

    @param data: a sequence of numbers
    @type data: C{list} or C{Numeric.array}
    @returns: the median of the number sequence
    @rtype: number
    """
    ordered = N.sort(N.array(data))
    count = len(ordered)
    # For an odd count both indices name the same middle element; for
    # an even count they name the two elements around the center.
    lower = (count - 1) // 2
    upper = count // 2
    return (ordered[lower] + ordered[upper])/2.
def mode(data):
    """
    Mode (most frequent value)

    If several values share the highest number of occurrences, the
    largest of those values is returned.

    @param data: a sequence of hashable values
    @returns: the value that occurs most often in the sequence
    """
    counts = {}
    for value in data:
        counts[value] = counts.get(value, 0) + 1
    # (count, value) pairs compare by count first; equal counts are
    # then decided by the larger value.
    ranked = [(count, value) for value, count in counts.items()]
    return max(ranked)[1]
def normalizedMoment(data, order):
    """
    Calculate the normalized moments of the distribution corresponding
    to a set of data. Normalization means division by the n-th power of
    the standard deviation, n being the order of the moment.

    Both the requested moment and the second moment used for the
    normalization are taken about the mean with an explicit center, so
    moment() divides each sum by len(data) rather than len(data)-1.

    @param data: a sequence of data points
    @type data: C{Numeric.array} of numbers
    @param order: the order of the moment to be calculated
    @type order: C{int}
    @returns: the normalized moment of the given order
    @rtype: number
    """
    center = mean(data)
    central_moment = moment(data, order, center)
    second_moment = moment(data, 2, center)
    # sqrt(second_moment**order) is the standard deviation raised to
    # the power 'order'.
    return central_moment/N.sqrt(second_moment**order)
def skewness(data):
    """
    Skewness (normalized moment of order three)

    @param data: a sequence of numbers
    @type data: C{list} or C{Numeric.array}
    @returns: the skewness of the number sequence
    @rtype: number
    """
    return normalizedMoment(data, order=3)
def kurtosis(data):
    """
    Kurtosis (normalized moment of order four)

    @param data: a sequence of numbers
    @type data: C{list} or C{Numeric.array}
    @returns: the kurtosis of the number sequence
    @rtype: number
    """
    return normalizedMoment(data, order=4)
## def chiSquare(data):
## h = {}
## for n in data:
## try: h[n] = h[n]+1
## except KeyError: h[n] = 1
## h = N.array(h.values())
## h = h/N.add.reduce(h)
## return moment(h, 2, 1./len(h))
def correlation(data1, data2):
    """
    Calculates the correlation coefficient between two data sequences

    Both sequences are centered on their own mean; the result is the sum
    of the products of the deviations divided by the square root of the
    product of the two sums of squared deviations.

    @param data1: first data sequence
    @type data1: C{Numeric.array} or C{list}
    @param data2: second data sequence of length identical to data1
    @type data2: C{Numeric.array} or C{list}
    @returns: the correlation coefficient between data1 and data2
    @rtype: number
    @raises ValueError: if data1 and data2 have different lengths
    """
    if len(data1) != len(data2):
        raise ValueError("data series must have equal length")
    # Coerce to floating point, as moment() and weightedMean() do.
    # Without this, integer input is centered on a truncated mean when
    # '/' performs integer division, which gives a wrong coefficient.
    dev1 = 1.*N.array(data1)
    dev2 = 1.*N.array(data2)
    dev1 = dev1 - N.add.reduce(dev1)/len(dev1)
    dev2 = dev2 - N.add.reduce(dev2)/len(dev2)
    cross = N.add.reduce(dev1*dev2)
    norm = N.sqrt(N.add.reduce(dev1*dev1) * N.add.reduce(dev2*dev2))
    return cross / norm