%PDF- %PDF-
| Direktori : /lib/python3/dist-packages/scipy/stats/ |
| Current File : //lib/python3/dist-packages/scipy/stats/_hypotests_pythran.py |
import numpy as np
#pythran export _Aij(float[:,:], int, int)
#pythran export _Aij(int[:,:], int, int)
def _Aij(A, i, j):
"""Sum of upper-left and lower right blocks of contingency table."""
# See `somersd` References [2] bottom of page 309
return A[:i, :j].sum() + A[i+1:, j+1:].sum()
#pythran export _Dij(float[:,:], int, int)
#pythran export _Dij(int[:,:], int, int)
def _Dij(A, i, j):
"""Sum of lower-left and upper-right blocks of contingency table."""
# See `somersd` References [2] bottom of page 309
return A[i+1:, :j].sum() + A[:i, j+1:].sum()
#pythran export _P(float[:,:])
#pythran export _P(int[:,:])
def _P(A):
"""Twice the number of concordant pairs, excluding ties."""
# See `somersd` References [2] bottom of page 309
m, n = A.shape
count = 0
for i in range(m):
for j in range(n):
count += A[i, j]*_Aij(A, i, j)
return count
#pythran export _Q(float[:,:])
#pythran export _Q(int[:,:])
def _Q(A):
"""Twice the number of discordant pairs, excluding ties."""
# See `somersd` References [2] bottom of page 309
m, n = A.shape
count = 0
for i in range(m):
for j in range(n):
count += A[i, j]*_Dij(A, i, j)
return count
#pythran export _a_ij_Aij_Dij2(float[:,:])
#pythran export _a_ij_Aij_Dij2(int[:,:])
def _a_ij_Aij_Dij2(A):
"""A term that appears in the ASE of Kendall's tau and Somers' D."""
# See `somersd` References [2] section 4: Modified ASEs to test the null hypothesis...
m, n = A.shape
count = 0
for i in range(m):
for j in range(n):
count += A[i, j]*(_Aij(A, i, j) - _Dij(A, i, j))**2
return count
#pythran export _compute_outer_prob_inside_method(int64, int64, int64, int64)
def _compute_outer_prob_inside_method(m, n, g, h):
"""
Count the proportion of paths that do not stay strictly inside two
diagonal lines.
Parameters
----------
m : integer
m > 0
n : integer
n > 0
g : integer
g is greatest common divisor of m and n
h : integer
0 <= h <= lcm(m,n)
Returns
-------
p : float
The proportion of paths that do not stay inside the two lines.
The classical algorithm counts the integer lattice paths from (0, 0)
to (m, n) which satisfy |x/m - y/n| < h / lcm(m, n).
The paths make steps of size +1 in either positive x or positive y
directions.
We are, however, interested in 1 - proportion to computes p-values,
so we change the recursion to compute 1 - p directly while staying
within the "inside method" a described by Hodges.
We generally follow Hodges' treatment of Drion/Gnedenko/Korolyuk.
Hodges, J.L. Jr.,
"The Significance Probability of the Smirnov Two-Sample Test,"
Arkiv fiur Matematik, 3, No. 43 (1958), 469-86.
For the recursion for 1-p see
Viehmann, T.: "Numerically more stable computation of the p-values
for the two-sample Kolmogorov-Smirnov test," arXiv: 2102.08037
"""
# Probability is symmetrical in m, n. Computation below uses m >= n.
if m < n:
m, n = n, m
mg = m // g
ng = n // g
# Count the integer lattice paths from (0, 0) to (m, n) which satisfy
# |nx/g - my/g| < h.
# Compute matrix A such that:
# A(x, 0) = A(0, y) = 1
# A(x, y) = A(x, y-1) + A(x-1, y), for x,y>=1, except that
# A(x, y) = 0 if |x/m - y/n|>= h
# Probability is A(m, n)/binom(m+n, n)
# Optimizations exist for m==n, m==n*p.
# Only need to preserve a single column of A, and only a
# sliding window of it.
# minj keeps track of the slide.
minj, maxj = 0, min(int(np.ceil(h / mg)), n + 1)
curlen = maxj - minj
# Make a vector long enough to hold maximum window needed.
lenA = min(2 * maxj + 2, n + 1)
# This is an integer calculation, but the entries are essentially
# binomial coefficients, hence grow quickly.
# Scaling after each column is computed avoids dividing by a
# large binomial coefficient at the end, but is not sufficient to avoid
# the large dyanamic range which appears during the calculation.
# Instead we rescale based on the magnitude of the right most term in
# the column and keep track of an exponent separately and apply
# it at the end of the calculation. Similarly when multiplying by
# the binomial coefficient
dtype = np.float64
A = np.ones(lenA, dtype=dtype)
# Initialize the first column
A[minj:maxj] = 0.0
for i in range(1, m + 1):
# Generate the next column.
# First calculate the sliding window
lastminj, lastlen = minj, curlen
minj = max(int(np.floor((ng * i - h) / mg)) + 1, 0)
minj = min(minj, n)
maxj = min(int(np.ceil((ng * i + h) / mg)), n + 1)
if maxj <= minj:
return 1.0
# Now fill in the values. We cannot use cumsum, unfortunately.
val = 0.0 if minj == 0 else 1.0
for jj in range(maxj - minj):
j = jj + minj
val = (A[jj + minj - lastminj] * i + val * j) / (i + j)
A[jj] = val
curlen = maxj - minj
if lastlen > curlen:
# Set some carried-over elements to 1
A[maxj - minj:maxj - minj + (lastlen - curlen)] = 1
return A[maxj - minj - 1]