-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimilarity.py
107 lines (83 loc) · 3.53 KB
/
similarity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import numpy as np
import scipy as sp
import scipy.stats as stats
from scipy.spatial.distance import pdist, squareform
import copy
def distcorr(u, v, pval, nruns=500):
    """Return the distance correlation between two samples.

    Both inputs are flattened, so each element is treated as one scalar
    observation. The statistic is built from the double-centred pairwise
    distance matrices of the two samples.

    Parameters
    ----------
    u, v : array_like
        Samples with the same number of elements.
    pval : bool
        When true, also estimate a permutation p-value.
    nruns : int, optional
        Number of random permutations used for the p-value.

    Returns
    -------
    dcor : float
        The distance correlation (returned alone when ``pval`` is false).
        It is defined as 0.0 when either sample has zero distance variance
        (for example a constant sample) instead of the ``nan`` that a 0/0
        division would produce.
    (dcor, p) : tuple
        Returned when ``pval`` is true; ``p`` is the fraction of
        permutations of ``v`` whose distance correlation is >= ``dcor``.

    Raises
    ------
    ValueError
        If the two samples do not contain the same number of elements.
    """
    X = np.asarray(u, dtype=float).reshape(-1, 1)
    Y = np.asarray(v, dtype=float).reshape(-1, 1)
    n = X.shape[0]
    if Y.shape[0] != n:
        raise ValueError('Number of samples must match')

    A = _centered_distance_matrix(X)
    B = _centered_distance_matrix(Y)
    n_sq = float(n * n)
    dvar_x = (A * A).sum() / n_sq
    dvar_y = (B * B).sum() / n_sq
    # The normaliser depends only on the two distance variances, which are
    # invariant under permutation of the observations, so compute it once.
    scale = np.sqrt(np.sqrt(dvar_x) * np.sqrt(dvar_y))

    dcor = _scaled_dcov(A, B, n_sq, scale)
    if not pval:
        return dcor

    # Permuting the observations of v permutes the rows and columns of its
    # centred distance matrix in the same way, so there is no need to rebuild
    # the distance matrices for every run.
    order = np.arange(n)
    greater = 0
    for _ in range(nruns):
        np.random.shuffle(order)
        if _scaled_dcov(A, B[np.ix_(order, order)], n_sq, scale) >= dcor:
            greater += 1
    return (dcor, greater / float(nruns))


def _centered_distance_matrix(samples):
    """Return the double-centred pairwise distance matrix of (n, 1) samples."""
    d = squareform(pdist(samples))
    return d - d.mean(axis=0)[None, :] - d.mean(axis=1)[:, None] + d.mean()


def _scaled_dcov(A, B, n_sq, scale):
    """Return sqrt(dCov^2(A, B)) / scale, or 0.0 when scale is zero."""
    if scale == 0:
        return 0.0
    # Rounding can push a theoretically non-negative value slightly below
    # zero; clamp so the square root does not turn it into nan.
    dcov2_xy = max((A * B).sum() / n_sq, 0.0)
    return np.sqrt(dcov2_xy) / scale
def pearson(u, v, pval):
    """Return the Pearson correlation coefficient between two 1-D samples.

    Parameters
    ----------
    u, v : array_like
        One-dimensional samples of equal length.
    pval : bool
        When true, also return the p-value reported by
        ``scipy.stats.pearsonr``.

    Returns
    -------
    float
        The correlation coefficient, when ``pval`` is false.
    (float, float)
        ``(coefficient, p_value)``, when ``pval`` is true.

    Notes
    -----
    Both branches return the same coefficient. The samples are mean-centred
    before the normalised dot product is taken; without centring that
    expression is the cosine similarity, which equals Pearson's r only for
    data that already has zero mean. A constant sample yields ``nan``.
    """
    if pval:
        coefficient, p_value = stats.pearsonr(u, v)
        return (float(coefficient), float(p_value))

    du = np.asarray(u, dtype=float)
    dv = np.asarray(v, dtype=float)
    du = du - du.mean()
    dv = dv - dv.mean()
    return float(np.dot(du, dv) / (np.linalg.norm(du) * np.linalg.norm(dv)))
def tau(u, v, pval):
    """Return Kendall's tau between two samples.

    Parameters
    ----------
    u, v : array_like
        Samples of equal length, passed to ``scipy.stats.kendalltau``.
    pval : bool
        When true, also return the p-value of the test.

    Returns
    -------
    float
        The tau coefficient, when ``pval`` is false.
    (float, float)
        ``(tau, p_value)``, when ``pval`` is true.
    """
    # Keep the p-value in its own name: reusing ``pval`` for it would
    # overwrite the caller's flag before it is checked.
    coefficient, p_value = stats.kendalltau(u, v)
    if pval:
        return (float(coefficient), float(p_value))
    return float(coefficient)
################################################################################################################################
################################################################################################################################
################################################################################################################################
# tau* (t*) association measure, computed by the R package "TauStar" through rpy2.
################################################################################################################################
################################################################################################################################
################################################################################################################################
# rpy2 provides the bridge to R; the R package "TauStar" is loaded below at
# import time, so importing this module requires both to be available.
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
# Load the TauStar package through rpy2.
# NOTE(review): `utils` is not referenced in the visible part of this file.
utils = importr("TauStar")
# Also attach TauStar inside the R session so that R code evaluated with
# ro.r(...) can call tStar / tauStarTest by name.
ro.r('library("TauStar")')
def taustar(u, v, pval):
    """Return the tau* statistic computed by the R package TauStar.

    Parameters
    ----------
    u, v : iterable of numbers
        Samples handed to R as numeric vectors.
    pval : bool
        When true, also run ``tauStarTest`` and return its p-value.

    Returns
    -------
    float
        The value of ``tStar(x, y)``, when ``pval`` is false.
    list of float
        ``[tau_star, p_value]``, when ``pval`` is true.

    Notes
    -----
    The samples are bound to the R global variables ``x`` and ``y`` (and the
    test result to ``testResults``), overwriting any previous values.
    """
    # Hand the data over as R numeric vectors instead of pasting Python's
    # tuple repr into R source: the repr is not valid R for a single-element
    # sample ("c(1.0,)") or when elements print as "np.float64(...)".
    ro.globalenv['x'] = ro.FloatVector([float(value) for value in u])
    ro.globalenv['y'] = ro.FloatVector([float(value) for value in v])

    tau_star = float(ro.r('tStar(x, y)')[0])
    if not pval:
        return tau_star

    ro.r('testResults = tauStarTest(x,y)')
    p_value = ro.r('testResults$pVal[1]')[0]
    return [tau_star, float(p_value)]
def matrix_associations(A, method, pval=False):
    """Compute an association measure for every pair of columns of ``A``.

    Parameters
    ----------
    A : numpy.ndarray
        Two-dimensional array; each column is one variable.
    method : str or callable
        One of ``"pearson"``, ``"tau"``, ``"distcorr"``, ``"taustar"``, or a
        callable invoked as ``method(col_j, col_k, pval=False)``.
    pval : bool, optional
        Must be false; p-values are not supported here yet.

    Returns
    -------
    (indices, distances) : tuple
        ``indices[j]`` is the list of pairs ``(j, k)`` with ``j < k`` and
        ``distances[j]`` is a NumPy array holding the association value of
        each of those pairs, in the same order. The entry for the last
        column is empty.

    Raises
    ------
    ValueError
        If ``pval`` is true, or if ``method`` is a name that is not one of
        the supported measures.
    """
    if pval:
        raise ValueError("pvalue should be False, since this option is not available yet.")

    # Resolve names lazily so a caller-supplied callable is used as is.
    if callable(method):
        measure = method
    elif method == "pearson":
        measure = pearson
    elif method == "tau":
        measure = tau
    elif method == "distcorr":
        measure = distcorr
    elif method == "taustar":
        measure = taustar
    else:
        # Fail with a clear message instead of the "'str' object is not
        # callable" TypeError an unrecognised name would otherwise cause.
        raise ValueError(
            "Unknown method {!r}; expected 'pearson', 'tau', 'distcorr', "
            "'taustar' or a callable.".format(method)
        )

    _, p = A.shape
    indices = [[(j, k) for k in range(j + 1, p)] for j in range(p)]
    distances = [
        np.array([float(measure(A[:, j], A[:, k], pval=False)) for j, k in pairs])
        for pairs in indices
    ]
    return (indices, distances)
# Example usage (the method may be given by name or as a callable):
# A = np.random.uniform(0, 1, size = (100, 3))
# print(matrix_associations(A, "distcorr"))
# print(matrix_associations(A, pearson))
# print(matrix_associations(A, tau))
# print(matrix_associations(A, taustar))