Source code for spalor.matrix_tools.leverage_score

import numpy as np
import sklearn.random_projection
from scipy import fftpack
from scipy.sparse.linalg import svds



[docs]def leverage_score_exact(X,r=10): (m, n) = X.shape u,s,v=svds(X,r) ls=np.square(np.apply_along_axis(np.linalg.norm, 1,u)) #Q,R=np.linalg.qr(X) #ls= np.square(np.apply_along_axis(np.linalg.norm, 1,Q))/n return ls
[docs]def leverage_score_aprx(X, c1, c2): (m,n)=X.shape SX=np.transpose(np.linalg.pinv(fftpack.dct(np.transpose(X),type=2, n=c1))) transformer=sklearn.random_projection.GaussianRandomProjection(c2) SXPI = transformer.fit_transform(SX) ls=np.square(np.apply_along_axis(np.linalg.norm, 1, X.dot(SXPI))) ls=m*ls/np.sum(ls) return ls
[docs]def leverage_score(A, k=10, method="exact", axis=0): ''' Calcluates the leverage statistic for each row (or column) of X when calculating the rank k approximation of A. Parameters: ------------- A: either a n by d np-array or a tuple containing the SVD of A (the output of np.linalg.svd or the output of scipy.sparse.linalg.svds) k: rank for which the leverage statitics of the low rank approximation are calculated for method: If exact, calculate leverage scores using the rank-k svd [1]. if approximate, use the Fast Johnson-Lindenstrauss Transform to approximate leverage scores[2]. axis: dimension of the matrix to calclute leverage scores for (0: calculate score for columns, 1: calculate for rows) Returns: ----------- l: vector of leverage scores with length A.shape[axis] References: ------------ [1] Randomized algorithms for matrices and data, Michael W. Mahoney (page 7), https://arxiv.org/pdf/1104.5557.pdf [2] Fast Approximation of Matrix Coherence and Statistical Leverage, Petros Drineas and Malik Magdon-Ismail and Michael W. Mahoney and David P. Woodruff, JMLR, https://www.stat.berkeley.edu/~mmahoney/pubs/coherence-jmlr12.pdf ''' if type(A) ==tuple and len(A)==3: # if svd is provided svdA=A u=svdA[0] s=svdA[1] v=svdA[2] if axis==1: (u, v)= (v.T, u.T) n=u.shape[0] d=v.shape[1] if len(s)>k: ''' if more singular values than needed are calculated, get the top k. Note that different SVD solvers give results in different orders, so dont make assumptions about the order ''' ind=s.argsort()[-k:][::-1] s=s[ind] u=u[:, ind] v=v[ind, :] else: # if svd is not provided if axis== 1: A=A.T (n,d) = A.shape if method=='exact': [u,s,v]=svds(A,k) if method=="exact": ls=np.square(np.apply_along_axis(np.linalg.norm, 1,u))/k if method=="approximate": ls=leverage_score_aprx(A,k, k)/n return ls
if __name__=="__main__": A=np.array([[1, 1, 2, 2], [2, 1, 3, 5], [1, 2, 3, 1], [3, 1, 4, 8]], dtype=float) print(leverage_score(A, k=2, axis=1)) #%% svdA=np.linalg.svd(A) print(leverage_score(svdA, k=2, axis=1)) #%% print(leverage_score(A, k=2, axis=1, method="approximate")) print(leverage_score(A, k=2, axis=0)) #%% svdA=np.linalg.svd(A) print(leverage_score(svdA, k=2, axis=0)) #%% print(leverage_score(A, k=2, axis=0, method="approximate"))