import numpy as np
import sklearn.random_projection
from scipy import fftpack
from scipy.sparse.linalg import svds
def leverage_score_exact(X, r=10):
    """Exact leverage scores of the rows of X with respect to its rank-r approximation."""
    (m, n) = X.shape
    u, s, v = svds(X, r)
    # The leverage score of row i is the squared Euclidean norm of row i of U_r.
    ls = np.square(np.apply_along_axis(np.linalg.norm, 1, u))
    # Q, R = np.linalg.qr(X)
    # ls = np.square(np.apply_along_axis(np.linalg.norm, 1, Q)) / n
    return ls
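# The commented-out lines above point at an equivalent route: when r equals the
# column rank of X, the squared row norms of Q from a thin QR match the squared
# row norms of U_r. Below is a minimal cross-check against a dense SVD; the
# helper name `_leverage_score_dense` is illustrative and not part of this module's API.
def _leverage_score_dense(X, r=10):
    # np.linalg.svd returns singular values in descending order, so the first
    # r columns of u span the best rank-r subspace.
    u, s, vh = np.linalg.svd(X, full_matrices=False)
    return np.square(np.linalg.norm(u[:, :r], axis=1))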
def leverage_score_aprx(X, c1, c2):
    """Approximate leverage scores of the rows of X via a randomized sketch [2]."""
    (m, n) = X.shape
    # Sketch X with a (padded or truncated) DCT of width c1, then pseudo-invert the sketch.
    SX = np.transpose(np.linalg.pinv(fftpack.dct(np.transpose(X), type=2, n=c1)))
    # Compress the pseudo-inverse with a Gaussian random projection to c2 dimensions.
    transformer = sklearn.random_projection.GaussianRandomProjection(c2)
    SXPI = transformer.fit_transform(SX)
    # The squared row norms of X.dot(SXPI) approximate the leverage scores; rescale so they sum to m.
    ls = np.square(np.apply_along_axis(np.linalg.norm, 1, X.dot(SXPI)))
    ls = m * ls / np.sum(ls)
    return ls
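# A rough sanity check of the randomized approximation against exact scores on a
# random tall matrix. This helper, its name, and the parameter choices (m, n,
# c1, c2, seed) are illustrative assumptions, not part of the original module.
def _compare_aprx_to_exact(m=200, n=10, c1=50, c2=10, seed=0):
    rng = np.random.default_rng(seed)
    X = rng.standard_normal((m, n))
    u = np.linalg.svd(X, full_matrices=False)[0]
    exact = np.square(np.linalg.norm(u, axis=1))
    exact = m * exact / np.sum(exact)  # same normalization as leverage_score_aprx
    approx = leverage_score_aprx(X, c1, c2)
    # The projection is random, so compare in aggregate rather than entrywise.
    return np.corrcoef(exact, approx)[0, 1]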
def leverage_score(A, k=10, method="exact", axis=0):
    '''
    Calculates the leverage score of each row (or column) of A, relative to the rank-k approximation of A.

    Parameters:
    -------------
    A: either an n by d np-array, or a tuple containing the SVD of A (the output of np.linalg.svd or of scipy.sparse.linalg.svds)
    k: rank of the low-rank approximation for which the leverage scores are calculated
    method: if "exact", calculate the leverage scores from the rank-k SVD [1]; if "approximate", use a Fast Johnson-Lindenstrauss Transform to approximate them [2]
    axis: dimension of the matrix to calculate leverage scores for (0: scores for the rows of A, 1: scores for the columns of A)

    Returns:
    -----------
    l: vector of leverage scores with length A.shape[axis]

    References:
    ------------
    [1] Michael W. Mahoney, "Randomized Algorithms for Matrices and Data" (page 7), https://arxiv.org/pdf/1104.5557.pdf
    [2] Petros Drineas, Malik Magdon-Ismail, Michael W. Mahoney, and David P. Woodruff, "Fast Approximation of Matrix Coherence and Statistical Leverage", JMLR, https://www.stat.berkeley.edu/~mmahoney/pubs/coherence-jmlr12.pdf
    '''
    if isinstance(A, tuple) and len(A) == 3:  # a precomputed SVD was provided
        u, s, v = A
        if axis == 1:
            (u, v) = (v.T, u.T)
        n = u.shape[0]
        d = v.shape[1]
        if len(s) > k:
            # If more singular values than needed were computed, keep the top k.
            # Note that different SVD solvers return their results in different
            # orders, so don't make assumptions about the order.
            ind = s.argsort()[-k:][::-1]
            s = s[ind]
            u = u[:, ind]
            v = v[ind, :]
    else:  # the matrix itself was provided
        if axis == 1:
            A = A.T
        (n, d) = A.shape
        if method == "exact":
            u, s, v = svds(A, k)
    if method == "exact":
        ls = np.square(np.apply_along_axis(np.linalg.norm, 1, u)) / k
    if method == "approximate":
        # Note: the approximate path expects the matrix itself, not an SVD tuple.
        ls = leverage_score_aprx(A, k, k) / n
    return ls
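# The function above accepts either the data matrix or a precomputed SVD tuple.
# scipy.sparse.linalg.svds does not guarantee descending singular values, which
# is why the top-k selection re-sorts them. Below is a minimal sketch of the
# precomputed-SVD path; the matrix size and ranks are illustrative assumptions.
def _example_precomputed_svd():
    rng = np.random.default_rng(0)
    A = rng.standard_normal((50, 8))
    # Pass the sparse SVD directly; only the top k=3 singular triplets are used.
    return leverage_score(svds(A, 5), k=3, axis=0)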
if __name__ == "__main__":
    A = np.array([[1, 1, 2, 2],
                  [2, 1, 3, 5],
                  [1, 2, 3, 1],
                  [3, 1, 4, 8]], dtype=float)
    print(leverage_score(A, k=2, axis=1))
    #%%
    svdA = np.linalg.svd(A)
    print(leverage_score(svdA, k=2, axis=1))
    #%%
    print(leverage_score(A, k=2, axis=1, method="approximate"))
    print(leverage_score(A, k=2, axis=0))
    #%%
    svdA = np.linalg.svd(A)
    print(leverage_score(svdA, k=2, axis=0))
    #%%
    print(leverage_score(A, k=2, axis=0, method="approximate"))