Source code for spalor.models.cur

from scipy.sparse.linalg import svds
from numpy.linalg import pinv
import numpy as np
from spalor.matrix_tools import leverage_score
[docs]class CUR:
    '''
    Dimensionality reduction based on a low-rank matrix faactorization:
        A=C*U*R
    where C consists of columns sampled from A, R is rows sampled from A, and U=(C'*C)^-1 *C' *A  * R'*(R*R')^-1.

    Typically, the columns and rows are selected at random with probabilites proportional to the leverage scores.


    Parameters:
    ------------
    n_rows : int, default=10
        Number of rows to sample.
    n_cols : int, default=10
        Number of columns to sample.

    method : {'exact', 'approximate', 'random'}, default='leverage_scores_QR'
        method to selct rows.
            - "exact": randomly select by leverage scores
            - "approximate" : randomly select columns by approximated leverage scores
            - "random" : randomly select columns

    Attributes:
    ------------
    d1 : int
        number or rows in the original matrix
    d2 : int
        number of columns in the original matrix
    cols : list 
        list containing indices of columns sampled
    C : ndarray, shape = (d1,n_components)
        Columns sampled
    X : ndarray, shape = (n_components, d2)
        Score matrix, often used for classification. Coordinates in the lower dimensional column space

    Example:
    --------
    ```
    A=np.array([[1, 1, 2, 2],
        [2, 1, 3, 5],
        [1, 2, 3, 1],
        [3, 1, 4, 8]], dtype=float)

    cur=CUR(n_rows=2, n_cols=2)
    cur.fit(A)

    print("C:\n", cur.C)
    print("U:\n", cur.U)
    print("R:\n", cur.R)
    print("columns used: \n", cur.cols)
    print("rows used: \n", cur.rows)
    ```
    '''

    def __init__(self,n_rows=10,n_cols=10, r=None):
        self.n_rows=n_rows
        self.n_cols=n_cols
        self.r=r

        if self.r is None:
            self.r=max(self.n_cols,self.n_rows)

[docs]    def fit(self, A, rows=None, cols=None):
        '''
        fit matrix A to CUR model
        '''

        self.A=A
        (d1,d2)=A.shape

        svdA=svds(self.A,self.r)

        self.svdA=svdA

        if cols==None:
            ls_u=leverage_score(svdA, k=self.n_cols, axis=1)
            cols = np.random.choice(len(ls_u), self.n_cols, p=ls_u)
        if rows ==None:
            ls_v=leverage_score(svdA, k=self.n_rows, axis=0)
            rows = np.random.choice(len(ls_v), self.n_rows, p=ls_v)


        self.cols=[np.argsort(ls_u)[-self.n_rows:]]
        self.rows=[np.argsort(ls_v)[-self.n_cols:]]

        self.C=np.squeeze(self.A[:,self.cols])
        self.R=np.squeeze(self.A[self.rows,:])

        self.U=pinv(self.C).dot(self.A).dot(pinv(self.R))


    def transform(self,X):
        return np.squeeze(X[:,self.cols])

    def fit_transforms(self,X):
        self.fit(X)
        return self.C

    # def inverse_transform(X):
    #     return self.X.dot(self.U).dot(self.R)

    def get_params(self):
        return (self.C, self.U, self.R)


if __name__=="__main__":
    A=np.array([[1, 1, 2, 2],
            [2, 1, 3, 5],
            [1, 2, 3, 1],
            [3, 1, 4, 8]], dtype=float)

    cur=CUR(n_rows=2, n_cols=2)
    cur.fit(A)

    print("C:\n", cur.C)
    print("U:\n", cur.U)
    print("R:\n", cur.R)
    print("columns used: \n", cur.cols)
    print("rows used: \n", cur.rows)
SpaLoR

Documentation

Source code for spalor.models.cur