Skip to content

API Reference for CumulativeGradientEstimator

Bases: object

Source code in spectral_metric/estimator.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
class CumulativeGradientEstimator(object):
    """Estimator of the CSG metric, a measure of dataset complexity."""

    def __init__(self, M_sample=250, k_nearest=3, distance="euclidean"):
        r"""
        The Cumulative Gradient Estimator, estimates the complexity of a dataset.

        Args:
            M_sample (int): Number of samples per class to use.
            k_nearest (int): Number of neighbours to look at to compute
                $P(C_c \vert x)$.
            distance: Name of the distance to use.
        """
        # NOTE: the docstring above is a raw string on purpose; in a regular
        # string literal the "\v" of "\vert" is read as a vertical tab.
        self.M_sample = M_sample
        self.k_nearest = k_nearest
        self.distance = distance

    def fit(self, data, target):
        """
        Estimate the CSG metric from the data.

        Args:
            data: data samples, ndarray (n_samples, n_features)
            target: target samples, ndarray (n_samples)

        Returns:
            The estimator itself; results are stored as attributes
            (``n_class``, ``class_indices`` and everything set by ``compute``).
        """
        # Re-seeds NumPy's global random state (seed=None) on every call.
        np.random.seed(None)
        # Work on a copy so the caller's array is never handed to the helpers.
        data_x = data.copy()
        # Number of label slots spanned from min(0, smallest label) up to the
        # largest label. Assumes integer labels -- TODO confirm with callers.
        self.n_class = np.max(target) - min(0, np.min(target)) + 1

        # Do class sampling
        class_samples, self.class_indices = find_samples(
            data_x, target, self.n_class, M=self.M_sample
        )

        self.compute(data_x, target, class_samples)
        return self

    def compute(self, data, target, class_samples):
        """
        Compute the difference matrix and the eigenvalues.

        Sets ``S``, ``similarity_arrays``, ``W``, ``difference``, ``L_mat``,
        ``evals``, ``evecs`` and ``csg`` on the instance. Requires
        ``class_indices`` and ``n_class`` to be set beforehand (``fit`` does
        this).

        Args:
            data: data samples, ndarray (n_samples, n_features)
            target: target samples, ndarray (n_samples)
            class_samples : class samples, Dict[class_idx, Array[M, n_features]]
        """
        # Compute E_{p(x\mid C_i)} [p(x\mid C_j)]
        self.S, self.similarity_arrays = compute_expectation_with_monte_carlo(
            data,
            target,
            class_samples,
            class_indices=self.class_indices,
            n_class=self.n_class,
            k_nearest=self.k_nearest,
            distance=self.distance,
        )

        # Pairwise class similarity W[i, j] = 1 - BrayCurtis(S[i], S[j]);
        # every entry (diagonal included) is overwritten by the loop.
        self.W = np.eye(self.n_class)
        for i, j in product(range(self.n_class), range(self.n_class)):
            self.W[i, j] = 1 - scipy.spatial.distance.braycurtis(self.S[i], self.S[j])

        # The difference matrix is the complement of the similarity matrix.
        self.difference = 1 - self.W

        # Get the Laplacian and its eigenvalues. The positional flags are
        # presumably scipy's ``normed=False, return_diag=True`` -- confirm
        # against the file's import of ``laplacian``. The second return value
        # is not used.
        self.L_mat, _diag = laplacian(self.W, False, True)
        try:
            self.evals, self.evecs = np.linalg.eigh(self.L_mat)
            self.csg = self._csg_from_evals(self.evals)
        except LinAlgError as e:
            # Best effort: keep the estimator usable and flag the failure with
            # NaNs. Note the success path stores the one-element array returned
            # by ``_csg_from_evals`` whereas this fallback stores a scalar NaN.
            log.warning(f"{str(e)}; assigning `evals,evecs,csg` to NaN")
            self.evals = np.ones([self.n_class]) * np.nan
            self.evecs = np.ones([self.n_class, self.n_class]) * np.nan
            self.csg = np.nan

    def _csg_from_evals(self, evals: np.ndarray) -> np.ndarray:
        """
        Turn an eigenvalue spectrum into the CSG value.

        Args:
            evals: 1-D array of eigenvalues, shape [n_class].

        Returns:
            One-element array (shape ``(1,)``): the sum of the running maximum
            of the weighted eigenvalue gaps.
        """
        # Gaps between consecutive eigenvalues, shape [n_class - 1].
        grads = evals[1:] - evals[:-1]
        n_gaps = grads.shape[-1]
        # Gap number k (0-based) is divided by (n_gaps - k + 1), i.e. the
        # divisors run n_gaps + 1, n_gaps, ..., 2. The leading axis makes the
        # result 2-D so that ``sum(1)`` below is valid.
        divisors = np.arange(n_gaps + 1, 1, -1)[None, :]
        ratios = grads / divisors
        # Running maximum along the gap axis, then summed over that axis.
        csg = np.maximum.accumulate(ratios, -1).sum(1)
        return csg

__init__(M_sample=250, k_nearest=3, distance='euclidean')

The Cumulative Gradient Estimator estimates the complexity of a dataset. Args: M_sample (int): Number of samples per class to use. k_nearest (int): Number of neighbours to look at to compute \(P(C_c \vert x)\). distance: Name of the distance to use.

Source code in spectral_metric/estimator.py
16
17
18
19
20
21
22
23
24
25
26
def __init__(self, M_sample=250, k_nearest=3, distance="euclidean"):
    r"""
    The Cumulative Gradient Estimator, estimates the complexity of a dataset.

    Args:
        M_sample (int): Number of samples per class to use.
        k_nearest (int): Number of neighbours to look at to compute
            $P(C_c \vert x)$.
        distance: Name of the distance to use.
    """
    # NOTE: the docstring above is a raw string on purpose; in a regular
    # string literal the "\v" of "\vert" is read as a vertical tab.
    self.M_sample = M_sample
    self.k_nearest = k_nearest
    self.distance = distance

compute(data, target, class_samples)

Compute the difference matrix and the eigenvalues Args: data: data samples, ndarray (n_samples, n_features) target: target samples, ndarray (n_samples) class_samples : class samples, Dict[class_idx, Array[M, n_features]]

Source code in spectral_metric/estimator.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def compute(self, data, target, class_samples):
    """
    Build the class-similarity matrix, its Laplacian spectrum and the CSG.

    Results are stored on the instance as ``S``, ``similarity_arrays``,
    ``W``, ``difference``, ``L_mat``, ``evals``, ``evecs`` and ``csg``.

    Args:
        data: data samples, ndarray (n_samples, n_features)
        target: target samples, ndarray (n_samples)
        class_samples : class samples, Dict[class_idx, Array[M, n_features]]
    """
    # Monte-Carlo estimate of E_{p(x\mid C_i)} [p(x\mid C_j)]
    expectation, per_class_similarity = compute_expectation_with_monte_carlo(
        data,
        target,
        class_samples,
        class_indices=self.class_indices,
        n_class=self.n_class,
        k_nearest=self.k_nearest,
        distance=self.distance,
    )
    self.S = expectation
    self.similarity_arrays = per_class_similarity

    # Similarity between every ordered pair of classes:
    # one minus the Bray-Curtis distance of their rows in S.
    n_class = self.n_class
    self.W = np.eye(n_class)
    for row in range(n_class):
        for col in range(n_class):
            dissimilarity = scipy.spatial.distance.braycurtis(
                self.S[row], self.S[col]
            )
            self.W[row, col] = 1 - dissimilarity

    self.difference = 1 - self.W

    # Laplacian of the similarity graph; the second returned value is unused.
    self.L_mat, _unused_diag = laplacian(self.W, False, True)

    # Spectrum of the Laplacian, falling back to NaNs if it cannot be computed.
    try:
        spectrum = np.linalg.eigh(self.L_mat)
        self.evals, self.evecs = spectrum
        self.csg = self._csg_from_evals(self.evals)
    except LinAlgError as e:
        log.warning(f"{str(e)}; assigning `evals,evecs,csg` to NaN")
        self.evals = np.full([self.n_class], np.nan)
        self.evecs = np.full([self.n_class, self.n_class], np.nan)
        self.csg = np.nan

fit(data, target)

Estimate the CSG metric from the data Args: data: data samples, ndarray (n_samples, n_features) target: target samples, ndarray (n_samples)

Source code in spectral_metric/estimator.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def fit(self, data, target):
    """
    Run the full CSG estimation on a labelled dataset.

    Args:
        data: data samples, ndarray (n_samples, n_features)
        target: target samples, ndarray (n_samples)

    Returns:
        The estimator itself, with the results stored as attributes.
    """
    np.random.seed(None)

    # Work on a private copy of the samples.
    features = data.copy()

    # Label slots spanned from min(0, smallest label) to the largest label.
    lowest_label = min(0, np.min(target))
    self.n_class = np.max(target) - lowest_label + 1

    # Draw the per-class samples and remember which indices belong to each class.
    sampled = find_samples(features, target, self.n_class, M=self.M_sample)
    class_samples, self.class_indices = sampled

    self.compute(features, target, class_samples)
    return self