File size: 16,175 Bytes
7b5e67a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365

from abc import ABC, abstractmethod

from time import time
import numpy as np
import scipy

from umap.umap_ import compute_membership_strengths

from singleVis.backend import get_graph_elements

# helper functions
def knn_dists(X, indices, knn_indices):
    data = X[indices][:,None,:]
    knn_data = X[knn_indices]
    knn_dists = np.linalg.norm(data-knn_data, axis=2)
    return knn_dists

class TemporalEdgeConstructorAbstractClass(ABC):
    @abstractmethod
    def __init__(self) -> None:
        pass

    @abstractmethod
    def construct(self, *args, **kwargs):
        # return head, tail, weight, feature_vectors
        pass


'''Base class for complex edges constructor'''
class TemporalEdgeConstructor(TemporalEdgeConstructorAbstractClass):
  
    def __init__(self, X, time_step_nums, sigmas, rhos, n_neighbors, n_epochs) -> None:
        """Init Parameters for Temporal Edge Constructor

        Parameters
        ----------
        X : ndarray, shape (N, feature_dim)
            feature vectors for complex construction
        time_step_nums : list, [(t_num, b_num)]
            the number of training points and boundary points of all time steps
        sigmas : ndarray, shape (N_T+N_B,)
            the sigmas of all feature vector
        rhos : ndarray, shape (N_T+N_B,)
            the rhos of all feature vectors
        n_neighbors : int
            locally connectivity
        n_epochs: int
        """
        self.features = X
        self.time_step_nums = time_step_nums
        self.time_steps = len(time_step_nums)
        self.sigmas = sigmas
        self.rhos = rhos
        self.n_neighbors = n_neighbors
        self.n_epochs = n_epochs
    
    def temporal_simplicial_set(
        self,
        rows,
        cols,
        vals,
        n_vertice,
        set_op_mix_ratio=1.0,
        apply_set_operations=True):
        """
        Given the edges and edge weights, compute the simplicial set
        (here represented as a fuzzy graph in the form of a sparse matrix)
        associated to the data.
        This is done by locally approximating geodesic distance at each point,
        creating a fuzzy simplicial set for each such point,
        and then combining all the local fuzzy simplicial sets into a global one via a fuzzy union.

        Parameters
        ----------
        rows: list
            index list of edge_to
        cols: list
            index list of edge_from
        vals: list
            list of edge weights
        n_vertice: int
            the number of vertices
        set_op_mix_ratio: float (optional, default 1.0)
            Interpolate between (fuzzy) union and intersection as the set operation
            used to combine local fuzzy simplicial sets to obtain a global fuzzy
            simplicial sets. Both fuzzy set operations use the product t-norm.
            The value of this parameter should be between 0.0 and 1.0; a value of
            1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy
            intersection.
        local_connectivity: int (optional, default 1)
            The local connectivity required -- i.e. the number of nearest
            neighbors that should be assumed to be connected at a local level.
            The higher this value the more connected the manifold becomes
            locally. In practice this should be not more than the local intrinsic
            dimension of the manifold.
        apply_set_operations:

        Returns:
        ----------
        coo_matrix
            A fuzzy simplicial set represented as a sparse matrix. The (i,
            j) entry of the matrix represents the membership strength of the
            1-simplex between the ith and jth sample points.
        """
        result = scipy.sparse.coo_matrix(
            (vals, (rows, cols)), shape=(n_vertice, n_vertice)
        )
        result.eliminate_zeros()

        if apply_set_operations:
            transpose = result.transpose()
            prod_matrix = result.multiply(transpose)
            result = (
                    set_op_mix_ratio * (result + transpose - prod_matrix)
                    + (1.0 - set_op_mix_ratio) * prod_matrix
            )
        result.eliminate_zeros()
        return result
        
    def construct(self):
        return NotImplemented


"""
Strategies:
    Local strategy:
        connect each sample to its nearby epochs
    Global strategy:
        connect each sample to its k temporal neighbors
    GlobalParallel strategy:
        connect each sample to its k temporal neighbors
"""

class LocalTemporalEdgeConstructor(TemporalEdgeConstructor):
    def __init__(self, X, time_step_nums, sigmas, rhos, n_neighbors, n_epochs, persistent, time_step_idxs_list, knn_indices) -> None:
        """
        construct temporal edges based on same data
        link data to its next epoch

        Parameters
        ----------
        X : ndarray, shape (N, feature_dim)
            feature vectors for complex construction
        time_step_nums : list, [(t_num, b_num)]
            the number of training points and boundary points of all time steps
        sigmas : ndarray, shape (N_T+N_B,)
            the sigmas of all feature vector
        rhos : ndarray, shape (N_T+N_B,)
            the rhos of all feature vectors
        n_neighbors : int
            locally connectivity
        persistent : int
            the sliding window size
        time_step_idxs_list : list
            the index list connect each time step to its next time step
        knn_indices : ndarray, shape (N, n_neighbors)
            the knn indices of samples in each time step
        """
        super().__init__(X, time_step_nums, sigmas, rhos, n_neighbors, n_epochs)
        self.persistence = persistent
        self.time_step_idxs_list = time_step_idxs_list
        self.knn_indices = knn_indices
    
    def construct(self):
        """construct temporal edges

        Returns
        -------
        time_complex: scipy matrix
            the temporal complex containing temporal edges
        """
        rows = np.zeros(1, dtype=np.int32)
        cols = np.zeros(1, dtype=np.int32)
        vals = np.zeros(1, dtype=np.float32)

        n_all = 0
        time_step_num = list()
        for i in self.time_step_nums:
            time_step_num.append(n_all)
            n_all = n_all + i[0]
        n_all = 0
        all_step_num = list()
        for i in self.time_step_nums:
            all_step_num.append(n_all)
            n_all = n_all + i[0] + i[1]
        
        # forward
        for window in range(1, self.persistent + 1, 1):
            for step in range(0, self.time_steps - window, 1):
                knn_indices_in = - np.ones((n_all, self.n_neighbors))
                knn_dist = np.zeros((n_all, self.n_neighbors))

                next_knn = self.knn_indices[time_step_num[step+window]:time_step_num[step+window] + self.time_step_nums[step + window][0]]

                # knn_indices_in[all_step_num[step]: all_step_num[step] + time_step_nums[step + window][0]] = next_knn
                increase_idx = all_step_num[step]
                assert len(next_knn) == len(self.time_step_idxs_list[step+window])
                for i in range(len(self.time_step_idxs_list[step+window])):
                    knn_indices_in[increase_idx + self.time_step_idxs_list[step+window][i]]=next_knn[i]
                knn_indices_in = knn_indices_in.astype('int')

                indices = np.arange(all_step_num[step], all_step_num[step] + self.time_step_nums[step][0], 1)[self.time_step_idxs_list[step+window]]
                knn_dists_t = knn_dists(self.features, indices, next_knn)

                # knn_dist[all_step_num[step]:all_step_num[step] + time_step_nums[step + window][0]] = knn_dists_t
                assert len(knn_dists_t) == len(self.time_step_idxs_list[step+window])
                for i in range(len(self.time_step_idxs_list[step+window])):
                    knn_dist[increase_idx + self.time_step_idxs_list[step+window][i]]=knn_dists_t[i]
                knn_dist = knn_dist.astype('float32')

                rows_t, cols_t, vals_t, _ = compute_membership_strengths(knn_indices_in, knn_dist, self.sigmas, self.rhos, return_dists=False)
                idxs = vals_t > 0
                rows = np.concatenate((rows, rows_t[idxs]), axis=0)
                cols = np.concatenate((cols, cols_t[idxs]), axis=0)
                vals = np.concatenate((vals, vals_t[idxs]), axis=0)
        # backward
        for window in range(1, self.persistent + 1, 1):
            for step in range(self.time_steps-1, 0 + window, -1):
                knn_indices_in = - np.ones((n_all, self.n_neighbors))
                knn_dist = np.zeros((n_all, self.n_neighbors))

                prev_knn = self.knn_indices[time_step_num[step-window]:time_step_num[step-window] + self.time_step_nums[step-window][0]]

                knn_indices_in[all_step_num[step]: all_step_num[step] + self.time_step_nums[step][0]] = prev_knn[self.time_step_idxs_list[step]]
                knn_indices_in = knn_indices_in.astype('int')

                indices = np.arange(all_step_num[step], all_step_num[step] + self.time_step_nums[step][0], 1)
                knn_dists_t = knn_dists(self.features, indices, prev_knn[self.time_step_idxs_list[step]])

                knn_dist[all_step_num[step]:all_step_num[step] + self.time_step_nums[step][0]] = knn_dists_t
                knn_dist = knn_dist.astype('float32')

                rows_t, cols_t, vals_t, _ = compute_membership_strengths(knn_indices_in, knn_dist, self.sigmas, self.rhos, return_dists=False)
                idxs = vals_t > 0
                rows = np.concatenate((rows, rows_t[idxs]), axis=0)
                cols = np.concatenate((cols, cols_t[idxs]), axis=0)
                vals = np.concatenate((vals, vals_t[idxs]), axis=0)
        time_complex = self.temporal_simplicial_set(rows=rows, cols=cols, vals=vals, n_vertice=len(self.features))

        # normalize for symmetry reason
        _, heads, tails, weights, _ = get_graph_elements(time_complex, n_epochs=self.n_epochs)
        
        return heads, tails, weights




class GlobalTemporalEdgeConstructor(TemporalEdgeConstructor):
    def __init__(self, X, time_step_nums, sigmas, rhos, n_neighbors, n_epochs) -> None:
        super().__init__(X, time_step_nums, sigmas, rhos, n_neighbors, n_epochs)
    
    def construct(self):
        rows = np.zeros(1, dtype=np.int32)
        cols = np.zeros(1, dtype=np.int32)
        vals = np.zeros(1, dtype=np.float32)

        # base_idx denote the starting point of each time step (including borders)
        base_idx = 0
        base_idx_list = list()
        for i in self.time_step_nums:
            base_idx_list.append(base_idx)
            base_idx = base_idx + i[0] + i[1]
        base_idx_list = np.array(base_idx_list, dtype=int)

        # denote the training data idxs range at each time step
        valid_idx_list = list()
        for i in range(len(self.time_step_nums)):
            valid_idx_list.append(base_idx_list[i]+self.time_step_nums[i][0])
        valid_idx_list = np.array(valid_idx_list, dtype=int)
        
        num = len(self.features)

        # placeholder for knn_indices and knn_dists
        indices = - np.ones((num, self.n_neighbors), dtype=int)
        dists = np.zeros((num, self.n_neighbors), dtype=np.float32)

        for time_step in range(self.time_steps):
            start_idx = base_idx_list[time_step]
            end_idx = start_idx + self.time_step_nums[time_step][0] - 1
            move_positions = base_idx_list - start_idx
            for train_sample_idx in range(start_idx, end_idx + 1, 1):
                candidate_idxs = train_sample_idx + move_positions
                candidate_idxs = candidate_idxs[np.logical_and(candidate_idxs>=base_idx_list, candidate_idxs<valid_idx_list)]
                nn_dist = knn_dists(self.features, [train_sample_idx], candidate_idxs).squeeze(axis=0)
                # find top k
                order = np.argsort(nn_dist)
                # deal with if len(candidate_idxs)<n_neighbors situation
                top_k_idxs = candidate_idxs[order<self.n_neighbors]
                top_k_idxs = np.pad(top_k_idxs, (0, self.n_neighbors-len(top_k_idxs)), 'constant', constant_values=-1).astype('int')
                top_k_dists = nn_dist[order<self.n_neighbors]
                top_k_dists = np.pad(top_k_dists, (0, self.n_neighbors-len(top_k_dists)), 'constant', constant_values=0.).astype(np.float32)

                indices[train_sample_idx] = top_k_idxs
                dists[train_sample_idx] = top_k_dists

        rows, cols, vals, _ = compute_membership_strengths(indices, dists, self.sigmas, self.rhos, return_dists=False)
        # build time complex
        time_complex = self.temporal_simplicial_set(rows=rows, cols=cols, vals=vals, n_vertice=num)
        # normalize for symmetry reason
        _, heads, tails, weights, _ = get_graph_elements(time_complex, n_epochs=self.n_epochs)

        return heads, tails, weights


class GlobalParallelTemporalEdgeConstructor(TemporalEdgeConstructor):
    def __init__(self, X, time_step_nums, sigmas, rhos, n_neighbors, n_epochs, selected_idxs_lists) -> None:
        super().__init__(X, time_step_nums, sigmas, rhos, n_neighbors, n_epochs)
        self.selected_idxs = selected_idxs_lists
    
    def construct(self):
        # base index denote the starting point of each epoch (including border points)
        base_idx = 0
        base_idx_list = list()
        for i in self.time_step_nums:
            base_idx_list.append(base_idx)
            base_idx = base_idx + i[0] + i[1]
        base_idx_list = np.array(base_idx_list, dtype=int)

        num = len(self.features)

        # placeholder for knn_indices and knn_dists
        indices = - np.ones((num, self.n_neighbors), dtype=int)
        dists = np.zeros((num, self.n_neighbors), dtype=np.float32)

        for time_step in range(self.time_steps):
            for point_idx in range(len(self.selected_idxs[time_step])):
                true_idx = self.selected_idxs[time_step][point_idx]

                identical_self = list()
                for e in range(self.time_steps):
                    arg = np.argwhere(self.selected_idxs[e]==true_idx)
                    if arg.shape[0]:
                        target_idx = arg[0][0]
                        identical_self.append(base_idx_list[e]+target_idx)
                    
                if len(identical_self) >0:
                    # identical self number exceeds n_neighbors, need to select top n_neighbors
                    curr_idx = base_idx_list[time_step]+point_idx
                    candidate_idxs = np.array(identical_self)
                    nn_dist = knn_dists(self.features, [curr_idx], candidate_idxs).squeeze(axis=0)
                    # find top k
                    order = np.argsort(nn_dist)
                    # deal with if len(candidate_idxs)<n_neighbors situation
                    top_k_idxs = candidate_idxs[order<self.n_neighbors]
                    top_k_idxs = np.pad(top_k_idxs, (0, self.n_neighbors-len(top_k_idxs)), 'constant', constant_values=-1).astype('int')
                    top_k_dists = nn_dist[order<self.n_neighbors]
                    top_k_dists = np.pad(top_k_dists, (0, self.n_neighbors-len(top_k_dists)), 'constant', constant_values=0.).astype(np.float32)

                    indices[curr_idx] = top_k_idxs
                    dists[curr_idx] = top_k_dists

        rows, cols, vals, _ = compute_membership_strengths(indices, dists, self.sigmas, self.rhos, return_dists=False)
        if len(rows)>0:
            # build time complex
            time_complex = self.temporal_simplicial_set(rows=rows, cols=cols, vals=vals, n_vertice=num)
            # normalize for symmetry reason
            _, heads, tails, weights, _ = get_graph_elements(time_complex, n_epochs=self.n_epochs)

            return heads, tails, weights
        else:
            rows = np.zeros(1, dtype=np.int32)
            cols = np.zeros(1, dtype=np.int32)
            vals = np.zeros(1, dtype=np.float32)
            return rows, cols, vals