aliabd (HF staff) committed
Commit 17aed20 · 1 Parent(s): 5b892b0

Delete app.py with huggingface_hub

Files changed (1)
  1. app.py +0 -281
app.py DELETED
@@ -1,281 +0,0 @@
- import gradio as gr
- import math
- from functools import partial
- import matplotlib.pyplot as plt
- import numpy as np
- from sklearn.cluster import (
-     AgglomerativeClustering, Birch, DBSCAN, KMeans, MeanShift, OPTICS, SpectralClustering, estimate_bandwidth
- )
- from sklearn.datasets import make_blobs, make_circles, make_moons
- from sklearn.mixture import GaussianMixture
- from sklearn.neighbors import kneighbors_graph
- from sklearn.preprocessing import StandardScaler
-
- plt.style.use('seaborn')
- SEED = 0
- MAX_CLUSTERS = 10
- N_SAMPLES = 1000
- N_COLS = 3
- FIGSIZE = 7, 7  # does not affect size in webpage
- COLORS = [
-     'blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan'
- ]
- assert len(COLORS) >= MAX_CLUSTERS, "Not enough different colors for all clusters"
- np.random.seed(SEED)
-
-
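- # dataset helpers: each get_* function takes n_clusters and returns (X, labels)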
- def normalize(X):
-     return StandardScaler().fit_transform(X)
-
-
- def get_regular(n_clusters):
-     # spiral pattern
-     centers = [
-         [0, 0],
-         [1, 0],
-         [1, 1],
-         [0, 1],
-         [-1, 1],
-         [-1, 0],
-         [-1, -1],
-         [0, -1],
-         [1, -1],
-         [2, -1],
-     ][:n_clusters]
-     assert len(centers) == n_clusters
-     X, labels = make_blobs(n_samples=N_SAMPLES, centers=centers, cluster_std=0.25, random_state=SEED)
-     return normalize(X), labels
-
-
- def get_circles(n_clusters):
-     X, labels = make_circles(n_samples=N_SAMPLES, factor=0.5, noise=0.05, random_state=SEED)
-     return normalize(X), labels
-
-
- def get_moons(n_clusters):
-     X, labels = make_moons(n_samples=N_SAMPLES, noise=0.05, random_state=SEED)
-     return normalize(X), labels
-
-
- def get_noise(n_clusters):
-     np.random.seed(SEED)
-     X, labels = np.random.rand(N_SAMPLES, 2), np.random.randint(0, n_clusters, size=(N_SAMPLES,))
-     return normalize(X), labels
-
-
- def get_anisotropic(n_clusters):
-     X, labels = make_blobs(n_samples=N_SAMPLES, centers=n_clusters, random_state=170)
-     transformation = [[0.6, -0.6], [-0.4, 0.8]]
-     X = np.dot(X, transformation)
-     return X, labels
-
-
- def get_varied(n_clusters):
-     cluster_std = [1.0, 2.5, 0.5, 1.0, 2.5, 0.5, 1.0, 2.5, 0.5, 1.0][:n_clusters]
-     assert len(cluster_std) == n_clusters
-     X, labels = make_blobs(
-         n_samples=N_SAMPLES, centers=n_clusters, cluster_std=cluster_std, random_state=SEED
-     )
-     return normalize(X), labels
-
-
- def get_spiral(n_clusters):
-     # from https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_clustering.html
-     np.random.seed(SEED)
-     t = 1.5 * np.pi * (1 + 3 * np.random.rand(1, N_SAMPLES))
-     x = t * np.cos(t)
-     y = t * np.sin(t)
-     X = np.concatenate((x, y))
-     X += 0.7 * np.random.randn(2, N_SAMPLES)
-     X = np.ascontiguousarray(X.T)
-
-     labels = np.zeros(N_SAMPLES, dtype=int)
-     return normalize(X), labels
-
-
- DATA_MAPPING = {
-     'regular': get_regular,
-     'circles': get_circles,
-     'moons': get_moons,
-     'spiral': get_spiral,
-     'noise': get_noise,
-     'anisotropic': get_anisotropic,
-     'varied': get_varied,
- }
-
-
- def get_groundtruth_model(X, labels, n_clusters, **kwargs):
-     # dummy model to show true label distribution
-     class Dummy:
-         def __init__(self, y):
-             self.labels_ = y
-
-     return Dummy(labels)
-
-
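- # model builders: each fits a scikit-learn estimator on X; extra kwargs are forwarded via set_params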
- def get_kmeans(X, labels, n_clusters, **kwargs):
-     model = KMeans(init="k-means++", n_clusters=n_clusters, n_init=10, random_state=SEED)
-     model.set_params(**kwargs)
-     return model.fit(X)
-
-
- def get_dbscan(X, labels, n_clusters, **kwargs):
-     model = DBSCAN(eps=0.3)
-     model.set_params(**kwargs)
-     return model.fit(X)
-
-
- def get_agglomerative(X, labels, n_clusters, **kwargs):
-     connectivity = kneighbors_graph(
-         X, n_neighbors=n_clusters, include_self=False
-     )
-     # make connectivity symmetric
-     connectivity = 0.5 * (connectivity + connectivity.T)
-     model = AgglomerativeClustering(
-         n_clusters=n_clusters, linkage="ward", connectivity=connectivity
-     )
-     model.set_params(**kwargs)
-     return model.fit(X)
-
-
- def get_meanshift(X, labels, n_clusters, **kwargs):
-     bandwidth = estimate_bandwidth(X, quantile=0.25)
-     model = MeanShift(bandwidth=bandwidth, bin_seeding=True)
-     model.set_params(**kwargs)
-     return model.fit(X)
-
-
- def get_spectral(X, labels, n_clusters, **kwargs):
-     model = SpectralClustering(
-         n_clusters=n_clusters,
-         eigen_solver="arpack",
-         affinity="nearest_neighbors",
-     )
-     model.set_params(**kwargs)
-     return model.fit(X)
-
-
- def get_optics(X, labels, n_clusters, **kwargs):
-     model = OPTICS(
-         min_samples=7,
-         xi=0.05,
-         min_cluster_size=0.1,
-     )
-     model.set_params(**kwargs)
-     return model.fit(X)
-
-
- def get_birch(X, labels, n_clusters, **kwargs):
-     model = Birch(n_clusters=n_clusters)
-     model.set_params(**kwargs)
-     return model.fit(X)
-
-
- def get_gaussianmixture(X, labels, n_clusters, **kwargs):
-     model = GaussianMixture(
-         n_components=n_clusters, covariance_type="full", random_state=SEED,
-     )
-     model.set_params(**kwargs)
-     return model.fit(X)
-
-
- MODEL_MAPPING = {
-     'True labels': get_groundtruth_model,
-     'KMeans': get_kmeans,
-     'DBSCAN': get_dbscan,
-     'MeanShift': get_meanshift,
-     'SpectralClustering': get_spectral,
-     'OPTICS': get_optics,
-     'Birch': get_birch,
-     'GaussianMixture': get_gaussianmixture,
-     'AgglomerativeClustering': get_agglomerative,
- }
-
-
- def plot_clusters(ax, X, labels):
-     set_clusters = set(labels)
-     set_clusters.discard(-1)  # -1 signifies outliers, which we plot separately
-     for label, color in zip(sorted(set_clusters), COLORS):
-         idx = labels == label
-         if not sum(idx):
-             continue
-         ax.scatter(X[idx, 0], X[idx, 1], color=color)
-
-     # show outliers (if any)
-     idx = labels == -1
-     if sum(idx):
-         ax.scatter(X[idx, 0], X[idx, 1], c='k', marker='x')
-
-     ax.grid(None)
-     ax.set_xticks([])
-     ax.set_yticks([])
-     return ax
-
-
- def cluster(dataset: str, n_clusters: int, clustering_algorithm: str):
-     if isinstance(n_clusters, dict):
-         n_clusters = n_clusters['value']
-     else:
-         n_clusters = int(n_clusters)
-
-     X, labels = DATA_MAPPING[dataset](n_clusters)
-     model = MODEL_MAPPING[clustering_algorithm](X, labels, n_clusters=n_clusters)
-     if hasattr(model, "labels_"):
-         y_pred = model.labels_.astype(int)
-     else:
-         y_pred = model.predict(X)
-
-     fig, ax = plt.subplots(figsize=FIGSIZE)
-
-     plot_clusters(ax, X, y_pred)
-     ax.set_title(clustering_algorithm, fontsize=16)
-
-     return fig
-
-
- title = "Clustering with Scikit-learn"
- description = (
-     "This example shows how different clustering algorithms work. Simply pick "
-     "the dataset and the number of clusters to see how the algorithms behave. "
-     "Colored circles show the (predicted) labels and black x's mark outliers."
- )
-
-
- def iter_grid(n_rows, n_cols):
-     # create a grid using gradio Blocks
-     for _ in range(n_rows):
-         with gr.Row():
-             for _ in range(n_cols):
-                 with gr.Column():
-                     yield
-
- with gr.Blocks(title=title) as demo:
-     gr.HTML(f"<b>{title}</b>")
-     gr.Markdown(description)
-
-     input_models = list(MODEL_MAPPING)
-     input_data = gr.Radio(
-         list(DATA_MAPPING),
-         value="regular",
-         label="dataset"
-     )
-     input_n_clusters = gr.Slider(
-         minimum=1,
-         maximum=MAX_CLUSTERS,
-         value=4,
-         step=1,
-         label='Number of clusters'
-     )
-     n_rows = int(math.ceil(len(input_models) / N_COLS))
-     counter = 0
-     for _ in iter_grid(n_rows, N_COLS):
-         if counter >= len(input_models):
-             break
-
-         input_model = input_models[counter]
-         plot = gr.Plot(label=input_model)
-         fn = partial(cluster, clustering_algorithm=input_model)
-         input_data.change(fn=fn, inputs=[input_data, input_n_clusters], outputs=plot)
-         input_n_clusters.change(fn=fn, inputs=[input_data, input_n_clusters], outputs=plot)
-         counter += 1
-
- demo.launch()