LysandreJik committed on
Commit
ffeea82
·
1 Parent(s): c1e19a3

Max number of pipelines

Browse files
Files changed (1) hide show
  1. app.py +47 -3
app.py CHANGED
@@ -1,13 +1,53 @@
 
 
1
  import gradio as gr
2
  import plotly.graph_objects as go
3
- from datasets import load_dataset
4
  from huggingface_hub import list_datasets
5
 
6
  pipelines = [d.id[20:-21] for d in list_datasets(author='open-source-metrics') if 'checkpoint-downloads' in d.id]
7
 
8
 
9
- def plot(library: str, stacked: bool):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  dataset = load_dataset(f"open-source-metrics/{library}-checkpoint-downloads")['train']
 
 
 
 
11
 
12
  dates = dataset['dates']
13
  axis = dataset.column_names
@@ -24,7 +64,11 @@ def plot(library: str, stacked: bool):
24
 
25
 
26
  with gr.Blocks() as demo:
27
- inputs = [gr.Dropdown(pipelines), gr.Checkbox(label='Stacked')]
 
 
 
 
28
  submit = gr.Button('Submit')
29
  with gr.Row():
30
  outputs = [gr.Plot()]
 
1
+ from collections import OrderedDict
2
+
3
  import gradio as gr
4
  import plotly.graph_objects as go
5
+ from datasets import load_dataset, Dataset
6
  from huggingface_hub import list_datasets
7
 
8
  # Names of the libraries that have a checkpoint-downloads dataset on the Hub.
  # The slice [20:-21] strips the 20-character 'open-source-metrics/' prefix and
  # the 21-character '-checkpoint-downloads' suffix from each dataset id
  # (assumes every matching id has exactly that shape - TODO confirm).
  pipelines = [d.id[20:-21] for d in list_datasets(author='open-source-metrics') if 'checkpoint-downloads' in d.id]
9
 
10
 
11
def sum_with_none(iterator):
    """Return the sum of the values in *iterator*, skipping ``None`` entries.

    Args:
        iterator: Any iterable of numbers and/or ``None``; it is consumed once,
            so lazy iterators and generators are accepted.

    Returns:
        The sum of all non-``None`` values, or ``0`` when there are none.
    """
    # A generator expression avoids materialising an intermediate list.
    return sum(v for v in iterator if v is not None)
13
+
14
+
15
def merge_columns(dataset: Dataset, max_number_of_columns: int):
    """Fold the least-downloaded columns of *dataset* into one 'combined' column.

    Every column except 'dates' is ranked by its total (``None`` entries are
    ignored when totalling). Columns ranked after the first
    ``max_number_of_columns`` are removed and summed element-wise into a new
    'combined' column. The 'no_arch' column, when present, is always folded
    into 'combined', whatever its rank.

    Args:
        dataset: Dataset with a 'dates' column plus one numeric column per
            architecture (values may be ``None``).
        max_number_of_columns: Number of top-ranked columns to leave untouched.

    Returns:
        A new ``Dataset`` built from the remaining columns, with 'combined'
        appended last. ``None`` values inside 'combined' are treated as 0.
    """
    totals = {
        col: sum_with_none(dataset[col])
        for col in dataset.column_names
        if col != 'dates'
    }
    # sorted() is stable, so ties keep the dataset's original column order.
    ranked = sorted(totals, key=totals.get, reverse=True)
    to_merge = ranked[max_number_of_columns:]

    dictionary = dataset.to_dict()

    # 'no_arch' seeds the combined column. Tolerate its absence instead of
    # raising KeyError.
    seed = dictionary.pop('no_arch', None)
    combined = None if seed is None else [0 if v is None else v for v in seed]

    for name in to_merge:
        if name == 'no_arch':
            # Already folded in as the seed above; popping it again would
            # raise KeyError.
            continue

        column = [0 if v is None else v for v in dictionary.pop(name)]
        if combined is None:
            combined = column
        else:
            combined = [a + b for a, b in zip(combined, column)]

    if combined is not None:
        # Assigned after all pops so 'combined' ends up as the last column.
        dictionary['combined'] = combined

    return Dataset.from_dict(dictionary)
43
+
44
+
45
+ def plot(library: str, stacked: bool, number_of_pipelines_to_show: int):
46
  dataset = load_dataset(f"open-source-metrics/{library}-checkpoint-downloads")['train']
47
+ n_archs = len(dataset.column_names) - 1 # Remove dates
48
+
49
+ if n_archs > number_of_pipelines_to_show:
50
+ dataset = merge_columns(dataset, number_of_pipelines_to_show)
51
 
52
  dates = dataset['dates']
53
  axis = dataset.column_names
 
64
 
65
 
66
  with gr.Blocks() as demo:
67
+ inputs = [
68
+ gr.Dropdown(pipelines),
69
+ gr.Checkbox(label='Stacked'),
70
+ gr.Slider(minimum=1, maximum=len(pipelines), value=3, step=1, label="Max number of pipelines to show")
71
+ ]
72
  submit = gr.Button('Submit')
73
  with gr.Row():
74
  outputs = [gr.Plot()]