Spaces:
Runtime error
Runtime error
LysandreJik
committed on
Commit
·
9ac5ea2
1
Parent(s):
ffd3765
Decimate
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import os
|
2 |
from datetime import datetime, timedelta
|
3 |
import json
|
@@ -16,7 +17,7 @@ HfFolder.save_token(HF_TOKEN)
|
|
16 |
datasets = {
|
17 |
# "stars": load_dataset("open-source-metrics/stars"),
|
18 |
"issues": load_dataset("open-source-metrics/issues"),
|
19 |
-
"pip": load_dataset("open-source-metrics/pip")
|
20 |
}
|
21 |
|
22 |
|
@@ -49,6 +50,8 @@ class RequestHandler(SimpleHTTPRequestHandler):
|
|
49 |
self.end_headers()
|
50 |
|
51 |
# TODO: Send and display warnings
|
|
|
|
|
52 |
self.wfile.write(json.dumps(list(dataset_with_most_splits)).encode("utf-8"))
|
53 |
|
54 |
return SimpleHTTPRequestHandler
|
@@ -61,8 +64,7 @@ class RequestHandler(SimpleHTTPRequestHandler):
|
|
61 |
|
62 |
returned_values = {}
|
63 |
for library_name in library_names:
|
64 |
-
|
65 |
-
for i in dataset:
|
66 |
if i['day'] in returned_values:
|
67 |
returned_values[i['day']][library_name] = i['num_downloads']
|
68 |
else:
|
@@ -71,10 +73,11 @@ class RequestHandler(SimpleHTTPRequestHandler):
|
|
71 |
for library_name in library_names:
|
72 |
for i in returned_values.keys():
|
73 |
if library_name not in returned_values[i]:
|
74 |
-
returned_values[i][library_name] =
|
75 |
|
76 |
-
|
77 |
-
output[
|
|
|
78 |
|
79 |
self.send_response(200)
|
80 |
self.send_header("Content-Type", "application/json")
|
@@ -91,12 +94,17 @@ class RequestHandler(SimpleHTTPRequestHandler):
|
|
91 |
library_names = library_names.split(',')
|
92 |
|
93 |
returned_values = {}
|
94 |
-
dataset_dict = load_dataset(f"open-source-metrics/stars", use_auth_token=True
|
95 |
|
96 |
for library_name in library_names:
|
97 |
dataset = dataset_dict[library_name]
|
|
|
98 |
n = 0
|
99 |
-
for i in dataset:
|
|
|
|
|
|
|
|
|
100 |
n += 1
|
101 |
if i['dates'] in returned_values:
|
102 |
returned_values[i['dates']][library_name] = n
|
@@ -108,6 +116,7 @@ class RequestHandler(SimpleHTTPRequestHandler):
|
|
108 |
if library_name not in returned_values[i]:
|
109 |
returned_values[i][library_name] = None
|
110 |
|
|
|
111 |
output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
|
112 |
output['day'] = list(returned_values.keys())[::-1]
|
113 |
|
|
|
1 |
+
import collections
|
2 |
import os
|
3 |
from datetime import datetime, timedelta
|
4 |
import json
|
|
|
17 |
datasets = {
|
18 |
# "stars": load_dataset("open-source-metrics/stars"),
|
19 |
"issues": load_dataset("open-source-metrics/issues"),
|
20 |
+
"pip": load_dataset("open-source-metrics/pip").sort('day')
|
21 |
}
|
22 |
|
23 |
|
|
|
50 |
self.end_headers()
|
51 |
|
52 |
# TODO: Send and display warnings
|
53 |
+
dataset_with_most_splits = list(dataset_with_most_splits)
|
54 |
+
dataset_with_most_splits.sort()
|
55 |
self.wfile.write(json.dumps(list(dataset_with_most_splits)).encode("utf-8"))
|
56 |
|
57 |
return SimpleHTTPRequestHandler
|
|
|
64 |
|
65 |
returned_values = {}
|
66 |
for library_name in library_names:
|
67 |
+
for i in datasets['pip'][library_name]:
|
|
|
68 |
if i['day'] in returned_values:
|
69 |
returned_values[i['day']][library_name] = i['num_downloads']
|
70 |
else:
|
|
|
73 |
for library_name in library_names:
|
74 |
for i in returned_values.keys():
|
75 |
if library_name not in returned_values[i]:
|
76 |
+
returned_values[i][library_name] = None
|
77 |
|
78 |
+
returned_values = collections.OrderedDict(sorted(returned_values.items()))
|
79 |
+
output = {l: [k[l] for k in returned_values.values()] for l in library_names}
|
80 |
+
output['day'] = list(returned_values.keys())
|
81 |
|
82 |
self.send_response(200)
|
83 |
self.send_header("Content-Type", "application/json")
|
|
|
94 |
library_names = library_names.split(',')
|
95 |
|
96 |
returned_values = {}
|
97 |
+
dataset_dict = load_dataset(f"open-source-metrics/stars", use_auth_token=True).sort('dates')
|
98 |
|
99 |
for library_name in library_names:
|
100 |
dataset = dataset_dict[library_name]
|
101 |
+
|
102 |
n = 0
|
103 |
+
for k, i in enumerate(dataset):
|
104 |
+
# Decimate values if there are too many
|
105 |
+
if len(dataset) > 1000 and k % int(len(dataset) / 1000) != 0:
|
106 |
+
continue
|
107 |
+
|
108 |
n += 1
|
109 |
if i['dates'] in returned_values:
|
110 |
returned_values[i['dates']][library_name] = n
|
|
|
116 |
if library_name not in returned_values[i]:
|
117 |
returned_values[i][library_name] = None
|
118 |
|
119 |
+
returned_values = collections.OrderedDict(sorted(returned_values.items()))
|
120 |
output = {l: [k[l] for k in returned_values.values()][::-1] for l in library_names}
|
121 |
output['day'] = list(returned_values.keys())[::-1]
|
122 |
|