Update app.py: comment out Hub data loading, CommitScheduler setup, and historical-data replay
Browse files
app.py
CHANGED
@@ -43,45 +43,45 @@ DATA_FOLDER = Path("bluesky_data")
|
|
43 |
DATA_FILE = f"bluesky_counts_{uuid.uuid4()}.json"
|
44 |
|
45 |
|
46 |
-
def load_hub_data():
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
# Initialize storage and Hub connection
|
76 |
-
DATA_FOLDER.mkdir(exist_ok=True)
|
77 |
-
scheduler = CommitScheduler(
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
)
|
85 |
|
86 |
|
87 |
def on_message_handler(message):
|
@@ -98,17 +98,17 @@ def emit_counts():
|
|
98 |
"""Emit post counts every second"""
|
99 |
global post_count
|
100 |
|
101 |
-
if saved_data := load_hub_data():
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
|
113 |
# Wait for first second to collect initial data
|
114 |
time.sleep(1)
|
|
|
43 |
DATA_FILE = f"bluesky_counts_{uuid.uuid4()}.json"
|
44 |
|
45 |
|
46 |
+
# def load_hub_data():
|
47 |
+
# """Load the most recent data from the Hub"""
|
48 |
+
# try:
|
49 |
+
# api = HfApi(token=HF_TOKEN)
|
50 |
+
# # List files in the repository
|
51 |
+
# files = api.list_repo_files(REPO_ID, repo_type=REPO_TYPE)
|
52 |
+
# data_files = [f for f in files if f.startswith("data/bluesky_counts_")]
|
53 |
+
|
54 |
+
# if not data_files:
|
55 |
+
# return []
|
56 |
+
|
57 |
+
# # Get the most recent file
|
58 |
+
# latest_file = sorted(data_files)[-1]
|
59 |
+
# # Download the file
|
60 |
+
# local_path = hf_hub_download(
|
61 |
+
# repo_id=REPO_ID, filename=latest_file, repo_type=REPO_TYPE, token=HF_TOKEN
|
62 |
+
# )
|
63 |
+
|
64 |
+
# # Load and parse the data
|
65 |
+
# data = []
|
66 |
+
# with open(local_path, "r") as f:
|
67 |
+
# data.extend(json.loads(line.strip()) for line in f)
|
68 |
+
# # Keep only last month of data
|
69 |
+
# return data[-MONTH_IN_SECONDS:]
|
70 |
+
# except Exception as e:
|
71 |
+
# print(f"Error loading data from Hub: {e}")
|
72 |
+
# return []
|
73 |
+
|
74 |
+
|
75 |
+
# # Initialize storage and Hub connection
|
76 |
+
# DATA_FOLDER.mkdir(exist_ok=True)
|
77 |
+
# scheduler = CommitScheduler(
|
78 |
+
# repo_id=REPO_ID,
|
79 |
+
# repo_type=REPO_TYPE,
|
80 |
+
# folder_path=DATA_FOLDER,
|
81 |
+
# path_in_repo="data",
|
82 |
+
# every=600, # Upload every 10 minutes
|
83 |
+
# token=HF_TOKEN, # Add token for authentication
|
84 |
+
# )
|
85 |
|
86 |
|
87 |
def on_message_handler(message):
|
|
|
98 |
"""Emit post counts every second"""
|
99 |
global post_count
|
100 |
|
101 |
+
# if saved_data := load_hub_data():
|
102 |
+
# print(f"Loaded {len(saved_data)} historical data points from Hub")
|
103 |
+
# # Emit historical data
|
104 |
+
# for point in saved_data[-100:]: # Emit last 100 points to initialize plot
|
105 |
+
# df = pd.DataFrame(
|
106 |
+
# {
|
107 |
+
# "timestamp": [pd.Timestamp(point["timestamp"])],
|
108 |
+
# "post_count": [point["post_count"]],
|
109 |
+
# }
|
110 |
+
# )
|
111 |
+
# stream.emit(df)
|
112 |
|
113 |
# Wait for first second to collect initial data
|
114 |
time.sleep(1)
|