davanstrien HF staff committed on
Commit
ab39a72
·
verified ·
1 Parent(s): 10e329b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -50
app.py CHANGED
@@ -43,45 +43,45 @@ DATA_FOLDER = Path("bluesky_data")
43
  DATA_FILE = f"bluesky_counts_{uuid.uuid4()}.json"
44
 
45
 
46
- def load_hub_data():
47
- """Load the most recent data from the Hub"""
48
- try:
49
- api = HfApi(token=HF_TOKEN)
50
- # List files in the repository
51
- files = api.list_repo_files(REPO_ID, repo_type=REPO_TYPE)
52
- data_files = [f for f in files if f.startswith("data/bluesky_counts_")]
53
-
54
- if not data_files:
55
- return []
56
-
57
- # Get the most recent file
58
- latest_file = sorted(data_files)[-1]
59
- # Download the file
60
- local_path = hf_hub_download(
61
- repo_id=REPO_ID, filename=latest_file, repo_type=REPO_TYPE, token=HF_TOKEN
62
- )
63
-
64
- # Load and parse the data
65
- data = []
66
- with open(local_path, "r") as f:
67
- data.extend(json.loads(line.strip()) for line in f)
68
- # Keep only last month of data
69
- return data[-MONTH_IN_SECONDS:]
70
- except Exception as e:
71
- print(f"Error loading data from Hub: {e}")
72
- return []
73
-
74
-
75
- # Initialize storage and Hub connection
76
- DATA_FOLDER.mkdir(exist_ok=True)
77
- scheduler = CommitScheduler(
78
- repo_id=REPO_ID,
79
- repo_type=REPO_TYPE,
80
- folder_path=DATA_FOLDER,
81
- path_in_repo="data",
82
- every=600, # Upload every 10 minutes
83
- token=HF_TOKEN, # Add token for authentication
84
- )
85
 
86
 
87
  def on_message_handler(message):
@@ -98,17 +98,17 @@ def emit_counts():
98
  """Emit post counts every second"""
99
  global post_count
100
 
101
- if saved_data := load_hub_data():
102
- print(f"Loaded {len(saved_data)} historical data points from Hub")
103
- # Emit historical data
104
- for point in saved_data[-100:]: # Emit last 100 points to initialize plot
105
- df = pd.DataFrame(
106
- {
107
- "timestamp": [pd.Timestamp(point["timestamp"])],
108
- "post_count": [point["post_count"]],
109
- }
110
- )
111
- stream.emit(df)
112
 
113
  # Wait for first second to collect initial data
114
  time.sleep(1)
 
43
  DATA_FILE = f"bluesky_counts_{uuid.uuid4()}.json"
44
 
45
 
46
+ # def load_hub_data():
47
+ # """Load the most recent data from the Hub"""
48
+ # try:
49
+ # api = HfApi(token=HF_TOKEN)
50
+ # # List files in the repository
51
+ # files = api.list_repo_files(REPO_ID, repo_type=REPO_TYPE)
52
+ # data_files = [f for f in files if f.startswith("data/bluesky_counts_")]
53
+
54
+ # if not data_files:
55
+ # return []
56
+
57
+ # # Get the most recent file
58
+ # latest_file = sorted(data_files)[-1]
59
+ # # Download the file
60
+ # local_path = hf_hub_download(
61
+ # repo_id=REPO_ID, filename=latest_file, repo_type=REPO_TYPE, token=HF_TOKEN
62
+ # )
63
+
64
+ # # Load and parse the data
65
+ # data = []
66
+ # with open(local_path, "r") as f:
67
+ # data.extend(json.loads(line.strip()) for line in f)
68
+ # # Keep only last month of data
69
+ # return data[-MONTH_IN_SECONDS:]
70
+ # except Exception as e:
71
+ # print(f"Error loading data from Hub: {e}")
72
+ # return []
73
+
74
+
75
+ # # Initialize storage and Hub connection
76
+ # DATA_FOLDER.mkdir(exist_ok=True)
77
+ # scheduler = CommitScheduler(
78
+ # repo_id=REPO_ID,
79
+ # repo_type=REPO_TYPE,
80
+ # folder_path=DATA_FOLDER,
81
+ # path_in_repo="data",
82
+ # every=600, # Upload every 10 minutes
83
+ # token=HF_TOKEN, # Add token for authentication
84
+ # )
85
 
86
 
87
  def on_message_handler(message):
 
98
  """Emit post counts every second"""
99
  global post_count
100
 
101
+ # if saved_data := load_hub_data():
102
+ # print(f"Loaded {len(saved_data)} historical data points from Hub")
103
+ # # Emit historical data
104
+ # for point in saved_data[-100:]: # Emit last 100 points to initialize plot
105
+ # df = pd.DataFrame(
106
+ # {
107
+ # "timestamp": [pd.Timestamp(point["timestamp"])],
108
+ # "post_count": [point["post_count"]],
109
+ # }
110
+ # )
111
+ # stream.emit(df)
112
 
113
  # Wait for first second to collect initial data
114
  time.sleep(1)