throaway2854 committed on
Commit
ff225ce
·
verified ·
1 Parent(s): 2ff0c74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -116
app.py CHANGED
@@ -10,7 +10,6 @@ import io
10
  import uuid
11
  import time
12
  import random
13
- from typing import List, Dict
14
 
15
  DATA_DIR = "/data"
16
  IMAGES_DIR = os.path.join(DATA_DIR, "images")
@@ -78,10 +77,10 @@ class DatasetBuilder:
78
  os.makedirs(IMAGES_DIR, exist_ok=True)
79
  self.hf_token = os.getenv("HF_Token") # Access the token from the environment variable
80
 
81
- def get_dataset_file(self) -> str:
82
  return os.path.join(DATA_DIR, f"{self.dataset_name}.json")
83
 
84
- def load_dataset(self) -> List[Dict]:
85
  dataset_file = self.get_dataset_file()
86
  if os.path.exists(dataset_file):
87
  with open(dataset_file, 'r') as f:
@@ -137,14 +136,6 @@ class DatasetBuilder:
137
  def get_dataset_info(self):
138
  return f"Current dataset size ({self.dataset_name}): {len(self.dataset)} images"
139
 
140
- def get_storage_usage(self) -> float:
141
- total_size = 0
142
- for dirpath, dirnames, filenames in os.walk(DATA_DIR):
143
- for f in filenames:
144
- fp = os.path.join(dirpath, f)
145
- total_size += os.path.getsize(fp)
146
- return total_size / (1024 * 1024 * 1024) # Convert to GB
147
-
148
  def get_dataset_preview(self, num_images=5):
149
  preview = []
150
  for item in self.dataset[-num_images:]:
@@ -152,35 +143,6 @@ class DatasetBuilder:
152
  preview.append((image_path, item['tags']))
153
  return preview
154
 
155
- def get_dataset_with_paths(self) -> List[Dict]:
156
- return [
157
- {
158
- 'image': os.path.join(IMAGES_DIR, item['image']),
159
- 'tags': item['tags'],
160
- 'filename': item['image']
161
- }
162
- for item in self.dataset
163
- ]
164
-
165
- def remove_image(self, filename):
166
- for item in self.dataset:
167
- if item['image'] == filename:
168
- self.dataset.remove(item)
169
- image_path = os.path.join(IMAGES_DIR, filename)
170
- if os.path.exists(image_path):
171
- os.remove(image_path)
172
- self.save_dataset()
173
- return f"Removed image: {filename}"
174
- return f"Image not found: {filename}"
175
-
176
- def edit_tags(self, filename, new_tags):
177
- for item in self.dataset:
178
- if item['image'] == filename:
179
- item['tags'] = new_tags
180
- self.save_dataset()
181
- return f"Updated tags for image: {filename}"
182
- return f"Image not found: {filename}"
183
-
184
  def upload_to_huggingface(self, private=True):
185
  if not self.dataset:
186
  return "Dataset is empty. Add some images first."
@@ -235,90 +197,44 @@ def create_huggingface_dataset(dataset_name):
235
 
236
  def view_dataset(dataset_name):
237
  builder = DatasetBuilder(dataset_name)
238
- return builder.get_dataset_preview(num_images=20)
239
 
240
  def upload_huggingface_dataset(dataset_name, privacy):
241
  builder = DatasetBuilder(dataset_name)
242
- return builder.upload_to_huggingface(private=privacy)
243
-
244
- def view_dataset_for_editing(dataset_name: str) -> List[Dict]:
245
- builder = DatasetBuilder(dataset_name)
246
- return builder.get_dataset_with_paths()
247
-
248
- def remove_image_from_dataset(dataset_name, filename):
249
- builder = DatasetBuilder(dataset_name)
250
- result = builder.remove_image(filename)
251
- return result, builder.get_dataset_with_paths()
252
-
253
- def edit_image_tags(dataset_name, filename, new_tags):
254
- builder = DatasetBuilder(dataset_name)
255
- result = builder.edit_tags(filename, new_tags)
256
- return result, builder.get_dataset_with_paths()
257
-
258
- def get_storage_usage(dataset_name: str) -> str:
259
- builder = DatasetBuilder(dataset_name)
260
- usage = builder.get_storage_usage()
261
- return f"Current storage usage: {usage:.2f} GB / 20 GB"
262
 
263
  # Create Gradio interface
264
  with gr.Blocks(theme="huggingface") as iface:
265
  gr.Markdown("# Image Dataset Builder")
266
-
267
- dataset_name_input = gr.Textbox(lines=1, label="Dataset Name", placeholder="Enter dataset name...", value="default_dataset")
268
-
269
- with gr.Tab("Add Images"):
270
- gr.Markdown("Enter a URL to add an image and its tags to the dataset. Progress is saved automatically.")
271
- with gr.Row():
272
- url_input = gr.Textbox(lines=2, label="URL", placeholder="Enter image URL here...")
273
- cookies_input = gr.Textbox(lines=2, label="Cookies (optional)", placeholder="Enter cookies")
274
  add_button = gr.Button("Add Image")
275
- result_output = gr.Textbox(label="Result")
276
- dataset_info = gr.Textbox(label="Dataset Info")
277
- preview_gallery = gr.Gallery(label="Recent Additions", show_label=False, elem_id="preview_gallery", columns=5, rows=1, height="auto")
278
-
279
- add_button.click(add_image_to_dataset, inputs=[url_input, cookies_input, dataset_name_input], outputs=[result_output, dataset_info, preview_gallery])
280
-
281
- with gr.Tab("Edit Dataset"):
282
- gr.Markdown("## View and Edit Dataset")
283
- view_dataset_button = gr.Button("View Dataset")
284
-
285
- with gr.Column():
286
- edit_gallery = gr.Gallery(label="Dataset Images", show_label=True, elem_id="edit_gallery", columns=4, rows=3, height="auto", object_fit="contain")
287
- selected_image = gr.State(None)
288
-
289
- with gr.Row():
290
- remove_button = gr.Button("Remove Selected Image")
291
- edit_tags_input = gr.Textbox(lines=1, label="Edit Tags", placeholder="Enter new tags, comma-separated...")
292
- edit_button = gr.Button("Update Tags")
293
-
294
- edit_result_output = gr.Textbox(label="Edit Result")
295
-
296
- def select_image(evt: gr.SelectData, images):
297
- selected = images[evt.index]
298
- return selected['filename'], selected['tags']
299
-
300
- edit_gallery.select(select_image, inputs=[edit_gallery], outputs=[selected_image, edit_tags_input])
301
- view_dataset_button.click(view_dataset_for_editing, inputs=[dataset_name_input], outputs=[edit_gallery])
302
- remove_button.click(remove_image_from_dataset, inputs=[dataset_name_input, selected_image], outputs=[edit_result_output, edit_gallery])
303
- edit_button.click(edit_image_tags, inputs=[dataset_name_input, selected_image, edit_tags_input], outputs=[edit_result_output, edit_gallery])
304
-
305
- with gr.Tab("HuggingFace Integration"):
306
- gr.Markdown("## Create and Upload HuggingFace Dataset")
307
- create_hf_button = gr.Button("Create HuggingFace Dataset")
308
- hf_result = gr.Textbox(label="Dataset Creation Result")
309
- create_hf_button.click(create_huggingface_dataset, inputs=[dataset_name_input], outputs=hf_result)
310
-
311
- gr.Markdown("## Upload Dataset to Hugging Face")
312
- privacy_radio = gr.Radio(choices=["private", "public"], value="private", label="Repository Privacy")
313
- upload_hf_button = gr.Button("Upload to Hugging Face")
314
- hf_upload_result = gr.Textbox(label="Upload Result")
315
- upload_hf_button.click(upload_huggingface_dataset, inputs=[dataset_name_input, privacy_radio], outputs=hf_upload_result)
316
-
317
- with gr.Tab("Storage"):
318
- gr.Markdown("## Storage Usage")
319
- storage_usage_button = gr.Button("Check Storage Usage")
320
- storage_usage_output = gr.Textbox(label="Storage Usage")
321
- storage_usage_button.click(get_storage_usage, inputs=[dataset_name_input], outputs=storage_usage_output)
322
 
323
  # Launch the interface
324
  iface.launch()
 
10
  import uuid
11
  import time
12
  import random
 
13
 
14
  DATA_DIR = "/data"
15
  IMAGES_DIR = os.path.join(DATA_DIR, "images")
 
77
  os.makedirs(IMAGES_DIR, exist_ok=True)
78
  self.hf_token = os.getenv("HF_Token") # Access the token from the environment variable
79
 
80
+ def get_dataset_file(self):
81
  return os.path.join(DATA_DIR, f"{self.dataset_name}.json")
82
 
83
+ def load_dataset(self):
84
  dataset_file = self.get_dataset_file()
85
  if os.path.exists(dataset_file):
86
  with open(dataset_file, 'r') as f:
 
136
  def get_dataset_info(self):
137
  return f"Current dataset size ({self.dataset_name}): {len(self.dataset)} images"
138
 
 
 
 
 
 
 
 
 
139
  def get_dataset_preview(self, num_images=5):
140
  preview = []
141
  for item in self.dataset[-num_images:]:
 
143
  preview.append((image_path, item['tags']))
144
  return preview
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  def upload_to_huggingface(self, private=True):
147
  if not self.dataset:
148
  return "Dataset is empty. Add some images first."
 
197
 
198
  def view_dataset(dataset_name):
199
  builder = DatasetBuilder(dataset_name)
200
+ return builder.get_dataset_preview(num_images=60)
201
 
202
  def upload_huggingface_dataset(dataset_name, privacy):
203
  builder = DatasetBuilder(dataset_name)
204
+ return builder.upload_to_huggingface(private=privacy)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  # Create Gradio interface
207
  with gr.Blocks(theme="huggingface") as iface:
208
  gr.Markdown("# Image Dataset Builder")
209
+ gr.Markdown("Enter a URL to add an image and its tags to the dataset. Progress is saved automatically.")
210
+
211
+ with gr.Row():
212
+ dataset_name_input = gr.Textbox(lines=1, label="Dataset Name", placeholder="Enter dataset name...", value="default_dataset")
213
+ url_input = gr.Textbox(lines=2, label="URL", placeholder="Enter image URL here...")
214
+ cookies_input = gr.Textbox(lines=2, label="Cookies (optional)", placeholder="Enter cookies")
 
 
215
  add_button = gr.Button("Add Image")
216
+
217
+ result_output = gr.Textbox(label="Result")
218
+ dataset_info = gr.Textbox(label="Dataset Info")
219
+
220
+ gr.Markdown("## Dataset Preview")
221
+ preview_gallery = gr.Gallery(label="Recent Additions", show_label=False, elem_id="preview_gallery", columns=5, rows=1, height="auto")
222
+
223
+ add_button.click(add_image_to_dataset, inputs=[url_input, cookies_input, dataset_name_input], outputs=[result_output, dataset_info, preview_gallery])
224
+
225
+ create_hf_button = gr.Button("Create HuggingFace Dataset")
226
+ hf_result = gr.Textbox(label="Dataset Creation Result")
227
+ create_hf_button.click(create_huggingface_dataset, inputs=[dataset_name_input], outputs=hf_result)
228
+
229
+ view_dataset_button = gr.Button("View Dataset")
230
+ dataset_gallery = gr.Gallery(label="Dataset Contents", show_label=False, elem_id="dataset_gallery", columns=5, rows=4, height="auto")
231
+ view_dataset_button.click(view_dataset, inputs=[dataset_name_input], outputs=dataset_gallery)
232
+
233
+ gr.Markdown("## Upload Dataset to Hugging Face")
234
+ privacy_radio = gr.Radio(choices=["private", "public"], value="private", label="Repository Privacy")
235
+ upload_hf_button = gr.Button("Upload to Hugging Face")
236
+ hf_upload_result = gr.Textbox(label="Upload Result")
237
+ upload_hf_button.click(upload_huggingface_dataset, inputs=[dataset_name_input, privacy_radio], outputs=hf_upload_result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  # Launch the interface
240
  iface.launch()