Spaces:
Paused
Paused
throaway2854
commited on
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,6 @@ import io
|
|
10 |
import uuid
|
11 |
import time
|
12 |
import random
|
13 |
-
from typing import List, Dict
|
14 |
|
15 |
DATA_DIR = "/data"
|
16 |
IMAGES_DIR = os.path.join(DATA_DIR, "images")
|
@@ -78,10 +77,10 @@ class DatasetBuilder:
|
|
78 |
os.makedirs(IMAGES_DIR, exist_ok=True)
|
79 |
self.hf_token = os.getenv("HF_Token") # Access the token from the environment variable
|
80 |
|
81 |
-
def get_dataset_file(self)
|
82 |
return os.path.join(DATA_DIR, f"{self.dataset_name}.json")
|
83 |
|
84 |
-
def load_dataset(self)
|
85 |
dataset_file = self.get_dataset_file()
|
86 |
if os.path.exists(dataset_file):
|
87 |
with open(dataset_file, 'r') as f:
|
@@ -137,14 +136,6 @@ class DatasetBuilder:
|
|
137 |
def get_dataset_info(self):
|
138 |
return f"Current dataset size ({self.dataset_name}): {len(self.dataset)} images"
|
139 |
|
140 |
-
def get_storage_usage(self) -> float:
|
141 |
-
total_size = 0
|
142 |
-
for dirpath, dirnames, filenames in os.walk(DATA_DIR):
|
143 |
-
for f in filenames:
|
144 |
-
fp = os.path.join(dirpath, f)
|
145 |
-
total_size += os.path.getsize(fp)
|
146 |
-
return total_size / (1024 * 1024 * 1024) # Convert to GB
|
147 |
-
|
148 |
def get_dataset_preview(self, num_images=5):
|
149 |
preview = []
|
150 |
for item in self.dataset[-num_images:]:
|
@@ -152,35 +143,6 @@ class DatasetBuilder:
|
|
152 |
preview.append((image_path, item['tags']))
|
153 |
return preview
|
154 |
|
155 |
-
def get_dataset_with_paths(self) -> List[Dict]:
|
156 |
-
return [
|
157 |
-
{
|
158 |
-
'image': os.path.join(IMAGES_DIR, item['image']),
|
159 |
-
'tags': item['tags'],
|
160 |
-
'filename': item['image']
|
161 |
-
}
|
162 |
-
for item in self.dataset
|
163 |
-
]
|
164 |
-
|
165 |
-
def remove_image(self, filename):
|
166 |
-
for item in self.dataset:
|
167 |
-
if item['image'] == filename:
|
168 |
-
self.dataset.remove(item)
|
169 |
-
image_path = os.path.join(IMAGES_DIR, filename)
|
170 |
-
if os.path.exists(image_path):
|
171 |
-
os.remove(image_path)
|
172 |
-
self.save_dataset()
|
173 |
-
return f"Removed image: {filename}"
|
174 |
-
return f"Image not found: {filename}"
|
175 |
-
|
176 |
-
def edit_tags(self, filename, new_tags):
|
177 |
-
for item in self.dataset:
|
178 |
-
if item['image'] == filename:
|
179 |
-
item['tags'] = new_tags
|
180 |
-
self.save_dataset()
|
181 |
-
return f"Updated tags for image: {filename}"
|
182 |
-
return f"Image not found: {filename}"
|
183 |
-
|
184 |
def upload_to_huggingface(self, private=True):
|
185 |
if not self.dataset:
|
186 |
return "Dataset is empty. Add some images first."
|
@@ -235,90 +197,44 @@ def create_huggingface_dataset(dataset_name):
|
|
235 |
|
236 |
def view_dataset(dataset_name):
|
237 |
builder = DatasetBuilder(dataset_name)
|
238 |
-
return builder.get_dataset_preview(num_images=
|
239 |
|
240 |
def upload_huggingface_dataset(dataset_name, privacy):
|
241 |
builder = DatasetBuilder(dataset_name)
|
242 |
-
return builder.upload_to_huggingface(private=privacy)
|
243 |
-
|
244 |
-
def view_dataset_for_editing(dataset_name: str) -> List[Dict]:
|
245 |
-
builder = DatasetBuilder(dataset_name)
|
246 |
-
return builder.get_dataset_with_paths()
|
247 |
-
|
248 |
-
def remove_image_from_dataset(dataset_name, filename):
|
249 |
-
builder = DatasetBuilder(dataset_name)
|
250 |
-
result = builder.remove_image(filename)
|
251 |
-
return result, builder.get_dataset_with_paths()
|
252 |
-
|
253 |
-
def edit_image_tags(dataset_name, filename, new_tags):
|
254 |
-
builder = DatasetBuilder(dataset_name)
|
255 |
-
result = builder.edit_tags(filename, new_tags)
|
256 |
-
return result, builder.get_dataset_with_paths()
|
257 |
-
|
258 |
-
def get_storage_usage(dataset_name: str) -> str:
|
259 |
-
builder = DatasetBuilder(dataset_name)
|
260 |
-
usage = builder.get_storage_usage()
|
261 |
-
return f"Current storage usage: {usage:.2f} GB / 20 GB"
|
262 |
|
263 |
# Create Gradio interface
|
264 |
with gr.Blocks(theme="huggingface") as iface:
|
265 |
gr.Markdown("# Image Dataset Builder")
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
gr.
|
271 |
-
|
272 |
-
url_input = gr.Textbox(lines=2, label="URL", placeholder="Enter image URL here...")
|
273 |
-
cookies_input = gr.Textbox(lines=2, label="Cookies (optional)", placeholder="Enter cookies")
|
274 |
add_button = gr.Button("Add Image")
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
selected = images[evt.index]
|
298 |
-
return selected['filename'], selected['tags']
|
299 |
-
|
300 |
-
edit_gallery.select(select_image, inputs=[edit_gallery], outputs=[selected_image, edit_tags_input])
|
301 |
-
view_dataset_button.click(view_dataset_for_editing, inputs=[dataset_name_input], outputs=[edit_gallery])
|
302 |
-
remove_button.click(remove_image_from_dataset, inputs=[dataset_name_input, selected_image], outputs=[edit_result_output, edit_gallery])
|
303 |
-
edit_button.click(edit_image_tags, inputs=[dataset_name_input, selected_image, edit_tags_input], outputs=[edit_result_output, edit_gallery])
|
304 |
-
|
305 |
-
with gr.Tab("HuggingFace Integration"):
|
306 |
-
gr.Markdown("## Create and Upload HuggingFace Dataset")
|
307 |
-
create_hf_button = gr.Button("Create HuggingFace Dataset")
|
308 |
-
hf_result = gr.Textbox(label="Dataset Creation Result")
|
309 |
-
create_hf_button.click(create_huggingface_dataset, inputs=[dataset_name_input], outputs=hf_result)
|
310 |
-
|
311 |
-
gr.Markdown("## Upload Dataset to Hugging Face")
|
312 |
-
privacy_radio = gr.Radio(choices=["private", "public"], value="private", label="Repository Privacy")
|
313 |
-
upload_hf_button = gr.Button("Upload to Hugging Face")
|
314 |
-
hf_upload_result = gr.Textbox(label="Upload Result")
|
315 |
-
upload_hf_button.click(upload_huggingface_dataset, inputs=[dataset_name_input, privacy_radio], outputs=hf_upload_result)
|
316 |
-
|
317 |
-
with gr.Tab("Storage"):
|
318 |
-
gr.Markdown("## Storage Usage")
|
319 |
-
storage_usage_button = gr.Button("Check Storage Usage")
|
320 |
-
storage_usage_output = gr.Textbox(label="Storage Usage")
|
321 |
-
storage_usage_button.click(get_storage_usage, inputs=[dataset_name_input], outputs=storage_usage_output)
|
322 |
|
323 |
# Launch the interface
|
324 |
iface.launch()
|
|
|
10 |
import uuid
|
11 |
import time
|
12 |
import random
|
|
|
13 |
|
14 |
DATA_DIR = "/data"
|
15 |
IMAGES_DIR = os.path.join(DATA_DIR, "images")
|
|
|
77 |
os.makedirs(IMAGES_DIR, exist_ok=True)
|
78 |
self.hf_token = os.getenv("HF_Token") # Access the token from the environment variable
|
79 |
|
80 |
+
def get_dataset_file(self):
|
81 |
return os.path.join(DATA_DIR, f"{self.dataset_name}.json")
|
82 |
|
83 |
+
def load_dataset(self):
|
84 |
dataset_file = self.get_dataset_file()
|
85 |
if os.path.exists(dataset_file):
|
86 |
with open(dataset_file, 'r') as f:
|
|
|
136 |
def get_dataset_info(self):
|
137 |
return f"Current dataset size ({self.dataset_name}): {len(self.dataset)} images"
|
138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
def get_dataset_preview(self, num_images=5):
|
140 |
preview = []
|
141 |
for item in self.dataset[-num_images:]:
|
|
|
143 |
preview.append((image_path, item['tags']))
|
144 |
return preview
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
def upload_to_huggingface(self, private=True):
|
147 |
if not self.dataset:
|
148 |
return "Dataset is empty. Add some images first."
|
|
|
197 |
|
198 |
def view_dataset(dataset_name):
|
199 |
builder = DatasetBuilder(dataset_name)
|
200 |
+
return builder.get_dataset_preview(num_images=60)
|
201 |
|
202 |
def upload_huggingface_dataset(dataset_name, privacy):
|
203 |
builder = DatasetBuilder(dataset_name)
|
204 |
+
return builder.upload_to_huggingface(private=privacy)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
# Create Gradio interface
|
207 |
with gr.Blocks(theme="huggingface") as iface:
|
208 |
gr.Markdown("# Image Dataset Builder")
|
209 |
+
gr.Markdown("Enter a URL to add an image and its tags to the dataset. Progress is saved automatically.")
|
210 |
+
|
211 |
+
with gr.Row():
|
212 |
+
dataset_name_input = gr.Textbox(lines=1, label="Dataset Name", placeholder="Enter dataset name...", value="default_dataset")
|
213 |
+
url_input = gr.Textbox(lines=2, label="URL", placeholder="Enter image URL here...")
|
214 |
+
cookies_input = gr.Textbox(lines=2, label="Cookies (optional)", placeholder="Enter cookies")
|
|
|
|
|
215 |
add_button = gr.Button("Add Image")
|
216 |
+
|
217 |
+
result_output = gr.Textbox(label="Result")
|
218 |
+
dataset_info = gr.Textbox(label="Dataset Info")
|
219 |
+
|
220 |
+
gr.Markdown("## Dataset Preview")
|
221 |
+
preview_gallery = gr.Gallery(label="Recent Additions", show_label=False, elem_id="preview_gallery", columns=5, rows=1, height="auto")
|
222 |
+
|
223 |
+
add_button.click(add_image_to_dataset, inputs=[url_input, cookies_input, dataset_name_input], outputs=[result_output, dataset_info, preview_gallery])
|
224 |
+
|
225 |
+
create_hf_button = gr.Button("Create HuggingFace Dataset")
|
226 |
+
hf_result = gr.Textbox(label="Dataset Creation Result")
|
227 |
+
create_hf_button.click(create_huggingface_dataset, inputs=[dataset_name_input], outputs=hf_result)
|
228 |
+
|
229 |
+
view_dataset_button = gr.Button("View Dataset")
|
230 |
+
dataset_gallery = gr.Gallery(label="Dataset Contents", show_label=False, elem_id="dataset_gallery", columns=5, rows=4, height="auto")
|
231 |
+
view_dataset_button.click(view_dataset, inputs=[dataset_name_input], outputs=dataset_gallery)
|
232 |
+
|
233 |
+
gr.Markdown("## Upload Dataset to Hugging Face")
|
234 |
+
privacy_radio = gr.Radio(choices=["private", "public"], value="private", label="Repository Privacy")
|
235 |
+
upload_hf_button = gr.Button("Upload to Hugging Face")
|
236 |
+
hf_upload_result = gr.Textbox(label="Upload Result")
|
237 |
+
upload_hf_button.click(upload_huggingface_dataset, inputs=[dataset_name_input, privacy_radio], outputs=hf_upload_result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
|
239 |
# Launch the interface
|
240 |
iface.launch()
|