File size: 2,673 Bytes
f0be581
4006c1a
c881cad
4006c1a
f0be581
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcc36e8
 
fce2161
4006c1a
 
bcc36e8
4006c1a
 
82cce96
 
 
 
8537faa
 
 
84ac83a
8537faa
 
 
 
 
82cce96
 
4006c1a
bcc36e8
4006c1a
56dbc6b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import gradio as gr
from repo_utils import extract_repo_content

# Credentials come from the environment; each one is read and validated in
# turn so the module fails at import time when a value is missing or empty.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable is not set")

hf_user = os.environ.get("SPACE_AUTHOR_NAME")
if not hf_user:
    raise ValueError("SPACE_AUTHOR_NAME environment variable is not set")

def format_output(extracted_content, repo_url):
    """Render extracted repository files as a Markdown-style report.

    Args:
        extracted_content: Iterable of per-file entries. A well-formed entry
            is a dict with a ``'header'`` mapping (keys ``name``, ``type``,
            ``size``, ``creation_date``, ``modification_date``) and a
            ``'content'`` value.
        repo_url: Repository URL shown on the report's first line.

    Returns:
        A single string: the URL heading followed by one section per entry.
        Any entry that is not a dict, lacks ``'header'``, or is missing one
        of the fields above is reported as ``"Error in file data format."``
        instead of aborting the whole report.
    """
    format_error = "Error in file data format.\n"

    # Collect the pieces and join once instead of repeated string +=.
    parts = [f"# Repository URL: {repo_url}\n\n"]
    for file_data in extracted_content:
        if not (isinstance(file_data, dict) and 'header' in file_data):
            parts.append(format_error)
            continue
        try:
            header = file_data['header']
            # Build the whole section before appending so a missing field
            # never leaves a half-written entry in the report.
            section = (
                f"### File: {header['name']}\n"
                f"**Type:** {header['type']}\n"
                f"**Size:** {header['size']} bytes\n"
                f"**Created:** {header['creation_date']}\n"
                f"**Modified:** {header['modification_date']}\n"
                "#### Content:\n"
                f"```\n{file_data['content']}\n```\n\n"
            )
        except (KeyError, TypeError):
            # Missing field, or a header that is not a mapping.
            section = format_error
        parts.append(section)
    return "".join(parts)

def extract_and_display(url):
    """Extract the repository at *url* and return it as formatted text.

    Calls ``extract_repo_content`` with the module-level ``hf_token`` and
    ``hf_user`` values, then passes the result through ``format_output``.
    """
    return format_output(extract_repo_content(url, hf_token, hf_user), url)

# Gradio UI: a URL textbox with clickable example URLs, an output textbox for
# the formatted extraction result, and a button that triggers the extraction.
app = gr.Blocks(theme="sudeepshouche/minimalist")

with app:
    gr.Markdown("# VectorSpace Explorer")
    gr.Markdown("**Unleash the power of AI to explore Hugging Face repositories.**")

    # The emoji in the labels below were previously stored as mojibake
    # (UTF-8 bytes decoded with a legacy code page); they are restored here.
    url_input = gr.Textbox(
        label="🔗 Repository URL",
        placeholder="Enter the repository URL here OR select an example below...",
    )
    url_examples = gr.Examples(
        examples=[
            ["https://huggingface.co/spaces/big-vision/paligemma-hf"],
            ["https://huggingface.co/google/paligemma-3b-mix-224"],
            ["https://huggingface.co/microsoft/Phi-3-vision-128k-instruct"],
            ["https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf"],
        ],
        inputs=url_input,
    )
    output_display = gr.Textbox(
        label="📄 Extracted Repository Content",
        show_copy_button=True,
        lines=20,
        placeholder="Repository content will be extracted here...\n\nMetadata is captured for all files, but text content provided only for files less than 32 kb\n\n\n\nReview and search through the content here OR simply copy it for offline analysis!!. 🤖",
    )
    extract_button = gr.Button("🚀 Extract Content")

    # Clicking the button sends the URL to extract_and_display and shows the
    # returned text in the output box.
    extract_button.click(fn=extract_and_display, inputs=url_input, outputs=output_display)

app.launch()