File size: 2,871 Bytes
946a274
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39bca12
 
 
 
 
 
 
 
 
 
946a274
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39bca12
946a274
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39bca12
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import gradio as gr
from gradio_client import Client
import os
import zipfile
from datasets import Dataset
from huggingface_hub import HfApi

# Shared gradio_client handle for "MiniMaxAI/MiniMax-Text-01". It is created
# once at import time and reused by call_api() for every request.
client = Client("MiniMaxAI/MiniMax-Text-01")

# Send one prompt to the remote chat endpoint and hand back its reply
def call_api(prompt):
    """Submit ``prompt`` to the ``/chat`` endpoint and return the prediction.

    The request always uses the same settings: ``max_tokens=12800``,
    ``temperature=0.1`` and ``top_p=0.9``.

    Args:
        prompt: Text sent as the ``message`` argument of the endpoint.

    Returns:
        Whatever ``client.predict`` returns for the ``/chat`` endpoint.
    """
    request_kwargs = {
        "message": prompt,
        "max_tokens": 12800,
        "temperature": 0.1,
        "top_p": 0.9,
        "api_name": "/chat",
    }
    return client.predict(**request_kwargs)

# Function to segment the text file into fixed-size word chunks
def segment_text(file_path, chunk_size=3000):
    """Read a text file and split it into chunks of at most ``chunk_size`` words.

    The file is decoded as UTF-8 first; if that fails it is re-read as
    latin-1. Words are whitespace-delimited (``str.split()``), so line breaks
    and runs of spaces in the input become single spaces inside each chunk.

    Args:
        file_path: Path of the text file to read.
        chunk_size: Maximum number of words per chunk. Defaults to 3000.

    Returns:
        A list of strings, each containing up to ``chunk_size`` words joined
        by single spaces. An empty or whitespace-only file yields ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
        OSError: If the file cannot be opened or read.
    """
    # Reject bad sizes explicitly: 0 would make range() fail with an opaque
    # message and a negative step would silently produce no chunks at all.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    try:
        # Try reading with UTF-8 encoding first
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
    except UnicodeDecodeError:
        # Fallback to latin-1 encoding if UTF-8 fails
        with open(file_path, "r", encoding="latin-1") as f:
            text = f.read()

    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

# Function to process the text file: query the API twice per chunk, save the
# answers, upload them to the Hugging Face Hub and bundle them in a ZIP
def process_text(file, prompt, repo_id="your_huggingface_username/your_dataset_name"):
    """Run ``prompt`` over every chunk of an uploaded text file.

    Each chunk is sent to ``call_api`` twice, one call after the other (the
    calls are sequential, not concurrent), so the output holds two answers
    per chunk in chunk order. The answers are written to
    ``outputs/output_<idx>.txt``, pushed to the Hub as a dataset with a
    single ``text`` column, and archived in ``outputs.zip``.

    Args:
        file: The uploaded file from the Gradio ``File`` input. Either a path
            string or an object exposing the path as ``.name``; ``None`` when
            nothing was uploaded.
        prompt: Instruction text placed before each chunk.
        repo_id: Hub dataset repository that receives the results.

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is ``None`` when
        there was nothing to process.

    Raises:
        KeyError: If the ``HUGGINGFACE_TOKEN`` environment variable is unset.
    """
    if file is None:
        return None, "Please upload a text file before submitting."

    # Read the token up front so a missing variable fails before any API
    # call is made rather than after all of them have completed.
    hf_token = os.environ["HUGGINGFACE_TOKEN"]

    # Accept both a plain path string and an object carrying the path in .name
    file_path = getattr(file, "name", file)
    chunks = segment_text(file_path)
    if not chunks:
        return None, "The uploaded file contains no text to process."

    # Two independent completions per chunk, kept adjacent in the result list
    results = []
    for chunk in chunks:
        request = f"{prompt}\n\n{chunk}"
        results.append(call_api(request))
        results.append(call_api(request))

    # Save results as individual text files, remembering exactly which files
    # belong to this run.
    # NOTE: "outputs" and "outputs.zip" are fixed paths shared by all requests.
    os.makedirs("outputs", exist_ok=True)
    output_paths = []
    for idx, result in enumerate(results):
        output_path = os.path.join("outputs", f"output_{idx}.txt")
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(result)
        output_paths.append(output_path)

    # Upload to Hugging Face dataset; the token must be passed explicitly,
    # otherwise the value read from the environment is never used.
    dataset = Dataset.from_dict({"text": results})
    dataset.push_to_hub(repo_id, token=hf_token)

    # Archive only the files written above, so leftovers from earlier runs in
    # the shared "outputs" directory do not leak into this ZIP.
    with zipfile.ZipFile("outputs.zip", "w") as zipf:
        for output_path in output_paths:
            zipf.write(output_path, os.path.basename(output_path))

    return "outputs.zip", "Results uploaded to Hugging Face dataset and ZIP file created."

# Gradio interface: one row of inputs (file + prompt), one row of outputs
# (ZIP download + status text) and a submit button wired to process_text.
# Components are registered in the order they are created inside the
# context managers, so the statement order below must be preserved.
with gr.Blocks() as demo:
    gr.Markdown("## Text File Processor with Parallel API Calls")
    # Inputs passed to process_text as (file, prompt)
    with gr.Row():
        file_input = gr.File(label="Upload Text File")
        prompt_input = gr.Textbox(label="Enter Prompt")
    # Outputs filled from process_text's (zip path, status message) return value
    with gr.Row():
        output_zip = gr.File(label="Download ZIP File")
        output_message = gr.Textbox(label="Status Message")
    submit_button = gr.Button("Submit")

    # Clicking the button runs process_text with the two inputs and routes
    # its two return values to the two output components, in order.
    submit_button.click(
        process_text,
        inputs=[file_input, prompt_input],
        outputs=[output_zip, output_message]
    )

# Launch the Gradio app with a public link
# NOTE(review): this runs at module level, so importing this file also starts
# the server — confirm that is intended.
demo.launch(share=True)