File size: 6,189 Bytes
946a274
 
 
 
 
d7d471b
6f874f7
d7d471b
 
 
 
946a274
 
 
d7d471b
30c739d
 
d7d471b
 
 
 
 
 
 
 
 
 
 
 
 
946a274
30c739d
946a274
39bca12
d7d471b
39bca12
 
 
d7d471b
39bca12
d7d471b
39bca12
 
 
d7d471b
 
 
 
39bca12
30c739d
946a274
af926fe
d7d471b
946a274
 
6f874f7
946a274
d7d471b
 
 
 
d55b380
 
d7d471b
6f874f7
 
 
 
 
9670a56
bf10da6
9670a56
af926fe
d7d471b
 
 
 
6f874f7
d55b380
 
d7d471b
6f874f7
 
 
 
 
 
 
 
9670a56
6f874f7
 
9670a56
 
 
 
 
 
6f874f7
 
 
 
 
af926fe
6f874f7
af926fe
 
6f874f7
d7d471b
 
 
 
6f874f7
d7d471b
 
 
 
 
 
 
 
 
 
 
6f874f7
946a274
d7d471b
 
 
946a274
 
 
6f874f7
946a274
 
 
 
 
 
 
 
 
 
 
 
 
 
39bca12
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import gradio as gr
from gradio_client import Client
import os
import zipfile
from huggingface_hub import HfApi
import logging
import time  # Import time module for adding delays

# Set up logging
# Configure the root logger once at import time; all functions below log
# through this module-level named logger.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Function to call the API and get the result
def call_api(prompt):
    """Send *prompt* to the MiniMax-Text-01 chat endpoint and return its reply.

    A fresh Gradio client is built on every call so each chunk gets a clean
    connection to the Space.

    Raises:
        gr.Error: If the remote prediction fails for any reason.
    """
    try:
        # Reload the Gradio client for each chunk
        api_client = Client("MiniMaxAI/MiniMax-Text-01")
        logger.info(f"Calling API with prompt: {prompt[:100]}...")  # Log the first 100 chars of the prompt
        response = api_client.predict(
            message=prompt,
            max_tokens=12800,
            temperature=0.1,
            top_p=0.9,
            api_name="/chat",
        )
    except Exception as e:
        logger.error(f"API call failed: {e}")
        raise gr.Error(f"API call failed: {str(e)}")
    logger.info("API call successful.")
    return response

# Function to segment the text file into chunks of 1500 words
def segment_text(file_path, chunk_size=1500, step=1250):
    """Read a text file and split it into overlapping word chunks.

    Each chunk holds up to ``chunk_size`` whitespace-separated words and
    consecutive chunks start ``step`` words apart, so with the defaults
    adjacent chunks share a 250-word overlap (preserves context between
    per-chunk API calls). Defaults reproduce the original behavior exactly.

    Args:
        file_path: Path of the text file to read.
        chunk_size: Maximum number of words per chunk.
        step: Number of words to advance between chunk starts.

    Returns:
        list[str]: Space-joined word chunks; an empty file yields [].

    Raises:
        gr.Error: If the file cannot be read (either encoding attempt).
    """
    try:
        logger.info(f"Reading file: {file_path}")
        try:
            # Try reading with UTF-8 encoding first
            with open(file_path, "r", encoding="utf-8") as f:
                text = f.read()
            logger.info("File read successfully with UTF-8 encoding.")
        except UnicodeDecodeError:
            logger.warning("UTF-8 encoding failed. Trying latin-1 encoding.")
            # latin-1 maps every possible byte, so this fallback cannot raise
            # a decode error — only I/O errors, handled below.
            with open(file_path, "r", encoding="latin-1") as f:
                text = f.read()
            logger.info("File read successfully with latin-1 encoding.")
    except Exception as e:
        # Guards BOTH read attempts. (Previously the generic handler sat in
        # the same try/except chain as the UnicodeDecodeError arm, so an I/O
        # failure during the latin-1 fallback escaped unwrapped.)
        logger.error(f"Failed to read file: {e}")
        raise gr.Error(f"Failed to read file: {str(e)}")

    # Slice chunk_size words at a time, advancing by step (overlap = chunk_size - step)
    words = text.split()
    chunks = [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), step)]
    logger.info(f"Segmented text into {len(chunks)} chunks.")
    return chunks

# Function to process the text file and make API calls with rate limiting
def process_text(file, prompt):
    """Split an uploaded text file into chunks, run each through the API,
    upload every result to the Hugging Face Hub, and bundle them into a ZIP.

    Args:
        file: Uploaded file object (anything with a ``.name``) or a path string.
        prompt: Instruction text prepended to every chunk before the API call.

    Returns:
        tuple[str, str]: Path of the ZIP archive and a status message
        (matching the two Gradio output components).

    Raises:
        gr.Error: On any failure (read, API call, upload, or ZIP creation).
    """
    try:
        logger.info("Starting text processing...")

        # Segment the text file into chunks
        file_path = file.name if hasattr(file, "name") else file
        chunks = segment_text(file_path)

        # Fail fast if no token is configured, before doing any API work
        # (previously the check ran only after HfApi was constructed).
        token = os.environ.get("HUGGINGFACE_TOKEN")
        if not token:
            raise ValueError("Hugging Face token not found in environment variables.")
        hf_api = HfApi(token=token)

        # Repository name on Hugging Face Hub
        repo_name = "TeacherPuffy/book4"

        # Process each chunk with a 15-second delay between API calls
        results = []
        for idx, chunk in enumerate(chunks):
            logger.info(f"Processing chunk {idx + 1}/{len(chunks)}")
            try:
                # Call the API
                result = call_api(f"{prompt}\n\n{chunk}")
                results.append(result)
                logger.info(f"Chunk {idx + 1} processed successfully.")

                # Save the result to a file
                os.makedirs("outputs", exist_ok=True)
                output_file = f"outputs/output_{idx}.txt"
                with open(output_file, "w", encoding="utf-8") as f:
                    f.write(result)
                logger.info(f"Saved result to {output_file}")

                # Upload the result as an individual text file to Hugging Face
                try:
                    logger.info(f"Uploading chunk {idx + 1} to Hugging Face...")
                    hf_api.upload_file(
                        path_or_fileobj=output_file,
                        path_in_repo=f"output_{idx}.txt",  # File name in the repository
                        repo_id=repo_name,
                        repo_type="dataset",
                    )
                    logger.info(f"Chunk {idx + 1} uploaded to Hugging Face successfully.")
                except Exception as e:
                    logger.error(f"Failed to upload chunk {idx + 1} to Hugging Face: {e}")
                    raise gr.Error(f"Failed to upload chunk {idx + 1} to Hugging Face: {str(e)}")

                # Wait 15 seconds before the next API call (rate limiting);
                # no need to wait after the last chunk
                if idx < len(chunks) - 1:
                    logger.info("Waiting 15 seconds before the next API call...")
                    time.sleep(15)

            except gr.Error:
                # Already a user-facing error with a specific message (e.g. the
                # upload failure above) — re-raise as-is instead of wrapping it
                # a second time in "Failed to process chunk ...".
                raise
            except Exception as e:
                logger.error(f"Failed to process chunk {idx + 1}: {e}")
                raise gr.Error(f"Failed to process chunk {idx + 1}: {str(e)}")

        # Create a ZIP file of all outputs
        try:
            logger.info("Creating ZIP file...")
            with zipfile.ZipFile("outputs.zip", "w") as zipf:
                # Distinct loop names so the `file` parameter is not shadowed
                for root, dirs, filenames in os.walk("outputs"):
                    for fname in filenames:
                        zipf.write(os.path.join(root, fname), fname)
            logger.info("ZIP file created successfully.")
        except Exception as e:
            logger.error(f"Failed to create ZIP file: {e}")
            raise gr.Error(f"Failed to create ZIP file: {str(e)}")

        return "outputs.zip", "All chunks processed and uploaded to Hugging Face. ZIP file created."

    except Exception as e:
        logger.error(f"An error occurred during processing: {e}")
        raise gr.Error(f"An error occurred: {str(e)}")

# Gradio interface
# Layout: one row of inputs (file + prompt), one row of outputs (ZIP + status),
# and a Submit button wired to process_text.
with gr.Blocks() as demo:
    gr.Markdown("## Text File Processor with Rate-Limited API Calls")
    with gr.Row():
        # User inputs: the text file to segment and the per-chunk prompt
        file_input = gr.File(label="Upload Text File")
        prompt_input = gr.Textbox(label="Enter Prompt")
    with gr.Row():
        # Results: downloadable archive of all chunk outputs plus a status line
        output_zip = gr.File(label="Download ZIP File")
        output_message = gr.Textbox(label="Status Message")
    submit_button = gr.Button("Submit")
    
    # process_text returns (zip_path, status_message), matching `outputs` order
    submit_button.click(
        process_text,
        inputs=[file_input, prompt_input],
        outputs=[output_zip, output_message]
    )

# Launch the Gradio app with a public link
# (share=True publishes a temporary public *.gradio.live URL)
demo.launch(share=True)