Spaces:
Running
Running
File size: 6,189 Bytes
946a274 d7d471b 6f874f7 d7d471b 946a274 d7d471b 30c739d d7d471b 946a274 30c739d 946a274 39bca12 d7d471b 39bca12 d7d471b 39bca12 d7d471b 39bca12 d7d471b 39bca12 30c739d 946a274 af926fe d7d471b 946a274 6f874f7 946a274 d7d471b d55b380 d7d471b 6f874f7 9670a56 bf10da6 9670a56 af926fe d7d471b 6f874f7 d55b380 d7d471b 6f874f7 9670a56 6f874f7 9670a56 6f874f7 af926fe 6f874f7 af926fe 6f874f7 d7d471b 6f874f7 d7d471b 6f874f7 946a274 d7d471b 946a274 6f874f7 946a274 39bca12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import gradio as gr
from gradio_client import Client
import os
import zipfile
from huggingface_hub import HfApi
import logging
import time # Import time module for adding delays
# Configure logging once at import time: INFO and above, with each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
# Send one prompt to the hosted chat model and hand back its reply
def call_api(prompt):
    """Submit *prompt* to the "MiniMaxAI/MiniMax-Text-01" Space and return the reply.

    Args:
        prompt: Full text sent as the chat message. Only its first 100
            characters are written to the log.

    Returns:
        Whatever ``Client.predict`` returns for the ``/chat`` endpoint
        (callers in this file write it to a text file, so presumably a
        string -- confirm against the Space's API).

    Raises:
        gr.Error: If connecting to the Space or the prediction itself fails.
    """
    try:
        # A fresh client is created on every call rather than reused.
        space_client = Client("MiniMaxAI/MiniMax-Text-01")

        # Log the first 100 chars of the prompt
        preview = prompt[:100]
        logger.info(f"Calling API with prompt: {preview}...")

        request_kwargs = {
            "message": prompt,
            "max_tokens": 12800,
            "temperature": 0.1,
            "top_p": 0.9,
            "api_name": "/chat",
        }
        reply = space_client.predict(**request_kwargs)

        logger.info("API call successful.")
        return reply
    except Exception as err:
        logger.error(f"API call failed: {err}")
        raise gr.Error(f"API call failed: {str(err)}")
# Function to segment the text file into overlapping chunks of words
def segment_text(file_path, *, chunk_size=1500, stride=1250):
    """Read a text file and split it into overlapping, word-based chunks.

    The file is decoded as UTF-8; if that fails with a decode error it is
    re-read as latin-1. The text is split on whitespace and grouped into
    windows of up to ``chunk_size`` words whose start positions are
    ``stride`` words apart. With the defaults, consecutive chunks share
    250 words.

    Args:
        file_path: Path of the text file to read.
        chunk_size: Maximum number of words per chunk.
        stride: Number of words between the starts of consecutive chunks.

    Returns:
        A list of strings, each the words of one chunk joined by single
        spaces. Empty when the file contains no words.

    Raises:
        ValueError: If ``chunk_size`` or ``stride`` is not positive.
        gr.Error: If the file cannot be read with either encoding.
    """
    if chunk_size <= 0 or stride <= 0:
        raise ValueError("chunk_size and stride must be positive integers.")

    try:
        logger.info(f"Reading file: {file_path}")
        try:
            # Try reading with UTF-8 encoding first
            with open(file_path, "r", encoding="utf-8") as f:
                text = f.read()
            logger.info("File read successfully with UTF-8 encoding.")
        except UnicodeDecodeError:
            logger.warning("UTF-8 encoding failed. Trying latin-1 encoding.")
            # Fallback to latin-1 encoding if UTF-8 fails. This read sits
            # inside the outer try so an I/O failure here is reported the
            # same way as a failure of the first read.
            with open(file_path, "r", encoding="latin-1") as f:
                text = f.read()
            logger.info("File read successfully with latin-1 encoding.")
    except Exception as e:
        logger.error(f"Failed to read file: {e}")
        raise gr.Error(f"Failed to read file: {str(e)}") from e

    words = text.split()
    chunks = []
    for start in range(0, len(words), stride):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Once a window reaches the end of the text, any later window would
        # only repeat words already present in this chunk, so stop here.
        if start + chunk_size >= len(words):
            break

    logger.info(f"Segmented text into {len(chunks)} chunks.")
    return chunks
# Function to process the text file and make API calls with rate limiting
def process_text(file, prompt):
    """Run every chunk of an uploaded text file through the API and archive the results.

    For each chunk returned by ``segment_text`` this sends
    ``"{prompt}\\n\\n{chunk}"`` to ``call_api``, writes the reply to
    ``outputs/output_<idx>.txt``, and uploads that file to the
    "TeacherPuffy/book4" dataset repository on the Hugging Face Hub. It
    sleeps 15 seconds between chunks. Finally the files written during this
    call are bundled into ``outputs.zip``.

    Args:
        file: The uploaded file; either an object with a ``name`` attribute
            holding its path, or the path itself.
        prompt: Instruction text placed in front of every chunk.

    Returns:
        A ``(zip_path, status_message)`` tuple for the two Gradio outputs.

    Raises:
        gr.Error: If no file was supplied, the ``HUGGINGFACE_TOKEN``
            environment variable is missing, or reading, API calls,
            uploading, or zipping fails.
    """
    try:
        logger.info("Starting text processing...")

        if file is None:
            raise gr.Error("Please upload a text file before submitting.")

        # Segment the text file into chunks
        file_path = file.name if hasattr(file, "name") else file
        chunks = segment_text(file_path)

        # Initialize Hugging Face API
        token = os.environ.get("HUGGINGFACE_TOKEN")
        if not token:
            raise ValueError("Hugging Face token not found in environment variables.")
        hf_api = HfApi(token=token)

        # Repository name on Hugging Face Hub
        repo_name = "TeacherPuffy/book4"

        os.makedirs("outputs", exist_ok=True)
        # Track the files written in this call so the ZIP does not pick up
        # leftovers from earlier runs that are still in the outputs folder.
        output_files = []
        total_chunks = len(chunks)

        # Process each chunk with a 15-second delay between API calls
        for idx, chunk in enumerate(chunks):
            logger.info(f"Processing chunk {idx + 1}/{total_chunks}")

            # Call the API and save the result to a file
            try:
                result = call_api(f"{prompt}\n\n{chunk}")
                logger.info(f"Chunk {idx + 1} processed successfully.")

                output_file = f"outputs/output_{idx}.txt"
                with open(output_file, "w", encoding="utf-8") as f:
                    f.write(result)
                output_files.append(output_file)
                logger.info(f"Saved result to {output_file}")
            except Exception as e:
                logger.error(f"Failed to process chunk {idx + 1}: {e}")
                raise gr.Error(f"Failed to process chunk {idx + 1}: {str(e)}") from e

            # Upload the chunk as an individual text file to Hugging Face
            try:
                logger.info(f"Uploading chunk {idx + 1} to Hugging Face...")
                hf_api.upload_file(
                    path_or_fileobj=output_file,
                    path_in_repo=f"output_{idx}.txt",  # File name in the repository
                    repo_id=repo_name,
                    repo_type="dataset",
                )
                logger.info(f"Chunk {idx + 1} uploaded to Hugging Face successfully.")
            except Exception as e:
                logger.error(f"Failed to upload chunk {idx + 1} to Hugging Face: {e}")
                raise gr.Error(
                    f"Failed to upload chunk {idx + 1} to Hugging Face: {str(e)}"
                ) from e

            # Wait 15 seconds before the next API call
            if idx < total_chunks - 1:  # No need to wait after the last chunk
                logger.info("Waiting 15 seconds before the next API call...")
                time.sleep(15)

        # Create a ZIP file of the outputs produced by this call
        try:
            logger.info("Creating ZIP file...")
            with zipfile.ZipFile("outputs.zip", "w") as zipf:
                for produced_path in output_files:
                    # Store each file flat, under its bare file name.
                    zipf.write(produced_path, os.path.basename(produced_path))
            logger.info("ZIP file created successfully.")
        except Exception as e:
            logger.error(f"Failed to create ZIP file: {e}")
            raise gr.Error(f"Failed to create ZIP file: {str(e)}") from e

        return "outputs.zip", "All chunks processed and uploaded to Hugging Face. ZIP file created."
    except gr.Error:
        # Already a user-facing error with a specific message; pass it
        # through instead of wrapping it in another layer.
        raise
    except Exception as e:
        logger.error(f"An error occurred during processing: {e}")
        raise gr.Error(f"An error occurred: {str(e)}") from e
# Gradio interface: one row of inputs (file + prompt), one row of outputs
# (ZIP download + status text), and a button that runs process_text.
with gr.Blocks() as demo:
    gr.Markdown("## Text File Processor with Rate-Limited API Calls")

    with gr.Row():
        file_input = gr.File(label="Upload Text File")
        prompt_input = gr.Textbox(label="Enter Prompt")

    with gr.Row():
        output_zip = gr.File(label="Download ZIP File")
        output_message = gr.Textbox(label="Status Message")

    submit_button = gr.Button("Submit")
    # process_text returns (zip_path, status_message), matching the order
    # of the two output components below.
    submit_button.click(
        process_text,
        inputs=[file_input, prompt_input],
        outputs=[output_zip, output_message],
    )

# Launch the Gradio app with a public link
demo.launch(share=True)