TeacherPuffy committed on
Commit
6f874f7
·
verified ·
1 Parent(s): d55b380

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -27
app.py CHANGED
@@ -5,7 +5,7 @@ import zipfile
5
  from datasets import Dataset
6
  from huggingface_hub import HfApi
7
  import logging
8
- from datetime import datetime
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -55,7 +55,7 @@ def segment_text(file_path):
55
  logger.info(f"Segmented text into {len(chunks)} chunks.")
56
  return chunks
57
 
58
- # Function to process the text file and make API calls
59
  def process_text(file, prompt):
60
  try:
61
  logger.info("Starting text processing...")
@@ -64,40 +64,48 @@ def process_text(file, prompt):
64
  file_path = file.name if hasattr(file, "name") else file
65
  chunks = segment_text(file_path)
66
 
67
- # Perform API calls for each chunk
 
 
 
 
 
68
  results = []
69
  for idx, chunk in enumerate(chunks):
70
  logger.info(f"Processing chunk {idx + 1}/{len(chunks)}")
71
  try:
 
72
  result = call_api(f"{prompt}\n\n{chunk}")
73
  results.append(result)
74
  logger.info(f"Chunk {idx + 1} processed successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  except Exception as e:
76
  logger.error(f"Failed to process chunk {idx + 1}: {e}")
77
  raise gr.Error(f"Failed to process chunk {idx + 1}: {str(e)}")
78
 
79
- # Save results as individual text files
80
- os.makedirs("outputs", exist_ok=True)
81
- for idx, result in enumerate(results):
82
- output_file = f"outputs/output_{idx}.txt"
83
- with open(output_file, "w", encoding="utf-8") as f:
84
- f.write(result)
85
- logger.info(f"Saved result to {output_file}")
86
-
87
- # Upload to Hugging Face dataset
88
- try:
89
- logger.info("Uploading results to Hugging Face dataset...")
90
- hf_api = HfApi(token=os.environ.get("HUGGINGFACE_TOKEN"))
91
- if not hf_api.token:
92
- raise ValueError("Hugging Face token not found in environment variables.")
93
- dataset = Dataset.from_dict({"text": results})
94
- dataset.push_to_hub("TeacherPuffy/book") # Updated dataset name
95
- logger.info("Results uploaded to Hugging Face dataset successfully.")
96
- except Exception as e:
97
- logger.error(f"Failed to upload to Hugging Face: {e}")
98
- raise gr.Error(f"Failed to upload to Hugging Face: {str(e)}")
99
-
100
- # Create a ZIP file
101
  try:
102
  logger.info("Creating ZIP file...")
103
  with zipfile.ZipFile("outputs.zip", "w") as zipf:
@@ -109,7 +117,7 @@ def process_text(file, prompt):
109
  logger.error(f"Failed to create ZIP file: {e}")
110
  raise gr.Error(f"Failed to create ZIP file: {str(e)}")
111
 
112
- return "outputs.zip", "Results uploaded to Hugging Face dataset and ZIP file created."
113
 
114
  except Exception as e:
115
  logger.error(f"An error occurred during processing: {e}")
@@ -117,7 +125,7 @@ def process_text(file, prompt):
117
 
118
  # Gradio interface
119
  with gr.Blocks() as demo:
120
- gr.Markdown("## Text File Processor with API Calls")
121
  with gr.Row():
122
  file_input = gr.File(label="Upload Text File")
123
  prompt_input = gr.Textbox(label="Enter Prompt")
 
5
  from datasets import Dataset
6
  from huggingface_hub import HfApi
7
  import logging
8
+ import time # Import time module for adding delays
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 
55
  logger.info(f"Segmented text into {len(chunks)} chunks.")
56
  return chunks
57
 
58
+ # Function to process the text file and make API calls with rate limiting
59
  def process_text(file, prompt):
60
  try:
61
  logger.info("Starting text processing...")
 
64
  file_path = file.name if hasattr(file, "name") else file
65
  chunks = segment_text(file_path)
66
 
67
+ # Initialize Hugging Face API
68
+ hf_api = HfApi(token=os.environ.get("HUGGINGFACE_TOKEN"))
69
+ if not hf_api.token:
70
+ raise ValueError("Hugging Face token not found in environment variables.")
71
+
72
+ # Process each chunk with a 20-second delay between API calls
73
  results = []
74
  for idx, chunk in enumerate(chunks):
75
  logger.info(f"Processing chunk {idx + 1}/{len(chunks)}")
76
  try:
77
+ # Call the API
78
  result = call_api(f"{prompt}\n\n{chunk}")
79
  results.append(result)
80
  logger.info(f"Chunk {idx + 1} processed successfully.")
81
+
82
+ # Save the result to a file
83
+ os.makedirs("outputs", exist_ok=True)
84
+ output_file = f"outputs/output_{idx}.txt"
85
+ with open(output_file, "w", encoding="utf-8") as f:
86
+ f.write(result)
87
+ logger.info(f"Saved result to {output_file}")
88
+
89
+ # Upload the chunk to Hugging Face
90
+ try:
91
+ logger.info(f"Uploading chunk {idx + 1} to Hugging Face...")
92
+ dataset = Dataset.from_dict({"text": [result]})
93
+ dataset.push_to_hub("TeacherPuffy/book") # Updated dataset name
94
+ logger.info(f"Chunk {idx + 1} uploaded to Hugging Face successfully.")
95
+ except Exception as e:
96
+ logger.error(f"Failed to upload chunk {idx + 1} to Hugging Face: {e}")
97
+ raise gr.Error(f"Failed to upload chunk {idx + 1} to Hugging Face: {str(e)}")
98
+
99
+ # Wait 20 seconds before the next API call
100
+ if idx < len(chunks) - 1: # No need to wait after the last chunk
101
+ logger.info("Waiting 20 seconds before the next API call...")
102
+ time.sleep(20)
103
+
104
  except Exception as e:
105
  logger.error(f"Failed to process chunk {idx + 1}: {e}")
106
  raise gr.Error(f"Failed to process chunk {idx + 1}: {str(e)}")
107
 
108
+ # Create a ZIP file of all outputs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  try:
110
  logger.info("Creating ZIP file...")
111
  with zipfile.ZipFile("outputs.zip", "w") as zipf:
 
117
  logger.error(f"Failed to create ZIP file: {e}")
118
  raise gr.Error(f"Failed to create ZIP file: {str(e)}")
119
 
120
+ return "outputs.zip", "All chunks processed and uploaded to Hugging Face. ZIP file created."
121
 
122
  except Exception as e:
123
  logger.error(f"An error occurred during processing: {e}")
 
125
 
126
  # Gradio interface
127
  with gr.Blocks() as demo:
128
+ gr.Markdown("## Text File Processor with Rate-Limited API Calls")
129
  with gr.Row():
130
  file_input = gr.File(label="Upload Text File")
131
  prompt_input = gr.Textbox(label="Enter Prompt")