TeacherPuffy committed
Commit d7d471b · verified · 1 Parent(s): 7328493

Update app.py

Files changed (1): app.py (+82 -35)

app.py CHANGED
@@ -4,67 +4,114 @@ import os
 import zipfile
 from datasets import Dataset
 from huggingface_hub import HfApi
+import logging
+from datetime import datetime
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
 
 # Initialize the Gradio client
 client = Client("MiniMaxAI/MiniMax-Text-01")
 
 # Function to call the API and get the result
 def call_api(prompt):
-    result = client.predict(
-        message=prompt,
-        max_tokens=12800,
-        temperature=0.1,
-        top_p=0.9,
-        api_name="/chat"
-    )
-    return result
+    try:
+        logger.info(f"Calling API with prompt: {prompt[:100]}...")  # Log the first 100 chars of the prompt
+        result = client.predict(
+            message=prompt,
+            max_tokens=12800,
+            temperature=0.1,
+            top_p=0.9,
+            api_name="/chat"
+        )
+        logger.info("API call successful.")
+        return result
+    except Exception as e:
+        logger.error(f"API call failed: {e}")
+        raise gr.Error(f"API call failed: {str(e)}")
 
 # Function to segment the text file into chunks of 3000 words
 def segment_text(file_path):
     try:
+        logger.info(f"Reading file: {file_path}")
         # Try reading with UTF-8 encoding first
         with open(file_path, "r", encoding="utf-8") as f:
             text = f.read()
+        logger.info("File read successfully with UTF-8 encoding.")
     except UnicodeDecodeError:
+        logger.warning("UTF-8 encoding failed. Trying latin-1 encoding.")
         # Fallback to latin-1 encoding if UTF-8 fails
         with open(file_path, "r", encoding="latin-1") as f:
             text = f.read()
+        logger.info("File read successfully with latin-1 encoding.")
+    except Exception as e:
+        logger.error(f"Failed to read file: {e}")
+        raise gr.Error(f"Failed to read file: {str(e)}")
 
     # Split the text into chunks of 3000 words
     words = text.split()
     chunks = [" ".join(words[i:i + 3000]) for i in range(0, len(words), 3000)]
+    logger.info(f"Segmented text into {len(chunks)} chunks.")
     return chunks
 
 # Function to process the text file and make parallel API calls
 def process_text(file, prompt):
-    # Segment the text file into chunks
-    chunks = segment_text(file.name)
-
-    # Perform two parallel API calls for each chunk
-    results = []
-    for chunk in chunks:
-        result1 = call_api(f"{prompt}\n\n{chunk}")
-        result2 = call_api(f"{prompt}\n\n{chunk}")
-        results.extend([result1, result2])
-
-    # Save results as individual text files
-    os.makedirs("outputs", exist_ok=True)
-    for idx, result in enumerate(results):
-        with open(f"outputs/output_{idx}.txt", "w", encoding="utf-8") as f:
-            f.write(result)
-
-    # Upload to Hugging Face dataset
-    hf_api = HfApi(token=os.environ["HUGGINGFACE_TOKEN"])
-    dataset = Dataset.from_dict({"text": results})
-    dataset.push_to_hub("TeacherPuffy/book")
-
-    # Create a ZIP file
-    with zipfile.ZipFile("outputs.zip", "w") as zipf:
-        for root, dirs, files in os.walk("outputs"):
-            for file in files:
-                zipf.write(os.path.join(root, file), file)
-
-    return "outputs.zip", "Results uploaded to Hugging Face dataset and ZIP file created."
+    try:
+        logger.info("Starting text processing...")
+
+        # Segment the text file into chunks
+        chunks = segment_text(file.name)
+
+        # Perform two parallel API calls for each chunk
+        results = []
+        for idx, chunk in enumerate(chunks):
+            logger.info(f"Processing chunk {idx + 1}/{len(chunks)}")
+            try:
+                result1 = call_api(f"{prompt}\n\n{chunk}")
+                result2 = call_api(f"{prompt}\n\n{chunk}")
+                results.extend([result1, result2])
+                logger.info(f"Chunk {idx + 1} processed successfully.")
+            except Exception as e:
+                logger.error(f"Failed to process chunk {idx + 1}: {e}")
+                raise gr.Error(f"Failed to process chunk {idx + 1}: {str(e)}")
+
+        # Save results as individual text files
+        os.makedirs("outputs", exist_ok=True)
+        for idx, result in enumerate(results):
+            output_file = f"outputs/output_{idx}.txt"
+            with open(output_file, "w", encoding="utf-8") as f:
+                f.write(result)
+            logger.info(f"Saved result to {output_file}")
+
+        # Upload to Hugging Face dataset
+        try:
+            logger.info("Uploading results to Hugging Face dataset...")
+            hf_api = HfApi(token=os.environ["HUGGINGFACE_TOKEN"])
+            dataset = Dataset.from_dict({"text": results})
+            dataset.push_to_hub("TeacherPuffy/book")  # Updated dataset name
+            logger.info("Results uploaded to Hugging Face dataset successfully.")
+        except Exception as e:
+            logger.error(f"Failed to upload to Hugging Face: {e}")
+            raise gr.Error(f"Failed to upload to Hugging Face: {str(e)}")
+
+        # Create a ZIP file
+        try:
+            logger.info("Creating ZIP file...")
+            with zipfile.ZipFile("outputs.zip", "w") as zipf:
+                for root, dirs, files in os.walk("outputs"):
+                    for file in files:
+                        zipf.write(os.path.join(root, file), file)
+            logger.info("ZIP file created successfully.")
+        except Exception as e:
+            logger.error(f"Failed to create ZIP file: {e}")
+            raise gr.Error(f"Failed to create ZIP file: {str(e)}")
+
+        return "outputs.zip", "Results uploaded to Hugging Face dataset and ZIP file created."
+
+    except Exception as e:
+        logger.error(f"An error occurred during processing: {e}")
+        raise gr.Error(f"An error occurred: {str(e)}")
 
 # Gradio interface
 with gr.Blocks() as demo:
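
Two review notes on the new version, not part of the commit itself:

- The comment above the per-chunk loop still says "two parallel API calls", but result1 and result2 are requested one after the other, so each chunk waits on two sequential round trips. Below is a minimal sketch of one way to make the two calls actually run concurrently, assuming call_api is safe to invoke from worker threads; the process_chunk helper is hypothetical and does not appear in the commit.

      from concurrent.futures import ThreadPoolExecutor

      def process_chunk(chunk, prompt):
          # Hypothetical helper (not in the commit): submit both calls for the
          # same chunk to a two-worker pool and block until both results arrive.
          with ThreadPoolExecutor(max_workers=2) as pool:
              futures = [pool.submit(call_api, f"{prompt}\n\n{chunk}") for _ in range(2)]
              return [f.result() for f in futures]

- hf_api is constructed but never used, and from datetime import datetime is imported but never referenced. If the intent was to authenticate the upload explicitly, Dataset.push_to_hub accepts the token directly, e.g. dataset.push_to_hub("TeacherPuffy/book", token=os.environ["HUGGINGFACE_TOKEN"]).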