Tonic committed on
Commit
22a3dc8
·
verified ·
1 Parent(s): 5eddd0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -157,9 +157,11 @@ def query_vectara(text):
157
  else:
158
  return f"Error: {response.status_code}"
159
 
160
- def clean_text(text):
161
- # Function to clean text using regex
162
- cleaned_text = re.sub(r'[^\w\s]', '', text) # Remove special characters except spaces
 
 
163
  return cleaned_text
164
 
165
  def evaluate_content(user_input):
@@ -169,8 +171,11 @@ def evaluate_content(user_input):
169
  summary = vectara_response_json.get("summary", "")
170
  sources = vectara_response_json.get("sources", [])
171
 
172
- # Clean summary text
173
- summary_clean = clean_text(summary)
 
 
 
174
 
175
  # Process sources to extract and clean necessary information
176
  sources_info = ""
@@ -185,7 +190,7 @@ def evaluate_content(user_input):
185
 
186
  sources_info += f"Title: {title_clean}, Author: {author_clean}, Page: {page_number}\n"
187
 
188
- # Generate text based on the cleaned summary
189
  olmo_output = generate_text(summary_clean)
190
  olmo_output_clean = clean_text(olmo_output)
191
 
 
157
  else:
158
  return f"Error: {response.status_code}"
159
 
160
+ def remove_references(text):
161
+ # Regex pattern to find references like [1], [1][2], etc.
162
+ pattern = r'\[\d+\]+'
163
+ # Replace found patterns with an empty string
164
+ cleaned_text = re.sub(pattern, '', text)
165
  return cleaned_text
166
 
167
  def evaluate_content(user_input):
 
171
  summary = vectara_response_json.get("summary", "")
172
  sources = vectara_response_json.get("sources", [])
173
 
174
+ # Remove references from the summary text
175
+ summary_no_refs = remove_references(summary)
176
+
177
+ # Clean summary text to remove special characters
178
+ summary_clean = clean_text(summary_no_refs)
179
 
180
  # Process sources to extract and clean necessary information
181
  sources_info = ""
 
190
 
191
  sources_info += f"Title: {title_clean}, Author: {author_clean}, Page: {page_number}\n"
192
 
193
+ # Generate text based on the cleaned and reference-removed summary
194
  olmo_output = generate_text(summary_clean)
195
  olmo_output_clean = clean_text(olmo_output)
196