cstr committed
Commit ebf5837 · verified · 1 Parent(s): 19a6585

Update app.py

Files changed (1)
  1. app.py +68 -89
app.py CHANGED
@@ -109,12 +109,11 @@ MODEL_CONTEXT_SIZES = {
         "mistralai/Mistral-7B-Instruct-v0.3": 32768,
         "microsoft/phi-3-mini-4k-instruct": 4096,
         "microsoft/Phi-3.5-mini-instruct": 4096,
-        "microsoft/Phi-3-mini-128k-instruct": 131072, # Added Phi-3 128k
+        "microsoft/Phi-3-mini-128k-instruct": 131072,
         "HuggingFaceH4/zephyr-7b-beta": 8192,
         "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
         "google/gemma-2-2b-it": 2048,
         "microsoft/phi-2": 2048,
-        # Add other model contexts here
     }
 }
 
@@ -522,14 +521,28 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
     elif model_selection == "GLHF API":
         if not glhf_api_key:
             return "Error: GLHF API key required", None
-        summary = send_to_glhf(
-            prompt,
-            glhf_model == "Use HuggingFace Model",
-            hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice],
-            glhf_custom_model,
-            glhf_api_key,
-            use_rate_limits
-        )
+
+        # Determine the actual model ID to use
+        if glhf_model == "Use HuggingFace Model":
+            model_id = f"hf:{hf_custom_model if hf_model_choice == 'Custom Model' else model_registry.hf_models[hf_model_choice]}"
+        else:
+            model_id = f"hf:{glhf_custom_model}"
+
+        summary = send_to_glhf(prompt, glhf_api_key, model_id, use_rate_limits)
+
+        if not summary:
+            return "Error: No response from model", None
+
+        if not isinstance(summary, str):
+            return "Error: Invalid response type from model", None
+
+        # Create download file for valid responses
+        if not summary.startswith("Error"):
+            with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as f:
+                f.write(summary)
+            return summary, f.name
+
+        return summary, None
 
     else:
         return "Error: Invalid model selection", None
@@ -583,7 +596,7 @@ def send_to_hf_inference(prompt: str, model_name: str, api_key: str = None, use_
 
 def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
                  api_key: str, use_rate_limit: bool = False) -> str:
-    """Send prompt to GLHF API with model selection and proper stream handling."""
+    """Send prompt to GLHF API with model selection."""
     def _send():
         try:
             import openai
@@ -594,42 +607,22 @@ def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model:
 
             model_id = f"hf:{model_name if use_hf_model else custom_model}"
 
-            try:
-                # First try without streaming
-                completion = client.chat.completions.create(
-                    stream=False,
-                    model=model_id,
-                    messages=[
-                        {"role": "system", "content": "You are a helpful assistant."},
-                        {"role": "user", "content": prompt}
-                    ],
-                )
-                return completion.choices[0].message.content
-            except Exception as non_stream_error:
-                logging.warning(f"Non-streaming GLHF failed, trying streaming: {non_stream_error}")
-
-                # Fallback to streaming if needed
-                completion = client.chat.completions.create(
-                    stream=True,
-                    model=model_id,
-                    messages=[
-                        {"role": "system", "content": "You are a helpful assistant."},
-                        {"role": "user", "content": prompt}
-                    ],
-                )
+            # For GLHF, always use streaming for reliability
+            completion = client.chat.completions.create(
+                stream=True,
+                model=model_id,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": prompt}
+                ],
+            )
 
-                response_text = []
-                try:
-                    for chunk in completion:
-                        if chunk.choices and chunk.choices[0].delta.content is not None:
-                            response_text.append(chunk.choices[0].delta.content)
-                except Exception as stream_error:
-                    if response_text: # If we got partial response, return it
-                        logging.warning(f"Streaming interrupted but got partial response: {stream_error}")
-                        return "".join(response_text)
-                    raise # Re-raise if we got nothing
+            response_text = []
+            for chunk in completion:
+                if chunk.choices[0].delta.content is not None:
+                    response_text.append(chunk.choices[0].delta.content)
 
-                return "".join(response_text)
+            return "".join(response_text)
 
         except Exception as e:
             logging.error(f"GLHF API error: {e}")
@@ -702,41 +695,27 @@ def send_to_cohere(prompt: str, api_key: str = None, model: str = None, use_rate
 
     return apply_rate_limit(_send, 16) if use_rate_limit else _send()
 
-def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
-                 api_key: str, use_rate_limit: bool = False) -> str:
-    """Send prompt to GLHF API with model selection."""
+def send_to_groq(prompt: str, model_name: str, api_key: str, use_rate_limit: bool = False) -> str:
+    """Send prompt to Groq API."""
     def _send():
         try:
-            import openai
-            client = openai.OpenAI(
-                api_key=api_key,
-                base_url="https://glhf.chat/api/openai/v1",
-            )
-
-            model_id = f"hf:{model_name if use_hf_model else custom_model}"
-
-            # For GLHF, always use streaming for reliability
-            completion = client.chat.completions.create(
-                stream=True,
-                model=model_id,
-                messages=[
-                    {"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": prompt}
-                ],
+            client = Groq(api_key=api_key)
+            response = client.chat.completions.create(
+                model=model_name,
+                messages=[{
+                    "role": "user",
+                    "content": prompt
+                }],
+                temperature=0.7,
+                max_tokens=500,
+                top_p=0.95
             )
-
-            response_text = []
-            for chunk in completion:
-                if chunk.choices[0].delta.content is not None:
-                    response_text.append(chunk.choices[0].delta.content)
-
-            return "".join(response_text)
+            return response.choices[0].message.content
         except Exception as e:
-            logging.error(f"GLHF API error: {e}")
-            return f"Error with GLHF API: {str(e)}"
+            logging.error(f"Groq API error: {e}")
+            return f"Error with Groq API: {str(e)}"
 
-    return apply_rate_limit(_send, 384) if use_rate_limit else _send()
+    return apply_rate_limit(_send, 4) if use_rate_limit else _send()
 
 def estimate_tokens(text: str) -> int:
     """Rough token estimation: ~4 characters per token on average"""
@@ -1342,20 +1321,20 @@ with gr.Blocks(css="""
     send_to_model_btn.click(
         fn=send_to_model,
         inputs=[
-            generated_prompt,
-            model_choice,
-            hf_model,
-            hf_custom_model,
-            hf_api_key,
-            groq_model,
-            groq_api_key,
-            openai_api_key,
-            openai_model,
-            cohere_api_key,
-            cohere_model,
-            glhf_api_key,
-            glhf_model,
-            glhf_custom_model
+            generated_prompt,    # prompt
+            model_choice,        # model_selection
+            hf_model,            # hf_model_choice
+            hf_custom_model,     # hf_custom_model
+            hf_api_key,          # hf_api_key
+            groq_model,          # groq_model_choice
+            groq_api_key,        # groq_api_key
+            openai_api_key,      # openai_api_key
+            openai_model,        # openai_model_choice
+            cohere_api_key,      # cohere_api_key
+            cohere_model,        # cohere_model
+            glhf_api_key,        # glhf_api_key
+            glhf_model,          # glhf_model
+            glhf_custom_model    # glhf_custom_model
         ],
         outputs=[summary_output, download_summary]
     )
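
For reference, the streaming pattern the rewritten send_to_glhf standardizes on (after the non-streaming-first variant was removed) can be exercised outside the app. The sketch below is not part of the commit: it assumes the openai v1 Python client and uses placeholder values for the API key and model; the base URL and the hf: model prefix are taken directly from the diff.

import openai

# Minimal sketch, not from the commit. Placeholders: API key and model id.
client = openai.OpenAI(
    api_key="YOUR_GLHF_API_KEY",                    # placeholder
    base_url="https://glhf.chat/api/openai/v1",     # from the diff
)

completion = client.chat.completions.create(
    stream=True,                                    # the commit always streams
    model="hf:mistralai/Mistral-7B-Instruct-v0.3",  # "hf:" prefix from the diff
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ],
)

# Accumulate deltas the same way _send() does; chunks with a None
# delta (role-only or final frames) are skipped.
parts = []
for chunk in completion:
    if chunk.choices and chunk.choices[0].delta.content is not None:
        parts.append(chunk.choices[0].delta.content)
print("".join(parts))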
 
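The new send_to_groq helper follows the standard Groq SDK call shape. As a usage sketch (again not part of the commit; the model name is a placeholder and a valid Groq API key is assumed), the same request can be issued directly:

from groq import Groq

# Minimal sketch, not from the commit. Placeholders: API key and model id.
client = Groq(api_key="YOUR_GROQ_API_KEY")

response = client.chat.completions.create(
    model="llama-3.1-8b-instant",  # placeholder model id
    messages=[{"role": "user", "content": "Summarize top-p sampling in one sentence."}],
    temperature=0.7,               # sampling settings mirror the new helper
    max_tokens=500,
    top_p=0.95,
)
print(response.choices[0].message.content)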