Update app.py
app.py CHANGED
@@ -109,12 +109,11 @@ MODEL_CONTEXT_SIZES = {
         "mistralai/Mistral-7B-Instruct-v0.3": 32768,
         "microsoft/phi-3-mini-4k-instruct": 4096,
         "microsoft/Phi-3.5-mini-instruct": 4096,
-        "microsoft/Phi-3-mini-128k-instruct": 131072,
+        "microsoft/Phi-3-mini-128k-instruct": 131072,
         "HuggingFaceH4/zephyr-7b-beta": 8192,
         "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
         "google/gemma-2-2b-it": 2048,
         "microsoft/phi-2": 2048,
-        # Add other model contexts here
     }
 }
 
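With the Phi-3-mini-128k entry kept in the table, callers can resolve a context window by model id. A minimal sketch of how such a nested provider-to-model table is typically consumed; the "HuggingFace Models" key, the get_context_size helper, and the 4096 fallback are illustrative assumptions, not part of this commit:

MODEL_CONTEXT_SIZES = {
    "HuggingFace Models": {
        "microsoft/Phi-3-mini-128k-instruct": 131072,
        "microsoft/phi-2": 2048,
    },
}

def get_context_size(provider: str, model_id: str, default: int = 4096) -> int:
    # Unknown providers or models fall back to a conservative default.
    return MODEL_CONTEXT_SIZES.get(provider, {}).get(model_id, default)

print(get_context_size("HuggingFace Models", "microsoft/Phi-3-mini-128k-instruct"))  # 131072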
@@ -522,14 +521,28 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
     elif model_selection == "GLHF API":
         if not glhf_api_key:
             return "Error: GLHF API key required", None
-        summary = send_to_glhf(
-            prompt,
-            glhf_model == "Use HuggingFace Model",
-            hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice],
-            glhf_custom_model,
-            glhf_api_key,
-            use_rate_limits
-        )
+
+        # Determine which model to send; send_to_glhf builds the "hf:" id itself
+        use_hf = glhf_model == "Use HuggingFace Model"
+        model_name = (hf_custom_model if hf_model_choice == "Custom Model"
+                      else model_registry.hf_models[hf_model_choice])
+
+        summary = send_to_glhf(prompt, use_hf, model_name, glhf_custom_model,
+                               glhf_api_key, use_rate_limits)
+
+        if not summary:
+            return "Error: No response from model", None
+
+        if not isinstance(summary, str):
+            return "Error: Invalid response type from model", None
+
+        # Create download file for valid responses
+        if not summary.startswith("Error"):
+            with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as f:
+                f.write(summary)
+            return summary, f.name
+
+        return summary, None
 
     else:
         return "Error: Invalid model selection", None
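The download branch hinges on NamedTemporaryFile(delete=False): the handler must return a path that still exists after the function exits so the UI can serve the file. A self-contained sketch of that pattern, assuming any string result:

import tempfile

def to_download_file(text: str) -> str:
    # delete=False keeps the file on disk after the context manager closes it,
    # so the returned path can be handed to a file-download component.
    with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as f:
        f.write(text)
        return f.name

print(to_download_file("example summary"))  # e.g. /tmp/tmpab12cd34.txt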
@@ -583,7 +596,7 @@ def send_to_hf_inference(prompt: str, model_name: str, api_key: str = None, use_
 
 def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
                  api_key: str, use_rate_limit: bool = False) -> str:
-    """Send prompt to GLHF API with model selection
+    """Send prompt to GLHF API with model selection."""
     def _send():
         try:
             import openai
@@ -594,42 +607,22 @@ def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model:
 
             model_id = f"hf:{model_name if use_hf_model else custom_model}"
 
-            # Try non-streaming first
-            try:
-                completion = client.chat.completions.create(
-                    stream=False,
-                    model=model_id,
-                    messages=[
-                        {"role": "system", "content": "You are a helpful assistant."},
-                        {"role": "user", "content": prompt}
-                    ],
-                )
-                return completion.choices[0].message.content
-            except Exception as non_stream_error:
-                logging.warning(f"Non-streaming GLHF failed, trying streaming: {non_stream_error}")
-
-                # Fallback to streaming if needed
-                completion = client.chat.completions.create(
-                    stream=True,
-                    model=model_id,
-                    messages=[
-                        {"role": "system", "content": "You are a helpful assistant."},
-                        {"role": "user", "content": prompt}
-                    ],
-                )
+            # For GLHF, always use streaming for reliability
+            completion = client.chat.completions.create(
+                stream=True,
+                model=model_id,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": prompt}
+                ],
+            )
 
-                response_text = []
-                try:
-                    for chunk in completion:
-                        if chunk.choices[0].delta.content is not None:
-                            response_text.append(chunk.choices[0].delta.content)
-                except Exception as stream_error:
-                    if response_text:  # If we got partial response, return it
-                        logging.warning(f"Streaming interrupted but got partial response: {stream_error}")
-                        return "".join(response_text)
-                    raise  # Re-raise if we got nothing
+            response_text = []
+            for chunk in completion:
+                if chunk.choices[0].delta.content is not None:
+                    response_text.append(chunk.choices[0].delta.content)
 
-                return "".join(response_text)
+            return "".join(response_text)
 
         except Exception as e:
             logging.error(f"GLHF API error: {e}")
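The commit trades the old try-non-streaming-then-fall-back logic for a single streaming call, so a mid-stream failure now propagates to the outer except and comes back as an error string rather than a partial result. A self-contained sketch of the streaming pattern against any OpenAI-compatible endpoint; the key, base URL, and model id below are placeholders, not values from this commit:

import openai

client = openai.OpenAI(api_key="YOUR_KEY", base_url="https://example.com/v1")  # placeholder endpoint

completion = client.chat.completions.create(
    stream=True,
    model="some-model-id",  # placeholder
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello."},
    ],
)

# Each chunk carries an incremental delta; the reply is the join of the parts.
parts = []
for chunk in completion:
    if chunk.choices[0].delta.content is not None:
        parts.append(chunk.choices[0].delta.content)
print("".join(parts))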
@@ -702,41 +695,27 @@ def send_to_cohere(prompt: str, api_key: str = None, model: str = None, use_rate
 
     return apply_rate_limit(_send, 16) if use_rate_limit else _send()
 
-def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
-                 api_key: str, use_rate_limit: bool = False) -> str:
-    """Send prompt to GLHF API with model selection."""
+def send_to_groq(prompt: str, model_name: str, api_key: str, use_rate_limit: bool = False) -> str:
+    """Send prompt to Groq API."""
     def _send():
         try:
-            import openai
-
-            client = openai.OpenAI(
-                api_key=api_key,
-                base_url="https://glhf.chat/api/openai/v1",
-            )
-
-            model_id = f"hf:{model_name if use_hf_model else custom_model}"
-
-            completion = client.chat.completions.create(
-                stream=True,
-                model=model_id,
-                messages=[
-                    {"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": prompt}
-                ],
+            client = Groq(api_key=api_key)
+            response = client.chat.completions.create(
+                model=model_name,
+                messages=[{
+                    "role": "user",
+                    "content": prompt
+                }],
+                temperature=0.7,
+                max_tokens=500,
+                top_p=0.95
             )
-
-            response_text = []
-            for chunk in completion:
-                if chunk.choices[0].delta.content is not None:
-                    response_text.append(chunk.choices[0].delta.content)
-
-            return "".join(response_text)
-
+            return response.choices[0].message.content
         except Exception as e:
-            logging.error(f"GLHF API error: {e}")
-            return f"Error with GLHF API: {str(e)}"
+            logging.error(f"Groq API error: {e}")
+            return f"Error with Groq API: {str(e)}"
 
-    return apply_rate_limit(_send, 16) if use_rate_limit else _send()
+    return apply_rate_limit(_send, 4) if use_rate_limit else _send()
 
 def estimate_tokens(text: str) -> int:
     """Rough token estimation: ~4 characters per token on average"""
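Every provider helper here follows the same shape: a _send closure plus `return apply_rate_limit(_send, N) if use_rate_limit else _send()`. A minimal sketch of that shape with a stand-in apply_rate_limit; the real helper's signature and semantics in app.py are assumed, not shown in this diff:

import time

def apply_rate_limit(func, interval_s: float):
    # Stand-in: space calls out by sleeping first; the app's real helper
    # presumably tracks the time of the previous call instead.
    time.sleep(interval_s)
    return func()

def send_stub(prompt: str, use_rate_limit: bool = False) -> str:
    def _send():
        return f"echo: {prompt}"
    return apply_rate_limit(_send, 4) if use_rate_limit else _send()

print(send_stub("hello", use_rate_limit=True))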
@@ -1342,20 +1321,20 @@ with gr.Blocks(css="""
     send_to_model_btn.click(
         fn=send_to_model,
         inputs=[
-            generated_prompt,
-            model_choice,
-            hf_model,
-            hf_custom_model,
-            hf_api_key,
-            groq_model,
-            groq_api_key,
-            openai_api_key,
-            openai_model,
-            cohere_api_key,
-            cohere_model,
-            glhf_api_key,
-            glhf_model,
-            glhf_custom_model
+            generated_prompt,   # prompt
+            model_choice,       # model_selection
+            hf_model,           # hf_model_choice
+            hf_custom_model,    # hf_custom_model
+            hf_api_key,         # hf_api_key
+            groq_model,         # groq_model_choice
+            groq_api_key,       # groq_api_key
+            openai_api_key,     # openai_api_key
+            openai_model,       # openai_model_choice
+            cohere_api_key,     # cohere_api_key
+            cohere_model,       # cohere_model
+            glhf_api_key,       # glhf_api_key
+            glhf_model,         # glhf_model
+            glhf_custom_model   # glhf_custom_model
         ],
         outputs=[summary_output, download_summary]
     )
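The new inline comments document Gradio's positional contract: components listed in inputs arrive as handler arguments in the same order, so each comment pins a component to its parameter. A minimal self-contained illustration of that contract, with generic components rather than the app's:

import gradio as gr

def handler(prompt, model_selection):
    # Arguments arrive in the order of the inputs list below.
    return f"{model_selection}: {prompt}"

with gr.Blocks() as demo:
    prompt_box = gr.Textbox(label="Prompt")
    model_dd = gr.Dropdown(choices=["A", "B"], value="A", label="Model")
    out = gr.Textbox(label="Output")
    gr.Button("Send").click(fn=handler, inputs=[prompt_box, model_dd], outputs=[out])

if __name__ == "__main__":
    demo.launch()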