Update app.py

app.py CHANGED
@@ -12,6 +12,8 @@ import logging
 import webbrowser
 from huggingface_hub import InferenceClient
 from typing import Dict, List, Optional, Tuple
+from functools import wraps
+import threading
 import time
 from groq import Groq # Import the Groq client
 
@@ -63,7 +65,6 @@ MODEL_CONTEXT_SIZES = {
         "microsoft/Phi-3-mini-128k-instruct": 131072, # Added Phi-3 128k
         "HuggingFaceH4/zephyr-7b-beta": 8192,
         "deepseek-ai/DeepSeek-Coder-V2-Instruct": 8192,
-        "meta-llama/Llama-3-8b-Instruct": 8192,
         "mistralai/Mistral-7B-Instruct-v0.3": 32768,
         "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
         "microsoft/Phi-3.5-mini-instruct": 4096,
@@ -103,30 +104,60 @@ MODEL_CONTEXT_SIZES = {
         "command-light-nightly": 4096,
         "c4ai-aya-expanse-8b": 8192,
         "c4ai-aya-expanse-32b": 131072,
+    },
+    "GLHF API": {
+        "mistralai/Mistral-7B-Instruct-v0.3": 32768,
+        "microsoft/phi-3-mini-4k-instruct": 4096,
+        "microsoft/Phi-3-mini-128k-instruct": 131072, # Added Phi-3 128k
+        "HuggingFaceH4/zephyr-7b-beta": 8192,
+        "mistralai/Mistral-7B-Instruct-v0.3": 32768,
+        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
+        "microsoft/Phi-3.5-mini-instruct": 4096,
+        "google/gemma-2-2b-it": 2048,
+        "microsoft/phi-2": 2048,
+        # Add other model contexts here
     }
 }
 
+class RateLimit:
+    def __init__(self, calls_per_min):
+        self.calls_per_min = calls_per_min
+        self.calls = []
+        self.lock = threading.Lock()
+
+    def __call__(self, func):
+        @wraps(func)
+        def wrapped(*args, **kwargs):
+            with self.lock:
+                now = time.time()
+                # Remove old calls
+                self.calls = [call for call in self.calls if call > now - 60]
+
+                if len(self.calls) >= self.calls_per_min:
+                    sleep_time = self.calls[0] - (now - 60)
+                    if sleep_time > 0:
+                        time.sleep(sleep_time)
+
+                self.calls.append(now)
+                return func(*args, **kwargs)
+        return wrapped
+
 class ModelRegistry:
     def __init__(self):
         # HuggingFace Models
         self.hf_models = {
-            "
-            "
-            "Zephyr 7B
-            "
-            "
-            "
-            "
-            "
-            "
-            "
-            "
-            "
-            "Gemma 2 2B": "google/gemma-2-2b-it", # Added
-            "GPT2": "openai-community/gpt2", # Added
-            "Phi-2": "microsoft/phi-2", # Added
-            "TinyLlama 1.1B": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", # Added
-            "Custom Model": "" # Keep for custom models
+            "Mixtral 7B": "mistralai/Mistral-7B-Instruct-v0.3", # works well
+            "Nous-Hermes": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", # works well
+            "Zephyr 7B": "HuggingFaceH4/zephyr-7b-beta", # works
+            "Phi-3.5 Mini": "microsoft/Phi-3.5-mini-instruct", # works but poor results
+            "Gemma 2 2B": "google/gemma-2-2b-it", # works but often busy
+            "GPT2": "openai-community/gpt2", # works with token limits
+            "Phi-2": "microsoft/phi-2", # works with token limits
+            "TinyLlama 1.1B": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", # works with token limits
+            "DeepSeek Coder V2 (Pro)": "deepseek-ai/DeepSeek-Coder-V2-Instruct", # needs API key
+            "Meta Llama 3.1 70B (Pro)": "meta-llama/Meta-Llama-3.1-70B-Instruct", # needs API key
+            "Aya 23-35B (Pro)": "CohereForAI/aya-23-35B", # needs API key
+            "Custom Model": ""
         }
 
         # Default Groq Models
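
The RateLimit class added above is a thread-safe sliding-window throttle: under a lock it discards timestamps older than 60 seconds, sleeps until the oldest recorded call ages out whenever the per-minute budget is full, then records the new call. A minimal usage sketch, assuming the RateLimit class from this commit is in scope (throttled_call is a hypothetical stand-in for a provider request):

    import time

    @RateLimit(calls_per_min=2)
    def throttled_call(i: int) -> str:
        # hypothetical stand-in for an API request
        return f"response {i}"

    for i in range(3):
        print(time.strftime("%H:%M:%S"), throttled_call(i))
    # calls 0 and 1 run immediately; call 2 blocks until the oldest
    # timestamp is more than 60 seconds old
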
@@ -369,6 +400,16 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
         return "Error: Groq API key required", None
     elif model_selection == "OpenAI ChatGPT" and not openai_api_key:
         return "Error: OpenAI API key required", None
+    elif model_selection == "GLHF API":
+        if not glhf_api_key:
+            return "Error: GLHF API key required", None
+        if glhf_model == "Use HuggingFace Model":
+            model_id = hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice]
+            summary = send_to_glhf(prompt, True, model_id, "", glhf_api_key)
+        else:
+            if not glhf_custom_model.strip():
+                return "Error: Custom model ID required", None
+            summary = send_to_glhf(prompt, False, "", glhf_custom_model.strip(), glhf_api_key)
 
     # Call implementation with error handling
     try:
@@ -408,8 +449,9 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
     logging.info("send to model completed.")
 
 def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model, hf_api_key,
-                       groq_model_choice, groq_api_key, openai_api_key, openai_model_choice
-
+                       groq_model_choice, groq_api_key, openai_api_key, openai_model_choice,
+                       cohere_api_key=None, cohere_model=None, glhf_api_key=None):
+    """Implementation of model sending with all providers."""
     logging.info("send to model impl commencing...")
 
     try:
@@ -417,12 +459,10 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
             return "Text copied to clipboard. Use paste for processing.", None
 
         if model_selection == "HuggingFace Inference":
-            # First try without API key
             model_id = hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice]
             summary = send_to_hf_inference(prompt, model_id)
-            if summary.startswith("Error"):
-
-                summary = send_to_hf_inference(prompt, model_id, hf_api_key)
+            if summary.startswith("Error") and hf_api_key:
+                summary = send_to_hf_inference(prompt, model_id, hf_api_key)
 
         elif model_selection == "Groq API":
             summary = send_to_groq(prompt, groq_model_choice, groq_api_key)
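
The reworked HuggingFace branch tries the free anonymous endpoint first and only retries with credentials when that attempt came back as an error string and a key was actually supplied, which avoids spending authenticated quota on prompts the public tier can serve. The same pattern in isolation, as a sketch (query_hf is a hypothetical wrapper following the same return-an-error-string convention as send_to_hf_inference):

    from huggingface_hub import InferenceClient

    def query_hf(prompt: str, model_id: str, api_key: str = None) -> str:
        # same convention as send_to_hf_inference: return an error string, don't raise
        try:
            client = InferenceClient(token=api_key) if api_key else InferenceClient()
            return client.text_generation(prompt, model=model_id, max_new_tokens=256)
        except Exception as e:
            return f"Error: {e}"

    api_key = None  # optional HF token; anonymous access is tried first
    summary = query_hf("Summarize: ...", "HuggingFaceH4/zephyr-7b-beta")
    if summary.startswith("Error") and api_key:
        summary = query_hf("Summarize: ...", "HuggingFaceH4/zephyr-7b-beta", api_key)
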
@@ -431,7 +471,13 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
             summary = send_to_openai(prompt, openai_api_key, model=openai_model_choice)
 
         elif model_selection == "Cohere API":
-            summary = send_to_cohere(prompt)
+            summary = send_to_cohere(prompt, cohere_api_key, cohere_model)
+
+        elif model_selection == "GLHF API":
+            if not glhf_api_key:
+                return "Error: GLHF API key required", None
+            model_id = hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice]
+            summary = send_to_glhf(prompt, model_id, glhf_api_key)
 
         else:
             return "Error: Invalid model selection", None
@@ -458,6 +504,11 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
 def send_to_hf_inference(prompt: str, model_name: str, api_key: str = None) -> str:
     """Send prompt to HuggingFace Inference API with optional authentication."""
     try:
+        # Check token limits first
+        is_within_limits, error_msg = check_token_limits(prompt, model_name)
+        if not is_within_limits:
+            return error_msg
+
         client = InferenceClient(token=api_key) if api_key else InferenceClient()
         response = client.text_generation(
             prompt,
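
check_token_limits (defined later in this diff) is deliberately crude: estimate_tokens approximates tokens as len(text) // 4, a common rule of thumb for English text, and the guard only applies to the three small models with 2048-token windows. A quick worked example under those assumptions:

    prompt = "word " * 2000   # 10,000 characters, so ~2500 estimated tokens

    ok, msg = check_token_limits(prompt, "microsoft/phi-2")
    print(ok)    # False: 2500 exceeds the 1500-token budget (2048 minus a buffer)
    print(msg)   # "Prompt too long (estimated 2500 tokens). ..."

    ok, _ = check_token_limits(prompt, "mistralai/Mistral-7B-Instruct-v0.3")
    print(ok)    # True: model not in the token-limited table, so no check applies
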
@@ -556,26 +607,92 @@ def send_to_openai(prompt: str, api_key: str, model: str = "gpt-3.5-turbo") -> s
         logging.error(f"OpenAI API error: {e}")
         raise # Re-raise to be handled by caller
 
+@RateLimit(calls_per_min=16) # 80% of 20 calls/min
 def send_to_cohere(prompt: str, api_key: str = None) -> str:
-    """Send prompt to Cohere API with
+    """Send prompt to Cohere API with V2 and V1 fallback."""
     try:
-
-
-
-
-
-
-
-
-
-
+        # Try V2 first
+        try:
+            import cohere
+            client = cohere.ClientV2(api_key) if api_key else cohere.ClientV2()
+            response = client.chat(
+                model="command-r-plus-08-2024", # Using latest model
+                messages=[{
+                    "role": "user",
+                    "content": prompt
+                }],
+                temperature=0.7,
+            )
+            return response.message.content[0].text
+        except Exception as v2_error:
+            logging.warning(f"Cohere V2 failed, trying V1: {v2_error}")
+
+            # Fallback to V1
+            client = cohere.Client(api_key) if api_key else cohere.Client()
+            response = client.chat(
+                message=prompt,
+                temperature=0.7,
+                max_tokens=500,
+            )
             return response.text
-        else:
-            return "Error: No response text from Cohere"
 
     except Exception as e:
         logging.error(f"Cohere API error: {e}")
-        return f"Error with Cohere API: {str(e)}"
+        return f"Error with Cohere API: {str(e)}"
+
+@RateLimit(calls_per_min=384) # 80% of 480/8hours = 60/hour = 1/min
+def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str, api_key: str) -> str:
+    """Send prompt to GLHF API with model selection."""
+    try:
+        import openai
+
+        client = openai.OpenAI(
+            api_key=api_key,
+            base_url="https://glhf.chat/api/openai/v1",
+        )
+
+        # Select model based on user choice
+        model_id = f"hf:{model_name if use_hf_model else custom_model}"
+
+        # Always use streaming for reliability
+        completion = client.chat.completions.create(
+            stream=True,
+            model=model_id,
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": prompt}
+            ],
+        )
+
+        response_text = []
+        for chunk in completion:
+            if chunk.choices[0].delta.content is not None:
+                response_text.append(chunk.choices[0].delta.content)
+
+        return "".join(response_text)
+
+    except Exception as e:
+        logging.error(f"GLHF API error: {e}")
+        return f"Error with GLHF API: {str(e)}"
+
+def estimate_tokens(text: str) -> int:
+    """Rough token estimation: ~4 characters per token on average"""
+    return len(text) // 4
+
+def check_token_limits(prompt: str, model_name: str) -> tuple[bool, str]:
+    """Check if prompt might exceed model's token limits."""
+    token_limited_models = {
+        "openai-community/gpt2": 1500, # 2048 - buffer
+        "microsoft/phi-2": 1500,
+        "TinyLlama/TinyLlama-1.1B-Chat-v1.0": 1500
+    }
+
+    if model_name in token_limited_models:
+        estimated_tokens = estimate_tokens(prompt)
+        max_tokens = token_limited_models[model_name]
+        if estimated_tokens > max_tokens:
+            return False, f"Prompt too long (estimated {estimated_tokens} tokens). This model supports max {max_tokens} tokens."
+    return True, ""
 
 def copy_text_js(element_id: str) -> str:
     return f"""function() {{
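
send_to_glhf works by pointing the stock openai SDK at GLHF's OpenAI-compatible endpoint and prefixing the Hugging Face repo id with hf:, and it always streams, joining the chunks so callers get the same plain-string return type as the other providers. A hypothetical call against the new five-argument signature:

    # hypothetical usage of send_to_glhf as defined above
    text = send_to_glhf(
        prompt="Summarize the attached report in five bullets.",
        use_hf_model=False,                                    # take the custom id below
        model_name="",
        custom_model="mistralai/Mixtral-8x7B-Instruct-v0.2",   # sent as "hf:mistralai/..."
        api_key="glhf_...",                                    # placeholder key
    )
    print(text)
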
@@ -808,6 +925,50 @@ with gr.Blocks(css="""
                     type="password"
                 )
                 groq_refresh_btn = gr.Button("π Refresh Groq Models") # Add refresh button
+
+            with gr.Column(visible=False) as glhf_options:
+                glhf_api_key = gr.Textbox(
+                    label="π GLHF API Key",
+                    type="password"
+                )
+                glhf_model = gr.Radio(
+                    choices=["Use HuggingFace Model", "Custom Model"],
+                    value="Use HuggingFace Model",
+                    label="Model Selection"
+                )
+                glhf_custom_model = gr.Textbox(
+                    label="Custom Model ID (owner/model format)",
+                    placeholder="e.g., mistralai/Mixtral-8x7B-Instruct-v0.2",
+                    visible=False
+                )
+
+                # Add visibility toggle
+                def toggle_glhf_custom_model(choice):
+                    return gr.update(visible=choice == "Custom Model")
+
+                glhf_model.change(
+                    toggle_glhf_custom_model,
+                    inputs=[glhf_model],
+                    outputs=[glhf_custom_model]
+                )
+
+            # Update Cohere options container
+            with gr.Column(visible=False) as cohere_options:
+                cohere_api_key = gr.Textbox(
+                    label="π Cohere API Key (optional - needed for some models)",
+                    type="password"
+                )
+                cohere_model = gr.Dropdown(
+                    choices=[
+                        "command-r-plus-08-2024",
+                        "command-r-plus-04-2024",
+                        "command-r",
+                        "command",
+                        "command-light"
+                    ],
+                    value="command-r-plus-08-2024",
+                    label="Cohere Model"
+                )
 
             send_to_model_btn = gr.Button("π Send to Model", variant="primary")
             open_chatgpt_button = gr.Button("π Open ChatGPT")
@@ -861,7 +1022,9 @@ with gr.Blocks(css="""
             return (
                 gr.update(visible=choice == "HuggingFace Inference"),
                 gr.update(visible=choice == "Groq API"),
-                gr.update(visible=choice == "OpenAI ChatGPT")
+                gr.update(visible=choice == "OpenAI ChatGPT"),
+                gr.update(visible=choice == "Cohere API"),
+                gr.update(visible=choice == "GLHF API")
             )
 
         def refresh_groq_models_list():
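
The visibility handler above now returns five gr.update values, one per provider, so the component that triggers it must list the five option columns as outputs in exactly that order. The wiring itself is outside this hunk; it would look roughly like the following, where model_choice, toggle_model_options, and the *_options names are assumptions:

    # hypothetical wiring; the handler's five gr.update(...) returns
    # map positionally onto these outputs
    model_choice.change(
        toggle_model_options,
        inputs=[model_choice],
        outputs=[hf_options, groq_options, openai_options, cohere_options, glhf_options]
    )
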
@@ -1093,7 +1256,12 @@ with gr.Blocks(css="""
                 groq_model,
                 groq_api_key,
                 openai_api_key,
-                openai_model
+                openai_model,
+                cohere_api_key,
+                cohere_model,
+                glhf_api_key,
+                glhf_model,
+                glhf_custom_model
             ],
             outputs=[summary_output, download_summary]
         )