import requests
import json
import os
from dotenv import load_dotenv

load_dotenv()  # Load environment variables (including the API key) from a .env file
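
# The API key is read from the environment under the name OPENROUTER (see the
# request headers below); a .env file in the working directory is assumed to
# define it, e.g. (placeholder value):
#   OPENROUTER=sk-or-your-key-here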
def generate(
    query: str,
    system_prompt: str = "Talk Like Shakespeare",
    model: str = "openai/gpt-4o",
    max_tokens: int = 4096,  # default sized for simpler models
    temperature: float = 0.85,
    frequency_penalty: float = 0.34,
    presence_penalty: float = 0.06,
    repetition_penalty: float = 1.0,
    top_k: int = 0,
) -> str:
""" | |
Sends a request to the OpenRouter API and returns the generated text using the specified model. | |
Args: | |
query (str): The input query or prompt. | |
system_prompt (str, optional): A context or introduction to set the style or tone of the generated response. | |
Defaults to "Talk Like Shakespeare". | |
model (str, optional): The language model to use for generating the response. | |
Defaults to "openchat/openchat-7b". | |
max_tokens (int, optional): The maximum number of tokens to generate in the response. | |
Defaults to 8096. | |
temperature (float, optional): A parameter controlling the diversity of the generated response. | |
Higher values result in more diverse outputs. Defaults to 0.85. | |
frequency_penalty (float, optional): A penalty applied to tokens with low frequency in the training data. | |
Defaults to 0.34. | |
presence_penalty (float, optional): A penalty applied to tokens based on their presence in the prompt. | |
Defaults to 0.06. | |
repetition_penalty (float, optional): A penalty applied to repeated tokens in the generated response. | |
Defaults to 1.0. | |
top_k (int, optional): The number of highest probability tokens to consider at each step of generation. | |
Defaults to 0, meaning no restriction. | |
Returns: | |
str: The generated text. | |
Available models: | |
- Free: | |
- "openchat/openchat-7b" | |
- "huggingfaceh4/zephyr-7b-beta" | |
- "mistralai/mistral-7b-instruct:free" | |
- Flagship Opensource: | |
- "meta-llama/llama-3-8b-instruct:extended" | |
- "lynn/soliloquy-l3" | |
- "mistralai/mixtral-8x22b-instruct" | |
- "meta-llama/llama-3-70b-instruct:nitro" | |
- Premium: | |
- "openai/gpt-4" | |
- "openai/gpt-4-0314" | |
- "anthropic/claude-3-opus" | |
- "anthropic/claude-3-opus:beta" | |
- "openai/gpt-4-turbo" | |
""" | |
    response = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {os.environ.get('OPENROUTER')}",
        },
        # Using the json= parameter serializes the body and sets the
        # Content-Type: application/json header automatically.
        json={
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query},
            ],
            "model": model,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
            "repetition_penalty": repetition_penalty,
            "top_k": top_k,
        },
    )
    try:
        return response.json()["choices"][0]["message"]["content"].strip()
    except Exception as e:
        return f"Failed to get response\nError: {e}\nResponse: {response.text}"
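
# Design note: generate() returns an error string rather than raising, which keeps
# demo scripts simple. A sketch of the stricter alternative (hypothetical variant,
# not part of the original file) using the same request pattern:
#
#   resp = requests.post(
#       "https://openrouter.ai/api/v1/chat/completions",
#       headers={"Authorization": f"Bearer {os.environ.get('OPENROUTER')}"},
#       json={"model": "openai/gpt-4o",
#             "messages": [{"role": "user", "content": "Hello"}]},
#   )
#   resp.raise_for_status()  # raises requests.HTTPError on 4xx/5xx responses
#   print(resp.json()["choices"][0]["message"]["content"])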
if __name__ == "__main__":
    # response = generate("Introduce yourself and tell me your name and who made you")
    response = generate("Are you GPT-4? Do you have access to real-time data? If not, what is your knowledge cutoff?")
    print(response)
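
    # A minimal sketch of overriding the defaults; the model and prompts below are
    # illustrative choices, not recommendations from the original file:
    # response = generate(
    #     "Summarize the plot of Hamlet in two sentences.",
    #     system_prompt="You are a concise literary critic.",
    #     model="mistralai/mistral-7b-instruct:free",
    #     max_tokens=256,
    # )
    # print(response)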