update

- llm.py  +12 -14
- pages_helpers.py  +1 -0
llm.py  CHANGED

@@ -11,6 +11,8 @@ TKNZ_RATIO = 1
 GEMINI_MODEL = 'gemini-1.5-pro-002'
 FLASH_MODEL = 'gemini-1.5-flash-002'
 
+MAX_OUTPUT_TOKENS = 1024*8
+
 # https://github.com/google-gemini/cookbook/blob/main/quickstarts/Prompting.ipynb
 # https://github.com/google-gemini/cookbook/blob/main/quickstarts/Streaming.ipynb
 import google.generativeai as genai # pip install -U -q google-generativeai

@@ -20,9 +22,9 @@ llm_log_filename = f"{location__}/.cache/llm.log"
 genai.configure(api_key="AIzaSyAUeHVWLkYioIGk6PMbCTqk73PowHCIyPM")
 
 GEMINI_CLIENT = genai.GenerativeModel(GEMINI_MODEL, \
-    generation_config=genai.GenerationConfig(
-        max_output_tokens=
-        temperature=TEMPERATURE
+    generation_config = genai.GenerationConfig(
+        max_output_tokens = MAX_OUTPUT_TOKENS,
+        temperature = TEMPERATURE,
 ))
 
 def chat(prompt, history=[], use_cache=False, stream=False):

@@ -78,20 +80,16 @@ elif thinker in "70b|405b":
 
     # https://docs.together.ai/docs/chat-models#hosted-models
     model = {
-        "405b": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo 128k
-        "70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo 128k
+        "405b": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo 128k", # $3.50 / 1m tokens(*)
+        "70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo 128k", # $0.88 / 1m tokens(*)
     }[thinker]
 
-    model, CTXLEN
+    model, CTXLEN = model.strip().split()
     LLM_HOST = model
 
-    MAX_TOKENS = int(MAX_TOKENS[:-1])*1024
-    TKNZ_RATIO = float(TKNZ_RATIO)
-
     CTXLEN = int(CTXLEN[:-1])
-    if CTXLEN > 64: CTXLEN = 64 # max
-    CTXLEN = CTXLEN*1024 -
-    # print(model, CTXLEN, MAX_TOKENS, TKNZ_RATIO); input(); # DEBUG
+    if CTXLEN > 64: CTXLEN = 64 # max 64k ctxlen
+    CTXLEN = CTXLEN*1024 - MAX_OUTPUT_TOKENS
 
     from together import Together
     together_client = Together(api_key='adc0db56b77fe6508bdeadb4d8253771750a50639f8e87313153e49d4599f6ea')

@@ -103,7 +101,7 @@ elif thinker in "70b|405b":
         return together_client.chat.completions.create(
             model=model,
             messages=[{"role": "user", "content": prompt}],
-            max_tokens=
+            max_tokens=MAX_OUTPUT_TOKENS,
             temperature=TEMPERATURE,
             top_p=0.7, top_k=50,
             repetition_penalty=1.2, stop=stops,

@@ -129,7 +127,7 @@ elif thinker in "70b|405b":
         response = Together(api_key=os.environ.get('TOGETHER_API_KEY')).chat.completions.create(
             model=model,
             messages=messages,
-            max_tokens=
+            max_tokens=MAX_OUTPUT_TOKENS,
             temperature=TEMPERATURE,
             top_p=0.7, top_k=50,
             repetition_penalty=1.2, stop=stops,
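For orientation, here is a minimal sketch, not part of the commit, of how the new MAX_OUTPUT_TOKENS cap is handed to the Gemini client, following the google-generativeai quickstarts linked above. The API key, the TEMPERATURE value, and the prompt are placeholders rather than the repository's values.

import google.generativeai as genai  # pip install -U -q google-generativeai

MAX_OUTPUT_TOKENS = 1024*8   # same cap the commit introduces
TEMPERATURE = 0.2            # placeholder; llm.py defines its own TEMPERATURE

genai.configure(api_key="YOUR_API_KEY")          # placeholder key
client = genai.GenerativeModel(
    'gemini-1.5-pro-002',
    generation_config=genai.GenerationConfig(
        max_output_tokens=MAX_OUTPUT_TOKENS,     # hard cap on reply length
        temperature=TEMPERATURE,
    ),
)
response = client.generate_content("hello")      # placeholder prompt
print(response.text)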
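The Together branch reuses the same constant as a reply budget: the "model 128k" strings above are split into a model name and a context size, the size is capped at 64k, and MAX_OUTPUT_TOKENS is reserved out of that window for the output. A minimal sketch of that arithmetic, using one of the two model entries from the diff:

MAX_OUTPUT_TOKENS = 1024*8

entry = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo 128k"
model, CTXLEN = entry.strip().split()       # -> model name, "128k"

CTXLEN = int(CTXLEN[:-1])                   # "128k" -> 128
if CTXLEN > 64: CTXLEN = 64                 # cap the usable window at 64k
CTXLEN = CTXLEN*1024 - MAX_OUTPUT_TOKENS    # prompt budget: 65536 - 8192 = 57344

print(model, CTXLEN)   # meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo 57344

Whatever is left in CTXLEN is what the caller can spend on the prompt before the max_tokens=MAX_OUTPUT_TOKENS reply is generated.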
pages_helpers.py  CHANGED

@@ -569,4 +569,5 @@ https://arxiv.org/html/2409.10516v2
 https://rlhflow.github.io/posts/2024-05-29-multi-objective-reward-modeling
 https://arxiv.org/html/2405.07863v2
 https://arxiv.org/html/2406.12845
+https://eugeneyan.com/writing/llm-evaluators
 """.strip()