tiendung committed
Commit 8119571 · 1 Parent(s): c960787
Files changed (2)
  1. llm.py +12 -14
  2. pages_helpers.py +1 -0
llm.py CHANGED
@@ -11,6 +11,8 @@ TKNZ_RATIO = 1
 GEMINI_MODEL = 'gemini-1.5-pro-002'
 FLASH_MODEL = 'gemini-1.5-flash-002'
 
+MAX_OUTPUT_TOKENS = 1024*8
+
 # https://github.com/google-gemini/cookbook/blob/main/quickstarts/Prompting.ipynb
 # https://github.com/google-gemini/cookbook/blob/main/quickstarts/Streaming.ipynb
 import google.generativeai as genai # pip install -U -q google-generativeai
@@ -20,9 +22,9 @@ llm_log_filename = f"{location__}/.cache/llm.log"
 genai.configure(api_key="AIzaSyAUeHVWLkYioIGk6PMbCTqk73PowHCIyPM")
 
 GEMINI_CLIENT = genai.GenerativeModel(GEMINI_MODEL, \
-    generation_config=genai.GenerationConfig(
-        max_output_tokens=1024*4,
-        temperature=TEMPERATURE
+    generation_config = genai.GenerationConfig(
+        max_output_tokens = MAX_OUTPUT_TOKENS,
+        temperature = TEMPERATURE,
 ))
 
 def chat(prompt, history=[], use_cache=False, stream=False):
@@ -78,20 +80,16 @@ elif thinker in "70b|405b":
 
     # https://docs.together.ai/docs/chat-models#hosted-models
     model = {
-        "405b": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo 128k 4k 1.2", # $3.50 / 1m tokens(*)
-        "70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo 128k 4k 1.2", # $0.88 / 1m tokens(*)
+        "405b": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo 128k", # $3.50 / 1m tokens(*)
+        "70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo 128k", # $0.88 / 1m tokens(*)
     }[thinker]
 
-    model, CTXLEN, MAX_TOKENS, TKNZ_RATIO = model.strip().split()
+    model, CTXLEN = model.strip().split()
     LLM_HOST = model
 
-    MAX_TOKENS = int(MAX_TOKENS[:-1])*1024
-    TKNZ_RATIO = float(TKNZ_RATIO)
-
     CTXLEN = int(CTXLEN[:-1])
-    if CTXLEN > 64: CTXLEN = 64 # max 32k ctxlen
-    CTXLEN = CTXLEN*1024 - MAX_TOKENS
-    # print(model, CTXLEN, MAX_TOKENS, TKNZ_RATIO); input(); # DEBUG
+    if CTXLEN > 64: CTXLEN = 64 # max 64k ctxlen
+    CTXLEN = CTXLEN*1024 - MAX_OUTPUT_TOKENS
 
     from together import Together
     together_client = Together(api_key='adc0db56b77fe6508bdeadb4d8253771750a50639f8e87313153e49d4599f6ea')
@@ -103,7 +101,7 @@ elif thinker in "70b|405b":
         return together_client.chat.completions.create(
             model=model,
             messages=[{"role": "user", "content": prompt}],
-            max_tokens=MAX_TOKENS,
+            max_tokens=MAX_OUTPUT_TOKENS,
            temperature=TEMPERATURE,
             top_p=0.7, top_k=50,
             repetition_penalty=1.2, stop=stops,
@@ -129,7 +127,7 @@ elif thinker in "70b|405b":
         response = Together(api_key=os.environ.get('TOGETHER_API_KEY')).chat.completions.create(
             model=model,
             messages=messages,
-            max_tokens=MAX_TOKENS,
+            max_tokens=MAX_OUTPUT_TOKENS,
             temperature=TEMPERATURE,
             top_p=0.7, top_k=50,
             repetition_penalty=1.2, stop=stops,
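
Note on the change above: a single MAX_OUTPUT_TOKENS constant replaces the per-model MAX_TOKENS / TKNZ_RATIO fields, and the prompt budget is now derived from the capped context length. A minimal sketch of that arithmetic, mirroring the 405b entry in the diff (illustrative only, not part of the commit):

    MAX_OUTPUT_TOKENS = 1024*8                    # 8k tokens reserved for the model's reply

    entry = "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo 128k"
    model, CTXLEN = entry.strip().split()         # -> model name, "128k"

    CTXLEN = int(CTXLEN[:-1])                     # "128k" -> 128
    if CTXLEN > 64: CTXLEN = 64                   # cap total context at 64k
    CTXLEN = CTXLEN*1024 - MAX_OUTPUT_TOKENS      # 65536 - 8192 = 57344 tokens left for the prompt

    print(model, CTXLEN)                          # meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo 57344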
pages_helpers.py CHANGED
@@ -569,4 +569,5 @@ https://arxiv.org/html/2409.10516v2
 https://rlhflow.github.io/posts/2024-05-29-multi-objective-reward-modeling
 https://arxiv.org/html/2405.07863v2
 https://arxiv.org/html/2406.12845
+https://eugeneyan.com/writing/llm-evaluators
 """.strip()