Muennighoff committed on
Commit
216d974
·
1 Parent(s): 64dd40c

Add more OpenAI models

Browse files
Files changed (1) hide show
  1. app.py +22 -6
app.py CHANGED
@@ -158,15 +158,23 @@ EXTERNAL_MODELS = [
158
  "sentence-t5-xxl",
159
  "sup-simcse-bert-base-uncased",
160
  "text-similarity-ada-001",
161
- "text-search-ada-query-001",
162
- "text-search-ada-doc-001",
 
 
 
163
  "unsup-simcse-bert-base-uncased",
164
  ]
165
  EXTERNAL_MODEL_TO_LINK = {
166
  "LASER2": "https://github.com/facebookresearch/LASER",
167
  "text-similarity-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
168
- "text-search-ada-query-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
169
  "text-search-ada-doc-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
 
 
 
 
 
170
  "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
171
  "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
172
  "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
@@ -219,8 +227,15 @@ EXTERNAL_MODEL_TO_DIM = {
219
  "sentence-t5-xxl": 768,
220
  "sup-simcse-bert-base-uncased": 768,
221
  "text-similarity-ada-001": 1024,
 
 
 
222
  "text-search-ada-query-001": 1024,
223
- "text-search-ada-doc-001": 1024,
 
 
 
 
224
  "unsup-simcse-bert-base-uncased": 768,
225
  }
226
 
@@ -255,7 +270,7 @@ def add_task(examples):
255
  return examples
256
 
257
  for model in EXTERNAL_MODELS:
258
- ds = load_dataset("mteb/results", model)
259
  # For local debugging:
260
  #, download_mode='force_redownload', ignore_verifications=True)
261
  ds = ds.map(add_lang)
@@ -297,7 +312,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, add_emb_dim=False
297
  res = {k: v for d in results_list for k, v in d.items()}
298
  # Model & at least one result
299
  if len(res) > 1:
300
- res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
 
301
  df_list.append(res)
302
 
303
  for model in models:
 
158
  "sentence-t5-xxl",
159
  "sup-simcse-bert-base-uncased",
160
  "text-similarity-ada-001",
161
+ "text-similarity-curie-001",
162
+ "text-search-ada-001",
163
+ "text-search-babbage-001",
164
+ "text-search-curie-001",
165
+ "text-search-davinci-001",
166
  "unsup-simcse-bert-base-uncased",
167
  ]
168
  EXTERNAL_MODEL_TO_LINK = {
169
  "LASER2": "https://github.com/facebookresearch/LASER",
170
  "text-similarity-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
171
+ "text-similarity-curie-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
172
  "text-search-ada-doc-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
173
+ "text-search-ada-query-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
174
+ "text-search-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
175
+ "text-search-curie-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
176
+ "text-search-babbage-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
177
+ "text-search-davinci-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
178
  "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
179
  "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
180
  "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
 
227
  "sentence-t5-xxl": 768,
228
  "sup-simcse-bert-base-uncased": 768,
229
  "text-similarity-ada-001": 1024,
230
+ "text-similarity-curie-001": 4096,
231
+
232
+ "text-search-ada-doc-001": 1024,
233
  "text-search-ada-query-001": 1024,
234
+ "text-search-ada-001": 1024,
235
+ "text-search-babbage-001": 2048,
236
+ "text-search-curie-001": 4096,
237
+ "text-search-davinci-001": 12288,
238
+
239
  "unsup-simcse-bert-base-uncased": 768,
240
  }
241
 
 
270
  return examples
271
 
272
  for model in EXTERNAL_MODELS:
273
+ ds = load_dataset("mteb/results", model, download_mode='force_redownload', ignore_verifications=True)
274
  # For local debugging:
275
  #, download_mode='force_redownload', ignore_verifications=True)
276
  ds = ds.map(add_lang)
 
312
  res = {k: v for d in results_list for k, v in d.items()}
313
  # Model & at least one result
314
  if len(res) > 1:
315
+ if add_emb_dim:
316
+ res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
317
  df_list.append(res)
318
 
319
  for model in models: