Yeyito committed on
Commit
ca453e8
·
1 Parent(s): 98d650b

Load tokenizer from parent model & app.py fixes

Browse files
app.py CHANGED
@@ -6,6 +6,8 @@ import time
6
  import pandas as pd
7
  from threading import Thread
8
  import numpy as np
 
 
9
 
10
  # Add the path to the "src" directory of detect-pretrain-code-contamination to the sys.path
11
  project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "detect-pretrain-code-contamination"))
@@ -52,6 +54,9 @@ def save_to_txt(model, results, model_type,ref_model):
52
 
53
  with open(file_path, "a") as f:
54
  f.write(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])) + f",{ref_model}")
 
 
 
55
  f.close()
56
 
57
  def run_test(model,ref_model,data):
@@ -88,7 +93,9 @@ def worker_thread():
88
  for submission in modelQueue:
89
  #evaluate(submission[1],submission[0].split(" ")[0],submission[2])
90
  #modelQueue.pop(modelQueue.index(submission))
91
-
 
 
92
  # Uncomment those lines in order to begin testing, I test these models outside of this space and later commit the results back.
93
  # I highly encourage you to try to reproduce the results I get using your own implementation.
94
  # Do NOT take anything listed here as fact, as I'm not 100% sure my implementation works as intended.
@@ -105,16 +112,10 @@ def queue(model,model_type,ref_model):
105
  file_path = "data/queue.csv"
106
  with open(file_path, "a") as f:
107
  model = model.strip()
 
108
  f.write(f"\n{model_type},{model},{ref_model}")
109
  f.close()
110
  print(f"QUEUE:\n{modelQueue}")
111
-
112
- eval_entry = {
113
- "model": model,
114
- "model_type": model_type,
115
- "ref_model": ref_model,
116
- }
117
-
118
 
119
  ### bigcode/bigcode-models-leaderboard
120
  def add_new_eval(
 
6
  import pandas as pd
7
  from threading import Thread
8
  import numpy as np
9
+ import discord
10
+ from discord.ext import commands
11
 
12
  # Add the path to the "src" directory of detect-pretrain-code-contamination to the sys.path
13
  project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "detect-pretrain-code-contamination"))
 
54
 
55
  with open(file_path, "a") as f:
56
  f.write(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])) + f",{ref_model}")
57
+
58
+ print(f"Finished evaluation of model: {model} using ref_model: {ref_model}")
59
+ print(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])) + f",{ref_model}")
60
  f.close()
61
 
62
  def run_test(model,ref_model,data):
 
93
  for submission in modelQueue:
94
  #evaluate(submission[1],submission[0].split(" ")[0],submission[2])
95
  #modelQueue.pop(modelQueue.index(submission))
96
+ #exit()
97
+
98
+ # The exit above is temporary while I figure out how to unload a model from a thread or similar.
99
  # Uncomment those lines in order to begin testing, I test these models outside of this space and later commit the results back.
100
  # I highly encourage you to try to reproduce the results I get using your own implementation.
101
  # Do NOT take anything listed here as fact, as I'm not 100% sure my implementation works as intended.
 
112
  file_path = "data/queue.csv"
113
  with open(file_path, "a") as f:
114
  model = model.strip()
115
+ ref_model = ref_model.strip()
116
  f.write(f"\n{model_type},{model},{ref_model}")
117
  f.close()
118
  print(f"QUEUE:\n{modelQueue}")
 
 
 
 
 
 
 
119
 
120
  ### bigcode/bigcode-models-leaderboard
121
  def add_new_eval(
detect-pretrain-code-contamination/src/run.py CHANGED
@@ -40,15 +40,11 @@ def load_data(filename):
40
  def unload_model(model,tokenizer):
41
  print("[X] Cannot unload model! Functionality not implemented!")
42
 
43
- def load_model(name1):
44
  if name1 not in models:
45
  model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
46
  model1.eval()
47
- if "mistral" in name1 or "Mistral" in name1: #Loading default mistral tokenizers as some tokenizers don't work out of the box.
48
- tokenizer1 = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
49
- else:
50
- tokenizer1 = AutoTokenizer.from_pretrained(name1)
51
-
52
  tokenizer1.pad_token = tokenizer1.eos_token
53
  models[name1] = model1
54
  models[name1 + "_tokenizer"] = tokenizer1
@@ -124,7 +120,7 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
124
  neighbors_dls = load_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt')
125
  except:
126
  ### MODEL 2 likelihoods
127
- model2, tokenizer2 = load_model(ref_model)
128
  inference2_pass = [] #0: p_ref, #1: all_prob_ref, #2: p_ref_likelihood
129
  for ex in tqdm(test_data):
130
  text = ex[col_name]
@@ -147,7 +143,7 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
147
  print("Saved ref data, exiting.")
148
 
149
  ### MODEL 1 likelihoods
150
- model1, tokenizer1 = load_model(target_model)
151
  inference1_pass = [] #0: p1, #1: all_prob, #2: p1_likelihood, #3: p_lower, #4: p_lower_likelihood
152
  for ex in tqdm(test_data):
153
  text = ex[col_name]
@@ -155,7 +151,6 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
155
  inference1_pass.append(new_ex)
156
 
157
  ### RIMA results
158
- model1, tokenizer1 = load_model(target_model)
159
  counter = 0
160
  results = []
161
  for ex in tqdm(test_data):
 
40
  def unload_model(model,tokenizer):
41
  print("[X] Cannot unload model! Functionality not implemented!")
42
 
43
+ def load_model(name1,ref_model):
44
  if name1 not in models:
45
  model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
46
  model1.eval()
47
+ tokenizer1 = AutoTokenizer.from_pretrained(ref_model)
 
 
 
 
48
  tokenizer1.pad_token = tokenizer1.eos_token
49
  models[name1] = model1
50
  models[name1 + "_tokenizer"] = tokenizer1
 
120
  neighbors_dls = load_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt')
121
  except:
122
  ### MODEL 2 likelihoods
123
+ model2, tokenizer2 = load_model(ref_model,ref_model)
124
  inference2_pass = [] #0: p_ref, #1: all_prob_ref, #2: p_ref_likelihood
125
  for ex in tqdm(test_data):
126
  text = ex[col_name]
 
143
  print("Saved ref data, exiting.")
144
 
145
  ### MODEL 1 likelihoods
146
+ model1, tokenizer1 = load_model(target_model,ref_model)
147
  inference1_pass = [] #0: p1, #1: all_prob, #2: p1_likelihood, #3: p_lower, #4: p_lower_likelihood
148
  for ex in tqdm(test_data):
149
  text = ex[col_name]
 
151
  inference1_pass.append(new_ex)
152
 
153
  ### RIMA results
 
154
  counter = 0
155
  results = []
156
  for ex in tqdm(test_data):