update-readme
README.md
CHANGED
@@ -37,7 +37,7 @@ license: llama3
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 device = "cuda"
-path = "
+path = "SteveTran/ArmoRM-Llama3-8B-v0.1-4bit"
 model = AutoModelForSequenceClassification.from_pretrained(path, device_map=device,
                                trust_remote_code=True, torch_dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
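The next hunk uses `multi_obj_rewards`, `top_obj_dims`, `top_obj_coeffs`, `attributes`, `example_index`, and `K`, which come from README lines that fall outside the diff context. As a rough bridge, here is a minimal sketch of that intervening step, assuming the custom ArmoRM head exposes per-objective rewards as `output.rewards`, gating weights as `output.gating_output`, and a reward-mixing matrix as `model.reward_transform_matrix` (attribute names inferred from the upstream ArmoRM model card, not confirmed by this diff); the conversation is a made-up example, and `attributes` (the list of objective names) is likewise defined elsewhere in the README:

```python
# Hypothetical conversation to score (any prompt/response pair works)
messages = [
    {"role": "user", "content": 'What are some synonyms for the word "beautiful"?'},
    {"role": "assistant", "content": "Gorgeous, stunning, lovely, radiant, elegant."},
]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)

with torch.no_grad():
    output = model(input_ids)
    # Per-objective rewards and gating weights (attribute names are assumptions)
    multi_obj_rewards = output.rewards.cpu().float()
    gating_output = output.gating_output.cpu().float()

# Mix gating weights into per-objective coefficients and keep the K largest
obj_transform = model.reward_transform_matrix.data.cpu().float()
multi_obj_coeffs = gating_output @ obj_transform.T
K, example_index = 3, 0
top_obj_dims = torch.argsort(multi_obj_coeffs.abs(), dim=1, descending=True)[:, :K]
top_obj_coeffs = torch.gather(multi_obj_coeffs, dim=1, index=top_obj_dims)
```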
@@ -83,10 +83,17 @@ for i in range(K):
     attribute = attributes[top_obj_dims[example_index, i].item()]
     coeff = top_obj_coeffs[example_index, i].item()
     print(f"{attribute}: {round(coeff,5)}")
+
+# Float16
 # code-complexity: 0.19922
 # helpsteer-verbosity: -0.10864
 # ultrafeedback-instruction_following: 0.07861
 
+# 4bit
+# code-complexity: 0.19043
+# helpsteer-verbosity: -0.11304
+# ultrafeedback-instruction_following: 0.08203
+
 # The actual rewards of this example from the HelpSteer dataset
 # are [3,3,4,2,2] for the five helpsteer objectives:
 # helpfulness, correctness, coherence, complexity, verbosity
@@ -94,7 +101,8 @@ for i in range(K):
 # original reward space to compare with the ground truth
 helpsteer_rewards_pred = multi_obj_rewards[0, :5] * 5 - 0.5
 print(helpsteer_rewards_pred)
-# [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
+# float16 [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
+# 4bit [1.7754, 1.9316, 3.4062, 1.2773, 1.8438]
 ```
 
 ## Easy to use Pipeline
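Since the diff now shows both the float16 and the 4-bit predictions next to the ground-truth labels, a quick way to gauge how much quantization shifts the outputs is to compare both against the `[3,3,4,2,2]` reference directly. This small check uses only numbers copied from the comments above:

```python
import torch

# Ground-truth HelpSteer labels for this example (from the README comments)
ground_truth = torch.tensor([3.0, 3.0, 4.0, 2.0, 2.0])

# Predicted rewards on the 0-4 HelpSteer scale, as printed in the diff
pred_fp16 = torch.tensor([2.78125, 2.859375, 3.484375, 1.3847656, 1.296875])
pred_4bit = torch.tensor([1.7754, 1.9316, 3.4062, 1.2773, 1.8438])

print("fp16 mean abs error:", (pred_fp16 - ground_truth).abs().mean().item())
print("4bit mean abs error:", (pred_4bit - ground_truth).abs().mean().item())
print("fp16 vs 4bit max diff:", (pred_fp16 - pred_4bit).abs().max().item())
```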