SteveTran committed
Commit cf7046b · verified · 1 Parent(s): af3607c

update-readme

Files changed (1): README.md (+10 -2)
README.md CHANGED
@@ -37,7 +37,7 @@ license: llama3
  import torch
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
  device = "cuda"
- path = "RLHFlow/ArmoRM-Llama3-8B-v0.1"
+ path = "SteveTran/ArmoRM-Llama3-8B-v0.1-4bit"
  model = AutoModelForSequenceClassification.from_pretrained(path, device_map=device,
                                trust_remote_code=True, torch_dtype=torch.bfloat16)
  tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)
@@ -83,10 +83,17 @@ for i in range(K):
      attribute = attributes[top_obj_dims[example_index, i].item()]
      coeff = top_obj_coeffs[example_index, i].item()
      print(f"{attribute}: {round(coeff,5)}")
+
+ # Float16
  # code-complexity: 0.19922
  # helpsteer-verbosity: -0.10864
  # ultrafeedback-instruction_following: 0.07861

+ # 4bit
+ # code-complexity: 0.19043
+ # helpsteer-verbosity: -0.11304
+ # ultrafeedback-instruction_following: 0.08203
+
  # The actual rewards of this example from the HelpSteer dataset
  # are [3,3,4,2,2] for the five helpsteer objectives:
  # helpfulness, correctness, coherence, complexity, verbosity
@@ -94,7 +101,8 @@ for i in range(K):
  # original reward space to compare with the ground truth
  helpsteer_rewards_pred = multi_obj_rewards[0, :5] * 5 - 0.5
  print(helpsteer_rewards_pred)
- # [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
+ # float16 [2.78125 2.859375 3.484375 1.3847656 1.296875 ]
+ # 4bit [1.7754, 1.9316, 3.4062, 1.2773, 1.8438]
  ```

  ## Easy to use Pipeline
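Note (not part of the commit): the README maps the model's multi-objective rewards, which live in [0, 1], back to HelpSteer's 0-4 scale with `r * 5 - 0.5`, and this diff records the resulting predictions for both the float16 and 4-bit checkpoints against the ground-truth labels [3, 3, 4, 2, 2]. The sketch below is a hypothetical sanity check, with the numbers copied from the comments in the diff above, comparing the two checkpoints' mean absolute error on this example.

```python
import torch

# Values copied from the README comments in the diff above (illustrative only).
ground_truth = torch.tensor([3.0, 3.0, 4.0, 2.0, 2.0])  # HelpSteer labels for this example
fp16_pred = torch.tensor([2.78125, 2.859375, 3.484375, 1.3847656, 1.296875])
q4_pred = torch.tensor([1.7754, 1.9316, 3.4062, 1.2773, 1.8438])

# Mean absolute error of each checkpoint against the ground-truth labels
print("float16 MAE:", (fp16_pred - ground_truth).abs().mean().item())  # ~0.44
print("4-bit   MAE:", (q4_pred - ground_truth).abs().mean().item())    # ~0.75
```

On this single example the 4-bit checkpoint's per-objective rewards drift further from the labels than float16, while the top gating coefficients shown above stay close; a proper comparison would of course average over many examples.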