merge miaoran into forrest
Browse files- README.md +54 -1
- config.json +2 -1
- modeling_hhem_v2.py +9 -2
README.md
CHANGED
@@ -26,7 +26,11 @@ By "hallucinated" or "factually inconsistent", we mean that a text (hypothesis,
|
|
26 |
A common type of hallucination in RAG is **factual but hallucinated**.
|
27 |
For example, given the premise _"The capital of France is Berlin"_, the hypothesis _"The capital of France is Paris"_ is hallucinated -- although it is true in the world knowledge. This happens when LLMs do not generate content based on the textual data provided to them as part of the RAG retrieval process, but rather generate content based on their pre-trained knowledge.
|
28 |
|
29 |
-
## Using HHEM-2.1-Open
|
|
|
|
|
|
|
|
|
30 |
|
31 |
HHEM-2.1 has some breaking changes from HHEM-1.0. Your previous code will no longer work. While we are working on backward compatibility, please follow the new usage instructions below.
|
32 |
|
@@ -54,6 +58,55 @@ model.predict(pairs) # note the predict() method. Do not do model(pairs).
|
|
54 |
# tensor([0.0111, 0.6474, 0.1290, 0.8969, 0.1846, 0.0050, 0.0543])
|
55 |
```
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
You may run into a warning message that "Token indices sequence length is longer than the specified maximum sequence length". Please ignore this warning for now. It is a notification inherited from the foundation model, T5-base.
|
58 |
|
59 |
Note that the order of a pair is important. For example, the 2nd and 3rd examples in the `pairs` list are consistent and hallucinated, respectively.
|
|
|
26 |
A common type of hallucination in RAG is **factual but hallucinated**.
|
27 |
For example, given the premise _"The capital of France is Berlin"_, the hypothesis _"The capital of France is Paris"_ is hallucinated -- although it is true in the world knowledge. This happens when LLMs do not generate content based on the textual data provided to them as part of the RAG retrieval process, but rather generate content based on their pre-trained knowledge.
|
28 |
|
29 |
+
## Using HHEM-2.1-Open with `transformers`
|
30 |
+
|
31 |
+
HHEM-2.1 has some breaking changes from HHEM-1.0. Your previous code will no longer work. While we are working on backward compatibility, please follow the new usage instructions below.
|
32 |
+
|
33 |
+
**Using with `Auto` class**
|
34 |
|
35 |
HHEM-2.1 has some breaking changes from HHEM-1.0. Your previous code will no longer work. While we are working on backward compatibility, please follow the new usage instructions below.
|
36 |
|
|
|
58 |
# tensor([0.0111, 0.6474, 0.1290, 0.8969, 0.1846, 0.0050, 0.0543])
|
59 |
```
|
60 |
|
61 |
+
|
62 |
+
**Using with `text-classification` pipeline**
|
63 |
+
|
64 |
+
Please note that when using the `text-classification` pipeline for prediction, scores for two labels will be returned for each pair. The score for the **consistent** label is the one that should be focused on.
|
65 |
+
|
66 |
+
```python
|
67 |
+
from transformers import pipeline, AutoTokenizer
|
68 |
+
|
69 |
+
pairs = [
|
70 |
+
("The capital of France is Berlin.", "The capital of France is Paris."),
|
71 |
+
('I am in California', 'I am in United States.'),
|
72 |
+
('I am in United States', 'I am in California.'),
|
73 |
+
("A person on a horse jumps over a broken down airplane.", "A person is outdoors, on a horse."),
|
74 |
+
("A boy is jumping on skateboard in the middle of a red bridge.", "The boy skates down the sidewalk on a red bridge"),
|
75 |
+
("A man with blond-hair, and a brown shirt drinking out of a public water fountain.", "A blond man wearing a brown shirt is reading a book."),
|
76 |
+
("Mark Wahlberg was a fan of Manny.", "Manny was a fan of Mark Wahlberg.")
|
77 |
+
]
|
78 |
+
|
79 |
+
# Apply prompt to pairs
|
80 |
+
prompt = "<pad> Determine if the hypothesis is true given the premise?\n\nPremise: {text1}\n\nHypothesis: {text2}"
|
81 |
+
input_pairs = [prompt.format(text1=pair[0], text2=pair[1]) for pair in pairs]
|
82 |
+
|
83 |
+
# Use text-classification pipeline to predict
|
84 |
+
classifier = pipeline(
|
85 |
+
"text-classification",
|
86 |
+
model='vectara/hallucination_evaluation_model',
|
87 |
+
tokenizer=AutoTokenizer.from_pretrained('google/flan-t5-base'),
|
88 |
+
trust_remote_code=True
|
89 |
+
)
|
90 |
+
classifier(input_pairs, return_all_scores=True)
|
91 |
+
|
92 |
+
# output
|
93 |
+
|
94 |
+
# [[{'label': 'hallucinated', 'score': 0.9889384508132935},
|
95 |
+
# {'label': 'consistent', 'score': 0.011061512865126133}],
|
96 |
+
# [{'label': 'hallucinated', 'score': 0.35263675451278687},
|
97 |
+
# {'label': 'consistent', 'score': 0.6473632454872131}],
|
98 |
+
# [{'label': 'hallucinated', 'score': 0.870982825756073},
|
99 |
+
# {'label': 'consistent', 'score': 0.1290171593427658}],
|
100 |
+
# [{'label': 'hallucinated', 'score': 0.1030581071972847},
|
101 |
+
# {'label': 'consistent', 'score': 0.8969419002532959}],
|
102 |
+
# [{'label': 'hallucinated', 'score': 0.8153750896453857},
|
103 |
+
# {'label': 'consistent', 'score': 0.18462494015693665}],
|
104 |
+
# [{'label': 'hallucinated', 'score': 0.9949689507484436},
|
105 |
+
# {'label': 'consistent', 'score': 0.005031010136008263}],
|
106 |
+
# [{'label': 'hallucinated', 'score': 0.9456764459609985},
|
107 |
+
# {'label': 'consistent', 'score': 0.05432349815964699}]]
|
108 |
+
```
|
109 |
+
|
110 |
You may run into a warning message that "Token indices sequence length is longer than the specified maximum sequence length". Please ignore this warning for now. It is a notification inherited from the foundation model, T5-base.
|
111 |
|
112 |
Note that the order of a pair is important. For example, the 2nd and 3rd examples in the `pairs` list are consistent and hallucinated, respectively.
|
config.json
CHANGED
@@ -8,5 +8,6 @@
|
|
8 |
},
|
9 |
"model_type": "HHEMv2Config",
|
10 |
"torch_dtype": "float32",
|
11 |
-
"transformers_version": "4.39.3"
|
|
|
12 |
}
|
|
|
8 |
},
|
9 |
"model_type": "HHEMv2Config",
|
10 |
"torch_dtype": "float32",
|
11 |
+
"transformers_version": "4.39.3",
|
12 |
+
"id2label": {"0": "hallucinated", "1": "consistent"}
|
13 |
}
|
modeling_hhem_v2.py
CHANGED
@@ -45,8 +45,15 @@ class HHEMv2ForSequenceClassification(PreTrainedModel):
|
|
45 |
# combined_model = PeftModel.from_pretrained(base_model, checkpoint, is_trainable=False)
|
46 |
# self.t5 = combined_model
|
47 |
|
48 |
-
def forward(self, **kwargs):
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
def predict(self, text_pairs):
|
52 |
tokenizer = self.tokenzier
|
|
|
45 |
# combined_model = PeftModel.from_pretrained(base_model, checkpoint, is_trainable=False)
|
46 |
# self.t5 = combined_model
|
47 |
|
48 |
+
def forward(self, **kwargs):  # To cope with the `text-classification` pipeline
    """Run the wrapped T5 model in inference mode and reduce its logits.

    The `text-classification` pipeline expects per-example classification
    logits, so this keeps only the logits at the first sequence position
    (``logits[:, 0, :]``).
    NOTE(review): assumes ``outputs.logits`` is 3-D, i.e. indexable as
    (batch, seq, classes) — confirm against the underlying T5 head.

    Args:
        **kwargs: Tokenized model inputs, forwarded verbatim to ``self.t5``.

    Returns:
        The T5 output object, with its ``logits`` attribute replaced in
        place by the first-position slice (one row of class logits per
        input example).
    """
    self.t5.eval()  # inference-only path: disable dropout etc.
    with torch.no_grad():  # no gradients needed at inference; saves memory
        outputs = self.t5(**kwargs)
    # Collapse (batch, seq, classes) -> (batch, classes): only the first
    # position carries the hallucinated/consistent decision the pipeline reads.
    outputs.logits = outputs.logits[:, 0, :]
    return outputs
|
57 |
|
58 |
def predict(self, text_pairs):
|
59 |
tokenizer = self.tokenzier
|