Update README.md
Browse files
README.md
CHANGED
@@ -140,7 +140,7 @@ doc_inputs = processor(text=doc_texts, images=doc_image_inputs, videos=doc_video
|
|
140 |
doc_inputs = model.prepare_inputs_for_generation(**doc_inputs, use_cache=False)
|
141 |
output = model(**doc_inputs, return_dict=True, output_hidden_states=True)
|
142 |
with torch.no_grad():
|
143 |
-
output = model(**
|
144 |
doc_embeddings = get_embedding(output.hidden_states[-1], 1536) # adjust dimensionality for efficiency trade-off e.g. 512
|
145 |
|
146 |
```
|
@@ -161,12 +161,12 @@ for i in range(num_queries):
|
|
161 |
### Encode Document Text
|
162 |
This DSE checkpoint was warmed up with `Tevatron/msmarco-passage-aug`, thus the model can also effectively encode documents as text input.
|
163 |
```python
|
164 |
-
|
165 |
"The llama (/ˈlɑːmə/; Spanish pronunciation: [ˈʎama] or [ˈʝama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era.",
|
166 |
"Llama (acronym for Large Language Model Meta AI, and formerly stylized as LLaMA) is a family of autoregressive large language models (LLMs) released by Meta AI starting in February 2023.[2][3] The latest version is Llama 3.1, released in July 2024.[4]"
|
167 |
]
|
168 |
doc_messages = []
|
169 |
-
for doc in
|
170 |
message = [
|
171 |
{
|
172 |
'role': 'user',
|
@@ -186,7 +186,7 @@ doc_inputs = processor(text=doc_texts, images=doc_image_inputs, videos=doc_video
|
|
186 |
doc_inputs = model.prepare_inputs_for_generation(**doc_inputs, use_cache=False)
|
187 |
output = model(**doc_inputs, return_dict=True, output_hidden_states=True)
|
188 |
with torch.no_grad():
|
189 |
-
output = model(**
|
190 |
doc_embeddings = get_embedding(output.hidden_states[-1], 1536) # adjust dimensionality for efficiency trade-off e.g. 512
|
191 |
|
192 |
for i in range(num_queries):
|
|
|
140 |
doc_inputs = model.prepare_inputs_for_generation(**doc_inputs, use_cache=False)
|
141 |
output = model(**doc_inputs, return_dict=True, output_hidden_states=True)
|
142 |
with torch.no_grad():
|
143 |
+
output = model(**doc_inputs, return_dict=True, output_hidden_states=True)
|
144 |
doc_embeddings = get_embedding(output.hidden_states[-1], 1536) # adjust dimensionality for efficiency trade-off e.g. 512
|
145 |
|
146 |
```
|
|
|
161 |
### Encode Document Text
|
162 |
This DSE checkpoint was warmed up with `Tevatron/msmarco-passage-aug`, thus the model can also effectively encode documents as text input.
|
163 |
```python
|
164 |
+
doc_texts = [
|
165 |
"The llama (/ˈlɑːmə/; Spanish pronunciation: [ˈʎama] or [ˈʝama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era.",
|
166 |
"Llama (acronym for Large Language Model Meta AI, and formerly stylized as LLaMA) is a family of autoregressive large language models (LLMs) released by Meta AI starting in February 2023.[2][3] The latest version is Llama 3.1, released in July 2024.[4]"
|
167 |
]
|
168 |
doc_messages = []
|
169 |
+
for doc in doc_texts:
|
170 |
message = [
|
171 |
{
|
172 |
'role': 'user',
|
|
|
186 |
doc_inputs = model.prepare_inputs_for_generation(**doc_inputs, use_cache=False)
|
187 |
output = model(**doc_inputs, return_dict=True, output_hidden_states=True)
|
188 |
with torch.no_grad():
|
189 |
+
output = model(**doc_inputs, return_dict=True, output_hidden_states=True)
|
190 |
doc_embeddings = get_embedding(output.hidden_states[-1], 1536) # adjust dimensionality for efficiency trade-off e.g. 512
|
191 |
|
192 |
for i in range(num_queries):
|