Kaguya-19 committed on
Commit
143fca0
·
1 Parent(s): 09daf17

fit for sentence transformers

Browse files
1_Pool/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 2304,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": false
10
+ }
README.md CHANGED
@@ -347,6 +347,7 @@ flash-attn>2.3.5
347
 
348
  ### 示例脚本 Demo
349
 
 
350
  ```python
351
 
352
  from transformers import AutoModel, AutoTokenizer
@@ -358,10 +359,11 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
358
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.float16).to("cuda")
359
  model.eval()
360
 
361
- def weighted_mean_pooling(hidden, attention_mask):
362
- attention_mask_ = attention_mask * attention_mask.cumsum(dim=1)
363
- s = torch.sum(hidden * attention_mask_.unsqueeze(-1).float(), dim=1)
364
- d = attention_mask_.sum(dim=1, keepdim=True).float()
 
365
  reps = s / d
366
  return reps
367
 
@@ -373,7 +375,7 @@ def encode(input_texts):
373
  attention_mask = batch_dict["attention_mask"]
374
  hidden = outputs.last_hidden_state
375
 
376
- reps = weighted_mean_pooling(hidden, attention_mask)
377
  embeddings = F.normalize(reps, p=2, dim=1).detach().cpu().numpy()
378
  return embeddings
379
 
@@ -391,6 +393,30 @@ scores = (embeddings_query @ embeddings_doc.T)
391
  print(scores.tolist()) # [[0.3535913825035095, 0.18596848845481873]]
392
  ```
393
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  ## 实验结果 Evaluation Results
395
 
396
  ### 中文与英文检索结果 CN/EN Retrieval Results
 
347
 
348
  ### 示例脚本 Demo
349
 
350
+ #### Huggingface Transformers
351
  ```python
352
 
353
  from transformers import AutoModel, AutoTokenizer
 
359
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.float16).to("cuda")
360
  model.eval()
361
 
362
+ # 事实上我们用的是weighted mean pooling,但为了部署方便,我们将一部分pooling步骤集成在model.forward中
363
+ # In fact, we use weighted mean pooling, but for deployment convenience part of the pooling step is integrated into model.forward
364
+ def mean_pooling(hidden,attention_mask):
365
+ s = torch.sum(hidden * attention_mask.unsqueeze(-1).float(), dim=1)
366
+ d = attention_mask.sum(dim=1, keepdim=True).float()
367
  reps = s / d
368
  return reps
369
 
 
375
  attention_mask = batch_dict["attention_mask"]
376
  hidden = outputs.last_hidden_state
377
 
378
+ reps = mean_pooling(hidden, attention_mask)
379
  embeddings = F.normalize(reps, p=2, dim=1).detach().cpu().numpy()
380
  return embeddings
381
 
 
393
  print(scores.tolist()) # [[0.3535913825035095, 0.18596848845481873]]
394
  ```
395
 
396
+ #### Sentence Transformers
397
+
398
+ ```python
399
+ import torch
400
+ from sentence_transformers import SentenceTransformer
401
+
402
+ model_name = "openbmb/MiniCPM-Embedding"
403
+ model = SentenceTransformer(model_name, trust_remote_code=True, model_kwargs={"attn_implementation":"flash_attention_2", "torch_dtype":torch.float16})
404
+ model.max_seq_length = 512
405
+ model.tokenizer.padding_side="right"
406
+
407
+ queries = ["中国的首都是哪里?"]
408
+ passages = ["beijing", "shanghai"]
409
+
410
+
411
+ INSTRUCTION = "Query: "
412
+
413
+ embeddings_query = model.encode(queries, prompt=INSTRUCTION, normalize_embeddings=True)
414
+ embeddings_doc = model.encode(passages, normalize_embeddings=True)
415
+
416
+ scores = (embeddings_query @ embeddings_doc.T)
417
+ print(scores.tolist()) # [[0.3535913825035095, 0.18596848845481873]]
418
+ ```
419
+
420
  ## 实验结果 Evaluation Results
421
 
422
  ### 中文与英文检索结果 CN/EN Retrieval Results
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "openbmb/RankCPM-E",
3
  "architectures": [
4
  "MiniCPM"
5
  ],
 
1
  {
2
+ "_name_or_path": "openbmb/MiniCPM-Embedding",
3
  "architectures": [
4
  "MiniCPM"
5
  ],
config_sentence_transformers.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.7.0",
4
+ "transformers": "4.37.2",
5
+ "pytorch": "2.0.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null
9
+ }
configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"task":"sentence-embedding"}
modeling_minicpm.py CHANGED
@@ -1043,6 +1043,8 @@ class MiniCPMModel(MiniCPMPreTrainedModel):
1043
  if inputs_embeds is None:
1044
  inputs_embeds = self.embed_tokens(input_ids) * self.config.scale_emb
1045
 
 
 
1046
  if self._use_flash_attention_2:
1047
  # 2d mask is passed through the layers
1048
  attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
@@ -1107,6 +1109,13 @@ class MiniCPMModel(MiniCPMPreTrainedModel):
1107
  if output_hidden_states:
1108
  all_hidden_states += (hidden_states,)
1109
 
 
 
 
 
 
 
 
1110
  next_cache = None
1111
  if use_cache:
1112
  next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache
 
1043
  if inputs_embeds is None:
1044
  inputs_embeds = self.embed_tokens(input_ids) * self.config.scale_emb
1045
 
1046
+ _attention_mask = attention_mask
1047
+
1048
  if self._use_flash_attention_2:
1049
  # 2d mask is passed through the layers
1050
  attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
 
1109
  if output_hidden_states:
1110
  all_hidden_states += (hidden_states,)
1111
 
1112
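+ # Pre-apply the position weights of weighted mean pooling here, so that a plain mean pooling applied afterwards yields the weighted mean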
1113
+ attention_mask_ = _attention_mask * _attention_mask.cumsum(dim=1)
1114
+ s = hidden_states * attention_mask_.unsqueeze(-1).float()
1115
+ d = attention_mask_.sum(dim=1, keepdim=True).unsqueeze(1).float() /_attention_mask.sum(dim=1, keepdim=True).unsqueeze(1).float()
1116
+
1117
+ hidden_states = s / d
1118
+
1119
  next_cache = None
1120
  if use_cache:
1121
  next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pool",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]