Files changed (1) hide show
  1. README.md +4 -0
README.md CHANGED
@@ -55,6 +55,7 @@ Since ModernBERT is a Masked Language Model (MLM), you can use the `fill-mask` p
55
  **⚠️ If your GPU supports it, we recommend using ModernBERT with Flash Attention 2 to reach the highest efficiency. To do so, install Flash Attention as follows, then use the model as normal:**
56
 
57
  ```bash
 
58
  pip install flash-attn
59
  ```
60
 
@@ -66,6 +67,7 @@ from transformers import AutoTokenizer, AutoModelForMaskedLM
66
  model_id = "answerdotai/ModernBERT-base"
67
  tokenizer = AutoTokenizer.from_pretrained(model_id)
68
  model = AutoModelForMaskedLM.from_pretrained(model_id)
 
69
 
70
  text = "The capital of France is [MASK]."
71
  inputs = tokenizer(text, return_tensors="pt")
@@ -86,6 +88,8 @@ import torch
86
  from transformers import pipeline
87
  from pprint import pprint
88
 
 
 
89
  pipe = pipeline(
90
  "fill-mask",
91
  model="answerdotai/ModernBERT-base",
 
55
  **⚠️ If your GPU supports it, we recommend using ModernBERT with Flash Attention 2 to reach the highest efficiency. To do so, install Flash Attention as follows, then use the model as normal:**
56
 
57
  ```bash
58
+ # To load on CPU, you can skip this step.
59
  pip install flash-attn
60
  ```
61
 
 
67
  model_id = "answerdotai/ModernBERT-base"
68
  tokenizer = AutoTokenizer.from_pretrained(model_id)
69
  model = AutoModelForMaskedLM.from_pretrained(model_id)
70
+ # For CPU, use: model = AutoModelForMaskedLM.from_pretrained(model_id, reference_compile=False)
71
 
72
  text = "The capital of France is [MASK]."
73
  inputs = tokenizer(text, return_tensors="pt")
 
88
  from transformers import pipeline
89
  from pprint import pprint
90
 
91
+ # To load on CPU, set reference_compile=False
92
+
93
  pipe = pipeline(
94
  "fill-mask",
95
  model="answerdotai/ModernBERT-base",