alvations committed on
Commit
95d16d9
·
1 Parent(s): e96ad88

Added capabilities to load local models

Browse files

From https://stackoverflow.com/questions/75886674/how-to-compute-sentence-level-perplexity-from-hugging-face-language-models/75887046?noredirect=1#comment133854598_75887046

Files changed (1) hide show
  1. perplexity.py +5 -3
perplexity.py CHANGED
@@ -101,7 +101,9 @@ class Perplexity(evaluate.Metric):
101
  )
102
 
103
  def _compute(
104
- self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None
 
 
105
  ):
106
 
107
  if device is not None:
@@ -111,10 +113,10 @@ class Perplexity(evaluate.Metric):
111
  else:
112
  device = "cuda" if torch.cuda.is_available() else "cpu"
113
 
114
- model = AutoModelForCausalLM.from_pretrained(model_id)
115
  model = model.to(device)
116
 
117
- tokenizer = AutoTokenizer.from_pretrained(model_id)
118
 
119
  # if batch_size > 1 (which generally leads to padding being required), and
120
  # if there is not an already assigned pad_token, assign an existing
 
101
  )
102
 
103
  def _compute(
104
+ self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, local_file_only: bool = False,
105
+ device=None, max_length=None,
106
+
107
  ):
108
 
109
  if device is not None:
 
113
  else:
114
  device = "cuda" if torch.cuda.is_available() else "cpu"
115
 
116
+ model = AutoModelForCausalLM.from_pretrained(model_id, local_file_only=local_file_only)
117
  model = model.to(device)
118
 
119
+ tokenizer = AutoTokenizer.from_pretrained(model_id, local_file_only=local_file_only)
120
 
121
  # if batch_size > 1 (which generally leads to padding being required), and
122
  # if there is not an already assigned pad_token, assign an existing