wenhuach committed
Commit 44f6bc8
1 Parent(s): f4bbbab

workaround for overflow


Signed-off-by: wenhuach <[email protected]>

Files changed (2)
  1. config.json +1 -1
  2. modeling_deepseek.py +1 -1
config.json CHANGED
@@ -79,7 +79,7 @@
   "tie_word_embeddings": false,
   "topk_group": 4,
   "topk_method": "noaux_tc",
- "torch_dtype": "bfloat16",
+ "torch_dtype": "float16",
   "transformers_version": "4.47.0",
   "use_cache": true,
   "v_head_dim": 128,
modeling_deepseek.py CHANGED
@@ -386,7 +386,7 @@ class DeepseekV3MLP(nn.Module):
         self.act_fn = ACT2FN[config.hidden_act]
 
     def forward(self, x):
-        down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
+        down_proj = self.down_proj(torch.clip(self.act_fn(self.gate_proj(x)) * self.up_proj(x), -65504, 65504))
         return down_proj
 
 
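
The modeling change clamps the gated MLP activation to the float16 representable range (±65504) before the down projection, which keeps the forward pass finite when the model runs in float16. Below is a minimal, self-contained sketch of the same idea; the module name and dimensions are illustrative stand-ins, not the actual DeepseekV3MLP configuration:

```python
import torch
import torch.nn as nn

FP16_MAX = torch.finfo(torch.float16).max  # 65504.0

class ClampedMLP(nn.Module):
    """Toy gated MLP that clips its intermediate activation to the fp16 range
    before down_proj, mirroring the workaround applied in modeling_deepseek.py."""

    def __init__(self, hidden_size: int, intermediate_size: int):
        super().__init__()
        self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)
        self.act_fn = nn.SiLU()

    def forward(self, x):
        h = self.act_fn(self.gate_proj(x)) * self.up_proj(x)
        # Clip to [-65504, 65504] so the product cannot overflow to inf in float16.
        h = torch.clip(h, -FP16_MAX, FP16_MAX)
        return self.down_proj(h)

mlp = ClampedMLP(hidden_size=8, intermediate_size=16).half()
out = mlp(torch.randn(2, 8, dtype=torch.float16))
print(out.dtype, torch.isfinite(out).all())  # torch.float16, tensor(True)
```
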