DavidAU's picture
Upload folder using huggingface_hub
215990b verified
|
raw
history blame
13.1 kB
metadata
base_model: []
library_name: transformers
tags:
  - mergekit
  - merge

MN-Wordstorm-I-Brainstorm-exp40-3x

This is a merge of pre-trained language models created using mergekit.

Merge Details

Merge Method

This model was merged using the passthrough merge method.

Models Merged

The following models were included in the merge:

  • H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct

Configuration

The following YAML configuration was used to produce this model:

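# Six splits plus an "end game" section.
# "D" (the default, unfiltered scale value) starts at plus .1 versus the down_proj / o_proj scale.
# 40 plus: 40 extra single-layer slices are appended after the base range.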

# Ordered list of layer slices that make up the output model.
slices:
 # Base stack: layer_range [0, 62] of the source model, with no scaling parameters.
 # NOTE(review): the range end looks exclusive (every later slice uses [62, 63]) — confirm
 # against the mergekit docs. The model path is a local Windows path, not a hub id.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [0, 62]

# conc layers
# split 1

 # Split 1: four extra copies of layer_range [62, 63], each with its own scale list.
 # Each list gives one value for the o_proj filter, one for the down_proj filter,
 # and a final unfiltered value (the "D" of the section comments).
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.01
         - filter: down_proj
           value: 0.01
         - value: 0.11
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.02
         - filter: down_proj
           value: 0.02
         - value: 0.12
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.03
         - filter: down_proj
           value: 0.03
         - value: 0.13

 # Last slice of the split: the unfiltered value surges to 0.61.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.04
         - filter: down_proj
           value: 0.04
         - value: 0.61

# split 2, SURGE D THEN D drop .46, continues @ D .15 (from .13)

 # Split 2: four more copies of layer_range [62, 63].
 # o_proj / down_proj step from 0.05 to 0.08; the unfiltered value resumes at 0.15.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.05
         - filter: down_proj
           value: 0.05
         - value: 0.15
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.06
         - filter: down_proj
           value: 0.06
         - value: 0.16
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.07
         - filter: down_proj
           value: 0.07
         - value: 0.17
 # Last slice of the split: the unfiltered value surges to 0.41.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.08
         - filter: down_proj
           value: 0.08
         - value: 0.41

# split 3, SURGE D to .41, D drop .22 (to .19) ... follows .17 previous

 # Split 3: twelve copies of layer_range [62, 63].
 # o_proj / down_proj run from 0.09 to 0.23; the unfiltered value runs from 0.19 to 0.35
 # with no surge at the end of this split.
 # All floats are written with a leading zero, matching the rest of the file.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.09
         - filter: down_proj
           value: 0.09
         - value: 0.19
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.10
         - filter: down_proj
           value: 0.10
         - value: 0.20
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.11
         - filter: down_proj
           value: 0.11
         - value: 0.22
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.12
         - filter: down_proj
           value: 0.12
         - value: 0.24
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.13
         - filter: down_proj
           value: 0.13
         - value: 0.26
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.14
         - filter: down_proj
           value: 0.14
         - value: 0.28
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.15
         - filter: down_proj
           value: 0.15
         - value: 0.30
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.16
         - filter: down_proj
           value: 0.16
         - value: 0.31
 # NOTE(review): o_proj / down_proj jump from 0.16 straight to 0.20 here
 # (0.17-0.19 are skipped) — confirm this is intentional.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.20
         - filter: down_proj
           value: 0.20
         - value: 0.32
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.21
         - filter: down_proj
           value: 0.21
         - value: 0.33
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.22
         - filter: down_proj
           value: 0.22
         - value: 0.34
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.23
         - filter: down_proj
           value: 0.23
         - value: 0.35

# split 4 , NO SURGE D, "D" down drop of .24 ; reverts to .11 (the very first "D" setting )

 # Split 4: four copies of layer_range [62, 63].
 # o_proj / down_proj now advance in steps of about 0.001 from 0.24;
 # the unfiltered value restarts at 0.11.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.24
         - filter: down_proj
           value: 0.24
         - value: 0.11
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.241
         - filter: down_proj
           value: 0.241
         - value: 0.12
 # NOTE(review): o_proj (0.242) and down_proj (0.243) differ in this slice, unlike
 # the neighbouring slices — confirm this is intentional.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.242
         - filter: down_proj
           value: 0.243
         - value: 0.13
 # Last slice of the split: the unfiltered value surges to 0.61.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.244
         - filter: down_proj
           value: 0.244
         - value: 0.61

# split 5, D Surge to .61, drop to .15 (following .13)

 # Split 5: four copies of layer_range [62, 63].
 # o_proj / down_proj step from 0.245 to 0.248; the unfiltered value resumes at 0.15.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.245
         - filter: down_proj
           value: 0.245
         - value: 0.15
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.246
         - filter: down_proj
           value: 0.246
         - value: 0.16
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.247
         - filter: down_proj
           value: 0.247
         - value: 0.17
 # Last slice of the split: the unfiltered value surges to 0.41.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.248
         - filter: down_proj
           value: 0.248
         - value: 0.41

# split 6, D surge to .41 , then follows .17 

 # Split 6: seven copies of layer_range [62, 63].
 # o_proj / down_proj step from 0.249 to 0.256; the unfiltered value runs from 0.19
 # to 0.28 and ends on 0.60.
 # All floats are written with a leading zero, matching the rest of the file.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.249
         - filter: down_proj
           value: 0.249
         - value: 0.19
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.250
         - filter: down_proj
           value: 0.250
         - value: 0.20
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.251
         - filter: down_proj
           value: 0.251
         - value: 0.22
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.252
         - filter: down_proj
           value: 0.252
         - value: 0.24
 # NOTE(review): o_proj (0.253) and down_proj (0.254) differ in this slice, unlike
 # the neighbouring slices — confirm this is intentional.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.253
         - filter: down_proj
           value: 0.254
         - value: 0.26
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.255
         - filter: down_proj
           value: 0.255
         - value: 0.28
 # Last slice of the split: the unfiltered value surges to 0.60.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.256
         - filter: down_proj
           value: 0.256
         - value: 0.60

# O PROJ, DPROJ (and "D") jump to .3333 and ramp up from there
# end game

 # End game: five final copies of layer_range [62, 63].
 # In the first four, o_proj, down_proj and the unfiltered value share one number
 # (0.333..., 0.444..., 0.555..., 0.666...); the last slice uses 0.85 / 0.90 / 0.92.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.3333333333333
         - filter: down_proj
           value: 0.3333333333333
         - value: 0.3333333333333
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.4444444444444
         - filter: down_proj
           value: 0.4444444444444
         - value: 0.4444444444444
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.5555555555555
         - filter: down_proj
           value: 0.5555555555555
         - value: 0.5555555555555
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.6666666666666
         - filter: down_proj
           value: 0.6666666666666
         - value: 0.6666666666666
 # Final slice of the model: the three values differ from each other here.
 - sources:
   - model: H:/David_au-RCM-11-models/MN-WORDSTORM-pt8-RCM-Emotion-Action-18.5B-Instruct
     layer_range: [62, 63]
     parameters:
       scale:
         - filter: o_proj
           value: 0.85
         - filter: down_proj
           value: 0.90
         - value: 0.92
# Merge method applied to the slices listed above.
merge_method: passthrough
# Data type setting for the merge.
# NOTE(review): presumably also the dtype of the saved weights — confirm against the mergekit docs.
dtype: bfloat16