bamec66557 committed on
Commit
6a02001
·
verified ·
1 Parent(s): 6e1177b

Update mergekit_config.yml

Browse files
Files changed (1) hide show
  1. mergekit_config.yml +19 -19
mergekit_config.yml CHANGED
@@ -1,34 +1,34 @@
1
  slices:
2
- - sources:
3
  - model: bamec66557/MNRP_0.5
4
- layer_range: [0, 40] # MNRP_0.5 모델의 병합 레이어 범위
5
  - model: bamec66557/MISCHIEVOUS-12B
6
- layer_range: [0, 40] # MISCHIEVOUS-12B 모델의 병합 레이어 범위
7
-
8
- # Layer별 병합 비율을 조정하여 더 부드러운 통합을 유도
9
- # 각 필터는 모델 내 특정 메커니즘에 영향을 미침
10
  parameters:
11
  t:
12
- - filter: self_attn
13
- value: [0.2, 0.4, 0.6, 0.8, 1.0] # Self-attention 레이어의 점진적 병합
14
  - filter: mlp
15
- value: [0.8, 0.6, 0.4, 0.2, 0.0] # MLP 레이어는 반대 비율로 병합
16
  - filter: layer_norm
17
- value: [0.5, 0.5, 0.5, 0.5, 0.5] # Layer Normalization은 균일 병합
18
- - value: 0.7 # 기본값
19
 
20
- merge_method: slerp # 병합 방식을 slerp로 변경
21
 
22
- base_model: bamec66557/MISCHIEVOUS-12B # 병합의 기본 모델
23
 
24
- dtype: bfloat16 # 병합 시 효율적이고 빠른 연산을 위한 데이터 타입
25
 
26
- # 추가적으로 사용할 수 있는 옵션
27
- regularization:
28
- - method: l2_norm # L2 정규화를 통해 병합된 모델 가중치 안정화
29
  scale: 0.01
30
 
31
  postprocessing:
32
- - operation: smoothing # 병합 후 가중치를 부드럽게 조정
33
  kernel_size: 3
34
- - operation: normalize # 전체 가중치를 정규화
 
1
  slices:
2
+ - sources:
3
  - model: bamec66557/MNRP_0.5
4
+ layer_range: [0, 40] # Merge layer range for MNRP_0.5 model
5
  - model: bamec66557/MISCHIEVOUS-12B
6
+ layer_range: [0, 40] # Merge layer range for MISCHIEVOUS-12B model.
7
+
8
+ # Adjust the merge ratio per layer to drive smoother integration
9
+ # Each filter affects a specific mechanism within the model
10
  parameters:
11
  t:
12
+ - filter: self_attn
13
+ value: [0.2, 0.4, 0.6, 0.8, 1.0] # Progressive merging of self-attention layers
14
  - filter: mlp
15
+ value: [0.8, 0.6, 0.4, 0.2, 0.0] # Merge MLP layers with opposite proportions
16
  - filter: layer_norm
17
+ value: [0.5, 0.5, 0.5, 0.5, 0.5] # Layer Normalisation should be merged uniformly
18
+ - value: 0.7 # Default
19
 
20
+ merge_method: slerp # change merge method to slerp
21
 
22
+ base_model: bamec66557/MISCHIEVOUS-12B # base model for merge
23
 
24
+ dtype: bfloat16 # data type for efficient and fast operations when merging
25
 
26
+ # Additional available options
27
+ regularization:
28
+ - method: l2_norm # Stabilise merged model weights with L2 normalisation
29
  scale: 0.01
30
 
31
  postprocessing:
32
+ - operation: smoothing # Smooth the weights after merging
33
  kernel_size: 3
34
+ - operation: normalize # normalize the overall weights