---
# mergekit configuration: SLERP-merge two 40-layer models.
slices:
  - sources:
      - model: bamec66557/MNRP_0.5
        layer_range: [0, 40]  # merge layer range for the MNRP_0.5 model
      - model: bamec66557/MISCHIEVOUS-12B
        layer_range: [0, 40]  # merge layer range for the MISCHIEVOUS-12B model

merge_method: slerp  # spherical linear interpolation between the two models
base_model: bamec66557/MISCHIEVOUS-12B  # base model for the merge
dtype: bfloat16  # data type for efficient and fast operations when merging

# Adjust the merge ratio per layer to drive smoother integration.
# Each filter targets a specific mechanism within the model; `t` is the
# interpolation factor (0.0 = pure base model, 1.0 = pure other model).
parameters:
  t:
    - filter: self_attn
      value: [0.2, 0.4, 0.6, 0.8, 1.0]  # progressive merging of self-attention layers
    - filter: mlp
      value: [0.8, 0.6, 0.4, 0.2, 0.0]  # merge MLP layers with opposite proportions
    - filter: layer_norm
      value: [0.5, 0.5, 0.5, 0.5, 0.5]  # layer normalisation merged uniformly
    - value: 0.7  # default t for all remaining tensors

# Additional available options.
# NOTE(review): `regularisation` and `postprocessing` are not standard mergekit
# keys — confirm the consuming tool actually supports them.
regularisation:
  - method: l2_norm  # stabilise merged model weights with L2 normalisation
    scale: 0.01

postprocessing:
  - operation: smoothing  # smooth the weights after merging
    kernel_size: 3
  - operation: normalise  # normalise the overall weights