# mergekit configuration: passthrough merge built from a single model,
# Sao10K/Fimbulvetr-11B-v2, cut into consecutive layer slices.
#
# Every slice except the first carries a `scale` parameter list with two
# filtered entries (`o_proj` and `down_proj`). The scale value ramps up in
# steps of 0.5 every two slices, from 1.5 (layers 4-11) to 4.0 (last slice).
# No unfiltered default entry is given, so tensors matching neither filter
# are presumably left unscaled — confirm against the mergekit version in use.
slices:
  # Layers [0, 4): taken as-is, no scale parameters.
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [0, 4]

  # Layers [4, 12): scale 1.5
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [4, 8]
        parameters:
          scale:
            - filter: o_proj
              value: 1.5
            - filter: down_proj
              value: 1.5
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [8, 12]
        parameters:
          scale:
            - filter: o_proj
              value: 1.5
            - filter: down_proj
              value: 1.5

  # Layers [12, 20): scale 2.0
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [12, 16]
        parameters:
          scale:
            - filter: o_proj
              value: 2.0
            - filter: down_proj
              value: 2.0
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [16, 20]
        parameters:
          scale:
            - filter: o_proj
              value: 2.0
            - filter: down_proj
              value: 2.0

  # Layers [20, 28): scale 2.5
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [20, 24]
        parameters:
          scale:
            - filter: o_proj
              value: 2.5
            - filter: down_proj
              value: 2.5
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [24, 28]
        parameters:
          scale:
            - filter: o_proj
              value: 2.5
            - filter: down_proj
              value: 2.5

  # Layers [28, 36): scale 3.0
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [28, 32]
        parameters:
          scale:
            - filter: o_proj
              value: 3.0
            - filter: down_proj
              value: 3.0
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [32, 36]
        parameters:
          scale:
            - filter: o_proj
              value: 3.0
            - filter: down_proj
              value: 3.0

  # Layers [36, 44): scale 3.5
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [36, 40]
        parameters:
          scale:
            - filter: o_proj
              value: 3.5
            - filter: down_proj
              value: 3.5
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [40, 44]
        parameters:
          scale:
            - filter: o_proj
              value: 3.5
            - filter: down_proj
              value: 3.5

  # Final slice: scale 4.0
  # NOTE(review): this range spans only three layers ([44, 47]) while every
  # other slice spans four. If layer_range is end-exclusive and the source
  # model has 48 layers, the last layer (index 47) is omitted from the merge.
  # Confirm whether that is intentional or whether [44, 48] was meant.
  - sources:
      - model: Sao10K/Fimbulvetr-11B-v2
        layer_range: [44, 47]
        parameters:
          scale:
            - filter: o_proj
              value: 4.0
            - filter: down_proj
              value: 4.0

merge_method: passthrough
dtype: bfloat16