init model gemma 2 2b
Browse files
- model.gguf +2 -2
- model.yml +3 -3
model.gguf
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af92b6a034fd095807bd339ae9268ceba1a951201994063d5eea15e564f0a42f
|
3 |
+
size 1708582496
|
model.yml
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
name:
|
2 |
-
model:
|
3 |
version: 1
|
4 |
|
5 |
# Results Preferences
|
@@ -14,7 +14,7 @@ max_tokens: 4096 # Infer from base config.json -> max_position_embeddings
|
|
14 |
stream: true # true | false
|
15 |
|
16 |
# Engine / Model Settings
|
17 |
-
ngl:
|
18 |
ctx_len: 4096 # Infer from base config.json -> max_position_embeddings
|
19 |
engine: cortex.llamacpp
|
20 |
prompt_template: "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model"
|
|
|
1 |
+
name: gemma2
|
2 |
+
model: gemma2:2B
|
3 |
version: 1
|
4 |
|
5 |
# Results Preferences
|
|
|
14 |
stream: true # true | false
|
15 |
|
16 |
# Engine / Model Settings
|
17 |
+
ngl: 300 # GPU layers to offload; a value above base config.json -> num_hidden_layers offloads every layer
|
18 |
ctx_len: 4096 # Infer from base config.json -> max_position_embeddings
|
19 |
engine: cortex.llamacpp
|
20 |
prompt_template: "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model"
|