prepare dataset
Browse files
scripts/prepare_contrain_dataset.py
CHANGED
@@ -153,10 +153,11 @@ def batch_iterator(name=None):
|
|
153 |
dataset = load_dataset('datatab/open-orca-slim-serbian', split='train')
|
154 |
role_map = {'system': 'system', 'human': 'user', 'gpt': 'assistant'}
|
155 |
|
156 |
-
for row in dataset:
|
157 |
text = [
|
158 |
f"<|im_start|>{role_map[n['from']]}\n{n['value']}<|im_end|>"
|
159 |
-
for n in row
|
|
|
160 |
]
|
161 |
|
162 |
text = '\n'.join(text) + '\n'
|
|
|
153 |
dataset = load_dataset('datatab/open-orca-slim-serbian', split='train')
|
154 |
role_map = {'system': 'system', 'human': 'user', 'gpt': 'assistant'}
|
155 |
|
156 |
+
for row in dataset['conversations']:
|
157 |
text = [
|
158 |
f"<|im_start|>{role_map[n['from']]}\n{n['value']}<|im_end|>"
|
159 |
+
for n in row
|
160 |
+
if n
|
161 |
]
|
162 |
|
163 |
text = '\n'.join(text) + '\n'
|