mtasic85 committed on
Commit
c9029e6
·
1 Parent(s): a5d4a06

prepare dataset

Browse files
scripts/prepare_contrain_dataset.py CHANGED
@@ -153,10 +153,11 @@ def batch_iterator(name=None):
153
  dataset = load_dataset('datatab/open-orca-slim-serbian', split='train')
154
  role_map = {'system': 'system', 'human': 'user', 'gpt': 'assistant'}
155
 
156
- for row in dataset:
157
  text = [
158
  f"<|im_start|>{role_map[n['from']]}\n{n['value']}<|im_end|>"
159
- for n in row['conversations']
 
160
  ]
161
 
162
  text = '\n'.join(text) + '\n'
 
153
  dataset = load_dataset('datatab/open-orca-slim-serbian', split='train')
154
  role_map = {'system': 'system', 'human': 'user', 'gpt': 'assistant'}
155
 
156
+ for row in dataset['conversations']:
157
  text = [
158
  f"<|im_start|>{role_map[n['from']]}\n{n['value']}<|im_end|>"
159
+ for n in row
160
+ if n
161
  ]
162
 
163
  text = '\n'.join(text) + '\n'