Spaces:
Runtime error
Runtime error
nsthorat-lilac
committed on
Commit
·
be17f2e
1
Parent(s):
1648180
Upload data/lilac.yml with huggingface_hub
Browse files
- data/lilac.yml +27 -0
data/lilac.yml
CHANGED
@@ -11,6 +11,10 @@ datasets:
|
|
11 |
embeddings:
|
12 |
- path: premise
|
13 |
embedding: gte-small
|
|
|
|
|
|
|
|
|
14 |
signals:
|
15 |
- path: premise
|
16 |
signal:
|
@@ -25,6 +29,7 @@ datasets:
|
|
25 |
ui:
|
26 |
media_paths:
|
27 |
- premise
|
|
|
28 |
- namespace: local
|
29 |
name: glue_ax
|
30 |
source:
|
@@ -66,6 +71,7 @@ datasets:
|
|
66 |
ui:
|
67 |
media_paths:
|
68 |
- hypothesis
|
|
|
69 |
- namespace: local
|
70 |
name: imdb3
|
71 |
source:
|
@@ -75,6 +81,7 @@ datasets:
|
|
75 |
ui:
|
76 |
media_paths:
|
77 |
- text
|
|
|
78 |
- namespace: local
|
79 |
name: imdb
|
80 |
source:
|
@@ -94,6 +101,7 @@ datasets:
|
|
94 |
ui:
|
95 |
media_paths:
|
96 |
- text
|
|
|
97 |
- namespace: local
|
98 |
name: imdb2
|
99 |
source:
|
@@ -103,6 +111,7 @@ datasets:
|
|
103 |
ui:
|
104 |
media_paths:
|
105 |
- text
|
|
|
106 |
- namespace: lilac
|
107 |
name: OpenOrca-100k
|
108 |
source:
|
@@ -235,11 +244,15 @@ datasets:
|
|
235 |
- path: response
|
236 |
signal:
|
237 |
signal_name: text_statistics
|
|
|
|
|
|
|
238 |
settings:
|
239 |
ui:
|
240 |
media_paths:
|
241 |
- question
|
242 |
- response
|
|
|
243 |
- namespace: local
|
244 |
name: the_movies_dataset
|
245 |
source:
|
@@ -251,6 +264,7 @@ datasets:
|
|
251 |
ui:
|
252 |
media_paths:
|
253 |
- overview
|
|
|
254 |
- namespace: local
|
255 |
name: glue_ax_parquet
|
256 |
source:
|
@@ -261,6 +275,7 @@ datasets:
|
|
261 |
ui:
|
262 |
media_paths:
|
263 |
- premise
|
|
|
264 |
- namespace: lilac
|
265 |
name: mmlu_professional_law
|
266 |
source:
|
@@ -425,6 +440,7 @@ datasets:
|
|
425 |
- question
|
426 |
- - choices
|
427 |
- '*'
|
|
|
428 |
preferred_embedding: gte-small
|
429 |
- namespace: local
|
430 |
name: deepset-prompt-inj
|
@@ -438,6 +454,7 @@ datasets:
|
|
438 |
ui:
|
439 |
media_paths:
|
440 |
- text
|
|
|
441 |
- namespace: local
|
442 |
name: jasper-prompt-inj
|
443 |
source:
|
@@ -450,6 +467,7 @@ datasets:
|
|
450 |
ui:
|
451 |
media_paths:
|
452 |
- text
|
|
|
453 |
- namespace: local
|
454 |
name: mosaic-chat-v2
|
455 |
source:
|
@@ -550,6 +568,7 @@ datasets:
|
|
550 |
media_paths:
|
551 |
- prompt
|
552 |
- response
|
|
|
553 |
preferred_embedding: gte-small
|
554 |
- namespace: local
|
555 |
name: databricks-dolly-15k-curated-en
|
@@ -564,6 +583,8 @@ datasets:
|
|
564 |
- value
|
565 |
- '*'
|
566 |
embedding: gte-small
|
|
|
|
|
567 |
signals:
|
568 |
- path: original-instruction
|
569 |
signal:
|
@@ -793,6 +814,9 @@ datasets:
|
|
793 |
- '*'
|
794 |
signal:
|
795 |
signal_name: text_statistics
|
|
|
|
|
|
|
796 |
settings:
|
797 |
ui:
|
798 |
media_paths:
|
@@ -808,6 +832,7 @@ datasets:
|
|
808 |
- - new-response
|
809 |
- value
|
810 |
- '*'
|
|
|
811 |
preferred_embedding: gte-small
|
812 |
- namespace: local
|
813 |
name: open-asssistant-conversations
|
@@ -888,6 +913,7 @@ datasets:
|
|
888 |
ui:
|
889 |
media_paths:
|
890 |
- text
|
|
|
891 |
preferred_embedding: gte-small
|
892 |
- namespace: local
|
893 |
name: enron-emails
|
@@ -964,4 +990,5 @@ datasets:
|
|
964 |
ui:
|
965 |
media_paths:
|
966 |
- text
|
|
|
967 |
preferred_embedding: gte-small
|
|
|
11 |
embeddings:
|
12 |
- path: premise
|
13 |
embedding: gte-small
|
14 |
+
- path: premise
|
15 |
+
embedding: gte-base
|
16 |
+
- path: hypothesis
|
17 |
+
embedding: gte-small
|
18 |
signals:
|
19 |
- path: premise
|
20 |
signal:
|
|
|
29 |
ui:
|
30 |
media_paths:
|
31 |
- premise
|
32 |
+
markdown_paths: []
|
33 |
- namespace: local
|
34 |
name: glue_ax
|
35 |
source:
|
|
|
71 |
ui:
|
72 |
media_paths:
|
73 |
- hypothesis
|
74 |
+
markdown_paths: []
|
75 |
- namespace: local
|
76 |
name: imdb3
|
77 |
source:
|
|
|
81 |
ui:
|
82 |
media_paths:
|
83 |
- text
|
84 |
+
markdown_paths: []
|
85 |
- namespace: local
|
86 |
name: imdb
|
87 |
source:
|
|
|
101 |
ui:
|
102 |
media_paths:
|
103 |
- text
|
104 |
+
markdown_paths: []
|
105 |
- namespace: local
|
106 |
name: imdb2
|
107 |
source:
|
|
|
111 |
ui:
|
112 |
media_paths:
|
113 |
- text
|
114 |
+
markdown_paths: []
|
115 |
- namespace: lilac
|
116 |
name: OpenOrca-100k
|
117 |
source:
|
|
|
244 |
- path: response
|
245 |
signal:
|
246 |
signal_name: text_statistics
|
247 |
+
- path: system_prompt
|
248 |
+
signal:
|
249 |
+
signal_name: pii
|
250 |
settings:
|
251 |
ui:
|
252 |
media_paths:
|
253 |
- question
|
254 |
- response
|
255 |
+
markdown_paths: []
|
256 |
- namespace: local
|
257 |
name: the_movies_dataset
|
258 |
source:
|
|
|
264 |
ui:
|
265 |
media_paths:
|
266 |
- overview
|
267 |
+
markdown_paths: []
|
268 |
- namespace: local
|
269 |
name: glue_ax_parquet
|
270 |
source:
|
|
|
275 |
ui:
|
276 |
media_paths:
|
277 |
- premise
|
278 |
+
markdown_paths: []
|
279 |
- namespace: lilac
|
280 |
name: mmlu_professional_law
|
281 |
source:
|
|
|
440 |
- question
|
441 |
- - choices
|
442 |
- '*'
|
443 |
+
markdown_paths: []
|
444 |
preferred_embedding: gte-small
|
445 |
- namespace: local
|
446 |
name: deepset-prompt-inj
|
|
|
454 |
ui:
|
455 |
media_paths:
|
456 |
- text
|
457 |
+
markdown_paths: []
|
458 |
- namespace: local
|
459 |
name: jasper-prompt-inj
|
460 |
source:
|
|
|
467 |
ui:
|
468 |
media_paths:
|
469 |
- text
|
470 |
+
markdown_paths: []
|
471 |
- namespace: local
|
472 |
name: mosaic-chat-v2
|
473 |
source:
|
|
|
568 |
media_paths:
|
569 |
- prompt
|
570 |
- response
|
571 |
+
markdown_paths: []
|
572 |
preferred_embedding: gte-small
|
573 |
- namespace: local
|
574 |
name: databricks-dolly-15k-curated-en
|
|
|
583 |
- value
|
584 |
- '*'
|
585 |
embedding: gte-small
|
586 |
+
- path: original-instruction
|
587 |
+
embedding: gte-small
|
588 |
signals:
|
589 |
- path: original-instruction
|
590 |
signal:
|
|
|
814 |
- '*'
|
815 |
signal:
|
816 |
signal_name: text_statistics
|
817 |
+
- path: original-instruction
|
818 |
+
signal:
|
819 |
+
signal_name: spacy_ner
|
820 |
settings:
|
821 |
ui:
|
822 |
media_paths:
|
|
|
832 |
- - new-response
|
833 |
- value
|
834 |
- '*'
|
835 |
+
markdown_paths: []
|
836 |
preferred_embedding: gte-small
|
837 |
- namespace: local
|
838 |
name: open-asssistant-conversations
|
|
|
913 |
ui:
|
914 |
media_paths:
|
915 |
- text
|
916 |
+
markdown_paths: []
|
917 |
preferred_embedding: gte-small
|
918 |
- namespace: local
|
919 |
name: enron-emails
|
|
|
990 |
ui:
|
991 |
media_paths:
|
992 |
- text
|
993 |
+
markdown_paths: []
|
994 |
preferred_embedding: gte-small
|