nsthorat-lilac committed on
Commit
faf3244
·
1 Parent(s): f9a34e6

Upload folder using huggingface_hub

Browse files
Files changed (32) hide show
  1. .env +0 -48
  2. .env.demo +0 -5
  3. Dockerfile +0 -41
  4. README.md +0 -10
  5. data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl +0 -0
  6. data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl +0 -0
  7. data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl +0 -0
  8. data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl +0 -0
  9. data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl +0 -0
  10. data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl +0 -0
  11. data/.cache/lilac/concept/lilac/non-english/gte-base.pkl +0 -0
  12. data/.cache/lilac/concept/lilac/non-english/gte-small.pkl +0 -0
  13. data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl +0 -0
  14. data/.cache/lilac/concept/lilac/profanity/gte-base.pkl +0 -3
  15. data/.cache/lilac/concept/lilac/profanity/gte-small.pkl +0 -3
  16. data/.cache/lilac/concept/lilac/prompt-injections/gte-small.pkl +0 -0
  17. data/.cache/lilac/concept/lilac/prompt-reveal/gte-small.pkl +0 -0
  18. data/.cache/lilac/concept/lilac/question/cohere.pkl +0 -3
  19. data/.cache/lilac/concept/lilac/question/gte-base.pkl +0 -3
  20. data/.cache/lilac/concept/lilac/question/gte-small.pkl +0 -0
  21. data/.cache/lilac/concept/lilac/question/openai.pkl +0 -3
  22. data/.cache/lilac/concept/lilac/question/palm.pkl +0 -3
  23. data/.cache/lilac/concept/lilac/question/sbert.pkl +0 -0
  24. data/.cache/lilac/concept/lilac/source-code/gte-base.pkl +0 -0
  25. data/.cache/lilac/concept/lilac/source-code/gte-small.pkl +0 -0
  26. data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl +0 -3
  27. data/.cache/lilac/concept/local/aliens/gte-small.pkl +0 -0
  28. data/lilac.yml +0 -1024
  29. dist/README.md +0 -2
  30. dist/lilac-0.1.3-py3-none-any.whl +0 -3
  31. docker_start.py +0 -110
  32. docker_start.sh +1 -1
.env DELETED
@@ -1,48 +0,0 @@
1
- # To overwrite these variables, create a .env.local file
2
-
3
- # The path to the project directory. When used, this will be the global project directory for lilac.
4
- # When not defined, define the project directory with `lilac start ./data`.
5
- # LILAC_PROJECT_DIR=./data
6
-
7
- # Set to 1 for duckdb to use views instead of materialized tables (lower memory usage, but slower).
8
- DUCKDB_USE_VIEWS=0
9
-
10
- # Set to true to enable read-only mode, disabling the ability to add datasets & compute dataset
11
- # signals.
12
- # LILAC_AUTH_ENABLED=true
13
-
14
- # Variables that can be set in .env.local
15
- #
16
- # Get key from https://dashboard.cohere.ai/api-keys
17
- # COHERE_API_KEY=
18
-
19
- # GCS_REGION=
20
- # GCS_ACCESS_KEY=
21
- # GCS_SECRET_KEY=
22
-
23
- # Get key from https://platform.openai.com/account/api-keys
24
- # OPENAI_API_KEY=
25
- # Get key from https://makersuite.google.com/app/apikey
26
- # PALM_API_KEY=
27
-
28
- # HuggingFace demos: machine that uploads to HuggingFace.
29
-
30
- # For authenticating with HuggingFace to deploy to a Space.
31
- # HF_USERNAME=
32
- # The default repo to deploy to for a staging demo. Can be overridden by a command line flag.
33
- # HF_STAGING_DEMO_REPO='HF_ORG/HF_REPO_NAME'
34
-
35
- # For Google-login. This is generated from the Google Cloud Console for a web client.
36
- # See: https://developers.google.com/identity/protocols/oauth2
37
- GOOGLE_CLIENT_ID='279475920249-i8llm8vbos1vj5m1qocir8narb3r0enu.apps.googleusercontent.com'
38
- # The client secret of the above client.
39
- # GOOGLE_CLIENT_SECRET=
40
- # A random string for oauth sessions.
41
- # LILAC_OAUTH_SECRET_KEY=
42
-
43
- # LangSmith source setup.
44
- # LANGCHAIN_API_KEY=
45
- # LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
46
-
47
- # Firebase deployment token.
48
- # FIREBASE_TOKEN=
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.env.demo DELETED
@@ -1,5 +0,0 @@
1
- LILAC_PROJECT_DIR='/data'
2
- HF_HOME=/data/.huggingface
3
- TRANSFORMERS_CACHE=/data/.cache
4
- XDG_CACHE_HOME=/data/.cache
5
- GOOGLE_ANALYTICS_ENABLED=true
 
 
 
 
 
 
Dockerfile DELETED
@@ -1,41 +0,0 @@
1
- # NOTE: When we upgrade to 3.11 we can use a slimmer docker image which comes with gcc.
2
- FROM python:3.9-bullseye
3
-
4
- # Allow statements and log messages to immediately appear in the Knative logs
5
- ENV PYTHONUNBUFFERED True
6
-
7
- # See: https://huggingface.co/docs/hub/spaces-sdks-docker#permissions
8
- RUN useradd -m -u 1000 user
9
- USER user
10
- ENV HOME=/home/user \
11
- PATH=/home/user/.local/bin:$PATH
12
-
13
- # Set the working directory in the container.
14
- WORKDIR $HOME/app
15
-
16
- # Install the dependencies. This will look in ./dist for any wheels that match lilac. If they are
17
- # not found, it will use the public pip package.
18
-
19
- # Pip install lilac[all] and dependencies before trying to install the local image. This allows us
20
- # to get cache hits on dependency installations when using a local wheel. When using the public pip
21
- # package, the second call will be a no-op.
22
- RUN python -m pip install lilac[all]
23
-
24
- # Install from the local wheel inside ./dist. This will be a no-op if the wheel is not found.
25
- COPY --chown=user /dist ./dist/
26
- RUN python -m pip install --find-links=dist --upgrade lilac[all]
27
-
28
- COPY --chown=user .env .
29
- COPY --chown=user .env.demo .
30
- # Copy the README so we can read the datasets from the HuggingFace config.
31
- COPY --chown=user README.md .
32
- # Copy the license just in case.
33
- COPY --chown=user LICENSE .
34
-
35
- COPY --chown=user docker_start.sh docker_start.py ./
36
-
37
- # Make a local data directory for non-persistent storage demos.
38
- RUN mkdir -p ./data
39
- RUN chown -R user ./data
40
-
41
- CMD ["bash", "docker_start.sh"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,10 +0,0 @@
1
- ---
2
- title: Lilac
3
- emoji: "\U0001F337"
4
- colorFrom: purple
5
- colorTo: purple
6
- sdk: docker
7
- app_port: 5432
8
- datasets: []
9
-
10
- ---
 
 
 
 
 
 
 
 
 
 
 
data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl DELETED
Binary file (10.8 kB)
 
data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl DELETED
Binary file (21.7 kB)
 
data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl DELETED
Binary file (21.7 kB)
 
data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl DELETED
Binary file (21.8 kB)
 
data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl DELETED
Binary file (60.6 kB)
 
data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl DELETED
Binary file (202 kB)
 
data/.cache/lilac/concept/lilac/non-english/gte-base.pkl DELETED
Binary file (645 kB)
 
data/.cache/lilac/concept/lilac/non-english/gte-small.pkl DELETED
Binary file (330 kB)
 
data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl DELETED
Binary file (180 kB)
 
data/.cache/lilac/concept/lilac/profanity/gte-base.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4ac21aa8bd428688a64f75221338be8c676d208de61a9eba948300e8aa43af3
3
- size 3301300
 
 
 
 
data/.cache/lilac/concept/lilac/profanity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:89495a1f968ddcb3f76ad46dbe7b6503a7b76afcdac37abbeb15c81d38c2f9d4
3
- size 1672934
 
 
 
 
data/.cache/lilac/concept/lilac/prompt-injections/gte-small.pkl DELETED
Binary file (71.3 kB)
 
data/.cache/lilac/concept/lilac/prompt-reveal/gte-small.pkl DELETED
Binary file (69.6 kB)
 
data/.cache/lilac/concept/lilac/question/cohere.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9768c28d6ed72e4a1a5819fef4157fb1f30a50f1e165bfcdd87d0fa761146902
3
- size 6254174
 
 
 
 
data/.cache/lilac/concept/lilac/question/gte-base.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2ae5bf4275be11be99cb2e90c03e35c9d2749efc3b34a2d1db1e9f0c99325d6
3
- size 1194925
 
 
 
 
data/.cache/lilac/concept/lilac/question/gte-small.pkl DELETED
Binary file (611 kB)
 
data/.cache/lilac/concept/lilac/question/openai.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a14c6df6924f45391654fe78dee8cf996de3abb8acf8ca0f81a65814572d493
3
- size 2362432
 
 
 
 
data/.cache/lilac/concept/lilac/question/palm.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cce86748bca57bd328f68b97ee80b4e3343ee4008d8951c5b061b6dd9335df7e
3
- size 1194921
 
 
 
 
data/.cache/lilac/concept/lilac/question/sbert.pkl DELETED
Binary file (611 kB)
 
data/.cache/lilac/concept/lilac/source-code/gte-base.pkl DELETED
Binary file (287 kB)
 
data/.cache/lilac/concept/lilac/source-code/gte-small.pkl DELETED
Binary file (147 kB)
 
data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6a074a3ac60cc9bfb82c4bf19d0e8c8d3837cb2b68b97efe8960c16675477f1
3
- size 1886420
 
 
 
 
data/.cache/lilac/concept/local/aliens/gte-small.pkl DELETED
Binary file (28.4 kB)
 
data/lilac.yml DELETED
@@ -1,1024 +0,0 @@
1
- # Lilac project config.
2
- # See https://lilacml.com/api_reference/index.html#lilac.Config for details.
3
-
4
- datasets:
5
- - namespace: local
6
- name: glue
7
- source:
8
- dataset_name: glue
9
- config_name: ax
10
- source_name: huggingface
11
- embeddings:
12
- - path: premise
13
- embedding: gte-small
14
- - path: premise
15
- embedding: gte-base
16
- - path: hypothesis
17
- embedding: gte-small
18
- signals:
19
- - path: premise
20
- signal:
21
- signal_name: pii
22
- - path: hypothesis
23
- signal:
24
- signal_name: pii
25
- - path: premise
26
- signal:
27
- signal_name: text_statistics
28
- settings:
29
- ui:
30
- media_paths:
31
- - premise
32
- markdown_paths: []
33
- - namespace: local
34
- name: glue_ax
35
- source:
36
- dataset_name: glue
37
- config_name: ax
38
- source_name: huggingface
39
- embeddings:
40
- - path: hypothesis
41
- embedding: gte-small
42
- signals:
43
- - path: premise
44
- signal:
45
- signal_name: text_statistics
46
- - path: premise
47
- signal:
48
- signal_name: pii
49
- - path: premise
50
- signal:
51
- signal_name: near_dup
52
- - path: hypothesis
53
- signal:
54
- embedding: gte-small
55
- namespace: ''
56
- concept_name: ''
57
- signal_name: concept_score
58
- - path: hypothesis
59
- signal:
60
- embedding: gte-small
61
- namespace: lilac
62
- concept_name: positive-sentiment
63
- signal_name: concept_score
64
- - path: hypothesis
65
- signal:
66
- embedding: gte-small
67
- namespace: lilac
68
- concept_name: non-english
69
- signal_name: concept_score
70
- settings:
71
- ui:
72
- media_paths:
73
- - hypothesis
74
- markdown_paths: []
75
- - namespace: local
76
- name: imdb3
77
- source:
78
- dataset_name: imdb
79
- source_name: huggingface
80
- settings:
81
- ui:
82
- media_paths:
83
- - text
84
- markdown_paths: []
85
- - namespace: local
86
- name: imdb
87
- source:
88
- dataset_name: imdb
89
- source_name: huggingface
90
- embeddings:
91
- - path: text
92
- embedding: gte-small
93
- signals:
94
- - path: text
95
- signal:
96
- signal_name: pii
97
- - path: text
98
- signal:
99
- signal_name: text_statistics
100
- settings:
101
- ui:
102
- media_paths:
103
- - text
104
- markdown_paths: []
105
- - namespace: local
106
- name: imdb2
107
- source:
108
- dataset_name: imdb
109
- source_name: huggingface
110
- settings:
111
- ui:
112
- media_paths:
113
- - text
114
- markdown_paths: []
115
- - namespace: lilac
116
- name: OpenOrca-100k
117
- source:
118
- dataset_name: Open-Orca/OpenOrca
119
- sample_size: 100000
120
- source_name: huggingface
121
- embeddings:
122
- - path: question
123
- embedding: gte-small
124
- - path: response
125
- embedding: gte-small
126
- signals:
127
- - path: question
128
- signal:
129
- signal_name: near_dup
130
- - path: question
131
- signal:
132
- signal_name: pii
133
- - path: question
134
- signal:
135
- signal_name: lang_detection
136
- - path: question
137
- signal:
138
- embedding: gte-small
139
- namespace: lilac
140
- concept_name: positive-sentiment
141
- signal_name: concept_score
142
- - path: question
143
- signal:
144
- embedding: gte-small
145
- namespace: lilac
146
- concept_name: non-english
147
- signal_name: concept_score
148
- - path: question
149
- signal:
150
- embedding: gte-small
151
- namespace: lilac
152
- concept_name: toxicity
153
- signal_name: concept_score
154
- - path: question
155
- signal:
156
- embedding: gte-small
157
- namespace: lilac
158
- concept_name: question
159
- signal_name: concept_score
160
- - path: question
161
- signal:
162
- embedding: gte-small
163
- namespace: lilac
164
- concept_name: legal-termination
165
- signal_name: concept_score
166
- - path: question
167
- signal:
168
- embedding: gte-small
169
- namespace: lilac
170
- concept_name: source-code
171
- signal_name: concept_score
172
- - path: question
173
- signal:
174
- embedding: gte-small
175
- namespace: lilac
176
- concept_name: negative-sentiment
177
- signal_name: concept_score
178
- - path: question
179
- signal:
180
- embedding: gte-small
181
- namespace: lilac
182
- concept_name: profanity
183
- signal_name: concept_score
184
- - path: question
185
- signal:
186
- signal_name: text_statistics
187
- - path: response
188
- signal:
189
- signal_name: near_dup
190
- - path: response
191
- signal:
192
- signal_name: pii
193
- - path: response
194
- signal:
195
- signal_name: lang_detection
196
- - path: response
197
- signal:
198
- embedding: gte-small
199
- namespace: lilac
200
- concept_name: positive-sentiment
201
- signal_name: concept_score
202
- - path: response
203
- signal:
204
- embedding: gte-small
205
- namespace: lilac
206
- concept_name: non-english
207
- signal_name: concept_score
208
- - path: response
209
- signal:
210
- embedding: gte-small
211
- namespace: lilac
212
- concept_name: toxicity
213
- signal_name: concept_score
214
- - path: response
215
- signal:
216
- embedding: gte-small
217
- namespace: lilac
218
- concept_name: question
219
- signal_name: concept_score
220
- - path: response
221
- signal:
222
- embedding: gte-small
223
- namespace: lilac
224
- concept_name: legal-termination
225
- signal_name: concept_score
226
- - path: response
227
- signal:
228
- embedding: gte-small
229
- namespace: lilac
230
- concept_name: source-code
231
- signal_name: concept_score
232
- - path: response
233
- signal:
234
- embedding: gte-small
235
- namespace: lilac
236
- concept_name: negative-sentiment
237
- signal_name: concept_score
238
- - path: response
239
- signal:
240
- embedding: gte-small
241
- namespace: lilac
242
- concept_name: profanity
243
- signal_name: concept_score
244
- - path: response
245
- signal:
246
- signal_name: text_statistics
247
- - path: system_prompt
248
- signal:
249
- signal_name: pii
250
- settings:
251
- ui:
252
- media_paths:
253
- - question
254
- - response
255
- markdown_paths: []
256
- - namespace: local
257
- name: the_movies_dataset
258
- source:
259
- filepaths:
260
- - gs://lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv
261
- names: []
262
- source_name: csv
263
- settings:
264
- ui:
265
- media_paths:
266
- - overview
267
- markdown_paths: []
268
- - namespace: local
269
- name: glue_ax_parquet
270
- source:
271
- filepaths:
272
- - gs://lilac-data/datasets/glue_ax_parquet/glue_ax.parquet
273
- source_name: parquet
274
- settings:
275
- ui:
276
- media_paths:
277
- - premise
278
- markdown_paths: []
279
- - namespace: lilac
280
- name: mmlu_professional_law
281
- source:
282
- dataset_name: cais/mmlu
283
- config_name: professional_law
284
- source_name: huggingface
285
- embeddings:
286
- - path: question
287
- embedding: gte-small
288
- - path:
289
- - choices
290
- - '*'
291
- embedding: gte-small
292
- signals:
293
- - path: question
294
- signal:
295
- signal_name: near_dup
296
- - path: question
297
- signal:
298
- signal_name: pii
299
- - path: question
300
- signal:
301
- signal_name: lang_detection
302
- - path: question
303
- signal:
304
- embedding: gte-small
305
- namespace: lilac
306
- concept_name: positive-sentiment
307
- signal_name: concept_score
308
- - path: question
309
- signal:
310
- embedding: gte-small
311
- namespace: lilac
312
- concept_name: non-english
313
- signal_name: concept_score
314
- - path: question
315
- signal:
316
- embedding: gte-small
317
- namespace: lilac
318
- concept_name: toxicity
319
- signal_name: concept_score
320
- - path: question
321
- signal:
322
- embedding: gte-small
323
- namespace: lilac
324
- concept_name: question
325
- signal_name: concept_score
326
- - path: question
327
- signal:
328
- embedding: gte-small
329
- namespace: lilac
330
- concept_name: legal-termination
331
- signal_name: concept_score
332
- - path: question
333
- signal:
334
- embedding: gte-small
335
- namespace: lilac
336
- concept_name: source-code
337
- signal_name: concept_score
338
- - path: question
339
- signal:
340
- embedding: gte-small
341
- namespace: lilac
342
- concept_name: negative-sentiment
343
- signal_name: concept_score
344
- - path: question
345
- signal:
346
- embedding: gte-small
347
- namespace: lilac
348
- concept_name: profanity
349
- signal_name: concept_score
350
- - path: question
351
- signal:
352
- signal_name: text_statistics
353
- - path:
354
- - choices
355
- - '*'
356
- signal:
357
- signal_name: near_dup
358
- - path:
359
- - choices
360
- - '*'
361
- signal:
362
- signal_name: pii
363
- - path:
364
- - choices
365
- - '*'
366
- signal:
367
- signal_name: lang_detection
368
- - path:
369
- - choices
370
- - '*'
371
- signal:
372
- embedding: gte-small
373
- namespace: lilac
374
- concept_name: positive-sentiment
375
- signal_name: concept_score
376
- - path:
377
- - choices
378
- - '*'
379
- signal:
380
- embedding: gte-small
381
- namespace: lilac
382
- concept_name: non-english
383
- signal_name: concept_score
384
- - path:
385
- - choices
386
- - '*'
387
- signal:
388
- embedding: gte-small
389
- namespace: lilac
390
- concept_name: toxicity
391
- signal_name: concept_score
392
- - path:
393
- - choices
394
- - '*'
395
- signal:
396
- embedding: gte-small
397
- namespace: lilac
398
- concept_name: question
399
- signal_name: concept_score
400
- - path:
401
- - choices
402
- - '*'
403
- signal:
404
- embedding: gte-small
405
- namespace: lilac
406
- concept_name: legal-termination
407
- signal_name: concept_score
408
- - path:
409
- - choices
410
- - '*'
411
- signal:
412
- embedding: gte-small
413
- namespace: lilac
414
- concept_name: source-code
415
- signal_name: concept_score
416
- - path:
417
- - choices
418
- - '*'
419
- signal:
420
- embedding: gte-small
421
- namespace: lilac
422
- concept_name: negative-sentiment
423
- signal_name: concept_score
424
- - path:
425
- - choices
426
- - '*'
427
- signal:
428
- embedding: gte-small
429
- namespace: lilac
430
- concept_name: profanity
431
- signal_name: concept_score
432
- - path:
433
- - choices
434
- - '*'
435
- signal:
436
- signal_name: text_statistics
437
- settings:
438
- ui:
439
- media_paths:
440
- - question
441
- - - choices
442
- - '*'
443
- markdown_paths: []
444
- preferred_embedding: gte-small
445
- - namespace: local
446
- name: deepset-prompt-inj
447
- source:
448
- dataset_name: deepset/prompt-injections
449
- source_name: huggingface
450
- embeddings:
451
- - path: text
452
- embedding: gte-small
453
- settings:
454
- ui:
455
- media_paths:
456
- - text
457
- markdown_paths: []
458
- - namespace: local
459
- name: jasper-prompt-inj
460
- source:
461
- dataset_name: JasperLS/prompt-injections
462
- source_name: huggingface
463
- embeddings:
464
- - path: text
465
- embedding: gte-small
466
- settings:
467
- ui:
468
- media_paths:
469
- - text
470
- markdown_paths: []
471
- - namespace: local
472
- name: mosaic-chat-v2
473
- source:
474
- dataset_name: sam-mosaic/chat-v2
475
- source_name: huggingface
476
- embeddings:
477
- - path: prompt
478
- embedding: gte-small
479
- - path: response
480
- embedding: gte-small
481
- signals:
482
- - path: prompt
483
- signal:
484
- signal_name: near_dup
485
- - path: prompt
486
- signal:
487
- signal_name: pii
488
- - path: prompt
489
- signal:
490
- signal_name: lang_detection
491
- - path: prompt
492
- signal:
493
- embedding: gte-small
494
- namespace: lilac
495
- concept_name: non-english
496
- signal_name: concept_score
497
- - path: prompt
498
- signal:
499
- embedding: gte-small
500
- namespace: lilac
501
- concept_name: toxicity
502
- signal_name: concept_score
503
- - path: prompt
504
- signal:
505
- embedding: gte-small
506
- namespace: lilac
507
- concept_name: source-code
508
- signal_name: concept_score
509
- - path: prompt
510
- signal:
511
- embedding: gte-small
512
- namespace: lilac
513
- concept_name: negative-sentiment
514
- signal_name: concept_score
515
- - path: prompt
516
- signal:
517
- embedding: gte-small
518
- namespace: lilac
519
- concept_name: profanity
520
- signal_name: concept_score
521
- - path: prompt
522
- signal:
523
- signal_name: text_statistics
524
- - path: response
525
- signal:
526
- signal_name: near_dup
527
- - path: response
528
- signal:
529
- signal_name: pii
530
- - path: response
531
- signal:
532
- signal_name: lang_detection
533
- - path: response
534
- signal:
535
- embedding: gte-small
536
- namespace: lilac
537
- concept_name: non-english
538
- signal_name: concept_score
539
- - path: response
540
- signal:
541
- embedding: gte-small
542
- namespace: lilac
543
- concept_name: toxicity
544
- signal_name: concept_score
545
- - path: response
546
- signal:
547
- embedding: gte-small
548
- namespace: lilac
549
- concept_name: source-code
550
- signal_name: concept_score
551
- - path: response
552
- signal:
553
- embedding: gte-small
554
- namespace: lilac
555
- concept_name: negative-sentiment
556
- signal_name: concept_score
557
- - path: response
558
- signal:
559
- embedding: gte-small
560
- namespace: lilac
561
- concept_name: profanity
562
- signal_name: concept_score
563
- - path: response
564
- signal:
565
- signal_name: text_statistics
566
- settings:
567
- ui:
568
- media_paths:
569
- - prompt
570
- - response
571
- markdown_paths: []
572
- preferred_embedding: gte-small
573
- - namespace: local
574
- name: databricks-dolly-15k-curated-en
575
- source:
576
- dataset_name: argilla/databricks-dolly-15k-curated-en
577
- source_name: huggingface
578
- embeddings:
579
- - path: original-context
580
- embedding: gte-small
581
- - path:
582
- - new-context
583
- - value
584
- - '*'
585
- embedding: gte-small
586
- - path: original-instruction
587
- embedding: gte-small
588
- signals:
589
- - path: original-instruction
590
- signal:
591
- signal_name: near_dup
592
- - path: original-instruction
593
- signal:
594
- signal_name: pii
595
- - path: original-instruction
596
- signal:
597
- signal_name: lang_detection
598
- - path: original-instruction
599
- signal:
600
- signal_name: text_statistics
601
- - path: original-context
602
- signal:
603
- signal_name: near_dup
604
- - path: original-context
605
- signal:
606
- signal_name: pii
607
- - path: original-context
608
- signal:
609
- signal_name: lang_detection
610
- - path: original-context
611
- signal:
612
- embedding: gte-small
613
- namespace: lilac
614
- concept_name: positive-sentiment
615
- signal_name: concept_score
616
- - path: original-context
617
- signal:
618
- embedding: gte-small
619
- namespace: lilac
620
- concept_name: non-english
621
- signal_name: concept_score
622
- - path: original-context
623
- signal:
624
- embedding: gte-small
625
- namespace: lilac
626
- concept_name: toxicity
627
- signal_name: concept_score
628
- - path: original-context
629
- signal:
630
- embedding: gte-small
631
- namespace: lilac
632
- concept_name: question
633
- signal_name: concept_score
634
- - path: original-context
635
- signal:
636
- embedding: gte-small
637
- namespace: lilac
638
- concept_name: legal-termination
639
- signal_name: concept_score
640
- - path: original-context
641
- signal:
642
- embedding: gte-small
643
- namespace: lilac
644
- concept_name: source-code
645
- signal_name: concept_score
646
- - path: original-context
647
- signal:
648
- embedding: gte-small
649
- namespace: lilac
650
- concept_name: negative-sentiment
651
- signal_name: concept_score
652
- - path: original-context
653
- signal:
654
- embedding: gte-small
655
- namespace: lilac
656
- concept_name: profanity
657
- signal_name: concept_score
658
- - path: original-context
659
- signal:
660
- signal_name: text_statistics
661
- - path: original-response
662
- signal:
663
- signal_name: near_dup
664
- - path: original-response
665
- signal:
666
- signal_name: pii
667
- - path: original-response
668
- signal:
669
- signal_name: lang_detection
670
- - path: original-response
671
- signal:
672
- signal_name: text_statistics
673
- - path:
674
- - new-instruction
675
- - value
676
- - '*'
677
- signal:
678
- signal_name: near_dup
679
- - path:
680
- - new-instruction
681
- - value
682
- - '*'
683
- signal:
684
- signal_name: pii
685
- - path:
686
- - new-instruction
687
- - value
688
- - '*'
689
- signal:
690
- signal_name: lang_detection
691
- - path:
692
- - new-instruction
693
- - value
694
- - '*'
695
- signal:
696
- signal_name: text_statistics
697
- - path:
698
- - new-context
699
- - value
700
- - '*'
701
- signal:
702
- signal_name: near_dup
703
- - path:
704
- - new-context
705
- - value
706
- - '*'
707
- signal:
708
- signal_name: pii
709
- - path:
710
- - new-context
711
- - value
712
- - '*'
713
- signal:
714
- signal_name: lang_detection
715
- - path:
716
- - new-context
717
- - value
718
- - '*'
719
- signal:
720
- embedding: gte-small
721
- namespace: lilac
722
- concept_name: positive-sentiment
723
- signal_name: concept_score
724
- - path:
725
- - new-context
726
- - value
727
- - '*'
728
- signal:
729
- embedding: gte-small
730
- namespace: lilac
731
- concept_name: non-english
732
- signal_name: concept_score
733
- - path:
734
- - new-context
735
- - value
736
- - '*'
737
- signal:
738
- embedding: gte-small
739
- namespace: lilac
740
- concept_name: toxicity
741
- signal_name: concept_score
742
- - path:
743
- - new-context
744
- - value
745
- - '*'
746
- signal:
747
- embedding: gte-small
748
- namespace: lilac
749
- concept_name: question
750
- signal_name: concept_score
751
- - path:
752
- - new-context
753
- - value
754
- - '*'
755
- signal:
756
- embedding: gte-small
757
- namespace: lilac
758
- concept_name: legal-termination
759
- signal_name: concept_score
760
- - path:
761
- - new-context
762
- - value
763
- - '*'
764
- signal:
765
- embedding: gte-small
766
- namespace: lilac
767
- concept_name: source-code
768
- signal_name: concept_score
769
- - path:
770
- - new-context
771
- - value
772
- - '*'
773
- signal:
774
- embedding: gte-small
775
- namespace: lilac
776
- concept_name: negative-sentiment
777
- signal_name: concept_score
778
- - path:
779
- - new-context
780
- - value
781
- - '*'
782
- signal:
783
- embedding: gte-small
784
- namespace: lilac
785
- concept_name: profanity
786
- signal_name: concept_score
787
- - path:
788
- - new-context
789
- - value
790
- - '*'
791
- signal:
792
- signal_name: text_statistics
793
- - path:
794
- - new-response
795
- - value
796
- - '*'
797
- signal:
798
- signal_name: near_dup
799
- - path:
800
- - new-response
801
- - value
802
- - '*'
803
- signal:
804
- signal_name: pii
805
- - path:
806
- - new-response
807
- - value
808
- - '*'
809
- signal:
810
- signal_name: lang_detection
811
- - path:
812
- - new-response
813
- - value
814
- - '*'
815
- signal:
816
- signal_name: text_statistics
817
- - path: original-instruction
818
- signal:
819
- signal_name: spacy_ner
820
- settings:
821
- ui:
822
- media_paths:
823
- - original-instruction
824
- - original-context
825
- - original-response
826
- - - new-instruction
827
- - value
828
- - '*'
829
- - - new-context
830
- - value
831
- - '*'
832
- - - new-response
833
- - value
834
- - '*'
835
- markdown_paths: []
836
- preferred_embedding: gte-small
837
- - namespace: local
838
- name: open-asssistant-conversations
839
- source:
840
- dataset_name: OpenAssistant/oasst1
841
- source_name: huggingface
842
- embeddings:
843
- - path: text
844
- embedding: gte-small
845
- signals:
846
- - path: text
847
- signal:
848
- signal_name: near_dup
849
- - path: text
850
- signal:
851
- signal_name: pii
852
- - path: text
853
- signal:
854
- signal_name: lang_detection
855
- - path: text
856
- signal:
857
- embedding: gte-small
858
- namespace: lilac
859
- concept_name: positive-sentiment
860
- signal_name: concept_score
861
- - path: text
862
- signal:
863
- embedding: gte-small
864
- namespace: lilac
865
- concept_name: non-english
866
- signal_name: concept_score
867
- - path: text
868
- signal:
869
- embedding: gte-small
870
- namespace: lilac
871
- concept_name: toxicity
872
- signal_name: concept_score
873
- - path: text
874
- signal:
875
- embedding: gte-small
876
- namespace: lilac
877
- concept_name: question
878
- signal_name: concept_score
879
- - path: text
880
- signal:
881
- embedding: gte-small
882
- namespace: lilac
883
- concept_name: legal-termination
884
- signal_name: concept_score
885
- - path: text
886
- signal:
887
- embedding: gte-small
888
- namespace: lilac
889
- concept_name: source-code
890
- signal_name: concept_score
891
- - path: text
892
- signal:
893
- embedding: gte-small
894
- namespace: lilac
895
- concept_name: negative-sentiment
896
- signal_name: concept_score
897
- - path: text
898
- signal:
899
- embedding: gte-small
900
- namespace: lilac
901
- concept_name: negative-sentiment
902
- signal_name: concept_score
903
- - path: text
904
- signal:
905
- embedding: gte-small
906
- namespace: lilac
907
- concept_name: profanity
908
- signal_name: concept_score
909
- - path: text
910
- signal:
911
- signal_name: text_statistics
912
- settings:
913
- ui:
914
- media_paths:
915
- - text
916
- markdown_paths: []
917
- preferred_embedding: gte-small
918
- - namespace: local
919
- name: enron-emails
920
- source:
921
- dataset_name: EleutherAI/pile
922
- config_name: enron_emails
923
- sample_size: 100000
924
- source_name: huggingface
925
- embeddings:
926
- - path: text
927
- embedding: gte-small
928
- signals:
929
- - path: text
930
- signal:
931
- signal_name: near_dup
932
- - path: text
933
- signal:
934
- signal_name: pii
935
- - path: text
936
- signal:
937
- signal_name: lang_detection
938
- - path: text
939
- signal:
940
- embedding: gte-small
941
- namespace: lilac
942
- concept_name: positive-sentiment
943
- signal_name: concept_score
944
- - path: text
945
- signal:
946
- embedding: gte-small
947
- namespace: lilac
948
- concept_name: non-english
949
- signal_name: concept_score
950
- - path: text
951
- signal:
952
- embedding: gte-small
953
- namespace: lilac
954
- concept_name: toxicity
955
- signal_name: concept_score
956
- - path: text
957
- signal:
958
- embedding: gte-small
959
- namespace: lilac
960
- concept_name: question
961
- signal_name: concept_score
962
- - path: text
963
- signal:
964
- embedding: gte-small
965
- namespace: lilac
966
- concept_name: legal-termination
967
- signal_name: concept_score
968
- - path: text
969
- signal:
970
- embedding: gte-small
971
- namespace: lilac
972
- concept_name: source-code
973
- signal_name: concept_score
974
- - path: text
975
- signal:
976
- embedding: gte-small
977
- namespace: lilac
978
- concept_name: negative-sentiment
979
- signal_name: concept_score
980
- - path: text
981
- signal:
982
- embedding: gte-small
983
- namespace: lilac
984
- concept_name: profanity
985
- signal_name: concept_score
986
- - path: text
987
- signal:
988
- signal_name: text_statistics
989
- settings:
990
- ui:
991
- media_paths:
992
- - text
993
- markdown_paths: []
994
- preferred_embedding: gte-small
995
- - namespace: local
996
- name: OpenOrca
997
- source:
998
- dataset_name: Open-Orca/OpenOrca
999
- source_name: huggingface
1000
- embeddings:
1001
- - path: question
1002
- embedding: gte-small
1003
- - path: response
1004
- embedding: gte-small
1005
- settings:
1006
- ui:
1007
- media_paths:
1008
- - question
1009
- - response
1010
- markdown_paths: []
1011
- - namespace: local
1012
- name: langsmith-finetuning-rag
1013
- source:
1014
- filepaths:
1015
- - https://storage.googleapis.com/lilac-data/datasets/langsmith-finetuning-rag/rag.jsonl
1016
- source_name: json
1017
- settings:
1018
- ui:
1019
- media_paths:
1020
- - - inputs
1021
- - question
1022
- - - outputs
1023
- - output
1024
- markdown_paths: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dist/README.md DELETED
@@ -1,2 +0,0 @@
1
- This directory is used for locally built whl files.
2
- We write a README.md to ensure an empty folder is uploaded when there is no whl.
 
 
 
dist/lilac-0.1.3-py3-none-any.whl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8263c29c0b61f57530cb419f858282c0c3dcc8b037f6634cb084edbd4ba0ae63
3
- size 1170417
 
 
 
 
docker_start.py DELETED
@@ -1,110 +0,0 @@
1
- """Startup work before running the web server."""
2
-
3
- import os
4
- import shutil
5
- from typing import TypedDict
6
-
7
- import yaml
8
- from huggingface_hub import scan_cache_dir, snapshot_download
9
-
10
- from lilac.concepts.db_concept import DiskConceptDB, get_concept_output_dir
11
- from lilac.env import env, get_project_dir
12
- from lilac.project import PROJECT_CONFIG_FILENAME
13
- from lilac.utils import get_datasets_dir, get_lilac_cache_dir, log
14
-
15
-
16
- def delete_old_files() -> None:
17
- """Delete old files from the cache."""
18
- # Scan cache
19
- try:
20
- scan = scan_cache_dir()
21
- except BaseException:
22
- # Cache was not found.
23
- return
24
-
25
- # Select revisions to delete
26
- to_delete = []
27
- for repo in scan.repos:
28
- latest_revision = max(repo.revisions, key=lambda x: x.last_modified)
29
- to_delete.extend(
30
- [revision.commit_hash for revision in repo.revisions if revision != latest_revision])
31
- strategy = scan.delete_revisions(*to_delete)
32
-
33
- # Delete them
34
- log(f'Will delete {len(to_delete)} old revisions and save {strategy.expected_freed_size_str}')
35
- strategy.execute()
36
-
37
-
38
- class HfSpaceConfig(TypedDict):
39
- """The huggingface space config, defined in README.md.
40
-
41
- See:
42
- https://huggingface.co/docs/hub/spaces-config-reference
43
- """
44
- title: str
45
- datasets: list[str]
46
-
47
-
48
- def main() -> None:
49
- """Download dataset files from the HF space that was uploaded before building the image."""
50
- # SPACE_ID is the HuggingFace Space ID environment variable that is automatically set by HF.
51
- repo_id = env('SPACE_ID', None)
52
- if not repo_id:
53
- return
54
-
55
- delete_old_files()
56
-
57
- with open(os.path.abspath('README.md')) as f:
58
- # Strip the '---' for the huggingface readme config.
59
- readme = f.read().strip().strip('---')
60
- hf_config: HfSpaceConfig = yaml.safe_load(readme)
61
-
62
- # Download the huggingface space data. This includes code and datasets, so we move the datasets
63
- # alone to the data directory.
64
-
65
- datasets_dir = get_datasets_dir(get_project_dir())
66
- os.makedirs(datasets_dir, exist_ok=True)
67
- for lilac_hf_dataset in hf_config['datasets']:
68
- print('Downloading dataset from HuggingFace: ', lilac_hf_dataset)
69
- snapshot_download(
70
- repo_id=lilac_hf_dataset,
71
- repo_type='dataset',
72
- token=env('HF_ACCESS_TOKEN'),
73
- local_dir=datasets_dir,
74
- ignore_patterns=['.gitattributes', 'README.md'])
75
-
76
- snapshot_dir = snapshot_download(repo_id=repo_id, repo_type='space', token=env('HF_ACCESS_TOKEN'))
77
-
78
- spaces_data_dir = os.path.join(snapshot_dir, 'data')
79
- # Copy the config file.
80
- project_config_file = os.path.join(spaces_data_dir, PROJECT_CONFIG_FILENAME)
81
- if os.path.exists(project_config_file):
82
- shutil.copy(project_config_file, os.path.join(get_project_dir(), PROJECT_CONFIG_FILENAME))
83
-
84
- # Delete cache files from persistent storage.
85
- cache_dir = get_lilac_cache_dir(get_project_dir())
86
- if os.path.exists(cache_dir):
87
- shutil.rmtree(cache_dir)
88
-
89
- # Copy cache files from the space if they exist.
90
- spaces_cache_dir = get_lilac_cache_dir(spaces_data_dir)
91
- if os.path.exists(spaces_cache_dir):
92
- shutil.copytree(spaces_cache_dir, cache_dir)
93
-
94
- # Copy concepts.
95
- concepts = DiskConceptDB(spaces_data_dir).list()
96
- for concept in concepts:
97
- # Ignore lilac concepts, they're already part of the source code.
98
- if concept.namespace == 'lilac':
99
- continue
100
- spaces_concept_output_dir = get_concept_output_dir(spaces_data_dir, concept.namespace,
101
- concept.name)
102
- persistent_output_dir = get_concept_output_dir(get_project_dir(), concept.namespace,
103
- concept.name)
104
- shutil.rmtree(persistent_output_dir, ignore_errors=True)
105
- shutil.copytree(spaces_concept_output_dir, persistent_output_dir, dirs_exist_ok=True)
106
- shutil.rmtree(spaces_concept_output_dir, ignore_errors=True)
107
-
108
-
109
- if __name__ == '__main__':
110
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
docker_start.sh CHANGED
@@ -3,7 +3,7 @@
3
  # Fail if any of the commands below fail.
4
  set -e
5
 
6
- python docker_start.py
7
  gunicorn lilac.server:app \
8
  --bind 0.0.0.0:5432 \
9
  --preload -k uvicorn.workers.UvicornWorker \
 
3
  # Fail if any of the commands below fail.
4
  set -e
5
 
6
+ lilac hf-docker-start
7
  gunicorn lilac.server:app \
8
  --bind 0.0.0.0:5432 \
9
  --preload -k uvicorn.workers.UvicornWorker \