Spaces:
Runtime error
Runtime error
Push
Browse files
- .env +6 -1
- .env.demo +1 -1
- .gitattributes +0 -3
- data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl +0 -0
- data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl +0 -0
- data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl +0 -0
- data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/non-english/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/profanity/gte-small.pkl +0 -3
- data/.cache/lilac/concept/lilac/question/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/source-code/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl +0 -3
- data/.cache/lilac/concept/local/aliens/gte-small.pkl +0 -0
- data/lilac.yml +165 -0
- dist/README.md +0 -2
- dist/lilac-0.0.19-py3-none-any.whl +0 -3
- docker_start.py +7 -6
.env
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
# To overwrite these variables, create a .env.local file
|
2 |
|
3 |
# The path to the project directory. When used, this will be the global project directory for lilac.
|
4 |
-
# When not defined, define the project
|
|
|
|
|
5 |
# LILAC_DATA_PATH=./data
|
6 |
|
7 |
# Set to 1 for duckdb to use views instead of materialized tables (lower memory usage, but slower).
|
@@ -43,3 +45,6 @@ GOOGLE_CLIENT_ID='279475920249-i8llm8vbos1vj5m1qocir8narb3r0enu.apps.googleuserc
|
|
43 |
# LangSmith source setup.
|
44 |
# LANGCHAIN_API_KEY=
|
45 |
LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
|
|
|
|
|
|
|
|
1 |
# To overwrite these variables, create a .env.local file
|
2 |
|
3 |
# The path to the project directory. When used, this will be the global project directory for lilac.
|
4 |
+
# When not defined, define the project directory with `lilac start ./data`.
|
5 |
+
# LILAC_PROJECT_DIR=./data
|
6 |
+
# NOTE: This is deprecated in favor of LILAC_PROJECT_DIR.
|
7 |
# LILAC_DATA_PATH=./data
|
8 |
|
9 |
# Set to 1 for duckdb to use views instead of materialized tables (lower memory usage, but slower).
|
|
|
45 |
# LangSmith source setup.
|
46 |
# LANGCHAIN_API_KEY=
|
47 |
LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
|
48 |
+
|
49 |
+
# Firebase deployment token.
|
50 |
+
# FIREBASE_TOKEN=
|
.env.demo
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
HF_HOME=/data/.huggingface
|
3 |
TRANSFORMERS_CACHE=/data/.cache
|
4 |
XDG_CACHE_HOME=/data/.cache
|
|
|
1 |
+
LILAC_PROJECT_DIR='/data'
|
2 |
HF_HOME=/data/.huggingface
|
3 |
TRANSFORMERS_CACHE=/data/.cache
|
4 |
XDG_CACHE_HOME=/data/.cache
|
.gitattributes
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
dist/lilac-0.0.19-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
2 |
-
data/.cache/lilac/concept/lilac/profanity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
|
3 |
-
data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl
DELETED
Binary file (10.8 kB)
|
|
data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl
DELETED
Binary file (21.7 kB)
|
|
data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl
DELETED
Binary file (21.7 kB)
|
|
data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl
DELETED
Binary file (21.8 kB)
|
|
data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl
DELETED
Binary file (60.6 kB)
|
|
data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl
DELETED
Binary file (202 kB)
|
|
data/.cache/lilac/concept/lilac/non-english/gte-small.pkl
DELETED
Binary file (331 kB)
|
|
data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl
DELETED
Binary file (180 kB)
|
|
data/.cache/lilac/concept/lilac/profanity/gte-small.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:ed7340614b1dea910ddeb26bbda0167b1f4fe2479071a62a70b63c18bc6232d0
|
3 |
-
size 1672960
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/lilac/question/gte-small.pkl
DELETED
Binary file (611 kB)
|
|
data/.cache/lilac/concept/lilac/source-code/gte-small.pkl
DELETED
Binary file (147 kB)
|
|
data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f2af2736f3d749391a431f9c24d3fc78cf8e58457cc4f0d1ce770185b92d879c
|
3 |
-
size 1886446
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/local/aliens/gte-small.pkl
DELETED
Binary file (28.5 kB)
|
|
data/lilac.yml
CHANGED
@@ -258,3 +258,168 @@ datasets:
|
|
258 |
ui:
|
259 |
media_paths:
|
260 |
- premise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
ui:
|
259 |
media_paths:
|
260 |
- premise
|
261 |
+
- namespace: lilac
|
262 |
+
name: mmlu_professional_law
|
263 |
+
source:
|
264 |
+
dataset_name: cais/mmlu
|
265 |
+
config_name: professional_law
|
266 |
+
source_name: huggingface
|
267 |
+
embeddings:
|
268 |
+
- path: question
|
269 |
+
embedding: gte-small
|
270 |
+
- path:
|
271 |
+
- choices
|
272 |
+
- '*'
|
273 |
+
embedding: gte-small
|
274 |
+
signals:
|
275 |
+
- path: question
|
276 |
+
signal:
|
277 |
+
signal_name: near_dup
|
278 |
+
- path: question
|
279 |
+
signal:
|
280 |
+
signal_name: pii
|
281 |
+
- path: question
|
282 |
+
signal:
|
283 |
+
signal_name: lang_detection
|
284 |
+
- path: question
|
285 |
+
signal:
|
286 |
+
embedding: gte-small
|
287 |
+
namespace: lilac
|
288 |
+
concept_name: positive-sentiment
|
289 |
+
signal_name: concept_score
|
290 |
+
- path: question
|
291 |
+
signal:
|
292 |
+
embedding: gte-small
|
293 |
+
namespace: lilac
|
294 |
+
concept_name: non-english
|
295 |
+
signal_name: concept_score
|
296 |
+
- path: question
|
297 |
+
signal:
|
298 |
+
embedding: gte-small
|
299 |
+
namespace: lilac
|
300 |
+
concept_name: toxicity
|
301 |
+
signal_name: concept_score
|
302 |
+
- path: question
|
303 |
+
signal:
|
304 |
+
embedding: gte-small
|
305 |
+
namespace: lilac
|
306 |
+
concept_name: question
|
307 |
+
signal_name: concept_score
|
308 |
+
- path: question
|
309 |
+
signal:
|
310 |
+
embedding: gte-small
|
311 |
+
namespace: lilac
|
312 |
+
concept_name: legal-termination
|
313 |
+
signal_name: concept_score
|
314 |
+
- path: question
|
315 |
+
signal:
|
316 |
+
embedding: gte-small
|
317 |
+
namespace: lilac
|
318 |
+
concept_name: source-code
|
319 |
+
signal_name: concept_score
|
320 |
+
- path: question
|
321 |
+
signal:
|
322 |
+
embedding: gte-small
|
323 |
+
namespace: lilac
|
324 |
+
concept_name: negative-sentiment
|
325 |
+
signal_name: concept_score
|
326 |
+
- path: question
|
327 |
+
signal:
|
328 |
+
embedding: gte-small
|
329 |
+
namespace: lilac
|
330 |
+
concept_name: profanity
|
331 |
+
signal_name: concept_score
|
332 |
+
- path: question
|
333 |
+
signal:
|
334 |
+
signal_name: text_statistics
|
335 |
+
- path:
|
336 |
+
- choices
|
337 |
+
- '*'
|
338 |
+
signal:
|
339 |
+
signal_name: near_dup
|
340 |
+
- path:
|
341 |
+
- choices
|
342 |
+
- '*'
|
343 |
+
signal:
|
344 |
+
signal_name: pii
|
345 |
+
- path:
|
346 |
+
- choices
|
347 |
+
- '*'
|
348 |
+
signal:
|
349 |
+
signal_name: lang_detection
|
350 |
+
- path:
|
351 |
+
- choices
|
352 |
+
- '*'
|
353 |
+
signal:
|
354 |
+
embedding: gte-small
|
355 |
+
namespace: lilac
|
356 |
+
concept_name: positive-sentiment
|
357 |
+
signal_name: concept_score
|
358 |
+
- path:
|
359 |
+
- choices
|
360 |
+
- '*'
|
361 |
+
signal:
|
362 |
+
embedding: gte-small
|
363 |
+
namespace: lilac
|
364 |
+
concept_name: non-english
|
365 |
+
signal_name: concept_score
|
366 |
+
- path:
|
367 |
+
- choices
|
368 |
+
- '*'
|
369 |
+
signal:
|
370 |
+
embedding: gte-small
|
371 |
+
namespace: lilac
|
372 |
+
concept_name: toxicity
|
373 |
+
signal_name: concept_score
|
374 |
+
- path:
|
375 |
+
- choices
|
376 |
+
- '*'
|
377 |
+
signal:
|
378 |
+
embedding: gte-small
|
379 |
+
namespace: lilac
|
380 |
+
concept_name: question
|
381 |
+
signal_name: concept_score
|
382 |
+
- path:
|
383 |
+
- choices
|
384 |
+
- '*'
|
385 |
+
signal:
|
386 |
+
embedding: gte-small
|
387 |
+
namespace: lilac
|
388 |
+
concept_name: legal-termination
|
389 |
+
signal_name: concept_score
|
390 |
+
- path:
|
391 |
+
- choices
|
392 |
+
- '*'
|
393 |
+
signal:
|
394 |
+
embedding: gte-small
|
395 |
+
namespace: lilac
|
396 |
+
concept_name: source-code
|
397 |
+
signal_name: concept_score
|
398 |
+
- path:
|
399 |
+
- choices
|
400 |
+
- '*'
|
401 |
+
signal:
|
402 |
+
embedding: gte-small
|
403 |
+
namespace: lilac
|
404 |
+
concept_name: negative-sentiment
|
405 |
+
signal_name: concept_score
|
406 |
+
- path:
|
407 |
+
- choices
|
408 |
+
- '*'
|
409 |
+
signal:
|
410 |
+
embedding: gte-small
|
411 |
+
namespace: lilac
|
412 |
+
concept_name: profanity
|
413 |
+
signal_name: concept_score
|
414 |
+
- path:
|
415 |
+
- choices
|
416 |
+
- '*'
|
417 |
+
signal:
|
418 |
+
signal_name: text_statistics
|
419 |
+
settings:
|
420 |
+
ui:
|
421 |
+
media_paths:
|
422 |
+
- question
|
423 |
+
- - choices
|
424 |
+
- '*'
|
425 |
+
preferred_embedding: gte-small
|
dist/README.md
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
This directory is used for locally built whl files.
|
2 |
-
We write a README.md to ensure an empty folder is uploaded when there is no whl.
|
|
|
|
|
|
dist/lilac-0.0.19-py3-none-any.whl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:09c9663b9e8ad2e05eea12ecf1324ad990c0e410454b5ea35afd687c33fff60f
|
3 |
-
size 1142074
|
|
|
|
|
|
|
|
docker_start.py
CHANGED
@@ -8,7 +8,7 @@ import yaml
|
|
8 |
from huggingface_hub import scan_cache_dir, snapshot_download
|
9 |
|
10 |
from lilac.concepts.db_concept import CONCEPTS_DIR, DiskConceptDB, get_concept_output_dir
|
11 |
-
from lilac.env import
|
12 |
from lilac.project import PROJECT_CONFIG_FILENAME
|
13 |
from lilac.utils import get_datasets_dir, get_lilac_cache_dir, log
|
14 |
|
@@ -67,7 +67,7 @@ def main() -> None:
|
|
67 |
repo_id=lilac_hf_dataset,
|
68 |
repo_type='dataset',
|
69 |
token=env('HF_ACCESS_TOKEN'),
|
70 |
-
local_dir=get_datasets_dir(
|
71 |
ignore_patterns=['.gitattributes', 'README.md'])
|
72 |
|
73 |
snapshot_dir = snapshot_download(repo_id=repo_id, repo_type='space', token=env('HF_ACCESS_TOKEN'))
|
@@ -76,16 +76,16 @@ def main() -> None:
|
|
76 |
# Copy the config file.
|
77 |
project_config_file = os.path.join(spaces_data_dir, PROJECT_CONFIG_FILENAME)
|
78 |
if os.path.exists(project_config_file):
|
79 |
-
shutil.copy(project_config_file, os.path.join(
|
80 |
|
81 |
# Delete cache files from persistent storage.
|
82 |
-
cache_dir = get_lilac_cache_dir(
|
83 |
if os.path.exists(cache_dir):
|
84 |
shutil.rmtree(cache_dir)
|
85 |
|
86 |
# NOTE: This is temporary during the move of concepts into the pip package. Once all the demos
|
87 |
# have been updated, this block can be deleted.
|
88 |
-
old_lilac_concepts_data_dir = os.path.join(
|
89 |
if os.path.exists(old_lilac_concepts_data_dir):
|
90 |
shutil.rmtree(old_lilac_concepts_data_dir)
|
91 |
|
@@ -102,7 +102,8 @@ def main() -> None:
|
|
102 |
continue
|
103 |
spaces_concept_output_dir = get_concept_output_dir(spaces_data_dir, concept.namespace,
|
104 |
concept.name)
|
105 |
-
persistent_output_dir = get_concept_output_dir(
|
|
|
106 |
shutil.rmtree(persistent_output_dir, ignore_errors=True)
|
107 |
shutil.copytree(spaces_concept_output_dir, persistent_output_dir, dirs_exist_ok=True)
|
108 |
shutil.rmtree(spaces_concept_output_dir, ignore_errors=True)
|
|
|
8 |
from huggingface_hub import scan_cache_dir, snapshot_download
|
9 |
|
10 |
from lilac.concepts.db_concept import CONCEPTS_DIR, DiskConceptDB, get_concept_output_dir
|
11 |
+
from lilac.env import env, get_project_dir
|
12 |
from lilac.project import PROJECT_CONFIG_FILENAME
|
13 |
from lilac.utils import get_datasets_dir, get_lilac_cache_dir, log
|
14 |
|
|
|
67 |
repo_id=lilac_hf_dataset,
|
68 |
repo_type='dataset',
|
69 |
token=env('HF_ACCESS_TOKEN'),
|
70 |
+
local_dir=get_datasets_dir(get_project_dir()),
|
71 |
ignore_patterns=['.gitattributes', 'README.md'])
|
72 |
|
73 |
snapshot_dir = snapshot_download(repo_id=repo_id, repo_type='space', token=env('HF_ACCESS_TOKEN'))
|
|
|
76 |
# Copy the config file.
|
77 |
project_config_file = os.path.join(spaces_data_dir, PROJECT_CONFIG_FILENAME)
|
78 |
if os.path.exists(project_config_file):
|
79 |
+
shutil.copy(project_config_file, os.path.join(get_project_dir(), PROJECT_CONFIG_FILENAME))
|
80 |
|
81 |
# Delete cache files from persistent storage.
|
82 |
+
cache_dir = get_lilac_cache_dir(get_project_dir())
|
83 |
if os.path.exists(cache_dir):
|
84 |
shutil.rmtree(cache_dir)
|
85 |
|
86 |
# NOTE: This is temporary during the move of concepts into the pip package. Once all the demos
|
87 |
# have been updated, this block can be deleted.
|
88 |
+
old_lilac_concepts_data_dir = os.path.join(get_project_dir(), CONCEPTS_DIR, 'lilac')
|
89 |
if os.path.exists(old_lilac_concepts_data_dir):
|
90 |
shutil.rmtree(old_lilac_concepts_data_dir)
|
91 |
|
|
|
102 |
continue
|
103 |
spaces_concept_output_dir = get_concept_output_dir(spaces_data_dir, concept.namespace,
|
104 |
concept.name)
|
105 |
+
persistent_output_dir = get_concept_output_dir(get_project_dir(), concept.namespace,
|
106 |
+
concept.name)
|
107 |
shutil.rmtree(persistent_output_dir, ignore_errors=True)
|
108 |
shutil.copytree(spaces_concept_output_dir, persistent_output_dir, dirs_exist_ok=True)
|
109 |
shutil.rmtree(spaces_concept_output_dir, ignore_errors=True)
|