Spaces:
Runtime error
Runtime error
nsthorat-lilac
committed on
Commit
·
faf3244
1
Parent(s):
f9a34e6
Upload folder using huggingface_hub
Browse files- .env +0 -48
- .env.demo +0 -5
- Dockerfile +0 -41
- README.md +0 -10
- data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl +0 -0
- data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl +0 -0
- data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl +0 -0
- data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/non-english/gte-base.pkl +0 -0
- data/.cache/lilac/concept/lilac/non-english/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/profanity/gte-base.pkl +0 -3
- data/.cache/lilac/concept/lilac/profanity/gte-small.pkl +0 -3
- data/.cache/lilac/concept/lilac/prompt-injections/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/prompt-reveal/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/question/cohere.pkl +0 -3
- data/.cache/lilac/concept/lilac/question/gte-base.pkl +0 -3
- data/.cache/lilac/concept/lilac/question/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/question/openai.pkl +0 -3
- data/.cache/lilac/concept/lilac/question/palm.pkl +0 -3
- data/.cache/lilac/concept/lilac/question/sbert.pkl +0 -0
- data/.cache/lilac/concept/lilac/source-code/gte-base.pkl +0 -0
- data/.cache/lilac/concept/lilac/source-code/gte-small.pkl +0 -0
- data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl +0 -3
- data/.cache/lilac/concept/local/aliens/gte-small.pkl +0 -0
- data/lilac.yml +0 -1024
- dist/README.md +0 -2
- dist/lilac-0.1.3-py3-none-any.whl +0 -3
- docker_start.py +0 -110
- docker_start.sh +1 -1
.env
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
# To overwrite these variables, create a .env.local file
|
2 |
-
|
3 |
-
# The path to the project directory. When used, this will be the global project directory for lilac.
|
4 |
-
# When not defined, define the project directory with `lilac start ./data`.
|
5 |
-
# LILAC_PROJECT_DIR=./data
|
6 |
-
|
7 |
-
# Set to 1 for duckdb to use views instead of materialized tables (lower memory usage, but slower).
|
8 |
-
DUCKDB_USE_VIEWS=0
|
9 |
-
|
10 |
-
# Set to true to enable read-only mode, disabling the ability to add datasets & compute dataset
|
11 |
-
# signals.
|
12 |
-
# LILAC_AUTH_ENABLED=true
|
13 |
-
|
14 |
-
# Variables that can be set in .env.local
|
15 |
-
#
|
16 |
-
# Get key from https://dashboard.cohere.ai/api-keys
|
17 |
-
# COHERE_API_KEY=
|
18 |
-
|
19 |
-
# GCS_REGION=
|
20 |
-
# GCS_ACCESS_KEY=
|
21 |
-
# GCS_SECRET_KEY=
|
22 |
-
|
23 |
-
# Get key from https://platform.openai.com/account/api-keys
|
24 |
-
# OPENAI_API_KEY=
|
25 |
-
# Get key from https://makersuite.google.com/app/apikey
|
26 |
-
# PALM_API_KEY=
|
27 |
-
|
28 |
-
# HuggingFace demos: machine that uploads to HuggingFace.
|
29 |
-
|
30 |
-
# For authenticating with HuggingFace to deploy to a Space.
|
31 |
-
# HF_USERNAME=
|
32 |
-
# The default repo to deploy to for a staging demo. Can be overridden by a command line flag.
|
33 |
-
# HF_STAGING_DEMO_REPO='HF_ORG/HF_REPO_NAME'
|
34 |
-
|
35 |
-
# For Google-login. This is generated from the Google Cloud Console for a web client.
|
36 |
-
# See: https://developers.google.com/identity/protocols/oauth2
|
37 |
-
GOOGLE_CLIENT_ID='279475920249-i8llm8vbos1vj5m1qocir8narb3r0enu.apps.googleusercontent.com'
|
38 |
-
# The client secret of the above client.
|
39 |
-
# GOOGLE_CLIENT_SECRET=
|
40 |
-
# A random string for oauth sessions.
|
41 |
-
# LILAC_OAUTH_SECRET_KEY=
|
42 |
-
|
43 |
-
# LangSmith source setup.
|
44 |
-
# LANGCHAIN_API_KEY=
|
45 |
-
# LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
|
46 |
-
|
47 |
-
# Firebase deployment token.
|
48 |
-
# FIREBASE_TOKEN=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.env.demo
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
LILAC_PROJECT_DIR='/data'
|
2 |
-
HF_HOME=/data/.huggingface
|
3 |
-
TRANSFORMERS_CACHE=/data/.cache
|
4 |
-
XDG_CACHE_HOME=/data/.cache
|
5 |
-
GOOGLE_ANALYTICS_ENABLED=true
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile
DELETED
@@ -1,41 +0,0 @@
|
|
1 |
-
# NOTE: When we upgrade to 3.11 we can use a slimmer docker image which comes with gcc.
|
2 |
-
FROM python:3.9-bullseye
|
3 |
-
|
4 |
-
# Allow statements and log messages to immediately appear in the Knative logs
|
5 |
-
ENV PYTHONUNBUFFERED True
|
6 |
-
|
7 |
-
# See: https://huggingface.co/docs/hub/spaces-sdks-docker#permissions
|
8 |
-
RUN useradd -m -u 1000 user
|
9 |
-
USER user
|
10 |
-
ENV HOME=/home/user \
|
11 |
-
PATH=/home/user/.local/bin:$PATH
|
12 |
-
|
13 |
-
# Set the working directory in the container.
|
14 |
-
WORKDIR $HOME/app
|
15 |
-
|
16 |
-
# Install the dependencies. This will look in ./dist for any wheels that match lilac. If they are
|
17 |
-
# not found, it will use the public pip package.
|
18 |
-
|
19 |
-
# Pip install lilac[all] and dependencies before trying to install the local wheel. This allows us
|
20 |
-
# to get cache hits on dependency installations when using a local wheel. When using the public pip
|
21 |
-
# package, the second call will be a no-op.
|
22 |
-
RUN python -m pip install lilac[all]
|
23 |
-
|
24 |
-
# Install from the local wheel inside ./dist. This will be a no-op if the wheel is not found.
|
25 |
-
COPY --chown=user /dist ./dist/
|
26 |
-
RUN python -m pip install --find-links=dist --upgrade lilac[all]
|
27 |
-
|
28 |
-
COPY --chown=user .env .
|
29 |
-
COPY --chown=user .env.demo .
|
30 |
-
# Copy the README so we can read the datasets from the HuggingFace config.
|
31 |
-
COPY --chown=user README.md .
|
32 |
-
# Copy the license just in case.
|
33 |
-
COPY --chown=user LICENSE .
|
34 |
-
|
35 |
-
COPY --chown=user docker_start.sh docker_start.py ./
|
36 |
-
|
37 |
-
# Make a local data directory for non-persistent storage demos.
|
38 |
-
RUN mkdir -p ./data
|
39 |
-
RUN chown -R user ./data
|
40 |
-
|
41 |
-
CMD ["bash", "docker_start.sh"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Lilac
|
3 |
-
emoji: "\U0001F337"
|
4 |
-
colorFrom: purple
|
5 |
-
colorTo: purple
|
6 |
-
sdk: docker
|
7 |
-
app_port: 5432
|
8 |
-
datasets: []
|
9 |
-
|
10 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl
DELETED
Binary file (10.8 kB)
|
|
data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl
DELETED
Binary file (21.7 kB)
|
|
data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl
DELETED
Binary file (21.7 kB)
|
|
data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl
DELETED
Binary file (21.8 kB)
|
|
data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl
DELETED
Binary file (60.6 kB)
|
|
data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl
DELETED
Binary file (202 kB)
|
|
data/.cache/lilac/concept/lilac/non-english/gte-base.pkl
DELETED
Binary file (645 kB)
|
|
data/.cache/lilac/concept/lilac/non-english/gte-small.pkl
DELETED
Binary file (330 kB)
|
|
data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl
DELETED
Binary file (180 kB)
|
|
data/.cache/lilac/concept/lilac/profanity/gte-base.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d4ac21aa8bd428688a64f75221338be8c676d208de61a9eba948300e8aa43af3
|
3 |
-
size 3301300
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/lilac/profanity/gte-small.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:89495a1f968ddcb3f76ad46dbe7b6503a7b76afcdac37abbeb15c81d38c2f9d4
|
3 |
-
size 1672934
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/lilac/prompt-injections/gte-small.pkl
DELETED
Binary file (71.3 kB)
|
|
data/.cache/lilac/concept/lilac/prompt-reveal/gte-small.pkl
DELETED
Binary file (69.6 kB)
|
|
data/.cache/lilac/concept/lilac/question/cohere.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9768c28d6ed72e4a1a5819fef4157fb1f30a50f1e165bfcdd87d0fa761146902
|
3 |
-
size 6254174
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/lilac/question/gte-base.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d2ae5bf4275be11be99cb2e90c03e35c9d2749efc3b34a2d1db1e9f0c99325d6
|
3 |
-
size 1194925
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/lilac/question/gte-small.pkl
DELETED
Binary file (611 kB)
|
|
data/.cache/lilac/concept/lilac/question/openai.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:4a14c6df6924f45391654fe78dee8cf996de3abb8acf8ca0f81a65814572d493
|
3 |
-
size 2362432
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/lilac/question/palm.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:cce86748bca57bd328f68b97ee80b4e3343ee4008d8951c5b061b6dd9335df7e
|
3 |
-
size 1194921
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/lilac/question/sbert.pkl
DELETED
Binary file (611 kB)
|
|
data/.cache/lilac/concept/lilac/source-code/gte-base.pkl
DELETED
Binary file (287 kB)
|
|
data/.cache/lilac/concept/lilac/source-code/gte-small.pkl
DELETED
Binary file (147 kB)
|
|
data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:b6a074a3ac60cc9bfb82c4bf19d0e8c8d3837cb2b68b97efe8960c16675477f1
|
3 |
-
size 1886420
|
|
|
|
|
|
|
|
data/.cache/lilac/concept/local/aliens/gte-small.pkl
DELETED
Binary file (28.4 kB)
|
|
data/lilac.yml
DELETED
@@ -1,1024 +0,0 @@
|
|
1 |
-
# Lilac project config.
|
2 |
-
# See https://lilacml.com/api_reference/index.html#lilac.Config for details.
|
3 |
-
|
4 |
-
datasets:
|
5 |
-
- namespace: local
|
6 |
-
name: glue
|
7 |
-
source:
|
8 |
-
dataset_name: glue
|
9 |
-
config_name: ax
|
10 |
-
source_name: huggingface
|
11 |
-
embeddings:
|
12 |
-
- path: premise
|
13 |
-
embedding: gte-small
|
14 |
-
- path: premise
|
15 |
-
embedding: gte-base
|
16 |
-
- path: hypothesis
|
17 |
-
embedding: gte-small
|
18 |
-
signals:
|
19 |
-
- path: premise
|
20 |
-
signal:
|
21 |
-
signal_name: pii
|
22 |
-
- path: hypothesis
|
23 |
-
signal:
|
24 |
-
signal_name: pii
|
25 |
-
- path: premise
|
26 |
-
signal:
|
27 |
-
signal_name: text_statistics
|
28 |
-
settings:
|
29 |
-
ui:
|
30 |
-
media_paths:
|
31 |
-
- premise
|
32 |
-
markdown_paths: []
|
33 |
-
- namespace: local
|
34 |
-
name: glue_ax
|
35 |
-
source:
|
36 |
-
dataset_name: glue
|
37 |
-
config_name: ax
|
38 |
-
source_name: huggingface
|
39 |
-
embeddings:
|
40 |
-
- path: hypothesis
|
41 |
-
embedding: gte-small
|
42 |
-
signals:
|
43 |
-
- path: premise
|
44 |
-
signal:
|
45 |
-
signal_name: text_statistics
|
46 |
-
- path: premise
|
47 |
-
signal:
|
48 |
-
signal_name: pii
|
49 |
-
- path: premise
|
50 |
-
signal:
|
51 |
-
signal_name: near_dup
|
52 |
-
- path: hypothesis
|
53 |
-
signal:
|
54 |
-
embedding: gte-small
|
55 |
-
namespace: ''
|
56 |
-
concept_name: ''
|
57 |
-
signal_name: concept_score
|
58 |
-
- path: hypothesis
|
59 |
-
signal:
|
60 |
-
embedding: gte-small
|
61 |
-
namespace: lilac
|
62 |
-
concept_name: positive-sentiment
|
63 |
-
signal_name: concept_score
|
64 |
-
- path: hypothesis
|
65 |
-
signal:
|
66 |
-
embedding: gte-small
|
67 |
-
namespace: lilac
|
68 |
-
concept_name: non-english
|
69 |
-
signal_name: concept_score
|
70 |
-
settings:
|
71 |
-
ui:
|
72 |
-
media_paths:
|
73 |
-
- hypothesis
|
74 |
-
markdown_paths: []
|
75 |
-
- namespace: local
|
76 |
-
name: imdb3
|
77 |
-
source:
|
78 |
-
dataset_name: imdb
|
79 |
-
source_name: huggingface
|
80 |
-
settings:
|
81 |
-
ui:
|
82 |
-
media_paths:
|
83 |
-
- text
|
84 |
-
markdown_paths: []
|
85 |
-
- namespace: local
|
86 |
-
name: imdb
|
87 |
-
source:
|
88 |
-
dataset_name: imdb
|
89 |
-
source_name: huggingface
|
90 |
-
embeddings:
|
91 |
-
- path: text
|
92 |
-
embedding: gte-small
|
93 |
-
signals:
|
94 |
-
- path: text
|
95 |
-
signal:
|
96 |
-
signal_name: pii
|
97 |
-
- path: text
|
98 |
-
signal:
|
99 |
-
signal_name: text_statistics
|
100 |
-
settings:
|
101 |
-
ui:
|
102 |
-
media_paths:
|
103 |
-
- text
|
104 |
-
markdown_paths: []
|
105 |
-
- namespace: local
|
106 |
-
name: imdb2
|
107 |
-
source:
|
108 |
-
dataset_name: imdb
|
109 |
-
source_name: huggingface
|
110 |
-
settings:
|
111 |
-
ui:
|
112 |
-
media_paths:
|
113 |
-
- text
|
114 |
-
markdown_paths: []
|
115 |
-
- namespace: lilac
|
116 |
-
name: OpenOrca-100k
|
117 |
-
source:
|
118 |
-
dataset_name: Open-Orca/OpenOrca
|
119 |
-
sample_size: 100000
|
120 |
-
source_name: huggingface
|
121 |
-
embeddings:
|
122 |
-
- path: question
|
123 |
-
embedding: gte-small
|
124 |
-
- path: response
|
125 |
-
embedding: gte-small
|
126 |
-
signals:
|
127 |
-
- path: question
|
128 |
-
signal:
|
129 |
-
signal_name: near_dup
|
130 |
-
- path: question
|
131 |
-
signal:
|
132 |
-
signal_name: pii
|
133 |
-
- path: question
|
134 |
-
signal:
|
135 |
-
signal_name: lang_detection
|
136 |
-
- path: question
|
137 |
-
signal:
|
138 |
-
embedding: gte-small
|
139 |
-
namespace: lilac
|
140 |
-
concept_name: positive-sentiment
|
141 |
-
signal_name: concept_score
|
142 |
-
- path: question
|
143 |
-
signal:
|
144 |
-
embedding: gte-small
|
145 |
-
namespace: lilac
|
146 |
-
concept_name: non-english
|
147 |
-
signal_name: concept_score
|
148 |
-
- path: question
|
149 |
-
signal:
|
150 |
-
embedding: gte-small
|
151 |
-
namespace: lilac
|
152 |
-
concept_name: toxicity
|
153 |
-
signal_name: concept_score
|
154 |
-
- path: question
|
155 |
-
signal:
|
156 |
-
embedding: gte-small
|
157 |
-
namespace: lilac
|
158 |
-
concept_name: question
|
159 |
-
signal_name: concept_score
|
160 |
-
- path: question
|
161 |
-
signal:
|
162 |
-
embedding: gte-small
|
163 |
-
namespace: lilac
|
164 |
-
concept_name: legal-termination
|
165 |
-
signal_name: concept_score
|
166 |
-
- path: question
|
167 |
-
signal:
|
168 |
-
embedding: gte-small
|
169 |
-
namespace: lilac
|
170 |
-
concept_name: source-code
|
171 |
-
signal_name: concept_score
|
172 |
-
- path: question
|
173 |
-
signal:
|
174 |
-
embedding: gte-small
|
175 |
-
namespace: lilac
|
176 |
-
concept_name: negative-sentiment
|
177 |
-
signal_name: concept_score
|
178 |
-
- path: question
|
179 |
-
signal:
|
180 |
-
embedding: gte-small
|
181 |
-
namespace: lilac
|
182 |
-
concept_name: profanity
|
183 |
-
signal_name: concept_score
|
184 |
-
- path: question
|
185 |
-
signal:
|
186 |
-
signal_name: text_statistics
|
187 |
-
- path: response
|
188 |
-
signal:
|
189 |
-
signal_name: near_dup
|
190 |
-
- path: response
|
191 |
-
signal:
|
192 |
-
signal_name: pii
|
193 |
-
- path: response
|
194 |
-
signal:
|
195 |
-
signal_name: lang_detection
|
196 |
-
- path: response
|
197 |
-
signal:
|
198 |
-
embedding: gte-small
|
199 |
-
namespace: lilac
|
200 |
-
concept_name: positive-sentiment
|
201 |
-
signal_name: concept_score
|
202 |
-
- path: response
|
203 |
-
signal:
|
204 |
-
embedding: gte-small
|
205 |
-
namespace: lilac
|
206 |
-
concept_name: non-english
|
207 |
-
signal_name: concept_score
|
208 |
-
- path: response
|
209 |
-
signal:
|
210 |
-
embedding: gte-small
|
211 |
-
namespace: lilac
|
212 |
-
concept_name: toxicity
|
213 |
-
signal_name: concept_score
|
214 |
-
- path: response
|
215 |
-
signal:
|
216 |
-
embedding: gte-small
|
217 |
-
namespace: lilac
|
218 |
-
concept_name: question
|
219 |
-
signal_name: concept_score
|
220 |
-
- path: response
|
221 |
-
signal:
|
222 |
-
embedding: gte-small
|
223 |
-
namespace: lilac
|
224 |
-
concept_name: legal-termination
|
225 |
-
signal_name: concept_score
|
226 |
-
- path: response
|
227 |
-
signal:
|
228 |
-
embedding: gte-small
|
229 |
-
namespace: lilac
|
230 |
-
concept_name: source-code
|
231 |
-
signal_name: concept_score
|
232 |
-
- path: response
|
233 |
-
signal:
|
234 |
-
embedding: gte-small
|
235 |
-
namespace: lilac
|
236 |
-
concept_name: negative-sentiment
|
237 |
-
signal_name: concept_score
|
238 |
-
- path: response
|
239 |
-
signal:
|
240 |
-
embedding: gte-small
|
241 |
-
namespace: lilac
|
242 |
-
concept_name: profanity
|
243 |
-
signal_name: concept_score
|
244 |
-
- path: response
|
245 |
-
signal:
|
246 |
-
signal_name: text_statistics
|
247 |
-
- path: system_prompt
|
248 |
-
signal:
|
249 |
-
signal_name: pii
|
250 |
-
settings:
|
251 |
-
ui:
|
252 |
-
media_paths:
|
253 |
-
- question
|
254 |
-
- response
|
255 |
-
markdown_paths: []
|
256 |
-
- namespace: local
|
257 |
-
name: the_movies_dataset
|
258 |
-
source:
|
259 |
-
filepaths:
|
260 |
-
- gs://lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv
|
261 |
-
names: []
|
262 |
-
source_name: csv
|
263 |
-
settings:
|
264 |
-
ui:
|
265 |
-
media_paths:
|
266 |
-
- overview
|
267 |
-
markdown_paths: []
|
268 |
-
- namespace: local
|
269 |
-
name: glue_ax_parquet
|
270 |
-
source:
|
271 |
-
filepaths:
|
272 |
-
- gs://lilac-data/datasets/glue_ax_parquet/glue_ax.parquet
|
273 |
-
source_name: parquet
|
274 |
-
settings:
|
275 |
-
ui:
|
276 |
-
media_paths:
|
277 |
-
- premise
|
278 |
-
markdown_paths: []
|
279 |
-
- namespace: lilac
|
280 |
-
name: mmlu_professional_law
|
281 |
-
source:
|
282 |
-
dataset_name: cais/mmlu
|
283 |
-
config_name: professional_law
|
284 |
-
source_name: huggingface
|
285 |
-
embeddings:
|
286 |
-
- path: question
|
287 |
-
embedding: gte-small
|
288 |
-
- path:
|
289 |
-
- choices
|
290 |
-
- '*'
|
291 |
-
embedding: gte-small
|
292 |
-
signals:
|
293 |
-
- path: question
|
294 |
-
signal:
|
295 |
-
signal_name: near_dup
|
296 |
-
- path: question
|
297 |
-
signal:
|
298 |
-
signal_name: pii
|
299 |
-
- path: question
|
300 |
-
signal:
|
301 |
-
signal_name: lang_detection
|
302 |
-
- path: question
|
303 |
-
signal:
|
304 |
-
embedding: gte-small
|
305 |
-
namespace: lilac
|
306 |
-
concept_name: positive-sentiment
|
307 |
-
signal_name: concept_score
|
308 |
-
- path: question
|
309 |
-
signal:
|
310 |
-
embedding: gte-small
|
311 |
-
namespace: lilac
|
312 |
-
concept_name: non-english
|
313 |
-
signal_name: concept_score
|
314 |
-
- path: question
|
315 |
-
signal:
|
316 |
-
embedding: gte-small
|
317 |
-
namespace: lilac
|
318 |
-
concept_name: toxicity
|
319 |
-
signal_name: concept_score
|
320 |
-
- path: question
|
321 |
-
signal:
|
322 |
-
embedding: gte-small
|
323 |
-
namespace: lilac
|
324 |
-
concept_name: question
|
325 |
-
signal_name: concept_score
|
326 |
-
- path: question
|
327 |
-
signal:
|
328 |
-
embedding: gte-small
|
329 |
-
namespace: lilac
|
330 |
-
concept_name: legal-termination
|
331 |
-
signal_name: concept_score
|
332 |
-
- path: question
|
333 |
-
signal:
|
334 |
-
embedding: gte-small
|
335 |
-
namespace: lilac
|
336 |
-
concept_name: source-code
|
337 |
-
signal_name: concept_score
|
338 |
-
- path: question
|
339 |
-
signal:
|
340 |
-
embedding: gte-small
|
341 |
-
namespace: lilac
|
342 |
-
concept_name: negative-sentiment
|
343 |
-
signal_name: concept_score
|
344 |
-
- path: question
|
345 |
-
signal:
|
346 |
-
embedding: gte-small
|
347 |
-
namespace: lilac
|
348 |
-
concept_name: profanity
|
349 |
-
signal_name: concept_score
|
350 |
-
- path: question
|
351 |
-
signal:
|
352 |
-
signal_name: text_statistics
|
353 |
-
- path:
|
354 |
-
- choices
|
355 |
-
- '*'
|
356 |
-
signal:
|
357 |
-
signal_name: near_dup
|
358 |
-
- path:
|
359 |
-
- choices
|
360 |
-
- '*'
|
361 |
-
signal:
|
362 |
-
signal_name: pii
|
363 |
-
- path:
|
364 |
-
- choices
|
365 |
-
- '*'
|
366 |
-
signal:
|
367 |
-
signal_name: lang_detection
|
368 |
-
- path:
|
369 |
-
- choices
|
370 |
-
- '*'
|
371 |
-
signal:
|
372 |
-
embedding: gte-small
|
373 |
-
namespace: lilac
|
374 |
-
concept_name: positive-sentiment
|
375 |
-
signal_name: concept_score
|
376 |
-
- path:
|
377 |
-
- choices
|
378 |
-
- '*'
|
379 |
-
signal:
|
380 |
-
embedding: gte-small
|
381 |
-
namespace: lilac
|
382 |
-
concept_name: non-english
|
383 |
-
signal_name: concept_score
|
384 |
-
- path:
|
385 |
-
- choices
|
386 |
-
- '*'
|
387 |
-
signal:
|
388 |
-
embedding: gte-small
|
389 |
-
namespace: lilac
|
390 |
-
concept_name: toxicity
|
391 |
-
signal_name: concept_score
|
392 |
-
- path:
|
393 |
-
- choices
|
394 |
-
- '*'
|
395 |
-
signal:
|
396 |
-
embedding: gte-small
|
397 |
-
namespace: lilac
|
398 |
-
concept_name: question
|
399 |
-
signal_name: concept_score
|
400 |
-
- path:
|
401 |
-
- choices
|
402 |
-
- '*'
|
403 |
-
signal:
|
404 |
-
embedding: gte-small
|
405 |
-
namespace: lilac
|
406 |
-
concept_name: legal-termination
|
407 |
-
signal_name: concept_score
|
408 |
-
- path:
|
409 |
-
- choices
|
410 |
-
- '*'
|
411 |
-
signal:
|
412 |
-
embedding: gte-small
|
413 |
-
namespace: lilac
|
414 |
-
concept_name: source-code
|
415 |
-
signal_name: concept_score
|
416 |
-
- path:
|
417 |
-
- choices
|
418 |
-
- '*'
|
419 |
-
signal:
|
420 |
-
embedding: gte-small
|
421 |
-
namespace: lilac
|
422 |
-
concept_name: negative-sentiment
|
423 |
-
signal_name: concept_score
|
424 |
-
- path:
|
425 |
-
- choices
|
426 |
-
- '*'
|
427 |
-
signal:
|
428 |
-
embedding: gte-small
|
429 |
-
namespace: lilac
|
430 |
-
concept_name: profanity
|
431 |
-
signal_name: concept_score
|
432 |
-
- path:
|
433 |
-
- choices
|
434 |
-
- '*'
|
435 |
-
signal:
|
436 |
-
signal_name: text_statistics
|
437 |
-
settings:
|
438 |
-
ui:
|
439 |
-
media_paths:
|
440 |
-
- question
|
441 |
-
- - choices
|
442 |
-
- '*'
|
443 |
-
markdown_paths: []
|
444 |
-
preferred_embedding: gte-small
|
445 |
-
- namespace: local
|
446 |
-
name: deepset-prompt-inj
|
447 |
-
source:
|
448 |
-
dataset_name: deepset/prompt-injections
|
449 |
-
source_name: huggingface
|
450 |
-
embeddings:
|
451 |
-
- path: text
|
452 |
-
embedding: gte-small
|
453 |
-
settings:
|
454 |
-
ui:
|
455 |
-
media_paths:
|
456 |
-
- text
|
457 |
-
markdown_paths: []
|
458 |
-
- namespace: local
|
459 |
-
name: jasper-prompt-inj
|
460 |
-
source:
|
461 |
-
dataset_name: JasperLS/prompt-injections
|
462 |
-
source_name: huggingface
|
463 |
-
embeddings:
|
464 |
-
- path: text
|
465 |
-
embedding: gte-small
|
466 |
-
settings:
|
467 |
-
ui:
|
468 |
-
media_paths:
|
469 |
-
- text
|
470 |
-
markdown_paths: []
|
471 |
-
- namespace: local
|
472 |
-
name: mosaic-chat-v2
|
473 |
-
source:
|
474 |
-
dataset_name: sam-mosaic/chat-v2
|
475 |
-
source_name: huggingface
|
476 |
-
embeddings:
|
477 |
-
- path: prompt
|
478 |
-
embedding: gte-small
|
479 |
-
- path: response
|
480 |
-
embedding: gte-small
|
481 |
-
signals:
|
482 |
-
- path: prompt
|
483 |
-
signal:
|
484 |
-
signal_name: near_dup
|
485 |
-
- path: prompt
|
486 |
-
signal:
|
487 |
-
signal_name: pii
|
488 |
-
- path: prompt
|
489 |
-
signal:
|
490 |
-
signal_name: lang_detection
|
491 |
-
- path: prompt
|
492 |
-
signal:
|
493 |
-
embedding: gte-small
|
494 |
-
namespace: lilac
|
495 |
-
concept_name: non-english
|
496 |
-
signal_name: concept_score
|
497 |
-
- path: prompt
|
498 |
-
signal:
|
499 |
-
embedding: gte-small
|
500 |
-
namespace: lilac
|
501 |
-
concept_name: toxicity
|
502 |
-
signal_name: concept_score
|
503 |
-
- path: prompt
|
504 |
-
signal:
|
505 |
-
embedding: gte-small
|
506 |
-
namespace: lilac
|
507 |
-
concept_name: source-code
|
508 |
-
signal_name: concept_score
|
509 |
-
- path: prompt
|
510 |
-
signal:
|
511 |
-
embedding: gte-small
|
512 |
-
namespace: lilac
|
513 |
-
concept_name: negative-sentiment
|
514 |
-
signal_name: concept_score
|
515 |
-
- path: prompt
|
516 |
-
signal:
|
517 |
-
embedding: gte-small
|
518 |
-
namespace: lilac
|
519 |
-
concept_name: profanity
|
520 |
-
signal_name: concept_score
|
521 |
-
- path: prompt
|
522 |
-
signal:
|
523 |
-
signal_name: text_statistics
|
524 |
-
- path: response
|
525 |
-
signal:
|
526 |
-
signal_name: near_dup
|
527 |
-
- path: response
|
528 |
-
signal:
|
529 |
-
signal_name: pii
|
530 |
-
- path: response
|
531 |
-
signal:
|
532 |
-
signal_name: lang_detection
|
533 |
-
- path: response
|
534 |
-
signal:
|
535 |
-
embedding: gte-small
|
536 |
-
namespace: lilac
|
537 |
-
concept_name: non-english
|
538 |
-
signal_name: concept_score
|
539 |
-
- path: response
|
540 |
-
signal:
|
541 |
-
embedding: gte-small
|
542 |
-
namespace: lilac
|
543 |
-
concept_name: toxicity
|
544 |
-
signal_name: concept_score
|
545 |
-
- path: response
|
546 |
-
signal:
|
547 |
-
embedding: gte-small
|
548 |
-
namespace: lilac
|
549 |
-
concept_name: source-code
|
550 |
-
signal_name: concept_score
|
551 |
-
- path: response
|
552 |
-
signal:
|
553 |
-
embedding: gte-small
|
554 |
-
namespace: lilac
|
555 |
-
concept_name: negative-sentiment
|
556 |
-
signal_name: concept_score
|
557 |
-
- path: response
|
558 |
-
signal:
|
559 |
-
embedding: gte-small
|
560 |
-
namespace: lilac
|
561 |
-
concept_name: profanity
|
562 |
-
signal_name: concept_score
|
563 |
-
- path: response
|
564 |
-
signal:
|
565 |
-
signal_name: text_statistics
|
566 |
-
settings:
|
567 |
-
ui:
|
568 |
-
media_paths:
|
569 |
-
- prompt
|
570 |
-
- response
|
571 |
-
markdown_paths: []
|
572 |
-
preferred_embedding: gte-small
|
573 |
-
- namespace: local
|
574 |
-
name: databricks-dolly-15k-curated-en
|
575 |
-
source:
|
576 |
-
dataset_name: argilla/databricks-dolly-15k-curated-en
|
577 |
-
source_name: huggingface
|
578 |
-
embeddings:
|
579 |
-
- path: original-context
|
580 |
-
embedding: gte-small
|
581 |
-
- path:
|
582 |
-
- new-context
|
583 |
-
- value
|
584 |
-
- '*'
|
585 |
-
embedding: gte-small
|
586 |
-
- path: original-instruction
|
587 |
-
embedding: gte-small
|
588 |
-
signals:
|
589 |
-
- path: original-instruction
|
590 |
-
signal:
|
591 |
-
signal_name: near_dup
|
592 |
-
- path: original-instruction
|
593 |
-
signal:
|
594 |
-
signal_name: pii
|
595 |
-
- path: original-instruction
|
596 |
-
signal:
|
597 |
-
signal_name: lang_detection
|
598 |
-
- path: original-instruction
|
599 |
-
signal:
|
600 |
-
signal_name: text_statistics
|
601 |
-
- path: original-context
|
602 |
-
signal:
|
603 |
-
signal_name: near_dup
|
604 |
-
- path: original-context
|
605 |
-
signal:
|
606 |
-
signal_name: pii
|
607 |
-
- path: original-context
|
608 |
-
signal:
|
609 |
-
signal_name: lang_detection
|
610 |
-
- path: original-context
|
611 |
-
signal:
|
612 |
-
embedding: gte-small
|
613 |
-
namespace: lilac
|
614 |
-
concept_name: positive-sentiment
|
615 |
-
signal_name: concept_score
|
616 |
-
- path: original-context
|
617 |
-
signal:
|
618 |
-
embedding: gte-small
|
619 |
-
namespace: lilac
|
620 |
-
concept_name: non-english
|
621 |
-
signal_name: concept_score
|
622 |
-
- path: original-context
|
623 |
-
signal:
|
624 |
-
embedding: gte-small
|
625 |
-
namespace: lilac
|
626 |
-
concept_name: toxicity
|
627 |
-
signal_name: concept_score
|
628 |
-
- path: original-context
|
629 |
-
signal:
|
630 |
-
embedding: gte-small
|
631 |
-
namespace: lilac
|
632 |
-
concept_name: question
|
633 |
-
signal_name: concept_score
|
634 |
-
- path: original-context
|
635 |
-
signal:
|
636 |
-
embedding: gte-small
|
637 |
-
namespace: lilac
|
638 |
-
concept_name: legal-termination
|
639 |
-
signal_name: concept_score
|
640 |
-
- path: original-context
|
641 |
-
signal:
|
642 |
-
embedding: gte-small
|
643 |
-
namespace: lilac
|
644 |
-
concept_name: source-code
|
645 |
-
signal_name: concept_score
|
646 |
-
- path: original-context
|
647 |
-
signal:
|
648 |
-
embedding: gte-small
|
649 |
-
namespace: lilac
|
650 |
-
concept_name: negative-sentiment
|
651 |
-
signal_name: concept_score
|
652 |
-
- path: original-context
|
653 |
-
signal:
|
654 |
-
embedding: gte-small
|
655 |
-
namespace: lilac
|
656 |
-
concept_name: profanity
|
657 |
-
signal_name: concept_score
|
658 |
-
- path: original-context
|
659 |
-
signal:
|
660 |
-
signal_name: text_statistics
|
661 |
-
- path: original-response
|
662 |
-
signal:
|
663 |
-
signal_name: near_dup
|
664 |
-
- path: original-response
|
665 |
-
signal:
|
666 |
-
signal_name: pii
|
667 |
-
- path: original-response
|
668 |
-
signal:
|
669 |
-
signal_name: lang_detection
|
670 |
-
- path: original-response
|
671 |
-
signal:
|
672 |
-
signal_name: text_statistics
|
673 |
-
- path:
|
674 |
-
- new-instruction
|
675 |
-
- value
|
676 |
-
- '*'
|
677 |
-
signal:
|
678 |
-
signal_name: near_dup
|
679 |
-
- path:
|
680 |
-
- new-instruction
|
681 |
-
- value
|
682 |
-
- '*'
|
683 |
-
signal:
|
684 |
-
signal_name: pii
|
685 |
-
- path:
|
686 |
-
- new-instruction
|
687 |
-
- value
|
688 |
-
- '*'
|
689 |
-
signal:
|
690 |
-
signal_name: lang_detection
|
691 |
-
- path:
|
692 |
-
- new-instruction
|
693 |
-
- value
|
694 |
-
- '*'
|
695 |
-
signal:
|
696 |
-
signal_name: text_statistics
|
697 |
-
- path:
|
698 |
-
- new-context
|
699 |
-
- value
|
700 |
-
- '*'
|
701 |
-
signal:
|
702 |
-
signal_name: near_dup
|
703 |
-
- path:
|
704 |
-
- new-context
|
705 |
-
- value
|
706 |
-
- '*'
|
707 |
-
signal:
|
708 |
-
signal_name: pii
|
709 |
-
- path:
|
710 |
-
- new-context
|
711 |
-
- value
|
712 |
-
- '*'
|
713 |
-
signal:
|
714 |
-
signal_name: lang_detection
|
715 |
-
- path:
|
716 |
-
- new-context
|
717 |
-
- value
|
718 |
-
- '*'
|
719 |
-
signal:
|
720 |
-
embedding: gte-small
|
721 |
-
namespace: lilac
|
722 |
-
concept_name: positive-sentiment
|
723 |
-
signal_name: concept_score
|
724 |
-
- path:
|
725 |
-
- new-context
|
726 |
-
- value
|
727 |
-
- '*'
|
728 |
-
signal:
|
729 |
-
embedding: gte-small
|
730 |
-
namespace: lilac
|
731 |
-
concept_name: non-english
|
732 |
-
signal_name: concept_score
|
733 |
-
- path:
|
734 |
-
- new-context
|
735 |
-
- value
|
736 |
-
- '*'
|
737 |
-
signal:
|
738 |
-
embedding: gte-small
|
739 |
-
namespace: lilac
|
740 |
-
concept_name: toxicity
|
741 |
-
signal_name: concept_score
|
742 |
-
- path:
|
743 |
-
- new-context
|
744 |
-
- value
|
745 |
-
- '*'
|
746 |
-
signal:
|
747 |
-
embedding: gte-small
|
748 |
-
namespace: lilac
|
749 |
-
concept_name: question
|
750 |
-
signal_name: concept_score
|
751 |
-
- path:
|
752 |
-
- new-context
|
753 |
-
- value
|
754 |
-
- '*'
|
755 |
-
signal:
|
756 |
-
embedding: gte-small
|
757 |
-
namespace: lilac
|
758 |
-
concept_name: legal-termination
|
759 |
-
signal_name: concept_score
|
760 |
-
- path:
|
761 |
-
- new-context
|
762 |
-
- value
|
763 |
-
- '*'
|
764 |
-
signal:
|
765 |
-
embedding: gte-small
|
766 |
-
namespace: lilac
|
767 |
-
concept_name: source-code
|
768 |
-
signal_name: concept_score
|
769 |
-
- path:
|
770 |
-
- new-context
|
771 |
-
- value
|
772 |
-
- '*'
|
773 |
-
signal:
|
774 |
-
embedding: gte-small
|
775 |
-
namespace: lilac
|
776 |
-
concept_name: negative-sentiment
|
777 |
-
signal_name: concept_score
|
778 |
-
- path:
|
779 |
-
- new-context
|
780 |
-
- value
|
781 |
-
- '*'
|
782 |
-
signal:
|
783 |
-
embedding: gte-small
|
784 |
-
namespace: lilac
|
785 |
-
concept_name: profanity
|
786 |
-
signal_name: concept_score
|
787 |
-
- path:
|
788 |
-
- new-context
|
789 |
-
- value
|
790 |
-
- '*'
|
791 |
-
signal:
|
792 |
-
signal_name: text_statistics
|
793 |
-
- path:
|
794 |
-
- new-response
|
795 |
-
- value
|
796 |
-
- '*'
|
797 |
-
signal:
|
798 |
-
signal_name: near_dup
|
799 |
-
- path:
|
800 |
-
- new-response
|
801 |
-
- value
|
802 |
-
- '*'
|
803 |
-
signal:
|
804 |
-
signal_name: pii
|
805 |
-
- path:
|
806 |
-
- new-response
|
807 |
-
- value
|
808 |
-
- '*'
|
809 |
-
signal:
|
810 |
-
signal_name: lang_detection
|
811 |
-
- path:
|
812 |
-
- new-response
|
813 |
-
- value
|
814 |
-
- '*'
|
815 |
-
signal:
|
816 |
-
signal_name: text_statistics
|
817 |
-
- path: original-instruction
|
818 |
-
signal:
|
819 |
-
signal_name: spacy_ner
|
820 |
-
settings:
|
821 |
-
ui:
|
822 |
-
media_paths:
|
823 |
-
- original-instruction
|
824 |
-
- original-context
|
825 |
-
- original-response
|
826 |
-
- - new-instruction
|
827 |
-
- value
|
828 |
-
- '*'
|
829 |
-
- - new-context
|
830 |
-
- value
|
831 |
-
- '*'
|
832 |
-
- - new-response
|
833 |
-
- value
|
834 |
-
- '*'
|
835 |
-
markdown_paths: []
|
836 |
-
preferred_embedding: gte-small
|
837 |
-
- namespace: local
|
838 |
-
name: open-asssistant-conversations
|
839 |
-
source:
|
840 |
-
dataset_name: OpenAssistant/oasst1
|
841 |
-
source_name: huggingface
|
842 |
-
embeddings:
|
843 |
-
- path: text
|
844 |
-
embedding: gte-small
|
845 |
-
signals:
|
846 |
-
- path: text
|
847 |
-
signal:
|
848 |
-
signal_name: near_dup
|
849 |
-
- path: text
|
850 |
-
signal:
|
851 |
-
signal_name: pii
|
852 |
-
- path: text
|
853 |
-
signal:
|
854 |
-
signal_name: lang_detection
|
855 |
-
- path: text
|
856 |
-
signal:
|
857 |
-
embedding: gte-small
|
858 |
-
namespace: lilac
|
859 |
-
concept_name: positive-sentiment
|
860 |
-
signal_name: concept_score
|
861 |
-
- path: text
|
862 |
-
signal:
|
863 |
-
embedding: gte-small
|
864 |
-
namespace: lilac
|
865 |
-
concept_name: non-english
|
866 |
-
signal_name: concept_score
|
867 |
-
- path: text
|
868 |
-
signal:
|
869 |
-
embedding: gte-small
|
870 |
-
namespace: lilac
|
871 |
-
concept_name: toxicity
|
872 |
-
signal_name: concept_score
|
873 |
-
- path: text
|
874 |
-
signal:
|
875 |
-
embedding: gte-small
|
876 |
-
namespace: lilac
|
877 |
-
concept_name: question
|
878 |
-
signal_name: concept_score
|
879 |
-
- path: text
|
880 |
-
signal:
|
881 |
-
embedding: gte-small
|
882 |
-
namespace: lilac
|
883 |
-
concept_name: legal-termination
|
884 |
-
signal_name: concept_score
|
885 |
-
- path: text
|
886 |
-
signal:
|
887 |
-
embedding: gte-small
|
888 |
-
namespace: lilac
|
889 |
-
concept_name: source-code
|
890 |
-
signal_name: concept_score
|
891 |
-
- path: text
|
892 |
-
signal:
|
893 |
-
embedding: gte-small
|
894 |
-
namespace: lilac
|
895 |
-
concept_name: negative-sentiment
|
896 |
-
signal_name: concept_score
|
897 |
-
- path: text
|
898 |
-
signal:
|
899 |
-
embedding: gte-small
|
900 |
-
namespace: lilac
|
901 |
-
concept_name: negative-sentiment
|
902 |
-
signal_name: concept_score
|
903 |
-
- path: text
|
904 |
-
signal:
|
905 |
-
embedding: gte-small
|
906 |
-
namespace: lilac
|
907 |
-
concept_name: profanity
|
908 |
-
signal_name: concept_score
|
909 |
-
- path: text
|
910 |
-
signal:
|
911 |
-
signal_name: text_statistics
|
912 |
-
settings:
|
913 |
-
ui:
|
914 |
-
media_paths:
|
915 |
-
- text
|
916 |
-
markdown_paths: []
|
917 |
-
preferred_embedding: gte-small
|
918 |
-
- namespace: local
|
919 |
-
name: enron-emails
|
920 |
-
source:
|
921 |
-
dataset_name: EleutherAI/pile
|
922 |
-
config_name: enron_emails
|
923 |
-
sample_size: 100000
|
924 |
-
source_name: huggingface
|
925 |
-
embeddings:
|
926 |
-
- path: text
|
927 |
-
embedding: gte-small
|
928 |
-
signals:
|
929 |
-
- path: text
|
930 |
-
signal:
|
931 |
-
signal_name: near_dup
|
932 |
-
- path: text
|
933 |
-
signal:
|
934 |
-
signal_name: pii
|
935 |
-
- path: text
|
936 |
-
signal:
|
937 |
-
signal_name: lang_detection
|
938 |
-
- path: text
|
939 |
-
signal:
|
940 |
-
embedding: gte-small
|
941 |
-
namespace: lilac
|
942 |
-
concept_name: positive-sentiment
|
943 |
-
signal_name: concept_score
|
944 |
-
- path: text
|
945 |
-
signal:
|
946 |
-
embedding: gte-small
|
947 |
-
namespace: lilac
|
948 |
-
concept_name: non-english
|
949 |
-
signal_name: concept_score
|
950 |
-
- path: text
|
951 |
-
signal:
|
952 |
-
embedding: gte-small
|
953 |
-
namespace: lilac
|
954 |
-
concept_name: toxicity
|
955 |
-
signal_name: concept_score
|
956 |
-
- path: text
|
957 |
-
signal:
|
958 |
-
embedding: gte-small
|
959 |
-
namespace: lilac
|
960 |
-
concept_name: question
|
961 |
-
signal_name: concept_score
|
962 |
-
- path: text
|
963 |
-
signal:
|
964 |
-
embedding: gte-small
|
965 |
-
namespace: lilac
|
966 |
-
concept_name: legal-termination
|
967 |
-
signal_name: concept_score
|
968 |
-
- path: text
|
969 |
-
signal:
|
970 |
-
embedding: gte-small
|
971 |
-
namespace: lilac
|
972 |
-
concept_name: source-code
|
973 |
-
signal_name: concept_score
|
974 |
-
- path: text
|
975 |
-
signal:
|
976 |
-
embedding: gte-small
|
977 |
-
namespace: lilac
|
978 |
-
concept_name: negative-sentiment
|
979 |
-
signal_name: concept_score
|
980 |
-
- path: text
|
981 |
-
signal:
|
982 |
-
embedding: gte-small
|
983 |
-
namespace: lilac
|
984 |
-
concept_name: profanity
|
985 |
-
signal_name: concept_score
|
986 |
-
- path: text
|
987 |
-
signal:
|
988 |
-
signal_name: text_statistics
|
989 |
-
settings:
|
990 |
-
ui:
|
991 |
-
media_paths:
|
992 |
-
- text
|
993 |
-
markdown_paths: []
|
994 |
-
preferred_embedding: gte-small
|
995 |
-
- namespace: local
|
996 |
-
name: OpenOrca
|
997 |
-
source:
|
998 |
-
dataset_name: Open-Orca/OpenOrca
|
999 |
-
source_name: huggingface
|
1000 |
-
embeddings:
|
1001 |
-
- path: question
|
1002 |
-
embedding: gte-small
|
1003 |
-
- path: response
|
1004 |
-
embedding: gte-small
|
1005 |
-
settings:
|
1006 |
-
ui:
|
1007 |
-
media_paths:
|
1008 |
-
- question
|
1009 |
-
- response
|
1010 |
-
markdown_paths: []
|
1011 |
-
- namespace: local
|
1012 |
-
name: langsmith-finetuning-rag
|
1013 |
-
source:
|
1014 |
-
filepaths:
|
1015 |
-
- https://storage.googleapis.com/lilac-data/datasets/langsmith-finetuning-rag/rag.jsonl
|
1016 |
-
source_name: json
|
1017 |
-
settings:
|
1018 |
-
ui:
|
1019 |
-
media_paths:
|
1020 |
-
- - inputs
|
1021 |
-
- question
|
1022 |
-
- - outputs
|
1023 |
-
- output
|
1024 |
-
markdown_paths: []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dist/README.md
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
This directory is used for locally built whl files.
|
2 |
-
We write a README.md to ensure an empty folder is uploaded when there is no whl.
|
|
|
|
|
|
dist/lilac-0.1.3-py3-none-any.whl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8263c29c0b61f57530cb419f858282c0c3dcc8b037f6634cb084edbd4ba0ae63
|
3 |
-
size 1170417
|
|
|
|
|
|
|
|
docker_start.py
DELETED
@@ -1,110 +0,0 @@
|
|
1 |
-
"""Startup work before running the web server."""
|
2 |
-
|
3 |
-
import os
|
4 |
-
import shutil
|
5 |
-
from typing import TypedDict
|
6 |
-
|
7 |
-
import yaml
|
8 |
-
from huggingface_hub import scan_cache_dir, snapshot_download
|
9 |
-
|
10 |
-
from lilac.concepts.db_concept import DiskConceptDB, get_concept_output_dir
|
11 |
-
from lilac.env import env, get_project_dir
|
12 |
-
from lilac.project import PROJECT_CONFIG_FILENAME
|
13 |
-
from lilac.utils import get_datasets_dir, get_lilac_cache_dir, log
|
14 |
-
|
15 |
-
|
16 |
-
def delete_old_files() -> None:
|
17 |
-
"""Delete old files from the cache."""
|
18 |
-
# Scan cache
|
19 |
-
try:
|
20 |
-
scan = scan_cache_dir()
|
21 |
-
except BaseException:
|
22 |
-
# Cache was not found.
|
23 |
-
return
|
24 |
-
|
25 |
-
# Select revisions to delete
|
26 |
-
to_delete = []
|
27 |
-
for repo in scan.repos:
|
28 |
-
latest_revision = max(repo.revisions, key=lambda x: x.last_modified)
|
29 |
-
to_delete.extend(
|
30 |
-
[revision.commit_hash for revision in repo.revisions if revision != latest_revision])
|
31 |
-
strategy = scan.delete_revisions(*to_delete)
|
32 |
-
|
33 |
-
# Delete them
|
34 |
-
log(f'Will delete {len(to_delete)} old revisions and save {strategy.expected_freed_size_str}')
|
35 |
-
strategy.execute()
|
36 |
-
|
37 |
-
|
38 |
-
class HfSpaceConfig(TypedDict):
|
39 |
-
"""The huggingface space config, defined in README.md.
|
40 |
-
|
41 |
-
See:
|
42 |
-
https://huggingface.co/docs/hub/spaces-config-reference
|
43 |
-
"""
|
44 |
-
title: str
|
45 |
-
datasets: list[str]
|
46 |
-
|
47 |
-
|
48 |
-
def main() -> None:
|
49 |
-
"""Download dataset files from the HF space that was uploaded before building the image."""
|
50 |
-
# SPACE_ID is the HuggingFace Space ID environment variable that is automatically set by HF.
|
51 |
-
repo_id = env('SPACE_ID', None)
|
52 |
-
if not repo_id:
|
53 |
-
return
|
54 |
-
|
55 |
-
delete_old_files()
|
56 |
-
|
57 |
-
with open(os.path.abspath('README.md')) as f:
|
58 |
-
# Strip the '---' for the huggingface readme config.
|
59 |
-
readme = f.read().strip().strip('---')
|
60 |
-
hf_config: HfSpaceConfig = yaml.safe_load(readme)
|
61 |
-
|
62 |
-
# Download the huggingface space data. This includes code and datasets, so we move the datasets
|
63 |
-
# alone to the data directory.
|
64 |
-
|
65 |
-
datasets_dir = get_datasets_dir(get_project_dir())
|
66 |
-
os.makedirs(datasets_dir, exist_ok=True)
|
67 |
-
for lilac_hf_dataset in hf_config['datasets']:
|
68 |
-
print('Downloading dataset from HuggingFace: ', lilac_hf_dataset)
|
69 |
-
snapshot_download(
|
70 |
-
repo_id=lilac_hf_dataset,
|
71 |
-
repo_type='dataset',
|
72 |
-
token=env('HF_ACCESS_TOKEN'),
|
73 |
-
local_dir=datasets_dir,
|
74 |
-
ignore_patterns=['.gitattributes', 'README.md'])
|
75 |
-
|
76 |
-
snapshot_dir = snapshot_download(repo_id=repo_id, repo_type='space', token=env('HF_ACCESS_TOKEN'))
|
77 |
-
|
78 |
-
spaces_data_dir = os.path.join(snapshot_dir, 'data')
|
79 |
-
# Copy the config file.
|
80 |
-
project_config_file = os.path.join(spaces_data_dir, PROJECT_CONFIG_FILENAME)
|
81 |
-
if os.path.exists(project_config_file):
|
82 |
-
shutil.copy(project_config_file, os.path.join(get_project_dir(), PROJECT_CONFIG_FILENAME))
|
83 |
-
|
84 |
-
# Delete cache files from persistent storage.
|
85 |
-
cache_dir = get_lilac_cache_dir(get_project_dir())
|
86 |
-
if os.path.exists(cache_dir):
|
87 |
-
shutil.rmtree(cache_dir)
|
88 |
-
|
89 |
-
# Copy cache files from the space if they exist.
|
90 |
-
spaces_cache_dir = get_lilac_cache_dir(spaces_data_dir)
|
91 |
-
if os.path.exists(spaces_cache_dir):
|
92 |
-
shutil.copytree(spaces_cache_dir, cache_dir)
|
93 |
-
|
94 |
-
# Copy concepts.
|
95 |
-
concepts = DiskConceptDB(spaces_data_dir).list()
|
96 |
-
for concept in concepts:
|
97 |
-
# Ignore lilac concepts, they're already part of the source code.
|
98 |
-
if concept.namespace == 'lilac':
|
99 |
-
continue
|
100 |
-
spaces_concept_output_dir = get_concept_output_dir(spaces_data_dir, concept.namespace,
|
101 |
-
concept.name)
|
102 |
-
persistent_output_dir = get_concept_output_dir(get_project_dir(), concept.namespace,
|
103 |
-
concept.name)
|
104 |
-
shutil.rmtree(persistent_output_dir, ignore_errors=True)
|
105 |
-
shutil.copytree(spaces_concept_output_dir, persistent_output_dir, dirs_exist_ok=True)
|
106 |
-
shutil.rmtree(spaces_concept_output_dir, ignore_errors=True)
|
107 |
-
|
108 |
-
|
109 |
-
if __name__ == '__main__':
|
110 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docker_start.sh
CHANGED
@@ -3,7 +3,7 @@
|
|
3 |
# Fail if any of the commands below fail.
|
4 |
set -e
|
5 |
|
6 |
-
|
7 |
gunicorn lilac.server:app \
|
8 |
--bind 0.0.0.0:5432 \
|
9 |
--preload -k uvicorn.workers.UvicornWorker \
|
|
|
3 |
# Fail if any of the commands below fail.
|
4 |
set -e
|
5 |
|
6 |
+
lilac hf-docker-start
|
7 |
gunicorn lilac.server:app \
|
8 |
--bind 0.0.0.0:5432 \
|
9 |
--preload -k uvicorn.workers.UvicornWorker \
|