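# NOTE(review): the original first three lines read "Spaces:" / "Runtime error" /
# "Runtime error". They look like web-page status residue rather than config, so
# they are kept here only as a comment — confirm and delete if not needed.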
# Lilac project config.
# See https://lilacml.com/api_reference/index.html#lilac.Config for details.
#
# Each item under `datasets` declares one dataset: where it is loaded from
# (`source`), which fields get embeddings, which signals are computed on which
# fields, and UI settings.
datasets:
# local/glue: the `ax` config of the `glue` dataset, loaded via the huggingface source.
# `premise` is embedded with gte-small; PII runs on both text fields,
# text statistics on `premise` only.
- namespace: local
  name: glue
  source:
    dataset_name: glue
    config_name: ax
    source_name: huggingface
  embeddings:
  - path: premise
    embedding: gte-small
  signals:
  - path: premise
    signal:
      signal_name: pii
  - path: hypothesis
    signal:
      signal_name: pii
  - path: premise
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - premise
# local/glue_ax: same huggingface source as above (`glue`, config `ax`), but
# here `hypothesis` is the embedded field and the one shown in the UI.
- namespace: local
  name: glue_ax
  source:
    dataset_name: glue
    config_name: ax
    source_name: huggingface
  embeddings:
  - path: hypothesis
    embedding: gte-small
  signals:
  - path: premise
    signal:
      signal_name: text_statistics
  - path: premise
    signal:
      signal_name: pii
  - path: premise
    signal:
      signal_name: near_dup
  # NOTE(review): a `concept_score` signal on `hypothesis` with
  # namespace '' and concept_name '' was removed here — it names no concept,
  # so it cannot be resolved. Re-add it with a real namespace/concept if one
  # was intended.
  - path: hypothesis
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path: hypothesis
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  settings:
    ui:
      media_paths:
      - hypothesis
# local/imdb3: the `imdb` dataset from the huggingface source; no embeddings
# or signals configured, only `text` exposed as a media path.
- namespace: local
  name: imdb3
  source:
    dataset_name: imdb
    source_name: huggingface
  settings:
    ui:
      media_paths:
      - text
# local/imdb: the `imdb` dataset from the huggingface source, with a gte-small
# embedding plus PII and text-statistics signals on `text`.
- namespace: local
  name: imdb
  source:
    dataset_name: imdb
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  signals:
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
# local/imdb2: another copy of the `imdb` huggingface dataset without
# embeddings or signals; `text` is the only media path.
- namespace: local
  name: imdb2
  source:
    dataset_name: imdb
    source_name: huggingface
  settings:
    ui:
      media_paths:
      - text
# lilac/OpenOrca-100k: `Open-Orca/OpenOrca` from the huggingface source with
# sample_size 100000. `question` and `response` each get a gte-small embedding
# and the same signal set: near_dup, pii, lang_detection, eight lilac concept
# scores, and text_statistics.
- namespace: lilac
  name: OpenOrca-100k
  source:
    dataset_name: Open-Orca/OpenOrca
    sample_size: 100000
    source_name: huggingface
  embeddings:
  - path: question
    embedding: gte-small
  - path: response
    embedding: gte-small
  signals:
  # --- signals on `question` ---
  - path: question
    signal:
      signal_name: near_dup
  - path: question
    signal:
      signal_name: pii
  - path: question
    signal:
      signal_name: lang_detection
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path: question
    signal:
      signal_name: text_statistics
  # --- signals on `response` ---
  - path: response
    signal:
      signal_name: near_dup
  - path: response
    signal:
      signal_name: pii
  - path: response
    signal:
      signal_name: lang_detection
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path: response
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - question
      - response
# local/the_movies_dataset: a single CSV file read through the csv source;
# `overview` is the only media path.
- namespace: local
  name: the_movies_dataset
  source:
    filepaths:
    - gs://lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv
    names: []
    source_name: csv
  settings:
    ui:
      media_paths:
      - overview
# local/glue_ax_parquet: a single parquet file read through the parquet source;
# `premise` is the only media path.
- namespace: local
  name: glue_ax_parquet
  source:
    filepaths:
    - gs://lilac-data/datasets/glue_ax_parquet/glue_ax.parquet
    source_name: parquet
  settings:
    ui:
      media_paths:
      - premise
# lilac/mmlu_professional_law: `cais/mmlu`, config `professional_law`, from the
# huggingface source. Both `question` and the list path [choices, '*'] get a
# gte-small embedding and the same signal set: near_dup, pii, lang_detection,
# eight lilac concept scores, and text_statistics.
- namespace: lilac
  name: mmlu_professional_law
  source:
    dataset_name: cais/mmlu
    config_name: professional_law
    source_name: huggingface
  embeddings:
  - path: question
    embedding: gte-small
  - path:
    - choices
    - '*'
    embedding: gte-small
  signals:
  # --- signals on `question` ---
  - path: question
    signal:
      signal_name: near_dup
  - path: question
    signal:
      signal_name: pii
  - path: question
    signal:
      signal_name: lang_detection
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path: question
    signal:
      signal_name: text_statistics
  # --- signals on [choices, '*'] ---
  - path:
    - choices
    - '*'
    signal:
      signal_name: near_dup
  - path:
    - choices
    - '*'
    signal:
      signal_name: pii
  - path:
    - choices
    - '*'
    signal:
      signal_name: lang_detection
  - path:
    - choices
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path:
    - choices
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path:
    - choices
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path:
    - choices
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      signal_name: concept_score
  - path:
    - choices
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      signal_name: concept_score
  - path:
    - choices
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path:
    - choices
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path:
    - choices
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path:
    - choices
    - '*'
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - question
      - - choices
        - '*'
    preferred_embedding: gte-small
# local/deepset-prompt-inj: `deepset/prompt-injections` from the huggingface
# source, with a gte-small embedding on `text` and no signals.
- namespace: local
  name: deepset-prompt-inj
  source:
    dataset_name: deepset/prompt-injections
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  settings:
    ui:
      media_paths:
      - text
# local/jasper-prompt-inj: `JasperLS/prompt-injections` from the huggingface
# source, with a gte-small embedding on `text` and no signals.
- namespace: local
  name: jasper-prompt-inj
  source:
    dataset_name: JasperLS/prompt-injections
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  settings:
    ui:
      media_paths:
      - text
# local/mosaic-chat-v2: `sam-mosaic/chat-v2` from the huggingface source.
# `prompt` and `response` each get a gte-small embedding and the same signal
# set: near_dup, pii, lang_detection, five lilac concept scores
# (non-english, toxicity, source-code, negative-sentiment, profanity),
# and text_statistics.
- namespace: local
  name: mosaic-chat-v2
  source:
    dataset_name: sam-mosaic/chat-v2
    source_name: huggingface
  embeddings:
  - path: prompt
    embedding: gte-small
  - path: response
    embedding: gte-small
  signals:
  # --- signals on `prompt` ---
  - path: prompt
    signal:
      signal_name: near_dup
  - path: prompt
    signal:
      signal_name: pii
  - path: prompt
    signal:
      signal_name: lang_detection
  - path: prompt
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: prompt
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path: prompt
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path: prompt
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path: prompt
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path: prompt
    signal:
      signal_name: text_statistics
  # --- signals on `response` ---
  - path: response
    signal:
      signal_name: near_dup
  - path: response
    signal:
      signal_name: pii
  - path: response
    signal:
      signal_name: lang_detection
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path: response
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - prompt
      - response
    preferred_embedding: gte-small
# local/databricks-dolly-15k-curated-en: `argilla/databricks-dolly-15k-curated-en`
# from the huggingface source. Only the two context fields (`original-context`
# and [new-context, value, '*']) are embedded, so only they carry concept
# scores; the instruction and response fields get near_dup, pii,
# lang_detection and text_statistics only.
- namespace: local
  name: databricks-dolly-15k-curated-en
  source:
    dataset_name: argilla/databricks-dolly-15k-curated-en
    source_name: huggingface
  embeddings:
  - path: original-context
    embedding: gte-small
  - path:
    - new-context
    - value
    - '*'
    embedding: gte-small
  signals:
  # --- signals on `original-instruction` ---
  - path: original-instruction
    signal:
      signal_name: near_dup
  - path: original-instruction
    signal:
      signal_name: pii
  - path: original-instruction
    signal:
      signal_name: lang_detection
  - path: original-instruction
    signal:
      signal_name: text_statistics
  # --- signals on `original-context` ---
  - path: original-context
    signal:
      signal_name: near_dup
  - path: original-context
    signal:
      signal_name: pii
  - path: original-context
    signal:
      signal_name: lang_detection
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path: original-context
    signal:
      signal_name: text_statistics
  # --- signals on `original-response` ---
  - path: original-response
    signal:
      signal_name: near_dup
  - path: original-response
    signal:
      signal_name: pii
  - path: original-response
    signal:
      signal_name: lang_detection
  - path: original-response
    signal:
      signal_name: text_statistics
  # --- signals on [new-instruction, value, '*'] ---
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      signal_name: near_dup
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      signal_name: pii
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      signal_name: lang_detection
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      signal_name: text_statistics
  # --- signals on [new-context, value, '*'] ---
  - path:
    - new-context
    - value
    - '*'
    signal:
      signal_name: near_dup
  - path:
    - new-context
    - value
    - '*'
    signal:
      signal_name: pii
  - path:
    - new-context
    - value
    - '*'
    signal:
      signal_name: lang_detection
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      signal_name: text_statistics
  # --- signals on [new-response, value, '*'] ---
  - path:
    - new-response
    - value
    - '*'
    signal:
      signal_name: near_dup
  - path:
    - new-response
    - value
    - '*'
    signal:
      signal_name: pii
  - path:
    - new-response
    - value
    - '*'
    signal:
      signal_name: lang_detection
  - path:
    - new-response
    - value
    - '*'
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - original-instruction
      - original-context
      - original-response
      - - new-instruction
        - value
        - '*'
      - - new-context
        - value
        - '*'
      - - new-response
        - value
        - '*'
    preferred_embedding: gte-small
# local/open-asssistant-conversations: `OpenAssistant/oasst1` from the
# huggingface source, with a gte-small embedding on `text` plus near_dup, pii,
# lang_detection, eight lilac concept scores, and text_statistics.
# NOTE(review): `name` is spelled "asssistant" (three s's). Left unchanged
# because the name is an identifier that may be referenced elsewhere — confirm
# before renaming.
- namespace: local
  name: open-asssistant-conversations
  source:
    dataset_name: OpenAssistant/oasst1
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  signals:
  - path: text
    signal:
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      signal_name: lang_detection
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  # The negative-sentiment concept score was listed twice with identical
  # settings; it is kept once.
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
    preferred_embedding: gte-small
# local/enron-emails: `EleutherAI/pile`, config `enron_emails`, from the
# huggingface source with sample_size 100000. `text` gets a gte-small embedding
# plus near_dup, pii, lang_detection, eight lilac concept scores, and
# text_statistics.
- namespace: local
  name: enron-emails
  source:
    dataset_name: EleutherAI/pile
    config_name: enron_emails
    sample_size: 100000
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  signals:
  - path: text
    signal:
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      signal_name: lang_detection
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
    preferred_embedding: gte-small