Spaces:

lilacai
/

nikhil_staging

Runtime error

App Files Files Community

nsthorat-lilac committed on Sep 27, 2023

Commit

faf3244

1 Parent(s): f9a34e6

Upload folder using huggingface_hub

Browse files

Files changed (32) hide show

.env +0 -48
.env.demo +0 -5
Dockerfile +0 -41
README.md +0 -10
data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl +0 -0
data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl +0 -0
data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl +0 -0
data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl +0 -0
data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl +0 -0
data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl +0 -0
data/.cache/lilac/concept/lilac/non-english/gte-base.pkl +0 -0
data/.cache/lilac/concept/lilac/non-english/gte-small.pkl +0 -0
data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl +0 -0
data/.cache/lilac/concept/lilac/profanity/gte-base.pkl +0 -3
data/.cache/lilac/concept/lilac/profanity/gte-small.pkl +0 -3
data/.cache/lilac/concept/lilac/prompt-injections/gte-small.pkl +0 -0
data/.cache/lilac/concept/lilac/prompt-reveal/gte-small.pkl +0 -0
data/.cache/lilac/concept/lilac/question/cohere.pkl +0 -3
data/.cache/lilac/concept/lilac/question/gte-base.pkl +0 -3
data/.cache/lilac/concept/lilac/question/gte-small.pkl +0 -0
data/.cache/lilac/concept/lilac/question/openai.pkl +0 -3
data/.cache/lilac/concept/lilac/question/palm.pkl +0 -3
data/.cache/lilac/concept/lilac/question/sbert.pkl +0 -0
data/.cache/lilac/concept/lilac/source-code/gte-base.pkl +0 -0
data/.cache/lilac/concept/lilac/source-code/gte-small.pkl +0 -0
data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl +0 -3
data/.cache/lilac/concept/local/aliens/gte-small.pkl +0 -0
data/lilac.yml +0 -1024
dist/README.md +0 -2
dist/lilac-0.1.3-py3-none-any.whl +0 -3
docker_start.py +0 -110
docker_start.sh +1 -1

.env DELETED Viewed

@@ -1,48 +0,0 @@
-# To overwrite these variables, create a .env.local file
-# The path to the project directory. When used, this will be the global project directory for lilac.
-# When not defined, define the project directory with `lilac start ./data`.
-# LILAC_PROJECT_DIR=./data
-# Set to 1 for duckdb to use views instead of materialized tables (lower memory usage, but slower).
-DUCKDB_USE_VIEWS=0
-# Set to true to enable read-only mode, disabling the ability to add datasets & compute dataset
-# signals.
-# LILAC_AUTH_ENABLED=true
-# Variables that can be set in .env.local
-#
-# Get key from https://dashboard.cohere.ai/api-keys
-# COHERE_API_KEY=
-# GCS_REGION=
-# GCS_ACCESS_KEY=
-# GCS_SECRET_KEY=
-# Get key from https://platform.openai.com/account/api-keys
-# OPENAI_API_KEY=
-# Get key from https://makersuite.google.com/app/apikey
-# PALM_API_KEY=
-# HuggingFace demos: machine that uploads to HuggingFace.
-# For authenticating with HuggingFace to deploy to a Space.
-# HF_USERNAME=
-# The default repo to deploy to for a staging demo. Can be overridden by a command line flag.
-# HF_STAGING_DEMO_REPO='HF_ORG/HF_REPO_NAME'
-# For Google-login. This is generated from the Google Cloud Console for a web client.
-# See: https://developers.google.com/identity/protocols/oauth2
-GOOGLE_CLIENT_ID='279475920249-i8llm8vbos1vj5m1qocir8narb3r0enu.apps.googleusercontent.com'
-# The client secret of the above client.
-# GOOGLE_CLIENT_SECRET=
-# A random string for oauth sessions.
-# LILAC_OAUTH_SECRET_KEY=
-# LangSmith source setup.
-# LANGCHAIN_API_KEY=
-# LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
-# Firebase deployment token.
-# FIREBASE_TOKEN=

.env.demo DELETED Viewed

@@ -1,5 +0,0 @@
-LILAC_PROJECT_DIR='/data'
-HF_HOME=/data/.huggingface
-TRANSFORMERS_CACHE=/data/.cache
-XDG_CACHE_HOME=/data/.cache
-GOOGLE_ANALYTICS_ENABLED=true

Dockerfile DELETED Viewed

@@ -1,41 +0,0 @@
-# Image for the Lilac demo: installs lilac (a local wheel from ./dist when present, otherwise the
-# public pip package), copies the env/config files and start scripts, and runs docker_start.sh.
-# NOTE: When we upgrade to 3.11 we can use a slimmer docker image which comes with gcc.
-FROM python:3.9-bullseye
-# Allow statements and log messages to immediately appear in the container logs.
-# Uses the key=value form; the space-separated `ENV key value` form is deprecated.
-ENV PYTHONUNBUFFERED=True
-# Create and switch to a non-root user with UID 1000.
-# See: https://huggingface.co/docs/hub/spaces-sdks-docker#permissions
-RUN useradd -m -u 1000 user
-USER user
-ENV HOME=/home/user \
-  PATH=/home/user/.local/bin:$PATH
-# Set the working directory in the container.
-WORKDIR $HOME/app
-# Install the dependencies. This will look in ./dist for any wheels that match lilac. If they are
-# not found, it will use the public pip package.
-# Pip install lilac[all] and dependencies before trying to install the local wheel. This allows us
-# to get layer-cache hits on dependency installations when using a local wheel. When using the
-# public pip package, the second call will be a no-op.
-# --no-cache-dir keeps pip's download cache out of the image layer. The requirement is quoted so
-# the shell cannot glob-expand the [all] extra.
-RUN python -m pip install --no-cache-dir "lilac[all]"
-# Install from the local wheel inside ./dist. This will be a no-op if the wheel is not found.
-COPY --chown=user /dist ./dist/
-RUN python -m pip install --no-cache-dir --find-links=dist --upgrade "lilac[all]"
-# NOTE(review): these env files are baked into the image; keep real secrets out of them and
-# provide secrets at runtime instead.
-COPY --chown=user .env .
-COPY --chown=user .env.demo .
-# Copy the README so we can read the datasets from the HuggingFace config.
-COPY --chown=user README.md .
-# Copy the license just in case.
-COPY --chown=user LICENSE .
-COPY --chown=user docker_start.sh docker_start.py ./
-# Make a local data directory for non-persistent storage demos. This RUN executes as `user`
-# (see USER above), so the directory is already owned by `user` and needs no separate chown layer.
-RUN mkdir -p ./data
-CMD ["bash", "docker_start.sh"]

README.md DELETED Viewed

@@ -1,10 +0,0 @@
----
-title: Lilac
-emoji: "\U0001F337"
-colorFrom: purple
-colorTo: purple
-sdk: docker
-app_port: 5432
-datasets: []
----

data/.cache/lilac/concept/100712716653593140239/aliens/gte-small.pkl DELETED Viewed

Binary file (10.8 kB)

data/.cache/lilac/concept/100712716653593140239/alienz/gte-small.pkl DELETED Viewed

Binary file (21.7 kB)

data/.cache/lilac/concept/100712716653593140239/asdf/gte-small.pkl DELETED Viewed

Binary file (21.7 kB)

data/.cache/lilac/concept/100712716653593140239/private_aliens/gte-small.pkl DELETED Viewed

Binary file (21.8 kB)

data/.cache/lilac/concept/lilac/legal-termination/gte-small.pkl DELETED Viewed

Binary file (60.6 kB)

data/.cache/lilac/concept/lilac/negative-sentiment/gte-small.pkl DELETED Viewed

Binary file (202 kB)

data/.cache/lilac/concept/lilac/non-english/gte-base.pkl DELETED Viewed

Binary file (645 kB)

data/.cache/lilac/concept/lilac/non-english/gte-small.pkl DELETED Viewed

Binary file (330 kB)

data/.cache/lilac/concept/lilac/positive-sentiment/gte-small.pkl DELETED Viewed

Binary file (180 kB)

data/.cache/lilac/concept/lilac/profanity/gte-base.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d4ac21aa8bd428688a64f75221338be8c676d208de61a9eba948300e8aa43af3
-size 3301300

data/.cache/lilac/concept/lilac/profanity/gte-small.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:89495a1f968ddcb3f76ad46dbe7b6503a7b76afcdac37abbeb15c81d38c2f9d4
-size 1672934

data/.cache/lilac/concept/lilac/prompt-injections/gte-small.pkl DELETED Viewed

Binary file (71.3 kB)

data/.cache/lilac/concept/lilac/prompt-reveal/gte-small.pkl DELETED Viewed

Binary file (69.6 kB)

data/.cache/lilac/concept/lilac/question/cohere.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9768c28d6ed72e4a1a5819fef4157fb1f30a50f1e165bfcdd87d0fa761146902
-size 6254174

data/.cache/lilac/concept/lilac/question/gte-base.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d2ae5bf4275be11be99cb2e90c03e35c9d2749efc3b34a2d1db1e9f0c99325d6
-size 1194925

data/.cache/lilac/concept/lilac/question/gte-small.pkl DELETED Viewed

Binary file (611 kB)

data/.cache/lilac/concept/lilac/question/openai.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4a14c6df6924f45391654fe78dee8cf996de3abb8acf8ca0f81a65814572d493
-size 2362432

data/.cache/lilac/concept/lilac/question/palm.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cce86748bca57bd328f68b97ee80b4e3343ee4008d8951c5b061b6dd9335df7e
-size 1194921

data/.cache/lilac/concept/lilac/question/sbert.pkl DELETED Viewed

Binary file (611 kB)

data/.cache/lilac/concept/lilac/source-code/gte-base.pkl DELETED Viewed

Binary file (287 kB)

data/.cache/lilac/concept/lilac/source-code/gte-small.pkl DELETED Viewed

Binary file (147 kB)

data/.cache/lilac/concept/lilac/toxicity/gte-small.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b6a074a3ac60cc9bfb82c4bf19d0e8c8d3837cb2b68b97efe8960c16675477f1
-size 1886420

data/.cache/lilac/concept/local/aliens/gte-small.pkl DELETED Viewed

Binary file (28.4 kB)

data/lilac.yml DELETED Viewed

@@ -1,1024 +0,0 @@
-# Lilac project config.
-# See https://lilacml.com/api_reference/index.html#lilac.Config for details.
-datasets:
-  - namespace: local
-    name: glue
-    source:
-      dataset_name: glue
-      config_name: ax
-      source_name: huggingface
-    embeddings:
-      - path: premise
-        embedding: gte-small
-      - path: premise
-        embedding: gte-base
-      - path: hypothesis
-        embedding: gte-small
-    signals:
-      - path: premise
-        signal:
-          signal_name: pii
-      - path: hypothesis
-        signal:
-          signal_name: pii
-      - path: premise
-        signal:
-          signal_name: text_statistics
-    settings:
-      ui:
-        media_paths:
-          - premise
-        markdown_paths: []
-  - namespace: local
-    name: glue_ax
-    source:
-      dataset_name: glue
-      config_name: ax
-      source_name: huggingface
-    embeddings:
-      - path: hypothesis
-        embedding: gte-small
-    signals:
-      - path: premise
-        signal:
-          signal_name: text_statistics
-      - path: premise
-        signal:
-          signal_name: pii
-      - path: premise
-        signal:
-          signal_name: near_dup
-      - path: hypothesis
-        signal:
-          embedding: gte-small
-          namespace: ''
-          concept_name: ''
-          signal_name: concept_score
-      - path: hypothesis
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: positive-sentiment
-          signal_name: concept_score
-      - path: hypothesis
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-    settings:
-      ui:
-        media_paths:
-          - hypothesis
-        markdown_paths: []
-  - namespace: local
-    name: imdb3
-    source:
-      dataset_name: imdb
-      source_name: huggingface
-    settings:
-      ui:
-        media_paths:
-          - text
-        markdown_paths: []
-  - namespace: local
-    name: imdb
-    source:
-      dataset_name: imdb
-      source_name: huggingface
-    embeddings:
-      - path: text
-        embedding: gte-small
-    signals:
-      - path: text
-        signal:
-          signal_name: pii
-      - path: text
-        signal:
-          signal_name: text_statistics
-    settings:
-      ui:
-        media_paths:
-          - text
-        markdown_paths: []
-  - namespace: local
-    name: imdb2
-    source:
-      dataset_name: imdb
-      source_name: huggingface
-    settings:
-      ui:
-        media_paths:
-          - text
-        markdown_paths: []
-  - namespace: lilac
-    name: OpenOrca-100k
-    source:
-      dataset_name: Open-Orca/OpenOrca
-      sample_size: 100000
-      source_name: huggingface
-    embeddings:
-      - path: question
-        embedding: gte-small
-      - path: response
-        embedding: gte-small
-    signals:
-      - path: question
-        signal:
-          signal_name: near_dup
-      - path: question
-        signal:
-          signal_name: pii
-      - path: question
-        signal:
-          signal_name: lang_detection
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: positive-sentiment
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: toxicity
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: question
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: legal-termination
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: source-code
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: profanity
-          signal_name: concept_score
-      - path: question
-        signal:
-          signal_name: text_statistics
-      - path: response
-        signal:
-          signal_name: near_dup
-      - path: response
-        signal:
-          signal_name: pii
-      - path: response
-        signal:
-          signal_name: lang_detection
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: positive-sentiment
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: toxicity
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: question
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: legal-termination
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: source-code
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: profanity
-          signal_name: concept_score
-      - path: response
-        signal:
-          signal_name: text_statistics
-      - path: system_prompt
-        signal:
-          signal_name: pii
-    settings:
-      ui:
-        media_paths:
-          - question
-          - response
-        markdown_paths: []
-  - namespace: local
-    name: the_movies_dataset
-    source:
-      filepaths:
-        - gs://lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv
-      names: []
-      source_name: csv
-    settings:
-      ui:
-        media_paths:
-          - overview
-        markdown_paths: []
-  - namespace: local
-    name: glue_ax_parquet
-    source:
-      filepaths:
-        - gs://lilac-data/datasets/glue_ax_parquet/glue_ax.parquet
-      source_name: parquet
-    settings:
-      ui:
-        media_paths:
-          - premise
-        markdown_paths: []
-  - namespace: lilac
-    name: mmlu_professional_law
-    source:
-      dataset_name: cais/mmlu
-      config_name: professional_law
-      source_name: huggingface
-    embeddings:
-      - path: question
-        embedding: gte-small
-      - path:
-          - choices
-          - '*'
-        embedding: gte-small
-    signals:
-      - path: question
-        signal:
-          signal_name: near_dup
-      - path: question
-        signal:
-          signal_name: pii
-      - path: question
-        signal:
-          signal_name: lang_detection
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: positive-sentiment
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: toxicity
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: question
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: legal-termination
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: source-code
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path: question
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: profanity
-          signal_name: concept_score
-      - path: question
-        signal:
-          signal_name: text_statistics
-      - path:
-          - choices
-          - '*'
-        signal:
-          signal_name: near_dup
-      - path:
-          - choices
-          - '*'
-        signal:
-          signal_name: pii
-      - path:
-          - choices
-          - '*'
-        signal:
-          signal_name: lang_detection
-      - path:
-          - choices
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: positive-sentiment
-          signal_name: concept_score
-      - path:
-          - choices
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-      - path:
-          - choices
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: toxicity
-          signal_name: concept_score
-      - path:
-          - choices
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: question
-          signal_name: concept_score
-      - path:
-          - choices
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: legal-termination
-          signal_name: concept_score
-      - path:
-          - choices
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: source-code
-          signal_name: concept_score
-      - path:
-          - choices
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path:
-          - choices
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: profanity
-          signal_name: concept_score
-      - path:
-          - choices
-          - '*'
-        signal:
-          signal_name: text_statistics
-    settings:
-      ui:
-        media_paths:
-          - question
-          - - choices
-            - '*'
-        markdown_paths: []
-      preferred_embedding: gte-small
-  - namespace: local
-    name: deepset-prompt-inj
-    source:
-      dataset_name: deepset/prompt-injections
-      source_name: huggingface
-    embeddings:
-      - path: text
-        embedding: gte-small
-    settings:
-      ui:
-        media_paths:
-          - text
-        markdown_paths: []
-  - namespace: local
-    name: jasper-prompt-inj
-    source:
-      dataset_name: JasperLS/prompt-injections
-      source_name: huggingface
-    embeddings:
-      - path: text
-        embedding: gte-small
-    settings:
-      ui:
-        media_paths:
-          - text
-        markdown_paths: []
-  - namespace: local
-    name: mosaic-chat-v2
-    source:
-      dataset_name: sam-mosaic/chat-v2
-      source_name: huggingface
-    embeddings:
-      - path: prompt
-        embedding: gte-small
-      - path: response
-        embedding: gte-small
-    signals:
-      - path: prompt
-        signal:
-          signal_name: near_dup
-      - path: prompt
-        signal:
-          signal_name: pii
-      - path: prompt
-        signal:
-          signal_name: lang_detection
-      - path: prompt
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-      - path: prompt
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: toxicity
-          signal_name: concept_score
-      - path: prompt
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: source-code
-          signal_name: concept_score
-      - path: prompt
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path: prompt
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: profanity
-          signal_name: concept_score
-      - path: prompt
-        signal:
-          signal_name: text_statistics
-      - path: response
-        signal:
-          signal_name: near_dup
-      - path: response
-        signal:
-          signal_name: pii
-      - path: response
-        signal:
-          signal_name: lang_detection
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: toxicity
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: source-code
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path: response
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: profanity
-          signal_name: concept_score
-      - path: response
-        signal:
-          signal_name: text_statistics
-    settings:
-      ui:
-        media_paths:
-          - prompt
-          - response
-        markdown_paths: []
-      preferred_embedding: gte-small
-  - namespace: local
-    name: databricks-dolly-15k-curated-en
-    source:
-      dataset_name: argilla/databricks-dolly-15k-curated-en
-      source_name: huggingface
-    embeddings:
-      - path: original-context
-        embedding: gte-small
-      - path:
-          - new-context
-          - value
-          - '*'
-        embedding: gte-small
-      - path: original-instruction
-        embedding: gte-small
-    signals:
-      - path: original-instruction
-        signal:
-          signal_name: near_dup
-      - path: original-instruction
-        signal:
-          signal_name: pii
-      - path: original-instruction
-        signal:
-          signal_name: lang_detection
-      - path: original-instruction
-        signal:
-          signal_name: text_statistics
-      - path: original-context
-        signal:
-          signal_name: near_dup
-      - path: original-context
-        signal:
-          signal_name: pii
-      - path: original-context
-        signal:
-          signal_name: lang_detection
-      - path: original-context
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: positive-sentiment
-          signal_name: concept_score
-      - path: original-context
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-      - path: original-context
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: toxicity
-          signal_name: concept_score
-      - path: original-context
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: question
-          signal_name: concept_score
-      - path: original-context
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: legal-termination
-          signal_name: concept_score
-      - path: original-context
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: source-code
-          signal_name: concept_score
-      - path: original-context
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path: original-context
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: profanity
-          signal_name: concept_score
-      - path: original-context
-        signal:
-          signal_name: text_statistics
-      - path: original-response
-        signal:
-          signal_name: near_dup
-      - path: original-response
-        signal:
-          signal_name: pii
-      - path: original-response
-        signal:
-          signal_name: lang_detection
-      - path: original-response
-        signal:
-          signal_name: text_statistics
-      - path:
-          - new-instruction
-          - value
-          - '*'
-        signal:
-          signal_name: near_dup
-      - path:
-          - new-instruction
-          - value
-          - '*'
-        signal:
-          signal_name: pii
-      - path:
-          - new-instruction
-          - value
-          - '*'
-        signal:
-          signal_name: lang_detection
-      - path:
-          - new-instruction
-          - value
-          - '*'
-        signal:
-          signal_name: text_statistics
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          signal_name: near_dup
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          signal_name: pii
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          signal_name: lang_detection
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: positive-sentiment
-          signal_name: concept_score
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: toxicity
-          signal_name: concept_score
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: question
-          signal_name: concept_score
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: legal-termination
-          signal_name: concept_score
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: source-code
-          signal_name: concept_score
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: profanity
-          signal_name: concept_score
-      - path:
-          - new-context
-          - value
-          - '*'
-        signal:
-          signal_name: text_statistics
-      - path:
-          - new-response
-          - value
-          - '*'
-        signal:
-          signal_name: near_dup
-      - path:
-          - new-response
-          - value
-          - '*'
-        signal:
-          signal_name: pii
-      - path:
-          - new-response
-          - value
-          - '*'
-        signal:
-          signal_name: lang_detection
-      - path:
-          - new-response
-          - value
-          - '*'
-        signal:
-          signal_name: text_statistics
-      - path: original-instruction
-        signal:
-          signal_name: spacy_ner
-    settings:
-      ui:
-        media_paths:
-          - original-instruction
-          - original-context
-          - original-response
-          - - new-instruction
-            - value
-            - '*'
-          - - new-context
-            - value
-            - '*'
-          - - new-response
-            - value
-            - '*'
-        markdown_paths: []
-      preferred_embedding: gte-small
-  - namespace: local
-    name: open-asssistant-conversations
-    source:
-      dataset_name: OpenAssistant/oasst1
-      source_name: huggingface
-    embeddings:
-      - path: text
-        embedding: gte-small
-    signals:
-      - path: text
-        signal:
-          signal_name: near_dup
-      - path: text
-        signal:
-          signal_name: pii
-      - path: text
-        signal:
-          signal_name: lang_detection
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: positive-sentiment
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: toxicity
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: question
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: legal-termination
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: source-code
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: profanity
-          signal_name: concept_score
-      - path: text
-        signal:
-          signal_name: text_statistics
-    settings:
-      ui:
-        media_paths:
-          - text
-        markdown_paths: []
-      preferred_embedding: gte-small
-  - namespace: local
-    name: enron-emails
-    source:
-      dataset_name: EleutherAI/pile
-      config_name: enron_emails
-      sample_size: 100000
-      source_name: huggingface
-    embeddings:
-      - path: text
-        embedding: gte-small
-    signals:
-      - path: text
-        signal:
-          signal_name: near_dup
-      - path: text
-        signal:
-          signal_name: pii
-      - path: text
-        signal:
-          signal_name: lang_detection
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: positive-sentiment
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: non-english
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: toxicity
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: question
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: legal-termination
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: source-code
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: negative-sentiment
-          signal_name: concept_score
-      - path: text
-        signal:
-          embedding: gte-small
-          namespace: lilac
-          concept_name: profanity
-          signal_name: concept_score
-      - path: text
-        signal:
-          signal_name: text_statistics
-    settings:
-      ui:
-        media_paths:
-          - text
-        markdown_paths: []
-      preferred_embedding: gte-small
-  - namespace: local
-    name: OpenOrca
-    source:
-      dataset_name: Open-Orca/OpenOrca
-      source_name: huggingface
-    embeddings:
-      - path: question
-        embedding: gte-small
-      - path: response
-        embedding: gte-small
-    settings:
-      ui:
-        media_paths:
-          - question
-          - response
-        markdown_paths: []
-  - namespace: local
-    name: langsmith-finetuning-rag
-    source:
-      filepaths:
-        - https://storage.googleapis.com/lilac-data/datasets/langsmith-finetuning-rag/rag.jsonl
-      source_name: json
-    settings:
-      ui:
-        media_paths:
-          - - inputs
-            - question
-          - - outputs
-            - output
-        markdown_paths: []

dist/README.md DELETED Viewed

@@ -1,2 +0,0 @@
-This directory is used for locally built whl files.
-We write a README.md to ensure an empty folder is uploaded when there is no whl.

dist/lilac-0.1.3-py3-none-any.whl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8263c29c0b61f57530cb419f858282c0c3dcc8b037f6634cb084edbd4ba0ae63
-size 1170417

docker_start.py DELETED Viewed

@@ -1,110 +0,0 @@
-"""Startup work before running the web server."""
-import os
-import shutil
-from typing import TypedDict
-import yaml
-from huggingface_hub import scan_cache_dir, snapshot_download
-from lilac.concepts.db_concept import DiskConceptDB, get_concept_output_dir
-from lilac.env import env, get_project_dir
-from lilac.project import PROJECT_CONFIG_FILENAME
-from lilac.utils import get_datasets_dir, get_lilac_cache_dir, log
-def delete_old_files() -> None:
-  """Delete old files from the cache."""
-  # Scan cache
-  try:
-    scan = scan_cache_dir()
-  except BaseException:
-    # Cache was not found.
-    return
-  # Select revisions to delete
-  to_delete = []
-  for repo in scan.repos:
-    latest_revision = max(repo.revisions, key=lambda x: x.last_modified)
-    to_delete.extend(
-      [revision.commit_hash for revision in repo.revisions if revision != latest_revision])
-  strategy = scan.delete_revisions(*to_delete)
-  # Delete them
-  log(f'Will delete {len(to_delete)} old revisions and save {strategy.expected_freed_size_str}')
-  strategy.execute()
-class HfSpaceConfig(TypedDict):
-  """The huggingface space config, defined in README.md.
-  See:
-  https://huggingface.co/docs/hub/spaces-config-reference
-  """
-  # Title of the space; not read by the startup code in this file.
-  title: str
-  # Dataset repo ids; main() passes each one to snapshot_download with repo_type='dataset'.
-  datasets: list[str]
-def main() -> None:
-  """Download dataset files from the HF space that was uploaded before building the image."""
-  # SPACE_ID is the HuggingFace Space ID environment variable that is automatically set by HF.
-  repo_id = env('SPACE_ID', None)
-  if not repo_id:
-    return
-  delete_old_files()
-  with open(os.path.abspath('README.md')) as f:
-    # Strip the '---' for the huggingface readme config.
-    readme = f.read().strip().strip('---')
-    hf_config: HfSpaceConfig = yaml.safe_load(readme)
-  # Download the huggingface space data. This includes code and datasets, so we move the datasets
-  # alone to the data directory.
-  datasets_dir = get_datasets_dir(get_project_dir())
-  os.makedirs(datasets_dir, exist_ok=True)
-  for lilac_hf_dataset in hf_config['datasets']:
-    print('Downloading dataset from HuggingFace: ', lilac_hf_dataset)
-    snapshot_download(
-      repo_id=lilac_hf_dataset,
-      repo_type='dataset',
-      token=env('HF_ACCESS_TOKEN'),
-      local_dir=datasets_dir,
-      ignore_patterns=['.gitattributes', 'README.md'])
-  snapshot_dir = snapshot_download(repo_id=repo_id, repo_type='space', token=env('HF_ACCESS_TOKEN'))
-  spaces_data_dir = os.path.join(snapshot_dir, 'data')
-  # Copy the config file.
-  project_config_file = os.path.join(spaces_data_dir, PROJECT_CONFIG_FILENAME)
-  if os.path.exists(project_config_file):
-    shutil.copy(project_config_file, os.path.join(get_project_dir(), PROJECT_CONFIG_FILENAME))
-  # Delete cache files from persistent storage.
-  cache_dir = get_lilac_cache_dir(get_project_dir())
-  if os.path.exists(cache_dir):
-    shutil.rmtree(cache_dir)
-  # Copy cache files from the space if they exist.
-  spaces_cache_dir = get_lilac_cache_dir(spaces_data_dir)
-  if os.path.exists(spaces_cache_dir):
-    shutil.copytree(spaces_cache_dir, cache_dir)
-  # Copy concepts.
-  concepts = DiskConceptDB(spaces_data_dir).list()
-  for concept in concepts:
-    # Ignore lilac concepts, they're already part of the source code.
-    if concept.namespace == 'lilac':
-      continue
-    spaces_concept_output_dir = get_concept_output_dir(spaces_data_dir, concept.namespace,
-                                                       concept.name)
-    persistent_output_dir = get_concept_output_dir(get_project_dir(), concept.namespace,
-                                                   concept.name)
-    shutil.rmtree(persistent_output_dir, ignore_errors=True)
-    shutil.copytree(spaces_concept_output_dir, persistent_output_dir, dirs_exist_ok=True)
-    shutil.rmtree(spaces_concept_output_dir, ignore_errors=True)
-if __name__ == '__main__':
-  main()

docker_start.sh CHANGED Viewed

@@ -3,7 +3,7 @@
 # Fail if any of the commands below fail.
 set -e
-python docker_start.py
 gunicorn lilac.server:app \
   --bind 0.0.0.0:5432 \
   --preload -k uvicorn.workers.UvicornWorker \

 # Fail if any of the commands below fail.
 set -e
+lilac hf-docker-start
 gunicorn lilac.server:app \
   --bind 0.0.0.0:5432 \
   --preload -k uvicorn.workers.UvicornWorker \