Spaces:
Runtime error
Runtime error
nsthorat-lilac
committed on
Commit
·
e5fe28b
1
Parent(s):
fe2e46f
Upload docker_start.py with huggingface_hub
Browse files- docker_start.py +116 -0
docker_start.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Startup work before running the web server."""
|
2 |
+
|
3 |
+
import os
|
4 |
+
import shutil
|
5 |
+
from typing import TypedDict
|
6 |
+
|
7 |
+
import yaml
|
8 |
+
from huggingface_hub import scan_cache_dir, snapshot_download
|
9 |
+
|
10 |
+
from lilac.concepts.db_concept import CONCEPTS_DIR, DiskConceptDB, get_concept_output_dir
|
11 |
+
from lilac.env import env, get_project_dir
|
12 |
+
from lilac.project import PROJECT_CONFIG_FILENAME
|
13 |
+
from lilac.utils import get_datasets_dir, get_lilac_cache_dir, log
|
14 |
+
|
15 |
+
|
16 |
+
def delete_old_files() -> None:
  """Delete every cached HuggingFace revision except the newest one of each repo.

  Scans the local HuggingFace cache with `scan_cache_dir`, keeps the revision with the greatest
  `last_modified` for each repo, and deletes all the other revisions. If the cache cannot be
  scanned, this returns without deleting anything.
  """
  # Scan cache.
  try:
    scan = scan_cache_dir()
  except Exception:
    # Best effort: the scan failed (typically because the cache was not found), so there is
    # nothing to prune. `Exception` rather than `BaseException` so that KeyboardInterrupt and
    # SystemExit still propagate.
    return

  # Select revisions to delete: everything but the most recently modified revision of each repo.
  to_delete: list[str] = []
  for repo in scan.repos:
    if not repo.revisions:
      # `max` raises ValueError on an empty collection; a repo without revisions has nothing to
      # prune.
      continue
    latest_revision = max(repo.revisions, key=lambda x: x.last_modified)
    to_delete.extend(
      revision.commit_hash for revision in repo.revisions if revision != latest_revision)
  strategy = scan.delete_revisions(*to_delete)

  # Delete them.
  log(f'Will delete {len(to_delete)} old revisions and save {strategy.expected_freed_size_str}')
  strategy.execute()
|
36 |
+
|
37 |
+
|
38 |
+
class HfSpaceConfig(TypedDict):
  """The huggingface space config, defined in README.md.

  Only the keys declared here are typed; the README front matter may contain others.

  See:
  https://huggingface.co/docs/hub/spaces-config-reference
  """
  # The `title` entry of the space config.
  title: str
  # HuggingFace dataset repo ids; `main` downloads each one with `repo_type='dataset'`.
  datasets: list[str]
|
46 |
+
|
47 |
+
|
48 |
+
def _extract_front_matter(readme: str) -> str:
  """Return the YAML front matter of a README without its '---' delimiter lines.

  If the text does not start with '---' it is returned stripped of surrounding whitespace.
  Anything after the closing '---' (the markdown body) is dropped.
  """
  text = readme.strip()
  delimiter = '---'
  if not text.startswith(delimiter):
    return text
  # `str.strip('---')` would strip any run of '-' characters and would keep a trailing markdown
  # body, so cut explicitly at the closing delimiter instead.
  front_matter, _, _ = text[len(delimiter):].partition('\n' + delimiter)
  return front_matter


def main() -> None:
  """Download dataset files from the HF space that was uploaded before building the image.

  Does nothing unless the `SPACE_ID` environment variable is set. Otherwise it:
    1. Prunes old revisions from the HuggingFace cache.
    2. Downloads every dataset listed under `datasets` in the README.md front matter into the
       project's datasets directory.
    3. Downloads the space snapshot and copies its project config, lilac cache, and non-`lilac`
       concepts from the snapshot's `data` directory into the project directory.
  """
  # SPACE_ID is the HuggingFace Space ID environment variable that is automatically set by HF.
  repo_id = env('SPACE_ID', None)
  if not repo_id:
    return

  delete_old_files()

  with open(os.path.abspath('README.md'), encoding='utf-8') as f:
    # Drop the '---' delimiters of the huggingface readme config before parsing it as YAML.
    hf_config: HfSpaceConfig = yaml.safe_load(_extract_front_matter(f.read()))

  project_dir = get_project_dir()
  hf_token = env('HF_ACCESS_TOKEN')

  # Download the datasets listed in the space config straight into the datasets directory.
  datasets_dir = get_datasets_dir(project_dir)
  os.makedirs(datasets_dir, exist_ok=True)
  # An empty front matter parses to None and `datasets` may be absent; both mean "no datasets".
  for lilac_hf_dataset in (hf_config or {}).get('datasets') or []:
    print('Downloading dataset from HuggingFace: ', lilac_hf_dataset)
    snapshot_download(
      repo_id=lilac_hf_dataset,
      repo_type='dataset',
      token=hf_token,
      local_dir=datasets_dir,
      ignore_patterns=['.gitattributes', 'README.md'])

  # Download the huggingface space data. Only selected pieces of its `data` directory are copied
  # into the project directory below.
  snapshot_dir = snapshot_download(repo_id=repo_id, repo_type='space', token=hf_token)
  spaces_data_dir = os.path.join(snapshot_dir, 'data')

  # Copy the config file.
  project_config_file = os.path.join(spaces_data_dir, PROJECT_CONFIG_FILENAME)
  if os.path.exists(project_config_file):
    shutil.copy(project_config_file, os.path.join(project_dir, PROJECT_CONFIG_FILENAME))

  # Delete cache files from persistent storage.
  cache_dir = get_lilac_cache_dir(project_dir)
  if os.path.exists(cache_dir):
    shutil.rmtree(cache_dir)

  # NOTE: This is temporary during the move of concepts into the pip package. Once all the demos
  # have been updated, this block can be deleted.
  old_lilac_concepts_data_dir = os.path.join(project_dir, CONCEPTS_DIR, 'lilac')
  if os.path.exists(old_lilac_concepts_data_dir):
    shutil.rmtree(old_lilac_concepts_data_dir)

  # Copy cache files from the space if they exist. `cache_dir` was removed above, so `copytree`
  # creates it fresh.
  spaces_cache_dir = get_lilac_cache_dir(spaces_data_dir)
  if os.path.exists(spaces_cache_dir):
    shutil.copytree(spaces_cache_dir, cache_dir)

  # Copy concepts.
  concepts = DiskConceptDB(spaces_data_dir).list()
  for concept in concepts:
    # Ignore lilac concepts, they're already part of the source code.
    if concept.namespace == 'lilac':
      continue
    spaces_concept_output_dir = get_concept_output_dir(spaces_data_dir, concept.namespace,
                                                       concept.name)
    persistent_output_dir = get_concept_output_dir(project_dir, concept.namespace, concept.name)
    # Replace the persisted concept output with the copy from the space, then remove the copy
    # from the downloaded snapshot.
    shutil.rmtree(persistent_output_dir, ignore_errors=True)
    shutil.copytree(spaces_concept_output_dir, persistent_output_dir, dirs_exist_ok=True)
    shutil.rmtree(spaces_concept_output_dir, ignore_errors=True)
|
113 |
+
|
114 |
+
|
115 |
+
if __name__ == '__main__':
  # Run the startup work only when this file is executed as a script, not when it is imported.
  main()
|