File size: 1,819 Bytes
839621c 3c9d064 839621c cdb761d 839621c cdb761d 8c543d4 cdb761d 8c543d4 cdb761d 8c543d4 cdb761d 8c543d4 cdb761d 839621c cdb761d 839621c cdb761d 839621c cdb761d f92d1a9 cdb761d f92d1a9 cdb761d 839621c 3c9d064 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import json
from tempfile import mktemp
from huggingface_hub import duplicate_space, HfApi
# Module-level Hub client instance; the helper functions in this file call
# its methods (create_repo, upload_file, hf_hub_download).
hf_api = HfApi()
def setup_dataset_on_hub(repo_id, hub_token):
    """Create a dataset repo on the Hub and upload the seed data file to it.

    Args:
        repo_id: Identifier of the dataset repo to create and upload into.
        hub_token: Token forwarded to both Hub calls.
    """
    dataset_kind = "dataset"
    seed_file = "seed_data.json"

    # Step 1: create the (still empty) dataset repository.
    hf_api.create_repo(repo_id=repo_id, token=hub_token, repo_type=dataset_kind)

    # Step 2: upload the seed file, given as a relative path, under the
    # same name inside the repo.
    hf_api.upload_file(
        path_or_fileobj=seed_file,
        path_in_repo=seed_file,
        repo_id=repo_id,
        repo_type=dataset_kind,
        token=hub_token,
    )
def duplicate_space_on_hub(source_repo, target_repo, hub_token, private=False):
    """Duplicate the Space ``source_repo`` into ``target_repo``.

    Args:
        source_repo: Id of the Space to copy from.
        target_repo: Id of the Space to copy to.
        hub_token: Token forwarded to ``duplicate_space``.
        private: Forwarded as the ``private`` flag of the duplicate.
    """
    # exist_ok=True is always passed; presumably so that an already existing
    # target Space is not treated as an error -- confirm against the
    # huggingface_hub documentation.
    options = {"token": hub_token, "private": private, "exist_ok": True}
    duplicate_space(from_id=source_repo, to_id=target_repo, **options)
def add_project_config_to_space_repo(
    dataset_repo_id,
    hub_token,
    project_name,
    argilla_space_repo_id,
    project_space_repo_id,
):
    """Write the project configuration to JSON and upload it to the project Space.

    The configuration is first written to ``project_config.json`` (a relative
    path) and that file is then uploaded to the Space repo
    ``project_space_repo_id`` under the same name.

    Args:
        dataset_repo_id: Stored under the ``dataset_repo_id`` key.
        hub_token: Token forwarded to the upload call.
        project_name: Stored under the ``project_name`` key.
        argilla_space_repo_id: Stored under the ``argilla_space_repo_id`` key.
        project_space_repo_id: Stored under the ``project_space_repo_id`` key
            and also used as the upload target repo.
    """
    config_file = "project_config.json"
    project_config = {
        "project_name": project_name,
        "argilla_space_repo_id": argilla_space_repo_id,
        "project_space_repo_id": project_space_repo_id,
        "dataset_repo_id": dataset_repo_id,
    }

    # Serialize the config to disk; the file is closed before the upload.
    with open(config_file, "w") as handle:
        json.dump(project_config, handle)

    hf_api.upload_file(
        path_or_fileobj=config_file,
        path_in_repo=config_file,
        token=hub_token,
        repo_id=project_space_repo_id,
        repo_type="space",
    )
def pull_seed_data_from_repo(repo_id, hub_token):
    """Download ``seed_data.json`` from a dataset repo and return its parsed JSON.

    Args:
        repo_id: Identifier of the dataset repo to download from.
        hub_token: Token forwarded to the download call.

    Returns:
        The object decoded from the downloaded JSON file.
    """
    # ``filename`` names the file inside the repo, not a local destination.
    # "seed_data.json" is the name setup_dataset_on_hub uploads the seed
    # data under. The call returns the local path of the downloaded file.
    local_path = hf_api.hf_hub_download(
        repo_id=repo_id,
        filename="seed_data.json",
        repo_type="dataset",
        token=hub_token,
    )
    # Context manager so the file handle is closed after parsing.
    with open(local_path, encoding="utf-8") as seed_file:
        return json.load(seed_file)
|