"""List pending requests from the EnergyStarAI debug requests dataset.

Downloads a snapshot of the ``EnergyStarAI/requests_debug`` dataset and
prints one ``model,task`` line for every JSON request file whose ``status``
field is ``PENDING``.
"""
import json
import os

from huggingface_hub import snapshot_download

# Hub access token, read from the DEBUG environment variable
# (None when the variable is unset).
TOKEN = os.environ.get("DEBUG")

# Path returned by snapshot_download; walked below as the root of the
# downloaded request files.
requests_dataset = snapshot_download(
    'EnergyStarAI/requests_debug', token=TOKEN, repo_type="dataset"
)


def normalize_task(task):
    """Return *task* lower-cased with whitespace runs replaced by ``_``.

    This assumes task names are written as whitespace-separated words and
    are referred to elsewhere in snake_case form, e.g.
    ``"Text Generation"`` becomes ``"text_generation"``.
    """
    return '_'.join(task.split()).lower()


for dirpath, _dirnames, filenames in os.walk(requests_dataset):
    for filename in filenames:
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(dirpath, filename)
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(file_path, encoding="utf-8") as fp:
            request = json.load(fp)
        # Skip JSON files that are not request objects or carry no status,
        # rather than aborting the whole listing on the first odd file.
        if not isinstance(request, dict) or request.get('status') != 'PENDING':
            continue
        model = request['model']
        task = normalize_task(request['task'])
        print(f"{model},{task}")