I am testing some language models in my research. I have a problem with gated models, specifically with meta-llama/Llama-2-7b-hf. Since I can only use the environment provided by the university where I work, I use Docker for this purpose. With non-gated models I had no problems, but when I try the Llama model I get an error even when I provide my access token. The CLI cannot be used in this case, but I have managed to log in via the notebook. What is the issue? Please help me with the correct process.
from huggingface_hub import login
login(token = 'my token')
The login was successful.
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", token='my token')
This call failed, even when I additionally passed use_auth_token=True.
The error message:
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_errors.py:261, in hf_raise_for_status(response, endpoint_name)
260 try:
--> 261 response.raise_for_status()
262 except HTTPError as e:
File /opt/conda/lib/python3.10/site-packages/requests/models.py:1021, in Response.raise_for_status(self)
1020 if http_error_msg:
-> 1021 raise HTTPError(http_error_msg, response=self)
HTTPError: 403 Client Error: Forbidden for url: https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer_config.json
The above exception was the direct cause of the following exception:
GatedRepoError Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/transformers/utils/hub.py:417, in cached_file(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash)
415 try:
416 # Load from URL or cache if already cached
--> 417 resolved_file = hf_hub_download(
418 path_or_repo_id,
419 filename,
420 subfolder=None if len(subfolder) == 0 else subfolder,
421 repo_type=repo_type,
422 revision=revision,
423 cache_dir=cache_dir,
424 user_agent=user_agent,
425 force_download=force_download,
426 proxies=proxies,
427 resume_download=resume_download,
428 use_auth_token=use_auth_token,
429 local_files_only=local_files_only,
430 )
432 except RepositoryNotFoundError:
File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
116 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 118 return fn(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:1195, in hf_hub_download(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, local_dir_use_symlinks, user_agent, force_download, force_filename, proxies, etag_timeout, resume_download, token, local_files_only, legacy_cache_layout)
1194 try:
-> 1195 metadata = get_hf_file_metadata(
1196 url=url,
1197 token=token,
1198 proxies=proxies,
1199 timeout=etag_timeout,
1200 )
1201 except EntryNotFoundError as http_error:
1202 # Cache the non-existence of the file and raise
File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:118, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
116 kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
--> 118 return fn(*args, **kwargs)
File ~/.local/lib/python3.10/site-packages/huggingface_hub/file_download.py:1541, in get_hf_file_metadata(url, token, proxies, timeout)
1532 r = _request_wrapper(
1533 method="HEAD",
1534 url=url,
(...)
1539 timeout=timeout,
1540 )
-> 1541 hf_raise_for_status(r)
1543 # Return
File ~/.local/lib/python3.10/site-packages/huggingface_hub/utils/_errors.py:277, in hf_raise_for_status(response, endpoint_name)
274 message = (
275 f"{response.status_code} Client Error." + "\n\n" + f"Cannot access gated repo for url {response.url}."
276 )
--> 277 raise GatedRepoError(message, response) from e
279 elif error_code == "RepoNotFound" or response.status_code == 401:
280 # 401 is misleading as it is returned for:
281 # - private and gated repos if user is not authenticated
282 # - missing repos
283 # => for now, we process them as `RepoNotFound` anyway.
284 # See https://gist.github.com/Wauplin/46c27ad266b15998ce56a6603796f0b9
GatedRepoError: 403 Client Error. (Request ID: Root=1-64f2ec62-606f496c0d95f5a34a4a0d0f;01831139-7411-49fd-9c24-89b875b4dcf0)
Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer_config.json.
Your request to access model meta-llama/Llama-2-7b-hf is awaiting a review from the repo authors.
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
Cell In[25], line 2
1 # Load the tokenizer
----> 2 tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", token='my token', use_auth_token=True)
File ~/.local/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:652, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
649 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
651 # Next, let's try to use the tokenizer_config file to get the tokenizer class.
--> 652 tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
653 if "_commit_hash" in tokenizer_config:
654 kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
File ~/.local/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:496, in get_tokenizer_config(pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, subfolder, **kwargs)
434 """
435 Loads the tokenizer configuration from a pretrained model tokenizer configuration.
436
(...)
493 tokenizer_config = get_tokenizer_config("tokenizer-test")
494 ```"""
495 commit_hash = kwargs.get("_commit_hash", None)
--> 496 resolved_config_file = cached_file(
497 pretrained_model_name_or_path,
498 TOKENIZER_CONFIG_FILE,
499 cache_dir=cache_dir,
500 force_download=force_download,
501 resume_download=resume_download,
502 proxies=proxies,
503 use_auth_token=use_auth_token,
504 revision=revision,
505 local_files_only=local_files_only,
506 subfolder=subfolder,
507 _raise_exceptions_for_missing_entries=False,
508 _raise_exceptions_for_connection_errors=False,
509 _commit_hash=commit_hash,
510 )
511 if resolved_config_file is None:
512 logger.info("Could not locate the tokenizer configuration file, will try to use the model config instead.")
File ~/.local/lib/python3.10/site-packages/transformers/utils/hub.py:433, in cached_file(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash)
417 resolved_file = hf_hub_download(
418 path_or_repo_id,
419 filename,
(...)
429 local_files_only=local_files_only,
430 )
432 except RepositoryNotFoundError:
--> 433 raise EnvironmentError(
434 f"{path_or_repo_id} is not a local folder and is not a valid model identifier "
435 "listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to "
436 "pass a token having permission to this repo with `use_auth_token` or log in with "
437 "`huggingface-cli login` and pass `use_auth_token=True`."
438 )
439 except RevisionNotFoundError:
440 raise EnvironmentError(
441 f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists "
442 "for this model name. Check the model page at "
443 f"'https://huggingface.co/{path_or_repo_id}' for available revisions."
444 )
OSError: meta-llama/Llama-2-7b-hf is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo with `use_auth_token` or log in with `huggingface-cli login` and pass `use_auth_token=True`.