convert : make hf token optional (#14717)

* make hf token optional

* fail if we can't get necessary tokenizer config
Sigbjørn Skjæret
2025-07-16 23:17:43 +02:00
committed by GitHub
parent 496957e1cb
commit 19e5943d9e

convert_hf_to_gguf_update.py

@@ -7,7 +7,6 @@ import pathlib
 import re
 import requests
-import sys
 import json
 import shutil
 import argparse
@@ -69,8 +68,7 @@ args = parser.parse_args()
 hf_token = args.hf_token if args.hf_token is not None else hf_token
 if hf_token is None:
-    logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token")
-    sys.exit(1)
+    logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")
 
 # TODO: this string has to exercise as much pre-tokenizer functionality as possible
 # will be updated with time - contributions welcome
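
The hunk above turns a hard exit into a warning: the script can now refresh tokenizer hashes for public models with no credentials at all, and only gated repositories still need a token. A minimal sketch of the resolution order this implies (CLI argument first, then the cached token file); the helper name resolve_hf_token is illustrative, not part of the script:

    import logging
    import pathlib

    logger = logging.getLogger("convert_hf_to_gguf_update")

    def resolve_hf_token(cli_token):
        # An explicit --hf-token argument wins over the cached token.
        if cli_token is not None:
            return cli_token
        token_path = pathlib.Path.home() / ".cache" / "huggingface" / "token"
        if token_path.is_file():
            return token_path.read_text().strip()
        # No token anywhere: warn and continue; gated models will fail later with 401/403.
        logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")
        return None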
@@ -151,7 +149,7 @@ pre_computed_hashes = [
 
 def download_file_with_auth(url, token, save_path):
-    headers = {"Authorization": f"Bearer {token}"}
+    headers = {"Authorization": f"Bearer {token}"} if token else None
     response = sess.get(url, headers=headers)
     response.raise_for_status()
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
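
This one-line change is what actually makes anonymous downloads work: requests treats headers=None the same as passing no extra headers, so the GET goes out unauthenticated and raise_for_status() only fails (401/403) for gated repositories. A self-contained sketch of the helper under that reading; the final file write is assumed from the surrounding script, not shown in this hunk:

    import os
    import requests

    sess = requests.Session()

    def download_file_with_auth(url, token, save_path):
        # Attach an Authorization header only when a token is available;
        # headers=None means the request is sent with session defaults only.
        headers = {"Authorization": f"Bearer {token}"} if token else None
        response = sess.get(url, headers=headers)
        response.raise_for_status()  # 401/403 here usually means a gated repo and no/invalid token
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        with open(save_path, "wb") as f:  # assumed body of the real helper
            f.write(response.content)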
@@ -250,10 +248,9 @@ for model in [*pre_computed_hashes, *all_models]:
     else:
         # otherwise, compute the hash of the tokenizer
 
-        # Skip if the tokenizer folder does not exist or there are other download issues previously
-        if not os.path.exists(f"models/tokenizers/{name}"):
-            logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
-            continue
+        # Fail if the tokenizer folder with config does not exist or there are other download issues previously
+        if not os.path.isfile(f"models/tokenizers/{name}/tokenizer_config.json"):
+            raise OSError(f"Config for tokenizer {name} not found. The model may not exist or is not accessible with the provided token.")
 
         try:
             logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
@@ -261,9 +258,8 @@ for model in [*pre_computed_hashes, *all_models]:
                 tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
             else:
                 tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
-        except OSError as e:
-            logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
-            continue  # Skip to the next model if the tokenizer can't be loaded
+        except Exception as e:
+            raise OSError(f"Error loading tokenizer for model {name}.") from e
 
         chktok = tokenizer.encode(CHK_TXT)
         chkhsh = sha256(str(chktok).encode()).hexdigest()
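
Two details in this last hunk are easy to miss: the handler widens from OSError to Exception (tokenizer loading can also raise e.g. ValueError on an unrecognized or broken config), and raise ... from e chains the original error onto the new OSError, so the traceback still shows the root cause. A minimal sketch of the pattern together with the hash step that follows it; CHK_TXT is shortened here for illustration, while the real script uses a long pre-tokenizer exercise string:

    from hashlib import sha256
    from transformers import AutoTokenizer

    CHK_TXT = "Hello World!"  # stand-in for the script's pre-tokenizer test string
    name = "llama-bpe"        # example tokenizer name

    try:
        tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
    except Exception as e:
        # "from e" preserves the original exception as __cause__, so the output
        # shows both the OSError and the underlying reason loading failed.
        raise OSError(f"Error loading tokenizer for model {name}.") from e

    chktok = tokenizer.encode(CHK_TXT)  # token ids for the check string
    chkhsh = sha256(str(chktok).encode()).hexdigest()  # fingerprint identifying the pre-tokenizer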